diff -uNr --exclude=CVS ../kernel.org/linux/CREDITS linuxppc64_2_4/CREDITS --- ../kernel.org/linux/CREDITS Sun Oct 21 12:20:57 2001 +++ linuxppc64_2_4/CREDITS Thu Oct 25 21:00:18 2001 @@ -969,6 +969,14 @@ S: 80050-430 - Curitiba - Paraná S: Brazil +N: Tom Gall +E: tom_gall@vnet.ibm.com +E: tgall@rochcivictheatre.org +D: ppc64, ppc +S: 710 Walnut St +S: Mantorville, MN 55955 +S: USA + N: Nigel Gamble E: nigel@nrg.org E: nigel@sgi.com
diff -uNr --exclude=CVS ../kernel.org/linux/Documentation/Configure.help linuxppc64_2_4/Documentation/Configure.help --- ../kernel.org/linux/Documentation/Configure.help Sat Oct 20 21:17:19 2001 +++ linuxppc64_2_4/Documentation/Configure.help Wed Nov 14 10:19:35 2001 @@ -120,6 +120,14 @@ If you don't know what to do here, say N. +PowerPC64 processor +CONFIG_PPC64 + The PowerPC architecture was designed for both 32 bit and 64 bit + processor implementations. 64 bit PowerPC processors are in many + ways a superset of their 32 bit PowerPC cousins. Each 64 bit PowerPC + processor also has a 32 bit mode to allow for 32 bit compatibility. + The home of the PowerPC 64 Linux project is at http://linuxppc64.org. + Multiquad support for NUMA systems CONFIG_MULTIQUAD This option is used for getting Linux to run on a (IBM/Sequent) NUMA @@ -12878,6 +12886,20 @@ hard drives and ADFS-formatted floppy disks. This is experimental code, so if you're unsure, say N. +JFS filesystem support +CONFIG_JFS_FS + This is a port of IBM's Journaled Filesystem. More information is + available in the file Documentation/filesystems/jfs.txt. + + If you do not intend to use the JFS filesystem, say N. + +JFS Debugging +CONFIG_JFS_DEBUG + If you are experiencing any problems with the JFS filesystem, say + Y here. This will cause additional debugging messages to be + written to the system log. Under normal circumstances, this + results in very little overhead. + /dev/pts file system for Unix98 PTYs CONFIG_DEVPTS_FS You should say Y here if you said Y to "Unix98 PTY support" above. @@ -17404,6 +17426,47 @@ 7xx, 8xx, 74xx, and 82xx processors. Currently, the default option is to build a kernel which works on the first three. +# Choice: i or p +Platform support +CONFIG_PPC_ISERIES + Linux runs on certain models of the IBM AS/400, now known as the + IBM iSeries. Generally, if you can run LPAR (Logical Partitioning) + on your iSeries you can run Linux in a partition on your machine. + + Linux also runs on most models of IBM pSeries hardware. (pSeries + used to be known as the RS/6000.) + + If you have an iSeries and want to run Linux in a partition, + select the iSeries option to build your kernel. + + If you have a pSeries and want to run Linux, select pSeries + as the option to build your kernel. + + See the project web page for exact information on which models + can run the 64 bit PowerPC kernel. + + iSeries Linux information from IBM can be found at: + + + pSeries Linux information from IBM can be found at: + + + Project information can be found at: + http://linuxppc64.org + +Platform support +CONFIG_PPC_PSERIES + Linux runs on most models of IBM pSeries hardware. (pSeries used + to be known as the RS/6000.) + + See the project web page for exact model information for the + 64 bit PowerPC kernel. + + pSeries Linux information from IBM can be found at: + + + Embedded 8xx Board Type CONFIG_RPXLITE RPX-Lite: @@ -17598,6 +17661,14 @@ have it autoloaded. The act of removing the module shuts down the sound hardware for more power savings.
+ +Support for Large Memory +CONFIG_MSCHUNKS + MsChunks stands for Main Store Chunks and specifically allows the + 64 bit PowerPC Linux kernel to optimize for machines with sparse + discontiguous memory. iSeries kernels need to have this on. + It is recommended that you answer Y for pSeries hardware as well. + ADB raw keycode support CONFIG_MAC_ADBKEYCODES This provides support for sending raw ADB keycodes to console @@ -19059,6 +19130,11 @@ To use this option, you have to check that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. + +Include realtime debugging +CONFIG_PPCDBG + Include in-kernel PowerPC 64 information hooks that may be turned on/off + in real time. # # A couple of things I keep forgetting: # capitalize: AppleTalk, Ethernet, DOS, DMA, FAT, FTP, Internet,
diff -uNr --exclude=CVS ../kernel.org/linux/Documentation/cachetlb.txt linuxppc64_2_4/Documentation/cachetlb.txt --- ../kernel.org/linux/Documentation/cachetlb.txt Sun Oct 21 12:40:36 2001 +++ linuxppc64_2_4/Documentation/cachetlb.txt Fri Oct 26 02:56:03 2001 @@ -260,8 +260,9 @@ Here is the new interface: - void copy_user_page(void *to, void *from, unsigned long address) - void clear_user_page(void *to, unsigned long address) + void copy_user_page(struct page *to, struct page *from, + unsigned long address) + void clear_user_page(struct page *to, unsigned long address) These two routines store data in user anonymous or COW pages. It allows a port to efficiently avoid D-cache alias @@ -279,6 +280,11 @@ If D-cache aliasing is not an issue, these two routines may simply call memcpy/memset directly and do nothing more. + + There are default versions of these procedures supplied in + include/linux/highmem.h. If a port does not want to use the + default versions, it should declare them and define the symbol + __HAVE_ARCH_USER_PAGE in include/asm/page.h. void flush_dcache_page(struct page *page)
diff -uNr --exclude=CVS ../kernel.org/linux/Documentation/filesystems/00-INDEX linuxppc64_2_4/Documentation/filesystems/00-INDEX --- ../kernel.org/linux/Documentation/filesystems/00-INDEX Wed Jun 20 13:10:27 2001 +++ linuxppc64_2_4/Documentation/filesystems/00-INDEX Thu Sep 13 14:29:38 2001 @@ -22,6 +22,8 @@ - info and mount options for the OS/2 HPFS. isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. +jfs.txt + - info and mount options for the JFS filesystem. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. ntfs.txt
diff -uNr --exclude=CVS ../kernel.org/linux/Documentation/filesystems/changelog.jfs linuxppc64_2_4/Documentation/filesystems/changelog.jfs --- ../kernel.org/linux/Documentation/filesystems/changelog.jfs Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/Documentation/filesystems/changelog.jfs Wed Nov 14 10:19:35 2001 @@ -0,0 +1,726 @@ +IBM's Journaled File System (JFS) for Linux version 1.0.9 +Team members +Steve Best sbest@us.ibm.com +Dave Kleikamp shaggy@us.ibm.com +Barry Arndt barndt@us.ibm.com + + +Release November 9, 2001 (version 1.0.9) + +This is our forty-seventh release of IBM's Enterprise JFS technology port to Linux. +Beta 1 was release 0.1.0 on 12/8/2000, Beta 2 was release 0.2.0 on 3/7/2001, +Beta 3 was release 0.3.0 on 4/30/2001, and release 1.0.0 was on 6/28/2001. + +The first drop on February 2, 2000 (jfs-0.0.1.tar.gz) had the following +functionality (enough to allow other developers to evaluate and potentially +contribute to this project): + + - MKFS functional and able to format an existing or newly created partition for JFS.
- mount/unmount functional + - able to view the self (.) and parent (..) directory entries. + +The second drop on February 28, 2000 (jfs-0.0.2.tar.gz) had fixes for MKFS, and the following utilities became functional: logredo, xchkdmp, and xpeek. In the file system, mkdir and the creation of files and directories were working (as were rm and rmdir). + +The third drop on March 24, 2000 (jfs-0.0.3.tar.gz) provided read and write capability for the file system. Also, support for hard and soft links is functional. + +The fourth drop on March 29, 2000 (jfs-0.0.4.tar.gz) provided rename capability for the file system and fixes for write problems. + +The fifth drop on April 6, 2000 (jfs-0.0.5.tar.gz) provided mknod capability for the file system. + +The sixth drop on April 21, 2000 (jfs-0.0.6.tar.gz) provided fixes for the file system and utilities. + +The seventh drop on May 11, 2000 (jfs-0.0.7.tar.gz) provided changes to the file system to be able to work on the development kernel 2.3.99-pre6. Drop 7 contains fixes that allow the file system to be built within the kernel. + +The eighth drop on June 20, 2000 (jfs-0.0.8.tar.gz) provided fixes for the file system and changes to the file system to be able to work on the development kernel 2.4.0-test1-ac21. In the utility area: fsck and logdump are functional. + +The ninth drop on July 13, 2000 (jfs-0.0.9.tar.gz) has the meta-data of the file system using the page cache. In the utility area: fixes for fsck.jfs and xpeek are included. + +The tenth drop on August 11, 2000 provided fixes for the file system and changes to the file system to be able to work on the development kernel 2.4.0-test5. In the utility area: fixes for fsck.jfs are included. The utility portion of extendfs has been ported (still need to port the file system portion for extendfs). Man pages have been written for the utilities. The compiler warnings have been removed from all of the utilities. + +The eleventh drop on September 1, 2000 provided fixes for the file system and changes to the file system to be able to work on the development kernel 2.4.0-test7. The utility portion of defrag has been ported (still need to port the file system portion for defrag). + +The twelfth drop on September 15, 2000 (jfs-0.0.12.tar.gz) provided the Transaction Manager Log I/O routines. + +The thirteenth drop on September 29, 2000 (jfs-0.0.13.tar.gz) provided case-sensitive support. In the utility area: fsck.jfs has the support for block/character special files. Note: The JFS partition must be formatted/reformatted using the mkfs.jfs created by drop 13 for case-sensitive support to work. + +The fourteenth drop on October 4, 2000 (jfs-0.0.14.tar.gz) included a fix to the file system for not mounting a JFS partition if the size of the partition is > 4G on the 2.2.x series of the kernel. + +The fifteenth drop on October 9, 2000 (jfs-0.0.15.tar.gz) included the Transaction Manager routines (Initialize, Begin, Locks, End). The file system has fixes and changes to the file system to be able to work on the development kernel 2.4.0-test9. The utility xchklog has been ported. + +The sixteenth drop on October 27, 2000 (jfs-0.0.16.tar.gz) included the Transaction Manager routines (Commit, Buffer Management, Abort). The file system and utilities have fixes included. + +The seventeenth drop on November 10, 2000 (jfs-0.0.17.tar.gz) has the transaction sub operations back in the file system operations. The file system is now journaling appropriate file system operations.
The file system is temporarily doing synchronous logging (which slows down operations that are logged) to make sure that the transaction processing is right. Asynchronous logging will be added shortly. The file system has fixes and changes to the file system to be able to work on the development kernel 2.4.0-test10. + +The eighteenth drop on November 15, 2000 (jfs-0.0.18.tar.gz) included fixes to the file system. + +The nineteenth drop on December 4, 2000 (jfs-0.0.19.tar.gz) is now doing asynchronous logging. The file system has fixes and changes to the file system to be able to work on the development kernel 2.4.0-test11. + +The twentieth drop on December 8, 2000 (jfs-0.1.0.tar.gz beta) included fixes to the file system. + +The twenty-first drop on December 14, 2000 (jfs-0.1.1.tar.gz) included an fsck fix to handle sparse files correctly. The file system has fixes and changes to the file system to be able to work on the development kernel 2.4.0-test12 and the 2.2.18 kernel. + +The twenty-second drop on January 5, 2001 (jfs-0.1.2.tar.gz) included fixes to the file system. The file system has changes to be able to work on the 2.4.0 kernel. + +The twenty-third drop on January 12, 2001 (jfs-0.1.3.tar.gz) included fixes to the file system. Fsck now supports fifo correctly. + +The twenty-fourth drop on January 26, 2001 (jfs-0.1.4.tar.gz) included fixes to the file system. The new feature in the file system is "Lazy Commit", which increases performance while doing asynchronous logging. The main makefile for the utilities now has an install option. + +The twenty-fifth drop on February 7, 2001 (jfs-0.1.5.tar.gz) included fixes to the file system. + +The twenty-sixth drop on February 26, 2001 (jfs-0.1.6.tar.gz) included fixes to the file system. The log manager no longer uses the page cache for log pages; this eliminates dead-locks that were occurring in the log manager. The file system has general work done to remove SMP dead-lock problems. Fsck now supports default values passed by fstab correctly. + +The twenty-seventh drop on March 7, 2001 (beta 2) (jfs-0.2.0.tar.gz) included fixes to the file system. This drop contains jfsprogs.spec, which can be used to create an RPM for the JFS utilities. The file system has general work done to remove SMP and UP hang related problems. The file system performance has been increased by changes to the extent inode cache. + +The twenty-eighth drop on March 21, 2001 (jfs-0.2.1.tar.gz) included fixes to the file system. The file system has been changed to use standard types. + +The twenty-ninth drop on April 2, 2001 (jfs-0.2.2.tar.gz) included fixes to the file system. The utilities have been changed to use standard types. + +The thirtieth drop on April 30, 2001 (jfs-0.3.0.tar.gz) included fixes to the file system. The rest of the utilities have been changed to use standard types. Both the file system and the utilities have been changed to use endian macros, so that JFS now stores the meta-data as little endian when running on all architectures. With this change, if you are running on a big-endian system, you must re-format all of the JFS partitions using the new mkfs.jfs included in this drop. + +The thirty-first drop on May 9, 2001 (jfs-0.3.1.tar.gz) included fixes to the file system and the utilities. + +The thirty-second drop on May 18, 2001 (jfs-0.3.2-patch.tar.gz) included fixes to the file system and the utilities. + +The thirty-third drop on May 25, 2001 (jfs-0.3.3-patch.tar.gz) included fixes to the file system and the utilities.
+ +The thirty-fourth drop on June 8, 2001 (jfs-0.3.4-patch.tar.gz) included fixes to the file system and the utilities. + +The thirty-fifth drop on June 15, 2001 (jfs-0.3.5-patch.tar.gz) includes fixes to the file system and utilities. + +The thirty-sixth drop on June 22, 2001 (jfs-0.3.6-patch.tar.gz) includes fixes to the file system and utilities. The change made to the file system to fix the rm -rf problem is a disk layout change; with this change, you must re-format all of the JFS partitions using the new mkfs.jfs included in this drop to have the rm -rf problem fixed. + + +The thirty-seventh drop on June 25, 2001 (jfs-0.3.7-patch.tar.gz) includes fixes to the file system and utilities. + +The thirty-eighth drop on June 28, 2001 (jfs-1.0.0-patch.tar.gz) includes fixes to the file system and utilities. + +The thirty-ninth drop on July 10, 2001 (jfs-1.0.1-patch.tar.gz) includes fixes to the file system and utilities. + +The fortieth drop on August 3, 2001 (jfs-1.0.2-patch.tar.gz) includes fixes to the file system and utilities. + +The forty-first drop on August 20, 2001 (jfs-1.0.3-patch.tar.gz) includes fixes to the file system and utilities. + +The forty-second drop on August 31, 2001 (jfs-2.2-1.0.4-patch.tar.gz or jfs-2.4-1.0.4-patch.tar.gz) includes fixes to the file system and utilities. + +The forty-third drop on September 14, 2001 (jfs-2.2-1.0.5-patch.tar.gz or jfs-2.4-1.0.5-patch.tar.gz) includes fixes to the file system and utilities. + +The forty-fourth drop on September 28, 2001 (jfs-2.4-1.0.6-patch.tar.gz) includes fixes to the file system and utilities. + +The forty-fifth drop on October 10, 2001 (jfs-2.4-1.0.7-patch.tar.gz and jfsutils-1.0.7.tar.gz) includes fixes to the file system and utilities. + +The forty-sixth drop on October 17, 2001 (jfs-2.4-1.0.8-patch.tar.gz and jfsutils-1.0.8.tar.gz) includes fixes to the file system and utilities. + +The forty-seventh drop on November 9, 2001 (jfs-2.4-1.0.9-patch.tar.gz and jfsutils-1.0.9.tar.gz) includes fixes to the file system and utilities. + + +Drop 47 has the temporary restriction that the block size must be 4K. MKFS.jfs defaults the block size to 4K. + + +JFS today: + + +- Utilities: + Function in drop 1 + - MKFS.JFS builds on 2.2.12 + - MKFS successfully formats a new partition for JFS + + Function and Fixes in drop 2 + - MKFS supports all parameters now + - MKFS has fixes from release pre-alpha 0.0.1 + mkfs.jfs results in segmentation fault if no arguments are specified + mkfs.jfs hangs on invocation + + - XPEEK, utility to PEEK and POKE JFS on-disk data/structures + + Function and Fixes in drop 5 + - MKFS.jfs has a fix to support the -l option correctly. + + Function and Fixes in drop 6 + - libfs has compiler warning fixes and a bug fix + + Function in drop 8 + - fsck.jfs successfully checks and repairs a jfs partition + - logdump, a utility that dumps the contents of the journal log. + + Function and Fixes in drop 9 + - fixes for fsck.jfs and xpeek have been done. + + Function and Fixes in drop 10 + - fixes for fsck.jfs + - man pages for utilities + - extendfs utility part has been ported (still need to port FS portion) + - compiler warnings have been removed. + + Function and Fixes in drop 11 + - defrag utility part has been ported (still need to port FS portion) + + Function and Fixes in drop 13 + - fsck.jfs supports block special files and character special files (Jitterbug problem #28) + + Function and Fixes in drop 15 + - ported the xchklog utility, which extracts a log from fsck.
+ + Function and Fixes in drop 16 + - fixes for fsck.jfs to handle case-sensitive support correctly (Jitterbug problem #36) + - cleanup changes for the utilities + + Function and Fixes in drop 18 + - cleanup changes for the utilities + + Function and Fixes in drop 21 + - fix in fsck to handle sparse files correctly + + Function and Fixes in drop 23 + - fix in fsck to handle fifo correctly + + Function and Fixes in drop 24 + - man page updates for the utilities + - install option for utilities + + Function and Fixes in drop 26 + - man page updates for fsck + - fsck now supports default options passed by fstab correctly + + Function and Fixes in drop 27 + - new jfsprogs.spec file + - fix in fsck for hard links + - fix for unicode conversion problem + + Function and Fixes in drop 29 + - cleanup changes for the utilities + - Code cleanup to use standard types + + Function and Fixes in drop 30 + - added endian macros support + - Code cleanup to use standard types (part 2) + - mkfs now clears out the 1st 4k bytes of the partition + + Function and Fixes in drop 31 + - completed endian macros support needed for xpeek + - added socket support for fsck + - minor bug fixes + + Function and Fixes in drop 32 + - Remove the warning message from fsck when the partition is mounted read-only + + Function and Fixes in drop 33 + - Fix fsck to handle mount read-only correctly + - Fix top level utilities makefile to be able to easily override the version of the gcc compiler + - Man pages are now available in html format + + Function and Fixes in drop 34 + - fsck fix to handle a pre-existing lost+found sub dir + + Function and Fixes in drop 35 + - updated fsck error handling + - updated mkfs config options and the man page for fsck + + Function and Fixes in drop 36 + - Fixed jitterbug #10: rm -rf fails on a big directory + + Function and Fixes in drop 38 + - Fixed small logredo problem + + Function and Fixes in drop 39 (1.0.1) + - Updated jfsprogs.spec file to make it work on more distros + - Add force option to mkfs so the confirmation message isn't displayed + - Fixed fsck to handle the index table on the root directory. + - Fixed error message displayed when the partition was just created and the system hasn't been rebooted (jitterbug 130) + - Correctly place the built utilities in /sbin vs. /usr/sbin when using the make install option. + + Note: If you have used the make install option to copy over the utilities from a previous release, then you should remove the following JFS utilities (fsck.jfs, logdump, logredo, mkfs.jfs, xchkdmp, xchklog, and xpeek) that could be in /usr/sbin. This release of make install places the utilities in the correct location, which is /sbin.
+ + Function and Fixes in drop 40 (1.0.2) + - Fixed mkfs to display the correct error message if the device name is not valid or missing + - gzip the man pages and place them in /usr/share/man/man8 + - Fixed mkfs to properly set up buf_ai (caused a Bus error with mkfs on SPARC Linux) + - Fixed fsck to display the path correctly + + Function and Fixes in drop 41 (1.0.3) + - Fixed compiler warnings on 64 bit systems + - Created jfsutils package + + Function and Fixes in drop 42 (1.0.4) + - Fixed typecast problem causing intermittent fsck failures on 64 bit hardware (jitterbug 159) + - Fixed pointer calculation problem causing intermittent fsck failures on 64 bit hardware + - Fixed compiler warnings on s/390 and IA64 + - Fixed structure size mismatch between file system and utilities causing fsck problems when large numbers of inodes are used + - Fixed seg fault in fsck when logging path lengths greater than 512 characters + - Fixed fsck printf format errors + + Function and Fixes in drop 43 (1.0.5) + - Fixed jfsprogs.spec to handle utilities not being in the file system source tree + - Cleaned up include files + - Fixed inconsistencies in the mkfs man and html pages + + Function and Fixes in drop 44 (1.0.6) + - If fsck rebuilds the root directory, reset di_next_index to 2 + - fsck needs to process and validate inodes with size=0 + + Function and Fixes in drop 45 (1.0.7) + - improve fsck's 'mounted' detection. This will remove the message 'Cannot access file system description file to determine mount status and file system type of /dev/device name'. + - improve utils' checking for fs type jfs + - replace __uX with uintX_t + - change fsck.jfs options to be similar to e2fsck + - set the fsck.jfs default to automatically fix the file system + +Function and Fixes in drop 46 (1.0.8) + - install prefix support for jfsutils + - cleanup option handling: make -y behave (jitterbug 177), make -f override -p + - Add a more informative error message when running fsck.jfs RO (jitterbug 173) + - clean up: remove carriage return after new line in messaging + - print mkfs.jfs version correctly + +Function and Fixes in drop 47 (1.0.9) + - don't print heartbeat if fsck.jfs output is redirected + - make mkfs.jfs options conform to mkfs, clean up parse code + - fix typo in mkfs.jfs man_html page + - allow xpeek to show us directory xtrees + - fix fsck.jfs infinite loop on big endian hardware (jitterbug 182) + - fix infinite loop when endian swapping a bad directory tree page + + +- JFS: + Function in drop 1 + - builds on 2.2.12 + - successfully MOUNTs & UMOUNTs + - limited READ capability (i.e. LS is operational) + - other system commands (i.e. DF, CHOWN, CHMOD, etc.
working in a limited fashion) + + Function and Fixes in drop 2 + - Write capabilities are operational + - MKDIR + - CREATE file + - RMDIR + - RM + - Problems fixed from release pre-alpha 0.0.1 + using 2.2.14 jfs_imap.c:3004: `event' undeclared (first use in this function) + + Function and Fixes in drop 3 + - WRITE a file + - READ a file + - Support for hard and soft links + + Function and Fixes in drop 4 + - MV + - Executables can now be started from a jfs partition + - Problems while writing files have been fixed + + Function and Fixes in drop 5 + - added support for special files (mknod) + - alpha changes have been included + + Function and Fixes in drop 6 + - Jitterbug problem 9: du output is incorrect on jfs + - Jitterbug problem 11: unresolved symbol jfs_rwlock_lock on SMP build + + Function and Fixes in drop 7 + - moved JFS up to the 2.3.x development kernel (2.3.99-pre6) + - Jitterbug problem 14: can't build JFS within the kernel + + Function and Fixes in drop 8 + - moved JFS up to the 2.4.0 development kernel (2.4.0-test1-ac21) + - Jitterbug problem 17: undefined: jfs_rdwrlock_lock + - PowerPC build problem + + Function and Fixes in drop 9 + - moved JFS up to the 2.4.0 development kernel (2.4.0-test3) + - moved meta-data from buffer cache to page cache + - fixes for the file system are included + + Function and Fixes in drop 10 + - moved JFS up to the 2.4.0 development kernel (2.4.0-test5) + - fixes for the file system are included + + Function and Fixes in drop 11 + - moved JFS up to the 2.4.0 development kernel (2.4.0-test7) + - fixes for the file system are included + - start of journaling code has been included (jfs_txnmgr.h) + - WIP of the log manager (jfs_logmgr.c) + + Function and Fixes in drop 12 + - Transaction Manager Log I/O -> Write log records is functional + + Function and Fixes in drop 13 + - case-sensitive support has been added + + Function and Fixes in drop 14 + - JFS mount error 22 is fixed (Jitterbug problem #30) + + Function and Fixes in drop 15 + - The following Transaction Manager routines are functional: + Initialize -> Initialize transaction manager + Begin -> Start a transaction + Locks -> Acquire/release locks + End -> End a transaction + - moved JFS up to the 2.4.0 development kernel (2.4.0-test9) + - Fixed 2.2.x series block write problem. + + Function and Fixes in drop 16 + - The following Transaction Manager routines are functional: + Commit -> Commit the transaction + Buffer Management -> Update inode, directory, extent + Abort -> Stop the commit from occurring + - File System build problem on the 2.2.x series of the kernel (Jitterbug #35) + - Fixed case-sensitive bug in the filesystem (Jitterbug #36) + + + Function and Fixes in drop 17 + - Added transaction sub operations back into the file system operations. + - The file system is now journaling appropriate file system operations. + - moved JFS up to the 2.4.0 development kernel (2.4.0-test10) + + Function and Fixes in drop 18 + - symlink fix + - inode corruption problem + + Function and Fixes in drop 19 + - Added asynchronous logging support back into the file system. + - moved JFS up to the 2.4.0 development kernel (2.4.0-test11) + - Fixes to transaction related hang problems. + + Function and Fixes in drop 20 + - Fix to free memory on module cleanup + + Function and Fixes in drop 21 + - Fix so fsck doesn't report the message + Secondary file/directory allocation structure(2) is not a correct redundant copy of primary structure.
+ - Fix for setup of finish_aync_io + - moved JFS up to the 2.4.0 development kernel (2.4.0-test12) + - moved JFS up to the 2.2.18 kernel + + Function and Fixes in drop 22 + + - moved JFS up to the 2.4.0 kernel + - Fix to do transaction processing for mknod correctly + - Fix for spinlock.h needed on 2.2.18 ppc + + Function and Fixes in drop 23 + - Fix for undefined BUG() in the 2.2.x series + - Fix for 2.2 kernels: struct pipe_inode_info overlays the file system dependent portion of the inode structure. + + Function and Fixes in drop 24 + - rmmod jfs fix + - Implemented "Lazy Commit" - an asynchronous logging enhancement to increase performance + - Removed ino_t from on-disk structures (fixes a mount problem) + + Function and Fixes in drop 25 + - Fix for deadlocks by putting IWRITE_LOCK/UNLOCK within jfs_delete_inode + - Fix to handle removing a link to an inode that isn't the last link. + - Fixes to general transaction processing SMP related hangs. + + Function and Fixes in drop 26 + - Fix for file writes on the 2.2.x series of the kernel + - Change to log pages, so they no longer use the page cache. + - Fixes to general transaction processing SMP related dead-lock problems + + Function and Fixes in drop 27 + - Report error on read_metapage failure in jfs_readlink + - Report the correct error in jfs_lookup if the VFS fails to locate the inode + - Fix for a buffer overrun problem in jfs_readlink when compiled for linux-2.2 + - Fix to support the fsync call correctly (jitterbug #57) + - Fixes to general SMP related dead-lock problems + - Fix to define BUG() if 2.2 and !i386 + - Fixes to general UP related hangs + - Changes to handling the inode extent cache increase the performance of the file system + + Function and Fixes in drop 28 + - Added ifdef IS_KIOBUFIO so JFS will build if kiobufs is in the kernel. Kiobufs has changed the # of parameters for generic_make_request() + - Make jfsFYI a module parameter: insmod jfs.o jfsFYI=1 will turn on JFS FYI messages without having to recompile. This option is only supported on the 2.4.x kernel level. + - Defines cleanup_module() in terms of exit_jfs_fs() + - Fix to jfs_read_super: if get_empty_inode returns NULL, clean up correctly + - Code cleanup to use standard types + + + Function and Fixes in drop 29 + - Fix for assert(iagp->wmap[extno] & mask); (line #2875) in jfs_imap while running dbench + - Fixed hang on scsi + - added /proc/fs/jfs/jfsFYI (2.4.* kernels only) + echo 1 > /proc/fs/jfs/jfsFYI ; Turns on very verbose output to syslog + echo 0 > /proc/fs/jfs/jfsFYI ; Turns it back off + + Notes: + - IMPORTANT: If building jfs.o as a module on 2.2.* kernels, you will have to rebuild and install the kernel to add wake_up_process to ksyms.c. Not needed on 2.4.* kernels. + + Function and Fixes in drop 30 + - Added endian macros support + - Fixed dbench hang + - jfs fails SPEC SFS validation on both NFSv2/NFSv3; fix to not add a null-terminator to the symlink value and to not account for it in the size. + - Fixed a bug where the casting of a 32 bit block # wasn't correct when a 64 bit value is needed. xtLookup messages were displayed in /var/log/messages (an example of such a message is below): + xtLookup: lstart (0x80000) >= size (0x80000) + + Notes: release 0.3.0 requires a re-format of the JFS partition if the system architecture is big-endian (i.e. s/390, PowerPC, etc.); a sketch of what the endian conversion means follows below.
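The following is an illustrative sketch only, not JFS source: the structure, field, and function names are made up. It shows the kind of conversion the drop 30 endian macros perform so that on-disk meta-data stays little endian on every architecture.

    /* Illustrative only -- not JFS source. A hypothetical on-disk field
     * is kept little-endian on all hosts by converting at each access
     * with the kernel's endian macros. */
    #include <linux/types.h>
    #include <asm/byteorder.h>

    struct disk_rec {               /* hypothetical on-disk record */
            __u32 d_nblocks;        /* always stored little-endian */
    };

    static __u32 rec_nblocks(struct disk_rec *r)
    {
            /* no-op on little-endian CPUs; byte-swaps on big-endian
             * CPUs such as s/390 and PowerPC */
            return le32_to_cpu(r->d_nblocks);
    }

    static void rec_set_nblocks(struct disk_rec *r, __u32 n)
    {
            r->d_nblocks = cpu_to_le32(n);
    }

Because every access converts between disk and CPU byte order, the on-disk format is identical on all architectures, which is why partitions written by earlier big-endian builds must be re-formatted.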
+ + Function and Fixes in drop 31 + - Removed max hard links check (showed up during cp -a /usr /jfs/usr) + - Fixed an inode writing hang that could show up when running dbench, iozone, etc.; the change was to prevent a deadlock during inode writing. + + Function and Fixes in drop 32 + - Fix for assert(mp->count) jfs_metapage.c 675!, reported as a hardlink problem in drop 31 (dtDeleteUp was discarding the wrong metapage_t). + - Fix seg fault problem while creating hard links. + - Fixed dbench hang due to transaction locks not being freed. + - Added support to correctly handle read-only and remounting the file system. + + Function and Fixes in drop 33 + + - Fixed statfs call to return the maximum number of inodes that JFS could allocate. (The problem was reported as rpm exiting with an "(x) inodes needed" message without installing the package.) + - Fix to handle a case where an inode wasn't getting written to disk. + - Increase the performance of unlinking files. + - Fix to null terminate symlinks. + - General SMP fixes. + + Function and Fixes in drop 34 + + - Fix to remove a hang waiting on an inode (jitterbug #73) + - Fixed dbench hang on SMP 8-way + - Fixed a log sync problem; improved performance with this fix + + Function and Fixes in drop 35 + - Increase the performance of unlinking files; most unlinks are done asynchronously now + - Fixed "XT_GETPAGE: xtree page corrupt" while creating files on an nfs mounted partition + + Function and Fixes in drop 36 + - Fixed jitterbug #10: rm -rf fails on a big directory + + Notes: release 0.3.6 requires a re-format of the JFS partition for the rm -rf problem to be fixed. + + Function and Fixes in drop 37 + - Fixed find_entry called with index = 0 or 1 (jitterbug #126) + - Fixed the rm -rf case where, if files weren't created sequentially, rm -rf wasn't working correctly + + + Notes: If you used release 0.3.6, please re-format all JFS partitions for the rm -rf problem to be fixed. + + Function and Fixes in drop 38 + - Fixed some general log problems + + Function and Fixes in drop 39 (1.0.1) + - Fixed hang during copying files on the 2.2.x series + - Fixed TxLock compile problem + - Fixed to correctly update the number of blocks for directories (this was causing the FS to show fsck errors after compiling mozilla). + - Fixed to prevent old data from being written to disk from the page cache. + + Function and Fixes in drop 40 (1.0.2) + - Fixed multiple truncate hang + - Fixed hang when unlinking a file and a sync happening at the same time + - Improved handling of kmalloc error conditions + - Fixed hang in blk_get_queue and SMP deadlock: bh_end_io calls generic_make_request (jitterbug 145 and 146) + - stbl was not set correctly in dtDelete + - changed trap to printk in dbAllocAG to avoid system hang + + Function and Fixes in drop 41 (1.0.3) + - Patch to move from previous release to latest release needs to update the version number in super.c + - Jitterbug problems (134, 140, 152) with removing files have been fixed + - Set rc=ENOSPC if ialloc fails in jfs_create and jfs_mkdir + - Fixed jfs_txnmgr.c 775! assert + - Fixed jfs_txnmgr.c 884!
assert(mp->nohomeok==0) + - Fix hang - prevent tblocks from being exhausted + - Fix oops trying to mount reiserfs + - Fail more gracefully in jfs_imap.c + - Print more information when char2uni fails + - Fix timing problem between Block map and metapage cache - jitterbug 139 + - Code Cleanup (removed many ifdef's, obsolete code, ran code through indent) Mostly 2.4 tree + - Split source tree (Now have a separate source tree for 2.2, 2.4, and jfsutils) + + Function and Fixes in drop 42 (1.0.4) + - Fixed compiler warnings in the FS when building on 64 bit systems + - Fixed deadlock where jfsCommit hung in hold_metapage + - Fixed problems with remount + - Reserve metapages for jfsCommit thread + - Get rid of buggy invalidate_metapage & use discard_metapage + - Don't hand metapages to jfsIOthread (too many context switches) (jitterbug 125, bugzilla 238) + - Fix error message in jfs_strtoUCS + + Function and Fixes in drop 43 (1.0.5) + - Allow separate allocation of JFS-private superblock/inode data. + - Remove checks in namei.c that are already done by the VFS. + - Remove redundant mutex defines. + - Replace all occurrences of #include with #include + - Work around race condition in remount - fixes OOPS during shutdown + - Truncate large files incrementally (affects directories too) + + Function and Fixes in drop 44 (1.0.6) + - Create jfs_incore.h which merges linux/jfs_fs.h, linux/jfs_fs_i.h, and jfs_fs_sb.h + - Create a configuration option to handle the JFS_DEBUG define + - Fixed a few cases where positive error codes were returned to the VFS. + - Replace jfs_dir_read by generic_read_dir. + - jfs_fsync_inode is only called by jfs_fsync_file, merge the two and rename to jfs_fsync. + - Add a bunch of missing externs. + - jfs_rwlock_lock is unused, nuke it. + - Always use atomic set/test_bit operations to protect jfs_ip->cflag + - Combine jfs_ip->flag with jfs_ip->cflag + - Fixed minor format errors reported by fsck + - cflags should be long so bitops always work correctly + - Use GFP_NOFS for runtime memory allocations + - Support VM changes in 2.4.10 of the kernel + - Remove ifdefs supporting older 2.4 kernels. JFS now requires at least 2.4.3 or 2.4.2-ac2 + - Simplify and remove one use of IWRITE_TRYLOCK + - jfs_truncate was not passing tid to xtTruncate + - removed obsolete extent_page workaround + - correct recovery from failed diAlloc call (disk full) + - In write_metapage, don't call commit_write if prepare_write failed + + Function and Fixes in drop 45 (1.0.7) + - cleanup: remove the IS_KIOBUFIO define. + - cleanup: remove the TRUNC_NO_TOSS define. + - have jFYI's use the name directly from dentry + - Remove null _ALLOC and _FREE macros and also make spinlocks static. + - cleanup: add externs where needed in the header files + - jfs_write_inode is a bad place to call iput. Also limit warnings. + - More truncate cleanup + - Truncate cleanup + - Add missing statics in jfs_metapage.c + - fsync fixes + - Clean up symlink code - use page_symlink_inode_operations + - unicode handling cleanup + - cleanup: replace UniChar with wchar_t + - Get rid of CDLL_* macros - use list.h instead + - 2.4.11-prex mount problem: call new_inode instead of get_empty_inode + - use kernel min/max macros + - Add MODULE_LICENSE stub for older kernels + - IA64/gcc3 fixes + - Log Manager fixes, introduce __SLEEP_COND macro + - Mark superblock dirty when some errors detected (forcing fsck to be run). + - More robust remounting from r/o to r/w. + - Misc.
cleanup: add static where appropriate + - small cleanup in jfs_umount_rw + - add MODULE_ stuff + - Set *dropped_lock in alloc_metapage + - Get rid of unused log list + - cleanup jfs_imap.c to remove the _OLD_STUFF and _NO_MORE_MOUNT_INODE defines + - Log manager cleanup + - Transaction manager cleanup + - correct memory allocation flags + - Better handling of iterative truncation + - Change continue to break, otherwise we don't re-acquire LAZY_LOCK + + Function and Fixes in drop 46 (1.0.8) + - Synclist was being built backwards, causing logredo to quit too early + - jfs_compat.h needs to include module.h + - uncomment EXPORTS_NO_SYMBOLS in super.c + - Minor code cleanup + - xtree of zero-truncated file not being logged + - Fix logging on file truncate + - remove unused metapage fields + + Function and Fixes in drop 47 (1.0.9) + - Fix data corruption problem when creating files while deleting others (jitterbug 183) + - Make sure all metadata is written before finalizing the log + - Fix serialization problem in shutdown by setting i_size of directory sooner. (bugzilla #334) + - JFS should quit whining when special files are marked dirty during read-only mount. + - Must always check rc after DT_GETPAGE + - Add diExtendFS + - Removing defconfig from JFS source - not really needed + + +Please send bugs, comments, cards and letters to linuxjfs@us.ibm.com. + +The JFS mailing list can be subscribed to by using the link labeled "Mail list Subscribe" +at our web page http://oss.software.ibm.com/jfs/. + + +
diff -uNr --exclude=CVS ../kernel.org/linux/Documentation/filesystems/jfs.txt linuxppc64_2_4/Documentation/filesystems/jfs.txt --- ../kernel.org/linux/Documentation/filesystems/jfs.txt Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/Documentation/filesystems/jfs.txt Wed Nov 14 10:19:35 2001 @@ -0,0 +1,168 @@ +IBM's Journaled File System (JFS) for Linux version 1.0.9 +Team members +Steve Best sbest@us.ibm.com +Dave Kleikamp shaggy@us.ibm.com +Barry Arndt barndt@us.ibm.com + + +Release November 9, 2001 (version 1.0.9) + +This is our forty-seventh release of IBM's Enterprise JFS technology port to Linux. +Beta 1 was release 0.1.0 on 12/8/2000, Beta 2 was release 0.2.0 on 3/7/2001, +Beta 3 was release 0.3.0 on 4/30/2001, and release 1.0.0 was on 6/28/2001. + +The changelog.jfs file contains detailed information about the changes in each source code drop. + +JFS has a source tree that can be built against the 2.2.14 - 2.2.19 and 2.4.3 - 2.4.14 kernel.org source trees. + +There is anonymous cvs access available for the JFS tree. The steps below are what is needed to pull the JFS cvs tree from the oss.software.ibm.com server. + +id anoncvs +password anoncvs + +To check out the 2.2.x series of the JFS files, do the following: +CVSROOT should be set to :pserver:anoncvs@oss.software.ibm.com:/usr/cvs/jfs +cvs checkout linux-2.2.12 + +To check out the 2.4.x series of the JFS files, do the following: +CVSROOT should be set to :pserver:anoncvs@oss.software.ibm.com:/usr/cvs/jfs +cvs checkout linux24 + +To check out the JFS utilities, do the following: +CVSROOT should be set to :pserver:anoncvs@oss.software.ibm.com:/usr/cvs/jfs +cvs checkout jfsutils + +The cvs tree contains the latest changes being done to JFS. To receive notification of commits to the cvs tree, please send e-mail to linuxjfs@us.ibm.com stating that you would like notifications sent to you. + +The jfs-2.4-1.0.9-patch.tar.gz file contains a readme and patch files for different levels of the 2.4 kernel.
Please see the README in the jfs-2.4-1.0.9-patch.tar.gz file for help on applying the two patch files. + +Similarly, jfs-2.2-1.0.5-patch.tar.gz contains a readme and patch files for different levels of the 2.2 kernel. + +The following files in the kernel source tree have been changed so JFS can be built. The jfs-2.4-1.0.9.tar.gz source tar ball now contains each of the files below with the extension of the kernel level it is associated with. As an example, there are now three Config.in files named Config.in-2.4.0, Config.in-2.4.5, and Config.in-2.4.7. + +Similarly, the jfs-2.2-1.0.5.tar.gz source tar ball contains the files Config.in-2.2.14, Config.in-2.2.16, and Config.in-2.2.18. + +If you use the tar ball to build JFS, you must rename each of the kernel files to the file names listed below. The standard kernel from www.kernel.org is the source of the kernel files that are included in the jfs tar files. + +Note: Some of these files below do not need to be changed for the 2.4.x series of the kernel. + +In sub dir fs Config.in, Makefile, filesystem.c +In sub dir fs/nls Config.in +In sub dir arch/i386 defconfig +In sub dir Documentation Configure.help +In sub dir Documentation/filesystems 00-INDEX +In sub dir include/linux fs.h +In sub dir linux MAINTAINERS +In sub dir linux/kernel/ksyms.c + +Please back up the above files before the JFS patch file is added to the kernel source tree. There are three new header files in the sub dir include/linux named jfs_fs.h, jfs_fs_i.h, and jfs_fs_sb.h. All other JFS files are located in the include/linux/jfs or fs/jfs sub dirs. + +Our development team has used the Linux kernel levels 2.2.14-2.2.19 and 2.4.3 - 2.4.14 kernels with gcc version egcs-2.91.66 19990314/Linux (egcs-1.1.2 release) for our port so far. A goal of the JFS team is to have JFS run on all architectures that Linux supports; there is no architecture specific code in JFS. JFS has been run on the following architectures (x86, PowerPC, Alpha, s/390, ARM) so far. + +To make JFS build, during the "make config" step of building the kernel answer Y to the "Prompt for development and/or incomplete code/drivers" question in the Code maturity level options section. In the Filesystems section use m as the answer to JFS filesystem support (experimental) (CONFIG_JFS_FS) [Y/m/n?] + +Note: If you use JFS as a module on the 2.2.x series of the kernel, you must rebuild the kernel and run that rebuilt kernel for JFS to run. The reason for this change is that JFS needs the kernel to export symbols (a sketch of such an export appears below). + +Build in /usr/src/linux with the commands: + +make modules +make modules_install + +If you rebuild jfs.o after having mounted and unmounted a partition, "modprobe -r jfs" will unload the old module. + +Debugging messages for the file system are written to /var/log/messages. + +There are two ways to build the JFS utilities: the first uses the jfsprogs.spec file, and the second is to run make and then make install in the /jfsutils sub dir. + +To use the jfsprogs.spec file located in sub dir jfsutils/SPECS, you need to update the version of JFS in the spec file; also, the corresponding jfsutils-x.x.x.tar.gz file must be in the SOURCES directory of your system. Now it's time to begin the build. First, change into the directory holding the jfsprogs.spec file: + +cd /jfsutils/SPECS + +Next, start the build with an rpm -b command: + +rpm -ba jfsprogs.spec + +The "a" following the -b option directs RPM to perform all phases of the build process.
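As a rough illustration of the symbol-export requirement mentioned in the note above (this is a sketch, not the actual patch hunk; the drop 29 notes in changelog.jfs name wake_up_process as one symbol the 2.2.x kernel must export for jfs.o), the addition to kernel/ksyms.c amounts to:

    /* sketch of a kernel/ksyms.c addition for 2.2.x -- not the actual patch hunk */
    #include <linux/module.h>
    #include <linux/sched.h>        /* declares wake_up_process() */

    EXPORT_SYMBOL(wake_up_process); /* lets modules such as jfs.o resolve
                                       this kernel symbol at load time */

Once the kernel has been rebuilt and installed with the export in place, insmod jfs.o can resolve the symbol without any further kernel changes.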
+ + +The second way to build the utilities for JFS (mkfs, xpeek, logredo, xchkdmp, fsck, logdump, xchklog) is to build in /jfsutils with the commands: + +make +make install + +One of the results of this build is a program called mkfs.jfs. +To format a JFS partition, use the following command, which will result in the specified device being formatted: + + mkfs -t jfs device-name + + + +JFS TODO list: + +Plans for our near term development items + + - Fix for 2.2 SMP kernel hangs + - get defrag capabilities operational in the FS + - get extendfs capabilities operational in the FS + - test EXTENDFS utility, for growing JFS partitions + - test defrag utility, which calls the file system to defragment itself + - add support for block sizes (512,1024,2048) + - add support for a logfile on a dedicated partition + + +Longer term work items + + - get access control list functionality operational + - get extended attributes functionality operational + - add quota support + +Please send bugs, comments, cards and letters to linuxjfs@us.ibm.com. + +The JFS mailing list can be subscribed to by using the link labeled "Mail list Subscribe" +at our web page http://oss.software.ibm.com/jfs/. + + + + + + + +
diff -uNr --exclude=CVS ../kernel.org/linux/MAINTAINERS linuxppc64_2_4/MAINTAINERS --- ../kernel.org/linux/MAINTAINERS Mon Oct 22 10:37:17 2001 +++ linuxppc64_2_4/MAINTAINERS Fri Oct 26 02:56:02 2001 @@ -801,6 +801,13 @@ W: http://sources.redhat.com/jffs2/ S: Maintained +JFS FILESYSTEM +P: Dave Kleikamp +M: shaggy@austin.ibm.com +L: jfs-discussion@oss.software.ibm.com +W: http://oss.software.ibm.com/developerworks/opensource/jfs/ +S: Supported + JOYSTICK DRIVER P: Vojtech Pavlik M: vojtech@suse.cz @@ -874,6 +881,13 @@ W: http://www.linuxppc.org/ L: linuxppc-dev@lists.linuxppc.org S: Maintained + +LINUX FOR 64BIT POWERPC +P: David Engebretsen +M: engebret@us.ibm.com +W: http://linuxppc64.org +L: linuxppc64-dev@lists.linuxppc.org +S: Supported LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP Dynamic Disks) P: Richard Russon (FlatCap)
diff -uNr --exclude=CVS ../kernel.org/linux/Makefile linuxppc64_2_4/Makefile --- ../kernel.org/linux/Makefile Wed Oct 24 00:21:20 2001 +++ linuxppc64_2_4/Makefile Fri Oct 26 06:05:00 2001 @@ -5,7 +5,8 @@ KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) -ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +#ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ARCH := ppc64 KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ @@ -19,7 +20,7 @@ HOSTCC = gcc HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -CROSS_COMPILE = +CROSS_COMPILE = /usr/local/ppc64-current3.0/bin/powerpc64-linux- # # Include the make variables (CC, etc...)
@@ -151,6 +152,7 @@ DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o +DRIVERS-$(CONFIG_PPC_ISERIES) += drivers/iseries/iseries.o ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) DRIVERS-y += drivers/cdrom/driver.o
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/Makefile linuxppc64_2_4/arch/ppc64/Makefile --- ../kernel.org/linux/arch/ppc64/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/Makefile Wed Nov 28 19:06:29 2001 @@ -0,0 +1,88 @@ +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changes for PPC by Gary Thomas +# Rewritten by Cort Dougan and Paul Mackerras +# Adjusted for PPC64 by Tom Gall +# + +KERNELLOAD =0xc000000000000000 + +ifeq ($(shell uname -m),ppc64) +CHECKS = checks +endif + +LINKFLAGS = -T arch/ppc64/vmlinux.lds -Ttext $(KERNELLOAD) -Bstatic +CFLAGS := $(CFLAGS) -fsigned-char -msoft-float -pipe \ + -Wno-uninitialized -mminimal-toc -fno-builtin +CPP = $(CC) -E $(CFLAGS) + + +# For 64-bit apps, temporarily reduce the size of the address space +# available to user applications. This allows us to use strace without +# having to compile a strace64 program. This shouldn't affect anyone +# other than Steve Munroe, Peter Bergner. I will back this hack out +# later...
-Peter +#CPPFLAGS := $(CPPFLAGS) -DPPC64_32B_ADDR_SPACE +#CFLAGS := $(CFLAGS) -DPPC64_32B_ADDR_SPACE + + +HEAD := arch/ppc64/kernel/head.o + +ARCH_SUBDIRS = arch/ppc64/kernel arch/ppc64/mm arch/ppc64/lib +SUBDIRS := $(SUBDIRS) $(ARCH_SUBDIRS) +ARCHIVES := arch/ppc64/kernel/kernel.o arch/ppc64/mm/mm.o arch/ppc64/lib/lib.o $(ARCHIVES) +CORE_FILES := arch/ppc64/kernel/kernel.o arch/ppc64/mm/mm.o arch/ppc64/lib/lib.o $(CORE_FILES) + +ifdef CONFIG_XMON +SUBDIRS += arch/ppc64/xmon +CORE_FILES += arch/ppc64/xmon/x.o +endif +ifdef CONFIG_KDB +SUBDIRS += arch/ppc64/kdb +CORE_FILES += arch/ppc64/kdb/kdba.o +endif + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +checks: + @$(MAKE) -C arch/$(ARCH)/kernel checks + +ifdef CONFIG_PPC_PSERIES +BOOT_TARGETS = zImage znetboot.initrd zImage.initrd +endif + +ifdef CONFIG_PPC_ISERIES +BOOT_TARGETS = vmlinux.sminitrd vmlinux.initrd vmlinux.sm +endif + +$(BOOT_TARGETS): vmlinux + @$(MAKEBOOT) $@ + +znetboot: vmlinux +ifdef CONFIG_SMP + cp -f vmlinux /tftpboot/vmlinux.smp +else + cp -f vmlinux /tftpboot/vmlinux +endif + @$(MAKEBOOT) $@ + +%_config: arch/ppc64/configs/%_defconfig + rm -f .config arch/ppc64/defconfig + cp -f arch/ppc64/configs/$(@:config=defconfig) arch/ppc64/defconfig + +archclean: + rm -f arch/ppc64/kernel/{ppc_defs.h,checks,mk_defs.s,mk_defs_out.c,mk_defs_tpl} + @$(MAKEBOOT) clean + +archmrproper: + +archdep: + $(MAKEBOOT) fastdep
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/Makefile linuxppc64_2_4/arch/ppc64/boot/Makefile --- ../kernel.org/linux/arch/ppc64/boot/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/Makefile Mon Nov 5 11:20:37 2001 @@ -0,0 +1,129 @@ +# Makefile for making ELF bootable images for booting on CHRP +# using Open Firmware. +# +# Geert Uytterhoeven September 1997 +# +# Based on coffboot by Paul Mackerras +# Simplified for ppc64 by Todd Inglett +# +# NOTE: this code is built for 32 bit in ELF32 format even though +# it packages a 64 bit kernel. We do this to simplify the +# bootloader and increase compatibility with OpenFirmware. +# +# To this end we need to define BOOTCC, etc, as the tools +# needed to build the 32 bit image. These are normally HOSTCC, +# but may be a third compiler if, for example, you are cross +# compiling from an intel box. Once the 64bit ppc gcc is +# stable it will probably simply be a compiler switch to +# compile for 32bit mode. +# To make it easier to set up a cross compiler, +# CROSS32_COMPILE is set up as a prefix just like CROSS_COMPILE +# in the toplevel makefile.
+ +CROSS32_COMPILE = +#CROSS32_COMPILE = /usr/local/ppc/bin/powerpc-linux- + +BOOTCC = $(CROSS32_COMPILE)gcc +BOOTCFLAGS = $(HOSTCFLAGS) -I$(HPATH) +BOOTLD = $(CROSS32_COMPILE)ld +BOOTAS = $(CROSS32_COMPILE)as +BOOTAFLAGS = -D__ASSEMBLY__ $(HOSTCFLAGS) + +.c.o: + $(BOOTCC) $(BOOTCFLAGS) -c -o $*.o $< +.S.o: + $(BOOTCC) $(BOOTAFLAGS) -traditional -c -o $*.o $< + +CFLAGS = $(CPPFLAGS) -O -fno-builtin -DSTDC_HEADERS +LD_ARGS = -Ttext 0x00400000 -e _start + +OBJS = crt0.o start.o main.o zlib.o image.o imagesize.o +#LIBS = $(TOPDIR)/lib/lib.a +LIBS = + +ifeq ($(CONFIG_SMP),y) +TFTPIMAGE=/tftpboot/zImage.chrp.smp +else +TFTPIMAGE=/tftpboot/zImage.chrp +endif + + +ifeq ($(CONFIG_PPC_ISERIES),y) +all: vmlinux.sm +else +all: $(TOPDIR)/zImage +endif + + +znetboot: zImage + cp zImage $(TFTPIMAGE) + + +ifeq ($(CONFIG_PPC_ISERIES),y) + +addSystemMap: addSystemMap.c + $(HOSTCC) $(HOSTCFLAGS) -o addSystemMap addSystemMap.c + +vmlinux.sm: $(TOPDIR)/vmlinux addSystemMap + ./addSystemMap $(TOPDIR)/System.map $(TOPDIR)/vmlinux vmlinux.sm + + +addRamDisk: addRamDisk.c + $(HOSTCC) $(HOSTCFLAGS) -o addRamDisk addRamDisk.c + +vmlinux.initrd: $(TOPDIR)/vmlinux addRamDisk ramdisk.image.gz $(TOPDIR)/System.map + ./addRamDisk ramdisk.image.gz $(TOPDIR)/System.map $(TOPDIR)/vmlinux vmlinux.initrd + +vmlinux.sminitrd: vmlinux.sm addRamDisk ramdisk.image.gz $(TOPDIR)/System.map + ./addRamDisk ramdisk.image.gz $(TOPDIR)/System.map vmlinux.sm vmlinux.sminitrd + +endif + + +znetboot.initrd: zImage.initrd + cp zImage.initrd $(TFTPIMAGE) + +floppy: zImage + mcopy zImage a:zImage + +piggyback: piggyback.c + $(HOSTCC) $(HOSTCFLAGS) -DKERNELBASE=$(KERNELBASE) -o piggyback piggyback.c + +addnote: addnote.c + $(HOSTCC) $(HOSTCFLAGS) -o addnote addnote.c + +image.o: piggyback vmlinux.gz + ./piggyback image < vmlinux.gz | $(BOOTAS) -o image.o + +sysmap.o: piggyback ../../../System.map + ./piggyback sysmap < ../../../System.map | $(BOOTAS) -o sysmap.o + +initrd.o: ramdisk.image.gz piggyback + ./piggyback initrd < ramdisk.image.gz | $(BOOTAS) -o initrd.o + +zImage: $(OBJS) no_initrd.o addnote + $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) no_initrd.o $(LIBS) + ./addnote $@ + +zImage.initrd: $(OBJS) initrd.o addnote + $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) initrd.o $(LIBS) + ./addnote $@ + + +vmlinux.gz: $(TOPDIR)/vmlinux + $(OBJCOPY) -S -O binary $(TOPDIR)/vmlinux vmlinux + ls -l vmlinux | awk '{printf "/* generated -- do not edit! 
 */\nint uncompressed_size = %d;\n", $$5}' > imagesize.c + $(CROSS_COMPILE)nm -n $(TOPDIR)/vmlinux | tail -1 | awk '{printf "long vmlinux_end = 0x%s;\n", substr($$1,8)}' >> imagesize.c + gzip -vf9 vmlinux + +imagesize.c: vmlinux.gz + +clean: + rm -f piggyback note addnote $(OBJS) zImage zImage.initrd vmlinux.gz no_initrd.o imagesize.c addSystemMap vmlinux.sm addRamDisk vmlinux.initrd vmlinux.sminitrd + +fastdep: + $(TOPDIR)/scripts/mkdep *.[Sch] > .depend + +dep: + $(CPP) $(CPPFLAGS) -M *.S *.c > .depend +
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/addRamDisk.c linuxppc64_2_4/arch/ppc64/boot/addRamDisk.c --- ../kernel.org/linux/arch/ppc64/boot/addRamDisk.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/addRamDisk.c Mon Oct 8 21:26:21 2001 @@ -0,0 +1,324 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> + +#define ElfHeaderSize (64 * 1024) +#define ElfPages (ElfHeaderSize / 4096) +#define KERNELBASE (0xc000000000000000) + +void get4k(FILE *file, char *buf ) +{ + unsigned j; + unsigned num = fread(buf, 1, 4096, file); + for ( j=num; j<4096; ++j ) + buf[j] = 0; +} + +void put4k(FILE *file, char *buf ) +{ + fwrite(buf, 1, 4096, file); +} + +void death(const char *msg, FILE *fdesc, const char *fname) +{ + printf("%s", msg); + fclose(fdesc); + unlink(fname); + exit(1); +} + +int main(int argc, char **argv) +{ + char inbuf[4096]; + FILE *ramDisk = NULL; + FILE *sysmap = NULL; + FILE *inputVmlinux = NULL; + FILE *outputVmlinux = NULL; + + unsigned i = 0; + unsigned long ramFileLen = 0; + unsigned long ramLen = 0; + unsigned long roundR = 0; + + unsigned long sysmapFileLen = 0; + unsigned long sysmapLen = 0; + unsigned long sysmapPages = 0; + char* ptr_end = NULL; + unsigned long offset_end = 0; + + unsigned long kernelLen = 0; + unsigned long actualKernelLen = 0; + unsigned long round = 0; + unsigned long roundedKernelLen = 0; + unsigned long ramStartOffs = 0; + unsigned long ramPages = 0; + unsigned long roundedKernelPages = 0; + unsigned long hvReleaseData = 0; + u_int32_t eyeCatcher = 0xc8a5d9c4; + unsigned long naca = 0; + unsigned long xRamDisk = 0; + unsigned long xRamDiskSize = 0; + long padPages = 0; + + + if (argc < 2) + { + printf("Name of RAM disk file missing.\n"); + exit(1); + } + + if (argc < 3) + { + printf("Name of System Map input file is missing.\n"); + exit(1); + } + + if (argc < 4) + { + printf("Name of vmlinux file missing.\n"); + exit(1); + } + + if (argc < 5) + { + printf("Name of vmlinux output file missing.\n"); + exit(1); + } + + + ramDisk = fopen(argv[1], "r"); + if ( ! ramDisk ) + { + printf("RAM disk file \"%s\" failed to open.\n", argv[1]); + exit(1); + } + + sysmap = fopen(argv[2], "r"); + if ( ! sysmap ) + { + printf("System Map file \"%s\" failed to open.\n", argv[2]); + exit(1); + } + + inputVmlinux = fopen(argv[3], "r"); + if ( ! inputVmlinux ) + { + printf("vmlinux file \"%s\" failed to open.\n", argv[3]); + exit(1); + } + + outputVmlinux = fopen(argv[4], "w+"); + if ( !
outputVmlinux ) + { + printf("output vmlinux file \"%s\" failed to open.\n", argv[4]); + exit(1); + } + + + + /* Input Vmlinux file */ + fseek(inputVmlinux, 0, SEEK_END); + kernelLen = ftell(inputVmlinux); + fseek(inputVmlinux, 0, SEEK_SET); + printf("kernel file size = %ld\n", kernelLen); + if ( kernelLen == 0 ) + { + printf("You must have a linux kernel specified as argv[3]\n"); + exit(1); + } + + actualKernelLen = kernelLen - ElfHeaderSize; + + printf("actual kernel length (minus ELF header) = %ld\n", actualKernelLen); + + round = actualKernelLen % 4096; + roundedKernelLen = actualKernelLen; + if ( round ) + roundedKernelLen += (4096 - round); + printf("Vmlinux length rounded up to a 4k multiple = %ld/0x%lx \n", roundedKernelLen, roundedKernelLen); + roundedKernelPages = roundedKernelLen / 4096; + printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); + + + + /* Input System Map file */ + /* (needs to be processed simply to determine if we need to add pad pages due to the static variables not being included in the vmlinux) */ + fseek(sysmap, 0, SEEK_END); + sysmapFileLen = ftell(sysmap); + fseek(sysmap, 0, SEEK_SET); + printf("%s file size = %ld/0x%lx \n", argv[2], sysmapFileLen, sysmapFileLen); + + sysmapLen = sysmapFileLen; + + roundR = 4096 - (sysmapLen % 4096); + if (roundR) + { + printf("Rounding System Map file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); + sysmapLen += roundR; + } + printf("Rounded System Map size is %ld/0x%lx \n", sysmapLen, sysmapLen); + + /* Process the Sysmap file to determine where _end is */ + sysmapPages = sysmapLen / 4096; + for (i=0; i +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void xlate( char * inb, char * trb, unsigned len ) +{ + unsigned i; + for ( i=0; i<len; ++i ) + { + char c = *inb++; + char c1 = c >> 4; + char c2 = c & 0xf; + if ( c1 > 9 ) + c1 = c1 + 'A' - 10; + else + c1 = c1 + '0'; + if ( c2 > 9 ) + c2 = c2 + 'A' - 10; + else + c2 = c2 + '0'; + *trb++ = c1; + *trb++ = c2; + } + *trb = 0; +} + +#define ElfHeaderSize (64 * 1024) +#define ElfPages (ElfHeaderSize / 4096) + +void get4k( /*istream *inf*/FILE *file, char *buf ) +{ + unsigned j; + unsigned num = fread(buf, 1, 4096, file); + for ( j=num; j<4096; ++j ) + buf[j] = 0; +} + +void put4k( /*ostream *outf*/FILE *file, char *buf ) +{ + fwrite(buf, 1, 4096, file); +} + +int main(int argc, char **argv) +{ + char inbuf[4096]; + FILE *sysmap = NULL; + char* ptr_end = NULL; + FILE *inputVmlinux = NULL; + FILE *outputVmlinux = NULL; + long i = 0; + unsigned long sysmapFileLen = 0; + unsigned long sysmapLen = 0; + unsigned long roundR = 0; + unsigned long kernelLen = 0; + unsigned long actualKernelLen = 0; + unsigned long round = 0; + unsigned long roundedKernelLen = 0; + unsigned long sysmapStartOffs = 0; + unsigned long sysmapPages = 0; + unsigned long roundedKernelPages = 0; + long padPages = 0; + if ( argc < 2 ) + { + printf("Name of System Map file missing.\n"); + exit(1); + } + + if ( argc < 3 ) + { + printf("Name of vmlinux file missing.\n"); + exit(1); + } + + if ( argc < 4 ) + { + printf("Name of vmlinux output file missing.\n"); + exit(1); + } + + sysmap = fopen(argv[1], "r"); + if ( ! sysmap ) + { + printf("System Map file \"%s\" failed to open.\n", argv[1]); + exit(1); + } + inputVmlinux = fopen(argv[2], "r"); + if ( ! inputVmlinux ) + { + printf("vmlinux file \"%s\" failed to open.\n", argv[2]); + exit(1); + } + outputVmlinux = fopen(argv[3], "w"); + if ( !
outputVmlinux ) + { + printf("output vmlinux file \"%s\" failed to open.\n", argv[3]); + exit(1); + } + + + + fseek(inputVmlinux, 0, SEEK_END); + kernelLen = ftell(inputVmlinux); + fseek(inputVmlinux, 0, SEEK_SET); + printf("kernel file size = %ld\n", kernelLen); + if ( kernelLen == 0 ) + { + printf("You must have a linux kernel specified as argv[2]\n"); + exit(1); + } + + + actualKernelLen = kernelLen - ElfHeaderSize; + + printf("actual kernel length (minus ELF header) = %ld/%lxx \n", actualKernelLen, actualKernelLen); + + round = actualKernelLen % 4096; + roundedKernelLen = actualKernelLen; + if ( round ) + roundedKernelLen += (4096 - round); + + printf("Kernel length rounded up to a 4k multiple = %ld/%lxx \n", roundedKernelLen, roundedKernelLen); + roundedKernelPages = roundedKernelLen / 4096; + printf("Kernel pages to copy = %ld/%lxx\n", roundedKernelPages, roundedKernelPages); + + + + /* Sysmap file */ + fseek(sysmap, 0, SEEK_END); + sysmapFileLen = ftell(sysmap); + fseek(sysmap, 0, SEEK_SET); + printf("%s file size = %ld\n", argv[1], sysmapFileLen); + + sysmapLen = sysmapFileLen; + + roundR = 4096 - (sysmapLen % 4096); + if (roundR) + { + printf("Rounding System Map file up to a multiple of 4096, adding %ld\n", roundR); + sysmapLen += roundR; + } + printf("Rounded System Map size is %ld\n", sysmapLen); + + /* Process the Sysmap file to determine the true end of the kernel */ + sysmapPages = sysmapLen / 4096; + printf("System map pages to copy = %ld\n", sysmapPages); + for (i=0; i +#include +#include +#include + +char arch[] = "PowerPC"; + +#define N_DESCR 6 +unsigned int descr[N_DESCR] = { + 0xffffffff, /* real-mode = true */ + 0x00c00000, /* real-base, i.e. where we expect OF to be */ + 0xffffffff, /* real-size */ + 0xffffffff, /* virt-base */ + 0xffffffff, /* virt-size */ + 0x4000, /* load-base */ +}; + +unsigned char buf[512]; + +#define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1])) +#define GET_32BE(off) ((GET_16BE(off) << 16) + GET_16BE((off)+2)) + +#define PUT_16BE(off, v) (buf[off] = ((v) >> 8) & 0xff, \ + buf[(off) + 1] = (v) & 0xff) +#define PUT_32BE(off, v) (PUT_16BE((off), (v) >> 16), \ + PUT_16BE((off) + 2, (v))) + +/* Structure of an ELF file */ +#define E_IDENT 0 /* ELF header */ +#define E_PHOFF 28 +#define E_PHENTSIZE 42 +#define E_PHNUM 44 +#define E_HSIZE 52 /* size of ELF header */ + +#define EI_MAGIC 0 /* offsets in E_IDENT area */ +#define EI_CLASS 4 +#define EI_DATA 5 + +#define PH_TYPE 0 /* ELF program header */ +#define PH_OFFSET 4 +#define PH_FILESZ 16 +#define PH_HSIZE 32 /* size of program header */ + +#define PT_NOTE 4 /* Program header type = note */ + +#define ELFCLASS32 1 +#define ELFDATA2MSB 2 + +unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' }; + +int +main(int ac, char **av) +{ + int fd, n, i; + int ph, ps, np; + int nnote, ns; + + if (ac != 2) { + fprintf(stderr, "Usage: %s elf-file\n", av[0]); + exit(1); + } + fd = open(av[1], O_RDWR); + if (fd < 0) { + perror(av[1]); + exit(1); + } + + nnote = strlen(arch) + 1 + (N_DESCR + 3) * 4; + + n = read(fd, buf, sizeof(buf)); + if (n < 0) { + perror("read"); + exit(1); + } + + if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0) + goto notelf; + + if (buf[E_IDENT+EI_CLASS] != ELFCLASS32 + || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) { + fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n", + av[1]); + exit(1); + } + + ph = GET_32BE(E_PHOFF); + ps = GET_16BE(E_PHENTSIZE); + np = GET_16BE(E_PHNUM); + if (ph < E_HSIZE || ps < PH_HSIZE || np < 1) + goto notelf; + if (ph + (np + 1) * ps 
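+	    /* the grown program-header table plus the note data must
+	       still lie inside the 512 bytes read into buf[] above */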
+ nnote > n) + goto nospace; + + for (i = 0; i < np; ++i) { + if (GET_32BE(ph + PH_TYPE) == PT_NOTE) { + fprintf(stderr, "%s already has a note entry\n", + av[1]); + exit(0); + } + ph += ps; + } + + /* XXX check that the area we want to use is all zeroes */ + for (i = 0; i < ps + nnote; ++i) + if (buf[ph + i] != 0) + goto nospace; + + /* fill in the program header entry */ + ns = ph + ps; + PUT_32BE(ph + PH_TYPE, PT_NOTE); + PUT_32BE(ph + PH_OFFSET, ns); + PUT_32BE(ph + PH_FILESZ, nnote); + + /* fill in the note area we point to */ + /* XXX we should probably make this a proper section */ + PUT_32BE(ns, strlen(arch) + 1); + PUT_32BE(ns + 4, N_DESCR * 4); + PUT_32BE(ns + 8, 0x1275); + strcpy(&buf[ns + 12], arch); + ns += 12 + strlen(arch) + 1; + for (i = 0; i < N_DESCR; ++i) + PUT_32BE(ns + i * 4, descr[i]); + + /* Update the number of program headers */ + PUT_16BE(E_PHNUM, np + 1); + + /* write back */ + lseek(fd, (long) 0, SEEK_SET); + i = write(fd, buf, n); + if (i < 0) { + perror("write"); + exit(1); + } + if (i < n) { + fprintf(stderr, "%s: write truncated\n", av[1]); + exit(1); + } + + exit(0); + + notelf: + fprintf(stderr, "%s does not appear to be an ELF file\n", av[0]); + exit(1); + + nospace: + fprintf(stderr, "sorry, I can't find space in %s to put the note\n", + av[0]); + exit(1); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/crt0.S linuxppc64_2_4/arch/ppc64/boot/crt0.S --- ../kernel.org/linux/arch/ppc64/boot/crt0.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/crt0.S Thu Sep 13 14:13:35 2001 @@ -0,0 +1,265 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + .text + .globl _start +_start: + lis 9,_start@h + lis 8,_etext@ha + addi 8,8,_etext@l +1: dcbf 0,9 + icbi 0,9 + addi 9,9,0x20 + cmplwi 0,9,8 + blt 1b + sync + isync + + ## Clear out the BSS as per ANSI C requirements + + lis 7,_end@ha + addi 7,7,_end@l # r7 = &_end + lis 8,__bss_start@ha # + addi 8,8,__bss_start@l # r8 = &_bss_start + + ## Determine how large an area, in number of words, to clear + + subf 7,8,7 # r7 = &_end - &_bss_start + 1 + addi 7,7,3 # r7 += 3 + srwi. 7,7,2 # r7 = size in words. + beq 3f # If the size is zero, do not bother + addi 8,8,-4 # r8 -= 4 + mtctr 7 # SPRN_CTR = number of words to clear + li 0,0 # r0 = 0 +2: stwu 0,4(8) # Clear out a word + bdnz 2b # If we are not done yet, keep clearing +3: + + + b start + + + +/* + * Flush the dcache and invalidate the icache for a range of addresses. + * + * flush_cache(addr, len) + */ + .global flush_cache +flush_cache: + addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ + rlwinm. 
4,4,27,5,31 + mtctr 4 + beqlr +1: dcbf 0,3 + icbi 0,3 + addi 3,3,0x20 + bdnz 1b + sync + isync + blr + + +#define r0 0 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 + + .globl strcpy +strcpy: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strncpy +strncpy: + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + + .globl strcat +strcat: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strcmp +strcmp: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + + .globl strlen +strlen: + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + + .globl memset +memset: + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + rlwinm r0,r5,32-2,2,31 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + + .globl bcopy +bcopy: + mr r6,r3 + mr r3,r4 + mr r4,r6 + b memcpy + + .globl memmove +memmove: + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + + .globl memcpy +memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl backwards_memcpy +backwards_memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl memcmp +memcmp: + cmpwi 0,r5,0 + blelr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/main.c linuxppc64_2_4/arch/ppc64/boot/main.c --- ../kernel.org/linux/arch/ppc64/boot/main.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/main.c Wed Oct 31 08:32:02 2001 @@ -0,0 +1,292 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * Updates for PPC64 by Todd Inglett & Dave Engebretsen. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
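+ *
+ * In outline: claim memory from Open Firmware, gunzip (or memmove)
+ * the kernel image into it, build the bi_rec list just past the
+ * image, then branch to the kernel entry point with the OF client
+ * interface pointer passed through unchanged.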
+ */ +#define __KERNEL__ +#include "zlib.h" +#include +#include +#include + +void memmove(void *dst, void *im, int len); + +extern void *finddevice(const char *); +extern int getprop(void *, const char *, void *, int); +extern void printf(const char *fmt, ...); +extern int sprintf(char *buf, const char *fmt, ...); +void gunzip(void *, int, unsigned char *, int *); +void *claim(unsigned int, unsigned int, unsigned int); +void flush_cache(void *, int); +void pause(void); +static struct bi_record *make_bi_recs(unsigned long); + +#define RAM_START 0x00000000 +#define RAM_END (64<<20) + +#define BOOT_START ((unsigned long)_start) +#define BOOT_END ((unsigned long)_end) + +/* Value picked to match that used by yaboot */ +#define PROG_START 0x01400000 + +char *avail_ram; +char *begin_avail, *end_avail; +char *avail_high; +unsigned int heap_use; +unsigned int heap_max; +unsigned long initrd_start = 0; +unsigned long initrd_size = 0; + +extern char _end[]; +extern char image_data[]; +extern int image_len; +extern char initrd_data[]; +extern int initrd_len; +extern char sysmap_data[]; +extern int sysmap_len; +extern int uncompressed_size; +extern long vmlinux_end; + +static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ + +typedef void (*kernel_entry_t)( unsigned long, + unsigned long, + void *, + struct bi_record *); + +void +chrpboot(unsigned long a1, unsigned long a2, void *prom) +{ + unsigned len; + void *dst = (void *)-1; + unsigned long claim_addr; + unsigned char *im; + extern char _start; + struct bi_record *bi_recs; + kernel_entry_t kernel_entry; + + printf("chrpboot starting: loaded at 0x%x\n\r", (unsigned)&_start); + + if (initrd_len) { + initrd_size = initrd_len; + initrd_start = (RAM_END - initrd_size) & ~0xFFF; + a1 = a2 = 0; + claim(initrd_start, RAM_END - initrd_start, 0); + printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", + initrd_start, (unsigned long)initrd_data, initrd_size); + memcpy((void *)initrd_start, (void *)initrd_data, initrd_size); + } + + im = image_data; + len = image_len; + uncompressed_size = PAGE_ALIGN(uncompressed_size); + + for(claim_addr = PROG_START; + claim_addr <= PROG_START * 8; + claim_addr += 0x100000) { + printf(" trying: 0x%08lx\n\r", claim_addr); + dst = claim(claim_addr, uncompressed_size, 0); + if (dst != (void *)-1) break; + } + if (dst == (void *)-1) { + printf("claim error, can't allocate kernel memory\n\r"); + return; + } + + if (im[0] == 0x1f && im[1] == 0x8b) { + avail_ram = scratch; + begin_avail = avail_high = avail_ram; + end_avail = scratch + sizeof(scratch); + printf("gunzipping (0x%x <- 0x%x:0x%0x)...", + (unsigned)dst, (unsigned)im, (unsigned)im+len); + gunzip(dst, uncompressed_size, im, &len); + printf("done %u bytes\n\r", len); + printf("%u bytes of heap consumed, max in use %u\n\r", + (unsigned)(avail_high - begin_avail), heap_max); + } else { + memmove(dst, im, len); + } + + flush_cache(dst, len); + + bi_recs = make_bi_recs((unsigned long)dst + vmlinux_end); + + kernel_entry = (kernel_entry_t)dst; + printf( "kernel:\n\r" + " entry addr = 0x%lx\n\r" + " a1 = 0x%lx,\n\r" + " a2 = 0x%lx,\n\r" + " prom = 0x%lx,\n\r" + " bi_recs = 0x%lx,\n\r", + (unsigned long)kernel_entry, a1, a2, + (unsigned long)prom, (unsigned long)bi_recs); + + kernel_entry( a1, a2, prom, bi_recs ); + + printf("returned?\n\r"); + + pause(); +} + +static struct bi_record * +make_bi_recs(unsigned long addr) +{ + struct bi_record *bi_recs; + struct bi_record *rec; + + bi_recs = rec = bi_rec_init(addr); + + rec = bi_rec_alloc(rec, 2); + 
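+	/* each record is a tag plus data words; early kernel setup can
+	 * walk the list from BI_FIRST to BI_LAST
+	 */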
rec->tag = BI_FIRST; + /* rec->data[0] = ...; # Written below before return */ + /* rec->data[1] = ...; # Written below before return */ + + rec = bi_rec_alloc_bytes(rec, strlen("chrpboot")+1); + rec->tag = BI_BOOTLOADER_ID; + sprintf( (char *)rec->data, "chrpboot"); + + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_MACHTYPE; + rec->data[0] = _MACH_pSeries; + rec->data[1] = 1; + + if ( initrd_size > 0 ) { + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_INITRD; + rec->data[0] = initrd_start; + rec->data[1] = initrd_size; + } + +#if 0 + if ( sysmap_len > 0 ) { + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_SYSMAP; + rec->data[0] = (unsigned long)sysmap_data; + rec->data[1] = sysmap_len; + } +#endif + + rec = bi_rec_alloc(rec, 1); + rec->tag = BI_LAST; + rec->data[0] = (bi_rec_field)bi_recs; + + /* Save the _end_ address of the bi_rec's in the first bi_rec + * data field for easy access by the kernel. + */ + bi_recs->data[0] = (bi_rec_field)rec; + bi_recs->data[1] = (bi_rec_field)rec + rec->size - (bi_rec_field)bi_recs; + + return bi_recs; +} + +struct memchunk { + unsigned int size; + unsigned int pad; + struct memchunk *next; +}; + +static struct memchunk *freechunks; + +void *zalloc(void *x, unsigned items, unsigned size) +{ + void *p; + struct memchunk **mpp, *mp; + + size *= items; + size = _ALIGN(size, sizeof(struct memchunk)); + heap_use += size; + if (heap_use > heap_max) + heap_max = heap_use; + for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { + if (mp->size == size) { + *mpp = mp->next; + return mp; + } + } + p = avail_ram; + avail_ram += size; + if (avail_ram > avail_high) + avail_high = avail_ram; + if (avail_ram > end_avail) { + printf("oops... out of memory\n\r"); + pause(); + } + return p; +} + +void zfree(void *x, void *addr, unsigned nb) +{ + struct memchunk *mp = addr; + + nb = _ALIGN(nb, sizeof(struct memchunk)); + heap_use -= nb; + if (avail_ram == addr + nb) { + avail_ram = addr; + return; + } + mp->size = nb; + mp->next = freechunks; + freechunks = mp; +} + +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +#define DEFLATED 8 + +void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != DEFLATED || (flags & RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if ((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + s.zalloc = zalloc; + s.zfree = zfree; + r = inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = inflate(&s, Z_FINISH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + inflateEnd(&s); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/mknote.c linuxppc64_2_4/arch/ppc64/boot/mknote.c --- ../kernel.org/linux/arch/ppc64/boot/mknote.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/mknote.c Fri May 4 17:13:58 2001 @@ -0,0 +1,43 @@ +/* + * Copyright (C) Cort Dougan 1999. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Generate a note section as per the CHRP specification. + * + */ + +#include + +#define PL(x) printf("%c%c%c%c", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff ); + +int main(void) +{ +/* header */ + /* namesz */ + PL(strlen("PowerPC")+1); + /* descrsz */ + PL(6*4); + /* type */ + PL(0x1275); + /* name */ + printf("PowerPC"); printf("%c", 0); + +/* descriptor */ + /* real-mode */ + PL(0xffffffff); + /* real-base */ + PL(0x00c00000); + /* real-size */ + PL(0xffffffff); + /* virt-base */ + PL(0xffffffff); + /* virt-size */ + PL(0xffffffff); + /* load-base */ + PL(0x4000); + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/no_initrd.c linuxppc64_2_4/arch/ppc64/boot/no_initrd.c --- ../kernel.org/linux/arch/ppc64/boot/no_initrd.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/no_initrd.c Fri May 4 17:13:58 2001 @@ -0,0 +1,2 @@ +char initrd_data[1]; +int initrd_len = 0; diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/piggyback.c linuxppc64_2_4/arch/ppc64/boot/piggyback.c --- ../kernel.org/linux/arch/ppc64/boot/piggyback.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/piggyback.c Fri May 4 17:13:58 2001 @@ -0,0 +1,74 @@ +/* + * Copyright 2001 IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +extern long ce_exec_config[]; + +int main(int argc, char *argv[]) +{ + int i, cnt, pos, len; + unsigned int cksum, val; + unsigned char *lp; + unsigned char buf[8192]; + if (argc != 2) + { + fprintf(stderr, "usage: %s name out-file\n", + argv[0]); + exit(1); + } + fprintf(stdout, "#\n"); + fprintf(stdout, "# Miscellaneous data structures:\n"); + fprintf(stdout, "# WARNING - this file is automatically generated!\n"); + fprintf(stdout, "#\n"); + fprintf(stdout, "\n"); + fprintf(stdout, "\t.data\n"); + fprintf(stdout, "\t.globl %s_data\n", argv[1]); + fprintf(stdout, "%s_data:\n", argv[1]); + pos = 0; + cksum = 0; + while ((len = read(0, buf, sizeof(buf))) > 0) + { + cnt = 0; + lp = (unsigned char *)buf; + len = (len + 3) & ~3; /* Round up to longwords */ + for (i = 0; i < len; i += 4) + { + if (cnt == 0) + { + fprintf(stdout, "\t.long\t"); + } + fprintf(stdout, "0x%02X%02X%02X%02X", lp[0], lp[1], lp[2], lp[3]); + val = *(unsigned long *)lp; + cksum ^= val; + lp += 4; + if (++cnt == 4) + { + cnt = 0; + fprintf(stdout, " # %x \n", pos+i-12); + fflush(stdout); + } else + { + fprintf(stdout, ","); + } + } + if (cnt) + { + fprintf(stdout, "0\n"); + } + pos += len; + } + fprintf(stdout, "\t.globl %s_len\n", argv[1]); + fprintf(stdout, "%s_len:\t.long\t0x%x\n", argv[1], pos); + fflush(stdout); + fclose(stdout); + fprintf(stderr, "cksum = %x\n", cksum); + exit(0); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/start.c linuxppc64_2_4/arch/ppc64/boot/start.c --- ../kernel.org/linux/arch/ppc64/boot/start.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/start.c Thu Sep 13 14:13:35 2001 @@ -0,0 +1,654 @@ +/* + * Copyright (C) Paul Mackerras 1997. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + +void exit(void); +void *finddevice(const char *name); +int getprop(void *phandle, const char *name, void *buf, int buflen); +void chrpboot(int a1, int a2, void *prom); /* in main.c */ + +void printk(char *fmt, ...); + +void +start(int a1, int a2, void *promptr) +{ + prom = (int (*)(void *)) promptr; + chosen_handle = finddevice("/chosen"); + if (chosen_handle == (void *) -1) + exit(); + if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) + exit(); + stderr = stdout; + if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) + exit(); + + chrpboot(a1, a2, promptr); + for (;;) + exit(); +} + +int +write(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "write"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +int +read(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "read"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +void +exit() +{ + struct prom_args { + char *service; + } args; + + for (;;) { + args.service = "exit"; + (*prom)(&args); + } +} + +void +pause(void) +{ + struct prom_args { + char *service; + } args; + + args.service = "enter"; + (*prom)(&args); +} + +void * +finddevice(const char *name) +{ + struct prom_args { + char *service; + int nargs; + int nret; + const char *devspec; + void *phandle; + } args; + + args.service = "finddevice"; + args.nargs = 1; + args.nret = 1; + args.devspec = name; + args.phandle = (void *) -1; + (*prom)(&args); + return args.phandle; +} + +void * +claim(unsigned long virt, unsigned long size, unsigned long align) +{ + struct prom_args { + char *service; + int nargs; + int nret; + unsigned int virt; + unsigned int size; + unsigned int align; + void *ret; + } args; + + args.service = "claim"; + args.nargs = 3; + args.nret = 1; + args.virt = virt; + args.size = size; + args.align = align; + (*prom)(&args); + return args.ret; +} + +int +getprop(void *phandle, const char *name, void *buf, int buflen) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *phandle; + const char *name; + void *buf; + int buflen; + int size; + } args; + + args.service = "getprop"; + args.nargs = 4; + args.nret = 1; + args.phandle = phandle; + args.name = name; + args.buf = buf; + args.buflen = buflen; + args.size = -1; + (*prom)(&args); + return args.size; +} + +int +putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + putc('\r', f); + return write(f, &ch, 1) == 1? c: -1; +} + +int +putchar(int c) +{ + return putc(c, stdout); +} + +int +fputs(char *str, void *f) +{ + int n = strlen(str); + + return write(f, str, n) == n? 
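+		/* write() returns the actual byte count; a short write is an error */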
0: -1; +} + +int +readchar(void) +{ + char ch; + + for (;;) { + switch (read(stdin, &ch, 1)) { + case 1: + return ch; + case -1: + printk("read(stdin) returned -1\r\n"); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + putchar('\a'); + else { + putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + + + +/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? 
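+	       /* map '0'-'9' and 'a'-'f'/'A'-'F' to their digit values */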
*cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +/* Forward decl. needed for IP address printing stuff... */ +int sprintf(char * buf, const char *fmt, ...); + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +static char sprint_buf[1024]; + +void +printk(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); +} + +int +printf(char *fmt, ...) 
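+/* like printk() above, but returns the formatted length */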
+{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); + return n; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/zImage.lds linuxppc64_2_4/arch/ppc64/boot/zImage.lds --- ../kernel.org/linux/arch/ppc64/boot/zImage.lds Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/zImage.lds Tue Sep 11 08:57:14 2001 @@ -0,0 +1,78 @@ +OUTPUT_ARCH(powerpc) +SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/local/powerpc-any-elf/lib); +/* Do we need any of these for elf? + __DYNAMIC = 0; */ +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } + .plt : { *(.plt) } + .text : + { + *(.text) + *(.fixup) + *(.got1) + } + . = ALIGN(4096); + _etext = .; + PROVIDE (etext = .); + .rodata : + { + *(.rodata) + *(.rodata1) + } + .kstrtab : { *(.kstrtab) } + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + /* Read-write section, merged into data segment: */ + . = ALIGN(4096); + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + . = ALIGN(4096); + _edata = .; + PROVIDE (edata = .); + + .fixup : { *(.fixup) } + + . = ALIGN(4096); + __bss_start = .; + .bss : + { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + } + . = ALIGN(4096); + _end = . ; + PROVIDE (end = .); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/zlib.c linuxppc64_2_4/arch/ppc64/boot/zlib.c --- ../kernel.org/linux/arch/ppc64/boot/zlib.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/zlib.c Mon Jun 18 13:47:13 2001 @@ -0,0 +1,2170 @@ +/* + * This file is derived from various .h and .c files from the zlib-0.95 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. See zlib.h for conditions of + * distribution and use. + * + * Changes that have been made include: + * - changed functions not used outside this file to "local" + * - added minCompression parameter to deflateInit2 + * - added Z_PACKET_FLUSH (see zlib.h for details) + * - added inflateIncomp + * + Copyright (C) 1995 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. 
If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + gzip@prep.ai.mit.edu madler@alumni.caltech.edu + + * + * + */ + +/*+++++*/ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* From: zutil.h,v 1.9 1995/05/03 17:27:12 jloup Exp */ + +#define _Z_UTIL_H + +#include "zlib.h" + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +#define FAR + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern char *z_errmsg[]; /* indexed by 1-zlib_error */ + +#define ERR_RETURN(strm,err) return (strm->msg=z_errmsg[1-err], err) +/* To be used only when the state is known to be valid */ + +#ifndef NULL +#define NULL ((void *) 0) +#endif + + /* common constants */ + +#define DEFLATED 8 + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + + /* functions */ + +#include +#define zmemcpy memcpy +#define zmemzero(dest, len) memset(dest, 0, len) + +/* Diagnostic functions */ +#ifdef DEBUG_ZLIB +# include +# ifndef verbose +# define verbose 0 +# endif +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +typedef uLong (*check_func) OF((uLong check, Bytef *buf, uInt len)); + +/* voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); */ +/* void zcfree OF((voidpf opaque, voidpf ptr)); */ + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr, size) \ + (*((strm)->zfree))((strm)->opaque, (voidpf)(addr), (size)) +#define TRY_FREE(s, p, n) {if (p) ZFREE(s, p, n);} + +/* deflate.h -- internal compression state + * Copyright (C) 1995 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +/*+++++*/ +/* infblock.h -- header to use infblock.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +struct inflate_blocks_state; +typedef struct inflate_blocks_state FAR inflate_blocks_statef; + +local inflate_blocks_statef * inflate_blocks_new OF(( + z_stream *z, + check_func c, /* check function */ + uInt w)); /* window size */ + +local int inflate_blocks OF(( + inflate_blocks_statef *, + z_stream *, + int)); /* initial return code */ + +local void inflate_blocks_reset OF(( + inflate_blocks_statef *, + z_stream *, + uLongf *)); /* check value on output */ + +local int inflate_blocks_free OF(( + inflate_blocks_statef *, + z_stream *, + uLongf *)); /* check value on output */ + +local int inflate_addhistory OF(( + inflate_blocks_statef *, + z_stream *)); + +local int inflate_packet_flush OF(( + inflate_blocks_statef *)); + +/*+++++*/ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Huffman code lookup table entry--this entry is four bytes for machines + that have 16-bit pointers (e.g. PC's in the small or medium model). */ + +typedef struct inflate_huft_s FAR inflate_huft; + +struct inflate_huft_s { + union { + struct { + Byte Exop; /* number of extra bits or operation */ + Byte Bits; /* number of bits in this code or subcode */ + } what; + uInt Nalloc; /* number of these allocated here */ + Bytef *pad; /* pad structure to a power of 2 (4 bytes for */ + } word; /* 16-bit, 8 bytes for 32-bit machines) */ + union { + uInt Base; /* literal, length base, or distance base */ + inflate_huft *Next; /* pointer to next level of table */ + } more; +}; + +#ifdef DEBUG_ZLIB + local uInt inflate_hufts; +#endif + +local int inflate_trees_bits OF(( + uIntf *, /* 19 code lengths */ + uIntf *, /* bits tree desired/actual depth */ + inflate_huft * FAR *, /* bits tree result */ + z_stream *)); /* for zalloc, zfree functions */ + +local int inflate_trees_dynamic OF(( + uInt, /* number of literal/length codes */ + uInt, /* number of distance codes */ + uIntf *, /* that many (total) code lengths */ + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + z_stream *)); /* for zalloc, zfree functions */ + +local int inflate_trees_fixed OF(( + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *)); /* distance tree result */ + +local int inflate_trees_free OF(( + inflate_huft *, /* tables to free */ + z_stream *)); /* for zfree function */ + + +/*+++++*/ +/* infcodes.h -- header to use infcodes.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. 
Applications should only use zlib.h. + */ + +struct inflate_codes_state; +typedef struct inflate_codes_state FAR inflate_codes_statef; + +local inflate_codes_statef *inflate_codes_new OF(( + uInt, uInt, + inflate_huft *, inflate_huft *, + z_stream *)); + +local int inflate_codes OF(( + inflate_blocks_statef *, + z_stream *, + int)); + +local void inflate_codes_free OF(( + inflate_codes_statef *, + z_stream *)); + + +/*+++++*/ +/* inflate.c -- zlib interface to inflate modules + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* inflate private state */ +struct internal_state { + + /* mode */ + enum { + METHOD, /* waiting for method byte */ + FLAG, /* waiting for flag byte */ + BLOCKS, /* decompressing blocks */ + CHECK4, /* four check bytes to go */ + CHECK3, /* three check bytes to go */ + CHECK2, /* two check bytes to go */ + CHECK1, /* one check byte to go */ + DONE, /* finished check, done */ + BAD} /* got an error--stay here */ + mode; /* current inflate mode */ + + /* mode dependent information */ + union { + uInt method; /* if FLAGS, method byte */ + struct { + uLong was; /* computed check value */ + uLong need; /* stream check value */ + } check; /* if CHECK, check values to compare */ + uInt marker; /* if BAD, inflateSync's marker bytes count */ + } sub; /* submode */ + + /* mode independent information */ + int nowrap; /* flag for no wrapper */ + uInt wbits; /* log2(window size) (8..15, defaults to 15) */ + inflate_blocks_statef + *blocks; /* current inflate_blocks state */ + +}; + + +int inflateReset(z) +z_stream *z; +{ + uLong c; + + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + z->total_in = z->total_out = 0; + z->msg = Z_NULL; + z->state->mode = z->state->nowrap ? BLOCKS : METHOD; + inflate_blocks_reset(z->state->blocks, z, &c); + Trace((stderr, "inflate: reset\n")); + return Z_OK; +} + + +int inflateEnd(z) +z_stream *z; +{ + uLong c; + + if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->blocks != Z_NULL) + inflate_blocks_free(z->state->blocks, z, &c); + ZFREE(z, z->state, sizeof(struct internal_state)); + z->state = Z_NULL; + Trace((stderr, "inflate: end\n")); + return Z_OK; +} + + +int inflateInit2(z, w) +z_stream *z; +int w; +{ + /* initialize state */ + if (z == Z_NULL) + return Z_STREAM_ERROR; +/* if (z->zalloc == Z_NULL) z->zalloc = zcalloc; */ +/* if (z->zfree == Z_NULL) z->zfree = zcfree; */ + if ((z->state = (struct internal_state FAR *) + ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) + return Z_MEM_ERROR; + z->state->blocks = Z_NULL; + + /* handle undocumented nowrap option (no zlib header or check) */ + z->state->nowrap = 0; + if (w < 0) + { + w = - w; + z->state->nowrap = 1; + } + + /* set window size */ + if (w < 8 || w > 15) + { + inflateEnd(z); + return Z_STREAM_ERROR; + } + z->state->wbits = (uInt)w; + + /* create inflate_blocks state */ + if ((z->state->blocks = + inflate_blocks_new(z, z->state->nowrap ? 
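+		/* raw deflate data carries no adler32 trailer to check */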
Z_NULL : adler32, 1 << w)) + == Z_NULL) + { + inflateEnd(z); + return Z_MEM_ERROR; + } + Trace((stderr, "inflate: allocated\n")); + + /* reset state */ + inflateReset(z); + return Z_OK; +} + + +int inflateInit(z) +z_stream *z; +{ + return inflateInit2(z, DEF_WBITS); +} + + +#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;} +#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) + +int inflate(z, f) +z_stream *z; +int f; +{ + int r; + uInt b; + + if (z == Z_NULL || z->next_in == Z_NULL) + return Z_STREAM_ERROR; + r = Z_BUF_ERROR; + while (1) switch (z->state->mode) + { + case METHOD: + NEEDBYTE + if (((z->state->sub.method = NEXTBYTE) & 0xf) != DEFLATED) + { + z->state->mode = BAD; + z->msg = "unknown compression method"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + if ((z->state->sub.method >> 4) + 8 > z->state->wbits) + { + z->state->mode = BAD; + z->msg = "invalid window size"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + z->state->mode = FLAG; + case FLAG: + NEEDBYTE + if ((b = NEXTBYTE) & 0x20) + { + z->state->mode = BAD; + z->msg = "invalid reserved bit"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + if (((z->state->sub.method << 8) + b) % 31) + { + z->state->mode = BAD; + z->msg = "incorrect header check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Trace((stderr, "inflate: zlib header ok\n")); + z->state->mode = BLOCKS; + case BLOCKS: + r = inflate_blocks(z->state->blocks, z, r); + if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0) + r = inflate_packet_flush(z->state->blocks); + if (r == Z_DATA_ERROR) + { + z->state->mode = BAD; + z->state->sub.marker = 0; /* can try inflateSync */ + break; + } + if (r != Z_STREAM_END) + return r; + r = Z_OK; + inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); + if (z->state->nowrap) + { + z->state->mode = DONE; + break; + } + z->state->mode = CHECK4; + case CHECK4: + NEEDBYTE + z->state->sub.check.need = (uLong)NEXTBYTE << 24; + z->state->mode = CHECK3; + case CHECK3: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 16; + z->state->mode = CHECK2; + case CHECK2: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 8; + z->state->mode = CHECK1; + case CHECK1: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE; + + if (z->state->sub.check.was != z->state->sub.check.need) + { + z->state->mode = BAD; + z->msg = "incorrect data check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Trace((stderr, "inflate: zlib check ok\n")); + z->state->mode = DONE; + case DONE: + return Z_STREAM_END; + case BAD: + return Z_DATA_ERROR; + default: + return Z_STREAM_ERROR; + } + + empty: + if (f != Z_PACKET_FLUSH) + return r; + z->state->mode = BAD; + z->state->sub.marker = 0; /* can try inflateSync */ + return Z_DATA_ERROR; +} + +/* + * This subroutine adds the data at next_in/avail_in to the output history + * without performing any output. The output buffer must be "caught up"; + * i.e. no pending output (hence s->read equals s->write), and the state must + * be BLOCKS (i.e. we should be willing to see the start of a series of + * BLOCKS). On exit, the output will also be caught up, and the checksum + * will have been updated if need be. 
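+ * (Added for PPP, which must feed incompressible packets through the
+ * dictionary without producing output; see the change notes in the
+ * header of this file.)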
+ */ + +int inflateIncomp(z) +z_stream *z; +{ + if (z->state->mode != BLOCKS) + return Z_DATA_ERROR; + return inflate_addhistory(z->state->blocks, z); +} + + +int inflateSync(z) +z_stream *z; +{ + uInt n; /* number of bytes to look at */ + Bytef *p; /* pointer to bytes */ + uInt m; /* number of marker bytes found in a row */ + uLong r, w; /* temporaries to save total_in and total_out */ + + /* set up */ + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->mode != BAD) + { + z->state->mode = BAD; + z->state->sub.marker = 0; + } + if ((n = z->avail_in) == 0) + return Z_BUF_ERROR; + p = z->next_in; + m = z->state->sub.marker; + + /* search */ + while (n && m < 4) + { + if (*p == (Byte)(m < 2 ? 0 : 0xff)) + m++; + else if (*p) + m = 0; + else + m = 4 - m; + p++, n--; + } + + /* restore */ + z->total_in += p - z->next_in; + z->next_in = p; + z->avail_in = n; + z->state->sub.marker = m; + + /* return no joy or set up to restart on a new block */ + if (m != 4) + return Z_DATA_ERROR; + r = z->total_in; w = z->total_out; + inflateReset(z); + z->total_in = r; z->total_out = w; + z->state->mode = BLOCKS; + return Z_OK; +} + +#undef NEEDBYTE +#undef NEXTBYTE + +/*+++++*/ +/* infutil.h -- types and macros common to blocks and codes + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* inflate blocks semi-private state */ +struct inflate_blocks_state { + + /* mode */ + enum { + TYPE, /* get type bits (3, including end bit) */ + LENS, /* get lengths for stored */ + STORED, /* processing stored block */ + TABLE, /* get table lengths */ + BTREE, /* get bit lengths tree for a dynamic block */ + DTREE, /* get length, distance trees for a dynamic block */ + CODES, /* processing fixed or dynamic block */ + DRY, /* output remaining window bytes */ + DONEB, /* finished last block, done */ + BADB} /* got a data error--stuck here */ + mode; /* current inflate_block mode */ + + /* mode dependent information */ + union { + uInt left; /* if STORED, bytes left to copy */ + struct { + uInt table; /* table lengths (14 bits) */ + uInt index; /* index into blens (or border) */ + uIntf *blens; /* bit lengths of codes */ + uInt bb; /* bit length tree depth */ + inflate_huft *tb; /* bit length decoding tree */ + int nblens; /* # elements allocated at blens */ + } trees; /* if DTREE, decoding info for trees */ + struct { + inflate_huft *tl, *td; /* trees to free */ + inflate_codes_statef + *codes; + } decode; /* if CODES, current state */ + } sub; /* submode */ + uInt last; /* true if this block is the last block */ + + /* mode independent information */ + uInt bitk; /* bits in bit buffer */ + uLong bitb; /* bit buffer */ + Bytef *window; /* sliding window */ + Bytef *end; /* one byte after sliding window */ + Bytef *read; /* window read pointer */ + Bytef *write; /* window write pointer */ + check_func checkfn; /* check function */ + uLong check; /* check on output */ + +}; + + +/* defines for inflate input/output */ +/* update pointers and return */ +#define UPDBITS {s->bitb=b;s->bitk=k;} +#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} +#define UPDOUT {s->write=q;} +#define UPDATE {UPDBITS UPDIN UPDOUT} +#define LEAVE {UPDATE return inflate_flush(s,z,r);} +/* get bytes and bits */ +#define LOADIN 
{p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} +#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} +#define NEXTBYTE (n--,*p++) +#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<>=(j);k-=(j);} +/* output bytes */ +#define WAVAIL (qread?s->read-q-1:s->end-q) +#define LOADOUT {q=s->write;m=WAVAIL;} +#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=WAVAIL;}} +#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} +#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} +#define OUTBYTE(a) {*q++=(Byte)(a);m--;} +/* load local pointers */ +#define LOAD {LOADIN LOADOUT} + +/* + * The IBM 150 firmware munges the data right after _etext[]. This + * protects it. -- Cort + */ +local uInt protect_mask[] = {0, 0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0}; +/* And'ing with mask[n] masks the lower n bits */ +local uInt inflate_mask[] = { + 0x0000, + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +/* copy as much as possible from the sliding window to the output area */ +local int inflate_flush OF(( + inflate_blocks_statef *, + z_stream *, + int)); + +/*+++++*/ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +local int inflate_fast OF(( + uInt, + uInt, + inflate_huft *, + inflate_huft *, + inflate_blocks_statef *, + z_stream *)); + + +/*+++++*/ +/* infblock.c -- interpret and process block types to last block + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* Table for deflate from PKZIP's appnote.txt. */ +local uInt border[] = { /* Order of the bit length code lengths */ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* + Notes beyond the 1.93a appnote.txt: + + 1. Distance pointers never point before the beginning of the output + stream. + 2. Distance pointers can point back across blocks, up to 32k away. + 3. There is an implied maximum of 7 bits for the bit length table and + 15 bits for the actual data. + 4. If only one code exists, then it is encoded using one bit. (Zero + would be more efficient, but perhaps a little confusing.) If two + codes exist, they are coded using one bit each (0 and 1). + 5. There is no way of sending zero distance codes--a dummy must be + sent if there are none. (History: a pre 2.0 version of PKZIP would + store blocks with no distance codes, but this was discovered to be + too harsh a criterion.) Valid only for 1.93a. 2.04c does allow + zero distance codes, which is sent as one code of zero bits in + length. + 6. There are up to 286 literal/length codes. Code 256 represents the + end-of-block. Note however that the static length tree defines + 288 codes just to fill out the Huffman codes. Codes 286 and 287 + cannot be used though, since there is no length base or extra bits + defined for them. Similarily, there are up to 30 distance codes. + However, static trees define 32 codes (all 5 bits) to fill out the + Huffman codes, but the last two had better not show up in the data. + 7. Unzip can check dynamic Huffman blocks for complete code sets. + The exception is that a single code would not be complete (see #4). + 8. 
The five bits following the block type is really the number of + literal codes sent minus 257. + 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits + (1+6+6). Therefore, to output three times the length, you output + three codes (1+1+1), whereas to output four times the same length, + you only need two codes (1+3). Hmm. + 10. In the tree reconstruction algorithm, Code = Code + Increment + only if BitLength(i) is not zero. (Pretty obvious.) + 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) + 12. Note: length code 284 can represent 227-258, but length code 285 + really is 258. The last length deserves its own, short code + since it gets used a lot in very redundant files. The length + 258 is special since 258 - 3 (the min match length) is 255. + 13. The literal/length and distance code bit lengths are read as a + single stream of lengths. It is possible (and advantageous) for + a repeat code (16, 17, or 18) to go across the boundary between + the two sets of lengths. + */ + + +local void inflate_blocks_reset(s, z, c) +inflate_blocks_statef *s; +z_stream *z; +uLongf *c; +{ + if (s->checkfn != Z_NULL) + *c = s->check; + if (s->mode == BTREE || s->mode == DTREE) + ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); + if (s->mode == CODES) + { + inflate_codes_free(s->sub.decode.codes, z); + inflate_trees_free(s->sub.decode.td, z); + inflate_trees_free(s->sub.decode.tl, z); + } + s->mode = TYPE; + s->bitk = 0; + s->bitb = 0; + s->read = s->write = s->window; + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(0L, Z_NULL, 0); + Trace((stderr, "inflate: blocks reset\n")); +} + + +local inflate_blocks_statef *inflate_blocks_new(z, c, w) +z_stream *z; +check_func c; +uInt w; +{ + inflate_blocks_statef *s; + + if ((s = (inflate_blocks_statef *)ZALLOC + (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) + return s; + if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) + { + ZFREE(z, s, sizeof(struct inflate_blocks_state)); + return Z_NULL; + } + s->end = s->window + w; + s->checkfn = c; + s->mode = TYPE; + Trace((stderr, "inflate: blocks allocated\n")); + inflate_blocks_reset(s, z, &s->check); + return s; +} + + +local int inflate_blocks(s, z, r) +inflate_blocks_statef *s; +z_stream *z; +int r; +{ + uInt t; /* temporary storage */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input based on current state */ + while (1) switch (s->mode) + { + case TYPE: + NEEDBITS(3) + t = (uInt)b & 7; + s->last = t & 1; + switch (t >> 1) + { + case 0: /* stored */ + Trace((stderr, "inflate: stored block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + t = k & 7; /* go to byte boundary */ + DUMPBITS(t) + s->mode = LENS; /* get length of stored block */ + break; + case 1: /* fixed */ + Trace((stderr, "inflate: fixed codes block%s\n", + s->last ? 
" (last)" : "")); + { + uInt bl, bd; + inflate_huft *tl, *td; + + inflate_trees_fixed(&bl, &bd, &tl, &td); + s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); + if (s->sub.decode.codes == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.decode.tl = Z_NULL; /* don't try to free these */ + s->sub.decode.td = Z_NULL; + } + DUMPBITS(3) + s->mode = CODES; + break; + case 2: /* dynamic */ + Trace((stderr, "inflate: dynamic codes block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + s->mode = TABLE; + break; + case 3: /* illegal */ + DUMPBITS(3) + s->mode = BADB; + z->msg = "invalid block type"; + r = Z_DATA_ERROR; + LEAVE + } + break; + case LENS: + NEEDBITS(32) + if (((~b) >> 16) != (b & 0xffff)) + { + s->mode = BADB; + z->msg = "invalid stored block lengths"; + r = Z_DATA_ERROR; + LEAVE + } + s->sub.left = (uInt)b & 0xffff; + b = k = 0; /* dump bits */ + Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); + s->mode = s->sub.left ? STORED : TYPE; + break; + case STORED: + if (n == 0) + LEAVE + NEEDOUT + t = s->sub.left; + if (t > n) t = n; + if (t > m) t = m; + zmemcpy(q, p, t); + p += t; n -= t; + q += t; m -= t; + if ((s->sub.left -= t) != 0) + break; + Tracev((stderr, "inflate: stored end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + s->mode = s->last ? DRY : TYPE; + break; + case TABLE: + NEEDBITS(14) + s->sub.trees.table = t = (uInt)b & 0x3fff; +#ifndef PKZIP_BUG_WORKAROUND + if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) + { + s->mode = BADB; + z->msg = "too many length or distance symbols"; + r = Z_DATA_ERROR; + LEAVE + } +#endif + t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); + if (t < 19) + t = 19; + if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.trees.nblens = t; + DUMPBITS(14) + s->sub.trees.index = 0; + Tracev((stderr, "inflate: table sizes ok\n")); + s->mode = BTREE; + case BTREE: + while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) + { + NEEDBITS(3) + s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; + DUMPBITS(3) + } + while (s->sub.trees.index < 19) + s->sub.trees.blens[border[s->sub.trees.index++]] = 0; + s->sub.trees.bb = 7; + t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, + &s->sub.trees.tb, z); + if (t != Z_OK) + { + r = t; + if (r == Z_DATA_ERROR) + s->mode = BADB; + LEAVE + } + s->sub.trees.index = 0; + Tracev((stderr, "inflate: bits tree ok\n")); + s->mode = DTREE; + case DTREE: + while (t = s->sub.trees.table, + s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) + { + inflate_huft *h; + uInt i, j, c; + + t = s->sub.trees.bb; + NEEDBITS(t) + h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); + t = h->word.what.Bits; + c = h->more.Base; + if (c < 16) + { + DUMPBITS(t) + s->sub.trees.blens[s->sub.trees.index++] = c; + } + else /* c == 16..18 */ + { + i = c == 18 ? 7 : c - 14; + j = c == 18 ? 11 : 3; + NEEDBITS(t + i) + DUMPBITS(t) + j += (uInt)b & inflate_mask[i]; + DUMPBITS(i) + i = s->sub.trees.index; + t = s->sub.trees.table; + if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || + (c == 16 && i < 1)) + { + s->mode = BADB; + z->msg = "invalid bit length repeat"; + r = Z_DATA_ERROR; + LEAVE + } + c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; + do { + s->sub.trees.blens[i++] = c; + } while (--j); + s->sub.trees.index = i; + } + } + inflate_trees_free(s->sub.trees.tb, z); + s->sub.trees.tb = Z_NULL; + { + uInt bl, bd; + inflate_huft *tl, *td; + inflate_codes_statef *c; + + bl = 9; /* must be <= 9 for lookahead assumptions */ + bd = 6; /* must be <= 9 for lookahead assumptions */ + t = s->sub.trees.table; + t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), + s->sub.trees.blens, &bl, &bd, &tl, &td, z); + if (t != Z_OK) + { + if (t == (uInt)Z_DATA_ERROR) + s->mode = BADB; + r = t; + LEAVE + } + Tracev((stderr, "inflate: trees ok\n")); + if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) + { + inflate_trees_free(td, z); + inflate_trees_free(tl, z); + r = Z_MEM_ERROR; + LEAVE + } + ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); + s->sub.decode.codes = c; + s->sub.decode.tl = tl; + s->sub.decode.td = td; + } + s->mode = CODES; + case CODES: + UPDATE + if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) + return inflate_flush(s, z, r); + r = Z_OK; + inflate_codes_free(s->sub.decode.codes, z); + inflate_trees_free(s->sub.decode.td, z); + inflate_trees_free(s->sub.decode.tl, z); + LOAD + Tracev((stderr, "inflate: codes end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + if (!s->last) + { + s->mode = TYPE; + break; + } + if (k > 7) /* return unused byte, if any */ + { + Assert(k < 16, "inflate_codes grabbed too many bytes") + k -= 8; + n++; + p--; /* can always return one */ + } + s->mode = DRY; + case DRY: + FLUSH + if (s->read != s->write) + LEAVE + s->mode = DONEB; + case DONEB: + r = Z_STREAM_END; + LEAVE + case BADB: + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +} + + +local int inflate_blocks_free(s, z, c) +inflate_blocks_statef *s; +z_stream *z; +uLongf *c; +{ + inflate_blocks_reset(s, z, c); + ZFREE(z, s->window, s->end - s->window); + ZFREE(z, s, sizeof(struct inflate_blocks_state)); + Trace((stderr, "inflate: blocks freed\n")); + return Z_OK; +} + +/* + * This subroutine adds the data at next_in/avail_in to the output history + * without performing any output. The output buffer must be "caught up"; + * i.e. no pending output (hence s->read equals s->write), and the state must + * be BLOCKS (i.e. we should be willing to see the start of a series of + * BLOCKS). On exit, the output will also be caught up, and the checksum + * will have been updated if need be. + */ +local int inflate_addhistory(s, z) +inflate_blocks_statef *s; +z_stream *z; +{ + uLong b; /* bit buffer */ /* NOT USED HERE */ + uInt k; /* bits in bit buffer */ /* NOT USED HERE */ + uInt t; /* temporary storage */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + if (s->read != s->write) + return Z_STREAM_ERROR; + if (s->mode != TYPE) + return Z_DATA_ERROR; + + /* we're ready to rock */ + LOAD + /* while there is input ready, copy to output buffer, moving + * pointers as needed. + */ + while (n) { + t = n; /* how many to do */ + /* is there room until end of buffer? 
*/ + if (t > m) t = m; + /* update check information */ + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(s->check, q, t); + zmemcpy(q, p, t); + q += t; + p += t; + n -= t; + z->total_out += t; + s->read = q; /* drag read pointer forward */ +/* WRAP */ /* expand WRAP macro by hand to handle s->read */ + if (q == s->end) { + s->read = q = s->window; + m = WAVAIL; + } + } + UPDATE + return Z_OK; +} + + +/* + * At the end of a Deflate-compressed PPP packet, we expect to have seen + * a `stored' block type value but not the (zero) length bytes. + */ +local int inflate_packet_flush(s) + inflate_blocks_statef *s; +{ + if (s->mode != LENS) + return Z_DATA_ERROR; + s->mode = TYPE; + return Z_OK; +} + + +/*+++++*/ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* simplify the use of the inflate_huft type with some defines */ +#define base more.Base +#define next more.Next +#define exop word.what.Exop +#define bits word.what.Bits + + +local int huft_build OF(( + uIntf *, /* code lengths in bits */ + uInt, /* number of codes */ + uInt, /* number of "simple" codes */ + uIntf *, /* list of base values for non-simple codes */ + uIntf *, /* list of extra bits for non-simple codes */ + inflate_huft * FAR*,/* result: starting table */ + uIntf *, /* maximum lookup bits (returns actual) */ + z_stream *)); /* for zalloc function */ + +local voidpf falloc OF(( + voidpf, /* opaque pointer (not used) */ + uInt, /* number of items */ + uInt)); /* size of item */ + +local void ffree OF(( + voidpf q, /* opaque pointer (not used) */ + voidpf p, /* what to free (not used) */ + uInt n)); /* number of bytes (not used) */ + +/* Tables for deflate from PKZIP's appnote.txt. */ +local uInt cplens[] = { /* Copy lengths for literal codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + /* actually lengths - 2; also see note #13 above about 258 */ +local uInt cplext[] = { /* Extra bits for literal codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 192, 192}; /* 192==invalid */ +local uInt cpdist[] = { /* Copy offsets for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; +local uInt cpdext[] = { /* Extra bits for distance codes */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + +/* + Huffman code decoding is performed using a multi-level table lookup. + The fastest way to decode is to simply build a lookup table whose + size is determined by the longest code. However, the time it takes + to build this table can also be a factor if the data being decoded + is not very long. The most common codes are necessarily the + shortest codes, so those codes dominate the decoding time, and hence + the speed. The idea is you can have a shorter table that decodes the + shorter, more probable codes, and then point to subsidiary tables for + the longer codes. The time it costs to decode the longer codes is + then traded against the time it takes to make longer tables. + + This results of this trade are in the variables lbits and dbits + below. 
lbits is the number of bits the first level table for literal/ + length codes can decode in one step, and dbits is the same thing for + the distance codes. Subsequent tables are also less than or equal to + those sizes. These values may be adjusted either when all of the + codes are shorter than that, in which case the longest code length in + bits is used, or when the shortest code is *longer* than the requested + table size, in which case the length of the shortest code in bits is + used. + + There are two different values for the two tables, since they code a + different number of possibilities each. The literal/length table + codes 286 possible values, or in a flat code, a little over eight + bits. The distance table codes 30 possible values, or a little less + than five bits, flat. The optimum values for speed end up being + about one bit more than those, so lbits is 8+1 and dbits is 5+1. + The optimum values may differ though from machine to machine, and + possibly even between compilers. Your mileage may vary. + */ + + +/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ +#define BMAX 15 /* maximum bit length of any code */ +#define N_MAX 288 /* maximum number of codes in any set */ + +#ifdef DEBUG_ZLIB + uInt inflate_hufts; +#endif + +local int huft_build(b, n, s, d, e, t, m, zs) +uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ +uInt n; /* number of codes (assumed <= N_MAX) */ +uInt s; /* number of simple-valued codes (0..s-1) */ +uIntf *d; /* list of base values for non-simple codes */ +uIntf *e; /* list of extra bits for non-simple codes */ +inflate_huft * FAR *t; /* result: starting table */ +uIntf *m; /* maximum lookup bits, returns actual */ +z_stream *zs; /* for zalloc function */ +/* Given a list of code lengths and a maximum table size, make a set of + tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR + if the given code set is incomplete (the tables are still built in this + case), Z_DATA_ERROR if the input is invalid (all zero length codes or an + over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. 
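+
+   For illustration only (this mirrors the fixed-table call made from
+   inflate_trees_fixed() below; `lengths' is a hypothetical array of
+   288 code lengths and `z' a z_stream set up as in that routine):
+
+     uInt bl = 7;               /* request a 7-bit first-level table */
+     inflate_huft *tl;
+     int r = huft_build(lengths, 288, 257, cplens, cplext, &tl, &bl, &z);
+
+   On return bl holds the first-level table size actually used, which
+   huft_build() may adjust up or down as described in the note on
+   lbits and dbits above.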
*/ +{ + + uInt a; /* counter for codes of length k */ + uInt c[BMAX+1]; /* bit length count table */ + uInt f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int h; /* table level */ + register uInt i; /* counter, current code */ + register uInt j; /* counter */ + register int k; /* number of bits in current code */ + int l; /* bits per table (returned in m) */ + register uIntf *p; /* pointer into c[], b[], or v[] */ + inflate_huft *q; /* points to current table */ + struct inflate_huft_s r; /* table entry for structure assignment */ + inflate_huft *u[BMAX]; /* table stack */ + uInt v[N_MAX]; /* values in order of bit length */ + register int w; /* bits before this table == (l * h) */ + uInt x[BMAX+1]; /* bit offsets, then code stack */ + uIntf *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + uInt z; /* number of entries in current table */ + + + /* Generate counts for each bit length */ + p = c; +#define C0 *p++ = 0; +#define C2 C0 C0 C0 C0 +#define C4 C2 C2 C2 C2 + C4 /* clear c[]--assume BMAX+1 is 16 */ + p = b; i = n; + do { + c[*p++]++; /* assume all entries <= BMAX */ + } while (--i); + if (c[0] == n) /* null input--all zero length codes */ + { + *t = (inflate_huft *)Z_NULL; + *m = 0; + return Z_OK; + } + + + /* Find minimum and maximum length, bound *m by those */ + l = *m; + for (j = 1; j <= BMAX; j++) + if (c[j]) + break; + k = j; /* minimum code length */ + if ((uInt)l < j) + l = j; + for (i = BMAX; i; i--) + if (c[i]) + break; + g = i; /* maximum code length */ + if ((uInt)l > i) + l = i; + *m = l; + + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) + if ((y -= c[j]) < 0) + return Z_DATA_ERROR; + if ((y -= c[i]) < 0) + return Z_DATA_ERROR; + c[i] += y; + + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; xp = x + 2; + while (--i) { /* note that i == g from above */ + *xp++ = (j += *p++); + } + + + /* Make a table of values in order of bit lengths */ + p = b; i = 0; + do { + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); + + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + h = -1; /* no tables yet--level -1 */ + w = -l; /* bits decoded == (l * h) */ + u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ + q = (inflate_huft *)Z_NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) + { + a = c[k]; + while (a--) + { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > w + l) + { + h++; + w += l; /* previous table always l bits */ + + /* compute minimum size table less than or equal to l bits */ + z = (z = g - w) > (uInt)l ? 
l : z; /* table size upper limit */ + if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ + { /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + if (j < z) + while (++j < z) /* try smaller tables up to z bits */ + { + if ((f <<= 1) <= *++xp) + break; /* enough codes to use up j bits */ + f -= *xp; /* else deduct codes from patterns */ + } + } + z = 1 << j; /* table entries for j-bit table */ + + /* allocate and link in new table */ + if ((q = (inflate_huft *)ZALLOC + (zs,z + 1,sizeof(inflate_huft))) == Z_NULL) + { + if (h) + inflate_trees_free(u[0], zs); + return Z_MEM_ERROR; /* not enough memory */ + } + q->word.Nalloc = z + 1; +#ifdef DEBUG_ZLIB + inflate_hufts += z + 1; +#endif + *t = q + 1; /* link to list for huft_free() */ + *(t = &(q->next)) = Z_NULL; + u[h] = ++q; /* table starts after link */ + + /* connect to last table, if there is one */ + if (h) + { + x[h] = i; /* save pattern for backing up */ + r.bits = (Byte)l; /* bits to dump before this table */ + r.exop = (Byte)j; /* bits in this table */ + r.next = q; /* pointer to this table */ + j = i >> (w - l); /* (get around Turbo C bug) */ + u[h-1][j] = r; /* connect to last table */ + } + } + + /* set up table entry in r */ + r.bits = (Byte)(k - w); + if (p >= v + n) + r.exop = 128 + 64; /* out of values--invalid code */ + else if (*p < s) + { + r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ + r.base = *p++; /* simple code is just the value */ + } + else + { + r.exop = (Byte)e[*p - s] + 16 + 64; /* non-simple--look up in lists */ + r.base = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) + q[j] = r; + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) + i ^= j; + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[h]) + { + h--; /* don't need to update q */ + w -= l; + } + } + } + + + /* Return Z_BUF_ERROR if we were given an incomplete table */ + return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; +} + + +local int inflate_trees_bits(c, bb, tb, z) +uIntf *c; /* 19 code lengths */ +uIntf *bb; /* bits tree desired/actual depth */ +inflate_huft * FAR *tb; /* bits tree result */ +z_stream *z; /* for zfree function */ +{ + int r; + + r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z); + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed dynamic bit lengths tree"; + else if (r == Z_BUF_ERROR) + { + inflate_trees_free(*tb, z); + z->msg = "incomplete dynamic bit lengths tree"; + r = Z_DATA_ERROR; + } + return r; +} + + +local int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, z) +uInt nl; /* number of literal/length codes */ +uInt nd; /* number of distance codes */ +uIntf *c; /* that many (total) code lengths */ +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ +inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +z_stream *z; /* for zfree function */ +{ + int r; + + /* build literal/length tree */ + if ((r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z)) != Z_OK) + { + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed literal/length tree"; + else if (r == Z_BUF_ERROR) + { + inflate_trees_free(*tl, z); + z->msg = "incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + return r; + } + + /* build distance tree */ + if ((r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z)) != Z_OK) + { + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed literal/length tree"; + else if (r == Z_BUF_ERROR) { +#ifdef PKZIP_BUG_WORKAROUND + r = Z_OK; + } +#else + inflate_trees_free(*td, z); + z->msg = "incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + inflate_trees_free(*tl, z); + return r; +#endif + } + + /* done */ + return Z_OK; +} + + +/* build fixed tables only once--keep them here */ +local int fixed_lock = 0; +local int fixed_built = 0; +#define FIXEDH 530 /* number of hufts used by fixed tables */ +local uInt fixed_left = FIXEDH; +local inflate_huft fixed_mem[FIXEDH]; +local uInt fixed_bl; +local uInt fixed_bd; +local inflate_huft *fixed_tl; +local inflate_huft *fixed_td; + + +local voidpf falloc(q, n, s) +voidpf q; /* opaque pointer (not used) */ +uInt n; /* number of items */ +uInt s; /* size of item */ +{ + Assert(s == sizeof(inflate_huft) && n <= fixed_left, + "inflate_trees falloc overflow"); + if (q) s++; /* to make some compilers happy */ + fixed_left -= n; + return (voidpf)(fixed_mem + fixed_left); +} + + +local void ffree(q, p, n) +voidpf q; +voidpf p; +uInt n; +{ + Assert(0, "inflate_trees ffree called!"); + if (q) q = p; /* to make some compilers happy */ +} + + +local int inflate_trees_fixed(bl, bd, tl, td) +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ +inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +{ + /* build fixed tables if not built already--lock out other instances */ + while (++fixed_lock > 1) + fixed_lock--; + if (!fixed_built) + { + int k; /* temporary variable */ + unsigned c[288]; /* length list for huft_build */ + z_stream z; /* for falloc function */ + + /* set up fake z_stream for memory routines */ + z.zalloc = falloc; + z.zfree = ffree; + z.opaque = Z_NULL; + + /* literal table */ + for (k = 0; k < 144; k++) + c[k] = 8; + for (; k < 256; k++) + c[k] = 9; + for (; k < 280; k++) + c[k] = 7; + for (; k < 288; k++) + c[k] = 8; + fixed_bl = 7; + huft_build(c, 288, 257, cplens, cplext, &fixed_tl, 
&fixed_bl, &z); + + /* distance table */ + for (k = 0; k < 30; k++) + c[k] = 5; + fixed_bd = 5; + huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z); + + /* done */ + fixed_built = 1; + } + fixed_lock--; + *bl = fixed_bl; + *bd = fixed_bd; + *tl = fixed_tl; + *td = fixed_td; + return Z_OK; +} + + +local int inflate_trees_free(t, z) +inflate_huft *t; /* table to free */ +z_stream *z; /* for zfree function */ +/* Free the malloc'ed tables built by huft_build(), which makes a linked + list of the tables it made, with the links in a dummy first entry of + each table. */ +{ + register inflate_huft *p, *q; + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + p = t; + while (p != Z_NULL) + { + q = (--p)->next; + ZFREE(z, p, p->word.Nalloc * sizeof(inflate_huft)); + p = q; + } + return Z_OK; +} + +/*+++++*/ +/* infcodes.c -- process literals and length/distance pairs + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* simplify the use of the inflate_huft type with some defines */ +#define base more.Base +#define next more.Next +#define exop word.what.Exop +#define bits word.what.Bits + +/* inflate codes private state */ +struct inflate_codes_state { + + /* mode */ + enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + START, /* x: set up for LEN */ + LEN, /* i: get length/literal/eob next */ + LENEXT, /* i: getting length extra (have base) */ + DIST, /* i: get distance next */ + DISTEXT, /* i: getting distance extra */ + COPY, /* o: copying bytes in window, waiting for space */ + LIT, /* o: got literal, waiting for output space */ + WASH, /* o: got eob, possibly still output waiting */ + END, /* x: got eob and all data flushed */ + BADCODE} /* x: got error */ + mode; /* current inflate_codes mode */ + + /* mode dependent information */ + uInt len; + union { + struct { + inflate_huft *tree; /* pointer into tree */ + uInt need; /* bits needed */ + } code; /* if LEN or DIST, where in tree */ + uInt lit; /* if LIT, literal */ + struct { + uInt get; /* bits to get for extra */ + uInt dist; /* distance back to copy from */ + } copy; /* if EXT or COPY, where and how much */ + } sub; /* submode */ + + /* mode independent information */ + Byte lbits; /* ltree bits decoded per branch */ + Byte dbits; /* dtree bits decoder per branch */ + inflate_huft *ltree; /* literal/length/eob tree */ + inflate_huft *dtree; /* distance tree */ + +}; + + +local inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) +uInt bl, bd; +inflate_huft *tl, *td; +z_stream *z; +{ + inflate_codes_statef *c; + + if ((c = (inflate_codes_statef *) + ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) + { + c->mode = START; + c->lbits = (Byte)bl; + c->dbits = (Byte)bd; + c->ltree = tl; + c->dtree = td; + Tracev((stderr, "inflate: codes new\n")); + } + return c; +} + + +local int inflate_codes(s, z, r) +inflate_blocks_statef *s; +z_stream *z; +int r; +{ + uInt j; /* temporary storage */ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + Bytef *f; /* pointer to copy strings from */ + inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process 
input and output based on current state */ + while (1) switch (c->mode) + { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + case START: /* x: set up for LEN */ +#ifndef SLOW + if (m >= 258 && n >= 10) + { + UPDATE + r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); + LOAD + if (r != Z_OK) + { + c->mode = r == Z_STREAM_END ? WASH : BADCODE; + break; + } + } +#endif /* !SLOW */ + c->sub.code.need = c->lbits; + c->sub.code.tree = c->ltree; + c->mode = LEN; + case LEN: /* i: get length/literal/eob next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e == 0) /* literal */ + { + c->sub.lit = t->base; + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", t->base)); + c->mode = LIT; + break; + } + if (e & 16) /* length */ + { + c->sub.copy.get = e & 15; + c->len = t->base; + c->mode = LENEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t->next; + break; + } + if (e & 32) /* end of block */ + { + Tracevv((stderr, "inflate: end of block\n")); + c->mode = WASH; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = "invalid literal/length code"; + r = Z_DATA_ERROR; + LEAVE + case LENEXT: /* i: getting length extra (have base) */ + j = c->sub.copy.get; + NEEDBITS(j) + c->len += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + c->sub.code.need = c->dbits; + c->sub.code.tree = c->dtree; + Tracevv((stderr, "inflate: length %u\n", c->len)); + c->mode = DIST; + case DIST: /* i: get distance next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e & 16) /* distance */ + { + c->sub.copy.get = e & 15; + c->sub.copy.dist = t->base; + c->mode = DISTEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t->next; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = "invalid distance code"; + r = Z_DATA_ERROR; + LEAVE + case DISTEXT: /* i: getting distance extra */ + j = c->sub.copy.get; + NEEDBITS(j) + c->sub.copy.dist += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); + c->mode = COPY; + case COPY: /* o: copying bytes in window, waiting for space */ +#ifndef __TURBOC__ /* Turbo C bug for following expression */ + f = (uInt)(q - s->window) < c->sub.copy.dist ? 
+            s->end - (c->sub.copy.dist - (q - s->window)) :
+            q - c->sub.copy.dist;
+#else
+      f = q - c->sub.copy.dist;
+      if ((uInt)(q - s->window) < c->sub.copy.dist)
+        f = s->end - (c->sub.copy.dist - (q - s->window));
+#endif
+      while (c->len)
+      {
+        NEEDOUT
+        OUTBYTE(*f++)
+        if (f == s->end)
+          f = s->window;
+        c->len--;
+      }
+      c->mode = START;
+      break;
+    case LIT:           /* o: got literal, waiting for output space */
+      NEEDOUT
+      OUTBYTE(c->sub.lit)
+      c->mode = START;
+      break;
+    case WASH:          /* o: got eob, possibly more output */
+      FLUSH
+      if (s->read != s->write)
+        LEAVE
+      c->mode = END;
+    case END:
+      r = Z_STREAM_END;
+      LEAVE
+    case BADCODE:       /* x: got error */
+      r = Z_DATA_ERROR;
+      LEAVE
+    default:
+      r = Z_STREAM_ERROR;
+      LEAVE
+  }
+}
+
+
+local void inflate_codes_free(c, z)
+inflate_codes_statef *c;
+z_stream *z;
+{
+  ZFREE(z, c, sizeof(struct inflate_codes_state));
+  Tracev((stderr, "inflate: codes free\n"));
+}
+
+/*+++++*/
+/* inflate_util.c -- data and routines common to blocks and codes
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* copy as much as possible from the sliding window to the output area */
+local int inflate_flush(s, z, r)
+inflate_blocks_statef *s;
+z_stream *z;
+int r;
+{
+  uInt n;
+  Bytef *p, *q;
+
+  /* local copies of source and destination pointers */
+  p = z->next_out;
+  q = s->read;
+
+  /* compute number of bytes to copy as far as end of window */
+  n = (uInt)((q <= s->write ? s->write : s->end) - q);
+  if (n > z->avail_out) n = z->avail_out;
+  if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+  /* update counters */
+  z->avail_out -= n;
+  z->total_out += n;
+
+  /* update check information */
+  if (s->checkfn != Z_NULL)
+    s->check = (*s->checkfn)(s->check, q, n);
+
+  /* copy as far as end of window */
+  zmemcpy(p, q, n);
+  p += n;
+  q += n;
+
+  /* see if more to copy at beginning of window */
+  if (q == s->end)
+  {
+    /* wrap pointers */
+    q = s->window;
+    if (s->write == s->end)
+      s->write = s->window;
+
+    /* compute bytes to copy */
+    n = (uInt)(s->write - q);
+    if (n > z->avail_out) n = z->avail_out;
+    if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+    /* update counters */
+    z->avail_out -= n;
+    z->total_out += n;
+
+    /* update check information */
+    if (s->checkfn != Z_NULL)
+      s->check = (*s->checkfn)(s->check, q, n);
+
+    /* copy */
+    zmemcpy(p, q, n);
+    p += n;
+    q += n;
+  }
+
+  /* update pointers */
+  z->next_out = p;
+  s->read = q;
+
+  /* done */
+  return r;
+}
+
+
+/*+++++*/
+/* inffast.c -- process literals and length/distance pairs fast
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* macros for bit input with no checking and for returning unused bytes */
+#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define UNGRAB {n+=(c=k>>3);p-=c;k&=7;}
+
+/* Called with number of bytes left to write in window at least 258
+   (the maximum string length) and number of input bytes available
+   at least ten.  The ten bytes are six bytes for the longest length/
+   distance pair plus four bytes for overloading the bit buffer.
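+
+   (To see where the six comes from: a length/distance pair costs at
+   most a 15-bit length code + 5 extra bits + a 15-bit distance code
+   + 13 extra bits = 48 bits = 6 bytes; the remaining four bytes are
+   the whole bytes GRABBITS may pull into the bit buffer beyond what
+   the current pair consumes, hence 6 + 4 = 10.)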
*/ + +local int inflate_fast(bl, bd, tl, td, s, z) +uInt bl, bd; +inflate_huft *tl, *td; +inflate_blocks_statef *s; +z_stream *z; +{ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + uInt ml; /* mask for literal/length tree */ + uInt md; /* mask for distance tree */ + uInt c; /* bytes to copy */ + uInt d; /* distance back to copy from */ + Bytef *r; /* copy source pointer */ + + /* load input, output, bit values */ + LOAD + + /* initialize masks */ + ml = inflate_mask[bl]; + md = inflate_mask[bd]; + + /* do until not enough input or output space for fast loop */ + do { /* assume called with m >= 258 && n >= 10 */ + /* get literal/length code */ + GRABBITS(20) /* max bits for literal/length code */ + if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + continue; + } + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits for length */ + e &= 15; + c = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * length %u\n", c)); + + /* decode distance base of block to copy */ + GRABBITS(15); /* max bits for distance code */ + e = (t = td + ((uInt)b & md))->exop; + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits to add to distance base */ + e &= 15; + GRABBITS(e) /* get extra bits (up to 13) */ + d = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * distance %u\n", d)); + + /* do the copy */ + m -= c; + if ((uInt)(q - s->window) >= d) /* offset before dest */ + { /* just copy */ + r = q - d; + *q++ = *r++; c--; /* minimum count is three, */ + *q++ = *r++; c--; /* so unroll loop a little */ + } + else /* else offset after destination */ + { + e = d - (q - s->window); /* bytes from offset to end */ + r = s->end - e; /* pointer to offset */ + if (c > e) /* if source crosses, */ + { + c -= e; /* copy to end of window */ + do { + *q++ = *r++; + } while (--e); + r = s->window; /* copy rest from start of window */ + } + } + do { /* copy all or what's left */ + *q++ = *r++; + } while (--c); + break; + } + else if ((e & 64) == 0) + e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop; + else + { + z->msg = "invalid distance code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + break; + } + if ((e & 64) == 0) + { + if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + break; + } + } + else if (e & 32) + { + Tracevv((stderr, "inflate: * end of block\n")); + UNGRAB + UPDATE + return Z_STREAM_END; + } + else + { + z->msg = "invalid literal/length code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + } while (m >= 258 && n >= 10); + + /* not enough input or output--restore pointers and return */ + UNGRAB + UPDATE + return Z_OK; +} + + +/*+++++*/ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: zutil.c,v 1.8 1995/05/03 17:27:12 jloup Exp */ + +char *zlib_version = ZLIB_VERSION; + +char *z_errmsg[] = { +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +""}; + + +/*+++++*/ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: adler32.c,v 1.6 1995/05/03 17:27:08 jloup Exp */ + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf) {s1 += *buf++; s2 += s1;} +#define DO2(buf) DO1(buf); DO1(buf); +#define DO4(buf) DO2(buf); DO2(buf); +#define DO8(buf) DO4(buf); DO4(buf); +#define DO16(buf) DO8(buf); DO8(buf); + +/* ========================================================================= */ +uLong adler32(adler, buf, len) + uLong adler; + Bytef *buf; + uInt len; +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == Z_NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + k -= 16; + } + if (k != 0) do { + DO1(buf); + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/boot/zlib.h linuxppc64_2_4/arch/ppc64/boot/zlib.h --- ../kernel.org/linux/arch/ppc64/boot/zlib.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/zlib.h Mon Jun 18 13:47:13 2001 @@ -0,0 +1,432 @@ +/* */ + +/* + * This file is derived from zlib.h and zconf.h from the zlib-0.95 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. + */ + +/* + * ==FILEVERSION 960122== + * + * This marker is used by the Linux installation script to determine + * whether an up-to-date version of this file is already installed. + */ + +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 0.95, Aug 16th, 1995. + + Copyright (C) 1995 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + gzip@prep.ai.mit.edu madler@alumni.caltech.edu + */ + +#ifndef _ZLIB_H +#define _ZLIB_H + +/* #include "zconf.h" */ /* included directly here */ + +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: zconf.h,v 1.12 1995/05/03 17:27:12 jloup Exp */ + +/* + The library does not install any signal handler. It is recommended to + add at least a handler for SIGSEGV when decompressing; the library checks + the consistency of the input data whenever possible but may go nuts + for some forms of corrupted input. + */ + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + * Compile with -DUNALIGNED_OK if it is OK to access shorts or ints + * at addresses which are not a multiple of their size. + * Under DOS, -DFAR=far or -DFAR=__far may be needed. + */ + +#ifndef STDC +# if defined(MSDOS) || defined(__STDC__) || defined(__cplusplus) +# define STDC +# endif +#endif + +#ifdef __MWERKS__ /* Metrowerks CodeWarrior declares fileno() in unix.h */ +# include +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +#ifndef FAR +# define FAR +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + 1 << (windowBits+2) + 1 << (memLevel+9) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +typedef unsigned char Byte; /* 8 bits */ +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +typedef Byte FAR Bytef; +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +/* end of original zconf.h */ + +#define ZLIB_VERSION "0.95P" + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms may be added later and will have the same + stream interface. + + For compression the application must provide the output buffer and + may optionally provide the input buffer for optimization. For decompression, + the application must provide the input buffer and may optionally provide + the output buffer for optimization. 
+ + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address, uInt nbytes)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidp opaque; /* private data object passed to zalloc and zfree */ + + Byte data_type; /* best guess about the data type: ascii or binary */ + +} z_stream; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). 
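+
+   A minimal decompression loop, to illustrate the calling convention
+   (my_alloc, my_free and the in/out buffers are hypothetical
+   caller-supplied names; error handling is abbreviated):
+
+     z_stream z;
+     z.zalloc = my_alloc; z.zfree = my_free; z.opaque = Z_NULL;
+     if (inflateInit(&z) != Z_OK) return;
+     z.next_in = in;   z.avail_in = insz;
+     z.next_out = out; z.avail_out = outsz;
+     while (inflate(&z, Z_NO_FLUSH) == Z_OK && z.avail_out != 0)
+         ;                  /* refill next_in/avail_in when streaming */
+     inflateEnd(&z);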
+*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_FULL_FLUSH 2 +#define Z_SYNC_FLUSH 3 /* experimental: partial_flush + byte align */ +#define Z_FINISH 4 +#define Z_PACKET_FLUSH 5 +/* See deflate() below for the usage of these constants */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +/* error codes for the compression/decompression functions */ + +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_DEFAULT_STRATEGY 0 + +#define Z_BINARY 0 +#define Z_ASCII 1 +#define Z_UNKNOWN 2 +/* Used to set the data_type field */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +extern char *zlib_version; +/* The application can compare zlib_version and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + */ + + /* basic functions */ + +extern int inflateInit OF((z_stream *strm)); +/* + Initializes the internal stream state for decompression. The fields + zalloc and zfree must be initialized before by the caller. If zalloc and + zfree are set to Z_NULL, inflateInit updates them to use default allocation + functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory. msg is set to null if there is no error message. + inflateInit does not perform any decompression: this will be done by + inflate(). +*/ + + +extern int inflate OF((z_stream *strm, int flush)); +/* + Performs one or both of the following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() always provides as much output as possible + (until there is no more input data or no more space in the output buffer). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). + + If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH, + inflate flushes as much output as possible to the output buffer. The + flushing behavior of inflate is not specified for values of the flush + parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the + current implementation actually flushes as much output as possible + anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data + has been consumed, it is expecting to see the length field of a stored + block; if not, it returns Z_DATA_ERROR. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. 
In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster routine + may be used for the single inflate() call. + + inflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if the end of the + compressed data has been reached and all uncompressed output has been + produced, Z_DATA_ERROR if the input data was corrupted, Z_STREAM_ERROR if + the stream structure was inconsistent (for example if next_in or next_out + was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no + progress is possible or if there was not enough room in the output buffer + when Z_FINISH is used. In the Z_DATA_ERROR case, the application may then + call inflateSync to look for a good compression block. */ + + +extern int inflateEnd OF((z_stream *strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* advanced functions */ + +extern int inflateInit2 OF((z_stream *strm, + int windowBits)); +/* + This is another version of inflateInit with more compression options. The + fields next_out, zalloc and zfree must be initialized before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library (the value 16 will be allowed soon). The + default value is 15 if inflateInit is used instead. If a compressed stream + with a larger window size is given as input, inflate() will return with + the error code Z_DATA_ERROR instead of trying to allocate a larger window. 
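+
+   For example, the default windowBits of 15 implies a 1<<15 = 32768
+   byte history buffer, while a PPP implementation that negotiated an
+   8K window could pass 13 and allocate only a quarter of that.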
+ + If next_out is not null, the library will use this buffer for the history + buffer; the buffer must either be large enough to hold the entire output + data, or have at least 1< +#include +#include +#include +#include +#include +#include +#ifndef _HVCALLSC_H +#include +#endif +#include + +#ifndef _HVTYPES_H +#include +#endif + + +/*===================================================================== + * Note that this call takes at MOST one page worth of data + */ +int HvCall_readLogBuffer(HvLpIndex lpIndex, void *buffer, u64 bufLen) +{ + struct HvLpBufferList *bufList; + u64 bytesLeft = bufLen; + u64 leftThisPage; + u64 curPtr = virt_to_absolute( (unsigned long) buffer ); + u64 retVal; + int npages; + int i; + + npages = 0; + while (bytesLeft) { + npages++; + leftThisPage = ((curPtr & PAGE_MASK) + PAGE_SIZE) - curPtr; + + if (leftThisPage > bytesLeft) + bytesLeft = 0; + else + bytesLeft -= leftThisPage; + + curPtr = (curPtr & PAGE_MASK) + PAGE_SIZE; + } + + if (npages == 0) + return 0; + + bufList = (struct HvLpBufferList *) + kmalloc(npages * sizeof(struct HvLpBufferList), GFP_ATOMIC); + bytesLeft = bufLen; + curPtr = virt_to_absolute( (unsigned long) buffer ); + for(i=0; i bytesLeft) { + bufList[i].len = bytesLeft; + bytesLeft = 0; + } else { + bufList[i].len = leftThisPage; + bytesLeft -= leftThisPage; + } + + curPtr = (curPtr & PAGE_MASK) + PAGE_SIZE; + } + + + retVal = HvCall3(HvCallBaseReadLogBuffer, lpIndex, + virt_to_absolute((unsigned long)bufList), bufLen); + + kfree(bufList); + + return (int)retVal; +} + +/*===================================================================== + */ +void HvCall_writeLogBuffer(const void *buffer, u64 bufLen) +{ + struct HvLpBufferList bufList; + u64 bytesLeft = bufLen; + u64 leftThisPage; + u64 curPtr = virt_to_absolute( (unsigned long) buffer ); + + while (bytesLeft) { + bufList.addr = curPtr; + + leftThisPage = ((curPtr & PAGE_MASK) + PAGE_SIZE) - curPtr; + + if (leftThisPage > bytesLeft) { + bufList.len = bytesLeft; + bytesLeft = 0; + } else { + bufList.len = leftThisPage; + bytesLeft -= leftThisPage; + } + + curPtr = (curPtr & PAGE_MASK) + PAGE_SIZE; + } + + + HvCall2(HvCallBaseWriteLogBuffer, + virt_to_absolute((unsigned long)&bufList), bufLen); + +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/HvLpConfig.c linuxppc64_2_4/arch/ppc64/kernel/HvLpConfig.c --- ../kernel.org/linux/arch/ppc64/kernel/HvLpConfig.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/HvLpConfig.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,28 @@ +/* + * HvLpConfig.c + * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _HVLPCONFIG_H +#include +#endif + +HvLpIndex HvLpConfig_getLpIndex_outline(void) +{ + return HvLpConfig_getLpIndex(); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/HvLpEvent.c linuxppc64_2_4/arch/ppc64/kernel/HvLpEvent.c --- ../kernel.org/linux/arch/ppc64/kernel/HvLpEvent.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/HvLpEvent.c Fri May 4 17:13:58 2001 @@ -0,0 +1,77 @@ +/* + * Copyright 2001 Mike Corrigan IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +/* Array of LpEvent handler functions */ +LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes]; + +/* Register a handler for an LpEvent type */ + +int HvLpEvent_registerHandler( HvLpEvent_Type eventType, LpEventHandler handler ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes ) { + lpEventHandler[eventType] = handler; + rc = 0; + } + return rc; + +} + +int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes ) { + if ( !lpEventHandlerPaths[eventType] ) { + lpEventHandler[eventType] = NULL; + rc = 0; + } + } + return rc; +} + +/* (lpIndex is the partition index of the target partition. + * needed only for VirtualIo, VirtualLan and SessionMgr. Zero + * indicates to use our partition index - for the other types) + */ +int HvLpEvent_openPath( HvLpEvent_Type eventType, HvLpIndex lpIndex ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes && + lpEventHandler[eventType] ) { + if ( lpIndex == 0 ) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_openLpEventPath( lpIndex, eventType ); + ++lpEventHandlerPaths[eventType]; + rc = 0; + } + return rc; +} + +int HvLpEvent_closePath( HvLpEvent_Type eventType, HvLpIndex lpIndex ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes && + lpEventHandler[eventType] && + lpEventHandlerPaths[eventType] ) { + if ( lpIndex == 0 ) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_closeLpEventPath( lpIndex, eventType ); + --lpEventHandlerPaths[eventType]; + rc = 0; + } + return rc; +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ItLpQueue.c linuxppc64_2_4/arch/ppc64/kernel/ItLpQueue.c --- ../kernel.org/linux/arch/ppc64/kernel/ItLpQueue.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ItLpQueue.c Tue Sep 18 14:27:22 2001 @@ -0,0 +1,168 @@ +/* + * ItLpQueue.c + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static __inline__ int set_inUse( struct ItLpQueue * lpQueue ) +{ + int t; + u32 * inUseP = &(lpQueue->xInUseWord); + + __asm__ __volatile__("\n\ +1: lwarx %0,0,%2 \n\ + cmpi 0,%0,0 \n\ + li %0,0 \n\ + bne- 2f \n\ + addi %0,%0,1 \n\ + stwcx. 
%0,0,%2 \n\ + bne- 1b \n\ +2: eieio" + : "=&r" (t), "=m" (lpQueue->xInUseWord) + : "r" (inUseP), "m" (lpQueue->xInUseWord) + : "cc"); + + return t; +} + +static __inline__ void clear_inUse( struct ItLpQueue * lpQueue ) +{ + lpQueue->xInUseWord = 0; +} + +/* Array of LpEvent handler functions */ +extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +unsigned long ItLpQueueInProcess = 0; + +struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue ) +{ + struct HvLpEvent * nextLpEvent = + (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; + if ( nextLpEvent->xFlags.xValid ) { + /* Set pointer to next potential event */ + lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 + + LpEventAlign ) / + LpEventAlign ) * + LpEventAlign; + /* Wrap to beginning if no room at end */ + if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr) + lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr; + } + else + nextLpEvent = NULL; + + return nextLpEvent; +} + +int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue ) +{ + int retval = 0; + struct HvLpEvent * nextLpEvent; + if ( lpQueue ) { + nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; + retval = nextLpEvent->xFlags.xValid | lpQueue->xPlicOverflowIntPending; + } + return retval; +} + +void ItLpQueue_clearValid( struct HvLpEvent * event ) +{ + /* Clear the valid bit of the event + * Also clear bits within this event that might + * look like valid bits (on 64-byte boundaries) + */ + unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) / + LpEventAlign ) - 1; + switch ( extra ) { + case 3: + ((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid=0; + case 2: + ((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid=0; + case 1: + ((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid=0; + case 0: + ; + } + mb(); + event->xFlags.xValid = 0; +} + +unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs ) +{ + unsigned numIntsProcessed = 0; + struct HvLpEvent * nextLpEvent; + + /* If we have recursed, just return */ + if ( !set_inUse( lpQueue ) ) + return 0; + + if (ItLpQueueInProcess == 0) + ItLpQueueInProcess = 1; + else + BUG(); + + for (;;) { + nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue ); + if ( nextLpEvent ) { + /* Count events to return to caller + * and count processed events in lpQueue + */ + ++numIntsProcessed; + lpQueue->xLpIntCount++; + /* Call appropriate handler here, passing + * a pointer to the LpEvent. The handler + * must make a copy of the LpEvent if it + * needs it in a bottom half. (perhaps for + * an ACK) + * + * Handlers are responsible for ACK processing + * + * The Hypervisor guarantees that LpEvents will + * only be delivered with types that we have + * registered for, so no type check is necessary + * here! 
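+ *
+ * (Illustrative, not in the original source: a handler that
+ * needs the event in a bottom half would copy it first,
+ *
+ *     struct HvLpEvent copy = *event;
+ *
+ * and queue the copy, because the queue slot is recycled once
+ * ItLpQueue_clearValid() runs below.)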
+ */ + if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes ) + lpQueue->xLpIntCountByType[nextLpEvent->xType]++; + if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes && + lpEventHandler[nextLpEvent->xType] ) + lpEventHandler[nextLpEvent->xType](nextLpEvent, regs); + else + printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType ); + + ItLpQueue_clearValid( nextLpEvent ); + } + else /* No more valid events + * If overflow events are pending + * process them + */ + if ( lpQueue->xPlicOverflowIntPending ) { + HvCallEvent_getOverflowLpEvents( + lpQueue->xIndex); + } + else /* If nothing left then we are done */ + break; + } + + ItLpQueueInProcess = 0; + mb(); + clear_inUse( lpQueue ); + + get_paca()->lpEvent_count += numIntsProcessed; + + return numIntsProcessed; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/LparData.c linuxppc64_2_4/arch/ppc64/kernel/LparData.c --- ../kernel.org/linux/arch/ppc64/kernel/LparData.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/LparData.c Mon Oct 22 12:56:06 2001 @@ -0,0 +1,253 @@ +/* + * Copyright 2001 Mike Corrigan, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#define __KERNEL__ 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char _start_boltedStacks[]; + +/* The LparMap data is now located at offset 0x6000 in head.S + * It was put there so that the HvReleaseData could address it + * with a 32-bit offset as required by the iSeries hypervisor + * + * The Naca has a pointer to the ItVpdAreas. The hypervisor finds + * the Naca via the HvReleaseData area. The HvReleaseData has the + * offset into the Naca of the pointer to the ItVpdAreas. + */ + +extern struct ItVpdAreas itVpdAreas; + +/* The LpQueue is used to pass event data from the hypervisor to + * the partition. This is where I/O interrupt events are communicated. + * The ItLpQueue must be initialized (even though only to all zeros) + * If it were uninitialized (in .bss) it would get zeroed after the + * kernel gets control. The hypervisor will have filled in some fields + * before the kernel gets control. By initializing it we keep it out + * of the .bss + */ + +struct ItLpQueue xItLpQueue = {}; + + +/* The HvReleaseData is the root of the information shared between + * the hypervisor and Linux. 
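+ *
+ * Illustrative summary of that chain (names as in the initializers
+ * below):
+ *
+ *	hvReleaseData --(Naca address, KERNELBASE+0x4000)--> naca
+ *	naca + offsetof(struct Naca, xItVpdAreas) --> &itVpdAreas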
+ */
+
+struct HvReleaseData hvReleaseData = {
+	0xc8a5d9c4,	/* desc = "HvRD" ebcdic */
+	sizeof(struct HvReleaseData),
+	offsetof(struct Naca, xItVpdAreas),
+	(struct Naca *)(KERNELBASE+0x4000), /* 64-bit Naca address */
+	0x6000,		/* offset of LparMap within loadarea (see head.S) */
+	0,
+	1,		/* tags inactive       */
+	0,		/* 64 bit              */
+	0,		/* shared processors   */
+	0,		/* HMT allowed         */
+	6,		/* TEMP: This allows non-GA driver */
+	4,		/* We are v5r2m0       */
+	3,		/* Min supported PLIC = v5r1m0 */
+	3,		/* Min usable PLIC    = v5r1m0 */
+	{ 0xd3, 0x89, 0x95, 0xa4,	/* "Linux 2.4 " */
+	  0xa7, 0x40, 0xf2, 0x4b,
+	  0xf4, 0x4b, 0xf6, 0xf4 },
+	{0}
+};
+
+extern void SystemReset_Iseries(void);
+extern void MachineCheck_Iseries(void);
+extern void DataAccess_Iseries(void);
+extern void InstructionAccess_Iseries(void);
+extern void HardwareInterrupt_Iseries(void);
+extern void Alignment_Iseries(void);
+extern void ProgramCheck_Iseries(void);
+extern void FPUnavailable_Iseries(void);
+extern void Decrementer_Iseries(void);
+extern void Trap_0a_Iseries(void);
+extern void Trap_0b_Iseries(void);
+extern void SystemCall_Iseries(void);
+extern void SingleStep_Iseries(void);
+extern void Trap_0e_Iseries(void);
+extern void PerformanceMonitor_Iseries(void);
+extern void DataAccessSLB_Iseries(void);
+extern void InstructionAccessSLB_Iseries(void);
+
+struct ItLpNaca itLpNaca = {
+	0xd397d581,	/* desc = "LpNa" ebcdic */
+	0x0400,		/* size of ItLpNaca */
+	0x0300, 19,	/* offset to int array, # ents */
+	0, 0, 0,	/* Part # of primary, serv, me */
+	0, 0x100,	/* # of LP queues, offset */
+	0, 0, 0,	/* Piranha stuff */
+	{ 0,0,0,0,0 },	/* reserved */
+	0,0,0,0,0,0,0,	/* stuff */
+	{ 0,0,0,0,0 },	/* reserved */
+	0,		/* reserved */
+	0,		/* VRM index of PLIC */
+	0, 0,		/* min supported, compat SLIC */
+	0,		/* 64-bit addr of load area */
+	0,		/* chunks for load area */
+	0, 0,		/* PASE mask, seg table */
+	{ 0 },		/* 64 reserved bytes */
+	{ 0 },		/* 128 reserved bytes */
+	{ 0 },		/* Old LP Queue */
+	{ 0 },		/* 384 reserved bytes */
+	{
+		(u64)SystemReset_Iseries,	/* 0x100 System Reset */
+		(u64)MachineCheck_Iseries,	/* 0x200 Machine Check */
+		(u64)DataAccess_Iseries,	/* 0x300 Data Access */
+		(u64)InstructionAccess_Iseries,	/* 0x400 Instruction Access */
+		(u64)HardwareInterrupt_Iseries,	/* 0x500 External */
+		(u64)Alignment_Iseries,		/* 0x600 Alignment */
+		(u64)ProgramCheck_Iseries,	/* 0x700 Program Check */
+		(u64)FPUnavailable_Iseries,	/* 0x800 FP Unavailable */
+		(u64)Decrementer_Iseries,	/* 0x900 Decrementer */
+		(u64)Trap_0a_Iseries,		/* 0xa00 Trap 0A */
+		(u64)Trap_0b_Iseries,		/* 0xb00 Trap 0B */
+		(u64)SystemCall_Iseries,	/* 0xc00 System Call */
+		(u64)SingleStep_Iseries,	/* 0xd00 Single Step */
+		(u64)Trap_0e_Iseries,		/* 0xe00 Trap 0E */
+		(u64)PerformanceMonitor_Iseries,/* 0xf00 Performance Monitor */
+		0,				/* int 0x1000 */
+		0,				/* int 0x1010 */
+		0,				/* int 0x1020 CPU ctls */
+		(u64)HardwareInterrupt_Iseries,	/* SC Ret Hdlr */
+		(u64)DataAccessSLB_Iseries,	/* 0x380 D-SLB */
+		(u64)InstructionAccessSLB_Iseries /* 0x480 I-SLB */
+	}
+};
+
+struct ItIplParmsReal xItIplParmsReal = {};
+
+struct IoHriProcessorVpd xIoHriProcessorVpd[maxProcessors] = {
+	{
+		xInstCacheOperandSize: 32,
+		xDataCacheOperandSize: 32,
+		xProcFreq: 50000000,
+		xTimeBaseFreq: 50000000,
+		xPVR: 0x3600
+	}
+};
+
+
+u64 xMsVpd[3400] = {};			/* Space for Main Store Vpd 27,200 bytes */
+
+u64 xRecoveryLogBuffer[32] = {};	/* Space for Recovery Log Buffer */
+
+struct SpCommArea xSpCommArea = {
+	0xE2D7C3C2,
+	1,
+	{0},
+	0, 0, 0, 0, {0}
+};
+
+struct ItVpdAreas
itVpdAreas = { + 0xc9a3e5c1, /* "ItVA" */ + sizeof( struct ItVpdAreas ), + 0, 0, + 26, /* # VPD array entries */ + 10, /* # DMA array entries */ + maxProcessors*2, maxProcessors, /* Max logical, physical procs */ + offsetof(struct ItVpdAreas,xPlicDmaToks),/* offset to DMA toks */ + offsetof(struct ItVpdAreas,xSlicVpdAdrs),/* offset to VPD addrs */ + offsetof(struct ItVpdAreas,xPlicDmaLens),/* offset to DMA lens */ + offsetof(struct ItVpdAreas,xSlicVpdLens),/* offset to VPD lens */ + 0, /* max slot labels */ + 1, /* max LP queues */ + {0}, {0}, /* reserved */ + {0}, /* DMA lengths */ + {0}, /* DMA tokens */ + { /* VPD lengths */ + 0,0,0,0, /* 0 - 3 */ + sizeof(struct Paca), /* 4 length of Paca */ + 0, /* 5 */ + sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */ + 26992, /* 7 length of MS VPD */ + 0, /* 8 */ + sizeof(struct ItLpNaca),/* 9 length of LP Naca */ + 0, /* 10 */ + 256, /* 11 length of Recovery Log Buf */ + sizeof(struct SpCommArea), /* 12 length of SP Comm Area */ + 0,0,0, /* 13 - 15 */ + sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + sizeof(struct ItLpQueue),/* 23 length of Lp Queue */ + 0,0 /* 24 - 25 */ + }, + { /* VPD addresses */ + 0,0,0,0, /* 0 - 3 */ + &xPaca[0], /* 4 first Paca */ + 0, /* 5 */ + &xItIplParmsReal, /* 6 IPL parms */ + &xMsVpd, /* 7 MS Vpd */ + 0, /* 8 */ + &itLpNaca, /* 9 LpNaca */ + 0, /* 10 */ + &xRecoveryLogBuffer, /* 11 Recovery Log Buffer */ + &xSpCommArea, /* 12 SP Comm Area */ + 0,0,0, /* 13 - 15 */ + &xIoHriProcessorVpd, /* 16 Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + &xItLpQueue, /* 23 Lp Queue */ + 0,0 + } +}; + + +/* Data area used in flush_hash_page */ +long long flush_hash_page_hpte[2]; + +struct msChunks msChunks = {0, 0, 0, 0, NULL}; + +/* Depending on whether this is called from iSeries or pSeries setup + * code, the location of the msChunks struct may or may not have + * to be reloc'd, so we force the caller to do that for us by passing + * in a pointer to the structure. + */ +unsigned long +msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + _msChunks->num_chunks = num_chunks; + _msChunks->chunk_size = chunk_size; + _msChunks->chunk_shift = __ilog2(chunk_size); + _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; + + mem = _ALIGN(mem, sizeof(msChunks_entry)); + _msChunks->abs = (msChunks_entry *)(mem + offset); + mem += num_chunks * sizeof(msChunks_entry); + + return mem; +} + + + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/Makefile linuxppc64_2_4/arch/ppc64/kernel/Makefile --- ../kernel.org/linux/arch/ppc64/kernel/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/Makefile Mon Nov 19 23:01:28 2001 @@ -0,0 +1,77 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +USE_STANDARD_AS_RULE := true + +EXTRA_CFLAGS = -mno-minimal-toc + +KHEAD := head.o + +all: $(KHEAD) kernel.o + +O_TARGET := kernel.o + +export-objs := ppc_ksyms.o setup.o + +obj-y := ppc_ksyms.o setup.o entry.o traps.o irq.o idle.o \ + time.o process.o signal.o syscalls.o misc.o ptrace.o \ + align.o semaphore.o bitops.o stab.o htab.o pacaData.o \ + LparData.o udbg.o binfmt_elf32.o sys_ppc32.o sys32.o \ + ioctl32.o ptrace32.o signal32.o open_pic.o xics.o \ + pmc.o mf_proc.o proc_pmc.o proc_pcifr.o iSeries_setup.o \ + ItLpQueue.o hvCall.o mf.o HvLpEvent.o ras.o \ + iSeries_proc.o HvCall.o flight_recorder.o HvLpConfig.o \ + rtc.o + +obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o +obj-$(CONFIG_PPC_EEH) += eeh.o + +ifeq ($(CONFIG_PPC_ISERIES),y) +obj-$(CONFIG_PCI) += iSeries_pci.o iSeries_pci_reset.o iSeries_IoMmTable.o iSeries_irq.o iSeries_VpdInfo.o XmPciLpEvent.o +endif +ifeq ($(CONFIG_PPC_PSERIES),y) +obj-$(CONFIG_PCI) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o + +obj-y += rtasd.o +endif + +obj-$(CONFIG_KGDB) += ppc-stub.o + +obj-$(CONFIG_SMP) += smp.o + +# tibit: for matrox_init2() +ifeq ($(CONFIG_NVRAM),y) + obj-$(CONFIG_NVRAM) += pmac_nvram.o +endif + +obj-y += prom.o lmb.o rtas.o rtas-proc.o chrp_setup.o i8259.o + +include $(TOPDIR)/Rules.make + +# +# This is just to get the dependencies... +# + +head.o: head.S ppc_defs.h + +ppc_defs.h: mk_defs.c ppc_defs.head \ + $(TOPDIR)/include/asm/mmu.h \ + $(TOPDIR)/include/asm/processor.h \ + $(TOPDIR)/include/asm/pgtable.h \ + $(TOPDIR)/include/asm/ptrace.h + $(CC) $(CFLAGS) -S mk_defs.c + cp ppc_defs.head ppc_defs.h +# for bk, this way we can write to the file even if it's not checked out + chmod u+w ppc_defs.h + grep '^#define' mk_defs.s >> ppc_defs.h + rm mk_defs.s + +checks: checks.c + $(HOSTCC) -I$(HPATH) $(HOSTCFLAGS) -D__KERNEL__ -fno-builtin -o checks checks.c + ./checks diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/XmPciLpEvent.c linuxppc64_2_4/arch/ppc64/kernel/XmPciLpEvent.c --- ../kernel.org/linux/arch/ppc64/kernel/XmPciLpEvent.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/XmPciLpEvent.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,158 @@ +/* + * File XmPciLpEvent.h created by Wayne Holm on Mon Jan 15 2001. + * + * This module handles PCI interrupt events sent by the iSeries Hypervisor. 
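+ *
+ * Dispatch sketch (illustrative summary, not part of the original
+ * comment):
+ *
+ *	HvLpEvent of type HvLpEvent_Type_PciIo arrives
+ *	  -> XmPciLpEvent_handler()	demux on xFlags.xFunction
+ *	    -> intReceived()		demux on xSubtype; slot interrupts
+ *	      -> ppc_irq_dispatch_handler()	run the Linux irq handlers
+ *	      -> HvCallPci_eoi()	end-of-interrupt to the hypervisor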
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+long Pci_Interrupt_Count = 0;
+long Pci_Event_Count = 0;
+
+enum XmPciLpEvent_Subtype {
+	XmPciLpEvent_BusCreated = 0,	// PHB has been created
+	XmPciLpEvent_BusFailed = 1,	// PHB has failed
+	XmPciLpEvent_BusRecovered = 12,	// PHB has been recovered
+	XmPciLpEvent_NodeFailed = 4,	// Multi-adapter bridge has failed
+	XmPciLpEvent_NodeRecovered = 5,	// Multi-adapter bridge has recovered
+	XmPciLpEvent_SlotInterrupt = 22	// Slot interrupt
+};
+
+struct XmPciLpEvent_BusInterrupt {
+	HvBusNumber busNumber;
+	HvSubBusNumber subBusNumber;
+};
+
+struct XmPciLpEvent_NodeInterrupt {
+	HvBusNumber busNumber;
+	HvSubBusNumber subBusNumber;
+	HvAgentId deviceId;
+};
+
+struct XmPciLpEvent {
+	struct HvLpEvent hvLpEvent;
+
+	union {
+		u64 alignData;		// Align on an 8-byte boundary
+
+		struct {
+			u32 fisr;
+			HvBusNumber busNumber;
+			HvSubBusNumber subBusNumber;
+			HvAgentId deviceId;
+		} slotInterrupt;
+
+		struct XmPciLpEvent_BusInterrupt busFailed;
+		struct XmPciLpEvent_BusInterrupt busRecovered;
+		struct XmPciLpEvent_BusInterrupt busCreated;
+
+		struct XmPciLpEvent_NodeInterrupt nodeFailed;
+		struct XmPciLpEvent_NodeInterrupt nodeRecovered;
+
+	} eventData;
+
+};
+
+static void intReceived(struct XmPciLpEvent* eventParm, struct pt_regs* regsParm);
+
+static void XmPciLpEvent_handler( struct HvLpEvent* eventParm, struct pt_regs* regsParm)
+{
+	//PPCDBG(PPCDBG_BUSWALK,"XmPciLpEvent_handler, type 0x%x\n",eventParm->xType );
+	++Pci_Event_Count;
+
+	if (eventParm && eventParm->xType == HvLpEvent_Type_PciIo) {
+		switch( eventParm->xFlags.xFunction ) {
+		case HvLpEvent_Function_Int:
+			intReceived( (struct XmPciLpEvent*)eventParm, regsParm );
+			break;
+		case HvLpEvent_Function_Ack:
+			printk(KERN_ERR "XmPciLpEvent.c: unexpected ack received\n");
+			break;
+		default:
+			printk(KERN_ERR "XmPciLpEvent.c: unexpected event function %d\n",(int)eventParm->xFlags.xFunction);
+			break;
+		}
+	}
+	else if (eventParm) {
+		printk(KERN_ERR "XmPciLpEvent.c: Unrecognized PCI event type 0x%x\n",(int)eventParm->xType);
+	}
+	else {
+		printk(KERN_ERR "XmPciLpEvent.c: NULL event received\n");
+	}
+}
+
+static void intReceived(struct XmPciLpEvent* eventParm, struct pt_regs* regsParm)
+{
+	int irq;
+
+	++Pci_Interrupt_Count;
+	//PPCDBG(PPCDBG_BUSWALK,"PCI: XmPciLpEvent.c: intReceived\n");
+
+	switch (eventParm->hvLpEvent.xSubtype) {
+	case XmPciLpEvent_SlotInterrupt:
+		irq = eventParm->hvLpEvent.xCorrelationToken;
+		/* Dispatch the interrupt handlers for this irq */
+		ppc_irq_dispatch_handler(regsParm, irq);
+		HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber,
+			      eventParm->eventData.slotInterrupt.subBusNumber,
+			      eventParm->eventData.slotInterrupt.deviceId);
+		break;
+	/* Ignore error recovery events for now */
+	case XmPciLpEvent_BusCreated:
+		printk(KERN_INFO "XmPciLpEvent.c: system bus %d created\n", eventParm->eventData.busCreated.busNumber);
+		break;
+	case XmPciLpEvent_BusFailed:
+		printk(KERN_INFO "XmPciLpEvent.c: system bus %d failed\n", eventParm->eventData.busFailed.busNumber);
+		break;
+	case XmPciLpEvent_BusRecovered:
+		printk(KERN_INFO "XmPciLpEvent.c: system bus %d recovered\n", eventParm->eventData.busRecovered.busNumber);
+		break;
+	case XmPciLpEvent_NodeFailed:
+		printk(KERN_INFO "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d failed\n", eventParm->eventData.nodeFailed.busNumber, eventParm->eventData.nodeFailed.subBusNumber,
eventParm->eventData.nodeFailed.deviceId); + break; + case XmPciLpEvent_NodeRecovered: + printk(KERN_INFO "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d recovered\n", eventParm->eventData.nodeRecovered.busNumber, eventParm->eventData.nodeRecovered.subBusNumber, eventParm->eventData.nodeRecovered.deviceId); + break; + default: + printk(KERN_ERR "XmPciLpEvent.c: unrecognized event subtype 0x%x\n", + eventParm->hvLpEvent.xSubtype); + break; + }; +} + + +/* This should be called sometime prior to buswalk (init_IRQ would be good) */ +int XmPciLpEvent_init() +{ + int xRc; + PPCDBG(PPCDBG_BUSWALK,"XmPciLpEvent_init, Register Event type 0x%04X\n",HvLpEvent_Type_PciIo); + + xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, &XmPciLpEvent_handler); + if (xRc == 0) { + xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); + if (xRc != 0) { + printk(KERN_ERR "XmPciLpEvent.c: open event path failed with rc 0x%x\n", xRc); + } + } + else { + printk(KERN_ERR "XmPciLpEvent.c: register handler failed with rc 0x%x\n", xRc); + } + return xRc; +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/align.c linuxppc64_2_4/arch/ppc64/kernel/align.c --- ../kernel.org/linux/arch/ppc64/kernel/align.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/align.c Fri Sep 14 07:41:02 2001 @@ -0,0 +1,363 @@ +/* + * align.c - handle alignment exceptions for the Power PC. + * + * Copyright (c) 1996 Paul Mackerras + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson + * PowerPC 403GCX/405GP modifications. + * Copyright (c) 2001 PPC64 team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct aligninfo { + unsigned char len; + unsigned char flags; +}; + +#define OPCD(inst) (((inst) & 0xFC000000) >> 26) +#define RS(inst) (((inst) & 0x03E00000) >> 21) +#define RA(inst) (((inst) & 0x001F0000) >> 16) +#define IS_DFORM(code) ((code) >= 32 && (code) <= 47) + +#define INVALID { 0, 0 } + +#define LD 1 /* load */ +#define ST 2 /* store */ +#define SE 4 /* sign-extend value */ +#define F 8 /* to/from fp regs */ +#define U 0x10 /* update index register */ +#define M 0x20 /* multiple load/store */ +#define S 0x40 /* single-precision fp, or byte-swap value */ +#define HARD 0x80 /* string, stwcx. */ +#define D 0x100 /* double-word load/store */ + +#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ + +/* + * The PowerPC stores certain bits of the instruction that caused the + * alignment exception in the DSISR register. This array maps those + * bits to information about the operand length and what the + * instruction would do. 
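+ *
+ * For example (illustrative reading of the table below): an unaligned
+ * lhz presents index 00 0 0100 (entry 4), which decodes to { 2, LD }:
+ * a two-byte load, no sign extension, no update.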
+ */ +static struct aligninfo aligninfo[128] = { + { 4, LD }, /* 00 0 0000: lwz / lwarx */ + INVALID, /* 00 0 0001 */ + { 4, ST }, /* 00 0 0010: stw */ + INVALID, /* 00 0 0011 */ + { 2, LD }, /* 00 0 0100: lhz */ + { 2, LD+SE }, /* 00 0 0101: lha */ + { 2, ST }, /* 00 0 0110: sth */ + { 4, LD+M }, /* 00 0 0111: lmw */ + { 4, LD+F+S }, /* 00 0 1000: lfs */ + { 8, LD+F }, /* 00 0 1001: lfd */ + { 4, ST+F+S }, /* 00 0 1010: stfs */ + { 8, ST+F }, /* 00 0 1011: stfd */ + INVALID, /* 00 0 1100 */ + { 8, LD }, /* 00 0 1101: ld */ + INVALID, /* 00 0 1110 */ + { 8, ST }, /* 00 0 1111: std */ + { 4, LD+U }, /* 00 1 0000: lwzu */ + INVALID, /* 00 1 0001 */ + { 4, ST+U }, /* 00 1 0010: stwu */ + INVALID, /* 00 1 0011 */ + { 2, LD+U }, /* 00 1 0100: lhzu */ + { 2, LD+SE+U }, /* 00 1 0101: lhau */ + { 2, ST+U }, /* 00 1 0110: sthu */ + { 4, ST+M }, /* 00 1 0111: stmw */ + { 4, LD+F+S+U }, /* 00 1 1000: lfsu */ + { 8, LD+F+U }, /* 00 1 1001: lfdu */ + { 4, ST+F+S+U }, /* 00 1 1010: stfsu */ + { 8, ST+F+U }, /* 00 1 1011: stfdu */ + INVALID, /* 00 1 1100 */ + { 8, ST }, /* 00 1 1101: std */ + INVALID, /* 00 1 1110 */ + INVALID, /* 00 1 1111 */ + { 8, LD }, /* 01 0 0000: ldx */ + INVALID, /* 01 0 0001 */ + { 8, ST }, /* 01 0 0010: stdx */ + INVALID, /* 01 0 0011 */ + INVALID, /* 01 0 0100 */ + INVALID, /* 01 0 0101: lwax?? */ + INVALID, /* 01 0 0110 */ + INVALID, /* 01 0 0111 */ + { 0, LD+HARD }, /* 01 0 1000: lswx */ + { 0, LD+HARD }, /* 01 0 1001: lswi */ + { 0, ST+HARD }, /* 01 0 1010: stswx */ + { 0, ST+HARD }, /* 01 0 1011: stswi */ + INVALID, /* 01 0 1100 */ + { 8, LD+U }, /* 01 0 1101: ldu */ + INVALID, /* 01 0 1110 */ + { 8, ST+U }, /* 01 0 1111: stdu */ + { 8, LD+U }, /* 01 1 0000: ldux */ + INVALID, /* 01 1 0001 */ + { 8, ST+U }, /* 01 1 0010: stdux */ + INVALID, /* 01 1 0011 */ + INVALID, /* 01 1 0100 */ + INVALID, /* 01 1 0101: lwaux?? */ + INVALID, /* 01 1 0110 */ + INVALID, /* 01 1 0111 */ + INVALID, /* 01 1 1000 */ + INVALID, /* 01 1 1001 */ + INVALID, /* 01 1 1010 */ + INVALID, /* 01 1 1011 */ + INVALID, /* 01 1 1100 */ + INVALID, /* 01 1 1101 */ + INVALID, /* 01 1 1110 */ + INVALID, /* 01 1 1111 */ + INVALID, /* 10 0 0000 */ + INVALID, /* 10 0 0001 */ + { 0, ST+HARD }, /* 10 0 0010: stwcx. 
*/ + INVALID, /* 10 0 0011 */ + INVALID, /* 10 0 0100 */ + INVALID, /* 10 0 0101 */ + INVALID, /* 10 0 0110 */ + INVALID, /* 10 0 0111 */ + { 4, LD+S }, /* 10 0 1000: lwbrx */ + INVALID, /* 10 0 1001 */ + { 4, ST+S }, /* 10 0 1010: stwbrx */ + INVALID, /* 10 0 1011 */ + { 2, LD+S }, /* 10 0 1100: lhbrx */ + INVALID, /* 10 0 1101 */ + { 2, ST+S }, /* 10 0 1110: sthbrx */ + INVALID, /* 10 0 1111 */ + INVALID, /* 10 1 0000 */ + INVALID, /* 10 1 0001 */ + INVALID, /* 10 1 0010 */ + INVALID, /* 10 1 0011 */ + INVALID, /* 10 1 0100 */ + INVALID, /* 10 1 0101 */ + INVALID, /* 10 1 0110 */ + INVALID, /* 10 1 0111 */ + INVALID, /* 10 1 1000 */ + INVALID, /* 10 1 1001 */ + INVALID, /* 10 1 1010 */ + INVALID, /* 10 1 1011 */ + INVALID, /* 10 1 1100 */ + INVALID, /* 10 1 1101 */ + INVALID, /* 10 1 1110 */ + { 0, ST+HARD }, /* 10 1 1111: dcbz */ + { 4, LD }, /* 11 0 0000: lwzx */ + INVALID, /* 11 0 0001 */ + { 4, ST }, /* 11 0 0010: stwx */ + INVALID, /* 11 0 0011 */ + { 2, LD }, /* 11 0 0100: lhzx */ + { 2, LD+SE }, /* 11 0 0101: lhax */ + { 2, ST }, /* 11 0 0110: sthx */ + INVALID, /* 11 0 0111 */ + { 4, LD+F+S }, /* 11 0 1000: lfsx */ + { 8, LD+F }, /* 11 0 1001: lfdx */ + { 4, ST+F+S }, /* 11 0 1010: stfsx */ + { 8, ST+F }, /* 11 0 1011: stfdx */ + INVALID, /* 11 0 1100 */ + INVALID, /* 11 0 1101 */ + INVALID, /* 11 0 1110 */ + INVALID, /* 11 0 1111 */ + { 4, LD+U }, /* 11 1 0000: lwzux */ + INVALID, /* 11 1 0001 */ + { 4, ST+U }, /* 11 1 0010: stwux */ + INVALID, /* 11 1 0011 */ + { 2, LD+U }, /* 11 1 0100: lhzux */ + { 2, LD+SE+U }, /* 11 1 0101: lhaux */ + { 2, ST+U }, /* 11 1 0110: sthux */ + INVALID, /* 11 1 0111 */ + { 4, LD+F+S+U }, /* 11 1 1000: lfsux */ + { 8, LD+F+U }, /* 11 1 1001: lfdux */ + { 4, ST+F+S+U }, /* 11 1 1010: stfsux */ + { 8, ST+F+U }, /* 11 1 1011: stfdux */ + INVALID, /* 11 1 1100 */ + INVALID, /* 11 1 1101 */ + INVALID, /* 11 1 1110 */ + INVALID, /* 11 1 1111 */ +}; + +#define SWAP(a, b) (t = (a), (a) = (b), (b) = t) + +int +fix_alignment(struct pt_regs *regs) +{ + int instr, nb, flags; + int opcode, f1, f2, f3; + int i, t; + int reg, areg; + unsigned char *addr; + union { + int l; + long ll; + float f; + double d; + unsigned char v[8]; + } data; + + if (__is_processor(PV_POWER4)) { + /* + * The POWER4 has a DSISR register but doesn't set it on + * an alignment fault. -- paulus + */ + + instr = *((unsigned int *)regs->nip); + opcode = OPCD(instr); + reg = RS(instr); + areg = RA(instr); + + if (IS_DFORM(opcode)) { + f1 = 0; + f2 = (instr & 0x04000000) >> 26; + f3 = (instr & 0x78000000) >> 27; + } else { + f1 = (instr & 0x00000006) >> 1; + f2 = (instr & 0x00000040) >> 6; + f3 = (instr & 0x00000780) >> 7; + } + + instr = ((f1 << 5) | (f2 << 4) | f3); + } else { + reg = (regs->dsisr >> 5) & 0x1f; /* source/dest register */ + areg = regs->dsisr & 0x1f; /* register to update */ + instr = (regs->dsisr >> 10) & 0x7f; + instr |= (regs->dsisr >> 13) & 0x60; + } + + nb = aligninfo[instr].len; + if (nb == 0) { + long *p; + int i; + + if (instr != DCBZ) + return 0; /* too hard or invalid instruction */ + /* + * The dcbz (data cache block zero) instruction + * gives an alignment fault if used on non-cacheable + * memory. We handle the fault mainly for the + * case when we are running with the cache disabled + * for debugging. 
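+	 *
+	 * E.g. (assuming L1_CACHE_BYTES == 128): a dcbz fault with
+	 * regs->dar == 0x1234 zeroes the 128 bytes at 0x1200, since
+	 * (0x1234 & -128) == 0x1200.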
+ */ + p = (long *) (regs->dar & -L1_CACHE_BYTES); + for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i) + p[i] = 0; + return 1; + } + + flags = aligninfo[instr].flags; + addr = (unsigned char *)regs->dar; + + /* Verify the address of the operand */ + if (user_mode(regs)) { + if (verify_area((flags & ST? VERIFY_WRITE: VERIFY_READ), addr, nb)) + return -EFAULT; /* bad address */ + } + + if ((flags & F) && (regs->msr & MSR_FP)) + giveup_fpu(current); + if (flags & M) + return 0; /* too hard for now */ + + /* If we read the operand, copy it in */ + if (flags & LD) { + if (nb == 2) { + data.v[0] = data.v[1] = 0; + if (__get_user(data.v[2], addr) + || __get_user(data.v[3], addr+1)) + return -EFAULT; + } else { + for (i = 0; i < nb; ++i) + if (__get_user(data.v[i], addr+i)) + return -EFAULT; + } + } + /* Unfortunately D (== 0x100) doesn't fit in the aligninfo[n].flags + field. So synthesize it here. */ + if ((flags & F) == 0 && nb == 8) + flags |= D; + + switch (flags & ~U) { + case LD+SE: + if (data.v[2] >= 0x80) + data.v[0] = data.v[1] = -1; + /* fall through */ + case LD: + regs->gpr[reg] = data.l; + break; + case LD+D: + regs->gpr[reg] = data.ll; + break; + case LD+S: + if (nb == 2) { + SWAP(data.v[2], data.v[3]); + } else { + SWAP(data.v[0], data.v[3]); + SWAP(data.v[1], data.v[2]); + } + regs->gpr[reg] = data.l; + break; + case ST: + data.l = regs->gpr[reg]; + break; + case ST+D: + data.ll = regs->gpr[reg]; + break; + case ST+S: + data.l = regs->gpr[reg]; + if (nb == 2) { + SWAP(data.v[2], data.v[3]); + } else { + SWAP(data.v[0], data.v[3]); + SWAP(data.v[1], data.v[2]); + } + break; + case LD+F: + current->thread.fpr[reg] = data.d; + break; + case ST+F: + data.d = current->thread.fpr[reg]; + break; + /* these require some floating point conversions... */ + /* we'd like to use the assignment, but we have to compile + * the kernel with -msoft-float so it doesn't use the + * fp regs for copying 8-byte objects. */ + case LD+F+S: + enable_kernel_fp(); + cvt_fd(&data.f, ¤t->thread.fpr[reg], ¤t->thread.fpscr); + /* current->thread.fpr[reg] = data.f; */ + break; + case ST+F+S: + enable_kernel_fp(); + cvt_df(¤t->thread.fpr[reg], &data.f, ¤t->thread.fpscr); + /* data.f = current->thread.fpr[reg]; */ + break; + default: + printk("align: can't handle flags=%x\n", flags); + return 0; + } + + if (flags & ST) { + if (nb == 2) { + if (__put_user(data.v[2], addr) + || __put_user(data.v[3], addr+1)) + return -EFAULT; + } else { + for (i = 0; i < nb; ++i) + if (__put_user(data.v[i], addr+i)) + return -EFAULT; + } + } + + if (flags & U) { + regs->gpr[areg] = regs->dar; + } + + return 1; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/binfmt_elf32.c linuxppc64_2_4/arch/ppc64/kernel/binfmt_elf32.c --- ../kernel.org/linux/arch/ppc64/kernel/binfmt_elf32.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/binfmt_elf32.c Thu Oct 18 04:27:04 2001 @@ -0,0 +1,79 @@ +/* + * binfmt_elf32.c: Support 32-bit PPC ELF binaries on Power3 and followons. + * based on the SPARC64 version. + * Copyright (C) 1995, 1996, 1997, 1998 David S. 
Miller (davem@redhat.com) + * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz) + * + * Copyright (C) 2000,2001 Ken Aaker (kdaaker@rchland.vnet.ibm.com), IBM Corp + * Copyright (C) 2001 Anton Blanchard (anton@au.ibm.com), IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define ELF_ARCH EM_PPC +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2MSB; + +#include +#include +#include +#include + +struct timeval32 +{ + int tv_sec, tv_usec; +}; + +#define elf_prstatus elf_prstatus32 +struct elf_prstatus32 +{ + struct elf_siginfo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned int pr_sigpend; /* Set of pending signals */ + unsigned int pr_sighold; /* Set of held signals */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct timeval32 pr_utime; /* User time */ + struct timeval32 pr_stime; /* System time */ + struct timeval32 pr_cutime; /* Cumulative user time */ + struct timeval32 pr_cstime; /* Cumulative system time */ + elf_gregset_t pr_reg; /* General purpose registers. */ + int pr_fpvalid; /* True if math co-processor being used. */ +}; + +#define elf_prpsinfo elf_prpsinfo32 +struct elf_prpsinfo32 +{ + char pr_state; /* numeric process state */ + char pr_sname; /* char for pr_state */ + char pr_zomb; /* zombie */ + char pr_nice; /* nice val */ + unsigned int pr_flag; /* flags */ + u32 pr_uid; + u32 pr_gid; + pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; + /* Lots missing */ + char pr_fname[16]; /* filename of executable */ + char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ +}; + +extern void start_thread32(struct pt_regs *, unsigned long, unsigned long); +#undef start_thread +#define start_thread start_thread32 +#define init_elf_binfmt init_elf32_binfmt + +#undef CONFIG_BINFMT_ELF +#ifdef CONFIG_BINFMT_ELF32 +#define CONFIG_BINFMT_ELF CONFIG_BINFMT_ELF32 +#endif +#undef CONFIG_BINFMT_ELF_MODULE +#ifdef CONFIG_BINFMT_ELF32_MODULE +#define CONFIG_BINFMT_ELF_MODULE CONFIG_BINFMT_ELF32_MODULE +#endif + +#include "../../../fs/binfmt_elf.c" diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/bitops.c linuxppc64_2_4/arch/ppc64/kernel/bitops.c --- ../kernel.org/linux/arch/ppc64/kernel/bitops.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/bitops.c Tue Jun 5 01:30:37 2001 @@ -0,0 +1,90 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * Adapted for ppc64 - Todd Inglett, Anton Blanchard + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_BITOPS + +/* + * Bitops are weird when viewed on big-endian systems. They were designed + * on little endian so the size of the bitset doesn't matter (low order bytes + * come first) as long as the bit in question is valid. 
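+ *
+ * For instance (illustrative): bit 0 of a bitset is the least
+ * significant bit of the first 64-bit word, so test_bit(0, p)
+ * amounts to (p[0] & 1UL) != 0.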
+ *
+ * Bits are "tested" often using the C expression (val & (1<<nr)), so we
+ * do the same for our bitset.
+ */
+
+unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
+{
+	unsigned long *p = ((unsigned long *)addr) + (offset >> 6);
+	unsigned long result = offset & ~63UL;
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset &= 63UL;
+
+	if (offset) {
+		tmp = *p++;
+		tmp |= ~0UL >> (64-offset);
+		if (size < 64)
+			goto found_first;
+		if (~tmp)
+			goto found_middle;
+		size -= 64;
+		result += 64;
+	}
+	while (size & ~63UL) {
+		if (~(tmp = *(p++)))
+			goto found_middle;
+		result += 64;
+		size -= 64;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp |= ~0UL << size;
+	if (tmp == ~0UL)
+		return result+size;
+found_middle:
+	return result + ffz(tmp);
+}
+
+void BUG_OUTLINE(char* file, unsigned line)
+{
+	udbg_printf("BUG - kernel BUG at %s:%u!\n", file, line);
+	PPCDBG_ENTER_DEBUGGER();
+	printk("kernel BUG at %s:%u!\n", file, line);
+	__asm__ __volatile__(".long " BUG_ILLEGAL_INSTR);
+}
+
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/checks.c linuxppc64_2_4/arch/ppc64/kernel/checks.c
--- ../kernel.org/linux/arch/ppc64/kernel/checks.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/checks.c Mon Jun 4 20:06:03 2001
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/*
+ * Do various build-time sanity checks on the data structures.
+ *
+ * This is invoked when you do a make checks.
+ * Is this enough or are there more things that we would like to do here?
+ * -- tgall
+ */
+int main(void)
+{
+	int ret = 0;
+#if 0
+	if ( sizeof(struct thread_struct) % 16 )
+	{
+		printf("Thread struct is not modulo 16 bytes: "
+		       "%d bytes total, %d bytes off\n",
+		       (int)sizeof(struct thread_struct),
+		       (int)(sizeof(struct thread_struct)%16));
+		ret = -1;
+	}
+#endif
+
+	if ( sizeof(struct pt_regs) % 16 )
+	{
+		printf("pt_regs struct is not modulo 16 bytes: "
+		       "%d bytes total, %d bytes off\n",
+		       (int)sizeof(struct pt_regs),
+		       (int)(sizeof(struct pt_regs)%16));
+		ret = -1;
+
+	}
+
+	printf("Task size        : %d bytes\n"
+	       "Tss size         : %d bytes\n"
+	       "pt_regs size     : %d bytes\n"
+	       "Kernel stack size: %d bytes\n",
+	       (int)sizeof(struct task_struct), (int)sizeof(struct thread_struct),
+	       (int)sizeof(struct pt_regs),
+	       (int)(sizeof(union task_union) - sizeof(struct task_struct)));
+	return ret;
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/chrp_setup.c linuxppc64_2_4/arch/ppc64/kernel/chrp_setup.c
--- ../kernel.org/linux/arch/ppc64/kernel/chrp_setup.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/chrp_setup.c Wed Nov 7 13:05:40 2001
@@ -0,0 +1,392 @@
+/*
+ * linux/arch/ppc/kernel/setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * bootup setup stuff..
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "local_irq.h" +#include "i8259.h" +#include "open_pic.h" +#include "xics.h" +#include + +extern volatile unsigned char *chrp_int_ack_special; +extern struct Naca *naca; + +void chrp_setup_pci_ptrs(void); +void chrp_progress(char *, unsigned short); +void chrp_request_regions(void); + +extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); +extern int pckbd_getkeycode(unsigned int scancode); +extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); +extern char pckbd_unexpected_up(unsigned char keycode); +extern void pckbd_leds(unsigned char leds); +extern void pckbd_init_hw(void); +extern unsigned char pckbd_sysrq_xlate[128]; +extern void openpic_init_IRQ(void); +extern void init_ras_IRQ(void); + +extern void find_and_init_phbs(void); +extern void pSeries_pcibios_fixup(void); +extern void iSeries_pcibios_fixup(void); + +extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); +extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); +void pSeries_calibrate_decr(void); + +kdev_t boot_dev; +unsigned long virtPython0Facilities = 0; // python0 facility area (memory mapped io) (64-bit format) VIRTUAL address. + +extern HPTE *Hash, *Hash_end; +extern unsigned long Hash_size, Hash_mask; +extern int probingmem; +extern unsigned long loops_per_jiffy; + +#ifdef CONFIG_BLK_DEV_RAM +extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ +extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ +extern int rd_image_start; /* starting block # of image */ +#endif + +int __chrp +chrp_get_cpuinfo(char *buffer) +{ + long len; /* i --Unused */ + /* unsigned int t; --Unused */ + struct device_node *root; + const char *model = ""; + + root = find_path_device("/"); + if (root) + model = get_property(root, "model", NULL); + len = sprintf(buffer,"machine\t\t: CHRP %s\n", model); + + return len; +} + +void __init chrp_request_regions(void) { + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init +chrp_setup_arch(void) +{ + extern char cmd_line[]; + struct device_node *root; + unsigned int *opprop; + + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. */ + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + +#ifdef CONFIG_BLK_DEV_INITRD + /* this is fine for chrp */ + initrd_below_start_ok = 1; + + if (initrd_start) + ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0); + else +#endif + ROOT_DEV = to_kdev_t(0x0802); /* sda2 (sda1 is for the kernel) */ + + printk("Boot arguments: %s\n", cmd_line); + + /* Find and initialize PCI host bridges */ + /* iSeries needs to be done much later. 
*/
+	#ifndef CONFIG_PPC_ISERIES
+	find_and_init_phbs();
+	#endif
+
+	/* Find the Open PIC if present */
+	root = find_path_device("/");
+	opprop = (unsigned int *) get_property(root,
+				"platform-open-pic", NULL);
+	if (opprop != 0) {
+		int n = prom_n_addr_cells(root);
+		unsigned long openpic;
+
+		for (openpic = 0; n > 0; --n)
+			openpic = (openpic << 32) + *opprop++;
+		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic);
+		udbg_printf("OpenPIC addr: %lx\n", openpic);
+		OpenPIC_Addr = __ioremap(openpic, 0x40000, _PAGE_NO_CACHE);
+	}
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+}
+
+void __init
+chrp_init2(void)
+{
+	/*
+	 * It is sensitive to when this is called (not too early)
+	 * -- tibit
+	 */
+	chrp_request_regions();
+	ppc_md.progress(UTS_RELEASE, 0x7777);
+}
+
+
+/* Early initialization. Relocation is on but do not reference unbolted pages */
+void __init pSeries_init_early(void)
+{
+#ifdef CONFIG_PPC_PSERIES	/* This ifdef should go away */
+	void *comport;
+
+	hpte_init_pSeries();
+	tce_init_pSeries();
+	pSeries_pcibios_init_early();
+
+#ifdef CONFIG_SMP
+	smp_init_pSeries();
+#endif
+
+	/* Map the uart for udbg. */
+	comport = (void *)__ioremap(naca->serialPortAddr, 16, _PAGE_NO_CACHE);
+	udbg_init_uart(comport);
+
+	ppc_md.udbg_putc = udbg_putc;
+	ppc_md.udbg_getc = udbg_getc;
+	ppc_md.udbg_getc_poll = udbg_getc_poll;
+#endif
+}
+
+void __init
+chrp_init(unsigned long r3, unsigned long r4, unsigned long r5,
+	  unsigned long r6, unsigned long r7)
+{
+#if 0 /* PPPBBB remove this later... -Peter */
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* take care of initrd if we have one */
+	if ( r6 )
+	{
+		initrd_start = __va(r6);
+		initrd_end = __va(r6 + r7);
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+#endif
+
+	ppc_md.ppc_machine = _machine;
+
+	ppc_md.setup_arch = chrp_setup_arch;
+	ppc_md.setup_residual = NULL;
+	ppc_md.get_cpuinfo = chrp_get_cpuinfo;
+	if (naca->interrupt_controller == IC_OPEN_PIC) {
+		ppc_md.init_IRQ = openpic_init_IRQ;
+		ppc_md.get_irq = openpic_get_irq;
+		ppc_md.post_irq = NULL;
+	} else {
+		ppc_md.init_IRQ = xics_init_IRQ;
+		ppc_md.get_irq = xics_get_irq;
+		ppc_md.post_irq = NULL;
+	}
+	ppc_md.init_ras_IRQ = init_ras_IRQ;
+
+	#ifndef CONFIG_PPC_ISERIES
+	ppc_md.pcibios_fixup = pSeries_pcibios_fixup;
+	#else
+	ppc_md.pcibios_fixup = NULL;
+	// ppc_md.pcibios_fixup = iSeries_pcibios_fixup;
+	#endif
+
+
+	ppc_md.init = chrp_init2;
+
+	ppc_md.restart = rtas_restart;
+	ppc_md.power_off = rtas_power_off;
+	ppc_md.halt = rtas_halt;
+
+	ppc_md.time_init = NULL;
+	ppc_md.get_boot_time = pSeries_get_rtc_time;
+	ppc_md.get_rtc_time = pSeries_get_rtc_time;
+	ppc_md.set_rtc_time = pSeries_set_rtc_time;
+	ppc_md.calibrate_decr = pSeries_calibrate_decr;
+
+	ppc_md.progress = chrp_progress;
+
+#ifdef CONFIG_VT
+	ppc_md.kbd_setkeycode = pckbd_setkeycode;
+	ppc_md.kbd_getkeycode = pckbd_getkeycode;
+	ppc_md.kbd_translate = pckbd_translate;
+	ppc_md.kbd_unexpected_up = pckbd_unexpected_up;
+	ppc_md.kbd_leds = pckbd_leds;
+	ppc_md.kbd_init_hw = pckbd_init_hw;
+#ifdef CONFIG_MAGIC_SYSRQ
+	ppc_md.ppc_kbd_sysrq_xlate = pckbd_sysrq_xlate;
+	SYSRQ_KEY = 0x63;	/* Print Screen */
+#endif
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+	ppc_ide_md.insw = chrp_ide_insw;
+	ppc_ide_md.outsw = chrp_ide_outsw;
+	ppc_ide_md.default_irq = chrp_ide_default_irq;
+	ppc_ide_md.default_io_base = chrp_ide_default_io_base;
+	ppc_ide_md.ide_check_region = chrp_ide_check_region;
+	ppc_ide_md.ide_request_region = chrp_ide_request_region;
+	ppc_ide_md.ide_release_region =
chrp_ide_release_region; + ppc_ide_md.fix_driveid = chrp_ide_fix_driveid; + ppc_ide_md.ide_init_hwif = chrp_ide_init_hwif_ports; + + ppc_ide_md.io_base = _IO_BASE; +#endif + + ppc_md.progress("Linux ppc64\n", 0x0); +} + +void __chrp +chrp_progress(char *s, unsigned short hex) +{ + struct device_node *root; + int width, *p; + char *os; + static int display_character, set_indicator; + static int max_width; + + if (hex) + udbg_printf(" %s\n", s); + + if (!rtas.base || (_machine != _MACH_pSeries)) + return; + + if (max_width == 0) { + if ( (root = find_path_device("/rtas")) && + (p = (unsigned int *)get_property(root, + "ibm,display-line-length", + NULL)) ) + max_width = *p; + else + max_width = 0x10; + display_character = rtas_token("display-character"); + set_indicator = rtas_token("set-indicator"); + } + if (display_character == RTAS_UNKNOWN_SERVICE) { + /* use hex display */ + if (set_indicator == RTAS_UNKNOWN_SERVICE) + return; + rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); + return; + } + + rtas_call(display_character, 1, 1, NULL, '\r'); + + width = max_width; + os = s; + while ( *os ) + { + if ( (*os == '\n') || (*os == '\r') ) + width = max_width; + else + width--; + rtas_call(display_character, 1, 1, NULL, *os++ ); + /* if we overwrite the screen length */ + if ( width == 0 ) + while ( (*os != 0) && (*os != '\n') && (*os != '\r') ) + os++; + } + + /* Blank to end of line. */ + while ( width-- > 0 ) + rtas_call(display_character, 1, 1, NULL, ' ' ); +} + +extern void setup_default_decr(void); + +void __init pSeries_calibrate_decr(void) +{ + struct device_node *cpu; + struct div_result divres; + int *fp; + unsigned long freq; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + freq = 16666000; /* hardcoded default */ + cpu = find_type_devices("cpu"); + if (cpu != 0) { + fp = (int *) get_property(cpu, "timebase-frequency", NULL); + if (fp != 0) + freq = *fp; + } + printk("time_init: decrementer frequency = %lu.%.6lu MHz\n", + freq/1000000, freq%1000000 ); + + tb_ticks_per_jiffy = freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = freq / 1000000; + tb_to_us = mulhwu_scale_factor(freq, 1000000); + div128_by_32( 1024*1024, 0, tb_ticks_per_sec, &divres ); + tb_to_xs = divres.result_low; + + setup_default_decr(); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/eeh.c linuxppc64_2_4/arch/ppc64/kernel/eeh.c --- ../kernel.org/linux/arch/ppc64/kernel/eeh.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/eeh.c Tue Nov 13 10:47:33 2001 @@ -0,0 +1,313 @@ +/* + * eeh.c + * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Change Activity: + * 2001/10/27 : engebret : Created. 
+ * End Change Activity + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#define BUID_HI(buid) ((buid) >> 32) +#define BUID_LO(buid) ((buid) & 0xffffffff) +#define CONFIG_ADDR(busno, devfn) (((((busno) & 0xff) << 8) | ((devfn) & 0xf8)) << 8) + +unsigned long eeh_total_mmio_reads; +unsigned long eeh_total_mmio_ffs; +unsigned long eeh_false_positives; +/* RTAS tokens */ +static int ibm_set_eeh_option; +static int ibm_set_slot_reset; +static int ibm_read_slot_reset_state; + +static int eeh_implemented; +#define EEH_MAX_OPTS 4096 +static char *eeh_opts; +static int eeh_opts_last; +static int eeh_check_opts_config(struct pci_dev *dev); + + +unsigned long eeh_token(unsigned long phb, unsigned long bus, unsigned long devfn, unsigned long offset) +{ + if (phb > 0xff) + panic("eeh_token: phb 0x%lx is too large\n", phb); + if (offset & 0x0fffffff00000000) + panic("eeh_token: offset 0x%lx is out of range\n", offset); + return ((IO_UNMAPPED_REGION_ID << 60) | (phb << 48UL) | ((bus & 0xff) << 40UL) | (devfn << 32UL) | (offset & 0xffffffff)); +} + + + +int eeh_get_state(unsigned long ea) { + return 0; +} + + +/* Check for an eeh failure at the given token address. + * The given value has been read and it should be 1's (0xff, 0xffff or 0xffffffff). + * + * Probe to determine if an error actually occurred. If not return val. + * Otherwise panic. + */ +unsigned long eeh_check_failure(void *token, unsigned long val) +{ + unsigned long config_addr = (unsigned long)token >> 24; /* PPBBDDRR */ + unsigned long phbidx = (config_addr >> 24) & 0xff; + struct pci_controller *phb; + unsigned long ret, rets[2]; + + config_addr &= 0xffff00; /* 00BBDD00 */ + + if (phbidx >= global_phb_number) { + panic("EEH: checking token %p phb index of %ld is greater than max of %d\n", token, phbidx, global_phb_number-1); + } + phb = phbtab[phbidx]; + eeh_false_positives++; + + ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, + config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid)); + if (ret == 0 && rets[1] == 1 && rets[2] != 0) { + struct pci_dev *dev; + int bus = ((unsigned long)token >> 40) & 0xffff; /* include PHB# in bus */ + int devfn = (config_addr >> 8) & 0xff; + + dev = pci_find_slot(bus, devfn); + if (dev) + panic("EEH: MMIO failure (%ld) on device:\n %s %s\n", + rets[2], dev->slot_name, dev->name); + else + panic("EEH: MMIO failure (%ld) on device buid %lx, config_addr %lx\n", rets[2], phb->buid, config_addr); + } + return val; /* good case */ +} + +void eeh_init(void) { + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); + if (ibm_set_eeh_option != RTAS_UNKNOWN_SERVICE) { + printk("PCI Enhanced I/O Error Handling Enabled\n"); + eeh_implemented = 1; + } +} + + +/* Given a PCI device check if eeh should be configured or not. + * This may look at firmware properties and/or kernel cmdline options. + */ +int is_eeh_configured(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + struct pci_controller *phb = PCI_GET_PHB_PTR(dev); + unsigned long ret, rets[2]; + + if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented) + return 0; + + /* Hack: turn off eeh for display class devices. + * This fixes matrox accel framebuffer. 
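+ *
+ * (Illustrative decoding: dev->class packs base class, subclass and
+ * prog-if from high byte to low, so (dev->class >> 16) ==
+ * PCI_BASE_CLASS_DISPLAY matches e.g. class 0x030000, a
+ * VGA-compatible controller.)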
+ */
+	if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+		return 0;
+
+	if (!eeh_check_opts_config(dev))
+		return 0;
+
+	ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+			CONFIG_ADDR(dn->busno, dn->devfn),
+			BUID_HI(phb->buid), BUID_LO(phb->buid));
+	if (ret == 0 && rets[1] == 1) {
+		printk("EEH: %s %s is EEH capable.\n", dev->slot_name, dev->name);
+		return 1;
+	}
+	return 0;
+}
+
+int eeh_set_option(struct pci_dev *dev, int option)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct pci_controller *phb = PCI_GET_PHB_PTR(dev);
+
+	if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented)
+		return -2;
+
+	return rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+			 CONFIG_ADDR(dn->busno, dn->devfn),
+			 BUID_HI(phb->buid), BUID_LO(phb->buid), option);
+}
+
+
+static int eeh_proc_falsepositive_read(char *page, char **start, off_t off,
+				       int count, int *eof, void *data)
+{
+	int len;
+	len = sprintf(page, "eeh_false_positives=%ld\n"
+		      "eeh_total_mmio_ffs=%ld\n"
+		      "eeh_total_mmio_reads=%ld\n",
+		      eeh_false_positives, eeh_total_mmio_ffs, eeh_total_mmio_reads);
+	return len;
+}
+
+/* Implementation of /proc/ppc64/eeh
+ * For now it is one file showing false positives.
+ */
+void eeh_init_proc(struct proc_dir_entry *top)
+{
+	struct proc_dir_entry *ent = create_proc_entry("eeh", S_IRUGO, top);
+	if (ent) {
+		ent->nlink = 1;
+		ent->data = NULL;
+		ent->read_proc = (void *)eeh_proc_falsepositive_read;
+	}
+}
+
+/*
+ * Test if "dev" should be configured on or off.
+ * The options are scanned left to right, so the last (rightmost)
+ * matching option wins.
+ * This lets the user specify stupid combinations of options,
+ * but at least the result should be very predictable.
+ */
+static int eeh_check_opts_config(struct pci_dev *dev)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct pci_controller *phb = PCI_GET_PHB_PTR(dev);
+	char devname[32], classname[32], phbname[32];
+	char *strs[8], *s;
+	int nstrs, i;
+	int ret = 0;
+
+	if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented)
+		return 0;
+	/* Build list of strings to match */
+	nstrs = 0;
+	s = (char *)get_property(dn, "ibm,loc-code", 0);
+	if (s)
+		strs[nstrs++] = s;
+	sprintf(devname, "dev%04x:%04x", dev->vendor, dev->device);
+	strs[nstrs++] = devname;
+	sprintf(classname, "class%04x", dev->class);
+	strs[nstrs++] = classname;
+	sprintf(phbname, "pci@%lx", phb->buid);
+	strs[nstrs++] = phbname;
+	strs[nstrs++] = "";	/* yes, this matches the empty string */
+
+	/* Now see if any string matches the eeh_opts list.
+	 * The eeh_opts list entries start with + or -.
+	 */
+	for (s = eeh_opts; s && (s < (eeh_opts + eeh_opts_last)); s += strlen(s)+1) {
+		for (i = 0; i < nstrs; i++) {
+			if (strcasecmp(strs[i], s+1) == 0) {
+				ret = (*s == '+') ? 1 : 0;
+			}
+		}
+	}
+	return ret;
+}
+
+/* Handle kernel eeh-on & eeh-off cmd line options for eeh.
+ *
+ * We support:
+ *	eeh-off=loc1,loc2,loc3...
+ *
+ * and this option can be repeated so
+ *	eeh-off=loc1,loc2 eeh-off=loc3
+ * is the same as eeh-off=loc1,loc2,loc3
+ *
+ * loc is an IBM location code that can be found in a manual or
+ * via openfirmware (or the Hardware Management Console).
+ *
+ * We also support these additional "loc" values:
+ *
+ *	dev#:#   vendor:device id in hex (e.g. dev1022:2000)
+ *	class#   class id in hex (e.g. class0200)
+ *	pci@buid all devices under phb (e.g. pci@fef00000)
+ *
+ * If no location code is specified all devices are assumed
+ * so eeh-off means eeh by default is off.
+ */
+
+/* This is implemented as a null-separated list of strings.
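+ * For example (illustrative): booting with "eeh-off=dev1022:2000"
+ * leaves eeh_opts holding "+\0" followed by "-dev1022:2000\0", where
+ * the leading "+\0" is the built-in default that matches every device.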
+ * Each string looks like this: "+X" or "-X" + * where X is a loc code, dev, class or pci string (as shown above) + * or empty which is used to indicate all. + * + * We interpret this option string list during the buswalk + * so that it will literally behave left-to-right even if + * some combinations don't make sense. Give the user exactly + * what they want! :) + */ + +static int __init eeh_parm(char *str, int state) +{ + char *s, *cur, *curend; + if (!eeh_opts) { + eeh_opts = alloc_bootmem(EEH_MAX_OPTS); + eeh_opts[eeh_opts_last++] = '+'; /* default */ + eeh_opts[eeh_opts_last++] = '\0'; + } + if (*str == '\0') { + eeh_opts[eeh_opts_last++] = state ? '+' : '-'; + eeh_opts[eeh_opts_last++] = '\0'; + return 1; + } + if (*str == '=') + str++; + for (s = str; s && *s != '\0'; s = curend) { + cur = s; + while (*cur == ',') + cur++; /* ignore empties. Don't treat as "all-on" or "all-off" */ + curend = strchr(cur, ','); + if (!curend) + curend = cur + strlen(cur); + if (*cur) { + int curlen = curend-cur; + char *sym = eeh_opts+eeh_opts_last; + if (eeh_opts_last + curlen > EEH_MAX_OPTS-2) { + printk("EEH: sorry...too many eeh cmd line options\n"); + return 1; + } + eeh_opts[eeh_opts_last++] = state ? '+' : '-'; + strncpy(eeh_opts+eeh_opts_last, cur, curlen); + eeh_opts_last += curlen; + eeh_opts[eeh_opts_last++] = '\0'; + } + } + return 1; +} + +static int __init eehoff_parm(char *str) +{ + return eeh_parm(str, 0); +} +static int __init eehon_parm(char *str) +{ + return eeh_parm(str, 1); +} + + +__setup("eeh-off", eehoff_parm); +__setup("eeh-on", eehon_parm); diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/entry.S linuxppc64_2_4/arch/ppc64/kernel/entry.S --- ../kernel.org/linux/arch/ppc64/kernel/entry.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/entry.S Thu Nov 29 00:53:22 2001 @@ -0,0 +1,609 @@ +/* + * arch/ppc/kernel/entry.S + * + * + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the system call entry code, context switch + * code, and exception/interrupt return code for PowerPC. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + + +#include "ppc_asm.h" +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +#undef SHOW_SYSCALLS +#undef SHOW_SYSCALLS_TASK + +#ifdef SHOW_SYSCALLS_TASK + .data +show_syscalls_task: + .long -1 +#endif + +/* + * Handle a system call. 
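+ *
+ * Dispatch sketch (illustrative summary): the syscall number arrives
+ * in r0 and is bounds-checked against NR_syscalls; it is scaled by 8
+ * (slwi r0,r0,3) to index the 64-bit entries of sys_call_table, and
+ * the handler is invoked via mtlr/blrl with r9 pointing at the saved
+ * pt_regs (r1 + STACK_FRAME_OVERHEAD).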
+ */ + .text +_GLOBAL(DoSyscall) + std r0,THREAD+LAST_SYSCALL(r13) + ld r11,_CCR(r1) /* Clear SO bit in CR */ + lis r10,0x1000 + andc r11,r11,r10 + std r11,_CCR(r1) +#ifdef SHOW_SYSCALLS +#ifdef SHOW_SYSCALLS_TASK + LOADBASE(r31,show_syscalls_task) + ld r31,show_syscalls_task@l(r31) + cmp 0,r13,r31 + bne 1f +#endif + LOADADDR(r3,7f) + ld r4,GPR0(r1) + ld r5,GPR3(r1) + ld r6,GPR4(r1) + ld r7,GPR5(r1) + ld r8,GPR6(r1) + ld r9,GPR7(r1) + bl .printk + LOADADDR(r3,77f) + ld r4,GPR8(r1) + ld r5,GPR9(r1) + mr r6,r13 + bl .printk + ld r0,GPR0(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) +1: +#endif /* SHOW_SYSCALLS */ + cmpi 0,r0,0x7777 /* Special case for 'sys_sigreturn' */ + beq- 10f + cmpi 0,r0,0x6666 /* Special case for 'sys_rt_sigreturn' */ + beq- 16f + + /* MIKEC: I changed TASK_PTRACE to 64-bit here. Should I have? + * TASK_PTRACE is aka task_struct.ptrace (in linux/sched.h) + * which is an unsigned long + */ + ld r10,TASK_PTRACE(r13) + andi. r10,r10,PT_TRACESYS + bne- 50f + cmpli 0,r0,NR_syscalls + bge- 66f +/* Ken Aaker: Need to vector to 32 Bit or default sys_call_table here, + * based on caller's run-mode / personality. + * + */ +#ifdef CONFIG_BINFMT_ELF32 + ld r10,THREAD+THREAD_FLAGS(r13) + andi. r10,r10,PPC_FLAG_32BIT + beq- 15f + LOADADDR(r10,.sys_call_table32) +/* Now mung the first 4 parameters into shape, by making certain that + * the high bits (most significant 32 bits in 64 bit reg) are 0 + * for the first 4 parameter regs(3-6). + */ + clrldi r3,r3,32 + clrldi r4,r4,32 + clrldi r5,r5,32 + clrldi r6,r6,32 + b 17f +15: +#endif + LOADADDR(r10,.sys_call_table) +17: + slwi r0,r0,3 + ldx r10,r10,r0 /* Fetch system call handler [ptr] */ + mtlr r10 + addi r9,r1,STACK_FRAME_OVERHEAD + blrl /* Call handler */ +_GLOBAL(ret_from_syscall_1) +20: std r3,RESULT(r1) /* Save result */ +#ifdef SHOW_SYSCALLS +#ifdef SHOW_SYSCALLS_TASK + cmp 0,r13,r31 + bne 91f +#endif + mr r4,r3 + LOADADDR(r3,79f) + bl .printk + ld r3,RESULT(r1) +91: +#endif + li r10,-_LAST_ERRNO + cmpl 0,r3,r10 + blt 30f + neg r3,r3 + cmpi 0,r3,ERESTARTNOHAND + bne 22f + li r3,EINTR +22: ld r10,_CCR(r1) /* Set SO bit in CR */ + oris r10,r10,0x1000 + std r10,_CCR(r1) +30: std r3,GPR3(r1) /* Update return value */ + b .ret_from_except +66: li r3,ENOSYS + b 22b +/* sys_sigreturn */ +10: addi r3,r1,STACK_FRAME_OVERHEAD + bl .sys_sigreturn + cmpi 0,r3,0 /* Check for restarted system call */ + bge .ret_from_except + b 20b +/* sys_rt_sigreturn */ +16: addi r3,r1,STACK_FRAME_OVERHEAD + bl .sys_rt_sigreturn + cmpi 0,r3,0 /* Check for restarted system call */ + bge .ret_from_except + b 20b + +/* Traced system call support */ +50: bl .syscall_trace + ld r0,GPR0(r1) /* Restore original registers */ + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + ld r9,GPR9(r1) + cmpli 0,r0,NR_syscalls + bge- 66f +#ifdef CONFIG_BINFMT_ELF32 + ld r10,THREAD+THREAD_FLAGS(r13) + andi. r10,r10,PPC_FLAG_32BIT + beq- 55f + LOADADDR(r10,.sys_call_table32) +/* Now mung the first 4 parameters into shape, by making certain that + * the high bits (most significant 32 bits in 64 bit reg) are 0 + * for the first 4 parameter regs(3-6). 
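+ *
+ * E.g. (illustrative): a 32-bit task passing (int)-1 may present
+ * r3 = 0xffffffffffffffff; clrldi r3,r3,32 leaves
+ * r3 = 0x00000000ffffffff, which a handler expecting a 32-bit
+ * argument interprets correctly.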
+ */ + clrldi r3,r3,32 + clrldi r4,r4,32 + clrldi r5,r5,32 + clrldi r6,r6,32 + b 57f +55: +#endif + LOADADDR(r10,.sys_call_table) +57: + slwi r0,r0,3 + ldx r10,r10,r0 /* Fetch system call handler [ptr] */ + mtlr r10 + addi r9,r1,STACK_FRAME_OVERHEAD + blrl /* Call handler */ +_GLOBAL(ret_from_syscall_2) + std r3,RESULT(r1) /* Save result */ + std r3,GPR0(r1) /* temporary gross hack to make strace work */ + li r10,-_LAST_ERRNO + cmpl 0,r3,r10 + blt 60f + neg r3,r3 + cmpi 0,r3,ERESTARTNOHAND + bne 57f + li r3,EINTR +57: ld r10,_CCR(r1) /* Set SO bit in CR */ + oris r10,r10,0x1000 + std r10,_CCR(r1) +60: std r3,GPR3(r1) /* Update return value */ + bl .syscall_trace + b .ret_from_except +66: li r3,ENOSYS + b 57b +#ifdef SHOW_SYSCALLS +7: .string "syscall %d(%x, %x, %x, %x, %x, " +77: .string "%x, %x), current=%p\n" +79: .string " -> %x\n" + .align 2,0 +#endif + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process, via ret_from_except. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this (or in particular, the + * SAVE_REGS macro), you'll have to change the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc/kernel/process.c + */ +_GLOBAL(_switch) + stdu r1,-INT_FRAME_SIZE(r1) + ld r6,0(r1) + std r6,GPR1(r1) + /* r3-r13 are caller saved -- Cort */ + SAVE_GPR(2, r1) + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mflr r20 /* Return to switch caller */ + mfmsr r22 + li r6,MSR_FP /* Disable floating-point */ + andc r22,r22,r6 + mtmsrd r22 + isync + std r20,_NIP(r1) + std r22,_MSR(r1) + std r20,_LINK(r1) + mfcr r20 + std r20,_CCR(r1) + li r6,0x0ff0 + std r6,TRAP(r1) + std r1,KSP(r3) /* Set old stack pointer */ + + mfspr r5,SPRG3 /* Get Paca */ + addi r3,r3,-THREAD /* old 'current' for return value */ + addi r13,r4,-THREAD /* Convert THREAD to 'current' */ + std r13,PACACURRENT(r5) /* Set new 'current' */ + +#ifdef CONFIG_PPC_ISERIES + ld r7,THREAD_FLAGS(r4) /* Get run light flag */ + mfspr r9,CTRLF + srdi r7,r7,1 /* Align to run light bit in CTRL reg */ + insrdi r9,r7,1,63 /* Insert run light into CTRL */ + mtspr CTRLT,r9 +#endif + ld r1,KSP(r4) /* Load new stack pointer */ + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + /* r3-r13 are destroyed -- Cort */ + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + + ld r7,_NIP(r1) /* Return to _switch caller in new task */ + ld r1,GPR1(r1) + mtlr r7 + blr + +_GLOBAL(ret_from_fork) + bl .schedule_tail + ld r0,TASK_PTRACE(r13) + andi. 
r0,r0,PT_TRACESYS + beq+ .ret_from_except + bl .syscall_trace + b .ret_from_except + +_GLOBAL(ret_from_except) +#ifdef CONFIG_PPC_ISERIES + ld r5,SOFTE(r1) + cmpdi 0,r5,0 + beq 4f +irq_recheck: + /* + * Check for pending interrupts (iSeries) + */ + CHECKANYINT(r3,r4) + beq+ 4f /* skip do_IRQ if no interrupts */ + + mfspr r5,SPRG3 + li r3,0 + stb r3,PACAPROCENABLED(r5) /* ensure we are disabled */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b irq_recheck /* loop back and handle more */ +4: +#endif +_GLOBAL(do_bottom_half_ret) + ld r3,_MSR(r1) /* Returning to user mode? */ + andi. r3,r3,MSR_PR + beq+ restore /* if so, check need_resched and signals */ +_GLOBAL(ret_to_user_hook) + nop + /* NEED_RESCHED is a volatile long (64-bits) */ + ld r3,NEED_RESCHED(r13) + cmpi 0,r3,0 /* check need_resched flag */ + beq+ 7f + bl .schedule + /* SIGPENDING is an int (32-bits) */ +7: + lwz r5,SIGPENDING(r13) /* Check for pending unblocked signals */ + cmpwi 0,r5,0 + beq+ restore + li r3,0 + addi r4,r1,STACK_FRAME_OVERHEAD + bl .do_signal +_GLOBAL(do_signal_ret) +restore: + ld r3,_CTR(r1) + ld r0,_LINK(r1) + mtctr r3 + mtlr r0 + ld r3,_XER(r1) + mtspr XER,r3 + REST_8GPRS(5, r1) + REST_10GPRS(14, r1) + REST_8GPRS(24, r1) + + /* make sure we hard disable here, even if rtl is active, to protect + * SRR[01] and SPRG2 -- Cort + */ + mfmsr r0 /* Get current interrupt state */ + li r4,0 + ori r4,r4,MSR_EE|MSR_RI + andc r0,r0,r4 /* clear MSR_EE and MSR_RI */ + mtmsrd r0 /* Update machine state */ +#ifdef CONFIG_PPC_ISERIES + ld r0,SOFTE(r1) + cmpi 0,r0,0 + beq+ 1f + + CHECKANYINT(r4,r3) + beq+ 1f + mfmsr r0 + ori r0,r0,MSR_EE|MSR_RI + mtmsrd r0 + b irq_recheck + +1: +#endif + stdcx. r0,0,r1 /* to clear the reservation */ + + mfspr r4,SPRG3 /* current task's PACA */ +#ifdef DO_SOFT_DISABLE + ld r0,SOFTE(r1) + stb r0,PACAPROCENABLED(r4) +#endif + /* if returning to user mode, save kernel SP */ + ld r0,_MSR(r1) + andi. r0,r0,MSR_PR + beq+ 1f + addi r0,r1,INT_FRAME_SIZE /* size of frame */ + std r0,THREAD+KSP(r13) /* save kernel stack pointer */ + std r1,PACAKSAVE(r4) /* save exception stack pointer */ +1: + ld r0,_MSR(r1) + mtspr SRR1,r0 + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SRR0,r2 + REST_GPR(13,r1) + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + + rfid + +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + * + * In addition, we need to be in 32b mode, at least for now. + * + * Note: r3 is an input parameter to rtas, so don't trash it... + */ +_GLOBAL(enter_rtas) + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) // Save SP and create stack space + + /* Because RTAS is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * RTAS might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) // Save the TOC + SAVE_8GPRS(14, r1) // Save the non-volatiles + SAVE_10GPRS(22, r1) // ditto + + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_DSISR(r1) + + /* Unfortunatly, the stack pointer and the MSR are also clobbered, so they + * are saved in the PACA (SPRG3) which allows us to restore our original + * state after RTAS returns. 
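+ *
+ * A minimal sketch of that convention (structure and helper names are
+ * illustrative; the real slots are PACAR1 and PACASAVEDMSR):
+ *
+ *	paca->saved_r1  = r1;	// rtas_return_loc reloads the stack here
+ *	paca->saved_msr = msr;	// and this rebuilds 64-bit/relocated mode
+ *	rtas_msr = msr & ~(MSR_EE|MSR_SE|MSR_BE|MSR_RI
+ *			   |MSR_SF|MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP);
+ *	// i.e. RTAS runs 32-bit, real mode, interrupts hard off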
+ */ + mfspr r4,SPRG3 /* Get PACA */ + std r1,PACAR1(r4) + mfmsr r6 + std r6,PACASAVEDMSR(r4) + + /* Setup our real return addr */ + SET_REG_TO_LABEL(r4,.rtas_return_loc) + SET_REG_TO_CONST(r9,KERNELBASE) + sub r4,r4,r9 + mtlr r4 + + li r0,0 + ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r0,r6,r0 + + li r9,1 + rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI + andc r6,r0,r9 + sync /* disable interrupts so SRR0/1 */ + mtmsrd r0 /* don't get trashed */ + + SET_REG_TO_LABEL(r4,rtas) + ld r5,RTASENTRY(r4) /* get the rtas->entry value */ + ld r4,RTASBASE(r4) /* get the rtas->base value */ + + mtspr SRR0,r5 + mtspr SRR1,r6 + rfid + +_STATIC(rtas_return_loc) + /* relocation is off at this point */ + mfspr r4,SPRG3 /* Get PACA */ + SET_REG_TO_CONST(r5, KERNELBASE) + sub r4,r4,r5 /* RELOC the PACA base pointer */ + + ld r1,PACAR1(r4) /* Restore our SP */ + LOADADDR(r3,.rtas_restore_regs) + ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ + + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + +_STATIC(rtas_restore_regs) + /* relocation is on at this point */ + REST_GPR(2, r1) // Restore the TOC + REST_8GPRS(14, r1) // Restore the non-volatiles + REST_10GPRS(22, r1) // ditto + + /* put back current in r13 */ + mfspr r4,SPRG3 + ld r13,PACACURRENT(r4) + + ld r4,_CCR(r1) + mtcr r4 + + ld r5,_CTR(r1) + mtctr r5 + + ld r6,_XER(r1) + mtspr XER,r6 + + ld r7,_DAR(r1) + mtdar r7 + + ld r8,_DSISR(r1) + mtdsisr r8 + + ld r9,_DSISR(r1) + mtsrr0 r9 + + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) /* get return address */ + + mtlr r0 + blr /* return to caller */ + + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) // Save SP and create stack space + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_8GPRS(2, r1) // Save the TOC & incoming param(s) + SAVE_8GPRS(14, r1) // Save the non-volatiles + SAVE_10GPRS(22, r1) // ditto + + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_DSISR(r1) + mfmsr r10 + std r10,_MSR(r1) + + /* Unfortunatly, the stack pointer is also clobbered, so it is saved + * in the SPRG2 which allows us to restore our original state after + * PROM returns. 
+ */ + mtspr SPRG2,r1 + + /* put a relocation offset into r3 */ + bl .reloc_offset + LOADADDR(r12,prom) + sub r12,r12,r3 + ld r12,PROMENTRY(r12) /* get the prom->entry value */ + mtlr r12 + + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + andc r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + andc r11,r11,r12 + mtmsrd r11 + isync + + REST_8GPRS(2, r1) // Restore the TOC & incoming param(s) + REST_8GPRS(14, r1) // Restore the non-volatiles + REST_10GPRS(22, r1) // ditto + blrl + + mfspr r1,SPRG2 + ld r6,_MSR(r1) + mtmsrd r6 + isync + + REST_GPR(2, r1) // Restore the TOC + REST_8GPRS(14, r1) // Restore the non-volatiles + REST_10GPRS(22, r1) // ditto + + ld r4,_CCR(r1) + mtcr r4 + + ld r5,_CTR(r1) + mtctr r5 + + ld r6,_XER(r1) + mtspr XER,r6 + + ld r7,_DAR(r1) + mtdar r7 + + ld r8,_DSISR(r1) + mtdsisr r8 + + ld r9,_DSISR(r1) + mtsrr0 r9 + + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) /* get return address */ + + mtlr r0 + blr /* return to caller */ + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/flight_recorder.c linuxppc64_2_4/arch/ppc64/kernel/flight_recorder.c --- ../kernel.org/linux/arch/ppc64/kernel/flight_recorder.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/flight_recorder.c Thu Nov 29 10:54:01 2001 @@ -0,0 +1,183 @@ +/************************************************************************ + * flight_recorder.c + ************************************************************************ + * This code supports the a generic flight recorder. * + * Copyright (C) 20yy * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the: * + * Free Software Foundation, Inc., * + * 59 Temple Place, Suite 330, * + * Boston, MA 02111-1307 USA * + ************************************************************************ + * This is a simple text based flight recorder. Useful for logging + * information the you may want to retreive at a latter time. Errors or + * debug inforamtion are good examples. A good method to dump the + * information is via the proc file system. + * + * To use. + * 1. Create the flight recorder object. Passing a NULL pointer will + * kmalloc the space for you. If it is too early for kmalloc, create + * space for the object. Beware, don't lie about the size, you will + * pay for that later. + * FlightRecorder* TestFr = alloc_Flight_Recorder(NULL,"TestFr",4096); + * + * 2. Log any notable events, initialzation, error conditions, etc. + * LOGFR(TestFr,"5. Stack Variable(10) %d",StackVariable); + * + * 3. Dump the information to a buffer. 
+ * fr_Dump(TestFr, proc_file_buffer, proc_file_buffer_size); + * + ************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +static char LogText[512]; +static int LogTextIndex; +static int LogCount = 0; +static spinlock_t Fr_Lock; + +/************************************************************************ + * Build the log time prefix based on Flags. + * 00 = No time prefix + * 01 = Date(mmddyy) Time(hhmmss) prefix + * 02 = Day(dd) Time(hhmmss) prefix + * 03 = Time(hhmmss) prefix + ************************************************************************/ +static void fr_Log_Time(FlightRecorder* Fr) +{ + struct timeval TimeClock; + struct rtc_time LogTime; + + do_gettimeofday(&TimeClock); + to_tm(TimeClock.tv_sec, &LogTime); + + if (Fr->Flags == 1) { + LogTextIndex = sprintf(LogText,"%02d%02d%02d %02d%02d%02d ", + LogTime.tm_mon, LogTime.tm_mday, LogTime.tm_year-2000, + LogTime.tm_hour,LogTime.tm_min, LogTime.tm_sec); + } + else if (Fr->Flags == 2) { + LogTextIndex = sprintf(LogText,"%02d %02d%02d%02d ", + LogTime.tm_mday, + LogTime.tm_hour,LogTime.tm_min, LogTime.tm_sec); + } + + else if (Fr->Flags == 3) { + LogTextIndex = sprintf(LogText,"%02d%02d%02d ", + LogTime.tm_hour,LogTime.tm_min, LogTime.tm_sec); + } + else { + ++LogCount; + LogTextIndex = sprintf(LogText,"%04d. ",LogCount); + } +} + +/************************************************************************/ +/* Log entry into buffer, */ +/* ->If entry is going to wrap, log "WRAP" and start at the top. */ +/************************************************************************/ +static void fr_Log_Data(FlightRecorder* Fr) +{ + int TextLen = strlen(LogText); + int Residual = ( Fr->EndPointer - Fr->NextPointer)-15; + if (TextLen > Residual) { + strcpy(Fr->NextPointer,"WRAP"); + Fr->WrapPointer = Fr->NextPointer + 5; + Fr->NextPointer = Fr->StartPointer; + } + strcpy(Fr->NextPointer,LogText); + Fr->NextPointer += TextLen+1; + strcpy(Fr->NextPointer,"<="); +} +/************************************************************************ + * Build the log text, support variable args. + ************************************************************************/ +void fr_Log_Entry(struct flightRecorder* LogFr, const char *fmt, ...) +{ + va_list arg_ptr; + spin_lock(&Fr_Lock); + + fr_Log_Time(LogFr); + va_start(arg_ptr, fmt); + vsprintf(LogText+LogTextIndex, fmt, arg_ptr); + va_end(arg_ptr); + fr_Log_Data(LogFr); + + spin_unlock(&Fr_Lock); + +} +/************************************************************************ + * Dump Flight Recorder into buffer. + * -> Handles the buffer wrapping. + ************************************************************************/ +int fr_Dump(FlightRecorder* Fr, char *Buffer, int BufferLen) +{ + int LineLen = 0; + char* StartEntry; + char* EndEntry; + spin_lock(&Fr_Lock); + /**************************************************************** + * If Buffer has wrapped, find last usable entry to start with. 
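+ * The recorder packs entries as consecutive NUL-terminated strings,
+ * with the literal marker "WRAP" recording where the writer last
+ * wrapped.  A stand-alone walker over such a region is just (sketch):
+ *
+ *	const char *next_entry(const char *p)
+ *	{
+ *		return p + strlen(p) + 1;	// skip string and its NUL
+ *	}
+ *
+ * The two loops below replay oldest-first: from just past the current
+ * write position up to the wrap marker, then from the start of the
+ * buffer up to the last logged entry.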
+ ****************************************************************/ + if (Fr->WrapPointer != NULL) { + StartEntry = Fr->NextPointer+3; + StartEntry += strlen(StartEntry)+1; + EndEntry = Fr->WrapPointer; + + while (EndEntry > StartEntry && LineLen < BufferLen) { + LineLen += sprintf(Buffer+LineLen,"%s\n",StartEntry); + StartEntry += strlen(StartEntry) + 1; + } + } + + /**************************************************************** + * Dump from the beginning to the last logged entry + ****************************************************************/ + StartEntry = Fr->StartPointer; + EndEntry = Fr->NextPointer; + while (EndEntry > StartEntry && LineLen < BufferLen) { + LineLen += sprintf(Buffer+LineLen,"%s\n",StartEntry); + StartEntry += strlen(StartEntry) + 1; + } + spin_unlock(&Fr_Lock); + return LineLen; +} + +/************************************************************************ + * Allocate and Initialized the Flight Recorder + * -> If no FlightRecorder pointer is passed, the space is kmalloc. + ************************************************************************/ +FlightRecorder* alloc_Flight_Recorder(FlightRecorder* FrPtr, char* Signature, int SizeOfFr) +{ + FlightRecorder* Fr = FrPtr; /* Pointer to Object */ + int FrSize = (SizeOfFr/16)*16; /* Could be static */ + if (Fr == NULL) + Fr = (FlightRecorder*)kmalloc(SizeOfFr, GFP_KERNEL); + memset(Fr,0,SizeOfFr); + strcpy(Fr->Signature,Signature); + Fr->Size = FrSize; + Fr->Flags = 0; + Fr->StartPointer = (char*)&Fr->Buffer; + Fr->EndPointer = (char*)Fr + Fr->Size; + Fr->NextPointer = Fr->StartPointer; + + fr_Log_Entry(Fr,"Initialized."); + return Fr; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/head.S linuxppc64_2_4/arch/ppc64/kernel/head.S --- ../kernel.org/linux/arch/ppc64/kernel/head.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/head.S Thu Nov 8 23:13:41 2001 @@ -0,0 +1,1849 @@ +/* + * arch/ppc64/kernel/head.S + * + * + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com + * + * This file contains the low-level support and setup for the + * PowerPC-64 platform, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#define SECONDARY_PROCESSORS + +#include "ppc_asm.h" +#include "ppc_defs.h" +#include +#include +#include +#include + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * We layout physical memory as follows: + * 0x0000 - 0x00ff : Secondary processor spin code + * 0x0100 - 0x2fff : pSeries Interrupt prologs + * 0x3000 - 0x3fff : Interrupt support + * 0x4000 - 0x4fff : NACA + * 0x5000 - 0x5fff : Initial segment table + * 0x6000 : iSeries and common interrupt prologs + * + */ + +/* + * SPRG Usage + * + * Register Definition + * + * SPRG0 reserved for hypervisor + * SPRG1 temp - used to save gpr + * SPRG2 temp - used to save gpr + * SPRG3 virt addr of Paca + */ + +/* + * Entering into this code we make the following assumptions: + * For pSeries: + * 1. The MMU is off & open firmware is running in real mode. + * 2. The kernel is entered at __start + * + * For iSeries: + * 1. The MMU is on (as it always is for iSeries) + * 2. The kernel is entered at SystemReset_Iseries + */ + + .text + .globl _stext +_stext: +_STATIC(__start) + b .__start_initialization_pSeries + + /* At offset 0x20, there is a pointer to iSeries LPAR data. + * This is required by the hypervisor */ + . = 0x20 + .llong hvReleaseData-KERNELBASE + + /* At offset 0x28 and 0x30 are offsets to the msChunks + * array (used by the iSeries LPAR debugger to do translation + * between physical addresses and absolute addresses) and + * to the pidhash table (also used by the debugger) */ + .llong msChunks-KERNELBASE + .llong pidhash-KERNELBASE + + /* Offset 0x38 - Pointer to start of embedded System.map */ + .globl embedded_sysmap_start +embedded_sysmap_start: + .llong 0 + /* Offset 0x40 - Pointer to end of embedded System.map */ + .globl embedded_sysmap_end +embedded_sysmap_end: + .llong 0 + + /* Secondary processors spin on this value until it goes to 1. */ + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .llong 0x0 + + /* Secondary processors write this value with their cpu # */ + /* after they enter the spin loop immediatly below. */ + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .llong 0x0 + + . = 0x60 +/* + * The following code is used on pSeries to hold secondary processors + * in a spin loop after they have been freed from OpenFirmware, but + * before the bulk of the kernel has been relocated. This code + * is relocated to physical address 0x60 before prom_init is run. + * All of it must fit below the first exception vector at 0x100. + */ +_GLOBAL(__secondary_hold) + /* Grab our linux cpu number */ + mr r24,r3 + + /* Tell the master cpu we're here */ + /* Relocation is off & we are located at an address less */ + /* than 0x100, so only need to grab low order offset. */ + std r24,__secondary_hold_acknowledge@l(0) + + /* All secondary cpu's wait here until told to start. */ +100: ld r4,__secondary_hold_spinloop@l(0) + cmpdi 0,r4,1 + bne 100b + +#ifdef CONFIG_HMT + b .hmt_init +#else +#ifdef CONFIG_SMP + mr r3,r24 + b .pseries_secondary_smp_init +#else + BUG_OPCODE +#endif +#endif + +/* + * The following macros define the code that appears as + * the prologue to each of the exception handlers. They + * are split into two parts to allow a single kernel binary + * to be used for pSeries, and iSeries. + */ + +/* + * We make as much of the exception code common between native Pseries + * and Iseries LPAR implementations as possible. + */ + +/* + * This is the start of the interrupt handlers for Pseries + * This code runs with relocation off. 
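+ *
+ * The EX_* offsets defined below describe the small per-processor
+ * scratch frame these prologs fill in; as a C struct it would look
+ * like this (sketch only, kept in sync with the #defines by hand):
+ *
+ *	struct ex_frame {
+ *		unsigned long srr0;	// EX_SRR0: interrupted PC
+ *		unsigned long srr1;	// EX_SRR1: MSR at interrupt
+ *		unsigned long r20;	// EX_R20..EX_R23: GPR scratch
+ *		unsigned long r21;
+ *		unsigned long r22;
+ *		unsigned long r23;
+ *		unsigned long dar;	// EX_DAR: faulting address
+ *		unsigned long dsisr;	// EX_DSISR: fault status
+ *	};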
+ */ +#define EX_SRR0 0 +#define EX_SRR1 8 +#define EX_R20 16 +#define EX_R21 24 +#define EX_R22 32 +#define EX_R23 40 +#define EX_DAR 48 +#define EX_DSISR 56 + +#define EXCEPTION_PROLOG_PSERIES(label) \ + mtspr SPRG2,r20; /* use SPRG2 as scratch reg */ \ + mtspr SPRG1,r21; /* save r21 */ \ + mfspr r20,SPRG3; /* get Paca virt addr */ \ + ld r21,PACAEXCSP(r20); /* get exception stack ptr */ \ + addi r21,r21,EXC_FRAME_SIZE; /* make exception frame */ \ + std r22,EX_R22(r21); /* Save r22 in exc. frame */ \ + std r23,EX_R23(r21); /* Save r23 in exc. frame */ \ + mfspr r22,SRR0; /* EA of interrupted instr */ \ + std r22,EX_SRR0(r21); /* Save SRR0 in exc. frame */ \ + mfspr r23,SRR1; /* machine state at interrupt */ \ + std r23,EX_SRR1(r21); /* Save SRR1 in exc. frame */ \ + clrrdi r22,r20,60; /* Get 0xc part of the vaddr */ \ + ori r22,r22,(label)@l; /* add in the vaddr offset */ \ + /* assumes *_common < 16b */ \ + mfmsr r23; \ + rotldi r23,r23,4; \ + ori r23,r23,0x30B; /* Set IR, DR, SF, ISF, HV */ \ + rotldi r23,r23,60; /* for generic handlers */ \ + mtspr SRR0,r22; \ + mtspr SRR1,r23; \ + mfcr r23; /* save CR in r23 */ \ + rfid + +/* + * This is the start of the interrupt handlers for i_series + * This code runs with relocation on. + */ +#define EXCEPTION_PROLOG_ISERIES \ + mtspr SPRG2,r20; /* use SPRG2 as scratch reg */\ + mtspr SPRG1,r21; /* save r21 */\ + mfspr r20,SPRG3; /* get Paca */\ + ld r21,PACAEXCSP(r20); /* get exception stack ptr */\ + addi r21,r21,EXC_FRAME_SIZE; /* make exception frame */\ + std r22,EX_R22(r21); /* save r22 on exception frame */\ + std r23,EX_R23(r21); /* Save r23 in exc. frame */\ + ld r22,LPPACA+LPPACASRR0(r20); /* Get SRR0 from ItLpPaca */\ + std r22,EX_SRR0(r21); /* save SRR0 in exc. frame */\ + ld r23,LPPACA+LPPACASRR1(r20); /* Get SRR1 from ItLpPaca */\ + std r23,EX_SRR1(r21); /* save SRR1 in exc. frame */\ + mfcr r23; /* save CR in r23 */ + +/* + * The common exception prolog is used for all except a few exceptions + * such as a segment miss on a kernel address. We have to be prepared + * to take another exception from the point where we first touch the + * kernel stack onwards. + * + * On entry r20 points to the paca and r21 points to the exception + * frame on entry, r23 contains the saved CR, and relocation is on. + */ +#define EXCEPTION_PROLOG_COMMON \ + mfspr r22,SPRG2; /* Save r20 in exc. frame */ \ + std r22,EX_R20(r21); \ + mfspr r22,SPRG1; /* Save r21 in exc. frame */ \ + std r22,EX_R21(r21); \ + mfspr r22,DAR; /* Save DAR in exc. frame */ \ + std r22,EX_DAR(r21); \ + std r21,PACAEXCSP(r20); /* update exception stack ptr */ \ + /* iff no protection flt */ \ + mfspr r22,DSISR; /* Save DSISR in exc. frame */ \ + std r22,EX_DSISR(r21); \ + ld r22,EX_SRR1(r21); /* Get SRR1 from exc. frame */ \ + andi. 
r22,r22,MSR_PR; /* Set CR for later branch */ \ + mr r22,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r20); /* kernel stack to use */ \ +1: std r22,GPR1(r1); /* save r1 in stackframe */ \ + std r22,0(r1); /* make stack chain pointer */ \ + std r23,_CCR(r1); /* save CR in stackframe */ \ + ld r22,EX_R20(r21); /* move r20 to stackframe */ \ + std r22,GPR20(r1); \ + ld r23,EX_R21(r21); /* move r21 to stackframe */ \ + std r23,GPR21(r1); \ + ld r22,EX_R22(r21); /* move r22 to stackframe */ \ + std r22,GPR22(r1); \ + ld r23,EX_R23(r21); /* move r23 to stackframe */ \ + std r23,GPR23(r1); \ + mflr r22; /* save LR in stackframe */ \ + std r22,_LINK(r1); \ + mfctr r23; /* save CTR in stackframe */ \ + std r23,_CTR(r1); \ + mfspr r22,XER; /* save XER in stackframe */ \ + std r22,_XER(r1); \ + ld r23,EX_DAR(r21); /* move DAR to stackframe */ \ + std r23,_DAR(r1); \ + ld r22,EX_DSISR(r21); /* move DSISR to stackframe */ \ + std r22,_DSISR(r1); \ + lbz r22,PACAPROCENABLED(r20); \ + std r22,SOFTE(r1); \ + ld r22,EX_SRR0(r21); /* get SRR0 from exc. frame */ \ + ld r23,EX_SRR1(r21); /* get SRR1 from exc. frame */ \ + addi r21,r21,-EXC_FRAME_SIZE;/* pop off exception frame */ \ + std r21,PACAEXCSP(r20); \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ + SAVE_8GPRS(2, r1); /* save r2 - r13 in stackframe */ \ + SAVE_4GPRS(10, r1); \ + ld r2,PACATOC(r20); \ + ld r13,PACACURRENT(r20) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r1, r22 (SRR0), and r23 (SRR1). + */ + +/* + * Exception vectors. + */ +#define STD_EXCEPTION_PSERIES(n, label ) \ + . = n; \ + .globl label##_Pseries; \ +label##_Pseries: \ + EXCEPTION_PROLOG_PSERIES( label##_common ) + +#define STD_EXCEPTION_ISERIES( label ) \ + .globl label##_Iseries; \ +label##_Iseries: \ + EXCEPTION_PROLOG_ISERIES; \ + b label##_common + +#define MASKABLE_EXCEPTION_ISERIES( label ) \ + .globl label##_Iseries; \ +label##_Iseries: \ + EXCEPTION_PROLOG_ISERIES; \ + lbz r22,PACAPROFENABLED(r20); \ + cmpi 0,r22,0; \ + bne- label##_Iseries_profile; \ +label##_Iseries_prof_ret: \ + lbz r22,PACAPROCENABLED(r20); \ + cmpi 0,r22,0; \ + beq- label##_Iseries_masked; \ + b label##_common; \ +label##_Iseries_profile: \ + std r24,48(r21); \ + std r25,56(r21); \ + mflr r24; \ + bl do_profile; \ + mtlr r24; \ + ld r24,48(r21); \ + ld r25,56(r21); \ + b label##_Iseries_prof_ret + +#define STD_EXCEPTION_COMMON( trap, label, hdlr ) \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + li r20,0; \ + li r6,trap; \ + bl .save_remaining_regs; \ + bl hdlr; \ + b .ret_from_except + +/* + * Start of pSeries system interrupt routines + */ + . 
= 0x100 + .globl __start_interupts +__start_interupts: + + STD_EXCEPTION_PSERIES( 0x100, SystemReset ) + STD_EXCEPTION_PSERIES( 0x200, MachineCheck ) + STD_EXCEPTION_PSERIES( 0x300, DataAccess ) + STD_EXCEPTION_PSERIES( 0x380, DataAccessSLB ) + STD_EXCEPTION_PSERIES( 0x400, InstructionAccess ) + STD_EXCEPTION_PSERIES( 0x480, InstructionAccessSLB ) + STD_EXCEPTION_PSERIES( 0x500, HardwareInterrupt ) + STD_EXCEPTION_PSERIES( 0x600, Alignment ) + STD_EXCEPTION_PSERIES( 0x700, ProgramCheck ) + STD_EXCEPTION_PSERIES( 0x800, FPUnavailable ) + STD_EXCEPTION_PSERIES( 0x900, Decrementer ) + STD_EXCEPTION_PSERIES( 0xa00, Trap_0a ) + STD_EXCEPTION_PSERIES( 0xb00, Trap_0b ) + STD_EXCEPTION_PSERIES( 0xc00, SystemCall ) + STD_EXCEPTION_PSERIES( 0xd00, SingleStep ) + STD_EXCEPTION_PSERIES( 0xe00, Trap_0e ) + STD_EXCEPTION_PSERIES( 0xf00, PerformanceMonitor ) + STD_EXCEPTION_PSERIES( 0x1300, InstructionBreakpoint ) + + . = 0x4000 + .globl __end_interupts + .globl __start_naca +__end_interupts: +__start_naca: + /* Save space for naca. + * The first dword of the Naca is required by iSeries LPAR to + * point to itVpdAreas. On pSeries native, this value is not used. + */ + .llong itVpdAreas + .llong 0x0 + .llong 0x0 + .llong xPaca + + /* + * Space for the initial segment table + * For LPAR, the hypervisor must fill in at least one entry + * before we get control (with relocate on) + */ + + . = 0x5000 + .globl __end_naca + .globl __start_stab +__end_naca: +__start_stab: + + + . = 0x6000 + .globl __end_stab +__end_stab: + + /* + * The iSeries LPAR map is at this fixed address + * so that the HvReleaseData structure can address + * it with a 32-bit offset. + * + * The VSID values below are dependent on the + * VSID generation algorithm. See include/asm/mmu_context.h. + */ + + .llong 1 /* # ESIDs to be mapped by hypervisor */ + .llong 1 /* # memory ranges to be mapped by hypervisor */ + .llong 5 /* Page # of segment table within load area */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0x0c00000000 /* ESID to map (Kernel at EA = 0xC000000000000000) */ + .llong 0x06a99b4b14 /* VSID to map (Kernel at VA = 0x6a99b4b140000000) */ + .llong 8192 /* # pages to map (32 MB) */ + .llong 0 /* Offset from start of loadarea to start of map */ + .llong 0x0006a99b4b140000 /* VPN of first page to map */ + + . = 0x6100 + +/*** ISeries-LPAR interrupt handlers ***/ + + STD_EXCEPTION_ISERIES( MachineCheck ) + STD_EXCEPTION_ISERIES( DataAccess ) + STD_EXCEPTION_ISERIES( DataAccessSLB ) + STD_EXCEPTION_ISERIES( InstructionAccess ) + STD_EXCEPTION_ISERIES( InstructionAccessSLB ) + MASKABLE_EXCEPTION_ISERIES( HardwareInterrupt ) + STD_EXCEPTION_ISERIES( Alignment ) + STD_EXCEPTION_ISERIES( ProgramCheck ) + STD_EXCEPTION_ISERIES( FPUnavailable ) + MASKABLE_EXCEPTION_ISERIES( Decrementer ) + STD_EXCEPTION_ISERIES( Trap_0a ) + STD_EXCEPTION_ISERIES( Trap_0b ) + STD_EXCEPTION_ISERIES( SystemCall ) + STD_EXCEPTION_ISERIES( SingleStep ) + STD_EXCEPTION_ISERIES( Trap_0e ) + STD_EXCEPTION_ISERIES( PerformanceMonitor ) + + .globl SystemReset_Iseries +SystemReset_Iseries: + mfspr 25,SPRG3 /* Get Paca address */ + lhz r24,PACAPACAINDEX(r25) /* Get processor # */ + cmpi 0,r24,0 /* Are we processor 0? 
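+ * In outline (illustrative C; helper names are hypothetical):
+ *
+ *	if (paca->paca_index == 0)
+ *		__start_initialization_iSeries();  // boot cpu does setup
+ *	else
+ *		for (;;) {
+ *			if (paca->proc_start)	// released by boot cpu
+ *				__secondary_start();
+ *			hv_yield();	// be polite on a shared processor
+ *		}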
*/ + beq .__start_initialization_iSeries /* Start up the first processor */ + mfspr r4,CTRLF + li r5,RUNLATCH /* Turn off the run light */ + andc r4,r4,r5 + mtspr CTRLT,r4 + +1: + HMT_LOW +#ifdef CONFIG_SMP + lbz r23,PACAPROCSTART(r25) /* Test if this processor + * should start */ + sync + LOADADDR(r3,current_set) + sldi r28,r24,4 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,TASK_UNION_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpi 0,r23,0 + beq iseries_secondary_smp_loop /* Loop until told to go */ +#ifdef SECONDARY_PROCESSORS + bne .__secondary_start /* Loop until told to go */ +#endif +iseries_secondary_smp_loop: + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ +#else /* CONFIG_SMP */ + /* Yield the processor. This is required for non-SMP kernels + which are running on multi-threaded machines. */ + lis r3,0x8000 + rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ + addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ + li r4,0 /* "yield timed" */ + li r5,-1 /* "yield forever" */ +#endif /* CONFIG_SMP */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r25,SPRG3 /* Put r25 back ???? */ + b 1b /* If SMP not configured, secondaries + * loop forever */ + + .globl HardwareInterrupt_Iseries_masked +HardwareInterrupt_Iseries_masked: + b maskable_exception_exit + + .globl Decrementer_Iseries_masked +Decrementer_Iseries_masked: + li r22,1 + stb r22,PACALPPACA+LPPACADECRINT(r20) + lwz r22,PACADEFAULTDECR(r20) + mtspr DEC,r22 +maskable_exception_exit: + mtcrf 0xff,r23 /* Restore regs and free exception frame */ + ld r22,EX_SRR0(r21) + ld r23,EX_SRR1(r21) + mtspr SRR0,r22 + mtspr SRR1,r23 + ld r22,EX_R22(r21) + ld r23,EX_R23(r21) + mfspr r21,SPRG1 + mfspr r20,SPRG2 + rfid + +/*** Common interrupt handlers ***/ + + STD_EXCEPTION_COMMON( 0x100, SystemReset, .SystemResetException ) + STD_EXCEPTION_COMMON( 0x200, MachineCheck, .MachineCheckException ) + STD_EXCEPTION_COMMON( 0x900, Decrementer, .timer_interrupt ) + STD_EXCEPTION_COMMON( 0xa00, Trap_0a, .UnknownException ) + STD_EXCEPTION_COMMON( 0xb00, Trap_0b, .UnknownException ) + STD_EXCEPTION_COMMON( 0xd00, SingleStep, .SingleStepException ) + STD_EXCEPTION_COMMON( 0xe00, Trap_0e, .UnknownException ) + STD_EXCEPTION_COMMON( 0xf00, PerformanceMonitor, .PerformanceMonitorException ) + STD_EXCEPTION_COMMON(0x1300, InstructionBreakpoint, .InstructionBreakpointException ) + +/* + * Return from an exception which is handled without calling + * save_remaining_regs. The caller is assumed to have done + * EXCEPTION_PROLOG_COMMON. + */ +fast_exception_return: + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + mtcr r3 + mtlr r4 + mtctr r5 + mtspr XER,r6 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_4GPRS(10, r1) + mtspr SRR1,r23 + mtspr SRR0,r22 + REST_4GPRS(20, r1) + ld r1,GPR1(r1) + rfid + + +/* + * Here r20 points to the PACA, r21 to the exception frame, + * r23 contains the saved CR. + * r20 - r23, SRR0 and SRR1 are saved in the exception frame. + */ + .globl DataAccess_common +DataAccess_common: + mfspr r22,DAR + srdi r22,r22,60 + cmpi 0,r22,0xc + + /* Segment fault on a bolted segment. Go off and map that segment. */ + beq .do_stab_bolted +stab_bolted_user_return: + EXCEPTION_PROLOG_COMMON + ld r3,_DSISR(r1) + andis. r0,r3,0xa450 /* weird error? 
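+ * As a C sketch of the routing below (mask names invented here; the
+ * magic constants are the 0xa450/0x0020 DSISR tests):
+ *
+ *	if (!(dsisr & WEIRD_ERROR_BITS)) {
+ *		if (dsisr & NO_SEGMENT_BIT)		// no STE yet
+ *			ok = do_stab_SI(dar, 0x300);	// map the segment
+ *		else					// else try a fast
+ *			ok = do_hash_page_DSI(dar, rw);	// HPTE insert
+ *		if (ok)
+ *			return;		// resume the interrupted task
+ *	}
+ *	do_page_fault(regs, dar, dsisr);	// full generic handler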
*/ + bne 1f /* if not, try to put a PTE */ + andis. r0,r3,0x0020 /* Is it a page table fault? */ + rlwinm r4,r3,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */ + ld r3,_DAR(r1) /* into the hash table */ + + beq 2f /* If so handle it */ + li r4,0x300 /* Trap number */ + bl .do_stab_SI + b 1f + +2: bl .do_hash_page_DSI /* Try to handle as hpte fault */ +1: + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef DO_SOFT_DISABLE + ld r20,SOFTE(r1) /* Copy saved SOFTE bit */ +#else + rldicl r20,r23,49,63 /* copy EE bit from saved MSR */ +#endif + li r6,0x300 + bl .save_remaining_regs + bl .do_page_fault + b .ret_from_except + + .globl DataAccessSLB_common +DataAccessSLB_common: + mfspr r22,DAR + srdi r22,r22,60 + cmpi 0,r22,0xc + + /* Segment fault on a bolted segment. Go off and map that segment. */ + beq .do_slb_bolted + + EXCEPTION_PROLOG_COMMON + ld r3,_DAR(r1) + li r4,0x380 /* Exception vector */ + bl .ste_allocate + or. r3,r3,r3 /* Check return code */ + beq fast_exception_return /* Return if we succeeded */ + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef DO_SOFT_DISABLE + ld r20,SOFTE(r1) +#else + rldicl r20,r23,49,63 /* copy EE bit from saved MSR */ +#endif + li r6,0x380 + bl .save_remaining_regs + bl .do_page_fault + b .ret_from_except + + .globl InstructionAccess_common +InstructionAccess_common: + EXCEPTION_PROLOG_COMMON + + andis. r0,r23,0x0020 /* no ste found? */ + beq 2f + mr r3,r22 /* SRR0 at interrupt */ + li r4,0x400 /* Trap number */ + bl .do_stab_SI + b 1f + +2: andis. r0,r23,0x4000 /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + mr r3,r22 /* into the hash table */ + bl .do_hash_page_ISI /* Try to handle as hpte fault */ +1: + mr r4,r22 + mr r5,r23 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef DO_SOFT_DISABLE + ld r20,SOFTE(r1) +#else + rldicl r20,r23,49,63 /* copy EE bit from saved MSR */ +#endif + li r6,0x400 + bl .save_remaining_regs + bl .do_page_fault + b .ret_from_except + + .globl InstructionAccessSLB_common +InstructionAccessSLB_common: + EXCEPTION_PROLOG_COMMON + mr r3,r22 /* SRR0 = NIA */ + li r4,0x480 /* Exception vector */ + bl .ste_allocate + or. r3,r3,r3 /* Check return code */ + beq fast_exception_return /* Return if we succeeded */ + + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef DO_SOFT_DISABLE + ld r20,SOFTE(r1) +#else + rldicl r20,r23,49,63 /* copy EE bit from saved MSR */ +#endif + li r6,0x380 + bl .save_remaining_regs + bl .do_page_fault + b .ret_from_except + + .globl HardwareInterrupt_common +HardwareInterrupt_common: + EXCEPTION_PROLOG_COMMON +HardwareInterrupt_entry: + addi r3,r1,STACK_FRAME_OVERHEAD + li r20,0 + li r6,0x500 + bl .save_remaining_regs + /* Determine if need to run do_irq on a hardware interrupt stack */ + /* The first invocation of do_irq will occur on the kernel */ + /* stack in the current stack */ + /* All other invocations of do_irq will run on the hardware */ + /* interrupt stack associated with the PACA of the current */ + /* processor. 
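+ * As a sketch (illustrative), with n counting nested hardware ints:
+ *
+ *	n = paca->hrdwint_count++;
+ *	if (n == 1) {			// re-entered: leave the task's
+ *		saved = sp;		// kernel stack alone and move to
+ *		sp = paca->hrdwint_stack; // the dedicated interrupt stack
+ *		do_IRQ(regs);
+ *		sp = saved;
+ *	} else {
+ *		do_IRQ(regs);		// outermost (or deeper) case runs
+ *	}				// on whatever stack is current
+ *	paca->hrdwint_count--;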
 */
+	/*								*/
+	/* The call to do_irq will preserve the value of r14 - r31	*/
+	/*								*/
+	mfspr	r20,SPRG3		/* get Paca			*/
+	lbz	r21,PACAHRDWINTCOUNT(r20) /* get hardware interrupt cnt	*/
+	cmpi	0,r21,1			/*				*/
+	addi	r21,r21,1		/* incr hardware interrupt cnt	*/
+	stb	r21,PACAHRDWINTCOUNT(r20) /*				*/
+	bne	2f			/*				*/
+
+	mr	r14,r1			/* preserve current r1		*/
+	ld	r1,PACAHRDWINTSTACK(r20) /*				*/
+	std	r14,0(r1)		/* set the back chain		*/
+	bl	.do_IRQ
+	lbz	r22,PACAHRDWINTCOUNT(r20) /* get hardware interrupt cnt	*/
+	cmp	0,r22,r21		/* debug test			*/
+	bne	3f
+	subi	r21,r21,1
+	stb	r21,PACAHRDWINTCOUNT(r20) /*				*/
+	mr	r1,r14			/*				*/
+	b	.ret_from_except
+
+2:
+	bl	.do_IRQ
+
+	lbz	r22,PACAHRDWINTCOUNT(r20) /* get hardware interrupt cnt	*/
+	cmp	0,r22,r21		/* debug test			*/
+	bne	3f			/*				*/
+	subi	r21,r21,1		/* decr hardware interrupt cnt	*/
+	stb	r21,PACAHRDWINTCOUNT(r20) /*				*/
+
+	b	.ret_from_except
+
+3:
+	/* error - counts out of sync */
+#ifdef CONFIG_XMON
+	bl	.xmon
+#endif
+4:	b	4b
+
+
+	.globl Alignment_common
+Alignment_common:
+	EXCEPTION_PROLOG_COMMON
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef DO_SOFT_DISABLE
+	ld	r20,SOFTE(r1)
+#else
+	rldicl	r20,r23,49,63		/* copy EE bit from saved MSR */
+#endif
+	li	r6,0x600
+	bl	.save_remaining_regs
+	bl	.AlignmentException
+	b	.ret_from_except
+
+	.globl ProgramCheck_common
+ProgramCheck_common:
+	EXCEPTION_PROLOG_COMMON
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef DO_SOFT_DISABLE
+	ld	r20,SOFTE(r1)
+#else
+	rldicl	r20,r23,49,63		/* copy EE bit from saved MSR */
+#endif
+	li	r6,0x700
+	bl	.save_remaining_regs
+	bl	.ProgramCheckException
+	b	.ret_from_except
+
+	.globl FPUnavailable_common
+FPUnavailable_common:
+	EXCEPTION_PROLOG_COMMON
+	bne	.load_up_fpu		/* if from user, just load it up */
+	li	r20,0
+	li	r6,0x800
+	bl	.save_remaining_regs	/* if from kernel, take a trap */
+	bl	.KernelFP
+	b	.ret_from_except
+
+	.globl SystemCall_common
+SystemCall_common:
+	EXCEPTION_PROLOG_COMMON
+#ifdef CONFIG_PPC_ISERIES
+	cmpi	0,r0,0x5555		/* Special syscall to handle pending */
+	bne+	1f			/* interrupts */
+	andi.	r6,r23,MSR_PR		/* Only allowed from kernel */
+	beq+	HardwareInterrupt_entry
+1:
+#endif
+	std	r3,ORIG_GPR3(r1)
+#ifdef DO_SOFT_DISABLE
+	ld	r20,SOFTE(r1)
+#else
+	rldicl	r20,r23,49,63		/* copy EE bit from saved MSR */
+#endif
+	li	r6,0xC00
+	bl	.save_remaining_regs
+	bl	.DoSyscall
+	b	.ret_from_except
+
+_GLOBAL(do_hash_page_ISI)
+	li	r4,0
+_GLOBAL(do_hash_page_DSI)
+	rlwimi	r4,r23,32-13,30,30	/* Insert MSR_PR as _PAGE_USER */
+	ori	r4,r4,1			/* add _PAGE_PRESENT */
+
+	mflr	r21			/* Save LR in r21 */
+
+#ifdef DO_SOFT_DISABLE
+	/*
+	 * We hard enable here (but first soft disable) so that the hash_page
+	 * code can spin on the hash_table_lock without a problem on a shared
+	 * processor.
+	 */
+	li	r0,0
+	stb	r0,PACAPROCENABLED(r20)	/* Soft Disabled */
+
+	mfmsr	r0
+	ori	r0,r0,MSR_EE+MSR_RI
+	mtmsrd	r0			/* Hard Enable, RI on */
+#endif
+
+	/*
+	 * r3 contains the faulting address
+	 * r4 contains the required access permissions
+	 *
+	 * at return r3 = 0 for success
+	 */
+
+	bl	.hash_page		/* build HPTE if possible */
+
+#ifdef DO_SOFT_DISABLE
+	/*
+	 * Now go back to hard disabled.
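+ *
+ * The whole bracket around hash_page, as a sketch (helper names
+ * illustrative):
+ *
+ *	paca->proc_enabled = 0;		// soft-disable first
+ *	hard_enable();			// EE on while we may spin
+ *	rc = hash_page(ea, access);
+ *	hard_disable();			// EE off again
+ *	if (soft_enabled && pending_interrupts())
+ *		goto HardwareInterrupt_entry;	// replay what arrived
+ *	paca->proc_enabled = soft_enabled;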
+ */ + mfmsr r0 + li r4,0 + ori r4,r4,MSR_EE+MSR_RI + andc r0,r0,r4 + mtmsrd r0 /* Hard Disable, RI off */ + + ld r0,SOFTE(r1) + cmpdi 0,r0,0 /* See if we will soft enable in */ + /* save_remaining_regs */ + beq 5f + CHECKANYINT(r4,r5) + bne- HardwareInterrupt_entry /* Convert this DSI into an External */ + /* to process interrupts which occurred */ + /* during hash_page */ +5: + stb r0,PACAPROCENABLED(r20) /* Restore soft enable/disable status */ +#endif + or. r3,r3,r3 /* Check return code */ + beq fast_exception_return /* Return from exception on success */ + + mtlr r21 /* restore LR */ + blr /* Return to DSI or ISI on failure */ + +/* + * r20 points to the PACA, r21 to the exception frame, + * r23 contains the saved CR. + * r20 - r23, SRR0 and SRR1 are saved in the exception frame. + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(do_stab_bolted) + std r23,EX_DAR(r21) /* save CR in exc. frame */ + + mfspr r22,DSISR + andis. r22,r22,0x0020 + bne+ 2f + ld r22,8(r21) /* get SRR1 */ + andi. r22,r22,MSR_PR /* check if from user */ + bne+ stab_bolted_user_return /* from user, send the error on up */ + li r3,0 +#ifdef CONFIG_XMON + bl .xmon +#endif +1: b 1b +2: + /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ + mfspr r21,DAR + rldicl r20,r21,36,32 /* Permits a full 32b of ESID */ + rldicr r20,r20,15,48 + rldicl r21,r21,4,60 + or r20,r20,r21 + + li r21,9 /* VSID_RANDOMIZER */ + sldi r21,r21,32 + oris r21,r21,58231 + ori r21,r21,39831 + + mulld r20,r20,r21 + clrldi r20,r20,28 /* r20 = vsid */ + + mfsprg r21,3 + ld r21,PACASTABVIRT(r21) + + /* Hash to the primary group */ + mfspr r22,DAR + rldicl r22,r22,36,59 + rldicr r22,r22,7,56 + or r21,r21,r22 /* r21 = first ste of the group */ + + /* Search the primary group for a free entry */ + li r22,0 +1: + ld r23,0(r21) /* Test valid bit of the current ste */ + rldicl r23,r23,57,63 + cmpwi r23,0 + bne 2f + ld r23,8(r21) /* Get the current vsid part of the ste */ + rldimi r23,r20,12,0 /* Insert the new vsid value */ + std r23,8(r21) /* Put new entry back into the stab */ + eieio /* Order vsid update */ + ld r23,0(r21) /* Get the esid part of the ste */ + mfspr r20,DAR /* Get the new esid */ + rldicl r20,r20,36,28 /* Permits a full 36b of ESID */ + rldimi r23,r20,28,0 /* Insert the new esid value */ + ori r23,r23,144 /* Turn on valid and kp */ + std r23,0(r21) /* Put new entry back into the stab */ + sync /* Order the update */ + b 3f +2: + addi r22,r22,1 + addi r21,r21,16 + cmpldi r22,7 + ble 1b + + /* Stick for only searching the primary group for now. */ + /* At least for now, we use a very simple random castout scheme */ + /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ + mftb r22 + andi. 
r22,r22,7 + ori r22,r22,1 + sldi r22,r22,4 + + /* r21 currently points to and ste one past the group of interest */ + /* make it point to the randomly selected entry */ + subi r21,r21,128 + or r21,r21,r22 /* r21 is the entry to invalidate */ + + isync /* mark the entry invalid */ + ld r23,0(r21) + li r22,-129 + and r23,r23,r22 + std r23,0(r21) + sync + + ld r23,8(r21) + rldimi r23,r20,12,0 + std r23,8(r21) + eieio + + ld r23,0(r21) /* Get the esid part of the ste */ + mr r22,r23 + mfspr r20,DAR /* Get the new esid */ + rldicl r20,r20,36,28 /* Permits a full 32b of ESID */ + rldimi r23,r20,28,0 /* Insert the new esid value */ + ori r23,r23,144 /* Turn on valid and kp */ + std r23,0(r21) /* Put new entry back into the stab */ + + rldicl r22,r22,36,28 + rldicr r22,r22,28,35 + slbie r22 + sync + +3: + /* All done -- return from exception. */ + mfsprg r20,3 /* Load the PACA pointer */ + ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ + addi r21,r21,EXC_FRAME_SIZE + ld r23,EX_DAR(r21) /* get saved CR */ + /* note that this is almost identical to maskable_exception_exit */ + mtcr r23 /* restore CR */ + ld r22,EX_SRR0(r21) /* Get SRR0 from exc. frame */ + ld r23,EX_SRR1(r21) /* Get SRR1 from exc. frame */ + mtspr SRR0,r22 + mtspr SRR1,r23 + ld r22,EX_R22(r21) /* restore r22 and r23 */ + ld r23,EX_R23(r21) + mfspr r20,SPRG2 + mfspr r21,SPRG1 + rfid +_TRACEBACK(do_stab_bolted) + +/* + * r20 points to the PACA, r21 to the exception frame, + * r23 contains the saved CR. + * r20 - r23, SRR0 and SRR1 are saved in the exception frame. + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(do_slb_bolted) + std r23,48(r21) /* save CR in exc. frame */ + + /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ + mfspr r21,DAR + rldicl r20,r21,36,32 /* Permits a full 32b of ESID */ + rldicr r20,r20,15,48 + rldicl r21,r21,4,60 + or r20,r20,r21 + + li r21,9 /* VSID_RANDOMIZER */ + sldi r21,r21,32 + oris r21,r21,58231 + ori r21,r21,39831 + + mulld r20,r20,r21 + clrldi r20,r20,28 /* r20 = vsid */ + + /* Search the SLB for a free entry */ + li r22,1 +1: + slbmfee r23,r22 + rldicl r23,r23,37,63 + cmpwi r23,0 + beq 3f /* Found an invalid entry */ + + addi r22,r22,1 + cmpldi r22,64 + blt 1b + + /* No free entry - just take the next entry, round-robin */ + /* XXX we should get the number of SLB entries from the naca */ +SLB_NUM_ENTRIES = 64 + mfspr r21,SPRG3 + ld r22,PACASTABRR(r21) + addi r23,r22,1 + cmpdi r23,SLB_NUM_ENTRIES + blt 2f + li r23,1 +2: std r23,PACASTABRR(r21) + + /* r20 = vsid, r22 = entry */ +3: + /* Put together the vsid portion of the entry. */ + li r21,0 + rldimi r21,r20,12,0 + ori r20,r21,1024 +#ifndef CONFIG_PPC_ISERIES + ori r20,r20,256 /* map kernel region with large ptes */ +#endif + + /* Put together the esid portion of the entry. */ + mfspr r21,DAR /* Get the new esid */ + rldicl r21,r21,36,28 /* Permits a full 36b of ESID */ + li r23,0 + rldimi r23,r21,28,0 /* Insert esid */ + oris r21,r23,2048 /* valid bit */ + rldimi r21,r22,0,52 /* Insert entry */ + + isync + slbmte r20,r21 + isync + + /* All done -- return from exception. */ + mfsprg r20,3 /* Load the PACA pointer */ + ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ + addi r21,r21,EXC_FRAME_SIZE + ld r23,EX_DAR(r21) /* get saved CR */ + /* note that this is almost identical to maskable_exception_exit */ + mtcr r23 /* restore CR */ + ld r22,EX_SRR0(r21) /* Get SRR0 from exc. frame */ + ld r23,EX_SRR1(r21) /* Get SRR1 from exc. 
frame */ + mtspr SRR0,r22 + mtspr SRR1,r23 + ld r22,EX_R22(r21) /* restore r22 and r23 */ + ld r23,EX_R23(r21) + mfspr r20,SPRG2 + mfspr r21,SPRG1 + rfid +_TRACEBACK(do_slb_bolted) + +_GLOBAL(do_stab_SI) + mflr r21 /* Save LR in r21 */ + + /* + * r3 contains the faulting address + * r4 contains the required access permissions + * + * at return r3 = 0 for success + */ + + bl .ste_allocate /* build STE if possible */ + or. r3,r3,r3 /* Check return code */ + beq fast_exception_return /* Return from exception on success */ + mtlr r21 /* restore LR */ + blr /* Return to DSI or ISI on failure */ + +/* + * This code finishes saving the registers to the exception frame. + * Address translation is already on. + */ +_GLOBAL(save_remaining_regs) + /* + * Save the rest of the registers into the pt_regs structure + */ + std r22,_NIP(r1) + std r23,_MSR(r1) + std r6,TRAP(r1) + ld r6,GPR6(r1) + SAVE_2GPRS(14, r1) + SAVE_4GPRS(16, r1) + SAVE_8GPRS(24, r1) + + /* + * Clear the RESULT field + */ + li r22,0 + std r22,RESULT(r1) + + /* + * Test if from user state; result will be tested later + */ + andi. r23,r23,MSR_PR /* Set CR for later branch */ + + /* + * Indicate that r1 contains the kernel stack and + * get the Kernel TOC and CURRENT pointers from the Paca + */ + mfspr r23,SPRG3 /* Get PACA */ + std r22,PACAKSAVE(r23) /* r1 is now kernel sp */ + ld r2,PACATOC(r23) /* Get Kernel TOC pointer */ + + /* + * If from user state, update THREAD.regs + */ + beq 2f /* Modify THREAD.regs if from user */ + addi r24,r1,STACK_FRAME_OVERHEAD + std r24,THREAD+PT_REGS(r13) +2: + SET_REG_TO_CONST(r22, MSR_KERNEL) + +#ifdef DO_SOFT_DISABLE + stb r20,PACAPROCENABLED(r23) /* possibly soft enable */ + ori r22,r22,MSR_EE /* always hard enable */ +#else + rldimi r22,r20,15,48 /* Insert desired EE value */ +#endif + + mtmsrd r22 + blr + + +do_profile: + ld r22,8(r21) /* Get SRR1 */ + andi. r22,r22,MSR_PR /* Test if in kernel */ + bnelr /* return if not in kernel */ + ld r22,0(r21) /* Get SRR0 */ + ld r25,PACAPROFSTEXT(r20) /* _stext */ + subf r22,r25,r22 /* offset into kernel */ + lwz r25,PACAPROFSHIFT(r20) + srd r22,r22,r25 + lwz r25,PACAPROFLEN(r20) /* length of profile table (-1) */ + cmp 0,r22,r25 /* off end? */ + ble 1f + mr r22,r25 /* force into last entry */ +1: sldi r22,r22,2 /* convert to offset into buffer */ + ld r25,PACAPROFBUFFER(r20) /* profile buffer */ + add r25,r25,r22 +2: lwarx r22,0,r25 /* atomically increment */ + addi r22,r22,1 + stwcx. r22,0,r25 + bne- 2b + blr + + +/* + * On pSeries, secondary processors spin in the following code. + * At entry, r3 = this processor's number (in Linux terms, not hardware). + */ +_GLOBAL(pseries_secondary_smp_init) + + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Set up a Paca value for this processor. */ + LOADADDR(r24, xPaca) /* Get base vaddr of Paca array */ + mulli r25,r3,PACA_SIZE /* Calculate vaddr of right Paca */ + add r25,r25,r24 /* for this processor. */ + + mtspr SPRG3,r25 /* Save vaddr of Paca in SPRG3 */ + mr r24,r3 /* __secondary_start needs cpu# */ + +1: + HMT_LOW + lbz r23,PACAPROCSTART(r25) /* Test if this processor should */ + /* start. */ + sync + + /* Create a temp kernel stack for use before relocation is on. 
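+ * The arithmetic, as a sketch (assuming sizeof(struct paca) is
+ * PACA_SIZE; casts illustrative):
+ *
+ *	struct paca *paca = &xPaca[cpu];	// base + cpu*PACA_SIZE
+ *	sp = (char *)paca + PACAGUARD + 0x1000	// top of the temp stack
+ *	     - STACK_FRAME_OVERHEAD;		// room for one frame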
*/ + mr r1,r25 + addi r1,r1,PACAGUARD + addi r1,r1,0x1000 + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpi 0,r23,0 +#ifdef CONFIG_SMP +#ifdef SECONDARY_PROCESSORS + bne .__secondary_start +#endif +#endif + b 1b /* Loop until told to go */ + +_GLOBAL(__start_initialization_iSeries) + + LOADADDR(r1,init_task_union) + addi r1,r1,TASK_UNION_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + LOADADDR(r9,naca) + SET_REG_TO_CONST(r4, KERNELBASE) + addi r4,r4,0x4000 + std r4,0(r9) /* set the naca pointer */ + + /* Get the pointer to the segment table */ + ld r6,PACA(r4) /* Get the base Paca pointer */ + ld r4,PACASTABVIRT(r6) + + bl .iSeries_fixup_klimit + + b .start_here_common + +_GLOBAL(__start_initialization_pSeries) + mr r31,r3 /* save parameters */ + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + mr r26,r8 /* YABOOT: debug_print() routine */ + mr r25,r9 /* YABOOT: debug_delay() routine */ + mr r24,r10 /* YABOOT: debug_prom() routine */ + + bl .enable_64b_mode + + /* put a relocation offset into r3 */ + bl .reloc_offset + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + /* Relocate the TOC from a virt addr to a real addr */ + sub r2,r2,r3 + + /* setup the naca pointer which is needed by prom_init */ + LOADADDR(r9,naca) + sub r9,r9,r3 /* addr of the variable naca */ + + SET_REG_TO_CONST(r4, KERNELBASE) + sub r4,r4,r3 + addi r4,r4,0x4000 + std r4,0(r9) /* set the value of naca */ + + /* DRENG / PPPBBB Fix the following comment!!! -Peter */ + /* The following copies the first 0x100 bytes of code from the */ + /* load addr to physical addr 0x0. This code causes secondary */ + /* processors to spin until a flag in the PACA is set. This */ + /* is done at this time rather than with the entire kernel */ + /* relocation which is done below because we need to cause the */ + /* processors to spin on code that is not going to move while OF */ + /* is still alive. Although the spin code is not actually run on */ + /* a uniprocessor, we always do this copy. */ + SET_REG_TO_CONST(r4, KERNELBASE)/* Src addr */ + sub r4,r4,r3 /* current address of __start */ + /* the source addr */ + li r3,0 /* Dest addr */ + li r5,0x100 /* # bytes of memory to copy */ + li r6,0 /* Destination offset */ + bl .copy_and_flush /* copy the first 0x100 bytes */ + + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + mr r8,r26 + mr r9,r25 + mr r10,r24 + + bl .prom_init + + li r24,0 /* cpu # */ + +/* + * At this point, r3 contains the physical address we are running at, + * returned by prom_init() + */ +_STATIC(__after_prom_start) + +/* + * We need to run with __start at physical address 0. + * This will leave some code in the first 256B of + * real memory, which are reserved for software use. + * The remainder of the first page is loaded with the fixed + * interrupt vectors. The next two pages are filled with + * unknown exception placeholders. + * + * Note: This process overwrites the OF exception vectors. + * r26 == relocation offset + * r27 == KERNELBASE + */ + bl .reloc_offset + mr r26,r3 + SET_REG_TO_CONST(r27,KERNELBASE) + + li r3,0 /* target addr */ + + sub r4,r27,r26 /* source addr */ + /* current address of _start */ + /* i.e. where we are running */ + /* the source addr */ + + LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ + sub r5,r5,r27 + + li r6,0x100 /* Start offset, the first 0x100 */ + /* bytes were copied earlier. 
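+ * copy_and_flush's contract, as a sketch (chunk size chosen small
+ * enough to be valid for any real cache line size; helper names
+ * illustrative):
+ *
+ *	for (; off < limit; off += CHUNK) {
+ *		copy_doublewords(dst + off, src + off, CHUNK);
+ *		dcbst(dst + off);	// push the data line to memory
+ *		sync();			// order it
+ *		icbi(dst + off);	// kill any stale icache copy
+ *	}
+ *
+ * so the relocated kernel image is coherent for instruction fetch
+ * before we branch into it.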
*/ + + bl .copy_and_flush /* copy the first n bytes */ + /* this includes the code being */ + /* executed here. */ + + li r0,4f@l /* Jump to the copy of this code */ + mtctr r0 /* that we just made */ + bctr + +4: LOADADDR(r9,rtas) + sub r9,r9,r26 + ld r5,RTASBASE(r9) /* get the value of rtas->base */ + ld r9,RTASSIZE(r9) /* get the value of rtas->size */ + bl .copy_and_flush /* copy upto rtas->base */ + add r6,r6,r9 /* then skip over rtas->size bytes */ + + LOADADDR(r5,klimit) + sub r5,r5,r26 + ld r5,0(r5) /* get the value of klimit */ + sub r5,r5,r27 + bl .copy_and_flush /* copy the rest */ + b .start_here_pSeries + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + * + * Note: this routine *only* clobbers r0, r6 and lr + */ +_STATIC(copy_and_flush) + addi r5,r5,-8 + addi r6,r6,-8 +4: li r0,16 /* Use the least common */ + /* denominator cache line */ + /* size. This results in */ + /* extra cache line flushes */ + /* but operation is correct. */ + /* Can't get cache line size */ + /* from NACA as it is being */ + /* moved too. */ + + mtctr r0 /* put # words/line in ctr */ +3: addi r6,r6,8 /* copy a cache line */ + ldx r0,r6,r4 + stdx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmpld 0,r6,r5 + blt 4b + sync + addi r5,r5,8 + addi r6,r6,8 + blr + +.align 8 +copy_to_here: + +/* + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch. -- Cort + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + * + */ +#ifndef CONFIG_SMP + LOADBASE(r3,last_task_used_math) + ld r4,last_task_used_math@l(r3) + cmpi 0,r4,0 + beq 1f + addi r4,r4,THREAD /* want THREAD of last_task_used_math */ + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r4) + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r20,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r20 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ori r23,r23,MSR_FP|MSR_FE0|MSR_FE1 + addi r5,r13,THREAD /* Get THREAD */ + lfd fr0,THREAD_FPSCR-4(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD /* Back to 'current' */ + std r4,last_task_used_math@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + +/* + * FP unavailable trap from kernel - print a message, but let + * the task use FP in the kernel until it returns to user mode. 
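+ *
+ * Taken together with load_up_fpu above, the policy is (sketch):
+ *
+ *	if (user_mode(regs))
+ *		load_up_fpu();		// lazy restore, no message
+ *	else {
+ *		regs->msr |= MSR_FP;	// let the kernel code continue
+ *		printk("floating point used in kernel ...");
+ *	}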
+ */ +_GLOBAL(KernelFP) + ld r3,_MSR(r1) + ori r3,r3,MSR_FP + std r3,_MSR(r1) /* enable use of FP after return */ + LOADADDR(r3,86f) + mfspr r4,SPRG3 /* Get PACA */ + ld r4,PACACURRENT(r4) /* current */ + ld r5,_NIP(r1) + b .ret_from_except +86: .string "floating point used in kernel (task=%p, pc=%x)\n" + .align 4 + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync + cmpi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r3) + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + LOADBASE(r4,last_task_used_math) + std r5,last_task_used_math@l(r4) +#endif /* CONFIG_SMP */ + blr + + + +#ifdef CONFIG_SMP +/* + * This function is called after the master CPU has released the + * secondary processors. The execution environment is relocation off. + * The Paca for this processor has the following fields initialized at + * this point: + * 1. Processor number + * 2. Segment table pointer (virtual address) + * On entry the following are set: + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r24 = cpu# (in Linux terms) + * r25 = Paca virtual address + * SPRG3 = Paca virtual address + */ +_GLOBAL(__secondary_start) + + HMT_MEDIUM /* Set thread priority to MEDIUM */ + + /* set up the TOC (virtual address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + std r2,PACATOC(r25) + li r6,0 + std r6,PACAKSAVE(r25) + stb r6,PACAPROCENABLED(r25) + +#ifndef CONFIG_PPC_ISERIES + /* Initialize the page table pointer register. */ + LOADADDR(r6,_SDR1) + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ +#endif + /* Initialize the first segment table (or SLB) entry */ + ld r3,PACASTABVIRT(r25) /* get addr of segment table */ + bl .stab_initialize + + /* Initialize the kernel stack. Just a repeat for iSeries. */ + LOADADDR(r3,current_set) + sldi r28,r24,4 /* get current_set[cpu#] */ + ldx r13,r3,r28 + std r13,PACACURRENT(r25) + addi r1,r13,TASK_UNION_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + ld r3,PACASTABREAL(r25) /* get raddr of segment table */ + ori r4,r3,1 /* turn on valid bit */ + +#ifdef CONFIG_PPC_ISERIES + li r0,-1 /* hypervisor call */ + li r3,1 + sldi r3,r3,63 /* 0x8000000000000000 */ + ori r3,r3,4 /* 0x8000000000000004 */ + sc /* HvCall_setASR */ +#else + mtasr r4 /* set the stab location */ +#endif + li r7,0 + mtlr r7 + + /* enable MMU and jump to start_secondary */ + LOADADDR(r3,.start_secondary_prolog) + SET_REG_TO_CONST(r4, MSR_KERNEL) +#ifdef DO_SOFT_DISABLE + ori r4,r4,MSR_EE +#endif + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid +#endif /* CONFIG_SMP */ + +/* + * Running with relocation on at this point. All we want to do is + * zero the stack back-chain pointer before going into C code. 
+ */ +_GLOBAL(start_secondary_prolog) + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + +/* + * This subroutine clobbers r11, r12 and the LR + */ +_GLOBAL(enable_64b_mode) + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + or r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + or r11,r11,r12 + mtmsrd r11 + isync + blr + +/* + * This subroutine clobbers r11, r12 and the LR + */ +_GLOBAL(enable_32b_mode) + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + andc r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + andc r11,r11,r12 + mtmsrd r11 + isync + blr + + +/* + * This is where the main kernel code starts. + */ +_STATIC(start_here_pSeries) + /* get a new offset, now that the kernel has moved. */ + bl .reloc_offset + mr r26,r3 + + /* setup the naca pointer which is needed by *tab_initialize */ + LOADADDR(r6,naca) + sub r6,r6,r26 /* addr of the variable naca */ + li r27,0x4000 + std r27,0(r6) /* set the value of naca */ + +#ifdef CONFIG_HMT + /* Start up the second thread on cpu 0 */ + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x34 /* Pulsar */ + beq 90f + cmpwi r3,0x36 /* Icestar */ + beq 90f + cmpwi r3,0x37 /* SStar */ + beq 90f + b 91f /* HMT not supported */ +90: li r3,0 + bl .hmt_start_secondary +91: +#endif + +#ifdef CONFIG_SMP + /* All secondary cpus are now spinning on a common + * spinloop, release them all now so they can start + * to spin on their individual Paca spinloops. + * For non SMP kernels, the secondary cpus never + * get out of the common spinloop. + */ + li r3,1 + LOADADDR(r5,__secondary_hold_spinloop) + tophys(r4,r5) + std r3,0(r4) +#endif + + /* The following gets the stack and TOC set up with the regs */ + /* pointing to the real addr of the kernel stack. This is */ + /* all done to support the C function call below which sets */ + /* up the htab. This is done because we have relocated the */ + /* kernel but are still running in real mode. */ + + /* real ptr to current */ + LOADADDR(r3,init_task_union) + sub r3,r3,r26 + + /* set up a stack pointer (physical address) */ + addi r1,r3,TASK_UNION_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* set up the TOC (physical address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + sub r2,r2,r26 + + /* Init naca->debug_switch so it can be used in stab & htab init. */ + bl .ppcdbg_initialize + + /* Get the pointer to the segment table which is used by */ + /* stab_initialize */ + li r27,0x4000 + ld r6,PACA(r27) /* Get the base Paca pointer */ + sub r6,r6,r26 /* convert to physical addr */ + mtspr SPRG3,r6 /* PPPBBB: Temp... -Peter */ + ld r3,PACASTABREAL(r6) + ori r4,r3,1 /* turn on valid bit */ + mtasr r4 /* set the stab location */ + + /* Initialize an initial memory mapping and turn on relocation. */ + bl .stab_initialize + bl .htab_initialize + + LOADADDR(r6,_SDR1) + sub r6,r6,r26 + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ + + LOADADDR(r3,.start_here_common) + SET_REG_TO_CONST(r4, MSR_KERNEL) + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + + /* This is where all platforms converge execution */ +_STATIC(start_here_common) + /* relocation is on at this point */ + + /* Clear out the BSS */ + LOADADDR(r11,_end) + + LOADADDR(r8,__bss_start) + + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. 
r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + + /* The following code sets up the SP and TOC now that we are */ + /* running with translation enabled. */ + + /* ptr to current */ + LOADADDR(r3,init_task_union) + + /* set up the stack */ + addi r1,r3,TASK_UNION_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* set up the TOC */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + /* setup the naca pointer */ + LOADADDR(r9,naca) + + SET_REG_TO_CONST(r8, KERNELBASE) + addi r8,r8,0x4000 + std r8,0(r9) /* set the value of the naca ptr */ + + LOADADDR(r4,naca) /* Get Naca ptr address */ + ld r4,0(r4) /* Get the location of the naca */ + ld r4,PACA(r4) /* Get the base Paca pointer */ + mtspr SPRG3,r4 + + /* ptr to current */ + LOADADDR(r13,init_task_union) + std r13,PACACURRENT(r4) + + std r2,PACATOC(r4) + li r5,0 + std r0,PACAKSAVE(r4) + + /* ptr to hardware interrupt stack for processor 0 */ + LOADADDR(r3, hardware_int_paca0) + li r5,0x1000 + sldi r5,r5,3 + subi r5,r5,STACK_FRAME_OVERHEAD + + add r3,r3,r5 + std r3,PACAHRDWINTSTACK(r4) + + li r3,0 + stb r3,PACAHRDWINTCOUNT(r4) + + + /* + * Restore the parms passed in from the bootloader. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + + bl .setup_system + + /* Load up the kernel context */ +5: +#ifdef DO_SOFT_DISABLE + mfspr r4,SPRG3 + li r5,0 + stb r5,PACAPROCENABLED(r4) /* Soft Disabled */ + mfmsr r5 + ori r5,r5,MSR_EE /* Hard Enabled */ + mtmsrd r5 +#endif + + bl .start_kernel + +_GLOBAL(hmt_init) +#ifdef CONFIG_HMT + LOADADDR(r5, hmt_thread_data) + mfspr r7,PVR + srwi r7,r7,16 + cmpwi r7,0x34 /* Pulsar */ + beq 90f + cmpwi r7,0x36 /* Icestar */ + beq 91f + cmpwi r7,0x37 /* SStar */ + beq 91f + b 101f +90: mfspr r6,PIR + andi. r6,r6,0x1f + b 92f +91: mfspr r6,PIR + andi. r6,r6,0x3ff +92: sldi r4,r24,3 + stwx r6,r5,r4 + bl .hmt_start_secondary + b 101f + +__hmt_secondary_hold: + LOADADDR(r5, hmt_thread_data) + clrldi r5,r5,4 + li r7,0 + mfspr r6,PIR + mfspr r8,PVR + srwi r8,r8,16 + cmpwi r8,0x34 + bne 93f + andi. r6,r6,0x1f + b 103f +93: andi. r6,r6,0x3f + +103: lwzx r8,r5,r7 + cmpw r8,r6 + beq 104f + addi r7,r7,8 + b 103b + +104: addi r7,r7,4 + lwzx r9,r5,r7 + mr r24,r9 +101: +#endif + mr r3,r24 + b .pseries_secondary_smp_init + +#ifdef CONFIG_HMT +_GLOBAL(hmt_start_secondary) + LOADADDR(r4,__hmt_secondary_hold) + clrldi r4,r4,4 + mtspr NIADORM, r4 + mfspr r4, MSRDORM + li r5, -65 + and r4, r4, r5 + mtspr MSRDORM, r4 + lis r4,0xffef + ori r4,r4,0x7403 + mtspr TSC, r4 + li r4,0x1f4 + mtspr TST, r4 + mfspr r4, HID0 + ori r4, r4, 0x1 + mtspr HID0, r4 + mfspr r4, CTRLF + oris r4, r4, 0x40 + mtspr CTRLT, r4 + blr +#endif + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .align 12 + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + + .globl ioremap_dir +ioremap_dir: + .space 4096 + + .globl hardware_int_paca0 +hardware_int_paca0: + .space 8*4096 + + +/* 4096 * 31 bytes of storage */ + .globl stab_array +stab_array: + .space 131072 +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. 
+ */ + .globl cmd_line +cmd_line: + .space 512 diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/htab.c linuxppc64_2_4/arch/ppc64/kernel/htab.c --- ../kernel.org/linux/arch/ppc64/kernel/htab.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/htab.c Fri Nov 23 11:55:53 2001 @@ -0,0 +1,1230 @@ +/* + * + * + * PowerPC64 port by Mike Corrigan and Dave Engebretsen + * {mikejc|engebret}@us.ibm.com + * + * Copyright (c) 2000 Mike Corrigan + * + * Module name: htab.c + * + * Description: + * PowerPC Hashed Page Table functions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_EEH +#include +#endif + +/* For iSeries */ +#include + +/* Note: pte --> Linux PTE + * HPTE --> PowerPC Hashed Page Table Entry + */ + +HTAB htab_data = {NULL, 0, 0, 0, 0}; + +int proc_dol2crvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp); + +void htab_initialize(void); +void make_pte_LPAR(HPTE *htab, + unsigned long va, unsigned long pa, int mode, + unsigned long hash_mask, int large); + +extern unsigned long reloc_offset(void); +extern unsigned long get_kernel_vsid( unsigned long ea ); +extern void cacheable_memzero( void *, unsigned int ); + +extern unsigned long _SDR1; +extern unsigned long klimit; +extern struct Naca *naca; + +extern unsigned long _ASR; +extern inline void make_ste(unsigned long stab, + unsigned long esid, unsigned long vsid); + +extern char _stext[], _etext[], __start_naca[], __end_stab[]; + +static spinlock_t hash_table_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + +#define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset)) +#define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset)) +#define RELOC(x) (*PTRRELOC(&(x))) + +extern unsigned long htab_size( unsigned long ); +unsigned long hpte_getword0_iSeries( unsigned long slot ); + +#define KB (1024) +#define MB (1024*KB) +static inline void +create_pte_mapping(unsigned long start, unsigned long end, + unsigned long mode, unsigned long mask, int large) +{ + unsigned long addr, offset = reloc_offset(); + HTAB *_htab_data = PTRRELOC(&htab_data); + HPTE *htab = (HPTE *)__v2a(_htab_data->htab); + unsigned int step; + + if (large) + step = 16*MB; + else + step = 4*KB; + + for (addr = start; addr < end; addr += step) { + unsigned long vsid = get_kernel_vsid(addr); + unsigned long va = (vsid << 28) | (addr & 0xfffffff); + make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask, + large); + } +} + +void +htab_initialize(void) +{ + unsigned long table, htab_size_bytes; + unsigned long pteg_count; + unsigned long mode_ro, mode_rw, mask; + unsigned long offset = reloc_offset(); + struct Naca *_naca = RELOC(naca); + HTAB *_htab_data = PTRRELOC(&htab_data); + + /* + * Calculate the required size of the htab. We want the number of + * PTEGs to equal one half the number of real pages. + */ + htab_size_bytes = 1UL << _naca->pftSize; + pteg_count = htab_size_bytes >> 7; + + /* For debug, make the HTAB 1/8 as big as it normally would be. 
*/ + ifppcdebug(PPCDBG_HTABSIZE) { + pteg_count >>= 3; + htab_size_bytes = pteg_count << 7; + } + + _htab_data->htab_num_ptegs = pteg_count; + _htab_data->htab_hash_mask = pteg_count - 1; + + if(_machine == _MACH_pSeries) { + /* Find storage for the HPT. Must be contiguous in + * the absolute address space. + */ + table = lmb_alloc(htab_size_bytes, htab_size_bytes); + if ( !table ) + panic("ERROR, cannot find space for HPTE\n"); + _htab_data->htab = (HPTE *)__a2v(table); + + /* htab absolute addr + encoded htabsize */ + RELOC(_SDR1) = table + __ilog2(pteg_count) - 11; + + /* Initialize the HPT with no entries */ + cacheable_memzero((void *)table, htab_size_bytes); + } else { + _htab_data->htab = NULL; + RELOC(_SDR1) = 0; + } + + mode_ro = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RXRX; + mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; + mask = pteg_count-1; + + /* Create PTE's for the kernel text and data sections plus + * the HPT and HPTX arrays. Make the assumption that + * (addr & KERNELBASE) == 0 (ie they are disjoint). + * We also assume that the va is <= 64 bits. + */ +#if 0 + create_pte_mapping((unsigned long)_stext, (unsigned long)__start_naca, mode_ro, mask); + create_pte_mapping((unsigned long)__start_naca, (unsigned long)__end_stab, mode_rw, mask); + create_pte_mapping((unsigned long)__end_stab, (unsigned long)_etext, mode_ro, mask); + create_pte_mapping((unsigned long)_etext, RELOC(klimit), mode_rw, mask); + create_pte_mapping((unsigned long)__a2v(table), (unsigned long)__a2v(table+htab_size_bytes), mode_rw, mask); +#else +#ifndef CONFIG_PPC_ISERIES + if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) { + create_pte_mapping((unsigned long)KERNELBASE, + KERNELBASE + 256*MB, mode_rw, mask, 0); + create_pte_mapping((unsigned long)KERNELBASE + 256*MB, + KERNELBASE + (_naca->physicalMemorySize), + mode_rw, mask, 1); + return; + } +#endif + create_pte_mapping((unsigned long)KERNELBASE, + KERNELBASE+(_naca->physicalMemorySize), + mode_rw, mask, 0); +#endif +} +#undef KB +#undef MB + +/* + * Create a pte. Used during initialization only. + * We assume the PTE will fit in the primary PTEG. + */ +void make_pte(HPTE *htab, + unsigned long va, unsigned long pa, int mode, + unsigned long hash_mask, int large) +{ + HPTE *hptep; + unsigned long hash, i; + volatile unsigned long x = 1; + unsigned long vpn; + +#ifdef CONFIG_PPC_PSERIES + if(_machine == _MACH_pSeriesLP) { + make_pte_LPAR(htab, va, pa, mode, hash_mask, large); + return; + } +#endif + + if (large) + vpn = va >> 24; + else + vpn = va >> 12; + + hash = hpt_hash(vpn, large); + + hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP); + + for (i = 0; i < 8; ++i, ++hptep) { + if ( hptep->dw0.dw0.v == 0 ) { /* !valid */ + hptep->dw1.dword1 = pa | mode; + hptep->dw0.dword0 = 0; + hptep->dw0.dw0.avpn = va >> 23; + hptep->dw0.dw0.bolted = 1; /* bolted */ + hptep->dw0.dw0.v = 1; /* make valid */ + return; + } + } + + /* We should _never_ get here and too early to call xmon. 
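+	 * (The for(;x;x|=1) spin below can never exit - x is volatile
+	 * and stays non-zero - so a failed bolted mapping parks the cpu
+	 * here, where a hardware debugger can find it.)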
*/
+	for(;x;x|=1);
+}
+
+/* Functions to invalidate a HPTE */
+static void hpte_invalidate_iSeries( unsigned long slot )
+{
+	HvCallHpt_invalidateSetSwBitsGet( slot, 0, 0 );
+}
+
+static void hpte_invalidate_pSeries( unsigned long slot )
+{
+	/* Local copy of the first doubleword of the HPTE */
+	union {
+		unsigned long d;
+		Hpte_dword0 h;
+	} hpte_dw0;
+
+	/* Locate the HPTE */
+	HPTE * hptep = htab_data.htab + slot;
+
+	/* Get the first doubleword of the HPTE */
+	hpte_dw0.d = hptep->dw0.dword0;
+
+	/* Invalidate the hpte */
+	hptep->dw0.dword0 = 0;
+
+	/* Invalidate the tlb */
+	{
+		unsigned long vsid, group, pi, pi_high;
+
+		vsid = hpte_dw0.h.avpn >> 5;
+		group = slot >> 3;
+		if(hpte_dw0.h.h) {
+			group = ~group;
+		}
+		pi = (vsid ^ group) & 0x7ff;
+		pi_high = (hpte_dw0.h.avpn & 0x1f) << 11;
+		pi |= pi_high;
+		_tlbie(pi << 12);
+	}
+}
+
+
+/* Select an available HPT slot for a new HPTE
+ *   return slot index (if in primary group)
+ *   return -slot index (if in secondary group)
+ */
+static long hpte_selectslot_iSeries( unsigned long vpn )
+{
+	HPTE hpte;
+	long ret_slot, orig_slot;
+	unsigned long primary_hash;
+	unsigned long hpteg_slot;
+	unsigned long slot;
+	unsigned i, k;
+	union {
+		unsigned long d;
+		Hpte_dword0 h;
+	} hpte_dw0;
+
+	ret_slot = orig_slot = HvCallHpt_findValid( &hpte, vpn );
+	if ( hpte.dw0.dw0.v ) {		/* If valid ...what do we do now? */
+		udbg_printf( "hpte_selectslot_iSeries: vpn 0x%016lx already valid at slot 0x%016lx\n", vpn, ret_slot );
+		udbg_printf( "hpte_selectslot_iSeries: returned hpte 0x%016lx 0x%016lx\n", hpte.dw0.dword0, hpte.dw1.dword1 );
+
+		return (0x8000000000000000);
+		/*	panic("select_hpte_slot found entry already valid\n"); */
+	}
+	if ( ret_slot == -1 ) {		/* -1 indicates no available slots */
+
+		/* No available entry found in secondary group */
+
+		PMC_SW_SYSTEM(htab_capacity_castouts);
+
+		primary_hash = hpt_hash(vpn, 0);
+		hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
+		k = htab_data.next_round_robin++ & 0x7;
+
+		/* Round robin pick of a non-bolted entry in the primary
+		 * group: invalidate it and reuse its slot.
+		 */
+		for ( i=0; i<HPTES_PER_GROUP; ++i ) {
+			if ( ++k == HPTES_PER_GROUP )
+				k = 0;
+			slot = hpteg_slot + k;
+			hpte_dw0.d = hpte_getword0_iSeries( slot );
+			if ( ! hpte_dw0.h.bolted ) {
+				hpte_invalidate_iSeries( slot );
+				ret_slot = slot;
+				break;
+			}
+		}
+	}
+	return ret_slot;
+}
+
+static long hpte_selectslot_pSeries( unsigned long vpn )
+{
+	HPTE * hptep;
+	unsigned long primary_hash;
+	unsigned long hpteg_slot;
+	unsigned i, k;
+
+	/* Search the primary group for an available slot */
+
+	primary_hash = hpt_hash(vpn, 0);
+	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
+	hptep = htab_data.htab + hpteg_slot;
+
+	for ( i=0; i<HPTES_PER_GROUP; ++i ) {
+		if ( hptep->dw0.dw0.v == 0 ) {
+			/* If an available slot found, return it */
+			return hpteg_slot + i;
+		}
+		hptep++;
+	}
+
+	/* No available entry found in primary group */
+
+	PMC_SW_SYSTEM(htab_primary_overflows);
+
+	/* Search the secondary group */
+
+	hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
+	hptep = htab_data.htab +  hpteg_slot;
+
+	for (i=0; i<HPTES_PER_GROUP; ++i) {
+		if ( hptep->dw0.dw0.v == 0 ) {
+			/* If an available slot found, return it */
+			return -(hpteg_slot + i);
+		}
+		hptep++;
+	}
+
+	/* No available entry found in secondary group */
+
+	PMC_SW_SYSTEM(htab_capacity_castouts);
+
+	/* Select an entry in the primary group to replace */
+
+	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
+	hptep = htab_data.htab +  hpteg_slot;
+	k = htab_data.next_round_robin++ & 0x7;
+
+	for (i=0; i<HPTES_PER_GROUP; ++i) {
+		if ( ++k == HPTES_PER_GROUP )
+			k = 0;
+		if ( ! (hptep+k)->dw0.dw0.bolted )
+			break;
+	}
+
+	hpteg_slot += k;
+
+	/* Invalidate the victim entry before handing out its slot */
+	hpte_invalidate_pSeries( hpteg_slot );
+
+	return hpteg_slot;
+}
+
+static unsigned long hpte_getword0_pSeries( unsigned long slot )
+{
+	unsigned long dword0;
+	HPTE * hptep = htab_data.htab + slot;
+
+	dword0 = hptep->dw0.dword0;
+	return dword0;
+}
+
+static long hpte_find_iSeries(unsigned long vpn)
+{
+	HPTE hpte;
+	long slot;
+
+	slot = HvCallHpt_findValid( &hpte, vpn );
+	if ( hpte.dw0.dw0.v ) {
+		if ( slot < 0 ) {
+			slot &= 0x7fffffffffffffff;
+			slot = -slot;
+		}
+	} else
+		slot = -1;
+	return slot;
+}
+
+static long hpte_find_pSeries(unsigned long vpn)
+{
+	union {
+		unsigned long d;
+		Hpte_dword0 h;
+	} hpte_dw0;
+	long slot;
+	unsigned long hash;
+	unsigned long i,j;
+
+	hash = hpt_hash(vpn, 0);
+	for ( j=0; j<2; ++j ) {
+		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+		for ( i=0; i<HPTES_PER_GROUP; ++i ) {
+			hpte_dw0.d = hpte_getword0_pSeries( slot );
+			if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) &&
+			     ( hpte_dw0.h.v ) &&
+			     ( hpte_dw0.h.h == j ) ) {
+				/* HPTE matches */
+				if ( j )
+					slot = 
-slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + return -1; +} + +/* This function is called by iSeries setup when initializing the hpt */ +void build_valid_hpte( unsigned long vsid, unsigned long ea, unsigned long pa, + pte_t * ptep, unsigned hpteflags, unsigned bolted ) +{ + unsigned long vpn, flags; + long hpte_slot; + unsigned hash; + pte_t pte; + + vpn = ((vsid << 28) | ( ea & 0xffff000 )) >> 12; + + spin_lock_irqsave( &hash_table_lock, flags ); + + hpte_slot = ppc_md.hpte_selectslot( vpn ); + hash = 0; + if ( hpte_slot < 0 ) { + if ( hpte_slot == 0x8000000000000000 ) { + udbg_printf("hash_page: ptep = 0x%016lx\n", + (unsigned long)ptep ); + udbg_printf("hash_page: ea = 0x%016lx\n", ea ); + udbg_printf("hash_page: vpn = 0x%016lx\n", vpn ); + + panic("hash_page: hpte already exists\n"); + } + hash = 1; + hpte_slot = -hpte_slot; + } + ppc_md.hpte_create_valid( hpte_slot, vpn, pa >> 12, hash, ptep, + hpteflags, bolted ); + + if ( ptep ) { + /* Get existing pte flags */ + pte = *ptep; + pte_val(pte) &= ~_PAGE_HPTEFLAGS; + + /* Add in the has hpte flag */ + pte_val(pte) |= _PAGE_HASHPTE; + + /* Add in the _PAGE_SECONDARY flag */ + pte_val(pte) |= hash << 15; + + /* Add in the hpte slot */ + pte_val(pte) |= (hpte_slot << 12) & _PAGE_GROUP_IX; + + /* Save the new pte. */ + *ptep = pte; + + } + spin_unlock_irqrestore( &hash_table_lock, flags ); +} + + +/* Create an HPTE and validate it + * It is assumed that the HPT slot currently is invalid. + * The HPTE is set with the vpn, rpn (converted to absolute) + * and flags + */ +static void hpte_create_valid_iSeries(unsigned long slot, unsigned long vpn, + unsigned long prpn, unsigned hash, + void * ptep, unsigned hpteflags, + unsigned bolted ) +{ + /* Local copy of HPTE */ + struct { + /* Local copy of first doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword0 h; + } dw0; + /* Local copy of second doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword1 h; + Hpte_dword1_flags f; + } dw1; + } lhpte; + + unsigned long avpn = vpn >> 11; + unsigned long arpn = physRpn_to_absRpn( prpn ); + + /* Fill in the local HPTE with absolute rpn, avpn and flags */ + lhpte.dw1.d = 0; + lhpte.dw1.h.rpn = arpn; + lhpte.dw1.f.flags = hpteflags; + + lhpte.dw0.d = 0; + lhpte.dw0.h.avpn = avpn; + lhpte.dw0.h.h = hash; + lhpte.dw0.h.bolted = bolted; + lhpte.dw0.h.v = 1; + + /* Now fill in the actual HPTE */ + HvCallHpt_addValidate( slot, hash, (HPTE *)&lhpte ); +} + +static void hpte_create_valid_pSeries(unsigned long slot, unsigned long vpn, + unsigned long prpn, unsigned hash, + void * ptep, unsigned hpteflags, + unsigned bolted) +{ + /* Local copy of HPTE */ + struct { + /* Local copy of first doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword0 h; + } dw0; + /* Local copy of second doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword1 h; + Hpte_dword1_flags f; + } dw1; + } lhpte; + + unsigned long avpn = vpn >> 11; + unsigned long arpn = physRpn_to_absRpn( prpn ); + + HPTE *hptep; + + /* Fill in the local HPTE with absolute rpn, avpn and flags */ + lhpte.dw1.d = 0; + lhpte.dw1.h.rpn = arpn; + lhpte.dw1.f.flags = hpteflags; + + lhpte.dw0.d = 0; + lhpte.dw0.h.avpn = avpn; + lhpte.dw0.h.h = hash; + lhpte.dw0.h.bolted = bolted; + lhpte.dw0.h.v = 1; + + /* Now fill in the actual HPTE */ + hptep = htab_data.htab + slot; + + /* Set the second dword first so that the valid bit + * is the last thing set + */ + + hptep->dw1.dword1 = lhpte.dw1.d; + + /* Guarantee the second dword is visible before + * the valid bit + */ + 
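+	/* Ordering sketch for this two-part store:
+	 *
+	 *	std	dw1	(rpn and flags, above)
+	 *	eieio		- dw1 visible before dw0
+	 *	std	dw0	(avpn and valid bit, below)
+	 *	ptesync		- wait until the HPTE is visible to the MMU
+	 *
+	 * so nothing can observe v=1 while dw1 still holds stale data.
+	 */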
+ __asm__ __volatile__ ("eieio" : : : "memory"); + + /* Now set the first dword including the valid bit */ + hptep->dw0.dword0 = lhpte.dw0.d; + + __asm__ __volatile__ ("ptesync" : : : "memory"); +} + +/* find_linux_pte returns the address of a linux pte for a given + * effective address and directory. If not found, it returns zero. + */ + +pte_t * find_linux_pte( pgd_t * pgdir, unsigned long ea ) +{ + pgd_t *pg; + pmd_t *pm; + pte_t *pt = NULL; + pte_t pte; + pg = pgdir + pgd_index( ea ); + if ( ! pgd_none( *pg ) ) { + + pm = pmd_offset( pg, ea ); + if ( ! pmd_none( *pm ) ) { + pt = pte_offset( pm, ea ); + pte = *pt; + if ( ! pte_present( pte ) ) + pt = NULL; + } + } + + return pt; + +} + +static inline unsigned long computeHptePP( unsigned long pte ) +{ + return ( pte & _PAGE_USER ) | + ( ( ( pte & _PAGE_USER ) >> 1 ) & + ( ( ~( ( pte >> 2 ) & /* _PAGE_RW */ + ( pte >> 7 ) ) ) & /* _PAGE_DIRTY */ + 1 ) ); +} + +static void hpte_updatepp_iSeries(long slot, unsigned long newpp, unsigned long va) +{ + HvCallHpt_setPp( slot, newpp ); +} + +static void hpte_updatepp_pSeries(long slot, unsigned long newpp, unsigned long va) +{ + /* Local copy of first doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword0 h; + } hpte_dw0; + + /* Local copy of second doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword1 h; + Hpte_dword1_flags f; + } hpte_dw1; + + HPTE * hptep = htab_data.htab + slot; + + /* Turn off valid bit in HPTE */ + hpte_dw0.d = hptep->dw0.dword0; + hpte_dw0.h.v = 0; + hptep->dw0.dword0 = hpte_dw0.d; + + /* Ensure it is out of the tlb too */ + _tlbie( va ); + + /* Insert the new pp bits into the HPTE */ + hpte_dw1.d = hptep->dw1.dword1; + hpte_dw1.h.pp = newpp; + hptep->dw1.dword1 = hpte_dw1.d; + + /* Ensure it is visible before validating */ + __asm__ __volatile__ ("eieio" : : : "memory"); + + /* Turn the valid bit back on in HPTE */ + hpte_dw0.h.v = 1; + hptep->dw0.dword0 = hpte_dw0.d; + + __asm__ __volatile__ ("ptesync" : : : "memory"); +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + */ +void hpte_updateboltedpp_iSeries(unsigned long newpp, unsigned long ea ) +{ + unsigned long vsid,va,vpn; + long slot; + + vsid = get_kernel_vsid( ea ); + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + vpn = va >> PAGE_SHIFT; + + slot = ppc_md.hpte_find( vpn ); + HvCallHpt_setPp( slot, newpp ); +} + + +static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr) +{ + unsigned long old; + unsigned long *p = (unsigned long *)(&(addr->dw1)); + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + rldimi %0,%2,0,62\n\ + stdcx. %0,0,%3\n\ + bne 1b" + : "=&r" (old), "=m" (*p) + : "r" (pp), "r" (p), "m" (*p) + : "cc"); +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. 
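+ *
+ * Typical (hypothetical) use, write-protecting a bolted guard page at
+ * effective address guard_ea:
+ *
+ *	hpte_updateboltedpp_pSeries( PP_RXRX, guard_ea );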
+ */ +void hpte_updateboltedpp_pSeries(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid,va,vpn,flags; + long slot; + HPTE *hptep; + + vsid = get_kernel_vsid( ea ); + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + vpn = va >> PAGE_SHIFT; + + slot = ppc_md.hpte_find( vpn ); + hptep = htab_data.htab + slot; + + set_pp_bit(newpp , hptep); + + /* Ensure it is out of the tlb too */ + spin_lock_irqsave( &hash_table_lock, flags ); + _tlbie( va ); + spin_unlock_irqrestore( &hash_table_lock, flags ); +} + + + +/* This is called very early. */ +void hpte_init_iSeries(void) +{ + ppc_md.hpte_invalidate = hpte_invalidate_iSeries; + ppc_md.hpte_updatepp = hpte_updatepp_iSeries; + ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_iSeries; + ppc_md.hpte_getword0 = hpte_getword0_iSeries; + ppc_md.hpte_selectslot = hpte_selectslot_iSeries; + ppc_md.hpte_create_valid = hpte_create_valid_iSeries; + ppc_md.hpte_find = hpte_find_iSeries; +} +void hpte_init_pSeries(void) +{ + ppc_md.hpte_invalidate = hpte_invalidate_pSeries; + ppc_md.hpte_updatepp = hpte_updatepp_pSeries; + ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeries; + ppc_md.hpte_getword0 = hpte_getword0_pSeries; + ppc_md.hpte_selectslot = hpte_selectslot_pSeries; + ppc_md.hpte_create_valid = hpte_create_valid_pSeries; + ppc_md.hpte_find = hpte_find_pSeries; +} + +/* Handle a fault by adding an HPTE + * If the address can't be determined to be valid + * via Linux page tables, return 1. If handled + * return 0 + */ +int hash_page( unsigned long ea, unsigned long access ) +{ + int rc = 1; + void * pgdir = NULL; + unsigned long va, vsid, vpn; + unsigned long newpp, hash_ind, prpn; + unsigned long hpteflags, regionid; + long slot; + struct mm_struct * mm; + pte_t old_pte, new_pte, *ptep; + volatile unsigned long x = 1; + + /* Check for invalid addresses. */ + if (!IS_VALID_EA(ea)) { + return 1; + } + + regionid = REGION_ID(ea); + switch ( regionid ) { + case USER_REGION_ID: + mm = current->mm; + if ( mm == NULL ) { + PPCDBG(PPCDBG_MM, "hash_page returning; mm = 0\n"); + return 1; + } + vsid = get_vsid(mm->context, ea ); + break; + case IO_REGION_ID: + mm = &ioremap_mm; + vsid = get_kernel_vsid( ea ); + break; + case VMALLOC_REGION_ID: + mm = &init_mm; + vsid = get_kernel_vsid( ea ); + break; +#ifdef CONFIG_PPC_EEH + case IO_UNMAPPED_REGION_ID: + udbg_printf("EEH Error ea = 0x%lx\n", ea); + PPCDBG_ENTER_DEBUGGER(); + panic("EEH Error ea = 0x%lx\n", ea); + break; +#endif + case KERNEL_REGION_ID: + /* As htab_initialize is now, we shouldn't ever get here since + * we're bolting the entire 0xC0... region. + */ + udbg_printf("Little faulted on kernel address 0x%lx\n", ea); + PPCDBG_ENTER_DEBUGGER(); + panic("Little faulted on kernel address 0x%lx\n", ea); + break; + default: + /* Not a valid range, send the problem up to do_page_fault */ + return 1; + break; + } + + /* Search the Linux page table for a match with va */ + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + vpn = va >> PAGE_SHIFT; + pgdir = mm->pgd; + PPCDBG(PPCDBG_MM, "hash_page ea = 0x%16.16lx, va = 0x%16.16lx\n current = 0x%16.16lx, access = %lx\n", ea, va, current, access); + if ( pgdir == NULL ) { + return 1; + } + + /* Lock the Linux page table to prevent mmap and kswapd + * from modifying entries while we search and update + */ + + spin_lock( &mm->page_table_lock ); + + ptep = find_linux_pte( pgdir, ea ); + /* If no pte found, send the problem up to do_page_fault */ + if ( ! 
ptep ) { + spin_unlock( &mm->page_table_lock ); + return 1; + } + + /* Acquire the hash table lock to guarantee that the linux + * pte we fetch will not change + */ + spin_lock( &hash_table_lock ); + + old_pte = *ptep; + + /* If the pte is not "present" (valid), send the problem + * up to do_page_fault. + */ + if ( ! pte_present( old_pte ) ) { + spin_unlock( &hash_table_lock ); + spin_unlock( &mm->page_table_lock ); + return 1; + } + + /* At this point we have found a pte (which was present). + * The spinlocks prevent this status from changing + * The hash_table_lock prevents the _PAGE_HASHPTE status + * from changing (RPN, DIRTY and ACCESSED too) + * The page_table_lock prevents the pte from being + * invalidated or modified + */ + +/* At this point, we have a pte (old_pte) which can be used to build or update + * an HPTE. There are 5 cases: + * + * 1. There is a valid (present) pte with no associated HPTE (this is + * the most common case) + * 2. There is a valid (present) pte with an associated HPTE. The + * current values of the pp bits in the HPTE prevent access because the + * user doesn't have appropriate access rights. + * 3. There is a valid (present) pte with an associated HPTE. The + * current values of the pp bits in the HPTE prevent access because we are + * doing software DIRTY bit management and the page is currently not DIRTY. + * 4. This is a Kernel address (0xC---) for which there is no page directory. + * There is an HPTE for this page, but the pp bits prevent access. + * Since we always set up kernel pages with R/W access for the kernel + * this case only comes about for users trying to access the kernel. + * This case is always an error and is not dealt with further here. + * 5. This is a Kernel address (0xC---) for which there is no page directory. + * There is no HPTE for this page. + + * Check the user's access rights to the page. If access should be prevented + * then send the problem up to do_page_fault. + */ + + access |= _PAGE_PRESENT; + if ( 0 == ( access & ~(pte_val(old_pte)) ) ) { + /* + * Check if pte might have an hpte, but we have + * no slot information + */ + if ( pte_val(old_pte) & _PAGE_HPTENOIX ) { + unsigned long slot; + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + slot = ppc_md.hpte_find( vpn ); + if ( slot != -1 ) { + if ( slot < 0 ) { + pte_val(old_pte) |= _PAGE_SECONDARY; + slot = -slot; + } + pte_val(old_pte) |= ((slot << 12) & _PAGE_GROUP_IX) | _PAGE_HASHPTE; + + } + } + + /* User has appropriate access rights. 
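+	 * (The check above is bitwise: access & ~pte_val(old_pte) is
+	 * non-zero exactly when a required permission bit - for example
+	 * _PAGE_RW for a store - is clear in the pte; in that case this
+	 * block is skipped and the fault is passed up to do_page_fault.)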
*/ + new_pte = old_pte; + /* If the attempted access was a store */ + if ( access & _PAGE_RW ) + pte_val(new_pte) |= _PAGE_ACCESSED | + _PAGE_DIRTY; + else + pte_val(new_pte) |= _PAGE_ACCESSED; + + /* Only cases 1, 3 and 5 still in play */ + + newpp = computeHptePP( pte_val(new_pte) ); + + /* Check if pte already has an hpte (case 3) */ + if ( pte_val(old_pte) & _PAGE_HASHPTE ) { + /* There MIGHT be an HPTE for this pte */ + unsigned long hash, slot, secondary; + /* Local copy of first doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword0 h; + } hpte_dw0; + hash = hpt_hash(vpn, 0); + secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15; + if ( secondary ) + hash = ~hash; + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + /* If there is an HPTE for this page it is indexed by slot */ + hpte_dw0.d = ppc_md.hpte_getword0( slot ); + if ( (hpte_dw0.h.avpn == (vpn >> 11) ) && + (hpte_dw0.h.v) && + (hpte_dw0.h.h == secondary ) ){ + /* HPTE matches */ + ppc_md.hpte_updatepp( slot, newpp, va ); + if ( !pte_same( old_pte, new_pte ) ) + *ptep = new_pte; + } + else { + /* HPTE is not for this pte */ + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + } + } + if ( !( pte_val(old_pte) & _PAGE_HASHPTE ) ) { + /* Cases 1 and 5 */ + /* For these cases we need to create a new + * HPTE and update the linux pte (for + * case 1). For case 5 there is no linux pte. + * + * Find an available HPTE slot + */ + slot = ppc_md.hpte_selectslot( vpn ); + + /* Debug code */ + if ( slot == 0x8000000000000000 ) { + unsigned long xold_pte = pte_val(old_pte); + unsigned long xnew_pte = pte_val(new_pte); + + udbg_printf("hash_page: ptep = 0x%016lx\n", (unsigned long)ptep ); + udbg_printf("hash_page: old_pte = 0x%016lx\n", xold_pte ); + udbg_printf("hash_page: new_pte = 0x%016lx\n", xnew_pte ); + udbg_printf("hash_page: ea = 0x%016lx\n", ea ); + udbg_printf("hash_page: va = 0x%016lx\n", va ); + udbg_printf("hash_page: access = 0x%016lx\n", access ); + + panic("hash_page: hpte already exists\n"); + } + + hash_ind = 0; + if ( slot < 0 ) { + slot = -slot; + hash_ind = 1; + } + + /* Set the physical address */ + prpn = pte_val(old_pte) >> PTE_SHIFT; + + if ( ptep ) { + /* Update the linux pte with the HPTE slot */ + pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; + pte_val(new_pte) |= hash_ind << 15; + pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; + pte_val(new_pte) |= _PAGE_HASHPTE; + /* No need to use ldarx/stdcx here because all + * who might be updating the pte will hold the page_table_lock + * or the hash_table_lock (we hold both) + */ + *ptep = new_pte; + } + + /* copy appropriate flags from linux pte */ + hpteflags = (pte_val(new_pte) & 0x1f8) | newpp; + + /* Create the HPTE */ + ppc_md.hpte_create_valid( slot, vpn, prpn, hash_ind, ptep, hpteflags, 0 ); + + } + + /* Indicate success */ + rc = 0; + } + + spin_unlock( &hash_table_lock ); + if (ptep) + spin_unlock( &mm->page_table_lock ); + + return rc; +} + +void flush_hash_page( unsigned long context, unsigned long ea, pte_t pte ) +{ + unsigned long vsid, vpn, va, hash, secondary, slot, flags; + /* Local copy of first doubleword of HPTE */ + union { + unsigned long d; + Hpte_dword0 h; + } hpte_dw0; + + if ( (ea >= USER_START ) && ( ea <= USER_END ) ) + vsid = get_vsid( context, ea ); + else + vsid = get_kernel_vsid( ea ); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, 0); + secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; + if ( secondary ) + hash = ~hash; + slot = 
(hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; + /* If there is an HPTE for this page it is indexed by slot */ + + spin_lock_irqsave( &hash_table_lock, flags); + hpte_dw0.d = ppc_md.hpte_getword0( slot ); + if ( (hpte_dw0.h.avpn == (vpn >> 11) ) && + (hpte_dw0.h.v) && + (hpte_dw0.h.h == secondary ) ){ + /* HPTE matches */ + ppc_md.hpte_invalidate( slot ); + } + else { + unsigned k; + /* Temporarily lets check for the hpte in all possible slots */ + for ( secondary = 0; secondary < 2; ++secondary ) { + hash = hpt_hash(vpn, 0); + if ( secondary ) + hash = ~hash; + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + for ( k=0; k<8; ++k ) { + hpte_dw0.d = ppc_md.hpte_getword0( slot+k ); + if ( ( hpte_dw0.h.avpn == (vpn >> 11) ) && + ( hpte_dw0.h.v ) && + ( hpte_dw0.h.h == secondary ) ) { + while (1) ; + } + } + } + + } + spin_unlock_irqrestore( &hash_table_lock, flags ); +} + +int proc_dol2crvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + int vleft, first=1, len, left, val; +#define TMPBUFLEN 256 + char buf[TMPBUFLEN], *p; + static const char *sizestrings[4] = { + "2MB", "256KB", "512KB", "1MB" + }; + static const char *clockstrings[8] = { + "clock disabled", "+1 clock", "+1.5 clock", "reserved(3)", + "+2 clock", "+2.5 clock", "+3 clock", "reserved(7)" + }; + static const char *typestrings[4] = { + "flow-through burst SRAM", "reserved SRAM", + "pipelined burst SRAM", "pipelined late-write SRAM" + }; + static const char *holdstrings[4] = { + "0.5", "1.0", "(reserved2)", "(reserved3)" + }; + + if ( ((_get_PVR() >> 16) != 8) && ((_get_PVR() >> 16) != 12)) + return -EFAULT; + + if ( /*!table->maxlen ||*/ (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + vleft = table->maxlen / sizeof(int); + left = *lenp; + + for (; left /*&& vleft--*/; first=0) { + if (write) { + while (left) { + char c; + if(get_user(c,(char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + ((char *) buffer)++; + } + if (!left) + break; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0); + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + buffer += len; + left -= len; +#if 0 + /* DRENG need a def */ + _set_L2CR(0); + _set_L2CR(val); + while ( _get_L2CR() & 0x1 ) + /* wait for invalidate to finish */; +#endif + + } else { + p = buf; + if (!first) + *p++ = '\t'; +#if 0 + /* DRENG need a def */ + val = _get_L2CR(); +#endif + p += sprintf(p, "0x%08x: ", val); + p += sprintf(p, " %s", (val >> 31) & 1 ? "enabled" : + "disabled"); + p += sprintf(p, ", %sparity", (val>>30)&1 ? "" : "no "); + p += sprintf(p, ", %s", sizestrings[(val >> 28) & 3]); + p += sprintf(p, ", %s", clockstrings[(val >> 25) & 7]); + p += sprintf(p, ", %s", typestrings[(val >> 23) & 2]); + p += sprintf(p, "%s", (val>>22)&1 ? ", data only" : ""); + p += sprintf(p, "%s", (val>>20)&1 ? ", ZZ enabled": ""); + p += sprintf(p, ", %s", (val>>19)&1 ? "write-through" : + "copy-back"); + p += sprintf(p, "%s", (val>>18)&1 ? ", testing" : ""); + p += sprintf(p, ", %sns hold",holdstrings[(val>>16)&3]); + p += sprintf(p, "%s", (val>>15)&1 ? ", DLL slow" : ""); + p += sprintf(p, "%s", (val>>14)&1 ? ", diff clock" :""); + p += sprintf(p, "%s", (val>>13)&1 ? 
", DLL bypass" :""); + + p += sprintf(p,"\n"); + + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + buffer += len; + break; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if(get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/hvCall.S linuxppc64_2_4/arch/ppc64/kernel/hvCall.S --- ../kernel.org/linux/arch/ppc64/kernel/hvCall.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/hvCall.S Thu Jul 19 14:02:55 2001 @@ -0,0 +1,99 @@ +/* + * arch/ppc64/kernel/hvCall.S + * + * + * This file contains the code to perform calls to the + * iSeries LPAR hypervisor + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "ppc_asm.h" +#include +#include + + .text + +/* + * Hypervisor call + * + * Invoke the iSeries hypervisor via the System Call instruction + * Parameters are passed to this routine in registers r3 - r10 + * + * r3 contains the HV function to be called + * r4-r10 contain the operands to the hypervisor function + * + */ + +_GLOBAL(HvCall) +_GLOBAL(HvCall0) +_GLOBAL(HvCall1) +_GLOBAL(HvCall2) +_GLOBAL(HvCall3) +_GLOBAL(HvCall4) +_GLOBAL(HvCall5) +_GLOBAL(HvCall6) +_GLOBAL(HvCall7) + + + mfcr r0 + std r0,-8(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1) + + /* r0 = 0xffffffffffffffff indicates a hypervisor call */ + + li r0,-1 + + /* Invoke the hypervisor */ + + sc + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + + /* return to caller, return value in r3 */ + + blr + +_GLOBAL(HvCall0Ret16) +_GLOBAL(HvCall1Ret16) +_GLOBAL(HvCall2Ret16) +_GLOBAL(HvCall3Ret16) +_GLOBAL(HvCall4Ret16) +_GLOBAL(HvCall5Ret16) +_GLOBAL(HvCall6Ret16) +_GLOBAL(HvCall7Ret16) + + mfcr r0 + std r0,-8(r1) + std r31,-16(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1) + + mr r31,r4 + li r0,-1 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + sc + + std r3,0(r31) + std r4,8(r31) + + mr r3,r5 + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + ld r31,-16(r1) + + blr + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/i8259.c linuxppc64_2_4/arch/ppc64/kernel/i8259.c --- ../kernel.org/linux/arch/ppc64/kernel/i8259.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/i8259.c Tue Oct 9 21:22:29 2001 @@ -0,0 +1,164 @@ +/* + * c 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include "i8259.h" +#include +#include + +unsigned char cached_8259[2] = { 0xff, 0xff }; +#define cached_A1 (cached_8259[0]) +#define cached_21 (cached_8259[1]) + +static spinlock_t i8259_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + +int i8259_pic_irq_offset; + +int i8259_irq(int cpu) +{ + int irq; + + spin_lock/*_irqsave*/(&i8259_lock/*, flags*/); + /* + * Perform an interrupt acknowledge cycle on controller 1 + */ + outb(0x0C, 0x20); + irq = inb(0x20) & 7; + if (irq == 2) + { + /* + * Interrupt is cascaded so perform interrupt + * acknowledge on controller 2 + */ + outb(0x0C, 0xA0); + irq = (inb(0xA0) & 7) + 8; + } + else if (irq==7) + { + /* + * This may be a spurious interrupt + * + * Read the interrupt status register. If the most + * significant bit is not set then there is no valid + * interrupt + */ + outb(0x0b, 0x20); + if(~inb(0x20)&0x80) { + spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/); + return -1; + } + } + spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/); + return irq; +} + +static void i8259_mask_and_ack_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + + if (irq_nr > 7) { + cached_A1 |= 1 << (irq_nr-8); + inb(0xA1); /* DUMMY */ + outb(cached_A1,0xA1); + outb(0x20,0xA0); /* Non-specific EOI */ + outb(0x20,0x20); /* Non-specific EOI to cascade */ + } else { + cached_21 |= 1 << irq_nr; + inb(0x21); /* DUMMY */ + outb(cached_21,0x21); + outb(0x20,0x20); /* Non-specific EOI */ + } + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_set_irq_mask(int irq_nr) +{ + outb(cached_A1,0xA1); + outb(cached_21,0x21); +} + +static void i8259_mask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + if ( irq_nr < 8 ) + cached_21 |= 1 << irq_nr; + else + cached_A1 |= 1 << (irq_nr-8); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_unmask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + if ( irq_nr < 8 ) + cached_21 &= ~(1 << irq_nr); + else + cached_A1 &= ~(1 << (irq_nr-8)); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_end_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + i8259_unmask_irq(irq); +} + +struct hw_interrupt_type i8259_pic = { + " i8259 ", + NULL, + NULL, + i8259_unmask_irq, + i8259_mask_irq, + i8259_mask_and_ack_irq, + i8259_end_irq, + NULL +}; + +void __init i8259_init(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + /* init master interrupt controller */ + outb(0x11, 0x20); /* Start init sequence */ + outb(0x00, 0x21); /* Vector base */ + outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0x21); /* Select 8086 mode */ + outb(0xFF, 0x21); /* Mask all */ + /* init slave interrupt controller */ + outb(0x11, 0xA0); /* Start init sequence */ + outb(0x08, 0xA1); /* Vector base */ + outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0xA1); /* Select 8086 mode */ + outb(0xFF, 0xA1); /* Mask all */ + outb(cached_A1, 0xA1); + outb(cached_21, 0x21); + spin_unlock_irqrestore(&i8259_lock, flags); + request_irq( 
i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT, + "82c59 secondary cascade", NULL ); + +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/i8259.h linuxppc64_2_4/arch/ppc64/kernel/i8259.h --- ../kernel.org/linux/arch/ppc64/kernel/i8259.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/i8259.h Mon May 7 12:44:10 2001 @@ -0,0 +1,19 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _PPC_KERNEL_i8259_H +#define _PPC_KERNEL_i8259_H + +#include "local_irq.h" + +extern struct hw_interrupt_type i8259_pic; + +void i8259_init(void); +int i8259_irq(int); + +#endif /* _PPC_KERNEL_i8259_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_IoMmTable.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_IoMmTable.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_IoMmTable.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_IoMmTable.c Fri Nov 30 16:53:36 2001 @@ -0,0 +1,163 @@ +/************************************************************************/ +/* This module supports the iSeries I/O Address translation mapping */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, December 14, 2000 */ +/* Added Bar table for IoMm performance. */ +/* Ported to ppc64 */ +/* Added dynamic table allocation */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iSeries_IoMmTable.h" +#include "pci.h" + +/*******************************************************************/ +/* Table defines */ +/* Each Entry size is 4 MB * 1024 Entries = 4GB I/O address space. */ +/*******************************************************************/ +#define Max_Entries 1024 +unsigned long iSeries_IoMmTable_Entry_Size = 0x0000000000400000; +unsigned long iSeries_Base_Io_Memory = 0xE000000000000000; +unsigned long iSeries_Max_Io_Memory = 0xE000000000000000; +static long iSeries_CurrentIndex = 0; + +/*******************************************************************/ +/* Lookup Tables. 
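+   A pseudo I/O address decodes into these tables roughly as follows
+   (the real lookup is inlined in iSeries_pci.c):
+	index   = (IoAddress - iSeries_Base_Io_Memory) /
+		  iSeries_IoMmTable_Entry_Size;
+	DevNode = iSeries_IoMmTable[index];
+	Bar     = iSeries_IoBarTable[index];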
*/
+/*******************************************************************/
+struct iSeries_Device_Node** iSeries_IoMmTable;
+u8* iSeries_IoBarTable;
+
+/*******************************************************************/
+/* Static and Global variables                                     */
+/*******************************************************************/
+static char* iSeriesPciIoText = "iSeries PCI I/O";
+static spinlock_t iSeriesIoMmTableLock = SPIN_LOCK_UNLOCKED;
+
+/*******************************************************************/
+/* iSeries_IoMmTable_Initialize                                    */
+/*******************************************************************/
+/* Allocates and initializes the Address Translation Table and Bar */
+/* Tables to get them ready for use.  Must be called before any    */
+/* I/O space is handed out to the device BARs.                     */
+/* A follow up method, iSeries_IoMmTable_Status, can be called to  */
+/* adjust the table after the device BARs have been assigned to    */
+/* resize the table.                                               */
+/*******************************************************************/
+void iSeries_IoMmTable_Initialize(void)
+{
+	spin_lock(&iSeriesIoMmTableLock);
+	iSeries_IoMmTable  = kmalloc(sizeof(void*)*Max_Entries,GFP_KERNEL);
+	iSeries_IoBarTable = kmalloc(sizeof(u8)*Max_Entries, GFP_KERNEL);
+	spin_unlock(&iSeriesIoMmTableLock);
+	PCIFR("IoMmTable Initialized 0x%p", iSeries_IoMmTable);
+	if(iSeries_IoMmTable == NULL || iSeries_IoBarTable == NULL) {
+		panic("PCI: I/O tables allocation failed.\n");
+	}
+}
+
+/*******************************************************************/
+/* iSeries_IoMmTable_AllocateEntry                                 */
+/*******************************************************************/
+/* Adds a pci_dev entry to the address translation table.          */
+/*******************************************************************/
+/* - Allocates the number of entries required in table based on    */
+/*   BAR size.                                                     */
+/* - Allocates starting at iSeries_Base_Io_Memory and increases.   */
+/* - The size is rounded up to be a multiple of entry size.        */
+/* - CurrentIndex is incremented to keep track of the last entry.  */
+/* - Builds the resource entry for allocated BARs.                 */
+/*******************************************************************/
+static void iSeries_IoMmTable_AllocateEntry(struct pci_dev* PciDev, int BarNumber)
+{
+	struct resource* BarResource = &PciDev->resource[BarNumber];
+	long             BarSize     = pci_resource_len(PciDev,BarNumber);
+	/***********************************************************/
+	/* No space to allocate, quick exit, skip Allocation.      */
+	/***********************************************************/
+	if(BarSize == 0) return;
+	/***********************************************************/
+	/* Set Resource values.                                    */
+	/***********************************************************/
+	spin_lock(&iSeriesIoMmTableLock);
+	BarResource->name  = iSeriesPciIoText;
+	BarResource->start = iSeries_IoMmTable_Entry_Size*iSeries_CurrentIndex;
+	BarResource->start+= iSeries_Base_Io_Memory;
+	BarResource->end   = BarResource->start+BarSize-1;
+	/***********************************************************/
+	/* Allocate the number of table entries needed for BAR. 
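+	   (For example a 6MB BAR consumes two 4MB entries; the loop
+	   below rounds the BAR size up to a multiple of the entry size.)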
*/
+	/***********************************************************/
+	while (BarSize > 0 ) {
+		*(iSeries_IoMmTable +iSeries_CurrentIndex) = (struct iSeries_Device_Node*)PciDev->sysdata;
+		*(iSeries_IoBarTable+iSeries_CurrentIndex) = BarNumber;
+		BarSize -= iSeries_IoMmTable_Entry_Size;
+		++iSeries_CurrentIndex;
+	}
+	iSeries_Max_Io_Memory = (iSeries_IoMmTable_Entry_Size*iSeries_CurrentIndex)+iSeries_Base_Io_Memory;
+	spin_unlock(&iSeriesIoMmTableLock);
+}
+
+/*******************************************************************/
+/* iSeries_allocateDeviceBars                                      */
+/*******************************************************************/
+/* - Allocates ALL pci_dev BARs and updates the resources with the */
+/*   BAR values.  BARs with zero length will not have the          */
+/*   resources set.  The HvCallPci_getBarParms is used to get the  */
+/*   size of the BAR space.  It calls                              */
+/*   iSeries_IoMmTable_AllocateEntry to allocate each entry.       */
+/* - Loops through the BAR resources (0 - 5) including the ROM     */
+/*   resource (6).                                                 */
+/*******************************************************************/
+void iSeries_allocateDeviceBars(struct pci_dev* PciDev)
+{
+	struct resource* BarResource;
+	int              BarNumber;
+	for(BarNumber = 0; BarNumber <= PCI_ROM_RESOURCE; ++BarNumber) {
+		BarResource = &PciDev->resource[BarNumber];
+		iSeries_IoMmTable_AllocateEntry(PciDev, BarNumber);
+	}
+}
+
+/************************************************************************/
+/* Translates the IoAddress to the device that is mapped to IoSpace.   */
+/* This code is inlined, see the iSeries_pci.c file for the replacement.*/
+/************************************************************************/
+struct iSeries_Device_Node* iSeries_xlateIoMmAddress(void* IoAddress)
+{
+	return NULL;
+}
+
+/************************************************************************
+ * Status hook for IoMmTable
+ ************************************************************************/
+void iSeries_IoMmTable_Status(void)
+{
+	PCIFR("IoMmTable......: 0x%p",iSeries_IoMmTable);
+	PCIFR("IoMmTable Range: 0x%p to 0x%p",iSeries_Base_Io_Memory,iSeries_Max_Io_Memory);
+	return;
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_IoMmTable.h linuxppc64_2_4/arch/ppc64/kernel/iSeries_IoMmTable.h
--- ../kernel.org/linux/arch/ppc64/kernel/iSeries_IoMmTable.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_IoMmTable.h	Fri Nov 30 16:53:36 2001
@@ -0,0 +1,85 @@
+#ifndef _ISERIES_IOMMTABLE_H
+#define _ISERIES_IOMMTABLE_H
+/************************************************************************/
+/* File iSeries_IoMmTable.h created by Allan Trautman on Dec 12 2001.   */
+/************************************************************************/
+/* Interfaces for the write/read Io address translation table.          */
+/* Copyright (C) 20yy Allan H Trautman, IBM Corporation                 */
+/*                                                                      */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or    */
+/* (at your option) any later version.                                  */
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU General Public License for more details. 
*/ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created December 12, 2000 */ +/* Ported to ppc64, August 30, 2001 */ +/* End Change Activity */ +/************************************************************************/ + +struct pci_dev; +struct iSeries_Device_Node; + +extern struct iSeries_Device_Node** iSeries_IoMmTable; +extern u8* iSeries_IoBarTable; +extern unsigned long iSeries_Base_Io_Memory; +extern unsigned long iSeries_Max_Io_Memory; +extern unsigned long iSeries_Base_Io_Memory; +extern unsigned long iSeries_IoMmTable_Entry_Size; +/************************************************************************/ +/* iSeries_IoMmTable_Initialize */ +/************************************************************************/ +/* - Initalizes the Address Translation Table and get it ready for use. */ +/* Must be called before any client calls any of the other methods. */ +/* */ +/* Parameters: None. */ +/* */ +/* Return: None. */ +/************************************************************************/ +extern void iSeries_IoMmTable_Initialize(void); +extern void iSeries_IoMmTable_Status(void); + +/************************************************************************/ +/* iSeries_allocateDeviceBars */ +/************************************************************************/ +/* - Allocates ALL pci_dev BAR's and updates the resources with the BAR */ +/* value. BARS with zero length will not have the resources. The */ +/* HvCallPci_getBarParms is used to get the size of the BAR space. */ +/* It calls iSeries_IoMmTable_AllocateEntry to allocate each entry. */ +/* */ +/* Parameters: */ +/* pci_dev = Pointer to pci_dev structure that will be mapped to pseudo */ +/* I/O Address. */ +/* */ +/* Return: */ +/* The pci_dev I/O resources updated with pseudo I/O Addresses. */ +/************************************************************************/ +extern void iSeries_allocateDeviceBars(struct pci_dev* ); + +/************************************************************************/ +/* iSeries_xlateIoMmAddress */ +/************************************************************************/ +/* - Translates an I/O Memory address to Device Node that has been the */ +/* allocated the psuedo I/O Address. */ +/* */ +/* Parameters: */ +/* IoAddress = I/O Memory Address. */ +/* */ +/* Return: */ +/* An iSeries_Device_Node to the device mapped to the I/O address. The*/ +/* BarNumber and BarOffset are valid if the Device Node is returned. */ +/************************************************************************/ +extern struct iSeries_Device_Node* iSeries_xlateIoMmAddress(void* IoAddress); + +#endif /* _ISERIES_IOMMTABLE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_VpdInfo.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_VpdInfo.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_VpdInfo.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_VpdInfo.c Thu Nov 15 13:50:11 2001 @@ -0,0 +1,318 @@ +/************************************************************************/ +/* File iSeries_vpdInfo.c created by Allan Trautman on Fri Feb 2 2001. 
*/ +/************************************************************************/ +/* This code gets the card location of the hardware */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, Feb 2, 2001 */ +/* Ported to ppc64, August 20, 2001 */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +//#include +#include +#include "pci.h" + +/************************************************/ +/* Size of Bus VPD data */ +/************************************************/ +#define BUS_VPDSIZE 1024 +/************************************************/ +/* Bus Vpd Tags */ +/************************************************/ +#define VpdEndOfDataTag 0x78 +#define VpdEndOfAreaTag 0x79 +#define VpdIdStringTag 0x82 +#define VpdVendorAreaTag 0x84 +/************************************************/ +/* Mfg Area Tags */ +/************************************************/ +#define VpdFruFlag 0x4647 // "FG" +#define VpdFruFrameId 0x4649 // "FI" +#define VpdSlotMapFormat 0x4D46 // "MF" +#define VpdAsmPartNumber 0x504E // "PN" +#define VpdFruSerial 0x534E // "SN" +#define VpdSlotMap 0x534D // "SM" + +/************************************************/ +/* Structures of the areas */ +/************************************************/ +struct MfgVpdAreaStruct { + u16 Tag; + u8 TagLength; + u8 AreaData1; + u8 AreaData2; +}; +typedef struct MfgVpdAreaStruct MfgArea; +#define MFG_ENTRY_SIZE 3 + +struct SlotMapStruct { + u8 AgentId; + u8 SecondaryAgentId; + u8 PhbId; + char CardLocation[3]; + char Parms[8]; + char Reserved[2]; +}; +typedef struct SlotMapStruct SlotMap; +#define SLOT_ENTRY_SIZE 16 + +/**************************************************************** + * * + * Bus, Card, Board, FrameId, CardLocation. 
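+ * The LocationData is kmalloc'ed here; the caller owns it and
+ * must kfree it when finished.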
+ ****************************************************************/
+LocationData* iSeries_GetLocationData(struct pci_dev* PciDev)
+{
+    struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)PciDev->sysdata;
+    LocationData* LocationPtr = (LocationData*)kmalloc(LOCATION_DATA_SIZE, GFP_KERNEL);
+    if (LocationPtr == NULL) {
+        printk("PCI: LocationData area allocation failed!\n");
+        return NULL;
+    }
+    memset(LocationPtr,0,LOCATION_DATA_SIZE);
+    LocationPtr->Bus     = ISERIES_BUS(DevNode);
+    LocationPtr->Board   = DevNode->Board;
+    LocationPtr->FrameId = DevNode->FrameId;
+    LocationPtr->Card    = PCI_SLOT(DevNode->DevFn);
+    strcpy(&LocationPtr->CardLocation[0],&DevNode->CardLocation[0]);
+    return LocationPtr;
+}
+
+/************************************************************************/
+/* Formats the device information.                                      */
+/* - Pass in a pci_dev* pointer to the device.                          */
+/* - Pass in a buffer to place the data in.  Be careful: the buffer     */
+/*   must be as big as the caller says it is.  It should be at least    */
+/*   128 bytes.                                                         */
+/* Returns the length of the string data put in the buffer.             */
+/* Format:                                                              */
+/* PCI: Bus  0, Device 26, Vendor 0x12AE  Frame  1, Card C10  Ethernet  */
+/* controller                                                           */
+/************************************************************************/
+int iSeries_Device_Information(struct pci_dev* PciDev,char* Buffer, int BufferSize)
+{
+    struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)PciDev->sysdata;
+    char* BufPtr = Buffer;
+    int LineLen  = 0;
+
+    if (DevNode == NULL) {
+        LineLen = sprintf(BufPtr+LineLen, "PCI: iSeries_Device_Information DevNode is NULL");
+        return LineLen;
+    }
+
+    if (BufferSize >= 128) {
+        LineLen = sprintf(BufPtr+LineLen,"PCI: Bus%3d, Device%3d, Vendor %04X ",
+                          ISERIES_BUS(DevNode), PCI_SLOT(PciDev->devfn),PciDev->vendor);
+
+        LineLen += sprintf(BufPtr+LineLen,"Frame%3d, Card %4s ", DevNode->FrameId,DevNode->CardLocation);
+
+        if (pci_class_name(PciDev->class >> 8) == 0) {
+            LineLen += sprintf(BufPtr+LineLen,"0x%04X ",(int)(PciDev->class >> 8));
+        }
+        else {
+            LineLen += sprintf(BufPtr+LineLen,"%s",pci_class_name(PciDev->class >> 8) );
+        }
+    }
+    return LineLen;
+}
+/************************************************************************/
+/* Builds a character string of the device location, Frame 1, Card C10  */
+/************************************************************************/
+int device_Location(struct pci_dev* PciDev,char* BufPtr)
+{
+    struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)PciDev->sysdata;
+    return sprintf(BufPtr,"PCI: Bus%3d, Device%3d, Vendor %04X, Location %s",
+                   DevNode->DsaAddr.busNumber,
+                   DevNode->AgentId,
+                   DevNode->Vendor,
+                   DevNode->Location);
+}
+
+/*****************************************************************/
+/* Parse the Slot Area                                           */
+/*****************************************************************/
+void iSeries_Parse_SlotArea(SlotMap* MapPtr,int MapLen, struct iSeries_Device_Node* DevNode)
+{
+    int SlotMapLen = MapLen;
+    SlotMap* SlotMapPtr = MapPtr;
+    /*************************************************************/
+    /* Parse the slot labels until we find the one requested.    */
+    /*************************************************************/
+    while (SlotMapLen > 0) {
+        if (SlotMapPtr->AgentId == DevNode->AgentId ) {
+            /*******************************************************/
+            /* If the PHB wasn't found yet, take the first entry   */
+            /* found.                                              */
+            /*******************************************************/
+            if (DevNode->PhbId == 0xff) {
+                DevNode->PhbId = SlotMapPtr->PhbId;
+            }
+
/**************************************************/ + /* Found it, extract the data. */ + /**************************************************/ + if (SlotMapPtr->PhbId == DevNode->PhbId ) { + memcpy(&DevNode->CardLocation,&SlotMapPtr->CardLocation,3); + DevNode->CardLocation[3] = 0; + break; + } + } + /*********************************************************/ + /* Point to the next Slot */ + /*********************************************************/ + SlotMapPtr = (SlotMap*)((char*)SlotMapPtr+SLOT_ENTRY_SIZE); + SlotMapLen -= SLOT_ENTRY_SIZE; + } +} + +/*****************************************************************/ +/* Parse the Mfg Area */ +/*****************************************************************/ +static void iSeries_Parse_MfgArea(u8* AreaData,int AreaLen, struct iSeries_Device_Node* DevNode) +{ + MfgArea* MfgAreaPtr = (MfgArea*)AreaData; + int MfgAreaLen = AreaLen; + u16 SlotMapFmt = 0; + + /*************************************************************/ + /* Parse Mfg Data */ + /*************************************************************/ + while (MfgAreaLen > 0) { + int MfgTagLen = MfgAreaPtr->TagLength; + /*******************************************************/ + /* Frame ID (FI 4649020310 ) */ + /*******************************************************/ + if (MfgAreaPtr->Tag == VpdFruFrameId) { /* FI */ + DevNode->FrameId = MfgAreaPtr->AreaData1; + } + /*******************************************************/ + /* Slot Map Format (MF 4D46020004 ) */ + /*******************************************************/ + else if (MfgAreaPtr->Tag == VpdSlotMapFormat){ /* MF */ + SlotMapFmt = (MfgAreaPtr->AreaData1*256)+(MfgAreaPtr->AreaData2); + } + /*******************************************************/ + /* Slot Map (SM 534D90 */ + /*******************************************************/ + else if (MfgAreaPtr->Tag == VpdSlotMap){ /* SM */ + SlotMap* SlotMapPtr; + if (SlotMapFmt == 0x1004) SlotMapPtr = (SlotMap*)((char*)MfgAreaPtr+MFG_ENTRY_SIZE+1); + else SlotMapPtr = (SlotMap*)((char*)MfgAreaPtr+MFG_ENTRY_SIZE); + iSeries_Parse_SlotArea(SlotMapPtr,MfgTagLen, DevNode); + } + /*********************************************************/ + /* Point to the next Mfg Area */ + /* Use defined size, sizeof give wrong answer */ + /*********************************************************/ + MfgAreaPtr = (MfgArea*)((char*)MfgAreaPtr + MfgTagLen + MFG_ENTRY_SIZE); + MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE); + } +} + +/*****************************************************************/ +/* Look for "BUS".. Data is not Null terminated. */ +/* PHBID of 0xFF indicates PHB was not found in VPD Data. 
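*/
+/* For example, if the ID-string area held the bytes               */
+/* 'B','U','S',' ',' ','3', the scan below would skip the blanks   */
+/* and return a PhbId of 0x03.  (Example bytes invented here for   */
+/* illustration; the real data comes from HvCallPci_getBusVpd.)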
*/ +/*****************************************************************/ +static int iSeries_Parse_PhbId(u8* AreaPtr,int AreaLength) +{ + u8* PhbPtr = AreaPtr; + int DataLen = AreaLength; + char PhbId = 0xFF; + while (DataLen > 0) { + if (*PhbPtr == 'B' && *(PhbPtr+1) == 'U' && *(PhbPtr+2) == 'S') { + PhbPtr += 3; + while(*PhbPtr == ' ') ++PhbPtr; + PhbId = (*PhbPtr & 0x0F); + break; + } + ++PhbPtr; + --DataLen; + } + return PhbId; +} + +/****************************************************************/ +/* Parse out the VPD Areas */ +/****************************************************************/ +static void iSeries_Parse_Vpd(u8* VpdData, int VpdDataLen, struct iSeries_Device_Node* DevNode) +{ + u8* TagPtr = VpdData; + int DataLen = VpdDataLen-3; + /*************************************************************/ + /* Parse the Areas */ + /*************************************************************/ + while (*TagPtr != VpdEndOfAreaTag && DataLen > 0) { + int AreaLen = *(TagPtr+1) + (*(TagPtr+2)*256); + u8* AreaData = TagPtr+3; + + if (*TagPtr == VpdIdStringTag) { + DevNode->PhbId = iSeries_Parse_PhbId(AreaData,AreaLen); + } + else if (*TagPtr == VpdVendorAreaTag) { + iSeries_Parse_MfgArea(AreaData,AreaLen,DevNode); + } + /********************************************************* + * Point to next Area. + *********************************************************/ + TagPtr = AreaData + AreaLen; + DataLen -= AreaLen; + } +} + +/**************************************************************** + * iSeries_Get_Location_Code(struct iSeries_Device_Node*) * + * + ****************************************************************/ +void iSeries_Get_Location_Code(struct iSeries_Device_Node* DevNode) +{ + int BusVpdLen = 0; + u8* BusVpdPtr = (u8*)kmalloc(BUS_VPDSIZE, GFP_KERNEL); + if (BusVpdPtr == NULL) { + printk("PCI: Bus VPD Buffer allocation failure.\n"); + return; + } + BusVpdLen = HvCallPci_getBusVpd(ISERIES_BUS(DevNode),REALADDR(BusVpdPtr),BUS_VPDSIZE); + if (BusVpdLen == 0) { + kfree(BusVpdPtr); + printk("PCI: Bus VPD Buffer zero length.\n"); + return; + } + //printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); + /*************************************************************/ + /* Make sure this is what I think it is */ + /*************************************************************/ + if (*BusVpdPtr != VpdIdStringTag) { /*0x82 */ + printk("PCI: Bus VPD Buffer missing starting tag.\n"); + kfree(BusVpdPtr); + return; + } + /***************************************************************/ + /***************************************************************/ + iSeries_Parse_Vpd(BusVpdPtr,BusVpdLen, DevNode); + sprintf(DevNode->Location,"Frame%3d, Card %-4s",DevNode->FrameId,DevNode->CardLocation); + kfree(BusVpdPtr); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_irq.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_irq.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_irq.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_irq.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,260 @@ +/************************************************************************/ +/* This module supports the iSeries PCI bus interrupt handling */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. 
*/ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, December 13, 2000 by Wayne Holm */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +hw_irq_controller iSeries_IRQ_handler = { + "iSeries irq controller", + iSeries_startup_IRQ, /* startup */ + iSeries_shutdown_IRQ, /* shutdown */ + iSeries_enable_IRQ, /* enable */ + iSeries_disable_IRQ, /* disable */ + NULL, /* ack */ + iSeries_end_IRQ, /* end */ + NULL /* set_affinity */ +}; + + +struct iSeries_irqEntry { + u32 dsa; + struct iSeries_irqEntry* next; +}; + +struct iSeries_irqAnchor { + u8 valid : 1; + u8 reserved : 7; + u16 entryCount; + struct iSeries_irqEntry* head; +}; + +struct iSeries_irqAnchor iSeries_irqMap[NR_IRQS]; + +void iSeries_init_irqMap(int irq); + +/* This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c */ +void __init iSeries_init_IRQ(void) +{ + int i; + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].handler = &iSeries_IRQ_handler; + irq_desc[i].status = 0; + irq_desc[i].status |= IRQ_DISABLED; + irq_desc[i].depth = 1; + iSeries_init_irqMap(i); + } + /* Register PCI event handler and open an event path */ + PPCDBG(PPCDBG_BUSWALK,"Register PCI event handler and open an event path\n"); + XmPciLpEvent_init(); + return; +} + +/********************************************************************** + * Called by iSeries_init_IRQ + * Prevent IRQs 0 and 255 from being used. IRQ 0 appears in + * uninitialized devices. IRQ 255 appears in the PCI interrupt + * line register if a PCI error occurs, + *********************************************************************/ +void __init iSeries_init_irqMap(int irq) +{ + iSeries_irqMap[irq].valid = (irq == 0 || irq == 255)? 0 : 1; + iSeries_irqMap[irq].entryCount = 0; + iSeries_irqMap[irq].head = NULL; +} + +/* This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot */ +/* It calculates the irq value for the slot. 
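*/
+/* Worked example (matches the buswalk trace quoted later in        */
+/* iSeries_pci.c): bus 0x17 with deviceId 0x12 gives idsel 1 and    */
+/* function 2, so irq = (((22*16 + 0*8 + 2)*9/8) % 254) + 1         */
+/*                    = (398 % 254) + 1 = 145 = 0x91.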
*/ +int __init iSeries_allocate_IRQ(HvBusNumber busNumber, HvSubBusNumber subBusNumber, HvAgentId deviceId) +{ + u8 idsel = (deviceId >> 4); + u8 function = deviceId & 0x0F; + int irq = ((((busNumber-1)*16 + (idsel-1)*8 + function)*9/8) % 254) + 1; + return irq; +} + +/* This is called out of iSeries_scan_slot to assign the EADS slot to its IRQ number */ +int __init iSeries_assign_IRQ(int irq, HvBusNumber busNumber, HvSubBusNumber subBusNumber, HvAgentId deviceId) +{ + int rc; + u32 dsa = (busNumber << 16) | (subBusNumber << 8) | deviceId; + struct iSeries_irqEntry* newEntry; + unsigned long flags; + + if (irq < 0 || irq >= NR_IRQS) { + return -1; + } + newEntry = kmalloc(sizeof(*newEntry), GFP_KERNEL); + if (newEntry == NULL) { + return -ENOMEM; + } + newEntry->dsa = dsa; + newEntry->next = NULL; + /******************************************************************** + * Probably not necessary to lock the irq since allocation is only + * done during buswalk, but it should not hurt anything except a + * little performance to be smp safe. + *******************************************************************/ + spin_lock_irqsave(&irq_desc[irq].lock, flags); + + if (iSeries_irqMap[irq].valid) { + /* Push the new element onto the irq stack */ + newEntry->next = iSeries_irqMap[irq].head; + iSeries_irqMap[irq].head = newEntry; + ++iSeries_irqMap[irq].entryCount; + rc = 0; + PPCDBG(PPCDBG_BUSWALK,"iSeries_assign_IRQ 0x%04X.%02X.%02X = 0x%04X\n",busNumber, subBusNumber, deviceId, irq); + } + else { + printk("PCI: Something is wrong with the iSeries_irqMap. \n"); + kfree(newEntry); + rc = -1; + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + return rc; +} + + +/* This is called by iSeries_activate_IRQs */ +unsigned int iSeries_startup_IRQ(unsigned int irq) +{ + struct iSeries_irqEntry* entry; + u32 bus, subBus, deviceId, function, mask; + for(entry=iSeries_irqMap[irq].head; entry!=NULL; entry=entry->next) { + bus = (entry->dsa >> 16) & 0xFFFF; + subBus = (entry->dsa >> 8) & 0xFF; + deviceId = entry->dsa & 0xFF; + function = deviceId & 0x0F; + /* Link the IRQ number to the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, irq); + /* Unmask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_unmaskFisr(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK,"iSeries_activate_IRQ 0x%02X.%02X.%02X Irq:0x%02X\n",bus,subBus,deviceId,irq); + } + return 0; +} + +/* This is called out of iSeries_fixup to activate interrupt + * generation for usable slots */ +void __init iSeries_activate_IRQs() +{ + int irq; + unsigned long flags; + for (irq=0; irq < NR_IRQS; irq++) { + spin_lock_irqsave(&irq_desc[irq].lock, flags); + irq_desc[irq].handler->startup(irq); + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } +} + +/* this is not called anywhere currently */ +void iSeries_shutdown_IRQ(unsigned int irq) { + struct iSeries_irqEntry* entry; + u32 bus, subBus, deviceId, function, mask; + + /* irq should be locked by the caller */ + + for (entry=iSeries_irqMap[irq].head; entry; entry=entry->next) { + bus = (entry->dsa >> 16) & 0xFFFF; + subBus = (entry->dsa >> 8) & 0xFF; + deviceId = entry->dsa & 0xFF; + function = deviceId & 0x0F; + /* Invalidate the IRQ number in the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, 0); + /* Mask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_maskFisr(bus, subBus, deviceId, mask); + } + +} + +/*********************************************************** + * This will be called by device drivers 
(via disable_IRQ) + * to disable INTA in the bridge interrupt status register. + ***********************************************************/ +void iSeries_disable_IRQ(unsigned int irq) +{ + struct iSeries_irqEntry* entry; + u32 bus, subBus, deviceId, mask; + + /* The IRQ has already been locked by the caller */ + + for (entry=iSeries_irqMap[irq].head; entry; entry=entry->next) { + bus = (entry->dsa >> 16) & 0xFFFF; + subBus = (entry->dsa >> 8) & 0xFF; + deviceId = entry->dsa & 0xFF; + /* Mask secondary INTA */ + mask = 0x80000000; + HvCallPci_maskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK,"iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n",bus,subBus,deviceId,irq); + } +} + +/*********************************************************** + * This will be called by device drivers (via enable_IRQ) + * to enable INTA in the bridge interrupt status register. + ***********************************************************/ +void iSeries_enable_IRQ(unsigned int irq) +{ + struct iSeries_irqEntry* entry; + u32 bus, subBus, deviceId, mask; + + /* The IRQ has already been locked by the caller */ + for (entry=iSeries_irqMap[irq].head; entry; entry=entry->next) { + bus = (entry->dsa >> 16) & 0xFFFF; + subBus = (entry->dsa >> 8) & 0xFF; + deviceId = entry->dsa & 0xFF; + /* Unmask secondary INTA */ + mask = 0x80000000; + HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK,"iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n",bus,subBus,deviceId,irq); + } +} + +/* Need to define this so ppc_irq_dispatch_handler will NOT call + enable_IRQ at the end of interrupt handling. However, this + does nothing because there is not enough information provided + to do the EOI HvCall. This is done by XmPciLpEvent.c */ +void iSeries_end_IRQ(unsigned int irq) +{ +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_pci.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_pci.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci.c Fri Nov 30 16:53:36 2001 @@ -0,0 +1,938 @@ +/* + * iSeries_pci.c + * + * Copyright (C) 2001 Allan Trautman, IBM Corporation + * + * iSeries specific routines for PCI. + * + * Based on code from pci.c and iSeries_pci.c 32bit + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iSeries_IoMmTable.h" +#include "pci.h" + +extern struct pci_controller* hose_head; +extern struct pci_controller** hose_tail; +extern int global_phb_number; +extern int panic_timeout; + +extern struct Naca *naca; +extern struct device_node *allnodes; +extern unsigned long phb_tce_table_init(struct pci_controller *phb); +extern unsigned long iSeries_Base_Io_Memory; + +extern struct pci_ops iSeries_pci_ops; +extern struct flightRecorder* PciFr; +extern struct TceTable* tceTables[256]; + +/******************************************************************* + * Counters and control flags. + *******************************************************************/ +extern long Pci_Io_Read_Count; +extern long Pci_Io_Write_Count; +extern long Pci_Cfg_Read_Count; +extern long Pci_Cfg_Write_Count; +extern long Pci_Error_Count; + +extern int Pci_Retry_Max; +extern int Pci_Error_Flag; +extern int Pci_Trace_Flag; + +extern void iSeries_MmIoTest(void); + + +/******************************************************************* + * Forward declares of prototypes. + *******************************************************************/ +struct iSeries_Device_Node* find_Device_Node(struct pci_dev* PciDev); +struct iSeries_Device_Node* get_Device_Node(struct pci_dev* PciDev); + +unsigned long find_and_init_phbs(void); +void fixup_resources(struct pci_dev *dev); +void iSeries_pcibios_fixup(void); +struct pci_controller* alloc_phb(struct device_node *dev, char *model, unsigned int addr_size_words) ; + +void iSeries_Scan_PHBs_Slots(struct pci_controller* Phb); +void iSeries_Scan_EADs_Bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel); +int iSeries_Scan_Bridge_Slot(HvBusNumber Bus, HvSubBusNumber SubBus, int MaxAgents); +void list_device_nodes(void); + +struct pci_dev; + +LIST_HEAD(Global_Device_List); + +int DeviceCount = 0; + +/********************************************************************************** + * Log Error infor in Flight Recorder to system Console. + * Filter out the device not there errors. + * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx + * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx + * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx + **********************************************************************************/ +void pci_Log_Error(char* Error_Text, int Bus, int SubBus, int AgentId, int HvRc) +{ + if( HvRc != 0x0302) { + char ErrorString[128]; + sprintf(ErrorString,"%s Failed: 0x%02X.%02X.%02X Rc: 0x%04X",Error_Text,Bus,SubBus,AgentId,HvRc); + PCIFR(ErrorString); + printk("PCI: %s\n",ErrorString); + } +} + +/********************************************************************************** + * Dump the iSeries Temp Device Node + *<4>buswalk [swapper : - DeviceNode: 0xC000000000634300 + *<4>00. Device Node = 0xC000000000634300 + *<4> - PciDev = 0x0000000000000000 + *<4> - tDevice = 0x 17:01.00 0x1022 00 + *<4> 4. 
Device Node = 0xC000000000634480 + *<4> - PciDev = 0x0000000000000000 + *<4> - Device = 0x 18:38.16 Irq:0xA7 Vendor:0x1014 Flags:0x00 + *<4> - Devfn = 0xB0: 22.18 + **********************************************************************************/ +void dumpDevice_Node(struct iSeries_Device_Node* DevNode) +{ + udbg_printf("Device Node = 0x%p\n",DevNode); + udbg_printf(" - PciDev = 0x%p\n",DevNode->PciDev); + udbg_printf(" - Device = 0x%4X:%02X.%02X (0x%02X)\n", + ISERIES_BUS(DevNode), + ISERIES_SUBBUS(DevNode), + DevNode->AgentId, + DevNode->DevFn); + udbg_printf(" - DSA = 0x%04X\n",ISERIES_DSA(DevNode)>>32 ); + + udbg_printf(" = Irq:0x%02X Vendor:0x%04X Flags:0x%02X\n", + DevNode->Irq, + DevNode->Vendor, + DevNode->Flags ); + udbg_printf(" - Location = %s\n",DevNode->CardLocation); +} +/********************************************************************************** + * Walk down the device node chain + **********************************************************************************/ +void list_device_nodes(void) +{ + struct list_head* Device_Node_Ptr = Global_Device_List.next; + while(Device_Node_Ptr != &Global_Device_List) { + dumpDevice_Node( (struct iSeries_Device_Node*)Device_Node_Ptr ); + Device_Node_Ptr = Device_Node_Ptr->next; + } +} + + +/*********************************************************************** + * build_device_node(u16 Bus, int SubBus, u8 DevFn) + * + ***********************************************************************/ +struct iSeries_Device_Node* build_device_node(HvBusNumber Bus, HvSubBusNumber SubBus, int AgentId, int Function) +{ + struct iSeries_Device_Node* DeviceNode; + + PPCDBG(PPCDBG_BUSWALK,"- "__FUNCTION__" 0x%02X.%02X.%02X Function: %02X\n",Bus,SubBus,AgentId, Function); + + DeviceNode = kmalloc(sizeof(struct iSeries_Device_Node), GFP_KERNEL); + if(DeviceNode == NULL) return NULL; + + memset(DeviceNode,0,sizeof(struct iSeries_Device_Node) ); + list_add_tail(&DeviceNode->Device_List,&Global_Device_List); + /*DeviceNode->DsaAddr = ((u64)Bus<<48)+((u64)SubBus<<40)+((u64)0x10<<32); */ + ISERIES_BUS(DeviceNode) = Bus; + ISERIES_SUBBUS(DeviceNode) = SubBus; + DeviceNode->DsaAddr.deviceId = 0x10; + DeviceNode->DsaAddr.barNumber = 0; + DeviceNode->AgentId = AgentId; + DeviceNode->DevFn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId),Function ); + DeviceNode->IoRetry = 0; + iSeries_Get_Location_Code(DeviceNode); + PCIFR("Device 0x%02X.%2X, Node:0x%p ",ISERIES_BUS(DeviceNode),ISERIES_DEVFUN(DeviceNode),DeviceNode); + return DeviceNode; +} +/**************************************************************************** +* +* Allocate pci_controller(phb) initialized common variables. +* +*****************************************************************************/ +struct pci_controller* pci_alloc_pci_controllerX(char *model, enum phb_types controller_type) +{ + struct pci_controller *hose; + hose = (struct pci_controller*)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + if(hose == NULL) return NULL; + + memset(hose, 0, sizeof(struct pci_controller)); + if(strlen(model) < 8) strcpy(hose->what,model); + else memcpy(hose->what,model,7); + hose->type = controller_type; + hose->global_number = global_phb_number; + global_phb_number++; + + *hose_tail = hose; + hose_tail = &hose->next; + return hose; +} + +/**************************************************************************** + * + * unsigned int __init find_and_init_phbs(void) + * + * Description: + * This function checks for all possible system PCI host bridges that connect + * PCI buses. 
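(There is no direct config-space probe here: every query goes
+ *   through hypervisor calls, and an HvCallXm_testBus answer of 0x0301 is
+ *   the expected "bus not present" return, per the check in the loop below.)
+ *   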
The system hypervisor is queried as to the guest partition + * ownership status. A pci_controller is build for any bus which is partially + * owned or fully owned by this guest partition. + ****************************************************************************/ +unsigned long __init find_and_init_phbs(void) +{ + struct pci_controller* phb; + HvBusNumber BusNumber; + + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__" Entry\n"); + + /* Check all possible buses. */ + for (BusNumber = 0; BusNumber < 256; BusNumber++) { + int RtnCode = HvCallXm_testBus(BusNumber); + if (RtnCode == 0) { + phb = pci_alloc_pci_controllerX("PHB HV", phb_type_hypervisor); + if(phb == NULL) { + printk("PCI: Allocate pci_controller failed.\n"); + PCIFR( "Allocate pci_controller failed."); + return -1; + } + phb->pci_mem_offset = phb->local_number = BusNumber; + phb->first_busno = BusNumber; + phb->last_busno = BusNumber; + phb->ops = &iSeries_pci_ops; + + PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n",phb,BusNumber); + PCIFR("Create iSeries PHB controller: %04X",BusNumber); + + /***************************************************/ + /* Find and connect the devices. */ + /***************************************************/ + iSeries_Scan_PHBs_Slots(phb); + } + /* Check for Unexpected Return code, a clue that something */ + /* has gone wrong. */ + else if(RtnCode != 0x0301) { + PCIFR("Unexpected Return on Probe(0x%04X): 0x%04X",BusNumber,RtnCode); + } + + } + return 0; +} +/*********************************************************************** + * ppc64_pcibios_init + * + * Chance to initialize and structures or variable before PCI Bus walk. + * + *<4>buswalk [swapper : iSeries_pcibios_init Entry. + *<4>buswalk [swapper : IoMmTable Initialized 0xC00000000034BD30 + *<4>buswalk [swapper : find_and_init_phbs Entry + *<4>buswalk [swapper : Create iSeries pci_controller:(0xC00000001F5C7000), Bus 0x0017 + *<4>buswalk [swapper : Connect EADs: 0x17.00.12 = 0x00 + *<4>buswalk [swapper : iSeries_assign_IRQ 0x0017.00.12 = 0x0091 + *<4>buswalk [swapper : - allocate and assign IRQ 0x17.00.12 = 0x91 + *<4>buswalk [swapper : - FoundDevice: 0x17.28.10 = 0x12AE + *<4>buswalk [swapper : - build_device_node 0x17.28.12 + *<4>buswalk [swapper : iSeries_pcibios_init Exit. 
+ ***********************************************************************/ +void iSeries_pcibios_init(void) +{ + struct pci_controller *phb; + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__" Entry.\n"); + + iSeries_IoMmTable_Initialize(); + + find_and_init_phbs(); + + /* Create the TCE Tables */ + phb = hose_head; + while(phb != NULL) { + create_pci_bus_tce_table(phb->local_number); + PCIFR("Bus 0x%04X TCE Table %p",phb->local_number,tceTables[phb->local_number] ); + phb = phb->next; + } + + + pci_assign_all_busses = 0; + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__" Exit.\n"); +} + +/*********************************************************************** + * iSeries_pcibios_fixup(void) + ***********************************************************************/ +void __init iSeries_pcibios_fixup(void) +{ + struct pci_dev* PciDev; + struct iSeries_Device_Node* DeviceNode; + char Buffer[256]; + int DeviceCount = 0; + + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__" Entry.\n"); + + /******************************************************/ + /* Fix up at the device node and pci_dev relationship */ + /******************************************************/ + pci_for_each_dev(PciDev) { + DeviceNode = find_Device_Node(PciDev); + if(DeviceNode != NULL) { + ++DeviceCount; + PciDev->sysdata = (void*)DeviceNode; + DeviceNode->PciDev = PciDev; + + PPCDBG(PPCDBG_BUSWALK,"PciDev 0x%p <==> DevNode 0x%p\n",PciDev,DeviceNode ); + + iSeries_allocateDeviceBars(PciDev); + + PPCDBGCALL(PPCDBG_BUSWALK,dumpPci_Dev(PciDev) ); + + iSeries_Device_Information(PciDev,Buffer, sizeof(Buffer) ); + printk("%d. %s\n",DeviceCount,Buffer); + + } else { + printk("PCI: Device Tree not found for 0x%016lX\n",(unsigned long)PciDev); + } + } + iSeries_IoMmTable_Status(); + + iSeries_activate_IRQs(); + + // This is test code. + //mf_displaySrc(0xC9000100); + //Pci_IoTest(); + // Pci_CfgIoTest(); + // mf_displaySrc(0xC9000500); + // Pci_MMIoTest(); + //mf_displaySrc(0xC9000999); +} +/*********************************************************************** + * iSeries_pcibios_fixup_bus(int Bus) + * + ***********************************************************************/ +void iSeries_pcibios_fixup_bus(struct pci_bus* PciBus) +{ + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__"(0x%04X) Entry.\n",PciBus->number); + +} +/*********************************************************************** + * find_floppy(void) + * + * Finds the default floppy device, if the system has one, and returns + * the pci_dev for the isa bridge for the floppy device. + * + * Note: On iSeries there will only be a virtual diskette. + ***********************************************************************/ +struct pci_dev* +find_floppy(void) +{ + PPCDBG(PPCDBG_BUSWALK,"- Find Floppy pci_dev.. None on iSeries.\n"); + return NULL; +} + + +/*********************************************************************** + * fixup_resources(struct pci_dev *dev) + * + ***********************************************************************/ +void fixup_resources(struct pci_dev *PciDev) +{ + PPCDBG(PPCDBG_BUSWALK,__FUNCTION__" PciDev %p\n",PciDev); +} + + +/******************************************************************************** +* Loop through each node function to find usable EADs bridges. +*********************************************************************************/ +void iSeries_Scan_PHBs_Slots(struct pci_controller* Phb) +{ + struct HvCallPci_DeviceInfo* DevInfo; + HvBusNumber Bus = Phb->local_number; /* System Bus */ + HvSubBusNumber SubBus = 0; /* EADs is always 0. 
*/ + int HvRc = 0; + int IdSel = 1; + int MaxAgents = 8; + + DevInfo = (struct HvCallPci_DeviceInfo*)kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL); + if(DevInfo == NULL) return; + + /******************************************************************************** + * Probe for EADs Bridges + ********************************************************************************/ + for (IdSel=1; IdSel < MaxAgents; ++IdSel) { + HvRc = HvCallPci_getDeviceInfo(Bus, SubBus, IdSel,REALADDR(DevInfo), sizeof(struct HvCallPci_DeviceInfo)); + if (HvRc == 0) { + if(DevInfo->deviceType == HvCallPci_NodeDevice) { + iSeries_Scan_EADs_Bridge(Bus, SubBus, IdSel); + } + else printk("PCI: Invalid System Configuration(0x%02X.\n",DevInfo->deviceType); + } + else pci_Log_Error("getDeviceInfo",Bus, SubBus, IdSel,HvRc); + } + kfree(DevInfo); +} + + +/******************************************************************************** +* +*********************************************************************************/ +void iSeries_Scan_EADs_Bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel) +{ + struct HvCallPci_BridgeInfo* BridgeInfo; + HvAgentId AgentId; + int Function; + int HvRc; + + BridgeInfo = (struct HvCallPci_BridgeInfo*)kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL); + if(BridgeInfo == NULL) return; + + /********************************************************************* + * Note: hvSubBus and irq is always be 0 at this level! + *********************************************************************/ + for (Function=0; Function < 8; ++Function) { + AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(Bus, SubBus, AgentId, 0); + if (HvRc == 0) { + /* Connect EADs: 0x18.00.12 = 0x00 */ + PPCDBG(PPCDBG_BUSWALK,"PCI:Connect EADs: 0x%02X.%02X.%02X\n",Bus, SubBus, AgentId); + PCIFR( "Connect EADs: 0x%02X.%02X.%02X", Bus, SubBus, AgentId); + HvRc = HvCallPci_getBusUnitInfo(Bus, SubBus, AgentId, + REALADDR(BridgeInfo), sizeof(struct HvCallPci_BridgeInfo)); + if (HvRc == 0) { + PPCDBG(PPCDBG_BUSWALK,"PCI: BridgeInfo, Type: 0x%02X, SubBus 0x%02X, MaxAgents 0x%02X\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents); + + if (BridgeInfo->busUnitInfo.deviceType == HvCallPci_BridgeDevice) { + /* Scan_Bridge_Slot...: 0x18.00.12 */ + iSeries_Scan_Bridge_Slot(Bus,BridgeInfo->subBusNumber,BridgeInfo->maxAgents); + } + else printk("PCI: Invalid Bridge Configuration(0x%02X)",BridgeInfo->busUnitInfo.deviceType); + } + } + else if(HvRc != 0x000B) pci_Log_Error("EADs Connect",Bus,SubBus,AgentId,HvRc); + } + kfree(BridgeInfo); +} + +/******************************************************************************** +* +* This assumes that the node slot is always on the primary bus! 
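+*
+* (AgentId packs IdSel in the high nibble and Function in the low nibble,
+*  mirroring the deviceId >> 4 split in iSeries_allocate_IRQ; IdSel 1,
+*  Function 2 would yield AgentId 0x12, for example.)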
+* +*********************************************************************************/ +int iSeries_Scan_Bridge_Slot(HvBusNumber Bus, HvSubBusNumber SubBus, int MaxAgents) +{ + struct iSeries_Device_Node* DeviceNode; + u16 VendorId = 0; + int HvRc = 0; + int Irq = 0; + int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus); + int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus); + HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function); + int FirstSlotId= 0; + + /**********************************************************/ + /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */ + /**********************************************************/ + Irq = iSeries_allocate_IRQ(Bus, 0, AgentId); + iSeries_assign_IRQ(Irq, Bus, 0, AgentId); + PPCDBG(PPCDBG_BUSWALK,"PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n",Bus, 0, AgentId, Irq ); + + /**************************************************************************** + * Connect all functions of any device found. + ****************************************************************************/ + for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) { + for (Function = 0; Function < 8; ++Function) { + AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(Bus, SubBus, AgentId, Irq); + if( HvRc == 0) { + HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId, PCI_VENDOR_ID, &VendorId); + if( HvRc == 0) { + /**********************************************************/ + /* FoundDevice: 0x18.28.10 = 0x12AE */ + /**********************************************************/ + HvCallPci_configStore8(Bus, SubBus, AgentId, PCI_INTERRUPT_LINE, Irq); + PPCDBG(PPCDBG_BUSWALK,"PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X\n", + Bus, SubBus, AgentId, VendorId); + ++DeviceCount; + PCIFR("Device(%4d): 0x%02X.%02X.%02X",DeviceCount,Bus, SubBus, AgentId); + DeviceNode = build_device_node(Bus, SubBus, EADsIdSel, Function); + DeviceNode->Vendor = VendorId; + DeviceNode->Irq = Irq; + + /*********************************************************** + * On the first device/function, assign irq to slot + ***********************************************************/ + if(Function == 0) { + FirstSlotId = AgentId; + // AHT iSeries_assign_IRQ(Irq, Bus, SubBus, AgentId); + } + } + else pci_Log_Error("Read Vendor",Bus,SubBus,AgentId,HvRc); + } + else pci_Log_Error("Connect Bus Unit",Bus,SubBus, AgentId,HvRc); + } /* for (Function = 0; Function < 8; ++Function) */ + } /* for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) */ + return HvRc; +} +/************************************************************************/ +/* I/0 Memory copy MUST use mmio commands on iSeries */ +/* To do; For performance, include the hv call directly */ +/************************************************************************/ +void* iSeries_memset(void* dest, char c, size_t Count) +{ + u8 ByteValue = c; + long NumberOfBytes = Count; + char* IoBuffer = dest; + while(NumberOfBytes > 0) { + iSeries_Write_Byte( ByteValue, (void*)IoBuffer ); + ++IoBuffer; + -- NumberOfBytes; + } + return dest; +} +void* iSeries_memcpy_toio(void *dest, void *source, size_t count) +{ + char *dst = dest; + char *src = source; + long NumberOfBytes = count; + while(NumberOfBytes > 0) { + iSeries_Write_Byte(*src++, (void*)dst++); + -- NumberOfBytes; + } + return dest; +} +void* iSeries_memcpy_fromio(void *dest, void *source, size_t count) +{ + char *dst = dest; + char *src = source; + long NumberOfBytes = count; + while(NumberOfBytes > 0) { + *dst++ = 
iSeries_Read_Byte( (void*)src++); + -- NumberOfBytes; + } + return dest; +} +/********************************************************************************** + * Look down the chain to find the matching Device Device + **********************************************************************************/ +struct iSeries_Device_Node* find_Device_Node(struct pci_dev* PciDev) +{ + struct list_head* Device_Node_Ptr = Global_Device_List.next; + int Bus = PciDev->bus->number; + int DevFn = PciDev->devfn; + + while(Device_Node_Ptr != &Global_Device_List) { + struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)Device_Node_Ptr; + if(Bus == ISERIES_BUS(DevNode) && DevFn == DevNode->DevFn) { + return DevNode; + } + Device_Node_Ptr = Device_Node_Ptr->next; + } + return NULL; +} +/******************************************************************/ +/* Returns the device node for the passed pci_dev */ +/* Sanity Check Node PciDev to passed pci_dev */ +/* If none is found, returns a NULL which the client must handle. */ +/******************************************************************/ +struct iSeries_Device_Node* get_Device_Node(struct pci_dev* PciDev) +{ + struct iSeries_Device_Node* Node; + Node = (struct iSeries_Device_Node*)PciDev->sysdata; + if(Node == NULL ) { + Node = find_Device_Node(PciDev); + } + else if(Node->PciDev != PciDev) { + Node = find_Device_Node(PciDev); + } + return Node; +} +/********************************************************************************** + * + * Read PCI Config Space Code + * + **********************************************************************************/ +/** BYTE *************************************************************************/ +int iSeries_Node_read_config_byte(struct iSeries_Device_Node* DevNode, int Offset, u8* ReadValue) +{ + u8 ReadData; + if(DevNode == NULL) { return 0x301; } + ++Pci_Cfg_Read_Count; + DevNode->ReturnCode = HvCallPci_configLoad8(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,&ReadData); + if(Pci_Trace_Flag == 1) { + PCIFR("RCB: 0x%04X.%02X 0x%04X = 0x%02X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,ReadData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: RCB: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + PCIFR( "RCB: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + } + *ReadValue = ReadData; + return DevNode->ReturnCode; +} +/** WORD *************************************************************************/ +int iSeries_Node_read_config_word(struct iSeries_Device_Node* DevNode, int Offset, u16* ReadValue) +{ + u16 ReadData; + if(DevNode == NULL) { return 0x301; } + ++Pci_Cfg_Read_Count; + DevNode->ReturnCode = HvCallPci_configLoad16(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,&ReadData); + if(Pci_Trace_Flag == 1) { + PCIFR("RCW: 0x%04X.%02X 0x%04X = 0x%04X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,ReadData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: RCW: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + PCIFR( "RCW: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + + } + *ReadValue = ReadData; + return DevNode->ReturnCode; +} +/** DWORD *************************************************************************/ +int iSeries_Node_read_config_dword(struct iSeries_Device_Node* DevNode, int Offset, u32* ReadValue) +{ + u32 ReadData; + if(DevNode == NULL) { return 0x301; } + ++Pci_Cfg_Read_Count; + 
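/* The 0x10 below is the fixed config-cycle deviceId (IdSel 1,      */
+    /* Function 0 in the AgentId encoding) that build_device_node      */
+    /* stores in DsaAddr.deviceId for every device node.               */
+    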
DevNode->ReturnCode = HvCallPci_configLoad32(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,&ReadData); + if(Pci_Trace_Flag == 1) { + PCIFR("RCL: 0x%04X.%02X 0x%04X = 0x%08X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,ReadData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: RCL: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + PCIFR( "RCL: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + } + *ReadValue = ReadData; + return DevNode->ReturnCode; +} +int iSeries_pci_read_config_byte(struct pci_dev* PciDev, int Offset, u8* ReadValue) { + struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev); + if(DevNode == NULL) return 0x0301; + return iSeries_Node_read_config_byte( DevNode ,Offset,ReadValue); +} +int iSeries_pci_read_config_word(struct pci_dev* PciDev, int Offset, u16* ReadValue) { + struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev); + if(DevNode == NULL) return 0x0301; + return iSeries_Node_read_config_word( DevNode ,Offset,ReadValue ); +} +int iSeries_pci_read_config_dword(struct pci_dev* PciDev, int Offset, u32* ReadValue) { + struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev); + if(DevNode == NULL) return 0x0301; + return iSeries_Node_read_config_dword(DevNode ,Offset,ReadValue ); +} +/**********************************************************************************/ +/* */ +/* Write PCI Config Space */ +/* */ +/** BYTE *************************************************************************/ +int iSeries_Node_write_config_byte(struct iSeries_Device_Node* DevNode, int Offset, u8 WriteData) +{ + ++Pci_Cfg_Write_Count; + DevNode->ReturnCode = HvCallPci_configStore8(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,WriteData); + if(Pci_Trace_Flag == 1) { + PCIFR("WCB: 0x%04X.%02X 0x%04X = 0x%02X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,WriteData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: WCB: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + PCIFR( "WCB: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + } + return DevNode->ReturnCode; +} +/** WORD *************************************************************************/ +int iSeries_Node_write_config_word(struct iSeries_Device_Node* DevNode, int Offset, u16 WriteData) +{ + ++Pci_Cfg_Write_Count; + DevNode->ReturnCode = HvCallPci_configStore16(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,WriteData); + if(Pci_Trace_Flag == 1) { + PCIFR("WCW: 0x%04X.%02X 0x%04X = 0x%04X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,WriteData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: WCW: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + PCIFR( "WCW: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + } + return DevNode->ReturnCode; +} +/** DWORD *************************************************************************/ +int iSeries_Node_write_config_dword(struct iSeries_Device_Node* DevNode, int Offset, u32 WriteData) +{ + ++Pci_Cfg_Write_Count; + DevNode->ReturnCode = HvCallPci_configStore32(ISERIES_BUS(DevNode),ISERIES_SUBBUS(DevNode),0x10, + Offset,WriteData); + if(Pci_Trace_Flag == 1) { + PCIFR("WCL: 0x%04X.%02X 0x%04X = 0x%08X",ISERIES_BUS(DevNode),DevNode->DevFn,Offset,WriteData); + } + if(DevNode->ReturnCode != 0 ) { + printk("PCI: WCL: 0x%04X.%02X Error: 0x%04X\n",ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode); + 
PCIFR( "WCL: 0x%04X.%02X Error: 0x%04X", ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->ReturnCode);
+    }
+    return DevNode->ReturnCode;
+}
+int iSeries_pci_write_config_byte( struct pci_dev* PciDev,int Offset, u8 WriteValue)
+{
+    struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev);
+    if(DevNode == NULL) return 0x0301;
+    return iSeries_Node_write_config_byte( DevNode,Offset,WriteValue);
+}
+int iSeries_pci_write_config_word( struct pci_dev* PciDev,int Offset,u16 WriteValue)
+{
+    struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev);
+    if(DevNode == NULL) return 0x0301;
+    return iSeries_Node_write_config_word( DevNode,Offset,WriteValue);
+}
+int iSeries_pci_write_config_dword(struct pci_dev* PciDev,int Offset,u32 WriteValue)
+{
+    struct iSeries_Device_Node* DevNode = get_Device_Node(PciDev);
+    if(DevNode == NULL) return 0x0301;
+    return iSeries_Node_write_config_dword(DevNode,Offset,WriteValue);
+}
+
+/************************************************************************/
+/* Branch Table                                                         */
+/************************************************************************/
+struct pci_ops iSeries_pci_ops = {
+    iSeries_pci_read_config_byte,
+    iSeries_pci_read_config_word,
+    iSeries_pci_read_config_dword,
+    iSeries_pci_write_config_byte,
+    iSeries_pci_write_config_word,
+    iSeries_pci_write_config_dword
+};
+
+/************************************************************************
+ * Check Return Code
+ * -> On failure, print and log the information.
+ *    Increment the retry count; if it exceeds the max, panic the
+ *    partition.
+ * -> If a retry was in progress, print and log the success.
+ ************************************************************************
+ * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234
+ * PCI: Device 23.90 ReadL Retry( 1)
+ * PCI: Device 23.90 ReadL Retry Successful(1)
+ ************************************************************************/
+int CheckReturnCode(char* TextHdr, struct iSeries_Device_Node* DevNode, u64 RtnCode)
+{
+    if(RtnCode != 0) {
+        ++Pci_Error_Count;
+        ++DevNode->IoRetry;
+        PCIFR( "%s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X",
+               TextHdr,ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->IoRetry,(int)RtnCode);
+        printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n",
+               TextHdr,ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->IoRetry,(int)RtnCode);
+        /*******************************************************/
+        /* Bump the retry and check for retry count exceeded.  */
+        /* If exceeded, panic the system.                      */
+        /*******************************************************/
+        if(DevNode->IoRetry > Pci_Retry_Max && Pci_Error_Flag > 0 ) {
+            mf_displaySrc(0xB6000103);
+            panic_timeout = 0;
+            panic("PCI: Hardware I/O Error, SRC B6000103, Automatic Reboot Disabled.\n");
+        }
+        return -1;    /* Retry */
+    }
+    /********************************************************************
+     * If a retry was in progress, log the success and reset the retry
+     * count.
+     *********************************************************************/
+    else if(DevNode->IoRetry > 0) {
+        PCIFR("%s: Device 0x%04X:%02X Retry Successful(%2d).",
+              TextHdr,ISERIES_BUS(DevNode),DevNode->DevFn,DevNode->IoRetry);
+        DevNode->IoRetry = 0;
+        return 0;
+    }
+    return 0;
+}
+/************************************************************************/
+/* Translate the I/O address into a device node, bar, and bar offset.   */
+/* Note: Make sure the passed variables end up on the stack to avoid    */
+/* being exposed as device globals.
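*/
+/* Illustrative numbers only: if iSeries_IoMmTable_Entry_Size were      */
+/* 0x10000, an IoAddress 0x28000 bytes above iSeries_Base_Io_Memory     */
+/* would select table entry 2 with a BarOffset of 0x8000.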
*/ +/************************************************************************/ +static inline struct iSeries_Device_Node* xlateIoMmAddress(void* IoAddress, + union HvDsaMap* DsaPtr, + u64* BarOffsetPtr) { + + unsigned long BaseIoAddr = (unsigned long)IoAddress-iSeries_Base_Io_Memory; + long TableIndex = BaseIoAddr/iSeries_IoMmTable_Entry_Size; + struct iSeries_Device_Node* DevNode = *(iSeries_IoMmTable +TableIndex); + if(DevNode != NULL) { + DsaPtr->DsaAddr = ISERIES_DSA(DevNode); + DsaPtr->Dsa.barNumber = *(iSeries_IoBarTable+TableIndex); + *BarOffsetPtr = BaseIoAddr % iSeries_IoMmTable_Entry_Size; + } + else { + panic("PCI: Invalid PCI IoAddress detected!\n"); + } + return DevNode; +} + +/************************************************************************/ +/* Read MM I/O Instructions for the iSeries */ +/* On MM I/O error, all ones are returned and iSeries_pci_IoError is cal*/ +/* else, data is returned in big Endian format. */ +/************************************************************************/ +/* iSeries_Read_Byte = Read Byte ( 8 bit) */ +/* iSeries_Read_Word = Read Word (16 bit) */ +/* iSeries_Read_Long = Read Long (32 bit) */ +/************************************************************************/ +u8 iSeries_Read_Byte(void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad8, &Return, DsaData.DsaAddr,BarOffset, 0); + } while (CheckReturnCode("RDB",DevNode, Return.rc) != 0); + + if(Pci_Trace_Flag == 1) PCIFR("RDB: IoAddress 0x%p = 0x%02X",IoAddress, (u8)Return.value); + return (u8)Return.value; +} +u16 iSeries_Read_Word(void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad16,&Return, DsaData.DsaAddr,BarOffset, 0); + } while (CheckReturnCode("RDW",DevNode, Return.rc) != 0); + + if(Pci_Trace_Flag == 1) PCIFR("RDW: IoAddress 0x%p = 0x%04X",IoAddress, swab16((u16)Return.value)); + return swab16((u16)Return.value); +} +u32 iSeries_Read_Long(void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad32,&Return, DsaData.DsaAddr,BarOffset, 0); + } while (CheckReturnCode("RDL",DevNode, Return.rc) != 0); + + if(Pci_Trace_Flag == 1) PCIFR("RDL: IoAddress 0x%p = 0x%04X",IoAddress, swab32((u32)Return.value)); + return swab32((u32)Return.value); +} +/************************************************************************/ +/* Write MM I/O Instructions for the iSeries */ +/************************************************************************/ +/* iSeries_Write_Byte = Write Byte (8 bit) */ +/* iSeries_Write_Word = Write Word(16 bit) */ +/* iSeries_Write_Long = Write Long(32 bit) */ +/************************************************************************/ +void iSeries_Write_Byte(u8 Data, void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Write_Count; + Return.rc = HvCall4(HvCallPciBarStore8, DsaData.DsaAddr,BarOffset, Data, 0); + } while 
(CheckReturnCode("WWB",DevNode, Return.rc) != 0); + if(Pci_Trace_Flag == 1) PCIFR("WWB: IoAddress 0x%p = 0x%02X",IoAddress,Data); +} +void iSeries_Write_Word(u16 Data, void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Write_Count; + Return.rc = HvCall4(HvCallPciBarStore16,DsaData.DsaAddr,BarOffset, swab16(Data), 0); + } while (CheckReturnCode("WWW",DevNode, Return.rc) != 0); + if(Pci_Trace_Flag == 1) PCIFR("WWW: IoAddress 0x%p = 0x%04X",IoAddress,Data); +} +void iSeries_Write_Long(u32 Data, void* IoAddress) +{ + u64 BarOffset; + union HvDsaMap DsaData; + struct HvCallPci_LoadReturn Return; + struct iSeries_Device_Node* DevNode = xlateIoMmAddress(IoAddress,&DsaData,&BarOffset); + + do { + ++Pci_Io_Write_Count; + Return.rc = HvCall4(HvCallPciBarStore32,DsaData.DsaAddr,BarOffset, swab32(Data), 0); + } while (CheckReturnCode("WWL",DevNode, Return.rc) != 0); + if(Pci_Trace_Flag == 1) PCIFR("WWL: IoAddress 0x%p = 0x%08X",IoAddress, Data); +} +/* + * This is called very early before the page table is setup. + * There are warnings here because of type mismatches.. Okay for now. AHT + */ +void +iSeries_pcibios_init_early(void) +{ + //ppc_md.pcibios_read_config_byte = iSeries_Node_read_config_byte; + //ppc_md.pcibios_read_config_word = iSeries_Node_read_config_word; + //ppc_md.pcibios_read_config_dword = iSeries_Node_read_config_dword; + //ppc_md.pcibios_write_config_byte = iSeries_Node_write_config_byte; + //ppc_md.pcibios_write_config_word = iSeries_Node_write_config_word; + //ppc_md.pcibios_write_config_dword = iSeries_Node_write_config_dword; +} + +/************************************************************************/ +/* Set the slot reset line to the state passed in. */ +/* This is the platform specific for code for the pci_reset_device */ +/* function. */ +/************************************************************************/ +int pci_set_reset(struct pci_dev* PciDev, int State) { + struct iSeries_Device_Node* DeviceNode = (struct iSeries_Device_Node*)PciDev->sysdata; + if (DeviceNode == NULL) { + printk("PCI: Pci Reset Failed, Device Node not found for pci_dev %p\n",PciDev); + return -1; + } + DeviceNode->ReturnCode = HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode),0x00,DeviceNode->AgentId,State); + return DeviceNode->ReturnCode; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_pci_reset.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci_reset.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_pci_reset.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci_reset.c Fri Nov 9 15:49:40 2001 @@ -0,0 +1,89 @@ +/************************************************************************/ +/* File iSeries_pci_reset.c created by Allan Trautman on Mar 21 2001. */ +/************************************************************************/ +/* This code supports the pci interface on the IBM iSeries systems. */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. 
*/
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU General Public License for more details.                         */
+/*                                                                      */
+/* You should have received a copy of the GNU General Public License    */
+/* along with this program; if not, write to the:                       */
+/* Free Software Foundation, Inc.,                                      */
+/* 59 Temple Place, Suite 330,                                          */
+/* Boston, MA 02111-1307 USA                                            */
+/************************************************************************/
+/* Change Activity:                                                     */
+/*   Created, March 20, 2001                                            */
+/*   April 30, 2001, Added return codes on functions.                   */
+/*   September 10, 2001, Ported to ppc64.                               */
+/* End Change Activity                                                  */
+/************************************************************************/
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "pci.h"
+
+/************************************************************************/
+/* Interface to toggle the reset line.                                  */
+/* AssertTime and DelayTime are given in tenths of a second.            */
+/************************************************************************/
+int iSeries_Device_ToggleReset(struct pci_dev* PciDev, int AssertTime, int DelayTime)
+{
+    unsigned long AssertDelay, WaitDelay;
+    struct iSeries_Device_Node* DeviceNode = (struct iSeries_Device_Node*)PciDev->sysdata;
+    if (DeviceNode == NULL) {
+        printk("PCI: Pci Reset Failed, Device Node not found for pci_dev %p\n",PciDev);
+        return -1;
+    }
+    /********************************************************************
+     * Set defaults, Assert is .5 second, Wait is 3 seconds.
+     ********************************************************************/
+    if (AssertTime == 0) AssertDelay = ( 5 * HZ)/10;
+    else AssertDelay = (AssertTime*HZ)/10;
+    if (DelayTime == 0) WaitDelay = (30 * HZ)/10;
+    else WaitDelay = (DelayTime* HZ)/10;
+
+    /********************************************************************
+     * Assert reset
+     ********************************************************************/
+    DeviceNode->ReturnCode = HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode),0x00,DeviceNode->AgentId,1);
+    if (DeviceNode->ReturnCode == 0) {
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        schedule_timeout(AssertDelay);            /* Sleep for the time */
+        DeviceNode->ReturnCode = HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode),0x00,DeviceNode->AgentId, 0);
+
+        /***************************************************************
+         * Wait for device to reset
+         ***************************************************************/
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        schedule_timeout(WaitDelay);
+    }
+    if (DeviceNode->ReturnCode == 0) {
+        PCIFR("Slot 0x%04X.%02X Reset\n",ISERIES_BUS(DeviceNode),DeviceNode->AgentId );
+    }
+    else {
+        printk("PCI: Slot 0x%04X.%02X Reset Failed, RCode: %04X\n",ISERIES_BUS(DeviceNode),DeviceNode->AgentId,DeviceNode->ReturnCode);
+        PCIFR( "Slot 0x%04X.%02X Reset Failed, RCode: %04X\n",ISERIES_BUS(DeviceNode),DeviceNode->AgentId,DeviceNode->ReturnCode);
+    }
+    return DeviceNode->ReturnCode;
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_proc.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_proc.c
--- ../kernel.org/linux/arch/ppc64/kernel/iSeries_proc.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_proc.c	Sat Oct 20 06:53:25 2001
@@ -0,0 +1,142 @@
+/*
+ * iSeries_proc.c
+ * Copyright (C) 2001 Kyle A.
Lucke IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* Change Activity: */
+/* End Change Activity */
+
+#include
+#include
+#ifndef _ISERIES_PROC_H
+#include
+#endif
+
+
+static struct proc_dir_entry * iSeries_proc_root = NULL;
+static int iSeries_proc_initializationDone = 0;
+static spinlock_t iSeries_proc_lock;
+
+struct iSeries_proc_registration
+{
+ struct iSeries_proc_registration *next;
+ iSeriesProcFunction functionMember;
+};
+
+
+struct iSeries_proc_registration preallocated[16];
+#define MYQUEUETYPE(T) struct MYQueue##T
+#define MYQUEUE(T) \
+MYQUEUETYPE(T) \
+{ \
+ struct T *head; \
+ struct T *tail; \
+}
+#define MYQUEUECTOR(q) do { (q)->head = NULL; (q)->tail = NULL; } while(0)
+#define MYQUEUEENQ(q, p) \
+do { \
+ (p)->next = NULL; \
+ if ((q)->head != NULL) { \
+ (q)->head->next = (p); \
+ (q)->head = (p); \
+ } else { \
+ (q)->tail = (q)->head = (p); \
+ } \
+} while(0)
+
+#define MYQUEUEDEQ(q,p) \
+do { \
+ (p) = (q)->tail; \
+ if ((p) != NULL) { \
+ (q)->tail = (p)->next; \
+ (p)->next = NULL; \
+ } \
+ if ((q)->tail == NULL) \
+ (q)->head = NULL; \
+} while(0)
+MYQUEUE(iSeries_proc_registration);
+typedef MYQUEUETYPE(iSeries_proc_registration) aQueue;
+
+
+aQueue iSeries_free;
+aQueue iSeries_queued;
+
+void iSeries_proc_early_init(void)
+{
+ int i = 0;
+ unsigned long flags;
+ iSeries_proc_initializationDone = 0;
+ spin_lock_init(&iSeries_proc_lock);
+ MYQUEUECTOR(&iSeries_free);
+ MYQUEUECTOR(&iSeries_queued);
+
+ spin_lock_irqsave(&iSeries_proc_lock, flags);
+ for (i = 0; i < 16; ++i) {
+ MYQUEUEENQ(&iSeries_free, preallocated+i);
+ }
+ spin_unlock_irqrestore(&iSeries_proc_lock, flags);
+}
+
+void iSeries_proc_create(void)
+{
+ unsigned long flags;
+ struct iSeries_proc_registration *reg = NULL;
+ spin_lock_irqsave(&iSeries_proc_lock, flags);
+ printk("iSeries_proc: Creating /proc/iSeries\n");
+
+ iSeries_proc_root = proc_mkdir("iSeries", 0);
+ if (!iSeries_proc_root) {
+ /* Don't return with the lock still held */
+ spin_unlock_irqrestore(&iSeries_proc_lock, flags);
+ return;
+ }
+
+ MYQUEUEDEQ(&iSeries_queued, reg);
+
+ while (reg != NULL) {
+ (*(reg->functionMember))(iSeries_proc_root);
+
+ MYQUEUEDEQ(&iSeries_queued, reg);
+ }
+
+ iSeries_proc_initializationDone = 1;
+ spin_unlock_irqrestore(&iSeries_proc_lock, flags);
+}
+
+void iSeries_proc_callback(iSeriesProcFunction initFunction)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&iSeries_proc_lock, flags);
+
+ if (iSeries_proc_initializationDone) {
+ (*initFunction)(iSeries_proc_root);
+ } else {
+ struct iSeries_proc_registration *reg = NULL;
+
+ MYQUEUEDEQ(&iSeries_free, reg);
+
+ if (reg != NULL) {
+ /* printk("Registering %p in reg %p\n", initFunction, reg); */
+ reg->functionMember = initFunction;
+
+ MYQUEUEENQ(&iSeries_queued, reg);
+ } else {
+ printk("Couldn't get a queue entry\n");
+ }
+ }
+
+ spin_unlock_irqrestore(&iSeries_proc_lock, flags);
+}
+
+
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_rtc.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_rtc.c
--- ../kernel.org/linux/arch/ppc64/kernel/iSeries_rtc.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_rtc.c Fri May 4 17:13:59 2001
@@ -0,0 +1,265 @@
+/*
+ * Real Time Clock interface for IBM iSeries
+ *
+ * Based on rtc.c by Paul Gortmaker
+ *
+ * This driver allows use of the real time clock
+ * from user space. It exports the /dev/rtc
+ * interface supporting various ioctl() and also the
+ * /proc/driver/rtc pseudo-file for status information.
+ *
+ * iSeries does not support RTC interrupts nor an alarm.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 1.0 Mike Corrigan: IBM iSeries rtc support
+ */
+
+#define RTC_VERSION "1.0"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+/*
+ * We sponge a minor off of the misc major. No need slurping
+ * up another valuable major dev number for this. If you add
+ * an ioctl, make sure you don't conflict with SPARC's RTC
+ * ioctls.
+ */
+
+static loff_t rtc_llseek(struct file *file, loff_t offset, int origin);
+
+static ssize_t rtc_read(struct file *file, char *buf,
+ size_t count, loff_t *ppos);
+
+static int rtc_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+
+static void get_rtc_time (struct rtc_time *rtc_tm);
+
+static int rtc_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+
+/*
+ * If this driver ever becomes modularised, it will be really nice
+ * to make the epoch retain its value across module reload...
+ */
+
+static unsigned long epoch = 1900; /* year corresponding to 0x00 */
+
+static const unsigned char days_in_mo[] =
+{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+/*
+ * Now all the various file operations that we export.
+ */
+
+static loff_t rtc_llseek(struct file *file, loff_t offset, int origin)
+{
+ return -ESPIPE;
+}
+
+static ssize_t rtc_read(struct file *file, char *buf,
+ size_t count, loff_t *ppos)
+{
+ return -EIO;
+}
+
+static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct rtc_time wtime;
+
+ switch (cmd) {
+ case RTC_RD_TIME: /* Read the time/date from RTC */
+ {
+ get_rtc_time(&wtime);
+ break;
+ }
+ case RTC_SET_TIME: /* Set the RTC */
+ {
+ struct rtc_time rtc_tm;
+ unsigned char mon, day, hrs, min, sec, leap_yr;
+ unsigned int yrs;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EACCES;
+
+ if (copy_from_user(&rtc_tm, (struct rtc_time*)arg,
+ sizeof(struct rtc_time)))
+ return -EFAULT;
+
+ yrs = rtc_tm.tm_year;
+ mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */
+ day = rtc_tm.tm_mday;
+ hrs = rtc_tm.tm_hour;
+ min = rtc_tm.tm_min;
+ sec = rtc_tm.tm_sec;
+
+ if (yrs < 70)
+ return -EINVAL;
+
+ /* tm_year counts from 1900, so test the full year for leap */
+ leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || !((yrs + 1900) % 400));
+
+ if ((mon > 12) || (day == 0))
+ return -EINVAL;
+
+ if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr)))
+ return -EINVAL;
+
+ if ((hrs >= 24) || (min >= 60) || (sec >= 60))
+ return -EINVAL;
+
+ if ( yrs > 169 )
+ return -EINVAL;
+
+ mf_setRtc( &rtc_tm );
+
+ return 0;
+ }
+ case RTC_EPOCH_READ: /* Read the epoch.
*/ + { + return put_user (epoch, (unsigned long *)arg); + } + case RTC_EPOCH_SET: /* Set the epoch. */ + { + /* + * There were no RTC clocks before 1900. + */ + if (arg < 1900) + return -EINVAL; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + epoch = arg; + return 0; + } + default: + return -EINVAL; + } + return copy_to_user((void *)arg, &wtime, sizeof wtime) ? -EFAULT : 0; +} + +static int rtc_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int rtc_release(struct inode *inode, struct file *file) +{ + return 0; +} + +/* + * The various file operations we support. + */ + +static struct file_operations rtc_fops = { + owner: THIS_MODULE, + llseek: rtc_llseek, + read: rtc_read, + ioctl: rtc_ioctl, + open: rtc_open, + release: rtc_release, +}; + +static struct miscdevice rtc_dev= +{ + RTC_MINOR, + "rtc", + &rtc_fops +}; + +static int __init rtc_init(void) +{ + misc_register(&rtc_dev); + create_proc_read_entry ("driver/rtc", 0, 0, rtc_read_proc, NULL); + + printk(KERN_INFO "iSeries Real Time Clock Driver v" RTC_VERSION "\n"); + + return 0; +} + +static void __exit rtc_exit (void) +{ + remove_proc_entry ("driver/rtc", NULL); + misc_deregister(&rtc_dev); +} + +module_init(rtc_init); +module_exit(rtc_exit); +EXPORT_NO_SYMBOLS; + +/* + * Info exported via "/proc/driver/rtc". + */ + +static int rtc_proc_output (char *buf) +{ + + char *p; + struct rtc_time tm; + + p = buf; + + get_rtc_time(&tm); + + /* + * There is no way to tell if the luser has the RTC set for local + * time or for Universal Standard Time (GMT). Probably local though. + */ + p += sprintf(p, + "rtc_time\t: %02d:%02d:%02d\n" + "rtc_date\t: %04d-%02d-%02d\n" + "rtc_epoch\t: %04lu\n", + tm.tm_hour, tm.tm_min, tm.tm_sec, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); + + p += sprintf(p, + "DST_enable\t: no\n" + "BCD\t\t: yes\n" + "24hr\t\t: yes\n" ); + + return p - buf; +} + +static int rtc_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = rtc_proc_output (page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +static void get_rtc_time(struct rtc_time *rtc_tm) +{ + mf_getRtc( rtc_tm ); + + rtc_tm->tm_mon--; +} + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/iSeries_setup.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_setup.c --- ../kernel.org/linux/arch/ppc64/kernel/iSeries_setup.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_setup.c Tue Oct 23 15:50:37 2001 @@ -0,0 +1,893 @@ +/* + * + * + * Copyright (c) 2000 Mike Corrigan + * Copyright (c) 1999-2000 Grant Erickson + * + * Module name: iSeries_setup.c + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan , and Dan Malek + * . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
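As a usage note for the /dev/rtc driver above: it is driven entirely through the stock RTC ioctl interface, so a minimal user-space reader needs nothing iSeries-specific (RTC_RD_TIME and struct rtc_time are the standard <linux/rtc.h> definitions):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/rtc.h>

int main(void)
{
        struct rtc_time tm;
        int fd = open("/dev/rtc", O_RDONLY);

        if (fd < 0 || ioctl(fd, RTC_RD_TIME, &tm) < 0)
                return 1;
        /* tm_year counts from 1900 and tm_mon from 0, as in the driver */
        printf("%04d-%02d-%02d %02d:%02d:%02d\n",
               tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
               tm.tm_hour, tm.tm_min, tm.tm_sec);
        close(fd);
        return 0;
}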
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "iSeries_setup.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Function Prototypes */ + +extern void abort(void); +#ifdef CONFIG_PPC_ISERIES +static void build_iSeries_Memory_Map( void ); +static void setup_iSeries_cache_sizes( void ); +static void iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr); +#endif +void build_valid_hpte( unsigned long vsid, unsigned long ea, unsigned long pa, + pte_t * ptep, unsigned hpteflags, unsigned bolted ); +extern void ppcdbg_initialize(void); +extern void iSeries_pcibios_init(void); +extern void iSeries_pcibios_fixup(void); +extern void iSeries_pcibios_fixup_bus(int); +static void iSeries_setup_dprofile(void); + +/* Global Variables */ + +unsigned long procFreqHz = 0; +unsigned long procFreqMhz = 0; +unsigned long procFreqMhzHundreths = 0; + +unsigned long tbFreqHz = 0; +unsigned long tbFreqMhz = 0; +unsigned long tbFreqMhzHundreths = 0; + +unsigned long dprof_shift = 0; +unsigned long dprof_len = 0; +unsigned int * dprof_buffer = NULL; + +int piranha_simulator = 0; + +extern char _end[]; + +extern struct Naca *naca; +extern int rd_size; /* Defined in drivers/block/rd.c */ +extern unsigned long klimit; +extern unsigned long embedded_sysmap_start; +extern unsigned long embedded_sysmap_end; + +extern unsigned long iSeries_recal_tb; +extern unsigned long iSeries_recal_titan; + +extern char _stext; +extern char _etext; + +static int mf_initialized = 0; + +struct MemoryBlock { + unsigned long absStart; + unsigned long absEnd; + unsigned long logicalStart; + unsigned long logicalEnd; +}; + +/* + * Process the main store vpd to determine where the holes in memory are + * and return the number of physical blocks and fill in the array of + * block data. + */ + +unsigned long iSeries_process_Condor_mainstore_vpd( struct MemoryBlock *mb_array, unsigned long max_entries ) +{ + /* Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. 
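The hole bookkeeping below is done entirely in chunk units. A minimal sketch of the conversions involved, assuming the 256K (1UL << 18) chunk size used elsewhere in this patch; the real addr_to_chunk/chunk_to_addr helpers live in the port's headers and may differ in detail:

static inline unsigned long sketch_addr_to_chunk(unsigned long addr)
{
        return addr >> 18;              /* 256K == 2^18 bytes per chunk */
}

static inline unsigned long sketch_chunk_to_addr(unsigned long chunk)
{
        return chunk << 18;
}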
+ */ + + unsigned long holeFirstChunk, holeSizeChunks; + unsigned long numMemoryBlocks = 1; + struct IoHriMainStoreSegment4 * msVpd = (struct IoHriMainStoreSegment4 *)xMsVpd; + unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr; + unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr; + unsigned long holeSize = holeEnd - holeStart; + + printk("Mainstore_VPD: Condor\n"); + + mb_array[0].logicalStart = 0; + mb_array[0].logicalEnd = 0x100000000; + mb_array[0].absStart = 0; + mb_array[0].absEnd = 0x100000000; + + if ( holeSize ) { + numMemoryBlocks = 2; + holeStart = holeStart & 0x000fffffffffffff; + holeStart = addr_to_chunk(holeStart); + holeFirstChunk = holeStart; + holeSize = addr_to_chunk(holeSize); + holeSizeChunks = holeSize; + printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n", + holeFirstChunk, holeSizeChunks ); + mb_array[0].logicalEnd = holeFirstChunk; + mb_array[0].absEnd = holeFirstChunk; + mb_array[1].logicalStart = holeFirstChunk; + mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks; + mb_array[1].absStart = holeFirstChunk + holeSizeChunks; + mb_array[1].absEnd = 0x100000000; + } + + + return numMemoryBlocks; +} + +#define MaxSegmentAreas 32 +#define MaxSegmentAdrRangeBlocks 128 +#define MaxAreaRangeBlocks 4 +unsigned long iSeries_process_Regatta_mainstore_vpd( struct MemoryBlock *mb_array, unsigned long max_entries ) +{ + struct IoHriMainStoreSegment5 * msVpdP = (struct IoHriMainStoreSegment5 *)xMsVpd; + unsigned long numSegmentBlocks = 0; + u32 existsBits = msVpdP->msAreaExists; + unsigned long area_num; + + printk("Mainstore_VPD: Regatta\n"); + + for ( area_num = 0; area_num < MaxSegmentAreas; ++area_num ) { + unsigned long numAreaBlocks; + struct IoHriMainStoreArea4 * currentArea; + + if ( existsBits & 0x80000000 ) { + unsigned long block_num; + + currentArea = &msVpdP->msAreaArray[area_num]; + numAreaBlocks = currentArea->numAdrRangeBlocks; + + printk("ms_vpd: processing area %2ld blocks=%ld", area_num, numAreaBlocks); + + for ( block_num = 0; block_num < numAreaBlocks; ++block_num ) { + /* Process an address range block */ + struct MemoryBlock tempBlock; + unsigned long i; + + tempBlock.absStart = (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart; + tempBlock.absEnd = (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd; + tempBlock.logicalStart = 0; + tempBlock.logicalEnd = 0; + + printk("\n block %ld absStart=%016lx absEnd=%016lx", block_num, + tempBlock.absStart, tempBlock.absEnd); + + for ( i=0; i 1 ) { + unsigned long m, n; + for ( m=0; mxRamDisk ) { + initrd_start = (unsigned long)__va(naca->xRamDisk); + initrd_end = initrd_start + naca->xRamDiskSize * PAGE_SIZE; + initrd_below_start_ok = 1; // ramdisk in kernel space + ROOT_DEV = MKDEV( RAMDISK_MAJOR, 0 ); + + if ( ((rd_size*1024)/PAGE_SIZE) < naca->xRamDiskSize ) + rd_size = (naca->xRamDiskSize*PAGE_SIZE)/1024; + } else + +#endif /* CONFIG_BLK_DEV_INITRD */ + { + + /* ROOT_DEV = MKDEV( VIODASD_MAJOR, 1 ); */ + } + + iSeries_recal_tb = get_tb(); + iSeries_recal_titan = HvCallXm_loadTod(); + + ppc_md.setup_arch = iSeries_setup_arch; + ppc_md.setup_residual = iSeries_setup_residual; + ppc_md.get_cpuinfo = iSeries_get_cpuinfo; + ppc_md.irq_cannonicalize = NULL; + ppc_md.init_IRQ = iSeries_init_IRQ; + ppc_md.init_ras_IRQ = NULL; + ppc_md.get_irq = iSeries_get_irq; + ppc_md.init = NULL; + + ppc_md.pcibios_fixup = iSeries_pcibios_fixup; + ppc_md.pcibios_fixup_bus = iSeries_pcibios_fixup_bus; + + ppc_md.restart = iSeries_restart; + ppc_md.power_off = 
iSeries_power_off;
+ ppc_md.halt = iSeries_halt;
+
+ ppc_md.time_init = NULL;
+ ppc_md.get_boot_time = iSeries_get_boot_time;
+ ppc_md.set_rtc_time = iSeries_set_rtc_time;
+ ppc_md.get_rtc_time = iSeries_get_rtc_time;
+ ppc_md.calibrate_decr = iSeries_calibrate_decr;
+ ppc_md.progress = iSeries_progress;
+
+ ppc_md.kbd_setkeycode = NULL;
+ ppc_md.kbd_getkeycode = NULL;
+ ppc_md.kbd_translate = NULL;
+ ppc_md.kbd_unexpected_up = NULL;
+ ppc_md.kbd_leds = NULL;
+ ppc_md.kbd_init_hw = NULL;
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+ ppc_md.ppc_kbd_sysrq_xlate = NULL;
+#endif
+
+ hpte_init_iSeries();
+ tce_init_iSeries();
+
+ /* Initialize the table which translates Linux physical addresses to
+ * AS/400 absolute addresses
+ */
+
+ build_iSeries_Memory_Map();
+
+ setup_iSeries_cache_sizes();
+
+ /* Initialize machine-dependency vectors */
+
+
+#ifdef CONFIG_SMP
+ smp_init_iSeries();
+#endif
+
+ if ( itLpNaca.xPirEnvironMode == 0 )
+ piranha_simulator = 1;
+#endif
+}
+
+/*
+ * void __init iSeries_init()
+ */
+
+void __init
+iSeries_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+{
+ /* Associate Lp Event Queue 0 with processor 0 */
+ HvCallEvent_setLpEventQueueInterruptProc( 0, 0 );
+
+ {
+ /* copy the command line parameter from the primary VSP */
+ char *p, *q;
+ HvCallEvent_dmaToSp( cmd_line,
+ 2*64*1024,
+ 256,
+ HvLpDma_Direction_RemoteToLocal );
+
+ p = q = cmd_line + 255;
+ while( p > cmd_line ) {
+ if ((*p == 0) || (*p == ' ') || (*p == '\n'))
+ --p;
+ else
+ break;
+ }
+ if ( p < q )
+ *(p+1) = 0;
+ }
+
+ if (strstr(cmd_line, "dprofile=")) {
+ char *p, *q;
+
+ for (q = cmd_line; (p = strstr(q, "dprofile=")) != 0; ) {
+ unsigned long size, new_klimit;
+ q = p + 9;
+ if (p > cmd_line && p[-1] != ' ')
+ continue;
+ dprof_shift = simple_strtoul(q, &q, 0);
+ dprof_len = (unsigned long)&_etext - (unsigned long)&_stext;
+ dprof_len >>= dprof_shift;
+ size = ((dprof_len * sizeof(unsigned int)) + (PAGE_SIZE-1)) & PAGE_MASK;
+ dprof_buffer = (unsigned int *)((klimit + (PAGE_SIZE-1)) & PAGE_MASK);
+ new_klimit = ((unsigned long)dprof_buffer) + size;
+ lmb_reserve( __pa(klimit), (new_klimit-klimit));
+ klimit = new_klimit;
+ memset( dprof_buffer, 0, size );
+ }
+ }
+
+ iSeries_setup_dprofile();
+
+ iSeries_proc_early_init();
+ mf_init();
+ mf_initialized = 1;
+ mb();
+
+ iSeries_proc_callback( &pmc_proc_init );
+}
+
+#ifdef CONFIG_PPC_ISERIES
+/*
+ * The iSeries may have very large memories ( > 128 GB ) and a partition
+ * may get memory in "chunks" that may be anywhere in the 2**52 real
+ * address space. The chunks are 256K in size. To map this to the
+ * memory model Linux expects, the AS/400 specific code builds a
+ * translation table to translate what Linux thinks are "physical"
+ * addresses to the actual real addresses. This allows us to make
+ * it appear to Linux that we have contiguous memory starting at
+ * physical address zero while in fact this could be far from the truth.
+ * To avoid confusion, I'll let the words physical and/or real address
+ * apply to the Linux addresses while I'll use "absolute address" to
+ * refer to the actual hardware real address.
+ *
+ * build_iSeries_Memory_Map gets information from the Hypervisor and
+ * looks at the Main Store VPD to determine the absolute addresses
+ * of the memory that has been assigned to our partition and builds
+ * a table used to translate Linux's physical addresses to these
+ * absolute addresses. Absolute addresses are needed when
+ * communicating with the hypervisor (e.g.
to build HPT entries) + */ + +static void __init build_iSeries_Memory_Map(void) +{ + u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; + u32 nextPhysChunk; + u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages; + u32 num_ptegs; + u32 totalChunks,moreChunks; + u32 currChunk, thisChunk, absChunk; + u32 currDword; + u32 chunkBit; + u64 map; + struct MemoryBlock mb[32]; + unsigned long numMemoryBlocks, curBlock; + + /* Chunk size on iSeries is 256K bytes */ + totalChunks = (u32)HvLpConfig_getMsChunks(); + klimit = msChunks_alloc(klimit, totalChunks, 1UL<<18); + + /* Get absolute address of our load area + * and map it to physical address 0 + * This guarantees that the loadarea ends up at physical 0 + * otherwise, it might not be returned by PLIC as the first + * chunks + */ + + loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr); + loadAreaSize = itLpNaca.xLoadAreaChunks; + + /* Only add the pages already mapped here. + * Otherwise we might add the hpt pages + * The rest of the pages of the load area + * aren't in the HPT yet and can still + * be assigned an arbitrary physical address + */ + if ( (loadAreaSize * 64) > HvPagesToMap ) + loadAreaSize = HvPagesToMap / 64; + + loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1; + + /* TODO Do we need to do something if the HPT is in the 64MB load area? + * This would be required if the itLpNaca.xLoadAreaChunks includes + * the HPT size + */ + + printk( "Mapping load area - physical addr = 0000000000000000\n" + " absolute addr = %016lx\n", + chunk_to_addr(loadAreaFirstChunk) ); + printk( "Load area size %dK\n", loadAreaSize*256 ); + + for ( nextPhysChunk = 0; + nextPhysChunk < loadAreaSize; + ++nextPhysChunk ) { + msChunks.abs[nextPhysChunk] = loadAreaFirstChunk+nextPhysChunk; + } + + /* Get absolute address of our HPT and remember it so + * we won't map it to any physical address + */ + + hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); + hptSizePages = (u32)(HvCallHpt_getHptPages()); + hptSizeChunks = hptSizePages >> (msChunks.chunk_shift-PAGE_SHIFT); + hptLastChunk = hptFirstChunk + hptSizeChunks - 1; + + printk( "HPT absolute addr = %016lx, size = %dK\n", + chunk_to_addr(hptFirstChunk), hptSizeChunks*256 ); + + /* Fill in the htab_data structure */ + + /* Fill in size of hashed page table */ + num_ptegs = hptSizePages * (PAGE_SIZE/(sizeof(HPTE)*HPTES_PER_GROUP)); + htab_data.htab_num_ptegs = num_ptegs; + htab_data.htab_hash_mask = num_ptegs - 1; + + /* The actual hashed page table is in the hypervisor, we have no direct access */ + htab_data.htab = NULL; + + /* Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. 
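For orientation, the msChunks.abs[] table filled in below is what turns the physical-to-absolute translation described above into a simple indexed lookup. A sketch of the translation, on the assumption that the port's real helper works the same way:

static inline unsigned long sketch_phys_to_abs(unsigned long pa)
{
        unsigned long chunk  = pa >> msChunks.chunk_shift;
        unsigned long offset = pa & ((1UL << msChunks.chunk_shift) - 1);

        /* abs[] maps a Linux physical chunk to its hardware chunk */
        return (msChunks.abs[chunk] << msChunks.chunk_shift) | offset;
}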
+ */ + numMemoryBlocks = iSeries_process_mainstore_vpd( mb, 32 ); + + /* Process the main store access map from the hypervisor + * to build up our physical -> absolute translation table + */ + curBlock = 0; + currChunk = 0; + currDword = 0; + moreChunks = totalChunks; + + while ( moreChunks ) { + map = HvCallSm_get64BitsOfAccessMap( itLpNaca.xLpIndex, + currDword ); + thisChunk = currChunk; + while ( map ) { + chunkBit = map >> 63; + map <<= 1; + if ( chunkBit ) { + --moreChunks; + + while ( thisChunk >= mb[curBlock].logicalEnd ) { + ++curBlock; + if ( curBlock >= numMemoryBlocks ) + panic("out of memory blocks"); + } + if ( thisChunk < mb[curBlock].logicalStart ) + panic("memory block error"); + + absChunk = mb[curBlock].absStart + ( thisChunk - mb[curBlock].logicalStart ); + + if ( ( ( absChunk < hptFirstChunk ) || + ( absChunk > hptLastChunk ) ) && + ( ( absChunk < loadAreaFirstChunk ) || + ( absChunk > loadAreaLastChunk ) ) ) { + msChunks.abs[nextPhysChunk] = absChunk; + ++nextPhysChunk; + } + } + ++thisChunk; + } + ++currDword; + currChunk += 64; + } + + /* main store size (in chunks) is + * totalChunks - hptSizeChunks + * which should be equal to + * nextPhysChunk + */ + naca->physicalMemorySize = chunk_to_addr(nextPhysChunk); + + /* Bolt kernel mappings for all of memory */ + iSeries_bolt_kernel( 0, naca->physicalMemorySize ); + + lmb_init(); + lmb_add( 0, naca->physicalMemorySize ); + lmb_reserve( 0, __pa(klimit)); + + /* + * Hardcode to GP size. I am not sure where to get this info. DRENG + */ + naca->slb_size = 64; +} + +/* + * Set up the variables that describe the cache line sizes + * for this machine. + */ + +static void __init setup_iSeries_cache_sizes(void) +{ + unsigned i,n; + naca->iCacheL1LineSize = xIoHriProcessorVpd[0].xInstCacheOperandSize; + naca->dCacheL1LineSize = xIoHriProcessorVpd[0].xDataCacheOperandSize; + naca->iCacheL1LinesPerPage = PAGE_SIZE / naca->iCacheL1LineSize; + naca->dCacheL1LinesPerPage = PAGE_SIZE / naca->dCacheL1LineSize; + i = naca->iCacheL1LineSize; + n = 0; + while ((i=(i/2))) ++n; + naca->iCacheL1LogLineSize = n; + i = naca->dCacheL1LineSize; + n = 0; + while ((i=(i/2))) ++n; + naca->dCacheL1LogLineSize = n; + + printk( "D-cache line size = %d (log = %d)\n", + (unsigned)naca->dCacheL1LineSize, + (unsigned)naca->dCacheL1LogLineSize ); + printk( "I-cache line size = %d (log = %d)\n", + (unsigned)naca->iCacheL1LineSize, + (unsigned)naca->iCacheL1LogLineSize ); + +} + +/* + * Bolt the kernel addr space into the HPT + */ + +static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) +{ + unsigned long pa; + unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; + HPTE hpte; + + for (pa=saddr; pa < eaddr ;pa+=PAGE_SIZE) { + unsigned long ea = (unsigned long)__va(pa); + unsigned long vsid = get_kernel_vsid( ea ); + unsigned long va = ( vsid << 28 ) | ( pa & 0xfffffff ); + unsigned long vpn = va >> PAGE_SHIFT; + unsigned long slot = HvCallHpt_findValid( &hpte, vpn ); + if ( hpte.dw0.dw0.v ) { + /* HPTE exists, so just bolt it */ + HvCallHpt_setSwBits( slot, 0x10, 0 ); + } else { + /* No HPTE exists, so create a new bolted one */ + build_valid_hpte(vsid, ea, pa, NULL, mode_rw, 1); + } + } +} +#endif /* CONFIG_PPC_ISERIES */ + +/* + * Document me. 
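One detail of iSeries_bolt_kernel above worth spelling out: the low 28 bits of the effective address are the segment offset, the VSID supplies the upper bits of the virtual address, and the VPN is just the VA in page units. With a hypothetical kernel address (the constant is illustrative only):

        unsigned long ea   = 0xC000000000123456UL;      /* hypothetical */
        unsigned long vsid = get_kernel_vsid(ea);
        unsigned long va   = (vsid << 28) | (ea & 0xFFFFFFFUL);
        unsigned long vpn  = va >> PAGE_SHIFT;          /* index for the HPT hash */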
+ */
+void __init
+iSeries_setup_arch(void)
+{
+ void * eventStack;
+
+ /* Setup the Lp Event Queue */
+
+ /* Allocate a page for the Event Stack
+ * The hypervisor wants the absolute real address, so
+ * we subtract out the KERNELBASE and add in the
+ * absolute real address of the kernel load area
+ */
+
+ eventStack = alloc_bootmem_pages( LpEventStackSize );
+
+ memset( eventStack, 0, LpEventStackSize );
+
+ /* Invoke the hypervisor to initialize the event stack */
+
+ HvCallEvent_setLpEventStack( 0, eventStack, LpEventStackSize );
+
+ /* Initialize fields in our Lp Event Queue */
+
+ xItLpQueue.xSlicEventStackPtr = (char *)eventStack;
+ xItLpQueue.xSlicCurEventPtr = (char *)eventStack;
+ xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack +
+ (LpEventStackSize - LpEventMaxSize);
+ xItLpQueue.xIndex = 0;
+
+ /* Compute processor frequency */
+ procFreqHz = (((1UL<<34) * 1000000) / xIoHriProcessorVpd[0].xProcFreq );
+ procFreqMhz = procFreqHz / 1000000;
+ procFreqMhzHundreths = (procFreqHz/10000) - (procFreqMhz*100);
+
+ /* Compute time base frequency */
+ tbFreqHz = (((1UL<<32) * 1000000) / xIoHriProcessorVpd[0].xTimeBaseFreq );
+ tbFreqMhz = tbFreqHz / 1000000;
+ tbFreqMhzHundreths = (tbFreqHz/10000) - (tbFreqMhz*100);
+
+ printk("Max logical processors = %d\n",
+ itVpdAreas.xSlicMaxLogicalProcs );
+ printk("Max physical processors = %d\n",
+ itVpdAreas.xSlicMaxPhysicalProcs );
+ printk("Processor frequency = %lu.%02lu\n",
+ procFreqMhz,
+ procFreqMhzHundreths );
+ printk("Time base frequency = %lu.%02lu\n",
+ tbFreqMhz,
+ tbFreqMhzHundreths );
+ printk("Processor version = %x\n",
+ xIoHriProcessorVpd[0].xPVR );
+
+}
+
+/*
+ * int iSeries_setup_residual()
+ *
+ * Description:
+ * This routine pretty-prints CPU information gathered from the VPD
+ * for use in /proc/cpuinfo
+ *
+ * Input(s):
+ * *buffer - Buffer into which CPU data is to be printed.
+ *
+ * Output(s):
+ * *buffer - Buffer with CPU data.
+ *
+ * Returns:
+ * The number of bytes copied into 'buffer' if OK, otherwise zero or less
+ * on error.
+ */
+int
+iSeries_setup_residual(char *buffer)
+{
+ int len = 0;
+
+ len += sprintf(len+buffer,"clock\t\t: %lu.%02luMHz\n",
+ procFreqMhz, procFreqMhzHundreths );
+ len += sprintf(len+buffer,"time base\t: %lu.%02luMHz\n",
+ tbFreqMhz, tbFreqMhzHundreths );
+ len += sprintf(len+buffer,"i-cache\t\t: %d\n",
+ naca->iCacheL1LineSize);
+ len += sprintf(len+buffer,"d-cache\t\t: %d\n",
+ naca->dCacheL1LineSize);
+
+
+ return (len);
+}
+
+int iSeries_get_cpuinfo(char *buffer)
+{
+ int len = 0;
+
+ len += sprintf(len+buffer,"machine\t\t: 64-bit iSeries Logical Partition\n");
+
+ return len;
+}
+
+/*
+ * Document me.
+ * and Implement me.
+ */
+int
+iSeries_get_irq(struct pt_regs *regs)
+{
+ /* -2 means ignore this interrupt */
+ return -2;
+}
+
+/*
+ * Document me.
+ */
+void
+iSeries_restart(char *cmd)
+{
+ mf_reboot();
+}
+
+/*
+ * Document me.
+ */
+void
+iSeries_power_off(void)
+{
+ mf_powerOff();
+}
+
+/*
+ * Document me.
+ */
+void
+iSeries_halt(void)
+{
+ mf_powerOff();
+}
+
+/*
+ * Nothing to do here.
+ */
+void __init
+iSeries_time_init(void)
+{
+ /* Nothing to do */
+}
+
+/* JDH Hack */
+unsigned long jdh_time = 0;
+
+extern void setup_default_decr(void);
+
+/*
+ * void __init iSeries_calibrate_decr()
+ *
+ * Description:
+ * This routine retrieves the internal processor frequency from the VPD,
+ * and sets up the kernel timer decrementer based on that value.
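A worked instance of the calibration that follows, assuming an illustrative 512 MHz timebase and HZ == 100 (the real numbers come from the VPD):

/*
 * xTimeBaseFreq encodes the timebase so that
 *     freq = (2^32 * 10^6) / xTimeBaseFreq        cycles/sec
 * With freq == 512000000 and HZ == 100:
 *     tb_ticks_per_jiffy = 512000000 / 100   = 5120000
 *     tb_ticks_per_usec  = 512000000 / 10^6  = 512
 * i.e. the decrementer is reloaded with about 5.12M ticks per 1/HZ tick.
 */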
+ * + */ +void __init +iSeries_calibrate_decr(void) +{ + unsigned long freq; + unsigned long cyclesPerUsec; + unsigned long tbf; + + struct div_result divres; + + /* Compute decrementer (and TB) frequency + * in cycles/sec + */ + + tbf = xIoHriProcessorVpd[0].xTimeBaseFreq; + + freq = 0x0100000000; + freq *= 1000000; /* 2^32 * 10^6 */ + freq = freq / tbf; /* cycles / sec */ + cyclesPerUsec = freq / 1000000; /* cycles / usec */ + + /* Set the amount to refresh the decrementer by. This + * is the number of decrementer ticks it takes for + * 1/HZ seconds. + */ + + tb_ticks_per_jiffy = freq / HZ; + /* + * tb_ticks_per_sec = freq; would give better accuracy + * but tb_ticks_per_sec = tb_ticks_per_jiffy*HZ; assures + * that jiffies (and xtime) will match the time returned + * by do_gettimeofday. + */ + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = cyclesPerUsec; + tb_to_us = mulhwu_scale_factor(freq, 1000000); + div128_by_32( 1024*1024, 0, tb_ticks_per_sec, &divres ); + tb_to_xs = divres.result_low; + setup_default_decr(); +} + +void __init +iSeries_progress( char * st, unsigned short code ) +{ + printk( "Progress: [%04x] - %s\n", (unsigned)code, st ); + if ( !piranha_simulator && mf_initialized ) { + if (code != 0xffff) + mf_displayProgress( code ); + else + mf_clearSrc(); + } +} + + +void iSeries_fixup_klimit(void) +{ + /* Change klimit to take into account any ram disk that may be included */ + if (naca->xRamDisk) + klimit = KERNELBASE + (u64)naca->xRamDisk + (naca->xRamDiskSize * PAGE_SIZE); + else { + /* No ram disk was included - check and see if there was an embedded system map */ + /* Change klimit to take into account any embedded system map */ + if (embedded_sysmap_end) + klimit = KERNELBASE + ((embedded_sysmap_end+4095) & 0xfffffffffffff000); + } +} + +static void iSeries_setup_dprofile(void) +{ + if ( dprof_buffer ) { + unsigned i; + for (i=0; i + * Copyright (c) 1999-2000 Grant Erickson + * + * Module name: as400_setup.h + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan , and Dan Malek + * . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
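A reading aid for the tb_to_xs value computed in iSeries_calibrate_decr above; this interpretation is inferred from the arithmetic rather than stated in the source. An "xsec" is 2^-20 second, and tb_to_xs is the fixed-point factor that converts timebase ticks to xsecs via a 64x64 high multiply:

/*
 * div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres) computes
 *     (2^20 * 2^64) / tb_ticks_per_sec
 * so a high multiply gives
 *     xsecs = (tb_ticks * tb_to_xs) >> 64
 *           = tb_ticks * 2^20 / tb_ticks_per_sec
 * i.e. elapsed seconds scaled by 2^20.
 */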
+ */ + +#ifndef __ISERIES_SETUP_H__ +#define __ISERIES_SETUP_H__ + +extern void iSeries_init_early(void); +extern void iSeries_init(unsigned long r3, + unsigned long ird_start, + unsigned long ird_end, + unsigned long cline_start, + unsigned long cline_end); +extern void iSeries_setup_arch(void); +extern int iSeries_setup_residual(char *buffer); +extern int iSeries_get_cpuinfo(char *buffer); +extern void iSeries_init_IRQ(void); +extern int iSeries_get_irq(struct pt_regs *regs); +extern void iSeries_restart(char *cmd); +extern void iSeries_power_off(void); +extern void iSeries_halt(void); +extern void iSeries_time_init(void); +extern void iSeries_get_boot_time(struct rtc_time *tm); +extern int iSeries_set_rtc_time(unsigned long now); +extern unsigned long iSeries_get_rtc_time(void); +extern void iSeries_calibrate_decr(void); +extern void iSeries_progress( char *, unsigned short ); + +#endif /* __ISERIES_SETUP_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/idle.c linuxppc64_2_4/arch/ppc64/kernel/idle.c --- ../kernel.org/linux/arch/ppc64/kernel/idle.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/idle.c Tue Oct 23 22:57:42 2001 @@ -0,0 +1,131 @@ +/* + * Idle daemon for PowerPC. Idle daemon will handle any action + * that needs to be taken when the system becomes idle. + * + * Written by Cort Dougan (cort@cs.nmt.edu) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +unsigned long maxYieldTime = 0; +unsigned long minYieldTime = 0xffffffffffffffffUL; + +static void yield_shared_processor(void) +{ + struct Paca *paca; + unsigned long tb; + unsigned long yieldTime; + + paca = (struct Paca *)mfspr(SPRG3); + HvCall_setEnabledInterrupts( HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout ); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor( HvCall_YieldTimed, tb+tb_ticks_per_jiffy ); + + yieldTime = get_tb() - tb; + if ( yieldTime > maxYieldTime ) + maxYieldTime = yieldTime; + + if ( yieldTime < minYieldTime ) + minYieldTime = yieldTime; + + /* The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. 
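To make the timed yield concrete, with the illustrative 512 MHz / HZ == 100 numbers from the calibration example earlier: the partition asks to be resumed one jiffy in the future, and the forced decrementer-interrupt flag below lets the timer code catch jiffies up after the gap.

        unsigned long tb       = get_tb();
        unsigned long deadline = tb + tb_ticks_per_jiffy;   /* tb + 5120000 here */

        HvCall_yieldProcessor(HvCall_YieldTimed, deadline);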
+ */ + paca->xLpPaca.xIntDword.xFields.xDecrInt = 1; + process_iSeries_events(); +} + +int idled(void) +{ + struct Paca *paca; + long oldval; + unsigned long CTRL; + + /* endless loop with no priority at all */ + current->nice = 20; + current->counter = -100; +#ifdef CONFIG_PPC_ISERIES + /* ensure iSeries run light will be out when idle */ + current->thread.flags &= ~PPC_FLAG_RUN_LIGHT; + CTRL = mfspr(CTRLF); + CTRL &= ~RUNLATCH; + mtspr(CTRLT, CTRL); +#endif + init_idle(); + + paca = (struct Paca *)mfspr(SPRG3); + + for (;;) { +#ifdef CONFIG_PPC_ISERIES + if ( paca->xLpPaca.xSharedProc ) { + if ( ItLpQueue_isLpIntPending( paca->lpQueuePtr ) ) + process_iSeries_events(); + if ( !current->need_resched ) + yield_shared_processor(); + } + else +#endif + { + /* Avoid an IPI by setting need_resched */ + oldval = xchg(¤t->need_resched, -1); + if (!oldval) { + while(current->need_resched == -1) { +#ifdef CONFIG_PPC_ISERIES + HMT_medium(); + if ( ItLpQueue_isLpIntPending( paca->lpQueuePtr ) ) + process_iSeries_events(); +#endif + HMT_low(); + } + } + } + HMT_medium(); + if (current->need_resched) { + schedule(); + check_pgt_cache(); + } + } + return 0; +} + +/* + * SMP entry into the idle task - calls the same thing as the + * non-smp versions. -- Cort + */ +int cpu_idle(void) +{ + idled(); + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ioctl32.c linuxppc64_2_4/arch/ppc64/kernel/ioctl32.c --- ../kernel.org/linux/arch/ppc64/kernel/ioctl32.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ioctl32.c Mon Nov 5 00:41:20 2001 @@ -0,0 +1,4311 @@ +/* + * ioctl32.c: Conversion between 32bit and 64bit native ioctls. + * + * Based on sparc64 ioctl32.c by: + * + * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * + * ppc64 changes: + * + * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) + * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) + * + * These routines maintain argument size conversion between 32bit and 64bit + * ioctls. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +#include +#endif /* LVM */ + +#include +/* Ugly hack. */ +#undef __KERNEL__ +#include +#define __KERNEL__ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +/* Use this to get at 32-bit user passed pointers. + See sys_sparc32.c for description about these. */ +#define A(__x) ((unsigned long)(__x)) +#define AA(__x) \ +({ unsigned long __ret; \ + __asm__ ("clrldi %0, %0, 32" \ + : "=r" (__ret) \ + : "0" (__x)); \ + __ret; \ +}) + +/* Aiee. 
Someone does not find a difference between int and long */ +#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) +#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) +#define EXT2_IOC32_GETVERSION _IOR('v', 1, int) +#define EXT2_IOC32_SETVERSION _IOW('v', 2, int) + +extern asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); + +static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + int err; + unsigned long val; + + set_fs (KERNEL_DS); + err = sys_ioctl(fd, cmd, (unsigned long)&val); + set_fs (old_fs); + if (!err && put_user(val, (u32 *)arg)) + return -EFAULT; + return err; +} + +static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + int err; + unsigned long val; + + if (get_user(val, (u32 *)arg)) + return -EFAULT; + set_fs (KERNEL_DS); + err = sys_ioctl(fd, cmd, (unsigned long)&val); + set_fs (old_fs); + if (!err && put_user(val, (u32 *)arg)) + return -EFAULT; + return err; +} + +static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break; + case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break; + case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break; + case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break; + } + return sys_ioctl(fd, cmd, arg); +} + +struct video_tuner32 { + s32 tuner; + u8 name[32]; + u32 rangelow, rangehigh; + u32 flags; + u16 mode, signal; +}; + +static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up) +{ + int i; + + if (get_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __get_user(kp->name[i], &up->name[i]); + __get_user(kp->rangelow, &up->rangelow); + __get_user(kp->rangehigh, &up->rangehigh); + __get_user(kp->flags, &up->flags); + __get_user(kp->mode, &up->mode); + __get_user(kp->signal, &up->signal); + return 0; +} + +static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up) +{ + int i; + + if (put_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __put_user(kp->name[i], &up->name[i]); + __put_user(kp->rangelow, &up->rangelow); + __put_user(kp->rangehigh, &up->rangehigh); + __put_user(kp->flags, &up->flags); + __put_user(kp->mode, &up->mode); + __put_user(kp->signal, &up->signal); + return 0; +} + +struct video_buffer32 { + /* void * */ u32 base; + s32 height, width, depth, bytesperline; +}; + +static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up) +{ + u32 tmp; + + if (get_user(tmp, &up->base)) + return -EFAULT; + kp->base = (void *) ((unsigned long)tmp); + __get_user(kp->height, &up->height); + __get_user(kp->width, &up->width); + __get_user(kp->depth, &up->depth); + __get_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up) +{ + u32 tmp = (u32)((unsigned long)kp->base); + + if (put_user(tmp, &up->base)) + return -EFAULT; + __put_user(kp->height, &up->height); + __put_user(kp->width, &up->width); + __put_user(kp->depth, &up->depth); + __put_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +struct video_clip32 { + s32 x, y, width, height; + /* struct video_clip32 * */ u32 next; +}; + +struct video_window32 { + u32 x, y, width, height, chromakey, flags; + /* struct video_clip32 * */ u32 clips; + s32 clipcount; +}; + +static void 
free_kvideo_clips(struct video_window *kp)
+{
+ struct video_clip *cp;
+
+ cp = kp->clips;
+ if (cp != NULL)
+ kfree(cp);
+}
+
+static int get_video_window32(struct video_window *kp, struct video_window32 *up)
+{
+ struct video_clip32 *ucp;
+ struct video_clip *kcp;
+ int nclips, err, i;
+ u32 tmp;
+
+ if (get_user(kp->x, &up->x))
+ return -EFAULT;
+ __get_user(kp->y, &up->y);
+ __get_user(kp->width, &up->width);
+ __get_user(kp->height, &up->height);
+ __get_user(kp->chromakey, &up->chromakey);
+ __get_user(kp->flags, &up->flags);
+ __get_user(kp->clipcount, &up->clipcount);
+ __get_user(tmp, &up->clips);
+ ucp = (struct video_clip32 *)A(tmp);
+ kp->clips = NULL;
+
+ nclips = kp->clipcount;
+ if (nclips == 0)
+ return 0;
+
+ if (ucp == 0)
+ return -EINVAL;
+
+ /* Peculiar interface... */
+ if (nclips < 0)
+ nclips = VIDEO_CLIPMAP_SIZE;
+
+ kcp = kmalloc(nclips * sizeof(struct video_clip), GFP_KERNEL);
+ err = -ENOMEM;
+ if (kcp == NULL)
+ goto cleanup_and_err;
+
+ kp->clips = kcp;
+ for(i = 0; i < nclips; i++) {
+ __get_user(kcp[i].x, &ucp[i].x);
+ __get_user(kcp[i].y, &ucp[i].y);
+ __get_user(kcp[i].width, &ucp[i].width);
+ __get_user(kcp[i].height, &ucp[i].height);
+ kcp[i].next = NULL;
+ }
+
+ return 0;
+
+cleanup_and_err:
+ free_kvideo_clips(kp);
+ return err;
+}
+
+/* You get back everything except the clips... */
+static int put_video_window32(struct video_window *kp, struct video_window32 *up)
+{
+ if (put_user(kp->x, &up->x))
+ return -EFAULT;
+ __put_user(kp->y, &up->y);
+ __put_user(kp->width, &up->width);
+ __put_user(kp->height, &up->height);
+ __put_user(kp->chromakey, &up->chromakey);
+ __put_user(kp->flags, &up->flags);
+ __put_user(kp->clipcount, &up->clipcount);
+ return 0;
+}
+
+#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32)
+#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32)
+#define VIDIOCGWIN32 _IOR('v',9, struct video_window32)
+#define VIDIOCSWIN32 _IOW('v',10, struct video_window32)
+#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32)
+#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32)
+#define VIDIOCGFREQ32 _IOR('v',14, u32)
+#define VIDIOCSFREQ32 _IOW('v',15, u32)
+
+static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ union {
+ struct video_tuner vt;
+ struct video_buffer vb;
+ struct video_window vw;
+ unsigned long vx;
+ } karg;
+ mm_segment_t old_fs = get_fs();
+ void *up = (void *)arg;
+ int err = 0;
+
+ /* First, convert the command.
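Why remapping is needed at all: the ioctl number encodes sizeof() of the argument type, and struct video_tuner32 differs in size from the native struct video_tuner (which holds longs) on a 64-bit kernel, so a 32-bit caller arrives with a different cmd value. Schematically:

/*
 * VIDIOCGTUNER32 == _IOWR('v', 4, struct video_tuner32)
 * VIDIOCGTUNER   == _IOWR('v', 4, struct video_tuner)
 *
 * Same type ('v'), number (4) and direction; only the encoded size
 * field differs, which is why a plain switch suffices to translate.
 */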
*/
+ switch(cmd) {
+ case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
+ case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
+ case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
+ case VIDIOCSWIN32: cmd = VIDIOCSWIN; break;
+ case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
+ case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
+ case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
+ case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
+ };
+
+ switch(cmd) {
+ case VIDIOCSTUNER:
+ case VIDIOCGTUNER:
+ err = get_video_tuner32(&karg.vt, up);
+ break;
+
+ case VIDIOCSWIN:
+ err = get_video_window32(&karg.vw, up);
+ break;
+
+ case VIDIOCSFBUF:
+ err = get_video_buffer32(&karg.vb, up);
+ break;
+
+ case VIDIOCSFREQ:
+ err = get_user(karg.vx, (u32 *)up);
+ break;
+ };
+ if (err)
+ goto out;
+
+ set_fs(KERNEL_DS);
+ err = sys_ioctl(fd, cmd, (unsigned long)&karg);
+ set_fs(old_fs);
+
+ if (cmd == VIDIOCSWIN)
+ free_kvideo_clips(&karg.vw);
+
+ if (err == 0) {
+ switch(cmd) {
+ case VIDIOCGTUNER:
+ err = put_video_tuner32(&karg.vt, up);
+ break;
+
+ case VIDIOCGWIN:
+ err = put_video_window32(&karg.vw, up);
+ break;
+
+ case VIDIOCGFBUF:
+ err = put_video_buffer32(&karg.vb, up);
+ break;
+
+ case VIDIOCGFREQ:
+ err = put_user(((u32)karg.vx), (u32 *)up);
+ break;
+ };
+ }
+out:
+ return err;
+}
+
+struct timeval32 {
+ int tv_sec;
+ int tv_usec;
+};
+
+static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ struct timeval32 *up = (struct timeval32 *)arg;
+ struct timeval ktv;
+ mm_segment_t old_fs = get_fs();
+ int err;
+
+ set_fs(KERNEL_DS);
+ err = sys_ioctl(fd, cmd, (unsigned long)&ktv);
+ set_fs(old_fs);
+ if (!err) {
+ err = put_user(ktv.tv_sec, &up->tv_sec);
+ err |= __put_user(ktv.tv_usec, &up->tv_usec);
+ }
+ return err;
+}
+
+struct ifmap32 {
+ u32 mem_start;
+ u32 mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+};
+
+struct ifreq32 {
+#define IFHWADDRLEN 6
+#define IFNAMSIZ 16
+ union {
+ char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ } ifr_ifrn;
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ struct sockaddr ifru_netmask;
+ struct sockaddr ifru_hwaddr;
+ short ifru_flags;
+ int ifru_ivalue;
+ int ifru_mtu;
+ struct ifmap32 ifru_map;
+ char ifru_slave[IFNAMSIZ]; /* Just fits the size */
+ char ifru_newname[IFNAMSIZ];
+ __kernel_caddr_t32 ifru_data;
+ } ifr_ifru;
+};
+
+struct ifconf32 {
+ int ifc_len; /* size of buffer */
+ __kernel_caddr_t32 ifcbuf;
+};
+
+static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ struct net_device *dev;
+ struct ifreq32 ifr32;
+ int err;
+
+ if (copy_from_user(&ifr32, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+ return -EFAULT;
+
+ dev = dev_get_by_index(ifr32.ifr_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ strcpy(ifr32.ifr_name, dev->name);
+ dev_put(dev); /* dev_get_by_index took a reference */
+
+ err = copy_to_user((struct ifreq32 *)arg, &ifr32, sizeof(struct ifreq32));
+ return (err ?
-EFAULT : 0); +} + +static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct ifconf32 ifc32; + struct ifconf ifc; + struct ifreq32 *ifr32; + struct ifreq *ifr; + mm_segment_t old_fs; + unsigned int i, j; + int err; + + if (copy_from_user(&ifc32, (struct ifconf32 *)arg, sizeof(struct ifconf32))) + return -EFAULT; + + if (ifc32.ifcbuf == 0) { + ifc32.ifc_len = 0; + ifc.ifc_len = 0; + ifc.ifc_buf = NULL; + } else { + ifc.ifc_len = ((ifc32.ifc_len / sizeof (struct ifreq32)) + 1) * + sizeof (struct ifreq); + ifc.ifc_buf = kmalloc (ifc.ifc_len, GFP_KERNEL); + if (!ifc.ifc_buf) + return -ENOMEM; + } + ifr = ifc.ifc_req; + ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf); + for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) { + if (copy_from_user(ifr++, ifr32++, sizeof (struct ifreq32))) { + kfree (ifc.ifc_buf); + return -EFAULT; + } + } + old_fs = get_fs(); set_fs (KERNEL_DS); + err = sys_ioctl (fd, SIOCGIFCONF, (unsigned long)&ifc); + set_fs (old_fs); + if (!err) { + ifr = ifc.ifc_req; + ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf); + for (i = 0, j = 0; i < ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) { + if (copy_to_user(ifr32++, ifr++, sizeof (struct ifreq32))) { + err = -EFAULT; + break; + } + } + if (!err) { + if (ifc32.ifcbuf == 0) { + /* Translate from 64-bit structure multiple to + * a 32-bit one. + */ + i = ifc.ifc_len; + i = ((i / sizeof(struct ifreq)) * sizeof(struct ifreq32)); + ifc32.ifc_len = i; + } else { + if (i <= ifc32.ifc_len) + ifc32.ifc_len = i; + else + ifc32.ifc_len = i - sizeof (struct ifreq32); + } + if (copy_to_user((struct ifconf32 *)arg, &ifc32, sizeof(struct ifconf32))) + err = -EFAULT; + } + } + if (ifc.ifc_buf != NULL) + kfree (ifc.ifc_buf); + return err; +} + +static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct ifreq ifr; + mm_segment_t old_fs; + int err, len; + u32 data, ethcmd; + + if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32))) + return -EFAULT; + ifr.ifr_data = (__kernel_caddr_t)get_free_page(GFP_KERNEL); + if (!ifr.ifr_data) + return -EAGAIN; + + __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); + + if (get_user(ethcmd, (u32 *)A(data))) { + err = -EFAULT; + goto out; + } + switch (ethcmd) { + case ETHTOOL_GDRVINFO: len = sizeof(struct ethtool_drvinfo); break; + case ETHTOOL_GSET: + case ETHTOOL_SSET: + default: len = sizeof(struct ethtool_cmd); break; + } + + if (copy_from_user(ifr.ifr_data, (char *)A(data), len)) { + err = -EFAULT; + goto out; + } + + old_fs = get_fs(); + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&ifr); + set_fs (old_fs); + if (!err) { + u32 data; + + __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); + len = copy_to_user((char *)A(data), ifr.ifr_data, len); + if (len) + err = -EFAULT; + } + +out: + free_page((unsigned long)ifr.ifr_data); + return err; +} + +static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct ifreq ifr; + mm_segment_t old_fs; + int err; + + switch (cmd) { + case SIOCSIFMAP: + err = copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(ifr.ifr_name)); + err |= __get_user(ifr.ifr_map.mem_start, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_start)); + err |= __get_user(ifr.ifr_map.mem_end, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_end)); + err |= __get_user(ifr.ifr_map.base_addr, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.base_addr)); + err |= __get_user(ifr.ifr_map.irq, &(((struct ifreq32 
*)arg)->ifr_ifru.ifru_map.irq)); + err |= __get_user(ifr.ifr_map.dma, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.dma)); + err |= __get_user(ifr.ifr_map.port, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.port)); + if (err) + return -EFAULT; + break; + default: + if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32))) + return -EFAULT; + break; + } + old_fs = get_fs(); + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&ifr); + set_fs (old_fs); + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFTXQLEN: + if (copy_to_user((struct ifreq32 *)arg, &ifr, sizeof(struct ifreq32))) + return -EFAULT; + break; + case SIOCGIFMAP: + err = copy_to_user((struct ifreq32 *)arg, &ifr, sizeof(ifr.ifr_name)); + err |= __put_user(ifr.ifr_map.mem_start, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_start)); + err |= __put_user(ifr.ifr_map.mem_end, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_end)); + err |= __put_user(ifr.ifr_map.base_addr, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.base_addr)); + err |= __put_user(ifr.ifr_map.irq, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.irq)); + err |= __put_user(ifr.ifr_map.dma, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.dma)); + err |= __put_user(ifr.ifr_map.port, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.port)); + if (err) + err = -EFAULT; + break; + } + } + return err; +} + +struct rtentry32 { + u32 rt_pad1; + struct sockaddr rt_dst; /* target address */ + struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ + struct sockaddr rt_genmask; /* target network mask (IP) */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! 
*/
+ /* char * */ u32 rt_dev; /* forcing the device at add */
+ u32 rt_mtu; /* per route MTU/Window */
+ u32 rt_window; /* Window clamping */
+ unsigned short rt_irtt; /* Initial RTT */
+
+};
+
+struct in6_rtmsg32 {
+ struct in6_addr rtmsg_dst;
+ struct in6_addr rtmsg_src;
+ struct in6_addr rtmsg_gateway;
+ u32 rtmsg_type;
+ u16 rtmsg_dst_len;
+ u16 rtmsg_src_len;
+ u32 rtmsg_metric;
+ u32 rtmsg_info;
+ u32 rtmsg_flags;
+ s32 rtmsg_ifindex;
+};
+
+extern struct socket *sockfd_lookup(int fd, int *err);
+
+static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+ void *r = NULL;
+ struct in6_rtmsg r6;
+ struct rtentry r4;
+ char devname[16];
+ u32 rtdev;
+ mm_segment_t old_fs = get_fs();
+
+ struct socket *mysock = sockfd_lookup(fd, &ret);
+
+ if (mysock && mysock->sk && mysock->sk->family == AF_INET6) { /* ipv6 */
+ ret = copy_from_user (&r6.rtmsg_dst, &(((struct in6_rtmsg32 *)arg)->rtmsg_dst),
+ 3 * sizeof(struct in6_addr));
+ ret |= __get_user (r6.rtmsg_type, &(((struct in6_rtmsg32 *)arg)->rtmsg_type));
+ ret |= __get_user (r6.rtmsg_dst_len, &(((struct in6_rtmsg32 *)arg)->rtmsg_dst_len));
+ ret |= __get_user (r6.rtmsg_src_len, &(((struct in6_rtmsg32 *)arg)->rtmsg_src_len));
+ ret |= __get_user (r6.rtmsg_metric, &(((struct in6_rtmsg32 *)arg)->rtmsg_metric));
+ ret |= __get_user (r6.rtmsg_info, &(((struct in6_rtmsg32 *)arg)->rtmsg_info));
+ ret |= __get_user (r6.rtmsg_flags, &(((struct in6_rtmsg32 *)arg)->rtmsg_flags));
+ ret |= __get_user (r6.rtmsg_ifindex, &(((struct in6_rtmsg32 *)arg)->rtmsg_ifindex));
+
+ r = (void *) &r6;
+ } else { /* ipv4 */
+ ret = copy_from_user (&r4.rt_dst, &(((struct rtentry32 *)arg)->rt_dst), 3 * sizeof(struct sockaddr));
+ ret |= __get_user (r4.rt_flags, &(((struct rtentry32 *)arg)->rt_flags));
+ ret |= __get_user (r4.rt_metric, &(((struct rtentry32 *)arg)->rt_metric));
+ ret |= __get_user (r4.rt_mtu, &(((struct rtentry32 *)arg)->rt_mtu));
+ ret |= __get_user (r4.rt_window, &(((struct rtentry32 *)arg)->rt_window));
+ ret |= __get_user (r4.rt_irtt, &(((struct rtentry32 *)arg)->rt_irtt));
+ ret |= __get_user (rtdev, &(((struct rtentry32 *)arg)->rt_dev));
+ if (rtdev) {
+ ret |= copy_from_user (devname, (char *)A(rtdev), 15);
+ r4.rt_dev = devname; devname[15] = 0;
+ } else
+ r4.rt_dev = 0;
+
+ r = (void *) &r4;
+ }
+
+ if (mysock)
+ fput(mysock->file); /* drop the file reference sockfd_lookup took */
+
+ if (ret)
+ return -EFAULT;
+
+ set_fs (KERNEL_DS);
+ ret = sys_ioctl (fd, cmd, (long) r);
+ set_fs (old_fs);
+
+ return ret;
+}
+
+struct hd_geometry32 {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned short cylinders;
+ u32 start;
+};
+
+static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ mm_segment_t old_fs = get_fs();
+ struct hd_geometry geo;
+ int err;
+
+ set_fs (KERNEL_DS);
+ err = sys_ioctl(fd, HDIO_GETGEO, (unsigned long)&geo);
+ set_fs (old_fs);
+ if (!err) {
+ err = copy_to_user ((struct hd_geometry32 *)arg, &geo, 4);
+ err |= __put_user (geo.start, &(((struct hd_geometry32 *)arg)->start));
+ }
+ return err ?
-EFAULT : 0; +} + + +static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + unsigned long kval; + unsigned int *uvp; + int error; + + set_fs(KERNEL_DS); + error = sys_ioctl(fd, cmd, (long)&kval); + set_fs(old_fs); + + if (error == 0) { + uvp = (unsigned int *)arg; + if (put_user(kval, uvp)) + error = -EFAULT; + } + return error; +} + +struct floppy_struct32 { + unsigned int size; + unsigned int sect; + unsigned int head; + unsigned int track; + unsigned int stretch; + unsigned char gap; + unsigned char rate; + unsigned char spec1; + unsigned char fmt_gap; + const __kernel_caddr_t32 name; +}; + +struct floppy_drive_params32 { + char cmos; + u32 max_dtr; + u32 hlt; + u32 hut; + u32 srt; + u32 spinup; + u32 spindown; + unsigned char spindown_offset; + unsigned char select_delay; + unsigned char rps; + unsigned char tracks; + u32 timeout; + unsigned char interleave_sect; + struct floppy_max_errors max_errors; + char flags; + char read_track; + short autodetect[8]; + int checkfreq; + int native_format; +}; + +struct floppy_drive_struct32 { + signed char flags; + u32 spinup_date; + u32 select_date; + u32 first_read_date; + short probed_format; + short track; + short maxblock; + short maxtrack; + int generation; + int keep_data; + int fd_ref; + int fd_device; + int last_checked; + __kernel_caddr_t32 dmabuf; + int bufblocks; +}; + +struct floppy_fdc_state32 { + int spec1; + int spec2; + int dtr; + unsigned char version; + unsigned char dor; + u32 address; + unsigned int rawcmd:2; + unsigned int reset:1; + unsigned int need_configure:1; + unsigned int perp_mode:2; + unsigned int has_fifo:1; + unsigned int driver_version; + unsigned char track[4]; +}; + +struct floppy_write_errors32 { + unsigned int write_errors; + u32 first_error_sector; + int first_error_generation; + u32 last_error_sector; + int last_error_generation; + unsigned int badness; +}; + +#define FDSETPRM32 _IOW(2, 0x42, struct floppy_struct32) +#define FDDEFPRM32 _IOW(2, 0x43, struct floppy_struct32) +#define FDGETPRM32 _IOR(2, 0x04, struct floppy_struct32) +#define FDSETDRVPRM32 _IOW(2, 0x90, struct floppy_drive_params32) +#define FDGETDRVPRM32 _IOR(2, 0x11, struct floppy_drive_params32) +#define FDGETDRVSTAT32 _IOR(2, 0x12, struct floppy_drive_struct32) +#define FDPOLLDRVSTAT32 _IOR(2, 0x13, struct floppy_drive_struct32) +#define FDGETFDCSTAT32 _IOR(2, 0x15, struct floppy_fdc_state32) +#define FDWERRORGET32 _IOR(2, 0x17, struct floppy_write_errors32) + +static struct { + unsigned int cmd32; + unsigned int cmd; +} fd_ioctl_trans_table[] = { + { FDSETPRM32, FDSETPRM }, + { FDDEFPRM32, FDDEFPRM }, + { FDGETPRM32, FDGETPRM }, + { FDSETDRVPRM32, FDSETDRVPRM }, + { FDGETDRVPRM32, FDGETDRVPRM }, + { FDGETDRVSTAT32, FDGETDRVSTAT }, + { FDPOLLDRVSTAT32, FDPOLLDRVSTAT }, + { FDGETFDCSTAT32, FDGETFDCSTAT }, + { FDWERRORGET32, FDWERRORGET } +}; + +#define NR_FD_IOCTL_TRANS (sizeof(fd_ioctl_trans_table)/sizeof(fd_ioctl_trans_table[0])) + +static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + void *karg = NULL; + unsigned int kcmd = 0; + int i, err; + + for (i = 0; i < NR_FD_IOCTL_TRANS; i++) + if (cmd == fd_ioctl_trans_table[i].cmd32) { + kcmd = fd_ioctl_trans_table[i].cmd; + break; + } + if (!kcmd) + return -EINVAL; + + switch (cmd) { + case FDSETPRM32: + case FDDEFPRM32: + case FDGETPRM32: + { + struct floppy_struct *f; + + f = karg = kmalloc(sizeof(struct floppy_struct), GFP_KERNEL); + if (!karg) + return -ENOMEM; 
+ if (cmd == FDGETPRM32) + break; + err = __get_user(f->size, &((struct floppy_struct32 *)arg)->size); + err |= __get_user(f->sect, &((struct floppy_struct32 *)arg)->sect); + err |= __get_user(f->head, &((struct floppy_struct32 *)arg)->head); + err |= __get_user(f->track, &((struct floppy_struct32 *)arg)->track); + err |= __get_user(f->stretch, &((struct floppy_struct32 *)arg)->stretch); + err |= __get_user(f->gap, &((struct floppy_struct32 *)arg)->gap); + err |= __get_user(f->rate, &((struct floppy_struct32 *)arg)->rate); + err |= __get_user(f->spec1, &((struct floppy_struct32 *)arg)->spec1); + err |= __get_user(f->fmt_gap, &((struct floppy_struct32 *)arg)->fmt_gap); + err |= __get_user((u64)f->name, &((struct floppy_struct32 *)arg)->name); + if (err) { + err = -EFAULT; + goto out; + } + break; + } + case FDSETDRVPRM32: + case FDGETDRVPRM32: + { + struct floppy_drive_params *f; + + f = karg = kmalloc(sizeof(struct floppy_drive_params), GFP_KERNEL); + if (!karg) + return -ENOMEM; + if (cmd == FDGETDRVPRM32) + break; + err = __get_user(f->cmos, &((struct floppy_drive_params32 *)arg)->cmos); + err |= __get_user(f->max_dtr, &((struct floppy_drive_params32 *)arg)->max_dtr); + err |= __get_user(f->hlt, &((struct floppy_drive_params32 *)arg)->hlt); + err |= __get_user(f->hut, &((struct floppy_drive_params32 *)arg)->hut); + err |= __get_user(f->srt, &((struct floppy_drive_params32 *)arg)->srt); + err |= __get_user(f->spinup, &((struct floppy_drive_params32 *)arg)->spinup); + err |= __get_user(f->spindown, &((struct floppy_drive_params32 *)arg)->spindown); + err |= __get_user(f->spindown_offset, &((struct floppy_drive_params32 *)arg)->spindown_offset); + err |= __get_user(f->select_delay, &((struct floppy_drive_params32 *)arg)->select_delay); + err |= __get_user(f->rps, &((struct floppy_drive_params32 *)arg)->rps); + err |= __get_user(f->tracks, &((struct floppy_drive_params32 *)arg)->tracks); + err |= __get_user(f->timeout, &((struct floppy_drive_params32 *)arg)->timeout); + err |= __get_user(f->interleave_sect, &((struct floppy_drive_params32 *)arg)->interleave_sect); + err |= __copy_from_user(&f->max_errors, &((struct floppy_drive_params32 *)arg)->max_errors, sizeof(f->max_errors)); + err |= __get_user(f->flags, &((struct floppy_drive_params32 *)arg)->flags); + err |= __get_user(f->read_track, &((struct floppy_drive_params32 *)arg)->read_track); + err |= __copy_from_user(f->autodetect, ((struct floppy_drive_params32 *)arg)->autodetect, sizeof(f->autodetect)); + err |= __get_user(f->checkfreq, &((struct floppy_drive_params32 *)arg)->checkfreq); + err |= __get_user(f->native_format, &((struct floppy_drive_params32 *)arg)->native_format); + if (err) { + err = -EFAULT; + goto out; + } + break; + } + case FDGETDRVSTAT32: + case FDPOLLDRVSTAT32: + karg = kmalloc(sizeof(struct floppy_drive_struct), GFP_KERNEL); + if (!karg) + return -ENOMEM; + break; + case FDGETFDCSTAT32: + karg = kmalloc(sizeof(struct floppy_fdc_state), GFP_KERNEL); + if (!karg) + return -ENOMEM; + break; + case FDWERRORGET32: + karg = kmalloc(sizeof(struct floppy_write_errors), GFP_KERNEL); + if (!karg) + return -ENOMEM; + break; + default: + return -EINVAL; + } + set_fs (KERNEL_DS); + err = sys_ioctl (fd, kcmd, (unsigned long)karg); + set_fs (old_fs); + if (err) + goto out; + switch (cmd) { + case FDGETPRM32: + { + struct floppy_struct *f = karg; + + err = __put_user(f->size, &((struct floppy_struct32 *)arg)->size); + err |= __put_user(f->sect, &((struct floppy_struct32 *)arg)->sect); + err |= __put_user(f->head, &((struct 
floppy_struct32 *)arg)->head); + err |= __put_user(f->track, &((struct floppy_struct32 *)arg)->track); + err |= __put_user(f->stretch, &((struct floppy_struct32 *)arg)->stretch); + err |= __put_user(f->gap, &((struct floppy_struct32 *)arg)->gap); + err |= __put_user(f->rate, &((struct floppy_struct32 *)arg)->rate); + err |= __put_user(f->spec1, &((struct floppy_struct32 *)arg)->spec1); + err |= __put_user(f->fmt_gap, &((struct floppy_struct32 *)arg)->fmt_gap); + err |= __put_user((u64)f->name, &((struct floppy_struct32 *)arg)->name); + break; + } + case FDGETDRVPRM32: + { + struct floppy_drive_params *f = karg; + + err = __put_user(f->cmos, &((struct floppy_drive_params32 *)arg)->cmos); + err |= __put_user(f->max_dtr, &((struct floppy_drive_params32 *)arg)->max_dtr); + err |= __put_user(f->hlt, &((struct floppy_drive_params32 *)arg)->hlt); + err |= __put_user(f->hut, &((struct floppy_drive_params32 *)arg)->hut); + err |= __put_user(f->srt, &((struct floppy_drive_params32 *)arg)->srt); + err |= __put_user(f->spinup, &((struct floppy_drive_params32 *)arg)->spinup); + err |= __put_user(f->spindown, &((struct floppy_drive_params32 *)arg)->spindown); + err |= __put_user(f->spindown_offset, &((struct floppy_drive_params32 *)arg)->spindown_offset); + err |= __put_user(f->select_delay, &((struct floppy_drive_params32 *)arg)->select_delay); + err |= __put_user(f->rps, &((struct floppy_drive_params32 *)arg)->rps); + err |= __put_user(f->tracks, &((struct floppy_drive_params32 *)arg)->tracks); + err |= __put_user(f->timeout, &((struct floppy_drive_params32 *)arg)->timeout); + err |= __put_user(f->interleave_sect, &((struct floppy_drive_params32 *)arg)->interleave_sect); + err |= __copy_to_user(&((struct floppy_drive_params32 *)arg)->max_errors, &f->max_errors, sizeof(f->max_errors)); + err |= __put_user(f->flags, &((struct floppy_drive_params32 *)arg)->flags); + err |= __put_user(f->read_track, &((struct floppy_drive_params32 *)arg)->read_track); + err |= __copy_to_user(((struct floppy_drive_params32 *)arg)->autodetect, f->autodetect, sizeof(f->autodetect)); + err |= __put_user(f->checkfreq, &((struct floppy_drive_params32 *)arg)->checkfreq); + err |= __put_user(f->native_format, &((struct floppy_drive_params32 *)arg)->native_format); + break; + } + case FDGETDRVSTAT32: + case FDPOLLDRVSTAT32: + { + struct floppy_drive_struct *f = karg; + + err = __put_user(f->flags, &((struct floppy_drive_struct32 *)arg)->flags); + err |= __put_user(f->spinup_date, &((struct floppy_drive_struct32 *)arg)->spinup_date); + err |= __put_user(f->select_date, &((struct floppy_drive_struct32 *)arg)->select_date); + err |= __put_user(f->first_read_date, &((struct floppy_drive_struct32 *)arg)->first_read_date); + err |= __put_user(f->probed_format, &((struct floppy_drive_struct32 *)arg)->probed_format); + err |= __put_user(f->track, &((struct floppy_drive_struct32 *)arg)->track); + err |= __put_user(f->maxblock, &((struct floppy_drive_struct32 *)arg)->maxblock); + err |= __put_user(f->maxtrack, &((struct floppy_drive_struct32 *)arg)->maxtrack); + err |= __put_user(f->generation, &((struct floppy_drive_struct32 *)arg)->generation); + err |= __put_user(f->keep_data, &((struct floppy_drive_struct32 *)arg)->keep_data); + err |= __put_user(f->fd_ref, &((struct floppy_drive_struct32 *)arg)->fd_ref); + err |= __put_user(f->fd_device, &((struct floppy_drive_struct32 *)arg)->fd_device); + err |= __put_user(f->last_checked, &((struct floppy_drive_struct32 *)arg)->last_checked); + err |= __put_user((u64)f->dmabuf, &((struct 
floppy_drive_struct32 *)arg)->dmabuf); + err |= __put_user((u64)f->bufblocks, &((struct floppy_drive_struct32 *)arg)->bufblocks); + break; + } + case FDGETFDCSTAT32: + { + struct floppy_fdc_state *f = karg; + + err = __put_user(f->spec1, &((struct floppy_fdc_state32 *)arg)->spec1); + err |= __put_user(f->spec2, &((struct floppy_fdc_state32 *)arg)->spec2); + err |= __put_user(f->dtr, &((struct floppy_fdc_state32 *)arg)->dtr); + err |= __put_user(f->version, &((struct floppy_fdc_state32 *)arg)->version); + err |= __put_user(f->dor, &((struct floppy_fdc_state32 *)arg)->dor); + err |= __put_user(f->address, &((struct floppy_fdc_state32 *)arg)->address); + err |= __copy_to_user((char *)&((struct floppy_fdc_state32 *)arg)->address + + sizeof(((struct floppy_fdc_state32 *)arg)->address), + (char *)&f->address + sizeof(f->address), sizeof(int)); + err |= __put_user(f->driver_version, &((struct floppy_fdc_state32 *)arg)->driver_version); + err |= __copy_to_user(((struct floppy_fdc_state32 *)arg)->track, f->track, sizeof(f->track)); + break; + } + case FDWERRORGET32: + { + struct floppy_write_errors *f = karg; + + err = __put_user(f->write_errors, &((struct floppy_write_errors32 *)arg)->write_errors); + err |= __put_user(f->first_error_sector, &((struct floppy_write_errors32 *)arg)->first_error_sector); + err |= __put_user(f->first_error_generation, &((struct floppy_write_errors32 *)arg)->first_error_generation); + err |= __put_user(f->last_error_sector, &((struct floppy_write_errors32 *)arg)->last_error_sector); + err |= __put_user(f->last_error_generation, &((struct floppy_write_errors32 *)arg)->last_error_generation); + err |= __put_user(f->badness, &((struct floppy_write_errors32 *)arg)->badness); + break; + } + default: + break; + } + if (err) + err = -EFAULT; + +out: if (karg) kfree(karg); + return err; +} + +struct ppp_option_data32 { + __kernel_caddr_t32 ptr; + __u32 length; + int transmit; +}; +#define PPPIOCSCOMPRESS32 _IOW('t', 77, struct ppp_option_data32) + +struct ppp_idle32 { + __kernel_time_t32 xmit_idle; + __kernel_time_t32 recv_idle; +}; +#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) + +static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct ppp_option_data32 data32; + struct ppp_option_data data; + struct ppp_idle32 idle32; + struct ppp_idle idle; + unsigned int kcmd; + void *karg; + int err = 0; + + switch (cmd) { + case PPPIOCGIDLE32: + kcmd = PPPIOCGIDLE; + karg = &idle; + break; + case PPPIOCSCOMPRESS32: + if (copy_from_user(&data32, (struct ppp_option_data32 *)arg, sizeof(struct ppp_option_data32))) + return -EFAULT; + data.ptr = kmalloc (data32.length, GFP_KERNEL); + if (!data.ptr) + return -ENOMEM; + if (copy_from_user(data.ptr, (__u8 *)A(data32.ptr), data32.length)) { + kfree(data.ptr); + return -EFAULT; + } + data.length = data32.length; + data.transmit = data32.transmit; + kcmd = PPPIOCSCOMPRESS; + karg = &data; + break; + default: + do { + static int count = 0; + if (++count <= 20) + printk("ppp_ioctl: Unknown cmd fd(%d) " + "cmd(%08x) arg(%08x)\n", + (int)fd, (unsigned int)cmd, (unsigned int)arg); + } while (0); + return -EINVAL; + } + set_fs (KERNEL_DS); + err = sys_ioctl (fd, kcmd, (unsigned long)karg); + set_fs (old_fs); + switch (cmd) { + case PPPIOCGIDLE32: + if (err) + return err; + idle32.xmit_idle = idle.xmit_idle; + idle32.recv_idle = idle.recv_idle; + if (copy_to_user((struct ppp_idle32 *)arg, &idle32, sizeof(struct ppp_idle32))) + return -EFAULT; + break; + case 
PPPIOCSCOMPRESS32: + kfree(data.ptr); + break; + default: + break; + } + return err; +} + + +struct mtget32 { + __u32 mt_type; + __u32 mt_resid; + __u32 mt_dsreg; + __u32 mt_gstat; + __u32 mt_erreg; + __kernel_daddr_t32 mt_fileno; + __kernel_daddr_t32 mt_blkno; +}; +#define MTIOCGET32 _IOR('m', 2, struct mtget32) + +struct mtpos32 { + __u32 mt_blkno; +}; +#define MTIOCPOS32 _IOR('m', 3, struct mtpos32) + +struct mtconfiginfo32 { + __u32 mt_type; + __u32 ifc_type; + __u16 irqnr; + __u16 dmanr; + __u16 port; + __u32 debug; + __u32 have_dens:1; + __u32 have_bsf:1; + __u32 have_fsr:1; + __u32 have_bsr:1; + __u32 have_eod:1; + __u32 have_seek:1; + __u32 have_tell:1; + __u32 have_ras1:1; + __u32 have_ras2:1; + __u32 have_ras3:1; + __u32 have_qfa:1; + __u32 pad1:5; + char reserved[10]; +}; +#define MTIOCGETCONFIG32 _IOR('m', 4, struct mtconfiginfo32) +#define MTIOCSETCONFIG32 _IOW('m', 5, struct mtconfiginfo32) + +static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct mtconfiginfo info; + struct mtget get; + struct mtpos pos; + unsigned long kcmd; + void *karg; + int err = 0; + + switch(cmd) { + case MTIOCPOS32: + kcmd = MTIOCPOS; + karg = &pos; + break; + case MTIOCGET32: + kcmd = MTIOCGET; + karg = &get; + break; + case MTIOCGETCONFIG32: + kcmd = MTIOCGETCONFIG; + karg = &info; + break; + case MTIOCSETCONFIG32: + kcmd = MTIOCSETCONFIG; + karg = &info; + err = __get_user(info.mt_type, &((struct mtconfiginfo32 *)arg)->mt_type); + err |= __get_user(info.ifc_type, &((struct mtconfiginfo32 *)arg)->ifc_type); + err |= __get_user(info.irqnr, &((struct mtconfiginfo32 *)arg)->irqnr); + err |= __get_user(info.dmanr, &((struct mtconfiginfo32 *)arg)->dmanr); + err |= __get_user(info.port, &((struct mtconfiginfo32 *)arg)->port); + err |= __get_user(info.debug, &((struct mtconfiginfo32 *)arg)->debug); + err |= __copy_from_user((char *)&info.debug + sizeof(info.debug), + (char *)&((struct mtconfiginfo32 *)arg)->debug + + sizeof(((struct mtconfiginfo32 *)arg)->debug), sizeof(__u32)); + if (err) + return -EFAULT; + break; + default: + do { + static int count = 0; + if (++count <= 20) + printk("mt_ioctl: Unknown cmd fd(%d) " + "cmd(%08x) arg(%08x)\n", + (int)fd, (unsigned int)cmd, (unsigned int)arg); + } while (0); + return -EINVAL; + } + set_fs (KERNEL_DS); + err = sys_ioctl (fd, kcmd, (unsigned long)karg); + set_fs (old_fs); + if (err) + return err; + switch (cmd) { + case MTIOCPOS32: + err = __put_user(pos.mt_blkno, &((struct mtpos32 *)arg)->mt_blkno); + break; + case MTIOCGET32: + err = __put_user(get.mt_type, &((struct mtget32 *)arg)->mt_type); + err |= __put_user(get.mt_resid, &((struct mtget32 *)arg)->mt_resid); + err |= __put_user(get.mt_dsreg, &((struct mtget32 *)arg)->mt_dsreg); + err |= __put_user(get.mt_gstat, &((struct mtget32 *)arg)->mt_gstat); + err |= __put_user(get.mt_erreg, &((struct mtget32 *)arg)->mt_erreg); + err |= __put_user(get.mt_fileno, &((struct mtget32 *)arg)->mt_fileno); + err |= __put_user(get.mt_blkno, &((struct mtget32 *)arg)->mt_blkno); + break; + case MTIOCGETCONFIG32: + err = __put_user(info.mt_type, &((struct mtconfiginfo32 *)arg)->mt_type); + err |= __put_user(info.ifc_type, &((struct mtconfiginfo32 *)arg)->ifc_type); + err |= __put_user(info.irqnr, &((struct mtconfiginfo32 *)arg)->irqnr); + err |= __put_user(info.dmanr, &((struct mtconfiginfo32 *)arg)->dmanr); + err |= __put_user(info.port, &((struct mtconfiginfo32 *)arg)->port); + err |= __put_user(info.debug, &((struct mtconfiginfo32 *)arg)->debug); + 
err |= __copy_to_user((char *)&((struct mtconfiginfo32 *)arg)->debug + + sizeof(((struct mtconfiginfo32 *)arg)->debug), + (char *)&info.debug + sizeof(info.debug), sizeof(__u32)); + break; + case MTIOCSETCONFIG32: + break; + } + return err ? -EFAULT: 0; +} + +struct cdrom_read32 { + int cdread_lba; + __kernel_caddr_t32 cdread_bufaddr; + int cdread_buflen; +}; + +struct cdrom_read_audio32 { + union cdrom_addr addr; + u_char addr_format; + int nframes; + __kernel_caddr_t32 buf; +}; + +struct cdrom_generic_command32 { + unsigned char cmd[CDROM_PACKET_SIZE]; + __kernel_caddr_t32 buffer; + unsigned int buflen; + int stat; + __kernel_caddr_t32 sense; + __kernel_caddr_t32 reserved[3]; +}; + +static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct cdrom_read cdread; + struct cdrom_read_audio cdreadaudio; + struct cdrom_generic_command cgc; + __kernel_caddr_t32 addr; + char *data = 0; + void *karg; + int err = 0; + + switch(cmd) { + case CDROMREADMODE2: + case CDROMREADMODE1: + case CDROMREADRAW: + case CDROMREADCOOKED: + karg = &cdread; + err = __get_user(cdread.cdread_lba, &((struct cdrom_read32 *)arg)->cdread_lba); + err |= __get_user(addr, &((struct cdrom_read32 *)arg)->cdread_bufaddr); + err |= __get_user(cdread.cdread_buflen, &((struct cdrom_read32 *)arg)->cdread_buflen); + if (err) + return -EFAULT; + data = kmalloc(cdread.cdread_buflen, GFP_KERNEL); + if (!data) + return -ENOMEM; + cdread.cdread_bufaddr = data; + break; + case CDROMREADAUDIO: + karg = &cdreadaudio; + err = copy_from_user(&cdreadaudio.addr, &((struct cdrom_read_audio32 *)arg)->addr, sizeof(cdreadaudio.addr)); + err |= __get_user(cdreadaudio.addr_format, &((struct cdrom_read_audio32 *)arg)->addr_format); + err |= __get_user(cdreadaudio.nframes, &((struct cdrom_read_audio32 *)arg)->nframes); + err |= __get_user(addr, &((struct cdrom_read_audio32 *)arg)->buf); + if (err) + return -EFAULT; + data = kmalloc(cdreadaudio.nframes * 2352, GFP_KERNEL); + if (!data) + return -ENOMEM; + cdreadaudio.buf = data; + break; + case CDROM_SEND_PACKET: + karg = &cgc; + err = copy_from_user(cgc.cmd, &((struct cdrom_generic_command32 *)arg)->cmd, sizeof(cgc.cmd)); + err |= __get_user(addr, &((struct cdrom_generic_command32 *)arg)->buffer); + err |= __get_user(cgc.buflen, &((struct cdrom_generic_command32 *)arg)->buflen); + if (err) + return -EFAULT; + if ((data = kmalloc(cgc.buflen, GFP_KERNEL)) == NULL) + return -ENOMEM; + cgc.buffer = data; + break; + default: + do { + static int count = 0; + if (++count <= 20) + printk("cdrom_ioctl: Unknown cmd fd(%d) " + "cmd(%08x) arg(%08x)\n", + (int)fd, (unsigned int)cmd, (unsigned int)arg); + } while (0); + return -EINVAL; + } + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)karg); + set_fs (old_fs); + if (err) + goto out; + switch (cmd) { + case CDROMREADMODE2: + case CDROMREADMODE1: + case CDROMREADRAW: + case CDROMREADCOOKED: + err = copy_to_user((char *)A(addr), data, cdread.cdread_buflen); + break; + case CDROMREADAUDIO: + err = copy_to_user((char *)A(addr), data, cdreadaudio.nframes * 2352); + break; + case CDROM_SEND_PACKET: + err = copy_to_user((char *)A(addr), data, cgc.buflen); + break; + default: + break; + } +out: if (data) + kfree(data); + return err ? 
-EFAULT : 0; +} + +struct loop_info32 { + int lo_number; /* ioctl r/o */ + __kernel_dev_t32 lo_device; /* ioctl r/o */ + unsigned int lo_inode; /* ioctl r/o */ + __kernel_dev_t32 lo_rdevice; /* ioctl r/o */ + int lo_offset; + int lo_encrypt_type; + int lo_encrypt_key_size; /* ioctl w/o */ + int lo_flags; /* ioctl r/o */ + char lo_name[LO_NAME_SIZE]; + unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ + unsigned int lo_init[2]; + char reserved[4]; +}; + +static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct loop_info l; + int err = -EINVAL; + + switch(cmd) { + case LOOP_SET_STATUS: + err = get_user(l.lo_number, &((struct loop_info32 *)arg)->lo_number); + err |= __get_user(l.lo_device, &((struct loop_info32 *)arg)->lo_device); + err |= __get_user(l.lo_inode, &((struct loop_info32 *)arg)->lo_inode); + err |= __get_user(l.lo_rdevice, &((struct loop_info32 *)arg)->lo_rdevice); + err |= __copy_from_user((char *)&l.lo_offset, (char *)&((struct loop_info32 *)arg)->lo_offset, + 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); + if (err) { + err = -EFAULT; + } else { + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&l); + set_fs (old_fs); + } + break; + case LOOP_GET_STATUS: + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&l); + set_fs (old_fs); + if (!err) { + err = put_user(l.lo_number, &((struct loop_info32 *)arg)->lo_number); + err |= __put_user(l.lo_device, &((struct loop_info32 *)arg)->lo_device); + err |= __put_user(l.lo_inode, &((struct loop_info32 *)arg)->lo_inode); + err |= __put_user(l.lo_rdevice, &((struct loop_info32 *)arg)->lo_rdevice); + err |= __copy_to_user((char *)&((struct loop_info32 *)arg)->lo_offset, + (char *)&l.lo_offset, (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); + if (err) + err = -EFAULT; + } + break; + default: { + static int count = 0; + if (++count <= 20) + printk("%s: Unknown loop ioctl cmd, fd(%d) " + "cmd(%08x) arg(%08lx)\n", + __FUNCTION__, fd, cmd, arg); + } + } + return err; +} + +extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); + +#ifdef CONFIG_VT +static int vt_check(struct file *file) +{ + struct tty_struct *tty; + struct inode *inode = file->f_dentry->d_inode; + + if (file->f_op->ioctl != tty_ioctl) + return -EINVAL; + + tty = (struct tty_struct *)file->private_data; + if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) + return -EINVAL; + + if (tty->driver.ioctl != vt_ioctl) + return -EINVAL; + + /* + * To have permissions to do most of the vt ioctls, we either have + * to be the owner of the tty, or super-user. 
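+ *
+ * Returns a negative errno if the descriptor is not a virtual
+ * console at all, 1 if the caller may issue the privileged vt
+ * ioctls, and 0 if only the read-only ones are permitted.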
+ */ + if (current->tty == tty || suser()) + return 1; + return 0; +} + +struct consolefontdesc32 { + unsigned short charcount; /* characters in font (256 or 512) */ + unsigned short charheight; /* scan lines per character (1-32) */ + u32 chardata; /* font data in expanded form */ +}; + +static int do_fontx_ioctl(unsigned int fd, int cmd, struct consolefontdesc32 *user_cfd, struct file *file) +{ + struct consolefontdesc cfdarg; + struct console_font_op op; + int i, perm; + + perm = vt_check(file); + if (perm < 0) return perm; + + if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc32))) + return -EFAULT; + + cfdarg.chardata = (unsigned char *)A(((struct consolefontdesc32 *)&cfdarg)->chardata); + + switch (cmd) { + case PIO_FONTX: + if (!perm) + return -EPERM; + op.op = KD_FONT_OP_SET; + op.flags = 0; + op.width = 8; + op.height = cfdarg.charheight; + op.charcount = cfdarg.charcount; + op.data = cfdarg.chardata; + return con_font_op(fg_console, &op); + case GIO_FONTX: + if (!cfdarg.chardata) + return 0; + op.op = KD_FONT_OP_GET; + op.flags = 0; + op.width = 8; + op.height = cfdarg.charheight; + op.charcount = cfdarg.charcount; + op.data = cfdarg.chardata; + i = con_font_op(fg_console, &op); + if (i) + return i; + cfdarg.charheight = op.height; + cfdarg.charcount = op.charcount; + ((struct consolefontdesc32 *)&cfdarg)->chardata = (unsigned long)cfdarg.chardata; + if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc32))) + return -EFAULT; + return 0; + } + return -EINVAL; +} + +struct console_font_op32 { + unsigned int op; /* operation code KD_FONT_OP_* */ + unsigned int flags; /* KD_FONT_FLAG_* */ + unsigned int width, height; /* font size */ + unsigned int charcount; + u32 data; /* font data with height fixed to 32 */ +}; + +static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, struct console_font_op32 *fontop, struct file *file) +{ + struct console_font_op op; + int perm = vt_check(file), i; + struct vt_struct *vt; + + if (perm < 0) return perm; + + if (copy_from_user(&op, (void *) fontop, sizeof(struct console_font_op32))) + return -EFAULT; + if (!perm && op.op != KD_FONT_OP_GET) + return -EPERM; + op.data = (unsigned char *)A(((struct console_font_op32 *)&op)->data); + op.flags |= KD_FONT_FLAG_OLD; + vt = (struct vt_struct *)((struct tty_struct *)file->private_data)->driver_data; + i = con_font_op(vt->vc_num, &op); + if (i) return i; + ((struct console_font_op32 *)&op)->data = (unsigned long)op.data; + if (copy_to_user((void *) fontop, &op, sizeof(struct console_font_op32))) + return -EFAULT; + return 0; +} + +struct fb_fix_screeninfo32 { + char id[16]; /* identification string eg "TT Builtin" */ + unsigned int smem_start; /* Start of frame buffer mem */ + /* (physical address) */ + __u32 smem_len; /* Length of frame buffer mem */ + __u32 type; /* see FB_TYPE_* */ + __u32 type_aux; /* Interleave for interleaved Planes */ + __u32 visual; /* see FB_VISUAL_* */ + __u16 xpanstep; /* zero if no hardware panning */ + __u16 ypanstep; /* zero if no hardware panning */ + __u16 ywrapstep; /* zero if no hardware ywrap */ + __u32 line_length; /* length of a line in bytes */ + unsigned int mmio_start; /* Start of Memory Mapped I/O */ + /* (physical address) */ + __u32 mmio_len; /* Length of Memory Mapped I/O */ + __u32 accel; /* Type of acceleration available */ + __u16 reserved[3]; /* Reserved for future compatibility */ +}; + +static int do_fbioget_fscreeninfo_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + 
struct fb_fix_screeninfo fix; + int err; + + set_fs(KERNEL_DS); + err = sys_ioctl(fd, cmd, (long)&fix); + set_fs(old_fs); + + if (err == 0) { + unsigned int smem_start = fix.smem_start; /* lose top 32 bits */ + unsigned int mmio_start = fix.mmio_start; /* lose top 32 bits */ + int i; + + err = put_user(fix.id[0], &((struct fb_fix_screeninfo32 *)arg)->id[0]); + for (i=1; i<16; i++) { + err |= __put_user(fix.id[i], &((struct fb_fix_screeninfo32 *)arg)->id[i]); + } + err |= __put_user(smem_start, &((struct fb_fix_screeninfo32 *)arg)->smem_start); + err |= __put_user(fix.smem_len, &((struct fb_fix_screeninfo32 *)arg)->smem_len); + err |= __put_user(fix.type, &((struct fb_fix_screeninfo32 *)arg)->type); + err |= __put_user(fix.type_aux, &((struct fb_fix_screeninfo32 *)arg)->type_aux); + err |= __put_user(fix.visual, &((struct fb_fix_screeninfo32 *)arg)->visual); + err |= __put_user(fix.xpanstep, &((struct fb_fix_screeninfo32 *)arg)->xpanstep); + err |= __put_user(fix.ypanstep, &((struct fb_fix_screeninfo32 *)arg)->ypanstep); + err |= __put_user(fix.ywrapstep, &((struct fb_fix_screeninfo32 *)arg)->ywrapstep); + err |= __put_user(fix.line_length, &((struct fb_fix_screeninfo32 *)arg)->line_length); + err |= __put_user(mmio_start, &((struct fb_fix_screeninfo32 *)arg)->mmio_start); + err |= __put_user(fix.mmio_len, &((struct fb_fix_screeninfo32 *)arg)->mmio_len); + err |= __put_user(fix.accel, &((struct fb_fix_screeninfo32 *)arg)->accel); + err |= __put_user(fix.reserved[0], &((struct fb_fix_screeninfo32 *)arg)->reserved[0]); + err |= __put_user(fix.reserved[1], &((struct fb_fix_screeninfo32 *)arg)->reserved[1]); + err |= __put_user(fix.reserved[2], &((struct fb_fix_screeninfo32 *)arg)->reserved[2]); + if (err) + err = -EFAULT; + } + return err; +} + +struct fb_cmap32 { + __u32 start; /* First entry */ + __u32 len; /* Number of entries */ + __u32 redptr; /* Red values */ + __u32 greenptr; + __u32 blueptr; + __u32 transpptr; /* transparency, can be NULL */ +}; + +static int do_fbiogetcmap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct fb_cmap cmap; + int err; + + set_fs(KERNEL_DS); + err = sys_ioctl(fd, cmd, (long)&cmap); + set_fs(old_fs); + + if (err == 0) { + __u32 redptr = (__u32)(__u64)cmap.red; + __u32 greenptr = (__u32)(__u64)cmap.green; + __u32 blueptr = (__u32)(__u64)cmap.blue; + __u32 transpptr = (__u32)(__u64)cmap.transp; + + err = put_user(cmap.start, &((struct fb_cmap32 *)arg)->start); + err |= __put_user(cmap.len, &((struct fb_cmap32 *)arg)->len); + err |= __put_user(redptr, &((struct fb_cmap32 *)arg)->redptr); + err |= __put_user(greenptr, &((struct fb_cmap32 *)arg)->greenptr); + err |= __put_user(blueptr, &((struct fb_cmap32 *)arg)->blueptr); + err |= __put_user(transpptr, &((struct fb_cmap32 *)arg)->transpptr); + if (err) + err = -EFAULT; + } + return err; +} + +static int do_fbioputcmap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct fb_cmap cmap; + __u32 redptr, greenptr, blueptr, transpptr; + int err; + + err = get_user(cmap.start, &((struct fb_cmap32 *)arg)->start); + err |= __get_user(cmap.len, &((struct fb_cmap32 *)arg)->len); + err |= __get_user(redptr, &((struct fb_cmap32 *)arg)->redptr); + err |= __get_user(greenptr, &((struct fb_cmap32 *)arg)->greenptr); + err |= __get_user(blueptr, &((struct fb_cmap32 *)arg)->blueptr); + err |= __get_user(transpptr, &((struct fb_cmap32 *)arg)->transpptr); + + if (err) { + err = -EFAULT; + } else { + cmap.red = (__u16 
*)(__u64)redptr; + cmap.green = (__u16 *)(__u64)greenptr; + cmap.blue = (__u16 *)(__u64)blueptr; + cmap.transp = (__u16 *)(__u64)transpptr; + set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&cmap); + set_fs (old_fs); + } + return err; +} + +struct unimapdesc32 { + unsigned short entry_ct; + u32 entries; +}; + +static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, struct unimapdesc32 *user_ud, struct file *file) +{ + struct unimapdesc32 tmp; + int perm = vt_check(file); + + if (perm < 0) return perm; + if (copy_from_user(&tmp, user_ud, sizeof tmp)) + return -EFAULT; + switch (cmd) { + case PIO_UNIMAP: + if (!perm) return -EPERM; + return con_set_unimap(fg_console, tmp.entry_ct, (struct unipair *)A(tmp.entries)); + case GIO_UNIMAP: + return con_get_unimap(fg_console, tmp.entry_ct, &(user_ud->entry_ct), (struct unipair *)A(tmp.entries)); + } + return 0; +} +#endif /* CONFIG_VT */ +static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + __kernel_uid_t kuid; + int err; + + cmd = SMB_IOC_GETMOUNTUID; + + set_fs(KERNEL_DS); + err = sys_ioctl(fd, cmd, (unsigned long)&kuid); + set_fs(old_fs); + + if (err >= 0) + err = put_user(kuid, (__kernel_uid_t32 *)arg); + + return err; +} + +struct atmif_sioc32 { + int number; + int length; + __kernel_caddr_t32 arg; +}; + +struct atm_iobuf32 { + int length; + __kernel_caddr_t32 buffer; +}; + +#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct atmif_sioc32) +#define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct atm_iobuf32) +#define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct atmif_sioc32) +#define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct atmif_sioc32) +#define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct atmif_sioc32) +#define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct atmif_sioc32) +#define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct atmif_sioc32) +#define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct atmif_sioc32) +#define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct atmif_sioc32) +#define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct atmif_sioc32) +#define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct atmif_sioc32) +#define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct atmif_sioc32) +#define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32) +#define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32) +#define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32) +#define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32) +#define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32) + +static struct { + unsigned int cmd32; + unsigned int cmd; +} atm_ioctl_map[] = { + { ATM_GETLINKRATE32, ATM_GETLINKRATE }, + { ATM_GETNAMES32, ATM_GETNAMES }, + { ATM_GETTYPE32, ATM_GETTYPE }, + { ATM_GETESI32, ATM_GETESI }, + { ATM_GETADDR32, ATM_GETADDR }, + { ATM_RSTADDR32, ATM_RSTADDR }, + { ATM_ADDADDR32, ATM_ADDADDR }, + { ATM_DELADDR32, ATM_DELADDR }, + { ATM_GETCIRANGE32, ATM_GETCIRANGE }, + { ATM_SETCIRANGE32, ATM_SETCIRANGE }, + { ATM_SETESI32, ATM_SETESI }, + { ATM_SETESIF32, ATM_SETESIF }, + { ATM_GETSTAT32, ATM_GETSTAT }, + { ATM_GETSTATZ32, ATM_GETSTATZ }, + { ATM_GETLOOP32, ATM_GETLOOP }, + { ATM_SETLOOP32, ATM_SETLOOP }, + { ATM_QUERYLOOP32, ATM_QUERYLOOP } +}; + +#define NR_ATM_IOCTL (sizeof(atm_ioctl_map)/sizeof(atm_ioctl_map[0])) + + +static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct atm_iobuf32 iobuf32; + struct atm_iobuf iobuf = { 0, NULL }; + mm_segment_t 
old_fs; + int err; + + err = copy_from_user(&iobuf32, (struct atm_iobuf32*)arg, + sizeof(struct atm_iobuf32)); + if (err) + return -EFAULT; + + iobuf.length = iobuf32.length; + + if (iobuf32.buffer == (__kernel_caddr_t32) NULL || iobuf32.length == 0) { + iobuf.buffer = (void*)(unsigned long)iobuf32.buffer; + } else { + iobuf.buffer = kmalloc(iobuf.length, GFP_KERNEL); + if (iobuf.buffer == NULL) { + err = -ENOMEM; + goto out; + } + + err = copy_from_user(iobuf.buffer, (void *)A(iobuf32.buffer), iobuf.length); + if (err) { + err = -EFAULT; + goto out; + } + } + + old_fs = get_fs(); set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&iobuf); + set_fs (old_fs); + if (err) + goto out; + + if (iobuf.buffer && iobuf.length > 0) { + err = copy_to_user((void *)A(iobuf32.buffer), iobuf.buffer, iobuf.length); + if (err) { + err = -EFAULT; + goto out; + } + } + err = __put_user(iobuf.length, &(((struct atm_iobuf32*)arg)->length)); + + out: + if (iobuf32.buffer && iobuf32.length > 0) + kfree(iobuf.buffer); + + return err; +} + + +static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct atmif_sioc32 sioc32; + struct atmif_sioc sioc = { 0, 0, NULL }; + mm_segment_t old_fs; + int err; + + err = copy_from_user(&sioc32, (struct atmif_sioc32*)arg, + sizeof(struct atmif_sioc32)); + if (err) + return -EFAULT; + + sioc.number = sioc32.number; + sioc.length = sioc32.length; + + if (sioc32.arg == (__kernel_caddr_t32) NULL || sioc32.length == 0) { + sioc.arg = (void*)(unsigned long)sioc32.arg; + } else { + sioc.arg = kmalloc(sioc.length, GFP_KERNEL); + if (sioc.arg == NULL) { + err = -ENOMEM; + goto out; + } + + err = copy_from_user(sioc.arg, (void *)A(sioc32.arg), sioc32.length); + if (err) { + err = -EFAULT; + goto out; + } + } + + old_fs = get_fs(); set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)&sioc); + set_fs (old_fs); + if (err) { + goto out; + } + + if (sioc.arg && sioc.length > 0) { + err = copy_to_user((void *)A(sioc32.arg), sioc.arg, sioc.length); + if (err) { + err = -EFAULT; + goto out; + } + } + err = __put_user(sioc.length, &(((struct atmif_sioc32*)arg)->length)); + + out: + if (sioc32.arg && sioc32.length > 0) + kfree(sioc.arg); + + return err; +} + + +static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg) +{ + int i; + unsigned int cmd = 0; + + switch (cmd32) { + case SONET_GETSTAT: + case SONET_GETSTATZ: + case SONET_GETDIAG: + case SONET_SETDIAG: + case SONET_CLRDIAG: + case SONET_SETFRAMING: + case SONET_GETFRAMING: + case SONET_GETFRSENSE: + return do_atmif_sioc(fd, cmd32, arg); + } + + for (i = 0; i < NR_ATM_IOCTL; i++) { + if (cmd32 == atm_ioctl_map[i].cmd32) { + cmd = atm_ioctl_map[i].cmd; + break; + } + } + if (i == NR_ATM_IOCTL) { + return -EINVAL; + } + + switch (cmd) { + case ATM_GETNAMES: + return do_atm_iobuf(fd, cmd, arg); + + case ATM_GETLINKRATE: + case ATM_GETTYPE: + case ATM_GETESI: + case ATM_GETADDR: + case ATM_RSTADDR: + case ATM_ADDADDR: + case ATM_DELADDR: + case ATM_GETCIRANGE: + case ATM_SETCIRANGE: + case ATM_SETESI: + case ATM_SETESIF: + case ATM_GETSTAT: + case ATM_GETSTATZ: + case ATM_GETLOOP: + case ATM_SETLOOP: + case ATM_QUERYLOOP: + return do_atmif_sioc(fd, cmd, arg); + } + + return -EINVAL; +} + +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +/* Ugh, LVM. Pitty it was not cleaned up before accepted :((. 
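 * The 32 bit structures below mirror vg_t, pv_t and lv_t with
 * every embedded pointer shrunk to a u32 handle; get_lv_t(),
 * copy_lv_t() and put_lv_t() deep-copy the pe and block
 * exception arrays that an lv_t drags around.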
*/ +typedef struct { + uint8_t vg_name[NAME_LEN]; + uint32_t vg_number; + uint32_t vg_access; + uint32_t vg_status; + uint32_t lv_max; + uint32_t lv_cur; + uint32_t lv_open; + uint32_t pv_max; + uint32_t pv_cur; + uint32_t pv_act; + uint32_t dummy; + uint32_t vgda; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pvg_total; + u32 proc; + u32 pv[ABS_MAX_PV + 1]; + u32 lv[ABS_MAX_LV + 1]; + uint8_t vg_uuid[UUID_LEN+1]; /* volume group UUID */ +} vg32_t; + +typedef struct { + uint8_t id[2]; + uint16_t version; + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; + u32 pe; + u32 inode; + uint8_t pv_uuid[UUID_LEN+1]; +} pv32_t; + +typedef struct { + char lv_name[NAME_LEN]; + u32 lv; +} lv_req32_t; + +typedef struct { + u32 lv_index; + u32 lv; + /* Transfer size because user space and kernel space differ */ + uint16_t size; +} lv_status_byindex_req32_t; + +typedef struct { + dev_t dev; + u32 lv; +} lv_status_bydev_req32_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + u32 old_pe; + u32 new_pe; +} le_remap_req32_t; + +typedef struct { + char pv_name[NAME_LEN]; + u32 pv; +} pv_status_req32_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; + kdev_t lv_dev; + uint32_t lv_number; + uint32_t lv_mirror_copies; + uint32_t lv_recovery; + uint32_t lv_schedule; + uint32_t lv_size; + u32 lv_current_pe; + uint32_t lv_current_le; + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; + uint32_t lv_allocation; + uint32_t lv_io_timeout; + uint32_t lv_read_ahead; + /* delta to version 1 starts here */ + u32 lv_snapshot_org; + u32 lv_snapshot_prev; + u32 lv_snapshot_next; + u32 lv_block_exception; + uint32_t lv_remap_ptr; + uint32_t lv_remap_end; + uint32_t lv_chunk_size; + uint32_t lv_snapshot_minor; + char dummy[200]; +} lv32_t; + +typedef struct { + u32 hash[2]; + u32 rsector_org; + kdev_t rdev_org; + u32 rsector_new; + kdev_t rdev_new; +} lv_block_exception32_t; + +static void put_lv_t(lv_t *l) +{ + if (l->lv_current_pe) vfree(l->lv_current_pe); + if (l->lv_block_exception) vfree(l->lv_block_exception); + kfree(l); +} + +static lv_t *get_lv_t(u32 p, int *errp) +{ + int err, i; + u32 ptr1, ptr2; + size_t size; + lv_block_exception32_t *lbe32; + lv_block_exception_t *lbe; + lv32_t *ul = (lv32_t *)A(p); + lv_t *l = (lv_t *)kmalloc(sizeof(lv_t), GFP_KERNEL); + if (!l) { + *errp = -ENOMEM; + return NULL; + } + memset(l, 0, sizeof(lv_t)); + err = copy_from_user(l, ul, (long)&((lv32_t *)0)->lv_current_pe); + err |= __copy_from_user(&l->lv_current_le, &ul->lv_current_le, + ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le)); + err |= __copy_from_user(&l->lv_remap_ptr, &ul->lv_remap_ptr, + ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr)); + err |= __get_user(ptr1, &ul->lv_current_pe); + err |= __get_user(ptr2, &ul->lv_block_exception); + if (err) { + kfree(l); + *errp = -EFAULT; + return NULL; + } + if (ptr1) { + size = l->lv_allocated_le * sizeof(pe_t); + l->lv_current_pe = vmalloc(size); + if 
(l->lv_current_pe) + err = copy_from_user(l->lv_current_pe, (void *)A(ptr1), size); + } + if (!err && ptr2) { + size = l->lv_remap_end * sizeof(lv_block_exception_t); + l->lv_block_exception = lbe = vmalloc(size); + if (l->lv_block_exception) { + lbe32 = (lv_block_exception32_t *)A(ptr2); + memset(lbe, 0, size); + for (i = 0; i < l->lv_remap_end; i++, lbe++, lbe32++) { + err |= get_user(lbe->rsector_org, &lbe32->rsector_org); + err |= __get_user(lbe->rdev_org, &lbe32->rdev_org); + err |= __get_user(lbe->rsector_new, &lbe32->rsector_new); + err |= __get_user(lbe->rdev_new, &lbe32->rdev_new); + + } + } + } + if (err || (ptr1 && !l->lv_current_pe) || (ptr2 && !l->lv_block_exception)) { + if (!err) + *errp = -ENOMEM; + else + *errp = -EFAULT; + put_lv_t(l); + return NULL; + } + return l; +} + +static int copy_lv_t(u32 ptr, lv_t *l) +{ + int err; + lv32_t *ul = (lv32_t *)A(ptr); + u32 ptr1; + size_t size; + + err = get_user(ptr1, &ul->lv_current_pe); + if (err) + return -EFAULT; + err = copy_to_user(ul, l, (long)&((lv32_t *)0)->lv_current_pe); + err |= __copy_to_user(&ul->lv_current_le, &l->lv_current_le, + ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le)); + err |= __copy_to_user(&ul->lv_remap_ptr, &l->lv_remap_ptr, + ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr)); + size = l->lv_allocated_le * sizeof(pe_t); + if (ptr1) + err |= __copy_to_user((void *)A(ptr1), l->lv_current_pe, size); + return err ? -EFAULT : 0; +} + +static int do_lvm_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + vg_t *v; + union { + lv_req_t lv_req; + le_remap_req_t le_remap; + lv_status_byindex_req_t lv_byindex; + lv_status_bydev_req_t lv_bydev; + pv_status_req_t pv_status; + } u; + pv_t p; + int err; + u32 ptr = 0; + int i; + mm_segment_t old_fs; + void *karg = &u; + + switch (cmd) { + case VG_STATUS: + v = kmalloc(sizeof(vg_t), GFP_KERNEL); + if (!v) return -ENOMEM; + karg = v; + break; + case VG_CREATE: + v = kmalloc(sizeof(vg_t), GFP_KERNEL); + if (!v) return -ENOMEM; + if (copy_from_user(v, (void *)arg, (long)&((vg32_t *)0)->proc) || + __get_user(v->proc, &((vg32_t *)arg)->proc)) { + kfree(v); + return -EFAULT; + } + if (copy_from_user(v->vg_uuid, ((vg32_t *)arg)->vg_uuid, UUID_LEN+1)) { + kfree(v); + return -EFAULT; + } + + karg = v; + memset(v->pv, 0, sizeof(v->pv) + sizeof(v->lv)); + if (v->pv_max > ABS_MAX_PV || v->lv_max > ABS_MAX_LV) + return -EPERM; + for (i = 0; i < v->pv_max; i++) { + err = __get_user(ptr, &((vg32_t *)arg)->pv[i]); + if (err) break; + if (ptr) { + v->pv[i] = kmalloc(sizeof(pv_t), GFP_KERNEL); + if (!v->pv[i]) { + err = -ENOMEM; + break; + } + err = copy_from_user(v->pv[i], (void *)A(ptr), sizeof(pv32_t) - 8 - UUID_LEN+1); + if (err) { + err = -EFAULT; + break; + } + err = copy_from_user(v->pv[i]->pv_uuid, ((pv32_t *)A(ptr))->pv_uuid, UUID_LEN+1); + if (err) { + err = -EFAULT; + break; + } + + + v->pv[i]->pe = NULL; v->pv[i]->inode = NULL; + } + } + if (!err) { + for (i = 0; i < v->lv_max; i++) { + err = __get_user(ptr, &((vg32_t *)arg)->lv[i]); + if (err) break; + if (ptr) { + v->lv[i] = get_lv_t(ptr, &err); + if (err) break; + } + } + } + break; + case LV_CREATE: + case LV_EXTEND: + case LV_REDUCE: + case LV_REMOVE: + case LV_RENAME: + case LV_STATUS_BYNAME: + err = copy_from_user(&u.pv_status, arg, sizeof(u.pv_status.pv_name)); + if (err) return -EFAULT; + if (cmd != LV_REMOVE) { + err = __get_user(ptr, &((lv_req32_t *)arg)->lv); + if (err) return err; + u.lv_req.lv = get_lv_t(ptr, &err); + } else + u.lv_req.lv = NULL; + break; + + + case LV_STATUS_BYINDEX: + 
err = get_user(u.lv_byindex.lv_index, &((lv_status_byindex_req32_t *)arg)->lv_index); + err |= __get_user(ptr, &((lv_status_byindex_req32_t *)arg)->lv); + if (err) return err; + u.lv_byindex.lv = get_lv_t(ptr, &err); + break; + case LV_STATUS_BYDEV: + err = get_user(u.lv_bydev.dev, &((lv_status_bydev_req32_t *)arg)->dev); + u.lv_bydev.lv = get_lv_t(ptr, &err); + if (err) return err; + u.lv_bydev.lv = &p; + p.pe = NULL; p.inode = NULL; + break; + case VG_EXTEND: + err = copy_from_user(&p, (void *)arg, sizeof(pv32_t) - 8 - UUID_LEN+1); + if (err) return -EFAULT; + err = copy_from_user(p.pv_uuid, ((pv32_t *)arg)->pv_uuid, UUID_LEN+1); + if (err) return -EFAULT; + p.pe = NULL; p.inode = NULL; + karg = &p; + break; + case PV_CHANGE: + case PV_STATUS: + err = copy_from_user(&u.pv_status, arg, sizeof(u.lv_req.lv_name)); + if (err) return -EFAULT; + err = __get_user(ptr, &((pv_status_req32_t *)arg)->pv); + if (err) return err; + u.pv_status.pv = &p; + if (cmd == PV_CHANGE) { + err = copy_from_user(&p, (void *)A(ptr), sizeof(pv32_t) - 8 - UUID_LEN+1); + if (err) return -EFAULT; + p.pe = NULL; p.inode = NULL; + } + break; + } + old_fs = get_fs(); set_fs (KERNEL_DS); + err = sys_ioctl (fd, cmd, (unsigned long)karg); + set_fs (old_fs); + switch (cmd) { + case VG_STATUS: + if (!err) { + if (copy_to_user((void *)arg, v, (long)&((vg32_t *)0)->proc) || + clear_user(&((vg32_t *)arg)->proc, sizeof(vg32_t) - (long)&((vg32_t *)0)->proc)) + err = -EFAULT; + } + if (copy_to_user(((vg32_t *)arg)->vg_uuid, v->vg_uuid, UUID_LEN+1)) { + err = -EFAULT; + } + kfree(v); + break; + case VG_CREATE: + for (i = 0; i < v->pv_max; i++) + if (v->pv[i]) kfree(v->pv[i]); + for (i = 0; i < v->lv_max; i++) + if (v->lv[i]) put_lv_t(v->lv[i]); + kfree(v); + break; + case LV_STATUS_BYNAME: + if (!err && u.lv_req.lv) err = copy_lv_t(ptr, u.lv_req.lv); + /* Fall through */ + case LV_CREATE: + case LV_EXTEND: + case LV_REDUCE: + if (u.lv_req.lv) put_lv_t(u.lv_req.lv); + break; + case LV_STATUS_BYINDEX: + if (u.lv_byindex.lv) { + if (!err) err = copy_lv_t(ptr, u.lv_byindex.lv); + put_lv_t(u.lv_byindex.lv); + } + break; + case PV_STATUS: + if (!err) { + err = copy_to_user((void *)A(ptr), &p, sizeof(pv32_t) - 8 - UUID_LEN+1); + if (err) return -EFAULT; + err = copy_to_user(((pv_t *)A(ptr))->pv_uuid, p.pv_uuid, UUID_LEN + 1); + if (err) return -EFAULT; + } + break; + case LV_STATUS_BYDEV: + if (!err) { + if (!err) err = copy_lv_t(ptr, u.lv_bydev.lv); + put_lv_t(u.lv_byindex.lv); + } + break; + } + return err; +} +#endif + +#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) +/* This really belongs in include/linux/drm.h -DaveM */ +#include "../../../drivers/char/drm/drm.h" + +typedef struct drm32_version { + int version_major; /* Major version */ + int version_minor; /* Minor version */ + int version_patchlevel;/* Patch level */ + int name_len; /* Length of name buffer */ + u32 name; /* Name of driver */ + int date_len; /* Length of date buffer */ + u32 date; /* User-space buffer to hold date */ + int desc_len; /* Length of desc buffer */ + u32 desc; /* User-space buffer to hold desc */ +} drm32_version_t; +#define DRM32_IOCTL_VERSION DRM_IOWR(0x00, drm32_version_t) + +static int drm32_version(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_version_t *uversion = (drm32_version_t *)arg; + char *name_ptr, *date_ptr, *desc_ptr; + u32 tmp1, tmp2, tmp3; + drm_version_t kversion; + mm_segment_t old_fs; + int ret; + + memset(&kversion, 0, sizeof(kversion)); + if (get_user(kversion.name_len, &uversion->name_len) || + 
get_user(kversion.date_len, &uversion->date_len) || + get_user(kversion.desc_len, &uversion->desc_len) || + get_user(tmp1, &uversion->name) || + get_user(tmp2, &uversion->date) || + get_user(tmp3, &uversion->desc)) + return -EFAULT; + + name_ptr = (char *) A(tmp1); + date_ptr = (char *) A(tmp2); + desc_ptr = (char *) A(tmp3); + + ret = -ENOMEM; + if (kversion.name_len && name_ptr) { + kversion.name = kmalloc(kversion.name_len, GFP_KERNEL); + if (!kversion.name) + goto out; + } + if (kversion.date_len && date_ptr) { + kversion.date = kmalloc(kversion.date_len, GFP_KERNEL); + if (!kversion.date) + goto out; + } + if (kversion.desc_len && desc_ptr) { + kversion.desc = kmalloc(kversion.desc_len, GFP_KERNEL); + if (!kversion.desc) + goto out; + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl (fd, DRM_IOCTL_VERSION, (unsigned long)&kversion); + set_fs(old_fs); + + if (!ret) { + if ((kversion.name && + copy_to_user(name_ptr, kversion.name, kversion.name_len)) || + (kversion.date && + copy_to_user(date_ptr, kversion.date, kversion.date_len)) || + (kversion.desc && + copy_to_user(desc_ptr, kversion.desc, kversion.desc_len))) + ret = -EFAULT; + if (put_user(kversion.version_major, &uversion->version_major) || + put_user(kversion.version_minor, &uversion->version_minor) || + put_user(kversion.version_patchlevel, &uversion->version_patchlevel) || + put_user(kversion.name_len, &uversion->name_len) || + put_user(kversion.date_len, &uversion->date_len) || + put_user(kversion.desc_len, &uversion->desc_len)) + ret = -EFAULT; + } + +out: + if (kversion.name) + kfree(kversion.name); + if (kversion.date) + kfree(kversion.date); + if (kversion.desc) + kfree(kversion.desc); + return ret; +} + +typedef struct drm32_unique { + int unique_len; /* Length of unique */ + u32 unique; /* Unique name for driver instantiation */ +} drm32_unique_t; +#define DRM32_IOCTL_GET_UNIQUE DRM_IOWR(0x01, drm32_unique_t) +#define DRM32_IOCTL_SET_UNIQUE DRM_IOW( 0x10, drm32_unique_t) + +static int drm32_getsetunique(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_unique_t *uarg = (drm32_unique_t *)arg; + drm_unique_t karg; + mm_segment_t old_fs; + char *uptr; + u32 tmp; + int ret; + + if (get_user(karg.unique_len, &uarg->unique_len)) + return -EFAULT; + karg.unique = NULL; + + if (get_user(tmp, &uarg->unique)) + return -EFAULT; + + uptr = (char *) A(tmp); + + if (uptr) { + karg.unique = kmalloc(karg.unique_len, GFP_KERNEL); + if (!karg.unique) + return -ENOMEM; + if (cmd == DRM32_IOCTL_SET_UNIQUE && + copy_from_user(karg.unique, uptr, karg.unique_len)) { + kfree(karg.unique); + return -EFAULT; + } + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + if (cmd == DRM32_IOCTL_GET_UNIQUE) + ret = sys_ioctl (fd, DRM_IOCTL_GET_UNIQUE, (unsigned long)&karg); + else + ret = sys_ioctl (fd, DRM_IOCTL_SET_UNIQUE, (unsigned long)&karg); + set_fs(old_fs); + + if (!ret) { + if (cmd == DRM32_IOCTL_GET_UNIQUE && + uptr != NULL && + copy_to_user(uptr, karg.unique, karg.unique_len)) + ret = -EFAULT; + if (put_user(karg.unique_len, &uarg->unique_len)) + ret = -EFAULT; + } + + if (karg.unique != NULL) + kfree(karg.unique); + + return ret; +} + +typedef struct drm32_map { + u32 offset; /* Requested physical address (0 for SAREA)*/ + u32 size; /* Requested physical size (bytes) */ + drm_map_type_t type; /* Type of memory to map */ + drm_map_flags_t flags; /* Flags */ + u32 handle; /* User-space: "Handle" to pass to mmap */ + /* Kernel-space: kernel-virtual address */ + int mtrr; /* MTRR slot used */ + /* Private data */ +} 
drm32_map_t; +#define DRM32_IOCTL_ADD_MAP DRM_IOWR(0x15, drm32_map_t) + +static int drm32_addmap(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_map_t *uarg = (drm32_map_t *) arg; + drm_map_t karg; + mm_segment_t old_fs; + u32 tmp; + int ret; + + ret = get_user(karg.offset, &uarg->offset); + ret |= get_user(karg.size, &uarg->size); + ret |= get_user(karg.type, &uarg->type); + ret |= get_user(karg.flags, &uarg->flags); + ret |= get_user(tmp, &uarg->handle); + ret |= get_user(karg.mtrr, &uarg->mtrr); + if (ret) + return -EFAULT; + + karg.handle = (void *) A(tmp); + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, DRM_IOCTL_ADD_MAP, (unsigned long) &karg); + set_fs(old_fs); + + if (!ret) { + ret = put_user(karg.offset, &uarg->offset); + ret |= put_user(karg.size, &uarg->size); + ret |= put_user(karg.type, &uarg->type); + ret |= put_user(karg.flags, &uarg->flags); + tmp = (u32) (long)karg.handle; + ret |= put_user(tmp, &uarg->handle); + ret |= put_user(karg.mtrr, &uarg->mtrr); + if (ret) + ret = -EFAULT; + } + + return ret; +} + +typedef struct drm32_buf_info { + int count; /* Entries in list */ + u32 list; /* (drm_buf_desc_t *) */ +} drm32_buf_info_t; +#define DRM32_IOCTL_INFO_BUFS DRM_IOWR(0x18, drm32_buf_info_t) + +static int drm32_info_bufs(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_buf_info_t *uarg = (drm32_buf_info_t *)arg; + drm_buf_desc_t *ulist; + drm_buf_info_t karg; + mm_segment_t old_fs; + int orig_count, ret; + u32 tmp; + + if (get_user(karg.count, &uarg->count) || + get_user(tmp, &uarg->list)) + return -EFAULT; + + ulist = (drm_buf_desc_t *) A(tmp); + + orig_count = karg.count; + + karg.list = kmalloc(karg.count * sizeof(drm_buf_desc_t), GFP_KERNEL); + if (!karg.list) + return -EFAULT; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, DRM_IOCTL_INFO_BUFS, (unsigned long) &karg); + set_fs(old_fs); + + if (!ret) { + if (karg.count <= orig_count && + (copy_to_user(ulist, karg.list, + karg.count * sizeof(drm_buf_desc_t)))) + ret = -EFAULT; + if (put_user(karg.count, &uarg->count)) + ret = -EFAULT; + } + + kfree(karg.list); + + return ret; +} + +typedef struct drm32_buf_free { + int count; + u32 list; /* (int *) */ +} drm32_buf_free_t; +#define DRM32_IOCTL_FREE_BUFS DRM_IOW( 0x1a, drm32_buf_free_t) + +static int drm32_free_bufs(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_buf_free_t *uarg = (drm32_buf_free_t *)arg; + drm_buf_free_t karg; + mm_segment_t old_fs; + int *ulist; + int ret; + u32 tmp; + + if (get_user(karg.count, &uarg->count) || + get_user(tmp, &uarg->list)) + return -EFAULT; + + ulist = (int *) A(tmp); + + karg.list = kmalloc(karg.count * sizeof(int), GFP_KERNEL); + if (!karg.list) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(karg.list, ulist, (karg.count * sizeof(int)))) + goto out; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, DRM_IOCTL_FREE_BUFS, (unsigned long) &karg); + set_fs(old_fs); + +out: + kfree(karg.list); + + return ret; +} + +typedef struct drm32_buf_pub { + int idx; /* Index into master buflist */ + int total; /* Buffer size */ + int used; /* Amount of buffer in use (for DMA) */ + u32 address; /* Address of buffer (void *) */ +} drm32_buf_pub_t; + +typedef struct drm32_buf_map { + int count; /* Length of buflist */ + u32 virtual; /* Mmaped area in user-virtual (void *) */ + u32 list; /* Buffer information (drm_buf_pub_t *) */ +} drm32_buf_map_t; +#define DRM32_IOCTL_MAP_BUFS DRM_IOWR(0x19, drm32_buf_map_t) + +static int 
drm32_map_bufs(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_buf_map_t *uarg = (drm32_buf_map_t *)arg; + drm32_buf_pub_t *ulist; + drm_buf_map_t karg; + mm_segment_t old_fs; + int orig_count, ret, i; + u32 tmp1, tmp2; + + if (get_user(karg.count, &uarg->count) || + get_user(tmp1, &uarg->virtual) || + get_user(tmp2, &uarg->list)) + return -EFAULT; + + karg.virtual = (void *) A(tmp1); + ulist = (drm32_buf_pub_t *) A(tmp2); + + orig_count = karg.count; + + karg.list = kmalloc(karg.count * sizeof(drm_buf_pub_t), GFP_KERNEL); + if (!karg.list) + return -ENOMEM; + + ret = -EFAULT; + for (i = 0; i < karg.count; i++) { + if (get_user(karg.list[i].idx, &ulist[i].idx) || + get_user(karg.list[i].total, &ulist[i].total) || + get_user(karg.list[i].used, &ulist[i].used) || + get_user(tmp1, &ulist[i].address)) + goto out; + + karg.list[i].address = (void *) A(tmp1); + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, DRM_IOCTL_MAP_BUFS, (unsigned long) &karg); + set_fs(old_fs); + + if (!ret) { + for (i = 0; i < orig_count; i++) { + tmp1 = (u32) (long) karg.list[i].address; + if (put_user(karg.list[i].idx, &ulist[i].idx) || + put_user(karg.list[i].total, &ulist[i].total) || + put_user(karg.list[i].used, &ulist[i].used) || + put_user(tmp1, &ulist[i].address)) { + ret = -EFAULT; + goto out; + } + } + if (put_user(karg.count, &uarg->count)) + ret = -EFAULT; + } + +out: + kfree(karg.list); + return ret; +} + +typedef struct drm32_dma { + /* Indices here refer to the offset into + buflist in drm_buf_get_t. */ + int context; /* Context handle */ + int send_count; /* Number of buffers to send */ + u32 send_indices; /* List of handles to buffers (int *) */ + u32 send_sizes; /* Lengths of data to send (int *) */ + drm_dma_flags_t flags; /* Flags */ + int request_count; /* Number of buffers requested */ + int request_size; /* Desired size for buffers */ + u32 request_indices; /* Buffer information (int *) */ + u32 request_sizes; /* (int *) */ + int granted_count; /* Number of buffers granted */ +} drm32_dma_t; +#define DRM32_IOCTL_DMA DRM_IOWR(0x29, drm32_dma_t) + +/* RED PEN The DRM layer blindly dereferences the send/request + * indice/size arrays even though they are userland + * pointers. 
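 * That is why drm32_dma() below copies the four index/size
 * arrays into kernel buffers and issues the real ioctl under
 * KERNEL_DS: the blind dereference then lands on valid kernel
 * memory rather than on a 32 bit user address.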
-DaveM + */ +static int drm32_dma(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_dma_t *uarg = (drm32_dma_t *) arg; + int *u_si, *u_ss, *u_ri, *u_rs; + drm_dma_t karg; + mm_segment_t old_fs; + int ret; + u32 tmp1, tmp2, tmp3, tmp4; + + karg.send_indices = karg.send_sizes = NULL; + karg.request_indices = karg.request_sizes = NULL; + + if (get_user(karg.context, &uarg->context) || + get_user(karg.send_count, &uarg->send_count) || + get_user(tmp1, &uarg->send_indices) || + get_user(tmp2, &uarg->send_sizes) || + get_user(karg.flags, &uarg->flags) || + get_user(karg.request_count, &uarg->request_count) || + get_user(karg.request_size, &uarg->request_size) || + get_user(tmp3, &uarg->request_indices) || + get_user(tmp4, &uarg->request_sizes) || + get_user(karg.granted_count, &uarg->granted_count)) + return -EFAULT; + + u_si = (int *) A(tmp1); + u_ss = (int *) A(tmp2); + u_ri = (int *) A(tmp3); + u_rs = (int *) A(tmp4); + + if (karg.send_count) { + karg.send_indices = kmalloc(karg.send_count * sizeof(int), GFP_KERNEL); + karg.send_sizes = kmalloc(karg.send_count * sizeof(int), GFP_KERNEL); + + ret = -ENOMEM; + if (!karg.send_indices || !karg.send_sizes) + goto out; + + ret = -EFAULT; + if (copy_from_user(karg.send_indices, u_si, + (karg.send_count * sizeof(int))) || + copy_from_user(karg.send_sizes, u_ss, + (karg.send_count * sizeof(int)))) + goto out; + } + + if (karg.request_count) { + karg.request_indices = kmalloc(karg.request_count * sizeof(int), GFP_KERNEL); + karg.request_sizes = kmalloc(karg.request_count * sizeof(int), GFP_KERNEL); + + ret = -ENOMEM; + if (!karg.request_indices || !karg.request_sizes) + goto out; + + ret = -EFAULT; + if (copy_from_user(karg.request_indices, u_ri, + (karg.request_count * sizeof(int))) || + copy_from_user(karg.request_sizes, u_rs, + (karg.request_count * sizeof(int)))) + goto out; + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, DRM_IOCTL_DMA, (unsigned long) &karg); + set_fs(old_fs); + + if (!ret) { + if (put_user(karg.context, &uarg->context) || + put_user(karg.send_count, &uarg->send_count) || + put_user(karg.flags, &uarg->flags) || + put_user(karg.request_count, &uarg->request_count) || + put_user(karg.request_size, &uarg->request_size) || + put_user(karg.granted_count, &uarg->granted_count)) + ret = -EFAULT; + + if (karg.send_count) { + if (copy_to_user(u_si, karg.send_indices, + (karg.send_count * sizeof(int))) || + copy_to_user(u_ss, karg.send_sizes, + (karg.send_count * sizeof(int)))) + ret = -EFAULT; + } + if (karg.request_count) { + if (copy_to_user(u_ri, karg.request_indices, + (karg.request_count * sizeof(int))) || + copy_to_user(u_rs, karg.request_sizes, + (karg.request_count * sizeof(int)))) + ret = -EFAULT; + } + } + +out: + if (karg.send_indices) + kfree(karg.send_indices); + if (karg.send_sizes) + kfree(karg.send_sizes); + if (karg.request_indices) + kfree(karg.request_indices); + if (karg.request_sizes) + kfree(karg.request_sizes); + + return ret; +} + +typedef struct drm32_ctx_res { + int count; + u32 contexts; /* (drm_ctx_t *) */ +} drm32_ctx_res_t; +#define DRM32_IOCTL_RES_CTX DRM_IOWR(0x26, drm32_ctx_res_t) + +static int drm32_res_ctx(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + drm32_ctx_res_t *uarg = (drm32_ctx_res_t *) arg; + drm_ctx_t *ulist; + drm_ctx_res_t karg; + mm_segment_t old_fs; + int orig_count, ret; + u32 tmp; + + karg.contexts = NULL; + if (get_user(karg.count, &uarg->count) || + get_user(tmp, &uarg->contexts)) + return -EFAULT; + + ulist = (drm_ctx_t *) A(tmp); + + 
orig_count = karg.count;
+	if (karg.count && ulist) {
+		karg.contexts = kmalloc((karg.count * sizeof(drm_ctx_t)), GFP_KERNEL);
+		if (!karg.contexts)
+			return -ENOMEM;
+		if (copy_from_user(karg.contexts, ulist,
+				   (karg.count * sizeof(drm_ctx_t)))) {
+			kfree(karg.contexts);
+			return -EFAULT;
+		}
+	}
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = sys_ioctl(fd, DRM_IOCTL_RES_CTX, (unsigned long) &karg);
+	set_fs(old_fs);
+
+	if (!ret) {
+		if (orig_count) {
+			if (copy_to_user(ulist, karg.contexts,
+					 (orig_count * sizeof(drm_ctx_t))))
+				ret = -EFAULT;
+		}
+		if (put_user(karg.count, &uarg->count))
+			ret = -EFAULT;
+	}
+
+	if (karg.contexts)
+		kfree(karg.contexts);
+
+	return ret;
+}
+
+#endif
+
+static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	/* The mkswap binary hard codes it to Intel value :-((( */
+	return w_long(fd, BLKGETSIZE, arg);
+}
+
+struct blkpg_ioctl_arg32 {
+	int op;
+	int flags;
+	int datalen;
+	u32 data;
+};
+
+static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, struct blkpg_ioctl_arg32 *arg)
+{
+	struct blkpg_ioctl_arg a;
+	struct blkpg_partition p;
+	int err;
+	mm_segment_t old_fs = get_fs();
+
+	err = get_user(a.op, &arg->op);
+	err |= __get_user(a.flags, &arg->flags);
+	err |= __get_user(a.datalen, &arg->datalen);
+	err |= __get_user((long)a.data, &arg->data);
+	if (err)
+		return err;
+	switch (a.op) {
+	case BLKPG_ADD_PARTITION:
+	case BLKPG_DEL_PARTITION:
+		if (a.datalen < sizeof(struct blkpg_partition))
+			return -EINVAL;
+		if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+			return -EFAULT;
+		a.data = &p;
+		set_fs(KERNEL_DS);
+		err = sys_ioctl(fd, cmd, (unsigned long)&a);
+		set_fs(old_fs);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return err;
+}
+
+static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
+}
+
+struct usbdevfs_ctrltransfer32 {
+	__u8 requesttype;
+	__u8 request;
+	__u16 value;
+	__u16 index;
+	__u16 length;
+	__u32 timeout;  /* in milliseconds */
+	__u32 data;
+};
+
+#define USBDEVFS_CONTROL32           _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
+
+static int do_usbdevfs_control(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct usbdevfs_ctrltransfer kctrl;
+	struct usbdevfs_ctrltransfer32 *uctrl;
+	mm_segment_t old_fs;
+	__u32 udata;
+	void *uptr, *kptr;
+	int err;
+
+	uctrl = (struct usbdevfs_ctrltransfer32 *) arg;
+
+	if (copy_from_user(&kctrl, uctrl,
+			   (sizeof(struct usbdevfs_ctrltransfer) -
+			    sizeof(void *))))
+		return -EFAULT;
+
+	if (get_user(udata, &uctrl->data))
+		return -EFAULT;
+	uptr = (void *) A(udata);
+
+	/* usbdevice_fs limits the control buffer to one page;
+	 * for simplicity, so do we.
+	 */
+	if (!uptr || kctrl.length > PAGE_SIZE)
+		return -EINVAL;
+
+	kptr = (void *)__get_free_page(GFP_KERNEL);
+	if (!kptr)
+		return -ENOMEM;
+
+	if ((kctrl.requesttype & 0x80) == 0) {
+		err = -EFAULT;
+		if (copy_from_user(kptr, uptr, kctrl.length))
+			goto out;
+	}
+
+	kctrl.data = kptr;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_ioctl(fd, USBDEVFS_CONTROL, (unsigned long)&kctrl);
+	set_fs(old_fs);
+
+	if (err >= 0 &&
+	    ((kctrl.requesttype & 0x80) != 0)) {
+		if (copy_to_user(uptr, kptr, kctrl.length))
+			err = -EFAULT;
+	}
+
+out:
+	free_page((unsigned long) kptr);
+	return err;
+}
+
+struct usbdevfs_bulktransfer32 {
+	unsigned int ep;
+	unsigned int len;
+	unsigned int timeout; /* in milliseconds */
+	__u32 data;
+};
+
+#define USBDEVFS_BULK32           _IOWR('U', 2, struct usbdevfs_bulktransfer32)
+
+static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct usbdevfs_bulktransfer kbulk;
+	struct usbdevfs_bulktransfer32 *ubulk;
+	mm_segment_t old_fs;
+	__u32 udata;
+	void *uptr, *kptr;
+	int err;
+
+	ubulk = (struct usbdevfs_bulktransfer32 *) arg;
+
+	if (get_user(kbulk.ep, &ubulk->ep) ||
+	    get_user(kbulk.len, &ubulk->len) ||
+	    get_user(kbulk.timeout, &ubulk->timeout) ||
+	    get_user(udata, &ubulk->data))
+		return -EFAULT;
+
+	uptr = (void *) A(udata);
+
+	/* usbdevice_fs limits the bulk buffer to one page;
+	 * for simplicity, so do we.
+	 */
+	if (!uptr || kbulk.len > PAGE_SIZE)
+		return -EINVAL;
+
+	kptr = (void *) __get_free_page(GFP_KERNEL);
+	if (!kptr)
+		return -ENOMEM;
+
+	if ((kbulk.ep & 0x80) == 0) {
+		err = -EFAULT;
+		if (copy_from_user(kptr, uptr, kbulk.len))
+			goto out;
+	}
+
+	kbulk.data = kptr;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_ioctl(fd, USBDEVFS_BULK, (unsigned long) &kbulk);
+	set_fs(old_fs);
+
+	if (err >= 0 &&
+	    ((kbulk.ep & 0x80) != 0)) {
+		if (copy_to_user(uptr, kptr, kbulk.len))
+			err = -EFAULT;
+	}
+
+out:
+	free_page((unsigned long) kptr);
+	return err;
+}
+
+/* This needs more work before we can enable it.  Unfortunately
+ * because of the fancy asynchronous way URB status/error is written
+ * back to userspace, we'll need to fiddle with USB devio internals
+ * and/or reimplement entirely the frontend of it ourselves. -DaveM
+ *
+ * The issue is:
+ *
+ * When an URB is submitted via usbdevicefs it is put onto an
+ * asynchronous queue.  When the URB completes, it may be reaped
+ * via another ioctl.  During this reaping the status is written
+ * back to userspace along with the length of the transfer.
+ *
+ * We must translate into 64-bit kernel types, so we pass in a kernel
+ * space copy of the usbdevfs_urb structure.  This means that we
+ * must do something to deal with the async entry reaping.  First we
+ * have to deal somehow with this transitory memory we've allocated.
+ * This is problematic since there are many call sites from which the
+ * async entries can be destroyed (and thus where we'd need to free up
+ * this kernel memory), one of which is the close() op of usbdevicefs.
+ * To handle that we'd need to make our own file_operations struct which
+ * overrides usbdevicefs's release op with our own which runs usbdevicefs's
+ * real release op then frees up the kernel memory.
+ *
+ * But how to keep track of these kernel buffers?  We'd need to either
+ * keep track of them in some table _or_ know about usbdevicefs internals
+ * (i.e.
the exact layout of its file private data, which is actually defined
+ * in linux/usbdevice_fs.h, while the layout of the async queues is
+ * private to devio.c)
+ *
+ * There is one other possible solution I considered, also involving
+ * knowledge of usbdevicefs internals:
+ *
+ * After an URB is submitted, we "fix up" the address back to the user
+ * space one.  This would work if the status/length fields written back
+ * by the async URB completion line up perfectly in the 32-bit type with
+ * the 64-bit kernel type.  Unfortunately, they do not, because the iso
+ * frame descriptors, at the end of the struct, can be written back.
+ *
+ * I think we'll simply need to duplicate the devio URB engine here.
+ */
+#if 0
+struct usbdevfs_urb32 {
+	__u8 type;
+	__u8 endpoint;
+	__s32 status;
+	__u32 flags;
+	__u32 buffer;
+	__s32 buffer_length;
+	__s32 actual_length;
+	__s32 start_frame;
+	__s32 number_of_packets;
+	__s32 error_count;
+	__u32 signr;
+	__u32 usercontext; /* unused */
+	struct usbdevfs_iso_packet_desc iso_frame_desc[0];
+};
+
+#define USBDEVFS_SUBMITURB32       _IOR('U', 10, struct usbdevfs_urb32)
+
+static int get_urb32(struct usbdevfs_urb *kurb,
+		     struct usbdevfs_urb32 *uurb)
+{
+	if (get_user(kurb->type, &uurb->type) ||
+	    __get_user(kurb->endpoint, &uurb->endpoint) ||
+	    __get_user(kurb->status, &uurb->status) ||
+	    __get_user(kurb->flags, &uurb->flags) ||
+	    __get_user(kurb->buffer_length, &uurb->buffer_length) ||
+	    __get_user(kurb->actual_length, &uurb->actual_length) ||
+	    __get_user(kurb->start_frame, &uurb->start_frame) ||
+	    __get_user(kurb->number_of_packets, &uurb->number_of_packets) ||
+	    __get_user(kurb->error_count, &uurb->error_count) ||
+	    __get_user(kurb->signr, &uurb->signr))
+		return -EFAULT;
+
+	kurb->usercontext = 0; /* unused currently */
+
+	return 0;
+}
+
+/* Just put back the values which usbdevfs actually changes.
 */
+static int put_urb32(struct usbdevfs_urb *kurb,
+		     struct usbdevfs_urb32 *uurb)
+{
+	if (put_user(kurb->status, &uurb->status) ||
+	    __put_user(kurb->actual_length, &uurb->actual_length) ||
+	    __put_user(kurb->error_count, &uurb->error_count))
+		return -EFAULT;
+
+	if (kurb->number_of_packets != 0) {
+		int i;
+
+		for (i = 0; i < kurb->number_of_packets; i++) {
+			if (__put_user(kurb->iso_frame_desc[i].actual_length,
+				       &uurb->iso_frame_desc[i].actual_length) ||
+			    __put_user(kurb->iso_frame_desc[i].status,
+				       &uurb->iso_frame_desc[i].status))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int get_urb32_isoframes(struct usbdevfs_urb *kurb,
+			       struct usbdevfs_urb32 *uurb)
+{
+	unsigned int totlen;
+	int i;
+
+	if (kurb->type != USBDEVFS_URB_TYPE_ISO) {
+		kurb->number_of_packets = 0;
+		return 0;
+	}
+
+	if (kurb->number_of_packets < 1 ||
+	    kurb->number_of_packets > 128)
+		return -EINVAL;
+
+	if (copy_from_user(&kurb->iso_frame_desc[0],
+			   &uurb->iso_frame_desc[0],
+			   sizeof(struct usbdevfs_iso_packet_desc) *
+			   kurb->number_of_packets))
+		return -EFAULT;
+
+	totlen = 0;
+	for (i = 0; i < kurb->number_of_packets; i++) {
+		unsigned int this_len;
+
+		this_len = kurb->iso_frame_desc[i].length;
+		if (this_len > 1023)
+			return -EINVAL;
+
+		totlen += this_len;
+	}
+
+	if (totlen > 32768)
+		return -EINVAL;
+
+	kurb->buffer_length = totlen;
+
+	return 0;
+}
+
+static int do_usbdevfs_urb(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct usbdevfs_urb *kurb;
+	struct usbdevfs_urb32 *uurb;
+	mm_segment_t old_fs;
+	__u32 udata;
+	void *uptr, *kptr;
+	unsigned int buflen;
+	int err;
+
+	uurb = (struct usbdevfs_urb32 *) arg;
+
+	err = -ENOMEM;
+	kurb = kmalloc(sizeof(struct usbdevfs_urb) +
+		       (sizeof(struct usbdevfs_iso_packet_desc) * 128),
+		       GFP_KERNEL);
+	if (!kurb)
+		goto out;
+
+	err = -EFAULT;
+	if (get_urb32(kurb, uurb))
+		goto out;
+
+	err = get_urb32_isoframes(kurb, uurb);
+	if (err)
+		goto out;
+
+	err = -EFAULT;
+	if (__get_user(udata, &uurb->buffer))
+		goto out;
+	uptr = (void *) A(udata);
+
+	err = -ENOMEM;
+	buflen = kurb->buffer_length;
+	kptr = kmalloc(buflen, GFP_KERNEL);
+	if (!kptr)
+		goto out;
+
+	kurb->buffer = kptr;
+
+	err = -EFAULT;
+	if (copy_from_user(kptr, uptr, buflen))
+		goto out_kptr;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_ioctl(fd, USBDEVFS_SUBMITURB, (unsigned long) kurb);
+	set_fs(old_fs);
+
+	if (err >= 0) {
+		/* XXX Shit, this doesn't work for async URBs :-( XXX */
+		if (put_urb32(kurb, uurb)) {
+			err = -EFAULT;
+		} else if ((kurb->endpoint & USB_DIR_IN) != 0) {
+			if (copy_to_user(uptr, kptr, buflen))
+				err = -EFAULT;
+		}
+	}
+
+out_kptr:
+	kfree(kptr);
+
+out:
+	kfree(kurb);
+	return err;
+}
+#endif
+
+#define USBDEVFS_REAPURB32         _IOW('U', 12, u32)
+#define USBDEVFS_REAPURBNDELAY32   _IOW('U', 13, u32)
+
+static int do_usbdevfs_reapurb(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	mm_segment_t old_fs;
+	void *kptr;
+	int err;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_ioctl(fd,
+			(cmd == USBDEVFS_REAPURB32 ?
+ USBDEVFS_REAPURB : + USBDEVFS_REAPURBNDELAY), + (unsigned long) &kptr); + set_fs(old_fs); + + if (err >= 0 && + put_user(((u32)(long)kptr), (u32 *) A(arg))) + err = -EFAULT; + + return err; +} + +struct usbdevfs_disconnectsignal32 { + unsigned int signr; + u32 context; +}; + +#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32) + +static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct usbdevfs_disconnectsignal kdis; + struct usbdevfs_disconnectsignal32 *udis; + mm_segment_t old_fs; + u32 uctx; + int err; + + udis = (struct usbdevfs_disconnectsignal32 *) arg; + + if (get_user(kdis.signr, &udis->signr) || + __get_user(uctx, &udis->context)) + return -EFAULT; + + kdis.context = (void *) (long)uctx; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_ioctl(fd, USBDEVFS_DISCSIGNAL, (unsigned long) &kdis); + set_fs(old_fs); + + return err; +} + +struct mtd_oob_buf32 { + u32 start; + u32 length; + u32 ptr; /* unsigned char* */ +}; + +#define MEMWRITEOOB32 _IOWR('M',3,struct mtd_oob_buf32) +#define MEMREADOOB32 _IOWR('M',4,struct mtd_oob_buf32) + +static inline int +mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + mm_segment_t old_fs = get_fs(); + struct mtd_oob_buf32 *uarg = (struct mtd_oob_buf32 *)arg; + struct mtd_oob_buf karg; + u32 tmp; + char *ptr; + int ret; + + if (get_user(karg.start, &uarg->start) || + get_user(karg.length, &uarg->length) || + get_user(tmp, &uarg->ptr)) + return -EFAULT; + + ptr = (char *)A(tmp); + if (0 >= karg.length) + return -EINVAL; + + karg.ptr = kmalloc(karg.length, GFP_KERNEL); + if (NULL == karg.ptr) + return -ENOMEM; + + if (copy_from_user(karg.ptr, ptr, karg.length)) { + kfree(karg.ptr); + return -EFAULT; + } + + set_fs(KERNEL_DS); + if (MEMREADOOB32 == cmd) + ret = sys_ioctl(fd, MEMREADOOB, (unsigned long)&karg); + else if (MEMWRITEOOB32 == cmd) + ret = sys_ioctl(fd, MEMWRITEOOB, (unsigned long)&karg); + else + ret = -EINVAL; + set_fs(old_fs); + + if (0 == ret && cmd == MEMREADOOB32) { + ret = copy_to_user(ptr, karg.ptr, karg.length); + ret |= put_user(karg.start, &uarg->start); + ret |= put_user(karg.length, &uarg->length); + } + + kfree(karg.ptr); + return ((0 == ret) ? 0 : -EFAULT); +} + +struct ioctl_trans { + unsigned long cmd; + unsigned long handler; + unsigned long next; +}; + +#define COMPATIBLE_IOCTL(cmd) { cmd, (unsigned long)sys_ioctl, 0 } + +#define HANDLE_IOCTL(cmd,handler) { cmd, (unsigned long)handler, 0 } + +#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) +#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, __kernel_uid_t32) + +static struct ioctl_trans ioctl_translations[] = { + /* List here explicitly which ioctl's need translation, + * all others default to calling sys_ioctl(). 
+ */ +/* Big T */ +COMPATIBLE_IOCTL(TCGETA), +COMPATIBLE_IOCTL(TCSETA), +COMPATIBLE_IOCTL(TCSETAW), +COMPATIBLE_IOCTL(TCSETAF), +COMPATIBLE_IOCTL(TCSBRK), +COMPATIBLE_IOCTL(TCXONC), +COMPATIBLE_IOCTL(TCFLSH), +COMPATIBLE_IOCTL(TCGETS), +COMPATIBLE_IOCTL(TCSETS), +COMPATIBLE_IOCTL(TCSETSW), +COMPATIBLE_IOCTL(TCSETSF), +COMPATIBLE_IOCTL(TIOCLINUX), +COMPATIBLE_IOCTL(TIOCSTART), +/* Little t */ +COMPATIBLE_IOCTL(TIOCGETD), +COMPATIBLE_IOCTL(TIOCSETD), +COMPATIBLE_IOCTL(TIOCEXCL), +COMPATIBLE_IOCTL(TIOCNXCL), +COMPATIBLE_IOCTL(TIOCCONS), +COMPATIBLE_IOCTL(TIOCGSOFTCAR), +COMPATIBLE_IOCTL(TIOCSSOFTCAR), +COMPATIBLE_IOCTL(TIOCSWINSZ), +COMPATIBLE_IOCTL(TIOCGWINSZ), +COMPATIBLE_IOCTL(TIOCMGET), +COMPATIBLE_IOCTL(TIOCMBIC), +COMPATIBLE_IOCTL(TIOCMBIS), +COMPATIBLE_IOCTL(TIOCMSET), +COMPATIBLE_IOCTL(TIOCPKT), +COMPATIBLE_IOCTL(TIOCNOTTY), +COMPATIBLE_IOCTL(TIOCSTI), +COMPATIBLE_IOCTL(TIOCOUTQ), +COMPATIBLE_IOCTL(TIOCSPGRP), +COMPATIBLE_IOCTL(TIOCGPGRP), +COMPATIBLE_IOCTL(TIOCSCTTY), +COMPATIBLE_IOCTL(TIOCGPTN), +COMPATIBLE_IOCTL(TIOCSPTLCK), +COMPATIBLE_IOCTL(TIOCGSERIAL), +COMPATIBLE_IOCTL(TIOCSSERIAL), +COMPATIBLE_IOCTL(TIOCSERGETLSR), +/* Big F */ +COMPATIBLE_IOCTL(FBIOGET_VSCREENINFO), +COMPATIBLE_IOCTL(FBIOPUT_VSCREENINFO), +COMPATIBLE_IOCTL(FBIOPAN_DISPLAY), +COMPATIBLE_IOCTL(FBIOGET_FCURSORINFO), +COMPATIBLE_IOCTL(FBIOGET_VCURSORINFO), +COMPATIBLE_IOCTL(FBIOPUT_VCURSORINFO), +COMPATIBLE_IOCTL(FBIOGET_CURSORSTATE), +COMPATIBLE_IOCTL(FBIOPUT_CURSORSTATE), +COMPATIBLE_IOCTL(FBIOGET_CON2FBMAP), +COMPATIBLE_IOCTL(FBIOPUT_CON2FBMAP), +#if 0 +COMPATIBLE_IOCTL(FBIOBLANK), +#endif +/* Little f */ +COMPATIBLE_IOCTL(FIOCLEX), +COMPATIBLE_IOCTL(FIONCLEX), +COMPATIBLE_IOCTL(FIOASYNC), +COMPATIBLE_IOCTL(FIONBIO), +COMPATIBLE_IOCTL(FIONREAD), /* This is also TIOCINQ */ +/* 0x00 */ +COMPATIBLE_IOCTL(FIBMAP), +COMPATIBLE_IOCTL(FIGETBSZ), +/* 0x03 -- HD/IDE ioctl's used by hdparm and friends. + * Some need translations, these do not. 
+ */ +COMPATIBLE_IOCTL(HDIO_GET_IDENTITY), +COMPATIBLE_IOCTL(HDIO_SET_DMA), +COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS), +COMPATIBLE_IOCTL(HDIO_SET_UNMASKINTR), +COMPATIBLE_IOCTL(HDIO_SET_NOWERR), +COMPATIBLE_IOCTL(HDIO_SET_32BIT), +COMPATIBLE_IOCTL(HDIO_SET_MULTCOUNT), +COMPATIBLE_IOCTL(HDIO_DRIVE_CMD), +COMPATIBLE_IOCTL(HDIO_SET_PIO_MODE), +COMPATIBLE_IOCTL(HDIO_SCAN_HWIF), +COMPATIBLE_IOCTL(HDIO_SET_NICE), +/* 0x02 -- Floppy ioctls */ +COMPATIBLE_IOCTL(FDMSGON), +COMPATIBLE_IOCTL(FDMSGOFF), +COMPATIBLE_IOCTL(FDSETEMSGTRESH), +COMPATIBLE_IOCTL(FDFLUSH), +COMPATIBLE_IOCTL(FDWERRORCLR), +COMPATIBLE_IOCTL(FDSETMAXERRS), +COMPATIBLE_IOCTL(FDGETMAXERRS), +COMPATIBLE_IOCTL(FDGETDRVTYP), +COMPATIBLE_IOCTL(FDEJECT), +COMPATIBLE_IOCTL(FDCLRPRM), +COMPATIBLE_IOCTL(FDFMTBEG), +COMPATIBLE_IOCTL(FDFMTEND), +COMPATIBLE_IOCTL(FDRESET), +COMPATIBLE_IOCTL(FDTWADDLE), +COMPATIBLE_IOCTL(FDFMTTRK), +COMPATIBLE_IOCTL(FDRAWCMD), +/* 0x12 */ +COMPATIBLE_IOCTL(BLKROSET), +COMPATIBLE_IOCTL(BLKROGET), +COMPATIBLE_IOCTL(BLKRRPART), +COMPATIBLE_IOCTL(BLKFLSBUF), +COMPATIBLE_IOCTL(BLKRASET), +COMPATIBLE_IOCTL(BLKFRASET), +COMPATIBLE_IOCTL(BLKSECTSET), +COMPATIBLE_IOCTL(BLKSSZGET), +COMPATIBLE_IOCTL(BLKBSZGET), +COMPATIBLE_IOCTL(BLKBSZSET), +COMPATIBLE_IOCTL(BLKGETSIZE64), + +/* RAID */ +COMPATIBLE_IOCTL(RAID_VERSION), +COMPATIBLE_IOCTL(GET_ARRAY_INFO), +COMPATIBLE_IOCTL(GET_DISK_INFO), +COMPATIBLE_IOCTL(PRINT_RAID_DEBUG), +COMPATIBLE_IOCTL(CLEAR_ARRAY), +COMPATIBLE_IOCTL(ADD_NEW_DISK), +COMPATIBLE_IOCTL(HOT_REMOVE_DISK), +COMPATIBLE_IOCTL(SET_ARRAY_INFO), +COMPATIBLE_IOCTL(SET_DISK_INFO), +COMPATIBLE_IOCTL(WRITE_RAID_INFO), +COMPATIBLE_IOCTL(UNPROTECT_ARRAY), +COMPATIBLE_IOCTL(PROTECT_ARRAY), +COMPATIBLE_IOCTL(HOT_ADD_DISK), +COMPATIBLE_IOCTL(SET_DISK_FAULTY), +COMPATIBLE_IOCTL(RUN_ARRAY), +COMPATIBLE_IOCTL(START_ARRAY), +COMPATIBLE_IOCTL(STOP_ARRAY), +COMPATIBLE_IOCTL(STOP_ARRAY_RO), +COMPATIBLE_IOCTL(RESTART_ARRAY_RW), +/* Big K */ +COMPATIBLE_IOCTL(PIO_FONT), +COMPATIBLE_IOCTL(GIO_FONT), +COMPATIBLE_IOCTL(KDSIGACCEPT), +COMPATIBLE_IOCTL(KDGETKEYCODE), +COMPATIBLE_IOCTL(KDSETKEYCODE), +COMPATIBLE_IOCTL(KIOCSOUND), +COMPATIBLE_IOCTL(KDMKTONE), +COMPATIBLE_IOCTL(KDGKBTYPE), +COMPATIBLE_IOCTL(KDSETMODE), +COMPATIBLE_IOCTL(KDGETMODE), +COMPATIBLE_IOCTL(KDSKBMODE), +COMPATIBLE_IOCTL(KDGKBMODE), +COMPATIBLE_IOCTL(KDSKBMETA), +COMPATIBLE_IOCTL(KDGKBMETA), +COMPATIBLE_IOCTL(KDGKBENT), +COMPATIBLE_IOCTL(KDSKBENT), +COMPATIBLE_IOCTL(KDGKBSENT), +COMPATIBLE_IOCTL(KDSKBSENT), +COMPATIBLE_IOCTL(KDGKBDIACR), +COMPATIBLE_IOCTL(KDKBDREP), +COMPATIBLE_IOCTL(KDSKBDIACR), +COMPATIBLE_IOCTL(KDGKBLED), +COMPATIBLE_IOCTL(KDSKBLED), +COMPATIBLE_IOCTL(KDGETLED), +COMPATIBLE_IOCTL(KDSETLED), +COMPATIBLE_IOCTL(GIO_SCRNMAP), +COMPATIBLE_IOCTL(PIO_SCRNMAP), +COMPATIBLE_IOCTL(GIO_UNISCRNMAP), +COMPATIBLE_IOCTL(PIO_UNISCRNMAP), +COMPATIBLE_IOCTL(PIO_FONTRESET), +COMPATIBLE_IOCTL(PIO_UNIMAPCLR), +/* Big S */ +COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN), +COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST), +COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK), +COMPATIBLE_IOCTL(SCSI_IOCTL_DOORUNLOCK), +COMPATIBLE_IOCTL(SCSI_IOCTL_TEST_UNIT_READY), +COMPATIBLE_IOCTL(SCSI_IOCTL_TAGGED_ENABLE), +COMPATIBLE_IOCTL(SCSI_IOCTL_TAGGED_DISABLE), +COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER), +COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND), +/* Big V */ +COMPATIBLE_IOCTL(VT_SETMODE), +COMPATIBLE_IOCTL(VT_GETMODE), +COMPATIBLE_IOCTL(VT_GETSTATE), +COMPATIBLE_IOCTL(VT_OPENQRY), +COMPATIBLE_IOCTL(VT_ACTIVATE), +COMPATIBLE_IOCTL(VT_WAITACTIVE), +COMPATIBLE_IOCTL(VT_RELDISP), 
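
An entry belongs in the COMPATIBLE_IOCTL() part of this table only when its argument is ABI-invariant: an int, a handle, or a struct laid out identically for 32-bit and 64-bit user space. The terminal, block, and console entries above all pass that test. A quick user-space way to convince yourself for a given command is sketched below for TIOCGWINSZ (struct winsize is four 16-bit fields, so both ABIs agree on its layout); build it with both a 32-bit and a 64-bit compiler and compare the output:

#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct winsize ws;

	/* Both the struct size and the encoded command value must be
	 * identical across ABIs for a pass-through entry to be safe. */
	printf("sizeof(struct winsize) = %lu, TIOCGWINSZ = %#lx\n",
	       (unsigned long) sizeof(ws), (unsigned long) TIOCGWINSZ);
	if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == 0)
		printf("%u rows x %u cols\n", ws.ws_row, ws.ws_col);
	return 0;
}
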
+COMPATIBLE_IOCTL(VT_DISALLOCATE), +COMPATIBLE_IOCTL(VT_RESIZE), +COMPATIBLE_IOCTL(VT_RESIZEX), +COMPATIBLE_IOCTL(VT_LOCKSWITCH), +COMPATIBLE_IOCTL(VT_UNLOCKSWITCH), +/* Little v, the video4linux ioctls */ +COMPATIBLE_IOCTL(VIDIOCGCAP), +COMPATIBLE_IOCTL(VIDIOCGCHAN), +COMPATIBLE_IOCTL(VIDIOCSCHAN), +COMPATIBLE_IOCTL(VIDIOCGPICT), +COMPATIBLE_IOCTL(VIDIOCSPICT), +COMPATIBLE_IOCTL(VIDIOCCAPTURE), +COMPATIBLE_IOCTL(VIDIOCKEY), +COMPATIBLE_IOCTL(VIDIOCGAUDIO), +COMPATIBLE_IOCTL(VIDIOCSAUDIO), +COMPATIBLE_IOCTL(VIDIOCSYNC), +COMPATIBLE_IOCTL(VIDIOCMCAPTURE), +COMPATIBLE_IOCTL(VIDIOCGMBUF), +COMPATIBLE_IOCTL(VIDIOCGUNIT), +COMPATIBLE_IOCTL(VIDIOCGCAPTURE), +COMPATIBLE_IOCTL(VIDIOCSCAPTURE), +/* BTTV specific... */ +COMPATIBLE_IOCTL(_IOW('v', BASE_VIDIOCPRIVATE+0, char [256])), +COMPATIBLE_IOCTL(_IOR('v', BASE_VIDIOCPRIVATE+1, char [256])), +COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int)), +COMPATIBLE_IOCTL(_IOW('v' , BASE_VIDIOCPRIVATE+3, char [16])), /* struct bttv_pll_info */ +COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+4, int)), +COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+5, int)), +COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+6, int)), +COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+7, int)), +/* Little p (/dev/rtc, /dev/envctrl, etc.) */ +COMPATIBLE_IOCTL(_IOR('p', 20, int[7])), /* RTCGET */ +COMPATIBLE_IOCTL(_IOW('p', 21, int[7])), /* RTCSET */ +COMPATIBLE_IOCTL(RTC_AIE_ON), +COMPATIBLE_IOCTL(RTC_AIE_OFF), +COMPATIBLE_IOCTL(RTC_UIE_ON), +COMPATIBLE_IOCTL(RTC_UIE_OFF), +COMPATIBLE_IOCTL(RTC_PIE_ON), +COMPATIBLE_IOCTL(RTC_PIE_OFF), +COMPATIBLE_IOCTL(RTC_WIE_ON), +COMPATIBLE_IOCTL(RTC_WIE_OFF), +COMPATIBLE_IOCTL(RTC_ALM_SET), +COMPATIBLE_IOCTL(RTC_ALM_READ), +COMPATIBLE_IOCTL(RTC_RD_TIME), +COMPATIBLE_IOCTL(RTC_SET_TIME), +COMPATIBLE_IOCTL(RTC_WKALM_SET), +COMPATIBLE_IOCTL(RTC_WKALM_RD), +/* Little m */ +COMPATIBLE_IOCTL(MTIOCTOP), +/* Socket level stuff */ +COMPATIBLE_IOCTL(FIOSETOWN), +COMPATIBLE_IOCTL(SIOCSPGRP), +COMPATIBLE_IOCTL(FIOGETOWN), +COMPATIBLE_IOCTL(SIOCGPGRP), +COMPATIBLE_IOCTL(SIOCATMARK), +COMPATIBLE_IOCTL(SIOCSIFLINK), +COMPATIBLE_IOCTL(SIOCSIFENCAP), +COMPATIBLE_IOCTL(SIOCGIFENCAP), +COMPATIBLE_IOCTL(SIOCSIFBR), +COMPATIBLE_IOCTL(SIOCGIFBR), +COMPATIBLE_IOCTL(SIOCSARP), +COMPATIBLE_IOCTL(SIOCGARP), +COMPATIBLE_IOCTL(SIOCDARP), +COMPATIBLE_IOCTL(SIOCSRARP), +COMPATIBLE_IOCTL(SIOCGRARP), +COMPATIBLE_IOCTL(SIOCDRARP), +COMPATIBLE_IOCTL(SIOCADDDLCI), +COMPATIBLE_IOCTL(SIOCDELDLCI), +/* SG stuff */ +COMPATIBLE_IOCTL(SG_SET_TIMEOUT), +COMPATIBLE_IOCTL(SG_GET_TIMEOUT), +COMPATIBLE_IOCTL(SG_EMULATED_HOST), +COMPATIBLE_IOCTL(SG_SET_TRANSFORM), +COMPATIBLE_IOCTL(SG_GET_TRANSFORM), +COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE), +COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE), +COMPATIBLE_IOCTL(SG_GET_SCSI_ID), +COMPATIBLE_IOCTL(SG_SET_FORCE_LOW_DMA), +COMPATIBLE_IOCTL(SG_GET_LOW_DMA), +COMPATIBLE_IOCTL(SG_SET_FORCE_PACK_ID), +COMPATIBLE_IOCTL(SG_GET_PACK_ID), +COMPATIBLE_IOCTL(SG_GET_NUM_WAITING), +COMPATIBLE_IOCTL(SG_SET_DEBUG), +COMPATIBLE_IOCTL(SG_GET_SG_TABLESIZE), +COMPATIBLE_IOCTL(SG_GET_COMMAND_Q), +COMPATIBLE_IOCTL(SG_SET_COMMAND_Q), +COMPATIBLE_IOCTL(SG_GET_VERSION_NUM), +COMPATIBLE_IOCTL(SG_NEXT_CMD_LEN), +COMPATIBLE_IOCTL(SG_SCSI_RESET), +COMPATIBLE_IOCTL(SG_IO), +COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE), +COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN), +COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN), +/* PPP stuff */ +COMPATIBLE_IOCTL(PPPIOCGFLAGS), +COMPATIBLE_IOCTL(PPPIOCSFLAGS), +COMPATIBLE_IOCTL(PPPIOCGASYNCMAP), +COMPATIBLE_IOCTL(PPPIOCSASYNCMAP), 
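
The raw _IOR('p', 20, int[7]) / _IOW('p', 21, int[7]) RTC entries above make the command encoding visible: an ioctl number packs a direction, a type character, a sequence number, and the argument size into one word, so a command stays ABI-invariant exactly when the sizeof of its argument does (int[7] is 28 bytes on either side). A small host-side decoder, a sketch using the standard _IOC_* accessors from linux/ioctl.h:

#include <stdio.h>
#include <linux/ioctl.h>

/* Unpack the fields that _IOR()/_IOW() fold into a command word. When a
 * struct argument has a different size for 32-bit user space, the
 * computed command value differs too, and a separate 32-bit constant
 * plus translation handler becomes necessary. */
static void decode(const char *name, unsigned int cmd)
{
	printf("%s: dir=%u type='%c' nr=%u size=%u -> %#x\n",
	       name, _IOC_DIR(cmd), (char) _IOC_TYPE(cmd),
	       _IOC_NR(cmd), _IOC_SIZE(cmd), cmd);
}

int main(void)
{
	decode("RTCGET", _IOR('p', 20, int[7]));
	decode("RTCSET", _IOW('p', 21, int[7]));
	return 0;
}
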
+COMPATIBLE_IOCTL(PPPIOCGUNIT), +COMPATIBLE_IOCTL(PPPIOCGRASYNCMAP), +COMPATIBLE_IOCTL(PPPIOCSRASYNCMAP), +COMPATIBLE_IOCTL(PPPIOCGMRU), +COMPATIBLE_IOCTL(PPPIOCSMRU), +COMPATIBLE_IOCTL(PPPIOCSMAXCID), +COMPATIBLE_IOCTL(PPPIOCGXASYNCMAP), +COMPATIBLE_IOCTL(PPPIOCSXASYNCMAP), +COMPATIBLE_IOCTL(PPPIOCXFERUNIT), +COMPATIBLE_IOCTL(PPPIOCGNPMODE), +COMPATIBLE_IOCTL(PPPIOCSNPMODE), +COMPATIBLE_IOCTL(PPPIOCGDEBUG), +COMPATIBLE_IOCTL(PPPIOCSDEBUG), +COMPATIBLE_IOCTL(PPPIOCNEWUNIT), +COMPATIBLE_IOCTL(PPPIOCATTACH), +COMPATIBLE_IOCTL(PPPIOCDETACH), +COMPATIBLE_IOCTL(PPPIOCSMRRU), +COMPATIBLE_IOCTL(PPPIOCCONNECT), +COMPATIBLE_IOCTL(PPPIOCDISCONN), +COMPATIBLE_IOCTL(PPPIOCATTCHAN), +COMPATIBLE_IOCTL(PPPIOCGCHAN), +/* PPPOX */ +COMPATIBLE_IOCTL(PPPOEIOCSFWD), +COMPATIBLE_IOCTL(PPPOEIOCDFWD), +/* CDROM stuff */ +COMPATIBLE_IOCTL(CDROMPAUSE), +COMPATIBLE_IOCTL(CDROMRESUME), +COMPATIBLE_IOCTL(CDROMPLAYMSF), +COMPATIBLE_IOCTL(CDROMPLAYTRKIND), +COMPATIBLE_IOCTL(CDROMREADTOCHDR), +COMPATIBLE_IOCTL(CDROMREADTOCENTRY), +COMPATIBLE_IOCTL(CDROMSTOP), +COMPATIBLE_IOCTL(CDROMSTART), +COMPATIBLE_IOCTL(CDROMEJECT), +COMPATIBLE_IOCTL(CDROMVOLCTRL), +COMPATIBLE_IOCTL(CDROMSUBCHNL), +COMPATIBLE_IOCTL(CDROMEJECT_SW), +COMPATIBLE_IOCTL(CDROMMULTISESSION), +COMPATIBLE_IOCTL(CDROM_GET_MCN), +COMPATIBLE_IOCTL(CDROMRESET), +COMPATIBLE_IOCTL(CDROMVOLREAD), +COMPATIBLE_IOCTL(CDROMSEEK), +COMPATIBLE_IOCTL(CDROMPLAYBLK), +COMPATIBLE_IOCTL(CDROMCLOSETRAY), +COMPATIBLE_IOCTL(CDROM_SET_OPTIONS), +COMPATIBLE_IOCTL(CDROM_CLEAR_OPTIONS), +COMPATIBLE_IOCTL(CDROM_SELECT_SPEED), +COMPATIBLE_IOCTL(CDROM_SELECT_DISC), +COMPATIBLE_IOCTL(CDROM_MEDIA_CHANGED), +COMPATIBLE_IOCTL(CDROM_DRIVE_STATUS), +COMPATIBLE_IOCTL(CDROM_DISC_STATUS), +COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS), +COMPATIBLE_IOCTL(CDROM_LOCKDOOR), +COMPATIBLE_IOCTL(CDROM_DEBUG), +COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY), +/* DVD ioctls */ +COMPATIBLE_IOCTL(DVD_READ_STRUCT), +COMPATIBLE_IOCTL(DVD_WRITE_STRUCT), +COMPATIBLE_IOCTL(DVD_AUTH), +/* Big L */ +COMPATIBLE_IOCTL(LOOP_SET_FD), +COMPATIBLE_IOCTL(LOOP_CLR_FD), +/* Big Q for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_SEQ_RESET), +COMPATIBLE_IOCTL(SNDCTL_SEQ_SYNC), +COMPATIBLE_IOCTL(SNDCTL_SYNTH_INFO), +COMPATIBLE_IOCTL(SNDCTL_SEQ_CTRLRATE), +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETOUTCOUNT), +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETINCOUNT), +COMPATIBLE_IOCTL(SNDCTL_SEQ_PERCMODE), +COMPATIBLE_IOCTL(SNDCTL_FM_LOAD_INSTR), +COMPATIBLE_IOCTL(SNDCTL_SEQ_TESTMIDI), +COMPATIBLE_IOCTL(SNDCTL_SEQ_RESETSAMPLES), +COMPATIBLE_IOCTL(SNDCTL_SEQ_NRSYNTHS), +COMPATIBLE_IOCTL(SNDCTL_SEQ_NRMIDIS), +COMPATIBLE_IOCTL(SNDCTL_MIDI_INFO), +COMPATIBLE_IOCTL(SNDCTL_SEQ_THRESHOLD), +COMPATIBLE_IOCTL(SNDCTL_SYNTH_MEMAVL), +COMPATIBLE_IOCTL(SNDCTL_FM_4OP_ENABLE), +COMPATIBLE_IOCTL(SNDCTL_SEQ_PANIC), +COMPATIBLE_IOCTL(SNDCTL_SEQ_OUTOFBAND), +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETTIME), +COMPATIBLE_IOCTL(SNDCTL_SYNTH_ID), +COMPATIBLE_IOCTL(SNDCTL_SYNTH_CONTROL), +COMPATIBLE_IOCTL(SNDCTL_SYNTH_REMOVESAMPLE), +/* Big T for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_TMR_TIMEBASE), +COMPATIBLE_IOCTL(SNDCTL_TMR_START), +COMPATIBLE_IOCTL(SNDCTL_TMR_STOP), +COMPATIBLE_IOCTL(SNDCTL_TMR_CONTINUE), +COMPATIBLE_IOCTL(SNDCTL_TMR_TEMPO), +COMPATIBLE_IOCTL(SNDCTL_TMR_SOURCE), +COMPATIBLE_IOCTL(SNDCTL_TMR_METRONOME), +COMPATIBLE_IOCTL(SNDCTL_TMR_SELECT), +/* Little m for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_MIDI_PRETIME), +COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUMODE), +COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUCMD), +/* Big P for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_DSP_RESET), +COMPATIBLE_IOCTL(SNDCTL_DSP_SYNC), 
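
Nearly every OSS entry around this point is compatible for one reason: the argument is a plain int passed by pointer, read and updated in place, and int is 32 bits on both ABIs. A minimal user-space sketch of that calling convention, shown for SNDCTL_DSP_SPEED (this assumes an OSS-style /dev/dsp node is present):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/soundcard.h>

int main(void)
{
	int fd = open("/dev/dsp", O_WRONLY);
	int rate = 44100;	/* passed by pointer, updated in place */

	if (fd < 0) {
		perror("/dev/dsp");
		return 1;
	}
	/* The driver writes back the rate it actually granted. */
	if (ioctl(fd, SNDCTL_DSP_SPEED, &rate) == 0)
		printf("granted rate: %d Hz\n", rate);
	close(fd);
	return 0;
}
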
+COMPATIBLE_IOCTL(SNDCTL_DSP_SPEED), +COMPATIBLE_IOCTL(SNDCTL_DSP_STEREO), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETBLKSIZE), +COMPATIBLE_IOCTL(SNDCTL_DSP_CHANNELS), +COMPATIBLE_IOCTL(SOUND_PCM_WRITE_FILTER), +COMPATIBLE_IOCTL(SNDCTL_DSP_POST), +COMPATIBLE_IOCTL(SNDCTL_DSP_SUBDIVIDE), +COMPATIBLE_IOCTL(SNDCTL_DSP_SETFRAGMENT), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETFMTS), +COMPATIBLE_IOCTL(SNDCTL_DSP_SETFMT), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETOSPACE), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETISPACE), +COMPATIBLE_IOCTL(SNDCTL_DSP_NONBLOCK), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETCAPS), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETTRIGGER), +COMPATIBLE_IOCTL(SNDCTL_DSP_SETTRIGGER), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETIPTR), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETOPTR), +/* SNDCTL_DSP_MAPINBUF, XXX needs translation */ +/* SNDCTL_DSP_MAPOUTBUF, XXX needs translation */ +COMPATIBLE_IOCTL(SNDCTL_DSP_SETSYNCRO), +COMPATIBLE_IOCTL(SNDCTL_DSP_SETDUPLEX), +COMPATIBLE_IOCTL(SNDCTL_DSP_GETODELAY), +COMPATIBLE_IOCTL(SNDCTL_DSP_PROFILE), +COMPATIBLE_IOCTL(SOUND_PCM_READ_RATE), +COMPATIBLE_IOCTL(SOUND_PCM_READ_CHANNELS), +COMPATIBLE_IOCTL(SOUND_PCM_READ_BITS), +COMPATIBLE_IOCTL(SOUND_PCM_READ_FILTER), +/* Big C for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_COPR_RESET), +COMPATIBLE_IOCTL(SNDCTL_COPR_LOAD), +COMPATIBLE_IOCTL(SNDCTL_COPR_RDATA), +COMPATIBLE_IOCTL(SNDCTL_COPR_RCODE), +COMPATIBLE_IOCTL(SNDCTL_COPR_WDATA), +COMPATIBLE_IOCTL(SNDCTL_COPR_WCODE), +COMPATIBLE_IOCTL(SNDCTL_COPR_RUN), +COMPATIBLE_IOCTL(SNDCTL_COPR_HALT), +COMPATIBLE_IOCTL(SNDCTL_COPR_SENDMSG), +COMPATIBLE_IOCTL(SNDCTL_COPR_RCVMSG), +/* Big M for sound/OSS */ +COMPATIBLE_IOCTL(SOUND_MIXER_READ_VOLUME), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_BASS), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_TREBLE), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_SYNTH), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_PCM), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_SPEAKER), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_MIC), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_CD), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_IMIX), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_ALTPCM), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECLEV), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_IGAIN), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_OGAIN), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE1), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE2), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE3), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL1)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL2)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL3)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEIN)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEOUT)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_VIDEO)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_RADIO)), +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_MONITOR)), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_MUTE), +/* SOUND_MIXER_READ_ENHANCE, same value as READ_MUTE */ +/* SOUND_MIXER_READ_LOUD, same value as READ_MUTE */ +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECSRC), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_DEVMASK), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECMASK), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_STEREODEVS), +COMPATIBLE_IOCTL(SOUND_MIXER_READ_CAPS), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_VOLUME), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_BASS), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_TREBLE), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SYNTH), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_PCM), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SPEAKER), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MIC), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_CD), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IMIX), 
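
The MIXER_READ(...) entries mixed in above are the same family as the named SOUND_MIXER_READ_* constants: soundcard.h defines the latter as MIXER_READ(channel), i.e. _IOR('M', channel, int), so every mixer channel shares the int-sized, layout-stable encoding that keeps this whole block pass-through. A two-line sketch to check the equivalence:

#include <stdio.h>
#include <sys/soundcard.h>

int main(void)
{
	/* Expect identical values: the named constant is the macro. */
	printf("SOUND_MIXER_READ_VOLUME        = %#x\n",
	       SOUND_MIXER_READ_VOLUME);
	printf("MIXER_READ(SOUND_MIXER_VOLUME) = %#x\n",
	       MIXER_READ(SOUND_MIXER_VOLUME));
	return 0;
}
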
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_ALTPCM), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECLEV), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IGAIN), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_OGAIN), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE1), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE2), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE3), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL1)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL2)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL3)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEIN)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEOUT)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_VIDEO)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_RADIO)), +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_MONITOR)), +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MUTE), +/* SOUND_MIXER_WRITE_ENHANCE, same value as WRITE_MUTE */ +/* SOUND_MIXER_WRITE_LOUD, same value as WRITE_MUTE */ +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECSRC), +COMPATIBLE_IOCTL(SOUND_MIXER_INFO), +COMPATIBLE_IOCTL(SOUND_OLD_MIXER_INFO), +COMPATIBLE_IOCTL(SOUND_MIXER_ACCESS), +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE1), +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE2), +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE3), +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE4), +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5), +COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS), +COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS), +COMPATIBLE_IOCTL(OSS_GETVERSION), +/* AUTOFS */ +COMPATIBLE_IOCTL(AUTOFS_IOC_READY), +COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL), +COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC), +COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER), +COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE), +/* DEVFS */ +COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV), +COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK), +COMPATIBLE_IOCTL(DEVFSDIOC_RELEASE_EVENT_QUEUE), +COMPATIBLE_IOCTL(DEVFSDIOC_SET_DEBUG_MASK), +/* Raw devices */ +COMPATIBLE_IOCTL(RAW_SETBIND), +COMPATIBLE_IOCTL(RAW_GETBIND), +/* SMB ioctls which do not need any translations */ +COMPATIBLE_IOCTL(SMB_IOC_NEWCONN), +/* Little a */ +COMPATIBLE_IOCTL(ATMSIGD_CTRL), +COMPATIBLE_IOCTL(ATMARPD_CTRL), +COMPATIBLE_IOCTL(ATMLEC_CTRL), +COMPATIBLE_IOCTL(ATMLEC_MCAST), +COMPATIBLE_IOCTL(ATMLEC_DATA), +COMPATIBLE_IOCTL(ATM_SETSC), +COMPATIBLE_IOCTL(SIOCSIFATMTCP), +COMPATIBLE_IOCTL(SIOCMKCLIP), +COMPATIBLE_IOCTL(ATMARP_MKIP), +COMPATIBLE_IOCTL(ATMARP_SETENTRY), +COMPATIBLE_IOCTL(ATMARP_ENCAP), +COMPATIBLE_IOCTL(ATMTCP_CREATE), +COMPATIBLE_IOCTL(ATMTCP_REMOVE), +COMPATIBLE_IOCTL(ATMMPC_CTRL), +COMPATIBLE_IOCTL(ATMMPC_DATA), +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +/* 0xfe - lvm */ +COMPATIBLE_IOCTL(VG_SET_EXTENDABLE), +COMPATIBLE_IOCTL(VG_STATUS_GET_COUNT), +COMPATIBLE_IOCTL(VG_STATUS_GET_NAMELIST), +COMPATIBLE_IOCTL(VG_REMOVE), +COMPATIBLE_IOCTL(VG_RENAME), +COMPATIBLE_IOCTL(VG_REDUCE), +COMPATIBLE_IOCTL(PE_LOCK_UNLOCK), +COMPATIBLE_IOCTL(PV_FLUSH), +COMPATIBLE_IOCTL(LVM_LOCK_LVM), +COMPATIBLE_IOCTL(LVM_GET_IOP_VERSION), +#ifdef LVM_TOTAL_RESET +COMPATIBLE_IOCTL(LVM_RESET), +#endif +COMPATIBLE_IOCTL(LV_SET_ACCESS), +COMPATIBLE_IOCTL(LV_SET_STATUS), +COMPATIBLE_IOCTL(LV_SET_ALLOCATION), +COMPATIBLE_IOCTL(LE_REMAP), +COMPATIBLE_IOCTL(LV_BMAP), +COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE), +#endif /* LVM */ +#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) +COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC), +COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID), +COMPATIBLE_IOCTL(DRM_IOCTL_AUTH_MAGIC), +COMPATIBLE_IOCTL(DRM_IOCTL_BLOCK), +COMPATIBLE_IOCTL(DRM_IOCTL_UNBLOCK), +COMPATIBLE_IOCTL(DRM_IOCTL_CONTROL), +COMPATIBLE_IOCTL(DRM_IOCTL_ADD_BUFS), +COMPATIBLE_IOCTL(DRM_IOCTL_MARK_BUFS), 
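
This table is not searched linearly at ioctl time: init_sys32_ioctl(), defined after the table, folds each command word through ioctl32_hash() into a 1024-entry bucket array and chains collisions through ioctl_trans.next. The hash is cheap to replay in user space; a sketch (the command values are arbitrary samples, except 0x1260, the legacy BLKGETSIZE number handled later in the table):

#include <stdio.h>

/* Same 10-bit fold as the kernel's ioctl32_hash() defined below. */
static unsigned long ioctl32_hash(unsigned long cmd)
{
	return ((cmd >> 6) ^ (cmd >> 4) ^ cmd) & 0x3ff;
}

int main(void)
{
	unsigned long cmds[] = { 0x1260, 0x5401, 0x20006601 };
	unsigned int i;

	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
		printf("cmd %#010lx -> bucket %4lu\n",
		       cmds[i], ioctl32_hash(cmds[i]));
	return 0;
}
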
+COMPATIBLE_IOCTL(DRM_IOCTL_ADD_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_RM_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_MOD_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_GET_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_SWITCH_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_NEW_CTX), +COMPATIBLE_IOCTL(DRM_IOCTL_ADD_DRAW), +COMPATIBLE_IOCTL(DRM_IOCTL_RM_DRAW), +COMPATIBLE_IOCTL(DRM_IOCTL_LOCK), +COMPATIBLE_IOCTL(DRM_IOCTL_UNLOCK), +COMPATIBLE_IOCTL(DRM_IOCTL_FINISH), +#endif /* DRM */ +/* elevator */ +COMPATIBLE_IOCTL(BLKELVGET), +COMPATIBLE_IOCTL(BLKELVSET), +/* Big W */ +/* WIOC_GETSUPPORT not yet implemented -E */ +COMPATIBLE_IOCTL(WDIOC_GETSTATUS), +COMPATIBLE_IOCTL(WDIOC_GETBOOTSTATUS), +COMPATIBLE_IOCTL(WDIOC_GETTEMP), +COMPATIBLE_IOCTL(WDIOC_SETOPTIONS), +COMPATIBLE_IOCTL(WDIOC_KEEPALIVE), +/* Bluetooth ioctls */ +COMPATIBLE_IOCTL(HCIDEVUP), +COMPATIBLE_IOCTL(HCIDEVDOWN), +COMPATIBLE_IOCTL(HCIDEVRESET), +COMPATIBLE_IOCTL(HCIRESETSTAT), +COMPATIBLE_IOCTL(HCIGETINFO), +COMPATIBLE_IOCTL(HCIGETDEVLIST), +COMPATIBLE_IOCTL(HCISETRAW), +COMPATIBLE_IOCTL(HCISETSCAN), +COMPATIBLE_IOCTL(HCISETAUTH), +COMPATIBLE_IOCTL(HCIINQUIRY), +COMPATIBLE_IOCTL(PCIIOC_CONTROLLER), +COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO), +COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM), +COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE), +/* USB */ +COMPATIBLE_IOCTL(USBDEVFS_RESETEP), +COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE), +COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION), +COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER), +COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB), +COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE), +COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE), +COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO), +COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO), +COMPATIBLE_IOCTL(USBDEVFS_RESET), +COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT), +/* MTD */ +COMPATIBLE_IOCTL(MEMGETINFO), +COMPATIBLE_IOCTL(MEMERASE), +COMPATIBLE_IOCTL(MEMLOCK), +COMPATIBLE_IOCTL(MEMUNLOCK), +COMPATIBLE_IOCTL(MEMGETREGIONCOUNT), +COMPATIBLE_IOCTL(MEMGETREGIONINFO), +/* NBD */ +COMPATIBLE_IOCTL(NBD_SET_SOCK), +COMPATIBLE_IOCTL(NBD_SET_BLKSIZE), +COMPATIBLE_IOCTL(NBD_SET_SIZE), +COMPATIBLE_IOCTL(NBD_DO_IT), +COMPATIBLE_IOCTL(NBD_CLEAR_SOCK), +COMPATIBLE_IOCTL(NBD_CLEAR_QUE), +COMPATIBLE_IOCTL(NBD_PRINT_DEBUG), +COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS), +COMPATIBLE_IOCTL(NBD_DISCONNECT), +/* Remove *PRIVATE in 2.5 */ +COMPATIBLE_IOCTL(SIOCDEVPRIVATE), +COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1), +COMPATIBLE_IOCTL(SIOCDEVPRIVATE+2), +COMPATIBLE_IOCTL(SIOCGMIIPHY), +COMPATIBLE_IOCTL(SIOCGMIIREG), +COMPATIBLE_IOCTL(SIOCSMIIREG), +/* And these ioctls need translation */ +HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob), +HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob), +HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32), +HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf), +HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFMETRIC, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFMETRIC, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFMTU, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFMTU, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFMEM, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFMEM, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFHWADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFHWADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCADDMULTI, dev_ifsioc), +HANDLE_IOCTL(SIOCDELMULTI, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFINDEX, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFBRDADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFBRDADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFDSTADDR, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFDSTADDR, dev_ifsioc), 
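
The long dev_ifsioc runs above and below exist because all of those SIOC* requests carry a struct ifreq, whose union of request-specific members includes pointers and longs (for example the ifmap used by SIOCGIFMAP/SIOCSIFMAP), so the 32-bit and native layouts differ in size. dev_ifsioc itself is defined earlier in this file and is not shown in this hunk; the stand-alone schematic below only illustrates the widening step such handlers share, and its struct and field names are made up for the illustration:

#include <stdio.h>
#include <string.h>

/* Illustrative 32-bit and native views of an ifreq-like argument: the
 * name array lines up, but the data member is a 32-bit user address on
 * one side and a real pointer on the other, so the raw bytes cannot
 * simply be copied through. */
struct ifreq32_like {
	char name[16];
	unsigned int data;	/* 32-bit user address */
};

struct ifreq_like {
	char name[16];
	void *data;		/* native pointer */
};

int main(void)
{
	struct ifreq32_like u32arg = { "eth0", 0x1000 };
	struct ifreq_like karg;

	/* The widening step every dev_ifsioc-style handler performs. */
	memcpy(karg.name, u32arg.name, sizeof(karg.name));
	karg.data = (void *) (unsigned long) u32arg.data;

	printf("sizeof 32-bit view = %lu, native view = %lu\n",
	       (unsigned long) sizeof(u32arg), (unsigned long) sizeof(karg));
	return 0;
}
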
+HANDLE_IOCTL(SIOCGIFNETMASK, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFNETMASK, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFPFLAGS, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc), +HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc), +HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc), +HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl), +HANDLE_IOCTL(SIOCADDRT, routing_ioctl), +HANDLE_IOCTL(SIOCDELRT, routing_ioctl), +/* Note SIOCRTMSG is no longer, so this is safe and + * the user would have seen just an -EINVAL anyways. */ +HANDLE_IOCTL(SIOCRTMSG, ret_einval), +HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp), +HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo), +HANDLE_IOCTL(BLKRAGET, w_long), +HANDLE_IOCTL(BLKGETSIZE, w_long), +HANDLE_IOCTL(0x1260, broken_blkgetsize), +HANDLE_IOCTL(BLKFRAGET, w_long), +HANDLE_IOCTL(BLKSECTGET, w_long), +HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans), +HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans), +HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans), +HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans), +HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans), +HANDLE_IOCTL(FDSETDRVPRM32, fd_ioctl_trans), +HANDLE_IOCTL(FDGETDRVPRM32, fd_ioctl_trans), +HANDLE_IOCTL(FDGETDRVSTAT32, fd_ioctl_trans), +HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans), +HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans), +HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans), +HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans), +HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans), +HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans), +HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans), +HANDLE_IOCTL(MTIOCGETCONFIG32, mt_ioctl_trans), +HANDLE_IOCTL(MTIOCSETCONFIG32, mt_ioctl_trans), +HANDLE_IOCTL(CDROMREADMODE2, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROMREADMODE1, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROMREADRAW, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROMREADCOOKED, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROMREADALL, cdrom_ioctl_trans), +HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans), +HANDLE_IOCTL(LOOP_SET_STATUS, loop_status), +HANDLE_IOCTL(LOOP_GET_STATUS, loop_status), +HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout), +#ifdef CONFIG_VT +HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl), +HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl), +HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl), +HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl), +HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl), +HANDLE_IOCTL(FBIOGET_FSCREENINFO, do_fbioget_fscreeninfo_ioctl), +HANDLE_IOCTL(FBIOGETCMAP, do_fbiogetcmap_ioctl), +HANDLE_IOCTL(FBIOPUTCMAP, do_fbioputcmap_ioctl), +#endif +HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl), +HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl), +HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl), +HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl), +HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCSWIN32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl), +HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl), +/* One SMB ioctl needs translations. 
*/ +HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid), +HANDLE_IOCTL(ATM_GETLINKRATE32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETNAMES32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETTYPE32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETESI32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETADDR32, do_atm_ioctl), +HANDLE_IOCTL(ATM_RSTADDR32, do_atm_ioctl), +HANDLE_IOCTL(ATM_ADDADDR32, do_atm_ioctl), +HANDLE_IOCTL(ATM_DELADDR32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETCIRANGE32, do_atm_ioctl), +HANDLE_IOCTL(ATM_SETCIRANGE32, do_atm_ioctl), +HANDLE_IOCTL(ATM_SETESI32, do_atm_ioctl), +HANDLE_IOCTL(ATM_SETESIF32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETSTAT32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETSTATZ32, do_atm_ioctl), +HANDLE_IOCTL(ATM_GETLOOP32, do_atm_ioctl), +HANDLE_IOCTL(ATM_SETLOOP32, do_atm_ioctl), +HANDLE_IOCTL(ATM_QUERYLOOP32, do_atm_ioctl), +HANDLE_IOCTL(SONET_GETSTAT, do_atm_ioctl), +HANDLE_IOCTL(SONET_GETSTATZ, do_atm_ioctl), +HANDLE_IOCTL(SONET_GETDIAG, do_atm_ioctl), +HANDLE_IOCTL(SONET_SETDIAG, do_atm_ioctl), +HANDLE_IOCTL(SONET_CLRDIAG, do_atm_ioctl), +HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl), +HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl), +HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl), +#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) +HANDLE_IOCTL(VG_STATUS, do_lvm_ioctl), +HANDLE_IOCTL(VG_CREATE, do_lvm_ioctl), +HANDLE_IOCTL(VG_EXTEND, do_lvm_ioctl), +HANDLE_IOCTL(LV_CREATE, do_lvm_ioctl), +HANDLE_IOCTL(LV_REMOVE, do_lvm_ioctl), +HANDLE_IOCTL(LV_EXTEND, do_lvm_ioctl), +HANDLE_IOCTL(LV_REDUCE, do_lvm_ioctl), +HANDLE_IOCTL(LV_RENAME, do_lvm_ioctl), +HANDLE_IOCTL(LV_STATUS_BYNAME, do_lvm_ioctl), +HANDLE_IOCTL(LV_STATUS_BYINDEX, do_lvm_ioctl), +HANDLE_IOCTL(PV_CHANGE, do_lvm_ioctl), +HANDLE_IOCTL(PV_STATUS, do_lvm_ioctl), +#endif /* LVM */ +#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) +HANDLE_IOCTL(DRM32_IOCTL_VERSION, drm32_version), +HANDLE_IOCTL(DRM32_IOCTL_GET_UNIQUE, drm32_getsetunique), +HANDLE_IOCTL(DRM32_IOCTL_SET_UNIQUE, drm32_getsetunique), +HANDLE_IOCTL(DRM32_IOCTL_ADD_MAP, drm32_addmap), +HANDLE_IOCTL(DRM32_IOCTL_INFO_BUFS, drm32_info_bufs), +HANDLE_IOCTL(DRM32_IOCTL_FREE_BUFS, drm32_free_bufs), +HANDLE_IOCTL(DRM32_IOCTL_MAP_BUFS, drm32_map_bufs), +HANDLE_IOCTL(DRM32_IOCTL_DMA, drm32_dma), +HANDLE_IOCTL(DRM32_IOCTL_RES_CTX, drm32_res_ctx), +#endif /* DRM */ +HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control), +HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk), +/*HANDLE_IOCTL(USBDEVFS_SUBMITURB32, do_usbdevfs_urb)*/ +HANDLE_IOCTL(USBDEVFS_REAPURB32, do_usbdevfs_reapurb), +HANDLE_IOCTL(USBDEVFS_REAPURBNDELAY32, do_usbdevfs_reapurb), +HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal), +}; + +unsigned long ioctl32_hash_table[1024]; + +static inline unsigned long ioctl32_hash(unsigned long cmd) +{ + return ((cmd >> 6) ^ (cmd >> 4) ^ cmd) & 0x3ff; +} + +static void ioctl32_insert_translation(struct ioctl_trans *trans) +{ + unsigned long hash; + struct ioctl_trans *t; + + hash = ioctl32_hash (trans->cmd); + if (!ioctl32_hash_table[hash]) + ioctl32_hash_table[hash] = (long)trans; + else { + t = (struct ioctl_trans *)ioctl32_hash_table[hash]; + while (t->next) + t = (struct ioctl_trans *)(long)t->next; + trans->next = 0; + t->next = (long)trans; + } +} + +static int __init init_sys32_ioctl(void) +{ + int i, size = sizeof(ioctl_translations) / sizeof(struct ioctl_trans); + for (i=0; i < size ;i++) + ioctl32_insert_translation(&ioctl_translations[i]); + return 0; +} + +__initcall(init_sys32_ioctl); + +static struct ioctl_trans *additional_ioctls; + +/* Always call these 
with kernel lock held! */ + +int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)) +{ + int i; + if (!additional_ioctls) { + additional_ioctls = module_map(PAGE_SIZE); + if (!additional_ioctls) + return -ENOMEM; + memset(additional_ioctls, 0, PAGE_SIZE); + } + for (i = 0; i < PAGE_SIZE/sizeof(struct ioctl_trans); i++) + if (!additional_ioctls[i].cmd) + break; + if (i == PAGE_SIZE/sizeof(struct ioctl_trans)) + return -ENOMEM; + additional_ioctls[i].cmd = cmd; + if (!handler) + additional_ioctls[i].handler = (long)sys_ioctl; + else + additional_ioctls[i].handler = (long)handler; + ioctl32_insert_translation(&additional_ioctls[i]); + return 0; +} + +int unregister_ioctl32_conversion(unsigned int cmd) +{ + unsigned long hash = ioctl32_hash(cmd); + struct ioctl_trans *t, *t1; + + t = (struct ioctl_trans *)ioctl32_hash_table[hash]; + if (!t) return -EINVAL; + if (t->cmd == cmd && t >= additional_ioctls && + (unsigned long)t < ((unsigned long)additional_ioctls) + PAGE_SIZE) { + ioctl32_hash_table[hash] = t->next; + t->cmd = 0; + return 0; + } else while (t->next) { + t1 = (struct ioctl_trans *)t->next; + if (t1->cmd == cmd && t1 >= additional_ioctls && + (unsigned long)t1 < ((unsigned long)additional_ioctls) + PAGE_SIZE) { + t1->cmd = 0; + t->next = t1->next; + return 0; + } + t = t1; + } + return -EINVAL; +} + +asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct file * filp; + int error = -EBADF; + int (*handler)(unsigned int, unsigned int, unsigned long, struct file * filp); + struct ioctl_trans *t; + + filp = fget(fd); + if (!filp) + goto out2; + + if (!filp->f_op || !filp->f_op->ioctl) { + error = sys_ioctl (fd, cmd, arg); + goto out; + } + + t = (struct ioctl_trans *)ioctl32_hash_table [ioctl32_hash (cmd)]; + + while (t && t->cmd != cmd) + t = (struct ioctl_trans *)t->next; + if (t) { + handler = (void *)t->handler; + error = handler(fd, cmd, arg, filp); + } else { + static int count = 0; + if (++count <= 20) + printk("sys32_ioctl(%s:%d): Unknown cmd fd(%d) " + "cmd(%08x) arg(%08x)\n", + current->comm, current->pid, + (int)fd, (unsigned int)cmd, (unsigned int)arg); + error = -EINVAL; + } +out: + fput(filp); +out2: + return error; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/irq.c linuxppc64_2_4/arch/ppc64/kernel/irq.c --- ../kernel.org/linux/arch/ppc64/kernel/irq.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/irq.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,928 @@ +/* + * + * + * arch/ppc/kernel/irq.c + * + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. 
Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "local_irq.h"
+
+atomic_t ipi_recv;
+atomic_t ipi_sent;
+void enable_irq(unsigned int irq_nr);
+void disable_irq(unsigned int irq_nr);
+
+#ifdef CONFIG_SMP
+extern void iSeries_smp_message_recv( struct pt_regs * );
+#endif
+
+volatile unsigned char *chrp_int_ack_special;
+static void register_irq_proc (unsigned int irq);
+
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+	{ [0 ... NR_IRQS-1] = { 0, NULL, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+int ppc_spurious_interrupts = 0;
+struct irqaction *ppc_irq_action[NR_IRQS];
+unsigned long lpEvent_count = 0;
+#ifdef CONFIG_XMON
+extern void xmon(struct pt_regs *regs);
+extern int xmon_bpt(struct pt_regs *regs);
+extern int xmon_sstep(struct pt_regs *regs);
+extern int xmon_iabr_match(struct pt_regs *regs);
+extern int xmon_dabr_match(struct pt_regs *regs);
+extern void (*xmon_fault_handler)(struct pt_regs *regs);
+#endif
+#ifdef CONFIG_XMON
+extern void (*debugger)(struct pt_regs *regs);
+extern int (*debugger_bpt)(struct pt_regs *regs);
+extern int (*debugger_sstep)(struct pt_regs *regs);
+extern int (*debugger_iabr_match)(struct pt_regs *regs);
+extern int (*debugger_dabr_match)(struct pt_regs *regs);
+extern void (*debugger_fault_handler)(struct pt_regs *regs);
+#endif
+
+/* nasty hack for shared irq's since we need to do kmalloc calls but
+ * can't very early in the boot when we need to do a request irq.
+ * this needs to be removed.
+ * -- Cort
+ */
+#define IRQ_KMALLOC_ENTRIES 8
+static int cache_bitmask = 0;
+static struct irqaction malloc_cache[IRQ_KMALLOC_ENTRIES];
+extern int mem_init_done;
+
+void *irq_kmalloc(size_t size, int pri)
+{
+	unsigned int i;
+	if ( mem_init_done )
+		return kmalloc(size,pri);
+	for ( i = 0; i < IRQ_KMALLOC_ENTRIES ; i++ )
+		if ( ! ( cache_bitmask & (1<<i) ) ) {
+			cache_bitmask |= (1<<i);
+			return (void *)(&malloc_cache[i]);
+		}
+	return NULL;
+}
+
+void irq_kfree(void *ptr)
+{
+	unsigned int i;
+	for ( i = 0 ; i < IRQ_KMALLOC_ENTRIES ; i++ )
+		if ( ptr == &malloc_cache[i] ) {
+			cache_bitmask &= ~(1<<i);
+			return;
+		}
+	kfree(ptr);
+}
+
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+	int shared = 0;
+	unsigned long flags;
+	struct irqaction *old, **p;
+	irq_desc_t *desc = irq_desc + irq;
+
+	/*
+	 * Some drivers like serial.c use request_irq() heavily,
+	 * so we have to be careful not to interfere with a
+	 * running system.
+	 */
+	if (new->flags & SA_SAMPLE_RANDOM) {
+		/*
+		 * This function might sleep, so we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if the wrong
+		 * driver is attempted to be loaded without actually
+		 * installing a new handler, but that is hardly a problem:
+		 * only the sysadmin is able to do this.
+		 */
+		rand_initialize_irq(irq);
+	}
+
+	/*
+	 * The following block of code has to be executed atomically
+	 */
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	if ((old = *p) != NULL) {
+		/* Can't share interrupts unless both agree to */
+		if (!(old->flags & new->flags & SA_SHIRQ)) {
+			spin_unlock_irqrestore(&desc->lock,flags);
+			return -EBUSY;
+		}
+
+		/* add new interrupt at end of irq queue */
+		do {
+			p = &old->next;
+			old = *p;
+		} while (old);
+		shared = 1;
+	}
+
+	*p = new;
+
+	if (!shared) {
+		desc->depth = 0;
+		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+		unmask_irq(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock,flags);
+
+	register_irq_proc(irq);
+	return 0;
+}
+
+/* This could be promoted to a real free_irq() ...
 */
+static int
+do_free_irq(int irq, void* dev_id)
+{
+	irq_desc_t *desc;
+	struct irqaction **p;
+	unsigned long flags;
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	for (;;) {
+		struct irqaction * action = *p;
+		if (action) {
+			struct irqaction **pp = p;
+			p = &action->next;
+			if (action->dev_id != dev_id)
+				continue;
+
+			/* Found it - now remove it from the list of entries */
+			*pp = action->next;
+			if (!desc->action) {
+				desc->status |= IRQ_DISABLED;
+				mask_irq(irq);
+			}
+			spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+			/* Wait to make sure it's not being used on another CPU */
+			while (desc->status & IRQ_INPROGRESS)
+				barrier();
+#endif
+			irq_kfree(action);
+			return 0;
+		}
+		printk("Trying to free free IRQ%d\n",irq);
+		spin_unlock_irqrestore(&desc->lock,flags);
+		break;
+	}
+	return -ENOENT;
+}
+
+int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags, const char * devname, void *dev_id)
+{
+	struct irqaction *action;
+	int retval;
+
+	if (irq >= NR_IRQS)
+		return -EINVAL;
+	if (!handler)
+		/* We could really implement free_irq() instead of this... */
+		return do_free_irq(irq, dev_id);
+
+	action = (struct irqaction *)
+		irq_kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action) {
+		printk(KERN_ERR "irq_kmalloc() failed for irq %d !\n", irq);
+		return -ENOMEM;
+	}
+
+	action->handler = handler;
+	action->flags = irqflags;
+	action->mask = 0;
+	action->name = devname;
+	action->dev_id = dev_id;
+	action->next = NULL;
+
+	retval = setup_irq(irq, action);
+	if (retval)
+		irq_kfree(action);
+
+	return retval;
+}
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+	request_irq(irq, NULL, 0, NULL, dev_id);
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ *	disable_irq_nosync - disable an irq without waiting
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line. Disables nest: each call
+ *	must be matched by an enable_irq(). Unlike disable_irq(), this
+ *	function does not ensure existing instances of the IRQ handler
+ *	have completed before returning.
+ *
+ *	This function may be called from IRQ context.
+ */
+
+void disable_irq_nosync(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	if (!desc->depth++) {
+		if (!(desc->status & IRQ_PER_CPU))
+			desc->status |= IRQ_DISABLED;
+		mask_irq(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *	disable_irq - disable an irq and wait for completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line. Disables nest: for two
+ *	disables you need two enables. This function waits for any
+ *	pending IRQ handlers for this interrupt to complete before
+ *	returning. If you use this function while holding a resource
+ *	the IRQ handler may need, you will deadlock.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+
+	if (!local_irq_count(smp_processor_id())) {
+		do {
+			barrier();
+		} while (irq_desc[irq].status & IRQ_INPROGRESS);
+	}
+}
+
+/**
+ *	enable_irq - enable interrupt handling on an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Re-enables the processing of interrupts on this IRQ line
+ *	provided no disable_irq calls are now in effect.
+ *
+ *	This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	switch (desc->depth) {
+	case 1: {
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+		desc->status = status;
+		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+			desc->status = status | IRQ_REPLAY;
+			hw_resend_irq(desc->handler,irq);
+		}
+		unmask_irq(irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;
+		break;
+	case 0:
+		printk("enable_irq(%u) unbalanced\n", irq);
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+int get_irq_list(char *buf)
+{
+	int i, len = 0, j;
+	struct irqaction * action;
+
+	len += sprintf(buf+len, "           ");
+	for (j=0; j<smp_num_cpus; j++)
+		len += sprintf(buf+len, "CPU%d       ", j);
+	*(char *)(buf+len++) = '\n';
+
+	for (i = 0 ; i < NR_IRQS ; i++) {
+		action = irq_desc[i].action;
+		if ( !action || !action->handler )
+			continue;
+		len += sprintf(buf+len, "%3d: ", i);
+#ifdef CONFIG_SMP
+		for (j = 0; j < smp_num_cpus; j++)
+			len += sprintf(buf+len, "%10u ",
+				kstat.irqs[cpu_logical_map(j)][i]);
+#else
+		len += sprintf(buf+len, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+		if ( irq_desc[i].handler )
+			len += sprintf(buf+len, " %s ", irq_desc[i].handler->typename );
+		else
+			len += sprintf(buf+len, "  None      ");
+		len += sprintf(buf+len, "%s", (irq_desc[i].status & IRQ_LEVEL) ? "Level " : "Edge  ");
+		len += sprintf(buf+len, "    %s", action->name);
+		for (action=action->next; action; action = action->next) {
+			len += sprintf(buf+len, ", %s", action->name);
+		}
+		len += sprintf(buf+len, "\n");
+	}
+#ifdef CONFIG_SMP
+	/* should this be per processor send/receive? */
+	len += sprintf(buf+len, "IPI (recv/sent): %10u/%u\n",
+		       atomic_read(&ipi_recv), atomic_read(&ipi_sent));
+#endif
+	len += sprintf(buf+len, "BAD: %10u\n", ppc_spurious_interrupts);
+	return len;
+}
+
+static inline void
+handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
+{
+	int status = 0;
+
+	if (!(action->flags & SA_INTERRUPT))
+		__sti();
+
+	do {
+		status |= action->flags;
+		action->handler(irq, action->dev_id, regs);
+		action = action->next;
+	} while (action);
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+	__cli();
+}
+
+/*
+ * Eventually, this should take an array of interrupts and an array size
+ * so it can dispatch multiple interrupts.
+ */
+void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
+{
+	int status;
+	struct irqaction *action;
+	int cpu = smp_processor_id();
+	irq_desc_t *desc = irq_desc + irq;
+
+	kstat.irqs[cpu][irq]++;
+	spin_lock(&desc->lock);
+	ack_irq(irq);
+	/*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier.
+	 * WAITING is used by probe to mark irqs that are being tested.
+	 */
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+	if (!(status & IRQ_PER_CPU))
+		status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+
+static inline void
+handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
+{
+	int status = 0;
+
+	if (!(action->flags & SA_INTERRUPT))
+		__sti();
+
+	do {
+		status |= action->flags;
+		action->handler(irq, action->dev_id, regs);
+		action = action->next;
+	} while (action);
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+	__cli();
+}
+
+/*
+ * Eventually, this should take an array of interrupts and an array size
+ * so it can dispatch multiple interrupts.
+ */
+void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
+{
+	int status;
+	struct irqaction *action;
+	int cpu = smp_processor_id();
+	irq_desc_t *desc = irq_desc + irq;
+
+	kstat.irqs[cpu][irq]++;
+	spin_lock(&desc->lock);
+	ack_irq(irq);
+	/*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier.
+	 * WAITING is used by probe to mark irqs that are being tested.
+	 */
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+	if (!(status & IRQ_PER_CPU))
+		status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+	 */
+	action = NULL;
+	if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+		action = desc->action;
+		if (!action || !action->handler) {
+			ppc_spurious_interrupts++;
+			printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq);
+			/* We can't call disable_irq here, it would deadlock */
+			if (!desc->depth)
+				desc->depth = 1;
+			desc->status |= IRQ_DISABLED;
+			/* This is not a real spurious interrupt, we
+			 * have to eoi it, so we jump to out
+			 */
+			mask_irq(irq);
+			goto out;
+		}
+		status &= ~IRQ_PENDING; /* we commit to handling */
+		if (!(status & IRQ_PER_CPU))
+			status |= IRQ_INPROGRESS; /* we are handling it */
+	}
+	desc->status = status;
+
+	/*
+	 * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
+	 */
+	if (!action)
+		goto out;
+
+	/*
+	 * Edge triggered interrupts need to remember
+	 * pending events.
+	 * This applies to any hw interrupts that allow a second
+	 * instance of the same irq to arrive while we are in do_IRQ
+	 * or in the handler. But the code here only handles the _second_
+	 * instance of the irq, not the third or fourth. So it is mostly
+	 * useful for irq hardware that does not mask cleanly in an
+	 * SMP environment.
+	 */
+	for (;;) {
+		spin_unlock(&desc->lock);
+		handle_irq_event(irq, regs, action);
+		spin_lock(&desc->lock);
+
+		if (!(desc->status & IRQ_PENDING))
+			break;
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+out:
+	/*
+	 * The ->end() handler has to deal with interrupts which got
+	 * disabled while the handler was running.
+	 */
+	if (irq_desc[irq].handler) {
+		if (irq_desc[irq].handler->end)
+			irq_desc[irq].handler->end(irq);
+		else if (irq_desc[irq].handler->enable)
+			irq_desc[irq].handler->enable(irq);
+	}
+	spin_unlock(&desc->lock);
+}
+
+int do_IRQ(struct pt_regs *regs, int isfake)
+{
+	int cpu = smp_processor_id();
+	int irq;
+	struct Paca * paca;
+	struct ItLpQueue * lpq;
+
+	/* if(cpu) udbg_printf("Entering do_IRQ\n"); */
+
+	irq_enter(cpu);
+
+	if ( _machine != _MACH_iSeries ) {
+
+		/* every arch is required to have a get_irq -- Cort */
+		irq = ppc_md.get_irq( regs );
+
+		if ( irq >= 0 ) {
+			ppc_irq_dispatch_handler( regs, irq );
+			if (ppc_md.post_irq)
+				ppc_md.post_irq( regs, irq );
+		} else {
+			/* -2 means ignore, already handled */
+			if (irq != -2) {
+				printk(KERN_DEBUG "Bogus interrupt %d from PC = %lx\n",
+					irq, regs->nip);
+				ppc_spurious_interrupts++;
+			}
+		}
+	}
+	/* if on iSeries partition */
+	else {
+		paca = (struct Paca *)mfspr(SPRG3);
+#ifdef CONFIG_SMP
+		if ( paca->xLpPaca.xIntDword.xFields.xIpiCnt ) {
+			paca->xLpPaca.xIntDword.xFields.xIpiCnt = 0;
+			iSeries_smp_message_recv( regs );
+		}
+#endif /* CONFIG_SMP */
+		lpq = paca->lpQueuePtr;
+		if ( lpq && ItLpQueue_isLpIntPending( lpq ) )
+			lpEvent_count += ItLpQueue_process( lpq, regs );
+	}
+
+	irq_exit(cpu);
+
+	if ( _machine == _MACH_iSeries ) {
+		if ( paca->xLpPaca.xIntDword.xFields.xDecrInt ) {
+			paca->xLpPaca.xIntDword.xFields.xDecrInt = 0;
+			/* Signal a fake decrementer interrupt */
+			timer_interrupt( regs );
+		}
+	}
+
+	if (softirq_pending(cpu))
+		do_softirq();
+
+	return 1; /* lets ret_from_int know we can do checks */
+}
+
+unsigned long probe_irq_on (void)
+{
+	return 0;
+}
+
+int probe_irq_off (unsigned long irqs)
+{
+	return 0;
+}
+
+unsigned int probe_irq_mask(unsigned long irqs)
+{
+	return 0;
+}
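The three probe_irq_* stubs above mean classic ISA-style interrupt autoprobing always reports "nothing found" on this port. A legacy driver sequence like the following (with invented names and message text) is therefore a harmless no-op:

/* Hypothetical legacy-style autoprobe, shown only to illustrate the stubs. */
static int widget_find_irq(void)
{
	unsigned long mask = probe_irq_on();	/* stub above: returns 0 */
	int irq;

	/* ...normally: poke the device so it raises its interrupt... */

	irq = probe_irq_off(mask);	/* stub above: always 0, "none seen" */
	if (irq <= 0)
		printk(KERN_INFO "widget: IRQ autoprobe unsupported on ppc64\n");
	return irq;
}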
+
+void __init init_IRQ(void)
+{
+	static int once = 0;
+
+	if ( once )
+		return;
+	else
+		once++;
+
+	ppc_md.init_IRQ();
+	if (ppc_md.init_ras_IRQ)
+		ppc_md.init_ras_IRQ();
+}
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = NO_PROC_ID;
+
+static void show(char * str)
+{
+	int cpu = smp_processor_id();
+	int i;
+
+	printk("\n%s, CPU %d:\n", str, cpu);
+	printk("irq:  %d [ ", irqs_running());
+	for (i = 0; i < smp_num_cpus; i++)
+		printk("%u ", __brlock_array[i][BR_GLOBALIRQ_LOCK]);
+	printk("]\nbh:   %d [ ",
+		(spin_is_locked(&global_bh_lock) ? 1 : 0));
+	for (i = 0; i < smp_num_cpus; i++)
+		printk("%u ", local_bh_count(i));
+	printk("]\n");
+}
+
+#define MAXCOUNT 10000000
+
+void synchronize_irq(void)
+{
+	if (irqs_running()) {
+		cli();
+		sti();
+	}
+}
+
+static inline void get_irqlock(int cpu)
+{
+	int count;
+
+	if ((unsigned char)cpu == global_irq_holder)
+		return;
+
+	count = MAXCOUNT;
+again:
+	br_write_lock(BR_GLOBALIRQ_LOCK);
+	for (;;) {
+		spinlock_t *lock;
+
+		if (!irqs_running() &&
+		    (local_bh_count(smp_processor_id()) || !spin_is_locked(&global_bh_lock)))
+			break;
+
+		br_write_unlock(BR_GLOBALIRQ_LOCK);
+		lock = &__br_write_locks[BR_GLOBALIRQ_LOCK].lock;
+		while (irqs_running() ||
+		       spin_is_locked(lock) ||
+		       (!local_bh_count(smp_processor_id()) && spin_is_locked(&global_bh_lock))) {
+			if (!--count) {
+				show("get_irqlock");
+				count = (~0 >> 1);
+			}
+			__sti();
+			barrier();
+			__cli();
+		}
+		goto again;
+	}
+
+	global_irq_holder = cpu;
+}
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+	unsigned long flags;
+
+	__save_flags(flags);
+	if (flags & (1UL << 15)) {
+		int cpu = smp_processor_id();
+		__cli();
+		if (!local_irq_count(cpu))
+			get_irqlock(cpu);
+	}
+}
+
+void __global_sti(void)
+{
+	int cpu = smp_processor_id();
+
+	if (!local_irq_count(cpu))
+		release_irqlock(cpu);
+	__sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+	int retval;
+	int local_enabled;
+	unsigned long flags;
+
+	__save_flags(flags);
+	local_enabled = (flags >> 15) & 1;
+	/* default to local */
+	retval = 2 + local_enabled;
+
+	/* check for global flags if we're not in an interrupt */
+	if (!local_irq_count(smp_processor_id())) {
+		if (local_enabled)
+			retval = 1;
+		if (global_irq_holder == (unsigned char) smp_processor_id())
+			retval = 0;
+	}
+	return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+	switch (flags) {
+	case 0:
+		__global_cli();
+		break;
+	case 1:
+		__global_sti();
+		break;
+	case 2:
+		__cli();
+		break;
+	case 3:
+		__sti();
+		break;
+	default:
+		printk("global_restore_flags: %016lx caller %p\n",
+			flags, __builtin_return_address(0));
+	}
+}
+
+#endif /* CONFIG_SMP */
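To make the 0-3 encoding above concrete, here is a sketch of how a caller sees these routines, assuming the usual 2.4 SMP mapping of cli()/sti()/save_flags()/restore_flags() onto the __global_* versions; the function name is invented:

/* Sketch only, under the assumptions stated above. */
static void widget_touch_shared_state(void)
{
	unsigned long flags;

	save_flags(flags);	/* __global_save_flags(): 0..3 per the table above */
	cli();			/* outside IRQ context this takes the global IRQ lock */
	/* ...brief critical section, protected against every CPU... */
	restore_flags(flags);	/* re-establishes exactly the saved global/local state */
}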
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+#ifdef CONFIG_IRQ_ALL_CPUS
+unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0xffffffff};
+#else  /* CONFIG_IRQ_ALL_CPUS */
+unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0x00000000};
+#endif /* CONFIG_IRQ_ALL_CPUS */
+
+#define HEX_DIGITS 8
+
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08x\n", irq_affinity[(int)(long)data]);
+}
+
+static unsigned int parse_hex_value (const char *buffer,
+		unsigned long count, unsigned long *ret)
+{
+	unsigned char hexnum [HEX_DIGITS];
+	unsigned long value;
+	int i;
+
+	if (!count)
+		return -EINVAL;
+	if (count > HEX_DIGITS)
+		count = HEX_DIGITS;
+	if (copy_from_user(hexnum, buffer, count))
+		return -EFAULT;
+
+	/*
+	 * Parse the first 8 characters as a hex string, any non-hex char
+	 * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+	 */
+	value = 0;
+
+	for (i = 0; i < count; i++) {
+		unsigned int c = hexnum[i];
+
+		switch (c) {
+		case '0' ... '9': c -= '0'; break;
+		case 'a' ... 'f': c -= 'a'-10; break;
+		case 'A' ... 'F': c -= 'A'-10; break;
+		default:
+			goto out;
+		}
+		value = (value << 4) | c;
+	}
+out:
+	*ret = value;
+	return 0;
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	int irq = (int)(long) data, full_count = count, err;
+	unsigned long new_value;
+
+	if (!irq_desc[irq].handler->set_affinity)
+		return -EIO;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)
+		return err;
+
+/* Why is this disabled ? --BenH */
+#if 0/*CONFIG_SMP*/
+	/*
+	 * Do not allow disabling IRQs completely - it's a too easy
+	 * way to make the system unusable accidentally :-) At least
+	 * one online CPU still has to be targeted.
+	 */
+	if (!(new_value & cpu_online_map))
+		return -EINVAL;
+#endif
+
+	irq_affinity[irq] = new_value;
+	irq_desc[irq].handler->set_affinity(irq, new_value);
+
+	return full_count;
+}
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	unsigned long *mask = (unsigned long *) data;
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	unsigned long *mask = (unsigned long *) data, full_count = count, err;
+	unsigned long new_value;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)
+		return err;
+
+	*mask = new_value;
+
+#ifdef CONFIG_PPC_ISERIES
+	{
+		unsigned i;
+		for (i=0; i < MAX_PACAS; ++i) {
+			xPaca[i].prof_enabled = (new_value & 1);
+			new_value >>= 1;
+		}
+	}
+#endif
+
+	return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+	struct proc_dir_entry *entry;
+	char name [MAX_NAMELEN];
+
+	if (!root_irq_dir || (irq_desc[irq].handler == NULL))
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	sprintf(name, "%d", irq);
+
+	/* create /proc/irq/1234 */
+	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+	/* create /proc/irq/1234/smp_affinity */
+	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+	entry->nlink = 1;
+	entry->data = (void *)(long)irq;
+	entry->read_proc = irq_affinity_read_proc;
+	entry->write_proc = irq_affinity_write_proc;
+
+	smp_affinity_entry[irq] = entry;
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+	struct proc_dir_entry *entry;
+	int i;
+
+	/* create /proc/irq */
+	root_irq_dir = proc_mkdir("irq", 0);
+
+	/* create /proc/irq/prof_cpu_mask */
+	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+	entry->nlink = 1;
+	entry->data = (void *)&prof_cpu_mask;
+	entry->read_proc = prof_cpu_mask_read_proc;
+
entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. + */ + for (i = 0; i < NR_IRQS; i++) { + if (irq_desc[i].handler == NULL) + continue; + register_irq_proc(i); + } +} + +void no_action(int irq, void *dev, struct pt_regs *regs) +{ +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/lmb.c linuxppc64_2_4/arch/ppc64/kernel/lmb.c --- ../kernel.org/linux/arch/ppc64/kernel/lmb.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/lmb.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,343 @@ +/* + * + * Procedures for interfacing to Open Firmware. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +extern unsigned long klimit; +extern unsigned long reloc_offset(void); + + +static long lmb_add_region(struct lmb_region *, unsigned long, unsigned long, unsigned long); + +struct lmb lmb = { + 0, + {0,0,0,{{0,0,0}}}, + {0,0,0,{{0,0,0}}} +}; + + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void +lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1 ;i++) { + rgn->region[i].base = rgn->region[i+1].base; + rgn->region[i].physbase = rgn->region[i+1].physbase; + rgn->region[i].size = rgn->region[i+1].size; + rgn->region[i].type = rgn->region[i+1].type; + } + rgn->cnt--; +} + + +/* This routine called with relocation disabled. */ +void +lmb_init(void) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + _lmb->memory.region[0].base = 0; + _lmb->memory.region[0].size = 0; + _lmb->memory.region[0].type = LMB_MEMORY_AREA; + _lmb->memory.cnt = 1; + + /* Ditto. */ + _lmb->reserved.region[0].base = 0; + _lmb->reserved.region[0].size = 0; + _lmb->reserved.region[0].type = LMB_MEMORY_AREA; + _lmb->reserved.cnt = 1; +} + +/* This routine called with relocation disabled. */ +void +lmb_analyze(void) +{ + unsigned long i, physbase = 0; + unsigned long total_size = 0; + unsigned long size_mask = 0; + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + + for (i=0; i < _lmb->memory.cnt ;i++) { + unsigned long lmb_size = _lmb->memory.region[i].size; +#ifdef CONFIG_MSCHUNKS + _lmb->memory.region[i].physbase = physbase; + physbase += lmb_size; +#else + _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; +#endif + total_size += lmb_size; + size_mask |= lmb_size; + } + _lmb->memory.size = total_size; + _lmb->memory.lcd_size = (1UL << cnt_trailing_zeros(size_mask)); +} + +/* This routine called with relocation disabled. */ +long +lmb_add(unsigned long base, unsigned long size) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_rgn = &(_lmb->memory); + + return lmb_add_region(_rgn, base, size, LMB_MEMORY_AREA); + +} + +/* This routine called with relocation disabled. 
*/ +long +lmb_add_io(unsigned long base, unsigned long size) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_rgn = &(_lmb->memory); + + return lmb_add_region(_rgn, base, size, LMB_IO_AREA); + +} + +long +lmb_reserve(unsigned long base, unsigned long size) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_rgn = &(_lmb->reserved); + + return lmb_add_region(_rgn, base, size, LMB_MEMORY_AREA); +} + +/* This routine called with relocation disabled. */ +static long +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size, + unsigned long type) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. */ + for (i=0; i < rgn->cnt ;i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + unsigned long rgntype = rgn->region[i].type; + + if ( rgntype != type ) + continue; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].physbase -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if ( coalesced ) { + return coalesced; + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { + return -1; + } + + /* Couldn't coalesce the LMB, so add it to the sorted table. */ + for (i=rgn->cnt-1; i >= 0 ;i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].physbase = rgn->region[i].physbase; + rgn->region[i+1].size = rgn->region[i].size; + rgn->region[i+1].type = rgn->region[i].type; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].physbase = lmb_abs_to_phys(base); + rgn->region[i+1].size = size; + rgn->region[i+1].type = type; + break; + } + } + rgn->cnt++; + + return 0; +} + +long +lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt ;i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? 
i : -1; +} + + +unsigned long +lmb_alloc(unsigned long size, unsigned long align) +{ + long i, j; + unsigned long base; + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_mem = &(_lmb->memory); + struct lmb_region *_rsv = &(_lmb->reserved); + + for (i=_mem->cnt-1; i >= 0 ;i--) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; + unsigned long lmbtype = _mem->region[i].type; + + if ( lmbtype != LMB_MEMORY_AREA ) + continue; + + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); + + while ( (lmbbase <= base) && + ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { + base = _ALIGN_DOWN(_rsv->region[j].base-size, align); + } + + if ( (base != 0) && (lmbbase <= base) ) + break; + } + + if ( i < 0 ) + return 0; + + lmb_add_region(_rsv, base, size, LMB_MEMORY_AREA); + + return base; +} + +unsigned long +lmb_phys_mem_size(void) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_mem = &(_lmb->memory); + unsigned long idx = _mem->cnt-1; + unsigned long lastbase = _mem->region[idx].physbase; + unsigned long lastsize = _mem->region[idx].size; + + return (lastbase + lastsize); +} + +unsigned long +lmb_end_of_DRAM(void) +{ + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_mem = &(_lmb->memory); + unsigned long idx = _mem->cnt-1; +#ifdef CONFIG_MSCHUNKS + unsigned long lastbase = _mem->region[idx].physbase; +#else + unsigned long lastbase = _mem->region[idx].base; +#endif /* CONFIG_MSCHUNKS */ + unsigned long lastsize = _mem->region[idx].size; + + return (lastbase + lastsize); +} + + +unsigned long +lmb_abs_to_phys(unsigned long aa) +{ + unsigned long i, pa = 0; + unsigned long offset = reloc_offset(); + struct lmb *_lmb = PTRRELOC(&lmb); + struct lmb_region *_mem = &(_lmb->memory); + + for (i=0; i < _mem->cnt ;i++) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; + if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { + pa = _mem->region[i].physbase + (aa - lmbbase); + break; + } + } + + return pa; +} + +void +lmb_dump(char *str) +{ + unsigned long i; + + udbg_printf("\nlmb_dump: %s\n", str); + udbg_printf(" debug = %s\n", + (lmb.debug) ? 
"TRUE" : "FALSE"); + udbg_printf(" memory.cnt = %d\n", + lmb.memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + lmb.memory.size); + udbg_printf(" memory.lcd_size = 0x%lx\n", + lmb.memory.lcd_size); + for (i=0; i < lmb.memory.cnt ;i++) { + udbg_printf(" memory.region[%d].base = 0x%lx\n", + i, lmb.memory.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + lmb.memory.region[i].physbase); + udbg_printf(" .size = 0x%lx\n", + lmb.memory.region[i].size); + udbg_printf(" .type = 0x%lx\n", + lmb.memory.region[i].type); + } + + udbg_printf("\n"); + udbg_printf(" reserved.cnt = %d\n", + lmb.reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + lmb.reserved.size); + udbg_printf(" reserved.lcd_size = 0x%lx\n", + lmb.reserved.lcd_size); + for (i=0; i < lmb.reserved.cnt ;i++) { + udbg_printf(" reserved.region[%d].base = 0x%lx\n", + i, lmb.reserved.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + lmb.reserved.region[i].physbase); + udbg_printf(" .size = 0x%lx\n", + lmb.reserved.region[i].size); + udbg_printf(" .type = 0x%lx\n", + lmb.reserved.region[i].type); + } +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/local_irq.h linuxppc64_2_4/arch/ppc64/kernel/local_irq.h --- ../kernel.org/linux/arch/ppc64/kernel/local_irq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/local_irq.h Sat Sep 15 22:35:39 2001 @@ -0,0 +1,26 @@ +/* + * c 2001 PowerPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _PPC_KERNEL_LOCAL_IRQ_H +#define _PPC_KERNEL_LOCAL_IRQ_H + +#include +#include +#include +#include +#include + +void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq); + +#define NR_MASK_WORDS ((NR_IRQS + 63) / 64) + +extern int ppc_spurious_interrupts; +extern int ppc_second_irq; +extern struct irqaction *ppc_irq_action[NR_IRQS]; + +#endif /* _PPC_KERNEL_LOCAL_IRQ_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/mf.c linuxppc64_2_4/arch/ppc64/kernel/mf.c --- ../kernel.org/linux/arch/ppc64/kernel/mf.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/mf.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,1202 @@ +/* + * mf.c + * Copyright (C) 2001 Troy D. Armstrong IBM Corporation + * + * This modules exists as an interface between a Linux secondary partition + * running on an iSeries and the primary partition's Virtual Service + * Processor (VSP) object. The VSP has final authority over powering on/off + * all partitions in the iSeries. It also provides miscellaneous low-level + * machine facility type operations. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern struct pci_dev * iSeries_vio_dev;
+
+/*
+ * This is the structure layout for the Machine Facilities LPAR event
+ * flows.
+ */
+struct VspCmdData;
+struct CeMsgData;
+union SafeCast
+{
+	u64 ptrAsU64;
+	void *ptr;
+};
+
+
+typedef void (*CeMsgCompleteHandler)( void *token, struct CeMsgData *vspCmdRsp );
+
+struct CeMsgCompleteData
+{
+	CeMsgCompleteHandler xHdlr;
+	void *xToken;
+};
+
+struct VspRspData
+{
+	struct semaphore *xSemaphore;
+	struct VspCmdData *xResponse;
+};
+
+struct IoMFLpEvent
+{
+	struct HvLpEvent xHvLpEvent;
+
+	u16 xSubtypeRc;
+	u16 xRsvd1;
+	u32 xRsvd2;
+
+	union
+	{
+
+		struct AllocData
+		{
+			u16 xSize;
+			u16 xType;
+			u32 xCount;
+			u16 xRsvd3;
+			u8 xRsvd4;
+			HvLpIndex xTargetLp;
+		} xAllocData;
+
+		struct CeMsgData
+		{
+			u8 xCEMsg[12];
+			char xReserved[4];
+			struct CeMsgCompleteData *xToken;
+		} xCEMsgData;
+
+		struct VspCmdData
+		{
+			union SafeCast xTokenUnion;
+			u16 xCmd;
+			HvLpIndex xLpIndex;
+			u8 xRc;
+			u32 xReserved1;
+
+			union VspCmdSubData
+			{
+				struct
+				{
+					u64 xState;
+				} xGetStateOut;
+
+				struct
+				{
+					u64 xIplType;
+				} xGetIplTypeOut, xFunction02SelectIplTypeIn;
+
+				struct
+				{
+					u64 xIplMode;
+				} xGetIplModeOut, xFunction02SelectIplModeIn;
+
+				struct
+				{
+					u64 xPage[4];
+				} xGetSrcHistoryIn;
+
+				struct
+				{
+					u64 xFlag;
+				} xGetAutoIplWhenPrimaryIplsOut,
+				  xSetAutoIplWhenPrimaryIplsIn,
+				  xWhiteButtonPowerOffIn,
+				  xFunction08FastPowerOffIn,
+				  xIsSpcnRackPowerIncompleteOut;
+
+				struct
+				{
+					u64 xToken;
+					u64 xAddressType;
+					u64 xSide;
+					u32 xTransferLength;
+					u32 xOffset;
+				} xSetKernelImageIn,
+				  xGetKernelImageIn,
+				  xSetKernelCmdLineIn,
+				  xGetKernelCmdLineIn;
+
+				struct
+				{
+					u32 xTransferLength;
+				} xGetKernelImageOut, xGetKernelCmdLineOut;
+
+
+				u8 xReserved2[80];
+
+			} xSubData;
+		} xVspCmd;
+	} xUnion;
+};
+
+
+/*
+ * All outgoing event traffic is kept on a FIFO queue. The first
+ * pointer points to the one that is outstanding, and all new
+ * requests get stuck on the end. Also, we keep a certain number of
+ * preallocated stack elements so that we can operate very early in
+ * the boot up sequence (before kmalloc is ready).
+ */
+struct StackElement
+{
+	struct StackElement * next;
+	struct IoMFLpEvent event;
+	MFCompleteHandler hdlr;
+	char dmaData[72];
+	unsigned dmaDataLength;
+	unsigned remoteAddress;
+};
+static spinlock_t spinlock;
+static struct StackElement * head = NULL;
+static struct StackElement * tail = NULL;
+static struct StackElement * avail = NULL;
+static struct StackElement prealloc[16];
+
+/*
+ * Put a stack element onto the available queue, so it can get reused.
+ * Attention! You must have the spinlock before calling!
+ */
+void free( struct StackElement * element )
+{
+	if ( element != NULL )
+	{
+		element->next = avail;
+		avail = element;
+	}
+}
+
+/*
+ * Enqueue the outbound event onto the stack. If the queue was
+ * empty to begin with, we must also issue it via the Hypervisor
+ * interface. There is a section of code below that will touch
+ * the first stack pointer without the protection of the spinlock.
+ * This is OK, because we know that nobody else will be modifying
+ * the first pointer when we do this.
+ */ +static int signalEvent( struct StackElement * newElement ) +{ + int rc = 0; + unsigned long flags; + int go = 1; + struct StackElement * element; + HvLpEvent_Rc hvRc; + + /* enqueue the event */ + if ( newElement != NULL ) + { + spin_lock_irqsave( &spinlock, flags ); + if ( head == NULL ) + head = newElement; + else { + go = 0; + tail->next = newElement; + } + newElement->next = NULL; + tail = newElement; + spin_unlock_irqrestore( &spinlock, flags ); + } + + /* send the event */ + while ( go ) + { + go = 0; + + /* any DMA data to send beforehand? */ + if ( head->dmaDataLength > 0 ) + HvCallEvent_dmaToSp( head->dmaData, head->remoteAddress, head->dmaDataLength, HvLpDma_Direction_LocalToRemote ); + + hvRc = HvCallEvent_signalLpEvent(&head->event.xHvLpEvent); + if ( hvRc != HvLpEvent_Rc_Good ) + { + printk( KERN_ERR "mf.c: HvCallEvent_signalLpEvent() failed with %d\n", (int)hvRc ); + + spin_lock_irqsave( &spinlock, flags ); + element = head; + head = head->next; + if ( head != NULL ) + go = 1; + spin_unlock_irqrestore( &spinlock, flags ); + + if ( element == newElement ) + rc = -EIO; + else { + if ( element->hdlr != NULL ) + { + union SafeCast mySafeCast; + mySafeCast.ptrAsU64 = element->event.xHvLpEvent.xCorrelationToken; + (*element->hdlr)( mySafeCast.ptr, -EIO ); + } + } + + spin_lock_irqsave( &spinlock, flags ); + free( element ); + spin_unlock_irqrestore( &spinlock, flags ); + } + } + + return rc; +} + +/* + * Allocate a new StackElement structure, and initialize it. + */ +static struct StackElement * newStackElement( void ) +{ + struct StackElement * newElement = NULL; + HvLpIndex primaryLp = HvLpConfig_getPrimaryLpIndex(); + unsigned long flags; + + if ( newElement == NULL ) + { + spin_lock_irqsave( &spinlock, flags ); + if ( avail != NULL ) + { + newElement = avail; + avail = avail->next; + } + spin_unlock_irqrestore( &spinlock, flags ); + } + + if ( newElement == NULL ) + newElement = kmalloc(sizeof(struct StackElement),GFP_ATOMIC); + + if ( newElement == NULL ) + { + printk( KERN_ERR "mf.c: unable to kmalloc %ld bytes\n", sizeof(struct StackElement) ); + return NULL; + } + + memset( newElement, 0, sizeof(struct StackElement) ); + newElement->event.xHvLpEvent.xFlags.xValid = 1; + newElement->event.xHvLpEvent.xFlags.xAckType = HvLpEvent_AckType_ImmediateAck; + newElement->event.xHvLpEvent.xFlags.xAckInd = HvLpEvent_AckInd_DoAck; + newElement->event.xHvLpEvent.xFlags.xFunction = HvLpEvent_Function_Int; + newElement->event.xHvLpEvent.xType = HvLpEvent_Type_MachineFac; + newElement->event.xHvLpEvent.xSourceLp = HvLpConfig_getLpIndex(); + newElement->event.xHvLpEvent.xTargetLp = primaryLp; + newElement->event.xHvLpEvent.xSizeMinus1 = sizeof(newElement->event)-1; + newElement->event.xHvLpEvent.xRc = HvLpEvent_Rc_Good; + newElement->event.xHvLpEvent.xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primaryLp,HvLpEvent_Type_MachineFac); + newElement->event.xHvLpEvent.xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primaryLp,HvLpEvent_Type_MachineFac); + + return newElement; +} + +static int signalVspInstruction( struct VspCmdData *vspCmd ) +{ + struct StackElement * newElement = newStackElement(); + int rc = 0; + struct VspRspData response; + DECLARE_MUTEX_LOCKED(Semaphore); + response.xSemaphore = &Semaphore; + response.xResponse = vspCmd; + + if ( newElement == NULL ) + rc = -ENOMEM; + else { + newElement->event.xHvLpEvent.xSubtype = 6; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('V'<<8)+('I'<<0); + newElement->event.xUnion.xVspCmd.xTokenUnion.ptr = 
&response; + newElement->event.xUnion.xVspCmd.xCmd = vspCmd->xCmd; + newElement->event.xUnion.xVspCmd.xLpIndex = HvLpConfig_getLpIndex(); + newElement->event.xUnion.xVspCmd.xRc = 0xFF; + newElement->event.xUnion.xVspCmd.xReserved1 = 0; + memcpy(&(newElement->event.xUnion.xVspCmd.xSubData),&(vspCmd->xSubData), sizeof(vspCmd->xSubData)); + mb(); + + rc = signalEvent(newElement); + } + + if (rc == 0) + { + down(&Semaphore); + } + + return rc; +} + + +/* + * Send a 12-byte CE message to the primary partition VSP object + */ +static int signalCEMsg( char * ceMsg, void * token ) +{ + struct StackElement * newElement = newStackElement(); + int rc = 0; + + if ( newElement == NULL ) + rc = -ENOMEM; + else { + newElement->event.xHvLpEvent.xSubtype = 0; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('C'<<8)+('E'<<0); + memcpy( newElement->event.xUnion.xCEMsgData.xCEMsg, ceMsg, 12 ); + newElement->event.xUnion.xCEMsgData.xToken = token; + rc = signalEvent(newElement); + } + + return rc; +} + +/* + * Send a 12-byte CE message and DMA data to the primary partition VSP object + */ +static int dmaAndSignalCEMsg( char * ceMsg, void * token, void * dmaData, unsigned dmaDataLength, unsigned remoteAddress ) +{ + struct StackElement * newElement = newStackElement(); + int rc = 0; + + if ( newElement == NULL ) + rc = -ENOMEM; + else { + newElement->event.xHvLpEvent.xSubtype = 0; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('C'<<8)+('E'<<0); + memcpy( newElement->event.xUnion.xCEMsgData.xCEMsg, ceMsg, 12 ); + newElement->event.xUnion.xCEMsgData.xToken = token; + memcpy( newElement->dmaData, dmaData, dmaDataLength ); + newElement->dmaDataLength = dmaDataLength; + newElement->remoteAddress = remoteAddress; + rc = signalEvent(newElement); + } + + return rc; +} + +/* + * Initiate a nice (hopefully) shutdown of Linux. We simply are + * going to try and send the init process a SIGINT signal. If + * this fails (why?), we'll simply force it off in a not-so-nice + * manner. + */ +static int shutdown( void ) +{ + int rc = kill_proc(1,SIGINT,1); + + if ( rc ) + { + printk( KERN_ALERT "mf.c: SIGINT to init failed (%d), hard shutdown commencing\n", rc ); + mf_powerOff(); + } + else + printk( KERN_ALERT "mf.c: init has been successfully notified to proceed with shutdown\n" ); + + return rc; +} + +/* + * The primary partition VSP object is sending us a new + * event flow. Handle it... 
+ */ +static void intReceived( struct IoMFLpEvent * event ) +{ + int freeIt = 0; + struct StackElement * two = NULL; + /* ack the interrupt */ + event->xHvLpEvent.xRc = HvLpEvent_Rc_Good; + HvCallEvent_ackLpEvent( &event->xHvLpEvent ); + + /* process interrupt */ + switch( event->xHvLpEvent.xSubtype ) + { + case 0: /* CE message */ + switch( event->xUnion.xCEMsgData.xCEMsg[3] ) + { + case 0x5B: /* power control notification */ + if ( (event->xUnion.xCEMsgData.xCEMsg[5]&0x20) != 0 ) + { + printk( KERN_ALERT "mf.c: Commencing partition shutdown\n" ); + if ( shutdown() == 0 ) + signalCEMsg( "\x00\x00\x00\xDB\x00\x00\x00\x00\x00\x00\x00\x00", NULL ); + } + break; + case 0xC0: /* get time */ + { + if ( (head != NULL) && ( head->event.xUnion.xCEMsgData.xCEMsg[3] == 0x40 ) ) + { + freeIt = 1; + if ( head->event.xUnion.xCEMsgData.xToken != 0 ) + { + CeMsgCompleteHandler xHdlr = head->event.xUnion.xCEMsgData.xToken->xHdlr; + void * token = head->event.xUnion.xCEMsgData.xToken->xToken; + + if (xHdlr != NULL) + (*xHdlr)( token, &(event->xUnion.xCEMsgData) ); + } + } + } + break; + } + + /* remove from queue */ + if ( freeIt == 1 ) + { + unsigned long flags; + spin_lock_irqsave( &spinlock, flags ); + if ( head != NULL ) + { + struct StackElement *oldHead = head; + head = head->next; + two = head; + free( oldHead ); + } + spin_unlock_irqrestore( &spinlock, flags ); + } + + /* send next waiting event */ + if ( two != NULL ) + signalEvent( NULL ); + break; + case 1: /* IT sys shutdown */ + printk( KERN_ALERT "mf.c: Commencing system shutdown\n" ); + shutdown(); + break; + } +} + +/* + * The primary partition VSP object is acknowledging the receipt + * of a flow we sent to them. If there are other flows queued + * up, we must send another one now... + */ +static void ackReceived( struct IoMFLpEvent * event ) +{ + unsigned long flags; + struct StackElement * two = NULL; + unsigned long freeIt = 0; + + /* handle current event */ + if ( head != NULL ) + { + switch( event->xHvLpEvent.xSubtype ) + { + case 0: /* CE msg */ + if ( event->xUnion.xCEMsgData.xCEMsg[3] == 0x40 ) + { + if ( event->xUnion.xCEMsgData.xCEMsg[2] != 0 ) + { + freeIt = 1; + if ( head->event.xUnion.xCEMsgData.xToken != 0 ) + { + CeMsgCompleteHandler xHdlr = head->event.xUnion.xCEMsgData.xToken->xHdlr; + void * token = head->event.xUnion.xCEMsgData.xToken->xToken; + + if (xHdlr != NULL) + (*xHdlr)( token, &(event->xUnion.xCEMsgData) ); + } + } + } else { + freeIt = 1; + } + break; + case 4: /* allocate */ + case 5: /* deallocate */ + if ( head->hdlr != NULL ) + { + union SafeCast mySafeCast; + mySafeCast.ptrAsU64 = event->xHvLpEvent.xCorrelationToken; + (*head->hdlr)( mySafeCast.ptr, event->xUnion.xAllocData.xCount ); + } + freeIt = 1; + break; + case 6: + { + struct VspRspData *rsp = (struct VspRspData *)event->xUnion.xVspCmd.xTokenUnion.ptr; + + if (rsp != NULL) + { + if (rsp->xResponse != NULL) + memcpy(rsp->xResponse, &(event->xUnion.xVspCmd), sizeof(event->xUnion.xVspCmd)); + if (rsp->xSemaphore != NULL) + up(rsp->xSemaphore); + } else { + printk( KERN_ERR "mf.c: no rsp\n"); + } + freeIt = 1; + } + break; + } + } + else + printk( KERN_ERR "mf.c: stack empty for receiving ack\n" ); + + /* remove from queue */ + spin_lock_irqsave( &spinlock, flags ); + if (( head != NULL ) && ( freeIt == 1 )) + { + struct StackElement *oldHead = head; + head = head->next; + two = head; + free( oldHead ); + } + spin_unlock_irqrestore( &spinlock, flags ); + + /* send next waiting event */ + if ( two != NULL ) + signalEvent( NULL ); +} + +/* + * This is the 
generic event handler we are registering with + * the Hypervisor. Ensure the flows are for us, and then + * parse it enough to know if it is an interrupt or an + * acknowledge. + */ +static void hvHandler( struct HvLpEvent * event, struct pt_regs * regs ) +{ + if ( (event != NULL) && (event->xType == HvLpEvent_Type_MachineFac) ) + { + switch( event->xFlags.xFunction ) + { + case HvLpEvent_Function_Ack: + ackReceived( (struct IoMFLpEvent *)event ); + break; + case HvLpEvent_Function_Int: + intReceived( (struct IoMFLpEvent *)event ); + break; + default: + printk( KERN_ERR "mf.c: non ack/int event received\n" ); + break; + } + } + else + printk( KERN_ERR "mf.c: alien event received\n" ); +} + +/* + * Global kernel interface to allocate and seed events into the + * Hypervisor. + */ +void mf_allocateLpEvents( HvLpIndex targetLp, + HvLpEvent_Type type, + unsigned size, + unsigned count, + MFCompleteHandler hdlr, + void * userToken ) +{ + struct StackElement * newElement = newStackElement(); + int rc = 0; + + if ( newElement == NULL ) + rc = -ENOMEM; + else { + union SafeCast mine; + mine.ptr = userToken; + newElement->event.xHvLpEvent.xSubtype = 4; + newElement->event.xHvLpEvent.xCorrelationToken = mine.ptrAsU64; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('M'<<8)+('A'<<0); + newElement->event.xUnion.xAllocData.xTargetLp = targetLp; + newElement->event.xUnion.xAllocData.xType = type; + newElement->event.xUnion.xAllocData.xSize = size; + newElement->event.xUnion.xAllocData.xCount = count; + newElement->hdlr = hdlr; + rc = signalEvent(newElement); + } + + if ( (rc != 0) && (hdlr != NULL) ) + (*hdlr)( userToken, rc ); +} + +/* + * Global kernel interface to unseed and deallocate events already in + * Hypervisor. + */ +void mf_deallocateLpEvents( HvLpIndex targetLp, + HvLpEvent_Type type, + unsigned count, + MFCompleteHandler hdlr, + void * userToken ) +{ + struct StackElement * newElement = newStackElement(); + int rc = 0; + + if ( newElement == NULL ) + rc = -ENOMEM; + else { + union SafeCast mine; + mine.ptr = userToken; + newElement->event.xHvLpEvent.xSubtype = 5; + newElement->event.xHvLpEvent.xCorrelationToken = mine.ptrAsU64; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('M'<<8)+('D'<<0); + newElement->event.xUnion.xAllocData.xTargetLp = targetLp; + newElement->event.xUnion.xAllocData.xType = type; + newElement->event.xUnion.xAllocData.xCount = count; + newElement->hdlr = hdlr; + rc = signalEvent(newElement); + } + + if ( (rc != 0) && (hdlr != NULL) ) + (*hdlr)( userToken, rc ); +} + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to power this partition off. + */ +void mf_powerOff( void ) +{ + printk( KERN_ALERT "mf.c: Down it goes...\n" ); + signalCEMsg( "\x00\x00\x00\x4D\x00\x00\x00\x00\x00\x00\x00\x00", NULL ); + for (;;); +} + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to reboot this partition. + */ +void mf_reboot( void ) +{ + printk( KERN_ALERT "mf.c: Preparing to bounce...\n" ); + signalCEMsg( "\x00\x00\x00\x4E\x00\x00\x00\x00\x00\x00\x00\x00", NULL ); + for (;;); +} + +/* + * Display a single word SRC onto the VSP control panel. + */ +void mf_displaySrc( u32 word ) +{ + u8 ce[12]; + + memcpy( ce, "\x00\x00\x00\x4A\x00\x00\x00\x01\x00\x00\x00\x00", 12 ); + ce[8] = word>>24; + ce[9] = word>>16; + ce[10] = word>>8; + ce[11] = word; + signalCEMsg( ce, NULL ); +} + +/* + * Display a single word SRC of the form "PROGXXXX" on the VSP control panel. 
+ */ +void mf_displayProgress( u16 value ) +{ + u8 ce[12]; + u8 src[72]; + + memcpy( ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12 ); + memcpy( src, + "\x01\x00\x00\x01" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "PROGxxxx" + " ", + 72 ); + src[6] = value>>8; + src[7] = value&255; + src[44] = "0123456789ABCDEF"[(value>>12)&15]; + src[45] = "0123456789ABCDEF"[(value>>8)&15]; + src[46] = "0123456789ABCDEF"[(value>>4)&15]; + src[47] = "0123456789ABCDEF"[value&15]; + dmaAndSignalCEMsg( ce, NULL, src, sizeof(src), 9*64*1024 ); +} + +/* + * Clear the VSP control panel. Used to "erase" an SRC that was + * previously displayed. + */ +void mf_clearSrc( void ) +{ + signalCEMsg( "\x00\x00\x00\x4B\x00\x00\x00\x00\x00\x00\x00\x00", NULL ); +} + +/* + * Initialization code here. + */ +void mf_init( void ) +{ + int i; + + /* initialize */ + spin_lock_init( &spinlock ); + for ( i = 0; i < sizeof(prealloc)/sizeof(*prealloc); ++i ) + free( &prealloc[i] ); + HvLpEvent_registerHandler( HvLpEvent_Type_MachineFac, &hvHandler ); + + /* virtual continue ack */ + signalCEMsg( "\x00\x00\x00\x57\x00\x00\x00\x00\x00\x00\x00\x00", NULL ); + + /* initialization complete */ + printk( KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities initialized\n" ); + + iSeries_proc_callback(&mf_proc_init); +} + +void mf_setSide(char side) +{ + int rc = 0; + u64 newSide = 0; + struct VspCmdData myVspCmd; + + memset(&myVspCmd, 0, sizeof(myVspCmd)); + if (side == 'A') + newSide = 0; + else if (side == 'B') + newSide = 1; + else if (side == 'C') + newSide = 2; + else + newSide = 3; + + myVspCmd.xSubData.xFunction02SelectIplTypeIn.xIplType = newSide; + myVspCmd.xCmd = 10; + + rc = signalVspInstruction(&myVspCmd); +} + +char mf_getSide(void) +{ + char returnValue = ' '; + int rc = 0; + struct VspCmdData myVspCmd; + + memset(&myVspCmd, 0, sizeof(myVspCmd)); + myVspCmd.xCmd = 2; + myVspCmd.xSubData.xFunction02SelectIplTypeIn.xIplType = 0; + mb(); + rc = signalVspInstruction(&myVspCmd); + + if (rc != 0) + { + return returnValue; + } else { + if (myVspCmd.xRc == 0) + { + if (myVspCmd.xSubData.xGetIplTypeOut.xIplType == 0) + returnValue = 'A'; + else if (myVspCmd.xSubData.xGetIplTypeOut.xIplType == 1) + returnValue = 'B'; + else if (myVspCmd.xSubData.xGetIplTypeOut.xIplType == 2) + returnValue = 'C'; + else + returnValue = 'D'; + } + } + + return returnValue; +} + +void mf_getSrcHistory(char *buffer, int size) +{ + /* struct IplTypeReturnStuff returnStuff; + struct StackElement * newElement = newStackElement(); + int rc = 0; + char *pages[4]; + + pages[0] = kmalloc(4096, GFP_ATOMIC); + pages[1] = kmalloc(4096, GFP_ATOMIC); + pages[2] = kmalloc(4096, GFP_ATOMIC); + pages[3] = kmalloc(4096, GFP_ATOMIC); + if (( newElement == NULL ) || (pages[0] == NULL) || (pages[1] == NULL) || (pages[2] == NULL) || (pages[3] == NULL)) + rc = -ENOMEM; + else + { + returnStuff.xType = 0; + returnStuff.xRc = 0; + returnStuff.xDone = 0; + newElement->event.xHvLpEvent.xSubtype = 6; + newElement->event.xHvLpEvent.x.xSubtypeData = ('M'<<24)+('F'<<16)+('V'<<8)+('I'<<0); + newElement->event.xUnion.xVspCmd.xEvent = &returnStuff; + newElement->event.xUnion.xVspCmd.xCmd = 4; + newElement->event.xUnion.xVspCmd.xLpIndex = HvLpConfig_getLpIndex(); + newElement->event.xUnion.xVspCmd.xRc = 0xFF; + newElement->event.xUnion.xVspCmd.xReserved1 = 0; + newElement->event.xUnion.xVspCmd.xSubData.xGetSrcHistoryIn.xPage[0] 
= (0x8000000000000000ULL | virt_to_absolute((unsigned long)pages[0])); + newElement->event.xUnion.xVspCmd.xSubData.xGetSrcHistoryIn.xPage[1] = (0x8000000000000000ULL | virt_to_absolute((unsigned long)pages[1])); + newElement->event.xUnion.xVspCmd.xSubData.xGetSrcHistoryIn.xPage[2] = (0x8000000000000000ULL | virt_to_absolute((unsigned long)pages[2])); + newElement->event.xUnion.xVspCmd.xSubData.xGetSrcHistoryIn.xPage[3] = (0x8000000000000000ULL | virt_to_absolute((unsigned long)pages[3])); + mb(); + rc = signalEvent(newElement); + } + + if (rc != 0) + { + return; + } + else + { + while (returnStuff.xDone != 1) + { + udelay(10); + } + + if (returnStuff.xRc == 0) + { + memcpy(buffer, pages[0], size); + } + } + + kfree(pages[0]); + kfree(pages[1]); + kfree(pages[2]); + kfree(pages[3]);*/ +} + +void mf_setCmdLine(const char *cmdline, int size, u64 side) +{ + struct VspCmdData myVspCmd; + int rc = 0; + dma_addr_t dma_addr = 0; + char *page = pci_alloc_consistent(iSeries_vio_dev, size, &dma_addr); + + if (page == NULL) { + printk(KERN_ERR "mf.c: couldn't allocate memory to set command line\n"); + return; + } + + copy_from_user(page, cmdline, size); + + memset(&myVspCmd, 0, sizeof(myVspCmd)); + myVspCmd.xCmd = 31; + myVspCmd.xSubData.xSetKernelCmdLineIn.xToken = dma_addr; + myVspCmd.xSubData.xSetKernelCmdLineIn.xAddressType = HvLpDma_AddressType_TceIndex; + myVspCmd.xSubData.xSetKernelCmdLineIn.xSide = side; + myVspCmd.xSubData.xSetKernelCmdLineIn.xTransferLength = size; + mb(); + rc = signalVspInstruction(&myVspCmd); + + pci_free_consistent(iSeries_vio_dev, size, page, dma_addr); +} + +int mf_getCmdLine(char *cmdline, int *size, u64 side) +{ + struct VspCmdData myVspCmd; + int rc = 0; + int len = *size; + dma_addr_t dma_addr = pci_map_single(iSeries_vio_dev, cmdline, *size, PCI_DMA_FROMDEVICE); + + memset(cmdline, 0, *size); + memset(&myVspCmd, 0, sizeof(myVspCmd)); + myVspCmd.xCmd = 33; + myVspCmd.xSubData.xGetKernelCmdLineIn.xToken = dma_addr; + myVspCmd.xSubData.xGetKernelCmdLineIn.xAddressType = HvLpDma_AddressType_TceIndex; + myVspCmd.xSubData.xGetKernelCmdLineIn.xSide = side; + myVspCmd.xSubData.xGetKernelCmdLineIn.xTransferLength = *size; + mb(); + rc = signalVspInstruction(&myVspCmd); + + if ( ! 
rc ) { + + if (myVspCmd.xRc == 0) + { + len = myVspCmd.xSubData.xGetKernelCmdLineOut.xTransferLength; + } + /* else + { + memcpy(cmdline, "Bad cmdline", 11); + } + */ + } + + pci_unmap_single(iSeries_vio_dev, dma_addr, *size, PCI_DMA_FROMDEVICE); + + return len; +} + + +int mf_setVmlinuxChunk(const char *buffer, int size, int offset, u64 side) +{ + struct VspCmdData myVspCmd; + int rc = 0; + + dma_addr_t dma_addr = 0; + + char *page = pci_alloc_consistent(iSeries_vio_dev, size, &dma_addr); + + if (page == NULL) { + printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n"); + return -ENOMEM; + } + + copy_from_user(page, buffer, size); + memset(&myVspCmd, 0, sizeof(myVspCmd)); + + myVspCmd.xCmd = 30; + myVspCmd.xSubData.xGetKernelImageIn.xToken = dma_addr; + myVspCmd.xSubData.xGetKernelImageIn.xAddressType = HvLpDma_AddressType_TceIndex; + myVspCmd.xSubData.xGetKernelImageIn.xSide = side; + myVspCmd.xSubData.xGetKernelImageIn.xOffset = offset; + myVspCmd.xSubData.xGetKernelImageIn.xTransferLength = size; + mb(); + rc = signalVspInstruction(&myVspCmd); + + if (rc == 0) + { + if (myVspCmd.xRc == 0) + { + rc = 0; + } else { + rc = -ENOMEM; + } + } + + pci_free_consistent(iSeries_vio_dev, size, page, dma_addr); + + return rc; +} + +int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side) +{ + struct VspCmdData myVspCmd; + int rc = 0; + int len = *size; + + dma_addr_t dma_addr = pci_map_single(iSeries_vio_dev, buffer, *size, PCI_DMA_FROMDEVICE); + + memset(buffer, 0, len); + + memset(&myVspCmd, 0, sizeof(myVspCmd)); + myVspCmd.xCmd = 32; + myVspCmd.xSubData.xGetKernelImageIn.xToken = dma_addr; + myVspCmd.xSubData.xGetKernelImageIn.xAddressType = HvLpDma_AddressType_TceIndex; + myVspCmd.xSubData.xGetKernelImageIn.xSide = side; + myVspCmd.xSubData.xGetKernelImageIn.xOffset = offset; + myVspCmd.xSubData.xGetKernelImageIn.xTransferLength = len; + mb(); + rc = signalVspInstruction(&myVspCmd); + + if (rc == 0) + { + if (myVspCmd.xRc == 0) + { + *size = myVspCmd.xSubData.xGetKernelImageOut.xTransferLength; + } else { + rc = -ENOMEM; + } + } + + pci_unmap_single(iSeries_vio_dev, dma_addr, *size, PCI_DMA_FROMDEVICE); + + return rc; +} + +int mf_setRtcTime(unsigned long time) +{ + struct rtc_time tm; + + to_tm(time, &tm); + + return mf_setRtc( &tm ); +} + +struct RtcTimeData +{ + struct semaphore *xSemaphore; + struct CeMsgData xCeMsg; + int xRc; +}; + +void getRtcTimeComplete(void * token, struct CeMsgData *ceMsg) +{ + struct RtcTimeData *rtc = (struct RtcTimeData *)token; + + memcpy(&(rtc->xCeMsg), ceMsg, sizeof(rtc->xCeMsg)); + + rtc->xRc = 0; + up(rtc->xSemaphore); +} + +static unsigned long lastsec = 1; + +int mf_getRtcTime(unsigned long *time) +{ +/* unsigned long usec, tsec; */ + + u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart)); + u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1); + int year = 1970; + int year1 = ( dataWord1 >> 24 ) & 0x000000FF; + int year2 = ( dataWord1 >> 16 ) & 0x000000FF; + int sec = ( dataWord1 >> 8 ) & 0x000000FF; + int min = dataWord1 & 0x000000FF; + int hour = ( dataWord2 >> 24 ) & 0x000000FF; + int day = ( dataWord2 >> 8 ) & 0x000000FF; + int mon = dataWord2 & 0x000000FF; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year1); + BCD_TO_BIN(year2); + year = year1 * 100 + year2; + + *time = mktime(year, mon, day, hour, min, sec); + + *time += ( jiffies / HZ ); + + /* Now THIS is a nasty hack! 
+ * It ensures that the first two calls to mf_getRtcTime get different + * answers. That way the loop in init_time (time.c) will not think + * the clock is stuck. + */ + if ( lastsec ) { + *time -= lastsec; + --lastsec; + } + + return 0; + +} + +int mf_getRtc( struct rtc_time * tm ) +{ + + struct CeMsgCompleteData ceComplete; + struct RtcTimeData rtcData; + int rc = 0; + DECLARE_MUTEX_LOCKED(Semaphore); + + memset(&ceComplete, 0, sizeof(ceComplete)); + memset(&rtcData, 0, sizeof(rtcData)); + + rtcData.xSemaphore = &Semaphore; + + ceComplete.xHdlr = &getRtcTimeComplete; + ceComplete.xToken = (void *)&rtcData; + + rc = signalCEMsg( "\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00", &ceComplete ); + + if ( rc == 0 ) + { + down(&Semaphore); + + if ( rtcData.xRc == 0) + { + if ( ( rtcData.xCeMsg.xCEMsg[2] == 0xa9 ) || + ( rtcData.xCeMsg.xCEMsg[2] == 0xaf ) ) { + /* TOD clock is not set */ + tm->tm_sec = 1; + tm->tm_min = 1; + tm->tm_hour = 1; + tm->tm_mday = 10; + tm->tm_mon = 8; + tm->tm_year = 71; + mf_setRtc( tm ); + } + { + u32 dataWord1 = *((u32 *)(rtcData.xCeMsg.xCEMsg+4)); + u32 dataWord2 = *((u32 *)(rtcData.xCeMsg.xCEMsg+8)); + u8 year = (dataWord1 >> 16 ) & 0x000000FF; + u8 sec = ( dataWord1 >> 8 ) & 0x000000FF; + u8 min = dataWord1 & 0x000000FF; + u8 hour = ( dataWord2 >> 24 ) & 0x000000FF; + u8 day = ( dataWord2 >> 8 ) & 0x000000FF; + u8 mon = dataWord2 & 0x000000FF; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + + if ( year <= 69 ) + year += 100; + + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; + } + } else { + rc = rtcData.xRc; + tm->tm_sec = 0; + tm->tm_min = 0; + tm->tm_hour = 0; + tm->tm_mday = 15; + tm->tm_mon = 5; + tm->tm_year = 52; + + } + tm->tm_wday = 0; + tm->tm_yday = 0; + tm->tm_isdst = 0; + + } + + return rc; + +} + +int mf_setRtc(struct rtc_time * tm) +{ + char ceTime[12] = "\x00\x00\x00\x41\x00\x00\x00\x00\x00\x00\x00\x00"; + int rc = 0; + u8 day, mon, hour, min, sec, y1, y2; + unsigned year; + + year = 1900 + tm->tm_year; + y1 = year / 100; + y2 = year % 100; + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + day = tm->tm_mday; + mon = tm->tm_mon + 1; + + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hour); + BIN_TO_BCD(mon); + BIN_TO_BCD(day); + BIN_TO_BCD(y1); + BIN_TO_BCD(y2); + + ceTime[4] = y1; + ceTime[5] = y2; + ceTime[6] = sec; + ceTime[7] = min; + ceTime[8] = hour; + ceTime[10] = day; + ceTime[11] = mon; + + rc = signalCEMsg( ceTime, NULL ); + + return rc; +} + + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/mf_proc.c linuxppc64_2_4/arch/ppc64/kernel/mf_proc.c --- ../kernel.org/linux/arch/ppc64/kernel/mf_proc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/mf_proc.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,301 @@ +/* + * mf_proc.c + * Copyright (C) 2001 Kyle A. Lucke IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +/* Change Activity: */ +/* End Change Activity */ + +#ifndef _MF_PROC_H +#include +#endif +#ifndef MF_H_INCLUDED +#include +#endif +#include + +static struct proc_dir_entry *mf_proc_root = NULL; + +int proc_mf_dump_cmdline +(char *page, char **start, off_t off, int count, int *eof, void *data); + +int proc_mf_dump_vmlinux +(char *page, char **start, off_t off, int count, int *eof, void *data); + +int proc_mf_dump_side +(char *page, char **start, off_t off, int count, int *eof, void *data); + +int proc_mf_change_side +(struct file *file, const char *buffer, unsigned long count, void *data); + +int proc_mf_dump_src +(char *page, char **start, off_t off, int count, int *eof, void *data); +int proc_mf_change_src (struct file *file, const char *buffer, unsigned long count, void *data); +int proc_mf_change_cmdline(struct file *file, const char *buffer, unsigned long count, void *data); +int proc_mf_change_vmlinux(struct file *file, const char *buffer, unsigned long count, void *data); + + +void mf_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent = NULL; + struct proc_dir_entry *mf_a = NULL; + struct proc_dir_entry *mf_b = NULL; + struct proc_dir_entry *mf_c = NULL; + struct proc_dir_entry *mf_d = NULL; + + mf_proc_root = proc_mkdir("mf", iSeries_proc); + if (!mf_proc_root) return; + + mf_a = proc_mkdir("A", mf_proc_root); + if (!mf_a) return; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf_a); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_a); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_vmlinux; + ent->write_proc = proc_mf_change_vmlinux; + + mf_b = proc_mkdir("B", mf_proc_root); + if (!mf_b) return; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf_b); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)1; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_b); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)1; + ent->read_proc = proc_mf_dump_vmlinux; + ent->write_proc = proc_mf_change_vmlinux; + + mf_c = proc_mkdir("C", mf_proc_root); + if (!mf_c) return; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf_c); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)2; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_c); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)2; + ent->read_proc = proc_mf_dump_vmlinux; + ent->write_proc = proc_mf_change_vmlinux; + + mf_d = proc_mkdir("D", mf_proc_root); + if (!mf_d) return; + + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf_d); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)3; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR, mf_d); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)3; + ent->read_proc = proc_mf_dump_vmlinux; + ent->write_proc = NULL; + + ent = 
create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_side; + ent->write_proc = proc_mf_change_side; + + ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_src; + ent->write_proc = proc_mf_change_src; +} + +int proc_mf_dump_cmdline +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = count; + char *p; + + len = mf_getCmdLine(page, &len, (u64)data); + + p = page + len - 1; + while ( p > page ) { + if ( (*p == 0) || (*p == ' ') ) + --p; + else + break; + } + if ( *p != '\n' ) { + ++p; + *p = '\n'; + } + ++p; + *p = 0; + len = p - page; + + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +} + +int proc_mf_dump_vmlinux +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int sizeToGet = count; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) + { + if (sizeToGet != 0) + { + *start = page + off; + printk("mf_proc.c: got count %d off %d\n", sizeToGet, (int)off); + return sizeToGet; + } else { + printk("mf_proc.c: eof\n"); + *eof = 1; + return 0; + } + } else { + printk("mf_proc.c: eof\n"); + *eof = 1; + return 0; + } +} + + +int proc_mf_dump_side +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = 0; + + char mf_current_side = mf_getSide(); + len = sprintf(page, "%c\n", mf_current_side); + + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +int proc_mf_change_side(struct file *file, const char *buffer, unsigned long count, void *data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if ((*buffer != 'A') && + (*buffer != 'B') && + (*buffer != 'C') && + (*buffer != 'D')) + { + printk(KERN_ERR "mf_proc.c: proc_mf_change_side: invalid side\n"); + return -EINVAL; + } + + mf_setSide(*buffer); + + return count; +} + +int proc_mf_dump_src +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = 0; + mf_getSrcHistory(page, count); + len = count; + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +} + +int proc_mf_change_src(struct file *file, const char *buffer, unsigned long count, void *data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if ((count < 4) && (count != 1)) + { + printk(KERN_ERR "mf_proc: invalid src\n"); + return -EINVAL; + } + + if ((count == 1) && ((*buffer) == '\0')) + { + mf_clearSrc(); + } else { + mf_displaySrc(*(u32 *)buffer); + } + + return count; +} + +int proc_mf_change_cmdline(struct file *file, const char *buffer, unsigned long count, void *data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + mf_setCmdLine(buffer, count, (u64)data); + + return count; +} + +int proc_mf_change_vmlinux(struct file *file, const char *buffer, unsigned long count, void *data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + mf_setVmlinuxChunk(buffer, count, file->f_pos, (u64)data); + file->f_pos += count; + + return count; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/misc.S linuxppc64_2_4/arch/ppc64/kernel/misc.S --- ../kernel.org/linux/arch/ppc64/kernel/misc.S Wed Dec 31 18:00:00 1969 
+++ linuxppc64_2_4/arch/ppc64/kernel/misc.S Tue Nov 20 12:29:25 2001 @@ -0,0 +1,948 @@ +/* + * arch/ppc/kernel/misc.S + * + * + * + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ppc_asm.h" + + .text + +/* + * Returns (address we're running at) - (address we were linked at) + * for use before the text and data are mapped to KERNELBASE. + */ + +_GLOBAL(reloc_offset) + mflr r0 + bl 1f +1: mflr r3 + LOADADDR(r4,1b) + sub r3,r4,r3 + mtlr r0 + blr + +_GLOBAL(get_msr) + mfmsr r3 + blr + +_GLOBAL(get_dar) + mfdar r3 + blr + +_GLOBAL(get_srr0) + mfsrr0 r3 + blr + +_GLOBAL(get_srr1) + mfsrr1 r3 + blr + +_GLOBAL(get_sp) + mr r3,r1 + blr + +#ifdef CONFIG_PPC_ISERIES +/* unsigned long __no_use_save_flags(void) */ +_GLOBAL(__no_use_save_flags) + mfspr r4,SPRG3 + lbz r3,PACAPROCENABLED(r4) + blr + +/* void __no_use_restore_flags(unsigned long flags) */ +_GLOBAL(__no_use_restore_flags) +/* + * Just set/clear the MSR_EE bit through restore/flags but do not + * change anything else. This is needed by the RT system and makes + * sense anyway. + * -- Cort + */ + mfspr r6,SPRG3 + lbz r5,PACAPROCENABLED(r6) + /* Check if things are setup the way we want _already_. */ + cmpw 0,r3,r5 + beqlr + /* are we enabling interrupts? */ + cmpi 0,r3,0 + stb r3,PACAPROCENABLED(r6) + beqlr + /* Check pending interrupts */ + CHECKANYINT(r4,r5) + beqlr + + /* + * Handle pending interrupts in interrupt context + */ + li r0,0x5555 + sc + blr + +_GLOBAL(__no_use_cli) + mfspr r5,SPRG3 + lbz r3,PACAPROCENABLED(r5) + li r4,0 + stb r4,PACAPROCENABLED(r5) + blr /* Done */ + +_GLOBAL(__no_use_sti) + mfspr r6,SPRG3 + li r3,1 + stb r3,PACAPROCENABLED(r6) + + /* Check for pending interrupts + * A decrementer, IPI or PMC interrupt may have occurred + * while we were in the hypervisor (which enables) + */ + CHECKANYINT(r4,r5) + beqlr + + /* + * Handle pending interrupts in interrupt context + */ + li r0,0x5555 + sc + blr +#endif +/* + * Flush instruction cache. + */ +_GLOBAL(flush_instruction_cache) + +/* + * This is called by kgdb code + * and should probably go away + * to be replaced by invalidating + * the cache lines that are actually + * modified + */ + /* use invalidate-all bit in HID0 + * - is this consistent across all 64-bit cpus? -- paulus */ + mfspr r3,HID0 + ori r3,r3,HID0_ICFI + mtspr HID0,r3 + sync + isync + blr + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * + * flush_icache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start through stop-1 inclusive + */ + +_GLOBAL(flush_icache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + * and in some cases i-cache and d-cache line sizes differ from + * each other. 
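 * As a worked sketch of the rounding done below (assuming, for
 * illustration only, a 128-byte line, so a log-2 line size of 7; the
 * real values are read from the naca fields right after this comment):
 *
 *	mask  = 128 - 1;
 *	first = start & ~mask;			round down to a line
 *	lines = (stop - first + mask) >> 7;	round up to cover stop-1
 *
 * so every cache line touching [start, stop) gets flushed.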
+ */ + LOADADDR(r10,naca) /* Get Naca address */ + ld r10,0(r10) + lhz r7,DCACHEL1LINESIZE(r10) /* Get cache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lhz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lhz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lhz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + blr + +/* + * Like above, but only do the D-cache. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + LOADADDR(r10,naca) /* Get Naca address */ + ld r10,0(r10) + lhz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lhz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + blr + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + +/* Flush the dcache */ + LOADADDR(r7,naca) + ld r7,0(r7) + clrrdi r3,r3,12 /* Page align */ + lhz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ + lhz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + mr r6,r3 + mtctr r4 +0: dcbst 0,r6 + add r6,r6,r5 + bdnz 0b + sync + +/* Now invalidate the icache */ + + lhz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ + lhz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + mtctr r4 +1: icbi 0,r3 + add r3,r3,r5 + bdnz 1b + isync + blr + +/* + * Copy a whole page. Assumes a 4096B page size. + */ +_GLOBAL(copy_page) + clrrdi r3,r3,12 /* Page align */ + clrrdi r4,r4,12 /* Page align */ + li r5,256 + mtctr r5 + addi r3,r3,-8 + addi r4,r4,-8 + +1: ld r6,8(r4) + ldu r7,16(r4) + std r6,8(r3) + stdu r7,16(r3) + bdnz+ 1b + blr + +/* + * I/O string operations + * + * insb(port, buf, len) + * outsb(port, buf, len) + * insw(port, buf, len) + * outsw(port, buf, len) + * insl(port, buf, len) + * outsl(port, buf, len) + * insw_ns(port, buf, len) + * outsw_ns(port, buf, len) + * insl_ns(port, buf, len) + * outsl_ns(port, buf, len) + * + * The *_ns versions don't do byte-swapping. 
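 * As an illustrative C rendering of one iteration (swab16 as in the
 * kernel's byteorder helpers), _insw versus _insw_ns is roughly:
 *
 *	*buf++ = swab16(*(volatile u16 *)port);		_insw, via lhbrx
 *	*buf++ = *(volatile u16 *)port;			_insw_ns, via lhz
 *
 * The byte-reversing forms suit little-endian (e.g. PCI) devices driven
 * from this big-endian CPU.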
+ */ +_GLOBAL(_insb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbz r5,0(r3) + eieio + stbu r5,1(r4) + bdnz 00b + blr + +_GLOBAL(_outsb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbzu r5,1(r4) + stb r5,0(r3) + eieio + bdnz 00b + blr + +_GLOBAL(_insw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhbrx r5,0,r3 + eieio + sthu r5,2(r4) + bdnz 00b + blr + +_GLOBAL(_outsw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + eieio + sthbrx r5,0,r3 + bdnz 00b + blr + +_GLOBAL(_insl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwbrx r5,0,r3 + eieio + stwu r5,4(r4) + bdnz 00b + blr + +_GLOBAL(_outsl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stwbrx r5,0,r3 + eieio + bdnz 00b + blr + +_GLOBAL(ide_insw) +_GLOBAL(_insw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhz r5,0(r3) + eieio + sthu r5,2(r4) + bdnz 00b + blr + +_GLOBAL(ide_outsw) +_GLOBAL(_outsw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sth r5,0(r3) + eieio + bdnz 00b + blr + +_GLOBAL(_insl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwz r5,0(r3) + eieio + stwu r5,4(r4) + bdnz 00b + blr + +_GLOBAL(_outsl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stw r5,0(r3) + eieio + bdnz 00b + blr + +/* + * Extended precision shifts + * + * R3/R4 has 64 bit value + * R5 has shift count + * result in R3/R4 + * + * ashrdi3: XXXYYY/ZZZAAA -> SSSXXX/YYYZZZ + * ashldi3: XXXYYY/ZZZAAA -> YYYZZZ/AAA000 + * lshrdi3: XXXYYY/ZZZAAA -> 000XXX/YYYZZZ + */ +/* MIKEC: These may no longer be needed...what does gcc expect ? */ + +_GLOBAL(__ashrdi3) + li r6,32 + sub r6,r6,r5 + slw r7,r3,r6 /* isolate YYY */ + srw r4,r4,r5 /* isolate ZZZ */ + or r4,r4,r7 /* YYYZZZ */ + sraw r3,r3,r5 /* SSSXXX */ + blr + +_GLOBAL(__ashldi3) + li r6,32 + sub r6,r6,r5 + srw r7,r4,r6 /* isolate ZZZ */ + slw r4,r4,r5 /* AAA000 */ + slw r3,r3,r5 /* YYY--- */ + or r3,r3,r7 /* YYYZZZ */ + blr + +_GLOBAL(__lshrdi3) + li r6,32 + sub r6,r6,r5 + slw r7,r3,r6 /* isolate YYY */ + srw r4,r4,r5 /* isolate ZZZ */ + or r4,r4,r7 /* YYYZZZ */ + srw r3,r3,r5 /* 000XXX */ + blr + +_GLOBAL(abs) + cmpi 0,r3,0 + bge 10f + neg r3,r3 +10: blr + +_GLOBAL(_get_SP) + mr r3,r1 /* Close enough */ + blr + +_GLOBAL(_get_PVR) + mfspr r3,PVR + blr + +_GLOBAL(_get_PIR) + mfspr r3,PIR + blr + +_GLOBAL(_get_HID0) + mfspr r3,HID0 + blr + +_GLOBAL(cvt_fd) + lfd 0,-4(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,-4(r5) + blr + +_GLOBAL(cvt_df) + lfd 0,-4(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,-4(r5) + blr + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + mr r6,r3 /* function */ + ori r3,r5,CLONE_VM /* flags */ + li r0,__NR_clone + sc + cmpi 0,r3,0 /* parent or child? */ + bnelr /* return if parent */ + + li r0,0 /* clear out p->thread.regs */ + std r0,THREAD+PT_REGS(r13) /* since we don't have user ctx */ + li r0,RUN_FLAG /* Run light on */ + std r0,THREAD+THREAD_FLAGS(r13) + + ld r2,8(r6) + ld r6,0(r6) + mtlr r6 /* fn addr in lr */ + mr r3,r4 /* load arg and call fn */ + blrl + li r0,__NR_exit /* exit after child exits */ + li r3,0 + sc + +#ifdef CONFIG_BINFMT_ELF32 +/* Why isn't this a) automatic, b) written in 'C'? 
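 * (On the extended-precision shifts above: for a 64-bit value x, split
 * as hi:lo, and a shift count n below 32, __lshrdi3 amounts to the
 * following C, matching the slw/srw/or sequence:
 *
 *	hi = (u32)(x >> 32);  lo = (u32)x;
 *	lo = (lo >> n) | (hi << (32 - n));	YYYZZZ
 *	hi = hi >> n;				000XXX
 *
 * __ashrdi3 differs only in using an arithmetic shift for hi, and
 * __ashldi3 is the mirror image.)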
*/ + .data + .align 8 +_GLOBAL(sys_call_table32) + .llong .sys_ni_syscall /* 0 - old "setup()" system call */ + .llong .sys32_exit + .llong .sys32_fork + .llong .sys_read + .llong .sys_write + .llong .sys32_open /* 5 */ + .llong .sys_close + .llong .sys32_waitpid + .llong .sys32_creat + .llong .sys_link + .llong .sys_unlink /* 10 */ + .llong .sys32_execve + .llong .sys_chdir + .llong .sys32_time + .llong .sys32_mknod + .llong .sys32_chmod /* 15 */ + .llong .sys_lchown + .llong .sys_ni_syscall /* old break syscall holder */ + .llong .sys32_stat + .llong .sys32_lseek + .llong .sys_getpid /* 20 */ + .llong .sys32_mount + .llong .sys_oldumount + .llong .sys_setuid + .llong .sys_getuid + .llong .ppc64_sys_stime /* 25 */ + .llong .sys32_ptrace + .llong .sys_alarm + .llong .sys32_fstat + .llong .sys32_pause + .llong .sys32_utime /* 30 */ + .llong .sys_ni_syscall /* old stty syscall holder */ + .llong .sys_ni_syscall /* old gtty syscall holder */ + .llong .sys32_access + .llong .sys32_nice + .llong .sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + .llong .sys_sync + .llong .sys32_kill + .llong .sys_rename + .llong .sys32_mkdir + .llong .sys_rmdir /* 40 */ + .llong .sys_dup + .llong .sys_pipe + .llong .sys32_times + .llong .sys_ni_syscall /* old prof syscall holder */ + .llong .sys_brk /* 45 */ + .llong .sys_setgid + .llong .sys_getgid + .llong .sys_signal + .llong .sys_geteuid + .llong .sys_getegid /* 50 */ + .llong .sys_acct + .llong .sys32_umount /* recycled never used phys() */ + .llong .sys_ni_syscall /* old lock syscall holder */ + .llong .sys32_ioctl + .llong .sys32_fcntl /* 55 */ + .llong .sys_ni_syscall /* old mpx syscall holder */ + .llong .sys32_setpgid + .llong .sys_ni_syscall /* old ulimit syscall holder */ + .llong .sys_olduname + .llong .sys32_umask /* 60 */ + .llong .sys_chroot + .llong .sys_ustat + .llong .sys_dup2 + .llong .sys_getppid + .llong .sys_getpgrp /* 65 */ + .llong .sys_setsid + .llong .sys32_sigaction + .llong .sys_sgetmask + .llong .sys32_ssetmask + .llong .sys_setreuid /* 70 */ + .llong .sys_setregid + .llong .sys_sigsuspend + .llong .sys32_sigpending + .llong .sys32_sethostname + .llong .sys32_setrlimit /* 75 */ + .llong .sys32_old_getrlimit + .llong .sys32_getrusage + .llong .sys32_gettimeofday + .llong .sys32_settimeofday + .llong .sys32_getgroups /* 80 */ + .llong .sys32_setgroups + .llong .ppc32_select + .llong .sys_symlink + .llong .sys32_lstat + .llong .sys32_readlink /* 85 */ + .llong .sys_uselib + .llong .sys32_swapon + .llong .sys32_reboot + .llong .old32_readdir + .llong .sys32_mmap /* 90 */ + .llong .sys_munmap + .llong .sys_truncate + .llong .sys_ftruncate + .llong .sys_fchmod + .llong .sys_fchown /* 95 */ + .llong .sys32_getpriority + .llong .sys32_setpriority + .llong .sys_ni_syscall /* old profil syscall holder */ + .llong .sys32_statfs + .llong .sys32_fstatfs /* 100 */ + .llong .sys32_ioperm + .llong .sys32_socketcall + .llong .sys32_syslog + .llong .sys32_setitimer + .llong .sys32_getitimer /* 105 */ + .llong .sys32_newstat + .llong .sys32_newlstat + .llong .sys32_newfstat + .llong .sys_uname + .llong .sys32_iopl /* 110 */ + .llong .sys_vhangup + .llong .sys_ni_syscall /* old 'idle' syscall */ + .llong .sys32_vm86 + .llong .sys32_wait4 + .llong .sys_swapoff /* 115 */ + .llong .sys32_sysinfo + .llong .sys32_ipc + .llong .sys_fsync + .llong .sys32_sigreturn + .llong .sys32_clone /* 120 */ + .llong .sys32_setdomainname + .llong .ppc64_newuname + .llong .sys32_modify_ldt + .llong .sys32_adjtimex + .llong .sys_mprotect /* 125 */ + .llong 
.sys32_sigprocmask + .llong .sys32_create_module + .llong .sys32_init_module + .llong .sys32_delete_module + .llong .sys32_get_kernel_syms /* 130 */ + .llong .sys32_quotactl + .llong .sys32_getpgid + .llong .sys_fchdir + .llong .sys32_bdflush + .llong .sys32_sysfs /* 135 */ + .llong .sys32_personality + .llong .sys_ni_syscall /* for afs_syscall */ + .llong .sys_setfsuid + .llong .sys_setfsgid + .llong .sys_llseek /* 140 */ + .llong .sys32_getdents + .llong .ppc32_select + .llong .sys_flock + .llong .sys32_msync + .llong .sys32_readv /* 145 */ + .llong .sys32_writev + .llong .sys32_getsid + .llong .sys_fdatasync + .llong .sys32_sysctl + .llong .sys_mlock /* 150 */ + .llong .sys_munlock + .llong .sys32_mlockall + .llong .sys_munlockall + .llong .sys32_sched_setparam + .llong .sys32_sched_getparam /* 155 */ + .llong .sys32_sched_setscheduler + .llong .sys32_sched_getscheduler + .llong .sys_sched_yield + .llong .sys32_sched_get_priority_max + .llong .sys32_sched_get_priority_min /* 160 */ + .llong .sys32_sched_rr_get_interval + .llong .sys32_nanosleep + .llong .sys32_mremap + .llong .sys_setresuid + .llong .sys_getresuid /* 165 */ + .llong .sys32_query_module + .llong .sys_poll + .llong .sys32_nfsservctl + .llong .sys_setresgid + .llong .sys_getresgid /* 170 */ + .llong .sys32_prctl + .llong .sys32_rt_sigreturn + .llong .sys32_rt_sigaction + .llong .sys32_rt_sigprocmask + .llong .sys32_rt_sigpending /* 175 */ + .llong .sys32_rt_sigtimedwait + .llong .sys32_rt_sigqueueinfo + .llong .sys32_rt_sigsuspend + .llong .sys32_pread + .llong .sys32_pwrite /* 180 */ + .llong .sys_chown + .llong .sys_getcwd + .llong .sys_capget + .llong .sys_capset + .llong .sys32_sigaltstack /* 185 */ + .llong .sys32_sendfile + .llong .sys_ni_syscall /* streams1 */ + .llong .sys_ni_syscall /* streams2 */ + .llong .sys32_vfork + .llong .sys32_getrlimit /* 190 */ + .llong .sys_ni_syscall /* 191 */ /* Unused */ + .llong .sys_ni_syscall /* 192 - reserved - mmap2 */ + .llong .sys32_truncate64 /* 193 - truncate64 */ + .llong .sys32_ftruncate64 /* 194 - ftruncate64 */ + .llong .sys_stat64 /* 195 - stat64 */ + .llong .sys_lstat64 /* 196 - lstat64 */ + .llong .sys_fstat64 /* 197 - fstat64 */ + .llong .sys32_pciconfig_read /* 198 */ + .llong .sys32_pciconfig_write /* 199 */ + .llong .sys_pciconfig_iobase /* 200 */ + .llong .sys_ni_syscall /* 201 - reserved - MacOnLinux - new */ + .llong .sys_getdents64 /* 202 */ + .llong .sys_pivot_root /* 203 */ + .llong .sys32_fcntl64 /* 204 */ + .llong .sys_madvise /* 205 */ + .llong .sys_mincore /* 206 */ + .rept NR_syscalls-206 + .llong .sys_ni_syscall + .endr +#endif + .data + .align 8 +_GLOBAL(sys_call_table) + .llong .sys_ni_syscall /* 0 - old "setup()" system call */ + .llong .sys_exit + .llong .sys_fork + .llong .sys_read + .llong .sys_write + .llong .sys_open /* 5 */ + .llong .sys_close + .llong .sys_waitpid + .llong .sys_creat + .llong .sys_link + .llong .sys_unlink /* 10 */ + .llong .sys_execve + .llong .sys_chdir + .llong .sys64_time + .llong .sys_mknod + .llong .sys_chmod /* 15 */ + .llong .sys_lchown + .llong .sys_ni_syscall /* old break syscall holder */ + .llong .sys_stat + .llong .sys_lseek + .llong .sys_getpid /* 20 */ + .llong .sys_mount + .llong .sys_oldumount + .llong .sys_setuid + .llong .sys_getuid + .llong .ppc64_sys_stime /* 25 */ + .llong .sys_ptrace + .llong .sys_alarm + .llong .sys_fstat + .llong .sys_pause + .llong .sys_utime /* 30 */ + .llong .sys_ni_syscall /* old stty syscall holder */ + .llong .sys_ni_syscall /* old gtty syscall holder */ + .llong .sys_access + 
.llong .sys_nice + .llong .sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + .llong .sys_sync + .llong .sys_kill + .llong .sys_rename + .llong .sys_mkdir + .llong .sys_rmdir /* 40 */ + .llong .sys_dup + .llong .sys_pipe + .llong .sys_times + .llong .sys_ni_syscall /* old prof syscall holder */ + .llong .sys_brk /* 45 */ + .llong .sys_setgid + .llong .sys_getgid + .llong .sys_signal + .llong .sys_geteuid + .llong .sys_getegid /* 50 */ + .llong .sys_acct + .llong .sys_umount /* recycled never used phys() */ + .llong .sys_ni_syscall /* old lock syscall holder */ + .llong .sys_ioctl + .llong .sys_fcntl /* 55 */ + .llong .sys_ni_syscall /* old mpx syscall holder */ + .llong .sys_setpgid + .llong .sys_ni_syscall /* old ulimit syscall holder */ + .llong .sys_olduname + .llong .sys_umask /* 60 */ + .llong .sys_chroot + .llong .sys_ustat + .llong .sys_dup2 + .llong .sys_getppid + .llong .sys_getpgrp /* 65 */ + .llong .sys_setsid + .llong .sys_sigaction + .llong .sys_sgetmask + .llong .sys_ssetmask + .llong .sys_setreuid /* 70 */ + .llong .sys_setregid + .llong .sys_sigsuspend + .llong .sys_sigpending + .llong .sys_sethostname + .llong .sys_setrlimit /* 75 */ + .llong .sys_old_getrlimit + .llong .sys_getrusage + .llong .sys_gettimeofday + .llong .sys_settimeofday + .llong .sys_getgroups /* 80 */ + .llong .sys_setgroups + .llong .sys_select + .llong .sys_symlink + .llong .sys_lstat + .llong .sys_readlink /* 85 */ + .llong .sys_uselib + .llong .sys_swapon + .llong .sys_reboot + .llong .old_readdir + .llong .sys_mmap /* 90 */ + .llong .sys_munmap + .llong .sys_truncate + .llong .sys_ftruncate + .llong .sys_fchmod + .llong .sys_fchown /* 95 */ + .llong .sys_getpriority + .llong .sys_setpriority + .llong .sys_ni_syscall /* old profil syscall holder */ + .llong .sys_statfs + .llong .sys_fstatfs /* 100 */ + .llong .sys_ioperm + .llong .sys_socketcall + .llong .sys_syslog + .llong .sys_setitimer + .llong .sys_getitimer /* 105 */ + .llong .sys_newstat + .llong .sys_newlstat + .llong .sys_newfstat + .llong .sys_uname + .llong .sys_iopl /* 110 */ + .llong .sys_vhangup + .llong .sys_ni_syscall /* old 'idle' syscall */ + .llong .sys_vm86 + .llong .sys_wait4 + .llong .sys_swapoff /* 115 */ + .llong .sys_sysinfo + .llong .sys_ipc + .llong .sys_fsync + .llong .sys_sigreturn + .llong .sys_clone /* 120 */ + .llong .sys_setdomainname + .llong .ppc64_newuname + .llong .sys_modify_ldt + .llong .sys_adjtimex + .llong .sys_mprotect /* 125 */ + .llong .sys_sigprocmask + .llong .sys_create_module + .llong .sys_init_module + .llong .sys_delete_module + .llong .sys_get_kernel_syms /* 130 */ + .llong .sys_quotactl + .llong .sys_getpgid + .llong .sys_fchdir + .llong .sys_bdflush + .llong .sys_sysfs /* 135 */ + .llong .sys_personality + .llong .sys_ni_syscall /* for afs_syscall */ + .llong .sys_setfsuid + .llong .sys_setfsgid + .llong .sys_llseek /* 140 */ + .llong .sys_getdents + .llong .sys_select + .llong .sys_flock + .llong .sys_msync + .llong .sys_readv /* 145 */ + .llong .sys_writev + .llong .sys_getsid + .llong .sys_fdatasync + .llong .sys_sysctl + .llong .sys_mlock /* 150 */ + .llong .sys_munlock + .llong .sys_mlockall + .llong .sys_munlockall + .llong .sys_sched_setparam + .llong .sys_sched_getparam /* 155 */ + .llong .sys_sched_setscheduler + .llong .sys_sched_getscheduler + .llong .sys_sched_yield + .llong .sys_sched_get_priority_max + .llong .sys_sched_get_priority_min /* 160 */ + .llong .sys_sched_rr_get_interval + .llong .sys_nanosleep + .llong .sys_mremap + .llong .sys_setresuid + .llong .sys_getresuid /* 
165 */ + .llong .sys_query_module + .llong .sys_poll + .llong .sys_nfsservctl + .llong .sys_setresgid + .llong .sys_getresgid /* 170 */ + .llong .sys_prctl + .llong .sys_rt_sigreturn + .llong .sys_rt_sigaction + .llong .sys_rt_sigprocmask + .llong .sys_rt_sigpending /* 175 */ + .llong .sys_rt_sigtimedwait + .llong .sys_rt_sigqueueinfo + .llong .sys_rt_sigsuspend + .llong .sys_pread + .llong .sys_pwrite /* 180 */ + .llong .sys_chown + .llong .sys_getcwd + .llong .sys_capget + .llong .sys_capset + .llong .sys_sigaltstack /* 185 */ + .llong .sys_sendfile + .llong .sys_ni_syscall /* streams1 */ + .llong .sys_ni_syscall /* streams2 */ + .llong .sys_vfork + .llong .sys_getrlimit /* 190 */ + .llong .sys_ni_syscall /* 191 */ /* Unused */ + .llong .sys_ni_syscall /* 192 - reserved - mmap2 */ + .llong .sys_ni_syscall /* 193 - reserved - truncate64 */ + .llong .sys_ni_syscall /* 194 - reserved - ftruncate64 */ + .llong .sys_ni_syscall /* 195 - reserved - stat64 */ + .llong .sys_ni_syscall /* 196 - reserved - lstat64 */ + .llong .sys_ni_syscall /* 197 - reserved - fstat64 */ + .llong .sys_pciconfig_read /* 198 */ + .llong .sys_pciconfig_write /* 199 */ + .llong .sys_pciconfig_iobase /* 200 */ + .llong .sys_ni_syscall /* 201 - reserved - MacOnLinux - new */ + .llong .sys_getdents64 /* 202 */ + .llong .sys_pivot_root /* 203 */ + .llong .sys_ni_syscall /* 204 */ + .llong .sys_madvise /* 205 */ + .llong .sys_mincore /* 206 */ + .rept NR_syscalls-206 + .llong .sys_ni_syscall + .endr diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/mk_defs.c linuxppc64_2_4/arch/ppc64/kernel/mk_defs.c --- ../kernel.org/linux/arch/ppc64/kernel/mk_defs.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/mk_defs.c Thu Oct 25 23:27:54 2001 @@ -0,0 +1,151 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + * We use the technique used in the OSF Mach kernel code: + * generate asm statements containing #defines, + * compile this file to assembler, and then extract the + * #defines from the assembly-language output. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
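 * As a worked example of the technique described at the top of this
 * file (the offset value 816 is made up): compiling
 *
 *	DEFINE(KSP, offsetof(struct thread_struct, ksp));
 *
 * with -S turns the asm() in the DEFINE macro below into the literal
 * assembler-output line
 *
 *	#define	KSP	816
 *
 * which the build then extracts into a header for the .S files to use.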
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define DEFINE(sym, val) \ + asm volatile("\n#define\t" #sym "\t%0" : : "i" (val)) + +int +main(void) +{ + DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending)); + DEFINE(THREAD, offsetof(struct task_struct, thread)); + DEFINE(MM, offsetof(struct task_struct, mm)); + DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); + DEFINE(KSP, offsetof(struct thread_struct, ksp)); + + DEFINE(PACA, offsetof(struct Naca, paca)); + DEFINE(PACA_SIZE, sizeof(struct Paca)); + + DEFINE(DCACHEL1LINESIZE, offsetof(struct Naca, dCacheL1LineSize)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct Naca, dCacheL1LogLineSize)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct Naca, dCacheL1LinesPerPage)); + + DEFINE(ICACHEL1LINESIZE, offsetof(struct Naca, iCacheL1LineSize)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct Naca, iCacheL1LogLineSize)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct Naca, iCacheL1LinesPerPage)); + DEFINE(SLBSIZE, offsetof(struct Naca, slb_size)); + + DEFINE(PACAPACAINDEX, offsetof(struct Paca, xPacaIndex)); + DEFINE(PACAPROCSTART, offsetof(struct Paca, xProcStart)); + DEFINE(PACAKSAVE, offsetof(struct Paca, xKsave)); + DEFINE(PACACURRENT, offsetof(struct Paca, xCurrent)); + DEFINE(PACASAVEDMSR, offsetof(struct Paca, xSavedMsr)); + DEFINE(PACASTABREAL, offsetof(struct Paca, xStab_data.real)); + DEFINE(PACASTABVIRT, offsetof(struct Paca, xStab_data.virt)); + DEFINE(PACASTABRR, offsetof(struct Paca, xStab_data.next_round_robin)); + DEFINE(PACAR1, offsetof(struct Paca, xR1)); + DEFINE(PACALPQUEUE, offsetof(struct Paca, lpQueuePtr)); + DEFINE(PACATOC, offsetof(struct Paca, xTOC)); + DEFINE(PACAEXCSP, offsetof(struct Paca, exception_sp)); + DEFINE(PACAHRDWINTSTACK, offsetof(struct Paca, xHrdIntStack)); + DEFINE(PACAPROCENABLED, offsetof(struct Paca, xProcEnabled)); + DEFINE(PACAHRDWINTCOUNT, offsetof(struct Paca, xHrdIntCount)); + DEFINE(PACADEFAULTDECR, offsetof(struct Paca, default_decr)); + DEFINE(PACAPROFENABLED, offsetof(struct Paca, prof_enabled)); + DEFINE(PACAPROFLEN, offsetof(struct Paca, prof_len)); + DEFINE(PACAPROFSHIFT, offsetof(struct Paca, prof_shift)); + DEFINE(PACAPROFBUFFER, offsetof(struct Paca, prof_buffer)); + DEFINE(PACAPROFSTEXT, offsetof(struct Paca, prof_stext)); + DEFINE(PACALPPACA, offsetof(struct Paca, xLpPaca)); + DEFINE(LPPACA, offsetof(struct Paca, xLpPaca)); + DEFINE(PACAREGSAV, offsetof(struct Paca, xRegSav)); + DEFINE(PACAEXC, offsetof(struct Paca, exception_stack)); + DEFINE(PACAGUARD, offsetof(struct Paca, guard)); + DEFINE(LPPACASRR0, offsetof(struct ItLpPaca, xSavedSrr0)); + DEFINE(LPPACASRR1, offsetof(struct ItLpPaca, xSavedSrr1)); + DEFINE(LPPACAANYINT, offsetof(struct ItLpPaca, xIntDword.xAnyInt)); + DEFINE(LPPACADECRINT, offsetof(struct ItLpPaca, xIntDword.xFields.xDecrInt)); + DEFINE(LPQCUREVENTPTR, offsetof(struct ItLpQueue, xSlicCurEventPtr)); + DEFINE(LPQOVERFLOW, offsetof(struct ItLpQueue, xPlicOverflowIntPending)); + DEFINE(LPEVENTFLAGS, offsetof(struct HvLpEvent, xFlags)); + DEFINE(PROMENTRY, offsetof(struct prom_t, entry)); + + DEFINE(RTASBASE, offsetof(struct rtas_t, base)); + DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + DEFINE(RTASSIZE, offsetof(struct rtas_t, size)); + + DEFINE(LAST_SYSCALL, offsetof(struct thread_struct, last_syscall)); + DEFINE(PT_REGS, offsetof(struct thread_struct, 
regs)); + DEFINE(PT_TRACESYS, PT_TRACESYS); + DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); + DEFINE(NEED_RESCHED, offsetof(struct task_struct, need_resched)); + DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); + DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); + DEFINE(THREAD_FLAGS, offsetof(struct thread_struct, flags)); + DEFINE(PPC_FLAG_32BIT, PPC_FLAG_32BIT); + /* Interrupt register frame */ + DEFINE(TASK_UNION_SIZE, sizeof(union task_union)); + DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); + /* 288 = # of volatile regs, int & fp, for leaf routines */ + /* which do not stack a frame. See the PPC64 ABI. */ + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); + DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); + DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); + DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); + DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); + DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); + DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); + DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); + DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); + DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); + DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); + DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); + DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); + DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); + DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); + /* Note: these symbols include _ because they overlap with special + * register names + */ + DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); + DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); + DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); + DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); + DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); + DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); + DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); + DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + DEFINE(CLONE_VM, CLONE_VM); + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/open_pic.c linuxppc64_2_4/arch/ppc64/kernel/open_pic.c --- ../kernel.org/linux/arch/ppc64/kernel/open_pic.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/open_pic.c Fri Nov 9 00:43:14 2001 @@ -0,0 +1,828 @@ +/* + * arch/ppc/kernel/open_pic.c -- OpenPIC Interrupt Handling + * + * Copyright (C) 1997 Geert Uytterhoeven + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. 
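 * (Editorial note on the open_pic controller structure defined below:
 * its positional initializer fills the fields of 2.4's struct
 * hw_interrupt_type in declaration order, i.e., assuming the stock 2.4
 * layout:
 *
 *	" OpenPIC "		typename
 *	NULL, NULL		startup, shutdown
 *	openpic_enable_irq	enable
 *	openpic_disable_irq	disable
 *	openpic_ack_irq		ack
 *	openpic_end_irq		end
 *	openpic_set_affinity	set_affinity
 *
 * and likewise for open_pic_ipi.)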
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "local_irq.h" +#include "open_pic.h" +#include "open_pic_defs.h" +#include "i8259.h" +#include + +void* OpenPIC_Addr; +static volatile struct OpenPIC *OpenPIC = NULL; +u_int OpenPIC_NumInitSenses __initdata = 0; +u_char *OpenPIC_InitSenses __initdata = NULL; +extern int use_of_interrupt_tree; + +void find_ISUs(void); + +static u_int NumProcessors; +static u_int NumSources; +static int NumISUs; +static int open_pic_irq_offset; +static volatile unsigned char* chrp_int_ack_special; +static int broken_ipi_registers; + +OpenPIC_SourcePtr ISU[OPENPIC_MAX_ISU]; + +static void openpic_end_irq(unsigned int irq_nr); +static void openpic_ack_irq(unsigned int irq_nr); +static void openpic_set_affinity(unsigned int irq_nr, unsigned long cpumask); + +struct hw_interrupt_type open_pic = { + " OpenPIC ", + NULL, + NULL, + openpic_enable_irq, + openpic_disable_irq, + openpic_ack_irq, + openpic_end_irq, + openpic_set_affinity +}; + +#ifdef CONFIG_SMP +static void openpic_end_ipi(unsigned int irq_nr); +static void openpic_ack_ipi(unsigned int irq_nr); +static void openpic_enable_ipi(unsigned int irq_nr); +static void openpic_disable_ipi(unsigned int irq_nr); + +struct hw_interrupt_type open_pic_ipi = { + " OpenPIC ", + NULL, + NULL, + openpic_enable_ipi, + openpic_disable_ipi, + openpic_ack_ipi, + openpic_end_ipi, + 0 +}; +#endif /* CONFIG_SMP */ + +unsigned int openpic_vec_ipi; +unsigned int openpic_vec_timer; +unsigned int openpic_vec_spurious; + +/* + * Accesses to the current processor's openpic registers + */ +#ifdef CONFIG_SMP +#define THIS_CPU Processor[cpu] +#define DECL_THIS_CPU int cpu = hard_smp_processor_id() +#define CHECK_THIS_CPU check_arg_cpu(cpu) +#else +#define THIS_CPU Processor[hard_smp_processor_id()] +#define DECL_THIS_CPU +#define CHECK_THIS_CPU +#endif /* CONFIG_SMP */ + +#if 1 +#define check_arg_ipi(ipi) \ + if (ipi < 0 || ipi >= OPENPIC_NUM_IPI) \ + printk(KERN_ERR "open_pic.c:%d: illegal ipi %d\n", __LINE__, ipi); +#define check_arg_timer(timer) \ + if (timer < 0 || timer >= OPENPIC_NUM_TIMERS) \ + printk(KERN_ERR "open_pic.c:%d: illegal timer %d\n", __LINE__, timer); +#define check_arg_vec(vec) \ + if (vec < 0 || vec >= OPENPIC_NUM_VECTORS) \ + printk(KERN_ERR "open_pic.c:%d: illegal vector %d\n", __LINE__, vec); +#define check_arg_pri(pri) \ + if (pri < 0 || pri >= OPENPIC_NUM_PRI) \ + printk(KERN_ERR "open_pic.c:%d: illegal priority %d\n", __LINE__, pri); +/* + * Print out a backtrace if it's out of range, since if it's larger than NR_IRQ's + * data has probably been corrupted and we're going to panic or deadlock later + * anyway --Troy + */ +extern unsigned long* _get_SP(void); +#define check_arg_irq(irq) \ + if (irq < open_pic_irq_offset || irq >= (NumSources+open_pic_irq_offset)){ \ + printk(KERN_ERR "open_pic.c:%d: illegal irq %d\n", __LINE__, irq); \ + print_backtrace(_get_SP()); } +#define check_arg_cpu(cpu) \ + if (cpu < 0 || cpu >= OPENPIC_MAX_PROCESSORS){ \ + printk(KERN_ERR "open_pic.c:%d: illegal cpu %d\n", __LINE__, cpu); \ + print_backtrace(_get_SP()); } +#else +#define check_arg_ipi(ipi) do {} while (0) +#define check_arg_timer(timer) do {} while (0) +#define check_arg_vec(vec) do {} while (0) +#define check_arg_pri(pri) do {} while (0) +#define check_arg_irq(irq) do {} while (0) +#define check_arg_cpu(cpu) do {} while (0) +#endif + +#define GET_ISU(source) ISU[(source) >> 4][(source) & 0xf] + +void __init 
openpic_init_IRQ(void) +{ + struct device_node *np; + int i; + unsigned int *addrp; + unsigned char* chrp_int_ack_special = 0; + unsigned char init_senses[NR_IRQS - NUM_8259_INTERRUPTS]; + int nmi_irq = -1; +#if defined(CONFIG_VT) && defined(CONFIG_ADB_KEYBOARD) && defined(XMON) + struct device_node *kbd; +#endif + + if (!(np = find_devices("pci")) + || !(addrp = (unsigned int *) + get_property(np, "8259-interrupt-acknowledge", NULL))) + printk(KERN_ERR "Cannot find pci to get ack address\n"); + else + chrp_int_ack_special = (unsigned char *) + __ioremap(addrp[prom_n_addr_cells(np)-1], 1, _PAGE_NO_CACHE); + /* hydra still sets OpenPIC_InitSenses to a static set of values */ + if (OpenPIC_InitSenses == NULL) { + prom_get_irq_senses(init_senses, NUM_8259_INTERRUPTS, NR_IRQS); + OpenPIC_InitSenses = init_senses; + OpenPIC_NumInitSenses = NR_IRQS - NUM_8259_INTERRUPTS; + } + openpic_init(1, NUM_8259_INTERRUPTS, chrp_int_ack_special, nmi_irq); + for ( i = 0 ; i < NUM_8259_INTERRUPTS ; i++ ) + irq_desc[i].handler = &i8259_pic; + i8259_init(); +} + +static inline u_int openpic_read(volatile u_int *addr) +{ + u_int val; + + val = in_le32(addr); + return val; +} + +static inline void openpic_write(volatile u_int *addr, u_int val) +{ + out_le32(addr, val); +} + +static inline u_int openpic_readfield(volatile u_int *addr, u_int mask) +{ + u_int val = openpic_read(addr); + return val & mask; +} + +static inline void openpic_writefield(volatile u_int *addr, u_int mask, + u_int field) +{ + u_int val = openpic_read(addr); + openpic_write(addr, (val & ~mask) | (field & mask)); +} + +static inline void openpic_clearfield(volatile u_int *addr, u_int mask) +{ + openpic_writefield(addr, mask, 0); +} + +static inline void openpic_setfield(volatile u_int *addr, u_int mask) +{ + openpic_writefield(addr, mask, mask); +} + +static void openpic_safe_writefield(volatile u_int *addr, u_int mask, + u_int field) +{ + unsigned int loops = 100000; + + openpic_setfield(addr, OPENPIC_MASK); + while (openpic_read(addr) & OPENPIC_ACTIVITY) { + if (!loops--) { + printk(KERN_ERR "openpic_safe_writefield timeout\n"); + break; + } + } + openpic_writefield(addr, mask | OPENPIC_MASK, field | OPENPIC_MASK); +} + +#ifdef CONFIG_SMP +static u_int openpic_read_IPI(volatile u_int* addr) +{ + u_int val = 0; + + if (broken_ipi_registers) + /* yes this is right ... bug, feature, you decide! -- tgall */ + val = in_be32(addr); + else + val = in_le32(addr); + + return val; +} + +static void openpic_test_broken_IPI(void) +{ + u_int t; + + openpic_write(&OpenPIC->Global.IPI_Vector_Priority(0), OPENPIC_MASK); + t = openpic_read(&OpenPIC->Global.IPI_Vector_Priority(0)); + if (t == le32_to_cpu(OPENPIC_MASK)) { + printk(KERN_INFO "OpenPIC reversed IPI registers detected\n"); + broken_ipi_registers = 1; + } +} + +/* because of the power3 be / le above, this is needed */ +static inline void openpic_writefield_IPI(volatile u_int* addr, u_int mask, u_int field) +{ + u_int val = openpic_read_IPI(addr); + openpic_write(addr, (val & ~mask) | (field & mask)); +} + +static inline void openpic_clearfield_IPI(volatile u_int *addr, u_int mask) +{ + openpic_writefield_IPI(addr, mask, 0); +} + +static inline void openpic_setfield_IPI(volatile u_int *addr, u_int mask) +{ + openpic_writefield_IPI(addr, mask, mask); +} + +static void openpic_safe_writefield_IPI(volatile u_int *addr, u_int mask, u_int field) +{ + unsigned int loops = 100000; + + openpic_setfield_IPI(addr, OPENPIC_MASK); + + /* wait until it's not in use */ + /* BenH: Is this code really enough ? 
I would rather check the result + * and eventually retry ... + */ + while(openpic_read_IPI(addr) & OPENPIC_ACTIVITY) { + if (!loops--) { + printk(KERN_ERR "openpic_safe_writefield timeout\n"); + break; + } + } + + openpic_writefield_IPI(addr, mask, field | OPENPIC_MASK); +} +#endif /* CONFIG_SMP */ + +void __init openpic_init(int main_pic, int offset, unsigned char* chrp_ack, + int programmer_switch_irq) +{ + u_int t, i; + u_int timerfreq; + const char *version; + + if (!OpenPIC_Addr) { + printk(KERN_INFO "No OpenPIC found !\n"); + return; + } + OpenPIC = (volatile struct OpenPIC *)OpenPIC_Addr; + + ppc_md.progress("openpic enter",0x122); + + t = openpic_read(&OpenPIC->Global.Feature_Reporting0); + switch (t & OPENPIC_FEATURE_VERSION_MASK) { + case 1: + version = "1.0"; + break; + case 2: + version = "1.2"; + break; + case 3: + version = "1.3"; + break; + default: + version = "?"; + break; + } + NumProcessors = ((t & OPENPIC_FEATURE_LAST_PROCESSOR_MASK) >> + OPENPIC_FEATURE_LAST_PROCESSOR_SHIFT) + 1; + NumSources = ((t & OPENPIC_FEATURE_LAST_SOURCE_MASK) >> + OPENPIC_FEATURE_LAST_SOURCE_SHIFT) + 1; + printk(KERN_INFO "OpenPIC Version %s (%d CPUs and %d IRQ sources) at %p\n", + version, NumProcessors, NumSources, OpenPIC); + timerfreq = openpic_read(&OpenPIC->Global.Timer_Frequency); + if (timerfreq) + printk(KERN_INFO "OpenPIC timer frequency is %d.%06d MHz\n", + timerfreq / 1000000, timerfreq % 1000000); + + if (!main_pic) + return; + + open_pic_irq_offset = offset; + chrp_int_ack_special = (volatile unsigned char*)chrp_ack; + + find_ISUs(); + + /* Initialize timer interrupts */ + ppc_md.progress("openpic timer",0x3ba); + for (i = 0; i < OPENPIC_NUM_TIMERS; i++) { + /* Disabled, Priority 0 */ + openpic_inittimer(i, 0, openpic_vec_timer+i); + /* No processor */ + openpic_maptimer(i, 0); + } + +#ifdef CONFIG_SMP + /* Initialize IPI interrupts */ + ppc_md.progress("openpic ipi",0x3bb); + openpic_test_broken_IPI(); + for (i = 0; i < OPENPIC_NUM_IPI; i++) { + /* Disabled, Priority 10..13 */ + openpic_initipi(i, 10+i, openpic_vec_ipi+i); + /* IPIs are per-CPU */ + irq_desc[openpic_vec_ipi+i].status |= IRQ_PER_CPU; + irq_desc[openpic_vec_ipi+i].handler = &open_pic_ipi; + } +#endif + + /* Initialize external interrupts */ + ppc_md.progress("openpic ext",0x3bc); + + openpic_set_priority(0xf); + + /* SIOint (8259 cascade) is special */ + if (offset) { + openpic_initirq(0, 8, offset, 1, 1); + openpic_mapirq(0, 1<= OPENPIC_MAX_ISU) + return; + ISU[isu_num] = (OpenPIC_SourcePtr) __ioremap(addr, 0x400, _PAGE_NO_CACHE); + if (isu_num >= NumISUs) + NumISUs = isu_num + 1; +} + +void find_ISUs(void) +{ + /* Use /interrupt-controller/reg and + * /interrupt-controller/interrupt-ranges from OF device tree + * the ISU array is setup in chrp_pci.c in ibm_add_bridges + * as a result + * -- tgall + */ + + /* basically each ISU is a bus, and this assumes that + * open_pic_isu_count interrupts per bus are possible + * ISU == Interrupt Source + */ + NumSources = NumISUs * 0x10; + openpic_vec_ipi = NumSources + open_pic_irq_offset; + openpic_vec_timer = openpic_vec_ipi + OPENPIC_NUM_IPI; + openpic_vec_spurious = openpic_vec_timer + OPENPIC_NUM_TIMERS; +} + +static inline void openpic_reset(void) +{ + openpic_setfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET); +} + +static inline void openpic_enable_8259_pass_through(void) +{ + openpic_clearfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_8259_PASSTHROUGH_DISABLE); +} + +static void openpic_disable_8259_pass_through(void) +{ + 
openpic_setfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_8259_PASSTHROUGH_DISABLE); +} + +/* + * Find out the current interrupt + */ +static u_int openpic_irq(void) +{ + u_int vec; + DECL_THIS_CPU; + + CHECK_THIS_CPU; + vec = openpic_readfield(&OpenPIC->THIS_CPU.Interrupt_Acknowledge, + OPENPIC_VECTOR_MASK); + return vec; +} + +static void openpic_eoi(void) +{ + DECL_THIS_CPU; + + CHECK_THIS_CPU; + openpic_write(&OpenPIC->THIS_CPU.EOI, 0); + /* Handle PCI write posting */ + (void)openpic_read(&OpenPIC->THIS_CPU.EOI); +} + + +static inline u_int openpic_get_priority(void) +{ + DECL_THIS_CPU; + + CHECK_THIS_CPU; + return openpic_readfield(&OpenPIC->THIS_CPU.Current_Task_Priority, + OPENPIC_CURRENT_TASK_PRIORITY_MASK); +} + +static void openpic_set_priority(u_int pri) +{ + DECL_THIS_CPU; + + CHECK_THIS_CPU; + check_arg_pri(pri); + openpic_writefield(&OpenPIC->THIS_CPU.Current_Task_Priority, + OPENPIC_CURRENT_TASK_PRIORITY_MASK, pri); +} + +/* + * Get/set the spurious vector + */ +static inline u_int openpic_get_spurious(void) +{ + return openpic_readfield(&OpenPIC->Global.Spurious_Vector, + OPENPIC_VECTOR_MASK); +} + +static void openpic_set_spurious(u_int vec) +{ + check_arg_vec(vec); + openpic_writefield(&OpenPIC->Global.Spurious_Vector, OPENPIC_VECTOR_MASK, + vec); +} + +/* + * Convert a cpu mask from logical to physical cpu numbers. + */ +static inline u32 physmask(u32 cpumask) +{ + int i; + u32 mask = 0; + + for (i = 0; i < smp_num_cpus; ++i, cpumask >>= 1) + mask |= (cpumask & 1) << get_hard_smp_processor_id(i); + return mask; +} + +void openpic_init_processor(u_int cpumask) +{ + openpic_write(&OpenPIC->Global.Processor_Initialization, + physmask(cpumask)); +} + +#ifdef CONFIG_SMP +/* + * Initialize an interprocessor interrupt (and disable it) + * + * ipi: OpenPIC interprocessor interrupt number + * pri: interrupt source priority + * vec: the vector it will produce + */ +static void __init openpic_initipi(u_int ipi, u_int pri, u_int vec) +{ + check_arg_ipi(ipi); + check_arg_pri(pri); + check_arg_vec(vec); + openpic_safe_writefield_IPI(&OpenPIC->Global.IPI_Vector_Priority(ipi), + OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK, + (pri << OPENPIC_PRIORITY_SHIFT) | vec); +} + +/* + * Send an IPI to one or more CPUs + * + * Externally called, however, it takes an IPI number (0...OPENPIC_NUM_IPI) + * and not a system-wide interrupt number + */ +void openpic_cause_IPI(u_int ipi, u_int cpumask) +{ + DECL_THIS_CPU; + + CHECK_THIS_CPU; + check_arg_ipi(ipi); + openpic_write(&OpenPIC->THIS_CPU.IPI_Dispatch(ipi), + physmask(cpumask)); +} + +void openpic_request_IPIs(void) +{ + int i; + + /* + * Make sure this matches what is defined in smp.c for + * smp_message_{pass|recv}() or what shows up in + * /proc/interrupts will be wrong!!! --Troy */ + + if (OpenPIC == NULL) + return; + + request_irq(openpic_vec_ipi, + openpic_ipi_action, 0, "IPI0 (call function)", 0); + request_irq(openpic_vec_ipi+1, + openpic_ipi_action, 0, "IPI1 (reschedule)", 0); + request_irq(openpic_vec_ipi+2, + openpic_ipi_action, 0, "IPI2 (invalidate tlb)", 0); + request_irq(openpic_vec_ipi+3, + openpic_ipi_action, 0, "IPI3 (xmon break)", 0); + + for ( i = 0; i < OPENPIC_NUM_IPI ; i++ ) + openpic_enable_ipi(openpic_vec_ipi+i); +} + +/* + * Do per-cpu setup for SMP systems. + * + * Get IPI's working and start taking interrupts. 
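+ * -- Cort
+ */

/*
 * A worked example of physmask() above, under a hypothetical
 * logical-to-hardware CPU numbering {0 -> 0, 1 -> 2}:
 *
 *	physmask(0x3) == (1 << 0) | (1 << 2) == 0x5
 *
 * i.e. logical bit 1 moves to hardware bit 2 before the mask is written
 * to a Destination or IPI_Dispatch register.
 */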
+static spinlock_t openpic_setup_lock __initdata = SPIN_LOCK_UNLOCKED;
+
+void __init do_openpic_setup_cpu(void)
+{
+#ifdef CONFIG_IRQ_ALL_CPUS
+	int i;
+	u32 msk = 1 << hard_smp_processor_id();
+#endif
+
+	spin_lock(&openpic_setup_lock);
+
+#ifdef CONFIG_IRQ_ALL_CPUS
+	/* let the openpic know we want intrs. default affinity
+	 * is 0xffffffff until changed via /proc
+	 * That's how it's done on x86. If we want it differently, then
+	 * we should make sure we also change the default values of
+	 * irq_affinity in irq.c.
+	 */
+	for (i = 0; i < NumSources; i++)
+		openpic_mapirq(i, openpic_read(&GET_ISU(i).Destination) | msk);
+#endif /* CONFIG_IRQ_ALL_CPUS */
+	openpic_set_priority(0);
+
+	spin_unlock(&openpic_setup_lock);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Initialize a timer interrupt (and disable it)
+ *
+ * timer: OpenPIC timer number
+ * pri: interrupt source priority
+ * vec: the vector it will produce
+ */
+static void __init openpic_inittimer(u_int timer, u_int pri, u_int vec)
+{
+	check_arg_timer(timer);
+	check_arg_pri(pri);
+	check_arg_vec(vec);
+	openpic_safe_writefield(&OpenPIC->Global.Timer[timer].Vector_Priority,
+				OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK,
+				(pri << OPENPIC_PRIORITY_SHIFT) | vec);
+}
+
+/*
+ * Map a timer interrupt to one or more CPUs
+ */
+static void __init openpic_maptimer(u_int timer, u_int cpumask)
+{
+	check_arg_timer(timer);
+	openpic_write(&OpenPIC->Global.Timer[timer].Destination,
+		      physmask(cpumask));
+}
+
+
+/*
+ *
+ * All functions below take an irq argument that already includes
+ * open_pic_irq_offset
+ *
+ */
+
+
+/*
+ * Enable/disable an external interrupt source
+ *
+ * Externally called, irq is an offset system-wide interrupt number
+ */
+static void openpic_enable_irq(u_int irq)
+{
+	unsigned int loops = 100000;
+	check_arg_irq(irq);
+
+	openpic_clearfield(&GET_ISU(irq - open_pic_irq_offset).Vector_Priority, OPENPIC_MASK);
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "openpic_enable_irq timeout\n");
+			break;
+		}
+
+		mb(); /* sync is probably useless here */
+	} while(openpic_readfield(&GET_ISU(irq - open_pic_irq_offset).Vector_Priority,
+			OPENPIC_MASK));
+}
+
+static void openpic_disable_irq(u_int irq)
+{
+	u32 vp;
+	unsigned int loops = 100000;
+
+	check_arg_irq(irq);
+
+	openpic_setfield(&GET_ISU(irq - open_pic_irq_offset).Vector_Priority, OPENPIC_MASK);
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "openpic_disable_irq timeout\n");
+			break;
+		}
+
+		mb(); /* sync is probably useless here */
+		vp = openpic_readfield(&GET_ISU(irq - open_pic_irq_offset).Vector_Priority,
+			OPENPIC_MASK | OPENPIC_ACTIVITY);
+	} while((vp & OPENPIC_ACTIVITY) && !(vp & OPENPIC_MASK));
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Enable/disable an IPI interrupt source
+ *
+ * Externally called, irq is an offset system-wide interrupt number
+ */
+void openpic_enable_ipi(u_int irq)
+{
+	irq -= openpic_vec_ipi;
+	check_arg_ipi(irq);
+	openpic_clearfield_IPI(&OpenPIC->Global.IPI_Vector_Priority(irq), OPENPIC_MASK);
+
+}
+void openpic_disable_ipi(u_int irq)
+{
+	/* NEVER disable an IPI... that's just plain wrong! */
+}
+
+#endif
+
+/*
+ * Initialize an interrupt source (and disable it!)
+ *
+ * irq: OpenPIC interrupt number
+ * pri: interrupt source priority
+ * vec: the vector it will produce
+ * pol: polarity (1 for positive, 0 for negative)
+ * sense: 1 for level, 0 for edge
+ */
+static void openpic_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense)
+{
+	openpic_safe_writefield(&GET_ISU(irq).Vector_Priority,
+				OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK |
+				OPENPIC_SENSE_MASK | OPENPIC_POLARITY_MASK,
+				(pri << OPENPIC_PRIORITY_SHIFT) | vec |
+				(pol ? OPENPIC_POLARITY_POSITIVE :
+				       OPENPIC_POLARITY_NEGATIVE) |
+				(sense ? OPENPIC_SENSE_LEVEL : OPENPIC_SENSE_EDGE));
+}
+
+/*
+ * Map an interrupt source to one or more CPUs
+ */
+static void openpic_mapirq(u_int irq, u_int physmask)
+{
+	openpic_write(&GET_ISU(irq).Destination, physmask);
+}
+
+/*
+ * Set the sense for an interrupt source (and disable it!)
+ *
+ * sense: 1 for level, 0 for edge
+ */
+static inline void openpic_set_sense(u_int irq, int sense)
+{
+	openpic_safe_writefield(&GET_ISU(irq).Vector_Priority,
+				OPENPIC_SENSE_LEVEL,
+				(sense ? OPENPIC_SENSE_LEVEL : 0));
+}
+
+/* No spinlocks, should not be necessary with the OpenPIC
+ * (1 register = 1 interrupt and we have the desc lock).
+ */
+static void openpic_ack_irq(unsigned int irq_nr)
+{
+}
+
+static void openpic_end_irq(unsigned int irq_nr)
+{
+	if ((irq_desc[irq_nr].status & IRQ_LEVEL) != 0)
+		openpic_eoi();
+}
+
+static void openpic_set_affinity(unsigned int irq_nr, unsigned long cpumask)
+{
+	openpic_mapirq(irq_nr - open_pic_irq_offset, physmask(cpumask));
+}
+
+#ifdef CONFIG_SMP
+static void openpic_ack_ipi(unsigned int irq_nr)
+{
+}
+
+static void openpic_end_ipi(unsigned int irq_nr)
+{
+	/* IPIs are marked IRQ_PER_CPU. This has the side effect of
+	 * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from
+	 * applying to them. We EOI them late to avoid re-entering.
+	 * However, I'm wondering if we could simply let them have the
+	 * SA_INTERRUPT flag and let them execute with all interrupts OFF.
+	 * This would have the side effect of either running cross-CPU
+	 * functions with interrupts off, or we can re-enable them explicitly
+	 * with a __sti() in smp_call_function_interrupt(), since
+	 * smp_call_function() is protected by a spinlock.
+	 * Or maybe we shouldn't set the IRQ_PER_CPU flag on the cross-CPU
+	 * function call IPI at all, but that would make it a special case.
+	 */
+	openpic_eoi();
+}
+
+static void openpic_ipi_action(int cpl, void *dev_id, struct pt_regs *regs)
+{
+	smp_message_recv(cpl-openpic_vec_ipi, regs);
+}
+
+#endif /* CONFIG_SMP */
+
+int openpic_get_irq(struct pt_regs *regs)
+{
+	extern int i8259_irq(int cpu);
+
+	int irq = openpic_irq();
+
+	/* Management of the cascade should be moved out of here */
+	if (open_pic_irq_offset && irq == open_pic_irq_offset)
+	{
+		/*
+		 * This magic address generates a PCI IACK cycle.
+		 */
+		if ( chrp_int_ack_special )
+			irq = *chrp_int_ack_special;
+		else
+			irq = i8259_irq( smp_processor_id() );
+		openpic_eoi();
+	}
+	if (irq == openpic_vec_spurious)
+		irq = -1;
+	return irq;
+}
+
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/open_pic.h linuxppc64_2_4/arch/ppc64/kernel/open_pic.h
--- ../kernel.org/linux/arch/ppc64/kernel/open_pic.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/open_pic.h Fri May 4 17:13:58 2001
@@ -0,0 +1,43 @@
+/*
+ * arch/ppc/kernel/open_pic.h -- OpenPIC Interrupt Handling
+ *
+ * Copyright (C) 1997 Geert Uytterhoeven
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.
See the file COPYING in the main directory of this archive + * for more details. + * + */ + +#ifndef _PPC64_KERNEL_OPEN_PIC_H +#define _PPC64_KERNEL_OPEN_PIC_H + +#define OPENPIC_SIZE 0x40000 + +/* OpenPIC IRQ controller structure */ +extern struct hw_interrupt_type open_pic; + +/* OpenPIC IPI controller structure */ +#ifdef CONFIG_SMP +extern struct hw_interrupt_type open_pic_ipi; +#endif /* CONFIG_SMP */ + +extern u_int OpenPIC_NumInitSenses; +extern u_char *OpenPIC_InitSenses; +extern void* OpenPIC_Addr; + +/* Exported functions */ +extern void openpic_init(int, int, unsigned char *, int); +extern void openpic_request_IPIs(void); +extern void do_openpic_setup_cpu(void); +extern int openpic_get_irq(struct pt_regs *regs); +extern void openpic_init_processor(u_int cpumask); +extern void openpic_setup_ISU(int isu_num, unsigned long addr); +extern void openpic_cause_IPI(u_int ipi, u_int cpumask); + +extern inline int openpic_to_irq(int irq) +{ + return irq += NUM_8259_INTERRUPTS; +} +/*extern int open_pic_irq_offset;*/ +#endif /* _PPC64_KERNEL_OPEN_PIC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/open_pic_defs.h linuxppc64_2_4/arch/ppc64/kernel/open_pic_defs.h --- ../kernel.org/linux/arch/ppc64/kernel/open_pic_defs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/open_pic_defs.h Sat Oct 20 06:53:25 2001 @@ -0,0 +1,318 @@ +/* + * linux/openpic.h -- OpenPIC definitions + * + * Copyright (C) 1997 Geert Uytterhoeven + * + * This file is based on the following documentation: + * + * The Open Programmable Interrupt Controller (PIC) + * Register Interface Specification Revision 1.2 + * + * Issue Date: October 1995 + * + * Issued jointly by Advanced Micro Devices and Cyrix Corporation + * + * AMD is a registered trademark of Advanced Micro Devices, Inc. + * Copyright (C) 1995, Advanced Micro Devices, Inc. and Cyrix, Inc. + * All Rights Reserved. + * + * To receive a copy of this documentation, send an email to openpic@amd.com. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _LINUX_OPENPIC_H +#define _LINUX_OPENPIC_H + +#ifdef __KERNEL__ + +#include + +/* + * OpenPIC supports up to 2048 interrupt sources and up to 32 processors + */ + +#define OPENPIC_MAX_SOURCES 2048 +#define OPENPIC_MAX_PROCESSORS 32 +#define OPENPIC_MAX_ISU 32 + +#define OPENPIC_NUM_TIMERS 4 +#define OPENPIC_NUM_IPI 4 +#define OPENPIC_NUM_PRI 16 +#define OPENPIC_NUM_VECTORS OPENPIC_MAX_SOURCES + +/* + * OpenPIC Registers are 32 bits and aligned on 128 bit boundaries + */ + +typedef struct _OpenPIC_Reg { + u_int Reg; /* Little endian! 
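 * (For scale: together with the 12 pad bytes below this makes
 * sizeof(OpenPIC_Reg) == 16, giving the 128-bit register spacing noted
 * above; e.g. _IPI_Vector_Priority[1].Reg ends up exactly 16 bytes
 * after _IPI_Vector_Priority[0].Reg, so plain array indexing lands on
 * the hardware's register stride.)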
*/ + char Pad[0xc]; +} OpenPIC_Reg; + + +/* + * Per Processor Registers + */ + +typedef struct _OpenPIC_Processor { + /* + * Private Shadow Registers (for SLiC backwards compatibility) + */ + u_int IPI0_Dispatch_Shadow; /* Write Only */ + char Pad1[0x4]; + u_int IPI0_Vector_Priority_Shadow; /* Read/Write */ + char Pad2[0x34]; + /* + * Interprocessor Interrupt Command Ports + */ + OpenPIC_Reg _IPI_Dispatch[OPENPIC_NUM_IPI]; /* Write Only */ + /* + * Current Task Priority Register + */ + OpenPIC_Reg _Current_Task_Priority; /* Read/Write */ + char Pad3[0x10]; + /* + * Interrupt Acknowledge Register + */ + OpenPIC_Reg _Interrupt_Acknowledge; /* Read Only */ + /* + * End of Interrupt (EOI) Register + */ + OpenPIC_Reg _EOI; /* Read/Write */ + char Pad5[0xf40]; +} OpenPIC_Processor; + + + /* + * Timer Registers + */ + +typedef struct _OpenPIC_Timer { + OpenPIC_Reg _Current_Count; /* Read Only */ + OpenPIC_Reg _Base_Count; /* Read/Write */ + OpenPIC_Reg _Vector_Priority; /* Read/Write */ + OpenPIC_Reg _Destination; /* Read/Write */ +} OpenPIC_Timer; + + + /* + * Global Registers + */ + +typedef struct _OpenPIC_Global { + /* + * Feature Reporting Registers + */ + OpenPIC_Reg _Feature_Reporting0; /* Read Only */ + OpenPIC_Reg _Feature_Reporting1; /* Future Expansion */ + /* + * Global Configuration Registers + */ + OpenPIC_Reg _Global_Configuration0; /* Read/Write */ + OpenPIC_Reg _Global_Configuration1; /* Future Expansion */ + /* + * Vendor Specific Registers + */ + OpenPIC_Reg _Vendor_Specific[4]; + /* + * Vendor Identification Register + */ + OpenPIC_Reg _Vendor_Identification; /* Read Only */ + /* + * Processor Initialization Register + */ + OpenPIC_Reg _Processor_Initialization; /* Read/Write */ + /* + * IPI Vector/Priority Registers + */ + OpenPIC_Reg _IPI_Vector_Priority[OPENPIC_NUM_IPI]; /* Read/Write */ + /* + * Spurious Vector Register + */ + OpenPIC_Reg _Spurious_Vector; /* Read/Write */ + /* + * Global Timer Registers + */ + OpenPIC_Reg _Timer_Frequency; /* Read/Write */ + OpenPIC_Timer Timer[OPENPIC_NUM_TIMERS]; + char Pad1[0xee00]; +} OpenPIC_Global; + + + /* + * Interrupt Source Registers + */ + +typedef struct _OpenPIC_Source { + OpenPIC_Reg _Vector_Priority; /* Read/Write */ + OpenPIC_Reg _Destination; /* Read/Write */ +} OpenPIC_Source, *OpenPIC_SourcePtr; + + + /* + * OpenPIC Register Map + */ + +struct OpenPIC { + char Pad1[0x1000]; + /* + * Global Registers + */ + OpenPIC_Global Global; + /* + * Interrupt Source Configuration Registers + */ + OpenPIC_Source Source[OPENPIC_MAX_SOURCES]; + /* + * Per Processor Registers + */ + OpenPIC_Processor Processor[OPENPIC_MAX_PROCESSORS]; +}; + +extern volatile struct OpenPIC *OpenPIC; + + +/* + * Current Task Priority Register + */ + +#define OPENPIC_CURRENT_TASK_PRIORITY_MASK 0x0000000f + +/* + * Who Am I Register + */ + +#define OPENPIC_WHO_AM_I_ID_MASK 0x0000001f + +/* + * Feature Reporting Register 0 + */ + +#define OPENPIC_FEATURE_LAST_SOURCE_MASK 0x07ff0000 +#define OPENPIC_FEATURE_LAST_SOURCE_SHIFT 16 +#define OPENPIC_FEATURE_LAST_PROCESSOR_MASK 0x00001f00 +#define OPENPIC_FEATURE_LAST_PROCESSOR_SHIFT 8 +#define OPENPIC_FEATURE_VERSION_MASK 0x000000ff + +/* + * Global Configuration Register 0 + */ + +#define OPENPIC_CONFIG_RESET 0x80000000 +#define OPENPIC_CONFIG_8259_PASSTHROUGH_DISABLE 0x20000000 +#define OPENPIC_CONFIG_BASE_MASK 0x000fffff + +/* + * Vendor Identification Register + */ + +#define OPENPIC_VENDOR_ID_STEPPING_MASK 0x00ff0000 +#define OPENPIC_VENDOR_ID_STEPPING_SHIFT 16 +#define OPENPIC_VENDOR_ID_DEVICE_ID_MASK 
0x0000ff00 +#define OPENPIC_VENDOR_ID_DEVICE_ID_SHIFT 8 +#define OPENPIC_VENDOR_ID_VENDOR_ID_MASK 0x000000ff + +/* + * Vector/Priority Registers + */ + +#define OPENPIC_MASK 0x80000000 +#define OPENPIC_ACTIVITY 0x40000000 /* Read Only */ +#define OPENPIC_PRIORITY_MASK 0x000f0000 +#define OPENPIC_PRIORITY_SHIFT 16 +#define OPENPIC_VECTOR_MASK 0x000007ff + + +/* + * Interrupt Source Registers + */ + +#define OPENPIC_POLARITY_POSITIVE 0x00800000 +#define OPENPIC_POLARITY_NEGATIVE 0x00000000 +#define OPENPIC_POLARITY_MASK 0x00800000 +#define OPENPIC_SENSE_LEVEL 0x00400000 +#define OPENPIC_SENSE_EDGE 0x00000000 +#define OPENPIC_SENSE_MASK 0x00400000 + + +/* + * Timer Registers + */ + +#define OPENPIC_COUNT_MASK 0x7fffffff +#define OPENPIC_TIMER_TOGGLE 0x80000000 +#define OPENPIC_TIMER_COUNT_INHIBIT 0x80000000 + + +/* + * Aliases to make life simpler + */ + +/* Per Processor Registers */ +#define IPI_Dispatch(i) _IPI_Dispatch[i].Reg +#define Current_Task_Priority _Current_Task_Priority.Reg +#define Interrupt_Acknowledge _Interrupt_Acknowledge.Reg +#define EOI _EOI.Reg + +/* Global Registers */ +#define Feature_Reporting0 _Feature_Reporting0.Reg +#define Feature_Reporting1 _Feature_Reporting1.Reg +#define Global_Configuration0 _Global_Configuration0.Reg +#define Global_Configuration1 _Global_Configuration1.Reg +#define Vendor_Specific(i) _Vendor_Specific[i].Reg +#define Vendor_Identification _Vendor_Identification.Reg +#define Processor_Initialization _Processor_Initialization.Reg +#define IPI_Vector_Priority(i) _IPI_Vector_Priority[i].Reg +#define Spurious_Vector _Spurious_Vector.Reg +#define Timer_Frequency _Timer_Frequency.Reg + +/* Timer Registers */ +#define Current_Count _Current_Count.Reg +#define Base_Count _Base_Count.Reg +#define Vector_Priority _Vector_Priority.Reg +#define Destination _Destination.Reg + +/* Interrupt Source Registers */ +#define Vector_Priority _Vector_Priority.Reg +#define Destination _Destination.Reg + +/* + * Local (static) OpenPIC Operations + */ + + +/* Global Operations */ +static void openpic_reset(void); +static void openpic_enable_8259_pass_through(void); +static void openpic_disable_8259_pass_through(void); +static u_int openpic_irq(void); +static void openpic_eoi(void); +static u_int openpic_get_priority(void); +static void openpic_set_priority(u_int pri); +static u_int openpic_get_spurious(void); +static void openpic_set_spurious(u_int vector); + +#ifdef CONFIG_SMP +/* Interprocessor Interrupts */ +static void openpic_initipi(u_int ipi, u_int pri, u_int vector); +static void openpic_ipi_action(int cpl, void *dev_id, struct pt_regs *regs); +#endif + +/* Timer Interrupts */ +static void openpic_inittimer(u_int timer, u_int pri, u_int vector); +static void openpic_maptimer(u_int timer, u_int cpumask); + +/* Interrupt Sources */ +static void openpic_enable_irq(u_int irq); +static void openpic_disable_irq(u_int irq); +static void openpic_initirq(u_int irq, u_int pri, u_int vector, int polarity, + int is_level); +static void openpic_mapirq(u_int irq, u_int cpumask); +static void openpic_set_sense(u_int irq, int sense); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_OPENPIC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pSeries_hvCall.S linuxppc64_2_4/arch/ppc64/kernel/pSeries_hvCall.S --- ../kernel.org/linux/arch/ppc64/kernel/pSeries_hvCall.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pSeries_hvCall.S Fri Oct 19 09:33:45 2001 @@ -0,0 +1,70 @@ +/* + * arch/ppc64/kernel/pSeries_hvCall.S + * + * + * This file contains the 
generic code to perform a call to the + * pSeries LPAR hypervisor. + * NOTE: this file will go away when we move to inline this work. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include "ppc_asm.h" + +/* + * hcall interface to pSeries LPAR + */ +#define HSC .long 0x44000022 + +/* long plpar_hcall(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3); R10 + */ + + .text +_GLOBAL(plpar_hcall) + mfcr r0 + std r0,-8(r1) + stdu r1,-32(r1) + + std r8,-8(r1) /* Save out ptrs. */ + std r9,-16(r1) + std r10,-24(r1) + + HSC /* invoke the hypervisor */ + + ld r10,-8(r1) /* Fetch r4-r7 ret args. */ + std r4,0(r10) + ld r10,-16(r1) + std r5,0(r10) + ld r10,-24(r1) + std r6,0(r10) + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* Simple interface with no output values (other than status) */ +_GLOBAL(plpar_hcall_norets) + mfcr r0 + std r0,-8(r1) + HSC /* invoke the hypervisor */ + ld r0,-8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pSeries_lpar.c linuxppc64_2_4/arch/ppc64/kernel/pSeries_lpar.c --- ../kernel.org/linux/arch/ppc64/kernel/pSeries_lpar.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pSeries_lpar.c Thu Nov 8 23:13:41 2001 @@ -0,0 +1,895 @@ +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Status return values */ +#define H_Success 0 +#define H_Busy 1 /* Hardware busy -- retry later */ +#define H_Hardware -1 /* Hardware error */ +#define H_Function -2 /* Function not supported */ +#define H_Privilege -3 /* Caller not privileged */ +#define H_Parameter -4 /* Parameter invalid, out-of-range or conflicting */ +#define H_Bad_Mode -5 /* Illegal msr value */ +#define H_PTEG_Full -6 /* PTEG is full */ +#define H_Not_Found -7 /* PTE was not found" */ +#define H_Reserved_DABR -8 /* DABR address is reserved by the hypervisor on this processor" */ + +/* Flags */ +#define H_LARGE_PAGE (1UL<<(63-16)) +#define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */ +#define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */ +#define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */ +#define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */ +#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ +#define H_ICACHE_SYNCHRONIZE (1UL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */ +#define H_ZERO_PAGE (1UL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */ +#define H_COPY_PAGE (1UL<<(63-49)) +#define H_N (1UL<<(63-61)) +#define H_PP1 (1UL<<(63-62)) +#define H_PP2 (1UL<<(63-63)) + + + +/* pSeries hypervisor opcodes */ +#define H_REMOVE 0x04 +#define H_ENTER 0x08 +#define H_READ 0x0c +#define H_CLEAR_MOD 0x10 +#define H_CLEAR_REF 0x14 +#define H_PROTECT 0x18 +#define H_GET_TCE 0x1c +#define H_PUT_TCE 0x20 +#define H_SET_SPRG0 0x24 +#define H_SET_DABR 0x28 +#define H_PAGE_INIT 0x2c +#define H_SET_ASR 0x30 +#define H_ASR_ON 0x34 +#define H_ASR_OFF 0x38 +#define H_LOGICAL_CI_LOAD 0x3c +#define H_LOGICAL_CI_STORE 0x40 +#define H_LOGICAL_CACHE_LOAD 0x44 +#define H_LOGICAL_CACHE_STORE 0x48 +#define H_LOGICAL_ICBI 0x4c +#define H_LOGICAL_DCBF 0x50 +#define H_GET_TERM_CHAR 0x54 +#define H_PUT_TERM_CHAR 0x58 +#define H_REAL_TO_LOGICAL 0x5c +#define H_HYPERVISOR_DATA 0x60 +#define H_EOI 0x64 +#define H_CPPR 0x68 +#define H_IPI 0x6c +#define H_IPOLL 0x70 +#define H_XIRR 0x74 + +#define HSC ".long 0x44000022\n" +#define H_ENTER_r3 "li 3, 0x08\n" + +/* plpar_hcall() -- Generic call interface using above opcodes + * + * The actual call interface is a hypervisor call instruction with + * the opcode in R3 and input args in R4-R7. + * Status is returned in R3 with variable output values in R4-R11. + * Only H_PTE_READ with H_READ_4 uses R6-R11 so we ignore it for now + * and return only two out args which MUST ALWAYS BE PROVIDED. + */ +long plpar_hcall(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long *out1, + unsigned long *out2, + unsigned long *out3); + +/* Same as plpar_hcall but for those opcodes that return no values + * other than status. Slightly more efficient. 
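+ * For example, an EOI takes no output values and reduces to
+ *
+ *	plpar_hcall_norets(H_EOI, xirr);
+ *
+ * (see plpar_eoi() below), while a call that does return values, such
+ * as H_XIRR, must go through plpar_hcall() with valid out pointers
+ * supplied even when unused -- unwanted results are simply dumped
+ * into a dummy variable, as the wrappers below do.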
 */
+long plpar_hcall_norets(unsigned long opcode, ...);
+
+
+long plpar_pte_enter(unsigned long flags,
+		unsigned long ptex,
+		unsigned long new_pteh, unsigned long new_ptel,
+		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long dummy, ret;
+	ret = plpar_hcall(H_ENTER, flags, ptex, new_pteh, new_ptel,
+			  old_pteh_ret, old_ptel_ret, &dummy);
+	return(ret);
+}
+
+long plpar_pte_remove(unsigned long flags,
+		unsigned long ptex,
+		unsigned long avpn,
+		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long dummy;
+	return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0,
+			   old_pteh_ret, old_ptel_ret, &dummy);
+}
+
+long plpar_pte_read(unsigned long flags,
+		unsigned long ptex,
+		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long dummy;
+	return plpar_hcall(H_READ, flags, ptex, 0, 0,
+			   old_pteh_ret, old_ptel_ret, &dummy);
+}
+
+long plpar_pte_protect(unsigned long flags,
+		unsigned long ptex,
+		unsigned long avpn)
+{
+	return plpar_hcall_norets(H_PROTECT, flags, ptex);
+}
+
+long plpar_tce_get(unsigned long liobn,
+		unsigned long ioba,
+		unsigned long *tce_ret)
+{
+	unsigned long dummy;
+	return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0,
+			   tce_ret, &dummy, &dummy);
+}
+
+
+long plpar_tce_put(unsigned long liobn,
+		unsigned long ioba,
+		unsigned long tceval)
+{
+	return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval);
+}
+
+long plpar_get_term_char(unsigned long termno,
+		unsigned long *len_ret,
+		char *buf_ret)
+{
+	unsigned long *lbuf = (unsigned long *)buf_ret;	/* ToDo: alignment? */
+	return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0,
+			   len_ret, lbuf+0, lbuf+1);
+}
+
+long plpar_put_term_char(unsigned long termno,
+		unsigned long len,
+		const char *buffer)
+{
+	unsigned long dummy;
+	unsigned long *lbuf = (unsigned long *)buffer;	/* ToDo: alignment? */
+	return plpar_hcall(H_PUT_TERM_CHAR, termno, len,
+			   lbuf[0], lbuf[1], &dummy, &dummy, &dummy);
+}
+
+long plpar_eoi(unsigned long xirr)
+{
+	return plpar_hcall_norets(H_EOI, xirr);
+}
+
+long plpar_cppr(unsigned long cppr)
+{
+	return plpar_hcall_norets(H_CPPR, cppr);
+}
+
+long plpar_ipi(unsigned long servernum,
+		unsigned long mfrr)
+{
+	return plpar_hcall_norets(H_IPI, servernum, mfrr);
+}
+
+long plpar_xirr(unsigned long *xirr_ret)
+{
+	unsigned long dummy;
+	return plpar_hcall(H_XIRR, 0, 0, 0, 0,
+			   xirr_ret, &dummy, &dummy);
+}
+
+/*
+ * The following section contains code that ultimately should
+ * be put in the relevant file (htab.c, xics.c, etc).  It has
+ * been put here for the time being in order to ease maintenance
+ * of the pSeries LPAR code until it can all be put into CVS.
+ */
+static void hpte_invalidate_pSeriesLP(unsigned long slot)
+{
+	HPTE old_pte;
+	unsigned long lpar_rc;
+	unsigned long flags = 0;
+
+	lpar_rc = plpar_pte_remove(flags,
+				   slot,
+				   0,
+				   &old_pte.dw0.dword0,
+				   &old_pte.dw1.dword1);
+	if (lpar_rc != H_Success) BUG();
+}
+
+/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up.  So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero.  For now I am paranoid.
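+ * (The "newpp & 3" masking below keeps only the pp bits, so any stray
+ * high bits handed to us in newpp are dropped before the hcall is made.)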
+ */ +static void hpte_updatepp_pSeriesLP(long slot, unsigned long newpp, unsigned long va) +{ + unsigned long lpar_rc; + unsigned long flags; + flags = newpp & 3; + lpar_rc = plpar_pte_protect( flags, + slot, + 0); + if (lpar_rc != H_Success) { + udbg_printf( " bad return code from pte protect rc = %lx \n", lpar_rc); + for (;;); + } +} + +static void hpte_updateboltedpp_pSeriesLP(unsigned long newpp, unsigned long ea) +{ + unsigned long lpar_rc; + unsigned long vsid,va,vpn,flags; + long slot; + + vsid = get_kernel_vsid( ea ); + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + vpn = va >> PAGE_SHIFT; + + slot = ppc_md.hpte_find( vpn ); + flags = newpp & 3; + lpar_rc = plpar_pte_protect( flags, + slot, + 0); + if (lpar_rc != H_Success) { + udbg_printf( " bad return code from pte bolted protect rc = %lx \n", lpar_rc); + for (;;); + } +} + + +static unsigned long hpte_getword0_pSeriesLP(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + unsigned long dummy_word1; + unsigned long flags; + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + + lpar_rc = plpar_pte_read(flags, + slot, + &dword0, &dummy_word1); + if (lpar_rc != H_Success) { + udbg_printf(" error on pte read in get_hpte0 rc = %lx \n", lpar_rc); + for (;;); + } + + return(dword0); +} + +static long hpte_selectslot_pSeriesLP(unsigned long vpn) +{ + unsigned long primary_hash; + unsigned long hpteg_slot; + unsigned i, k; + unsigned long flags; + HPTE pte_read; + unsigned long lpar_rc; + + /* Search the primary group for an available slot */ + primary_hash = hpt_hash(vpn, 0); + + hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; + + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + for (i=0; i> 11; + unsigned long arpn = physRpn_to_absRpn( prpn ); + + unsigned long lpar_rc; + unsigned long flags; + HPTE ret_hpte; + + /* Fill in the local HPTE with absolute rpn, avpn and flags */ + lhpte.dw1.d = 0; + lhpte.dw1.h.rpn = arpn; + lhpte.dw1.f.flags = hpteflags; + + lhpte.dw0.d = 0; + lhpte.dw0.h.avpn = avpn; + lhpte.dw0.h.h = hash; + lhpte.dw0.h.bolted = bolted; + lhpte.dw0.h.v = 1; + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Large page = 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 1 - only modify exact entry */ + flags = H_EXACT; + + if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + lhpte.dw1.f.flags &= ~_PAGE_COHERENT; +#if 1 + __asm__ __volatile__ ( + H_ENTER_r3 + "mr 4, %1\n" + "mr 5, %2\n" + "mr 6, %3\n" + "mr 7, %4\n" + HSC + "mr %0, 3\n" + : "=r" (lpar_rc) + : "r" (flags), "r" (slot), "r" (lhpte.dw0.d), "r" (lhpte.dw1.d) + : "r3", "r4", "r5", "r6", "r7", "cc"); +#else + lpar_rc = plpar_pte_enter(flags, + slot, + lhpte.dw0.d, + lhpte.dw1.d, + &ret_hpte.dw0.dword0, + &ret_hpte.dw1.dword1); +#endif + if (lpar_rc != H_Success) { + udbg_printf("error on pte enter lapar rc = %ld\n",lpar_rc); + udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot, lhpte.dw0.d, lhpte.dw1.d); + /* xmon_backtrace("backtrace"); */ + for (;;); + } +} + +static long hpte_find_pSeriesLP(unsigned long vpn) +{ + union { + unsigned long d; + Hpte_dword0 h; + } hpte_dw0; + long slot; + unsigned long hash; + unsigned long i,j; + + hash = hpt_hash(vpn, 0); + for ( j=0; j<2; ++j ) { + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + for ( i=0; i> 11 ) ) && + ( hpte_dw0.h.v ) && + ( 
hpte_dw0.h.h == j ) ) { + /* HPTE matches */ + if ( j ) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + return -1; +} + +/* + * Create a pte - LPAR . Used during initialization only. + * We assume the PTE will fit in the primary PTEG. + */ +void make_pte_LPAR(HPTE *htab, + unsigned long va, unsigned long pa, int mode, + unsigned long hash_mask, int large) +{ + HPTE local_hpte, ret_hpte; + unsigned long hash, slot, flags,lpar_rc, vpn; + + if (large) + vpn = va >> 24; + else + vpn = va >> 12; + + hash = hpt_hash(vpn, large); + + slot = ((hash & hash_mask)*HPTES_PER_GROUP); + + local_hpte.dw1.dword1 = pa | mode; + local_hpte.dw0.dword0 = 0; + local_hpte.dw0.dw0.avpn = va >> 23; + local_hpte.dw0.dw0.bolted = 1; /* bolted */ + local_hpte.dw0.dw0.v = 1; + + /* Set CEC cookie to 0 */ + /* Large page = 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 - modify any entry in group */ + flags = 0; +#if 1 + __asm__ __volatile__ ( + H_ENTER_r3 + "mr 4, %1\n" + "mr 5, %2\n" + "mr 6, %3\n" + "mr 7, %4\n" + HSC + "mr %0, 3\n" + : "=r" (lpar_rc) + : "r" (flags), "r" (slot), "r" (local_hpte.dw0.dword0), "r" (local_hpte.dw1.dword1) + : "r3", "r4", "r5", "r6", "r7", "cc"); +#else + lpar_rc = plpar_pte_enter(flags, + slot, + local_hpte.dw0.dword0, + local_hpte.dw1.dword1, + &ret_hpte.dw0.dword0, + &ret_hpte.dw1.dword1); +#endif +#if 0 /* NOTE: we explicitly do not check return status here because it is + * "normal" for early boot code to map io regions for which a partition + * has no access. However, we will die if we actually fault on these + * "permission denied" pages. + */ + if (lpar_rc != H_Success) { + /* pSeriesLP_init_early(); */ + udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc); + BUG(); + } +#endif +} + +static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum, + unsigned long uaddr, int direction ) +{ + u64 setTceRc; + union Tce tce; + + PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr); + PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n", + tcenum, tbl, tbl->index); + + tce.wholeTce = 0; + tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT; + + tce.tceBits.readWrite = 1; + if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1; + + setTceRc = plpar_tce_put((u64)tbl->index, + (u64)tcenum << 12, + tce.wholeTce ); + /* Make sure the update is visible to hardware. + * ToDo: sync after setting *all* the tce's. + */ + __asm__ __volatile__ ("sync" : : : "memory"); + + if(setTceRc) { + PPCDBG(PPCDBG_TCE, "setTce failed. 
rc=%ld\n", setTceRc); + PPCDBG(PPCDBG_TCE, "\tindex = 0x%lx\n", (u64)tbl->index); + PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx\n", (u64)tcenum); + PPCDBG(PPCDBG_TCE, "\ttce val = 0x%lx\n", tce.wholeTce ); + } +} + +static inline void free_tce_range(struct TceTable *tbl, + long tcenum, unsigned order ) +{ + unsigned long flags; + + /* Lock the tce allocation bitmap */ + spin_lock_irqsave( &(tbl->lock), flags ); + + /* Do the actual work */ + free_tce_range_nolock( tbl, tcenum, order ); + + /* Unlock the tce allocation bitmap */ + spin_unlock_irqrestore( &(tbl->lock), flags ); + +} + +static void tce_free_pSeriesLP(struct TceTable *tbl, dma_addr_t dma_addr, + unsigned order, unsigned numPages) +{ + u64 setTceRc; + long tcenum, freeTce, maxTcenum; + unsigned i; + union Tce tce; + + maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1; + + tcenum = dma_addr >> PAGE_SHIFT; + + freeTce = tcenum - tbl->startOffset; + + if ( freeTce > maxTcenum ) { + printk("free_tces: tcenum > maxTcenum\n"); + printk("\ttcenum = 0x%lx\n", tcenum); + printk("\tfreeTce = 0x%lx\n", freeTce); + printk("\tmaxTcenum = 0x%lx\n", maxTcenum); + printk("\tTCE Table = 0x%lx\n", (u64)tbl); + printk("\tbus# = 0x%lx\n", + (u64)tbl->busNumber ); + printk("\tsize = 0x%lx\n", (u64)tbl->size); + printk("\tstartOff = 0x%lx\n", + (u64)tbl->startOffset ); + printk("\tindex = 0x%lx\n", (u64)tbl->index); + return; + } + + for (i=0; iindex, + (u64)tcenum << 12, /* note: not freeTce */ + tce.wholeTce ); + if ( setTceRc ) { + printk("tce_free: setTce failed\n"); + printk("\trc = %ld\n", setTceRc); + printk("\tindex = 0x%lx\n", + (u64)tbl->index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\tfreeTce = 0x%lx\n", (u64)freeTce); + printk("\ttce val = 0x%lx\n", + tce.wholeTce ); + } + + ++tcenum; + } + + /* Make sure the update is visible to hardware. */ + __asm__ __volatile__ ("sync" : : : "memory"); + + free_tce_range( tbl, freeTce, order ); +} + +/* PowerPC Interrupts for lpar. */ +/* NOTE: this typedef is duplicated (for now) from xics.c! 
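+ * Keep the two definitions in sync.  Callers holding one of these ops
+ * tables raise an IPI with, e.g., ops->qirr_info(cpu, value); the
+ * pSeriesLP_ops table below routes each operation to an hcall.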
*/ +typedef struct { + int (*xirr_info_get)(int cpu); + void (*xirr_info_set)(int cpu, int val); + void (*cppr_info)(int cpu, u8 val); + void (*qirr_info)(int cpu, u8 val); +} xics_ops; +static int pSeriesLP_xirr_info_get(int n_cpu) +{ + unsigned long lpar_rc; + unsigned long return_value; + + lpar_rc = plpar_xirr(&return_value); + if (lpar_rc != H_Success) { + panic(" bad return code xirr - rc = %lx \n", lpar_rc); + } + return ((int)(return_value)); +} + +static void pSeriesLP_xirr_info_set(int n_cpu, int value) +{ + unsigned long lpar_rc; + unsigned long val64 = value & 0xffffffff; + + lpar_rc = plpar_eoi(val64); + if (lpar_rc != H_Success) { + panic(" bad return code EOI - rc = %ld, value=%lx \n", lpar_rc, val64); + } +} + +static void pSeriesLP_cppr_info(int n_cpu, u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_cppr(value); + if (lpar_rc != H_Success) { + panic(" bad return code cppr - rc = %lx \n", lpar_rc); + } +} + +static void pSeriesLP_qirr_info(int n_cpu , u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu),value); + if (lpar_rc != H_Success) { + panic(" bad return code qirr -ipi - rc = %lx \n", lpar_rc); + } +} + +xics_ops pSeriesLP_ops = { + pSeriesLP_xirr_info_get, + pSeriesLP_xirr_info_set, + pSeriesLP_cppr_info, + pSeriesLP_qirr_info +}; +/* end TAI-LPAR */ + + +int vtermno; /* virtual terminal# for udbg */ + +static void udbg_putcLP(unsigned char c) +{ + char buf[16]; + unsigned long rc; + + if (c == '\n') + udbg_putcLP('\r'); + + buf[0] = c; + do { + rc = plpar_put_term_char(vtermno, 1, buf); + } while(rc == H_Busy); +} + +/* Buffered chars getc */ +static long inbuflen; +static long inbuf[2]; /* must be 2 longs */ + +static int udbg_getc_pollLP(void) +{ + /* The interface is tricky because it may return up to 16 chars. + * We save them statically for future calls to udbg_getc(). + */ + char ch, *buf = (char *)inbuf; + int i; + long rc; + if (inbuflen == 0) { + /* get some more chars. */ + inbuflen = 0; + rc = plpar_get_term_char(vtermno, &inbuflen, buf); + if (inbuflen == 0 && rc == H_Success) + return -1; + } + ch = buf[0]; + for (i = 1; i < inbuflen; i++) /* shuffle them down. */ + buf[i-1] = buf[i]; + inbuflen--; + return ch; +} + +static unsigned char udbg_getcLP(void) +{ + int ch; + for (;;) { + ch = udbg_getc_pollLP(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + + +/* This is called early in setup.c. + * Use it to setup page table ppc_md stuff as well as udbg. + */ +void pSeriesLP_init_early(void) +{ + ppc_md.hpte_invalidate = hpte_invalidate_pSeriesLP; + ppc_md.hpte_updatepp = hpte_updatepp_pSeriesLP; + ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeriesLP; + ppc_md.hpte_getword0 = hpte_getword0_pSeriesLP; + ppc_md.hpte_selectslot = hpte_selectslot_pSeriesLP; + ppc_md.hpte_create_valid = hpte_create_valid_pSeriesLP; + ppc_md.hpte_find = hpte_find_pSeriesLP; + + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + +#ifdef CONFIG_SMP + smp_init_pSeries(); +#endif + pSeries_pcibios_init_early(); + + /* The keyboard is not useful in the LPAR environment. + * Leave all the interfaces NULL. 
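+ * Early debug I/O instead goes either to a real com port, when the
+ * naca advertises one, or to the hypervisor virtual terminal via the
+ * *LP routines above.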
+ */
+
+	if (naca->serialPortAddr) {
+		void *comport = (void *)__ioremap(naca->serialPortAddr, 16, _PAGE_NO_CACHE);
+		udbg_init_uart(comport);
+		ppc_md.udbg_putc = udbg_putc;
+		ppc_md.udbg_getc = udbg_getc;
+		ppc_md.udbg_getc_poll = udbg_getc_poll;
+	} else {
+		/* lookup the first virtual terminal number in case we don't have a com port.
+		 * Zero is probably correct in case someone calls udbg before the init.
+		 * The property is a pair of numbers.  The first is the starting termno (the
+		 * one we use) and the second is the number of terminals.
+		 */
+		u32 *termno;
+		struct device_node *np = find_path_device("/rtas");
+		if (np) {
+			termno = (u32 *)get_property(np, "ibm,termno", 0);
+			if (termno)
+				vtermno = termno[0];
+		}
+		ppc_md.udbg_putc = udbg_putcLP;
+		ppc_md.udbg_getc = udbg_getcLP;
+		ppc_md.udbg_getc_poll = udbg_getc_pollLP;
+	}
+}
+
+/* Confidential code for hvc_console.  Should move it back eventually. */
+
+int hvc_get_chars(int index, char *buf, int count)
+{
+	unsigned long got;
+
+	if (plpar_hcall(H_GET_TERM_CHAR, index, 0, 0, 0,
+			&got, buf, buf+8) == H_Success) {
+		/*
+		 * Work around a HV bug where it gives us a null
+		 * after every \r. -- paulus
+		 */
+		if (got > 0) {
+			int i;
+			for (i = 1; i < got; ++i) {
+				if (buf[i] == 0 && buf[i-1] == '\r') {
+					--got;
+					if (i < got)
+						memmove(&buf[i], &buf[i+1],
+							got - i);
+				}
+			}
+		}
+		return got;
+	}
+	return 0;
+}
+
+int hvc_put_chars(int index, const char *buf, int count)
+{
+	unsigned long dummy;
+	unsigned long *lbuf = (unsigned long *) buf;
+	long ret;
+
+	ret = plpar_hcall(H_PUT_TERM_CHAR, index, count, lbuf[0], lbuf[1],
+			  &dummy, &dummy, &dummy);
+	if (ret == H_Success)
+		return count;
+	if (ret == H_Busy)
+		return 0;
+	return -1;
+}
+
+int hvc_count(int *start_termno)
+{
+	u32 *termno;
+	struct device_node *dn;
+
+	if ((dn = find_path_device("/rtas")) != NULL) {
+		if ((termno = (u32 *)get_property(dn, "ibm,termno", 0)) != NULL) {
+			if (start_termno)
+				*start_termno = termno[0];
+			return termno[1];
+		}
+	}
+	return 0;
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pSeries_pci.c linuxppc64_2_4/arch/ppc64/kernel/pSeries_pci.c
--- ../kernel.org/linux/arch/ppc64/kernel/pSeries_pci.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/pSeries_pci.c Mon Dec 3 14:59:09 2001
@@ -0,0 +1,792 @@
+/*
+ * pSeries_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ *
+ * pSeries specific routines for PCI.
+ *
+ * Based on code from pci.c and chrp_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_EEH +#include +#endif + +#include "xics.h" +#include "open_pic.h" +#include "pci.h" + +extern struct device_node *allnodes; + +/******************************************************************* + * Forward declares of prototypes. + *******************************************************************/ +unsigned long find_and_init_phbs(void); +struct pci_controller* alloc_phb(struct device_node *dev, char *model, unsigned int addr_size_words) ; +void pSeries_pcibios_fixup(void); +static int rtas_fake_read(struct device_node *dn, int offset, int nbytes, unsigned long *returnval); + +/* RTAS tokens */ +static int read_pci_config; +static int write_pci_config; +static int ibm_read_pci_config; +static int ibm_write_pci_config; + +/****************************************************************************** + * + * pSeries I/O Operations to access the PCI configuration space. + * + *****************************************************************************/ +#define RTAS_PCI_READ_OP(size, type, nbytes) \ +int __chrp \ +rtas_read_config_##size(struct device_node *dn, int offset, type val) { \ + unsigned long returnval = ~0L; \ + unsigned long buid; \ + unsigned int addr; \ + int ret; \ + \ + if (dn == NULL) { \ + ret = -2; \ + } else if (dn->status) { \ + ret = -1; \ + } else { \ + addr = (dn->busno << 16) | (dn->devfn << 8) | offset; \ + buid = dn->phb->buid; \ + if (buid) { \ + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, addr, buid >> 32, buid & 0xffffffff, nbytes); \ + if (ret < 0) \ + ret = rtas_fake_read(dn, offset, nbytes, &returnval); \ + } else { \ + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, nbytes); \ + } \ + } \ + *val = returnval; \ + return ret; \ +} \ +int __chrp \ +rtas_pci_read_config_##size(struct pci_dev *dev, int offset, type val) { \ + struct device_node *dn = pci_device_to_OF_node(dev); \ + int ret = rtas_read_config_##size(dn, offset, val); \ + /* udbg_printf("read bus=%x, devfn=%x, ret=%d phb=%lx, dn=%lx\n", dev->bus->number, dev->devfn, ret, dn ? dn->phb : 0, dn); */ \ + return ret ? 
PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; \
+}
+
+#define RTAS_PCI_WRITE_OP(size, type, nbytes) \
+int __chrp \
+rtas_write_config_##size(struct device_node *dn, int offset, type val) { \
+	unsigned long buid; \
+	unsigned int addr; \
+	int ret; \
+	\
+	if (dn == NULL) { \
+		ret = -2; \
+	} else if (dn->status) { \
+		ret = -1; \
+	} else { \
+		buid = dn->phb->buid; \
+		addr = (dn->busno << 16) | (dn->devfn << 8) | offset; \
+		if (buid) { \
+			ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, nbytes, (ulong) val); \
+		} else { \
+			ret = rtas_call(write_pci_config, 3, 1, NULL, addr, nbytes, (ulong)val); \
+		} \
+	} \
+	return ret; \
+} \
+int __chrp \
+rtas_pci_write_config_##size(struct pci_dev *dev, int offset, type val) { \
+	return rtas_write_config_##size(pci_device_to_OF_node(dev), offset, val); \
+}
+
+RTAS_PCI_READ_OP(byte, u8 *, 1)
+RTAS_PCI_READ_OP(word, u16 *, 2)
+RTAS_PCI_READ_OP(dword, u32 *, 4)
+RTAS_PCI_WRITE_OP(byte, u8, 1)
+RTAS_PCI_WRITE_OP(word, u16, 2)
+RTAS_PCI_WRITE_OP(dword, u32, 4)
+
+struct pci_ops rtas_pci_ops = {
+	rtas_pci_read_config_byte,
+	rtas_pci_read_config_word,
+	rtas_pci_read_config_dword,
+	rtas_pci_write_config_byte,
+	rtas_pci_write_config_word,
+	rtas_pci_write_config_dword,
+};
+
+/*
+ * Handle the case where rtas refuses to do a pci config read.
+ * This currently only happens with some PHBs in which case we totally fake
+ * out the values (and call it a speedwagon -- something we could look up
+ * in the device tree).
+ */
+static int
+rtas_fake_read(struct device_node *dn, int offset, int nbytes, unsigned long *returnval)
+{
+	char *device_type = (char *)get_property(dn, "device_type", 0);
+	u32 *class_code = (u32 *)get_property(dn, "class-code", 0);
+
+	*returnval = ~0;	/* float by default */
+
+	/* udbg_printf("rtas_fake_read dn=%p, offset=0x%02x, nbytes=%d, device_type=%s\n", dn, offset, nbytes, device_type ? device_type : ""); */
+	if (device_type && strcmp(device_type, "pci") != 0)
+		return -3;	/* Not a phb or bridge */
+
+	/* NOTE: class_code != NULL => EADS pci bridge.  Else a PHB */
+	if (nbytes == 1) {
+		if (offset == PCI_HEADER_TYPE)
+			*returnval = 0x80;	/* multifunction */
+		else if (offset == PCI_INTERRUPT_PIN || offset == PCI_INTERRUPT_LINE)
+			*returnval = 0;
+	} else if (nbytes == 2) {
+		if (offset == PCI_SUBSYSTEM_VENDOR_ID || offset == PCI_SUBSYSTEM_ID)
+			*returnval = 0;
+		else if (offset == PCI_COMMAND)
+			*returnval = PCI_COMMAND_PARITY|PCI_COMMAND_MASTER|PCI_COMMAND_MEMORY;
+	} else if (nbytes == 4) {
+		if (offset == PCI_VENDOR_ID)
+			*returnval = 0x1014 | ((class_code ? 0x8b : 0x102) << 16);	/* a phb */
+		else if (offset == PCI_REVISION_ID)
+			*returnval = (class_code ? PCI_CLASS_BRIDGE_PCI : PCI_CLASS_BRIDGE_HOST) << 16;	/* revs are zero */
+		else if ((offset >= PCI_BASE_ADDRESS_0 && offset <= PCI_BASE_ADDRESS_5) || offset == PCI_ROM_ADDRESS)
+			*returnval = 0;
+	}
+
+	/* printk("fake: %s nbytes=%d, offset=%lx ret=%lx\n", class_code ? "EADS" : "PHB", nbytes, offset, *returnval); */
+	return 0;
+}
+
+/******************************************************************
+ * pci_read_irq_line
+ *
+ * Reads the Interrupt Pin to determine whether an interrupt is used
+ * by the card.  If the interrupt is used, then gets the interrupt
+ * line from OpenFirmware and sets it in the pci_dev and the PCI
+ * config space.
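+ *
+ * A pin value of zero means the device uses no interrupt and it is
+ * left alone; otherwise dev->irq is taken from the OF node's
+ * intrs[0].line and written back to PCI_INTERRUPT_LINE.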
+ * + ******************************************************************/ +int +pci_read_irq_line(struct pci_dev *Pci_Dev) +{ + u8 InterruptPin; + struct device_node *Node; + + pci_read_config_byte(Pci_Dev, PCI_INTERRUPT_PIN, &InterruptPin); + if (InterruptPin == 0) { + PPCDBG(PPCDBG_BUSWALK,"\tDevice: %s No Interrupt used by device.\n",Pci_Dev->slot_name); + return 0; + } + Node = pci_device_to_OF_node(Pci_Dev); + if ( Node == NULL) { + PPCDBG(PPCDBG_BUSWALK,"\tDevice: %s Device Node not found.\n",Pci_Dev->slot_name); + return -1; + } + if (Node->n_intrs == 0) { + PPCDBG(PPCDBG_BUSWALK,"\tDevice: %s No Device OF interrupts defined.\n",Pci_Dev->slot_name); + return -1; + } + Pci_Dev->irq = Node->intrs[0].line; + pci_write_config_byte(Pci_Dev, PCI_INTERRUPT_LINE, Pci_Dev->irq); + + PPCDBG(PPCDBG_BUSWALK,"\tDevice: %s pci_dev->irq = 0x%02X\n",Pci_Dev->slot_name,Pci_Dev->irq); + return 0; +} + +/****************************************************************** + * Find all PHBs in the system and initialize a set of data + * structures to represent them. + ******************************************************************/ +unsigned long __init +find_and_init_phbs(void) +{ + struct device_node *Pci_Node; + struct pci_controller *phb; + unsigned int root_addr_size_words = 0, this_addr_size_words = 0; + unsigned int this_addr_count = 0, range_stride; + unsigned int *ui_ptr = NULL, *ranges; + char *model; + struct pci_range64 range; + struct resource *res; + unsigned int memno, rlen, i, index; + unsigned int *opprop; + int has_isa = 0; + PPCDBG(PPCDBG_PHBINIT, "find_and_init_phbs\n"); + + read_pci_config = rtas_token("read-pci-config"); + write_pci_config = rtas_token("write-pci-config"); + ibm_read_pci_config = rtas_token("ibm,read-pci-config"); + ibm_write_pci_config = rtas_token("ibm,write-pci-config"); +#ifdef CONFIG_PPC_EEH + eeh_init(); +#endif + + if (naca->interrupt_controller == IC_OPEN_PIC) { + opprop = (unsigned int *)get_property(find_path_device("/"), + "platform-open-pic", NULL); + } + + /* Get the root address word size. */ + ui_ptr = (unsigned int *) get_property(find_path_device("/"), + "#size-cells", NULL); + if (ui_ptr) { + root_addr_size_words = *ui_ptr; + } else { + PPCDBG(PPCDBG_PHBINIT, "\tget #size-cells failed.\n"); + return(-1); + } + + if (find_type_devices("isa")) { + has_isa = 1; + PPCDBG(PPCDBG_PHBINIT, "\tFound an ISA bus.\n"); + } + + index = 0; + + /****************************************************************** + * Find all PHB devices and create an object for them. + ******************************************************************/ + for (Pci_Node = find_devices("pci"); Pci_Node != NULL; Pci_Node = Pci_Node->next) { + model = (char *) get_property(Pci_Node, "model", NULL); + if (model != NULL) { + phb = alloc_phb(Pci_Node, model, root_addr_size_words); + if (phb == NULL) return(-1); + } + else { + continue; + } + + /* Get this node's address word size. */ + ui_ptr = (unsigned int *) get_property(Pci_Node, "#size-cells", NULL); + if (ui_ptr) + this_addr_size_words = *ui_ptr; + else + this_addr_size_words = 1; + /* Get this node's address word count. 
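+ * As an illustration, with the usual PCI values of 3 address cells
+ * and 2 size cells on this node and a 2-cell root, range_stride below
+ * comes to 3 + 2 + 2 = 7 words per ranges entry; a 1-cell root gives
+ * the 6-word pci_range32 layout handled in the loop.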
*/ + ui_ptr = (unsigned int *) get_property(Pci_Node, "#address-cells", NULL); + if (ui_ptr) + this_addr_count = *ui_ptr; + else + this_addr_count = 3; + + range_stride = this_addr_count + root_addr_size_words + this_addr_size_words; + + memno = 0; + phb->io_base_phys = 0; + + ranges = (unsigned int *) get_property(Pci_Node, "ranges", &rlen); + PPCDBG(PPCDBG_PHBINIT, "\trange_stride = 0x%lx, rlen = 0x%x\n", range_stride, rlen); + + for (i = 0; i < (rlen/sizeof(*ranges)); i+=range_stride) { + /* Put the PCI addr part of the current element into a + * '64' struct. + */ + range = *((struct pci_range64 *)(ranges + i)); + + /* If this is a '32' element, map into a 64 struct. */ + if ((range_stride * sizeof(int)) == + sizeof(struct pci_range32)) { + range.parent_addr = + (unsigned long)(*(ranges + i + 3)); + range.size = + (((unsigned long)(*(ranges + i + 4)))<<32) | + (*(ranges + i + 5)); + } else { + range.parent_addr = + (((unsigned long)(*(ranges + i + 3)))<<32) | + (*(ranges + i + 4)); + range.size = + (((unsigned long)(*(ranges + i + 5)))<<32) | + (*(ranges + i + 6)); + } + + PPCDBG(PPCDBG_PHBINIT, "\trange.parent_addr = 0x%lx\n", + range.parent_addr); + PPCDBG(PPCDBG_PHBINIT, "\trange.child_addr.hi = 0x%lx\n", + range.child_addr.a_hi); + PPCDBG(PPCDBG_PHBINIT, "\trange.child_addr.mid = 0x%lx\n", + range.child_addr.a_mid); + PPCDBG(PPCDBG_PHBINIT, "\trange.child_addr.lo = 0x%lx\n", + range.child_addr.a_lo); + PPCDBG(PPCDBG_PHBINIT, "\trange.size = 0x%lx\n", + range.size); + + res = NULL; + switch ((range.child_addr.a_hi >> 24) & 0x3) { + case 1: /* I/O space */ + PPCDBG(PPCDBG_PHBINIT, "\tIO Space\n"); + phb->io_base_phys = range.parent_addr; + res = &phb->io_resource; + res->name = Pci_Node->full_name; + res->flags = IORESOURCE_IO; +#ifdef CONFIG_PPC_EEH + if (!isa_io_base && has_isa) { + /* map a page for ISA ports. Not EEH protected. 
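+ * (ISA drivers use hard coded port numbers, so they get this one
+ * plain mapping at isa_io_base rather than EEH tokens.)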
*/ + isa_io_base = (unsigned long)__ioremap(phb->io_base_phys, PAGE_SIZE, _PAGE_NO_CACHE); + } + res->start = phb->io_base_virt = eeh_token(index, 0, 0, 0); + res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); +#else + phb->io_base_virt = ioremap(phb->io_base_phys, range.size); + if (!pci_io_base) { + pci_io_base = (unsigned long)phb->io_base_virt; + if (has_isa) + isa_io_base = pci_io_base; + } + res->start = ((((unsigned long) range.child_addr.a_mid) << 32) | (range.child_addr.a_lo)); + res->start += (unsigned long)phb->io_base_virt; + res->end = res->start + range.size - 1; +#endif + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + phb->pci_io_offset = range.parent_addr - + ((((unsigned long) + range.child_addr.a_mid) << 32) | + (range.child_addr.a_lo)); + PPCDBG(PPCDBG_PHBINIT, "\tpci_io_offset = 0x%lx\n", + phb->pci_io_offset); + break; + case 2: /* mem space */ + PPCDBG(PPCDBG_PHBINIT, "\tMem Space\n"); + phb->pci_mem_offset = range.parent_addr - + ((((unsigned long) + range.child_addr.a_mid) << 32) | + (range.child_addr.a_lo)); + PPCDBG(PPCDBG_PHBINIT, "\tpci_mem_offset = 0x%lx\n", + phb->pci_mem_offset); + if (memno < sizeof(phb->mem_resources)/sizeof(phb->mem_resources[0])) { + res = &(phb->mem_resources[memno]); + ++memno; + res->name = Pci_Node->full_name; + res->flags = IORESOURCE_MEM; +#ifdef CONFIG_PPC_EEH + res->start = eeh_token(index, 0, 0, 0); + res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); +#else + res->start = range.parent_addr; + res->end = range.parent_addr + range.size - 1; +#endif + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + break; + } + } + PPCDBG(PPCDBG_PHBINIT, "\tphb->io_base_phys = 0x%lx\n", + phb->io_base_phys); + PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_mem_offset = 0x%lx\n", + phb->pci_mem_offset); + + if (naca->interrupt_controller == IC_OPEN_PIC) { + int addr = root_addr_size_words * (index + 2) - 1; + openpic_setup_ISU(index, opprop[addr]); + } + index++; + } + pci_devs_phb_init(); + return 0; /*Success */ +} + +/****************************************************************** + * + * Allocate and partially initialize a structure to represent a PHB. + * + ******************************************************************/ +struct pci_controller * +alloc_phb(struct device_node *dev, char *model, unsigned int addr_size_words) +{ + struct pci_controller *phb; + unsigned int *ui_ptr = NULL, len; + struct reg_property64 reg_struct; + int *bus_range; + int *buid_vals; + + PPCDBG(PPCDBG_PHBINIT, "alloc_phb: %s\n", dev->full_name); + PPCDBG(PPCDBG_PHBINIT, "\tdev = 0x%lx\n", dev); + PPCDBG(PPCDBG_PHBINIT, "\tmodel = 0x%lx\n", model); + PPCDBG(PPCDBG_PHBINIT, "\taddr_size_words = 0x%lx\n", addr_size_words); + + /* Found a PHB, now figure out where his registers are mapped. */ + ui_ptr = (unsigned int *) get_property(dev, "reg", &len); + if (ui_ptr == NULL) { + PPCDBG(PPCDBG_PHBINIT, "\tget reg failed.\n"); + return(NULL); + } + + if (addr_size_words == 1) { + reg_struct.address = ((struct reg_property32 *)ui_ptr)->address; + reg_struct.size = ((struct reg_property32 *)ui_ptr)->size; + } else { + reg_struct = *((struct reg_property64 *)ui_ptr); + } + + PPCDBG(PPCDBG_PHBINIT, "\treg_struct.address = 0x%lx\n", reg_struct.address); + PPCDBG(PPCDBG_PHBINIT, "\treg_struct.size = 0x%lx\n", reg_struct.size); + + /*************************************************************** + * Set chip specific data in the phb, including types & + * register pointers. 
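+ * Python puts its config-space access registers at +0xf8000 from the
+ * PHB reg base; Speedwagon puts them at +0x140, with cfg_data sitting
+ * below cfg_addr (the "minus is correct" note below is deliberate).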
+ ***************************************************************/ + + /**************************************************************** + * Python + ***************************************************************/ + if (strstr(model, "Python")) { + PPCDBG(PPCDBG_PHBINIT, "\tCreate python\n"); + phb = pci_alloc_pci_controller("PHB PY",phb_type_python); + if (phb == NULL) return NULL; + + phb->cfg_addr = (volatile unsigned long *) + ioremap(reg_struct.address + 0xf8000, PAGE_SIZE); + PPCDBG(PPCDBG_PHBINIT, "\tcfg_addr_r = 0x%lx\n", + reg_struct.address + 0xf8000); + PPCDBG(PPCDBG_PHBINIT, "\tcfg_addr_v = 0x%lx\n", + phb->cfg_addr); + phb->cfg_data = (char*)(phb->cfg_addr + 0x02); + phb->phb_regs = (volatile unsigned long *) + ioremap(reg_struct.address + 0xf7000, PAGE_SIZE); + /* Python's register file is 1 MB in size. */ + phb->chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), + 0x100000); + /*************************************************************** + * Speedwagon + ***************************************************************/ + } else if (strstr(model, "Speedwagon")) { + PPCDBG(PPCDBG_PHBINIT, "\tCreate speedwagon\n"); + phb = pci_alloc_pci_controller("PHB SW",phb_type_speedwagon); + if (phb == NULL) return NULL; + + if (_machine == _MACH_pSeries) { + phb->cfg_addr = (volatile unsigned long *) + ioremap(reg_struct.address + 0x140, PAGE_SIZE); + phb->cfg_data = (char*)(phb->cfg_addr - 0x02); /* minus is correct */ + phb->phb_regs = (volatile unsigned long *) + ioremap(reg_struct.address, PAGE_SIZE); + /* Speedwagon's register file is 1 MB in size. */ + phb->chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), + 0x100000); + PPCDBG(PPCDBG_PHBINIT, "\tmapping chip_regs from 0x%lx -> 0x%lx\n", + reg_struct.address & 0xfffff, phb->chip_regs); + } else { + phb->cfg_addr = NULL; + phb->cfg_data = NULL; + phb->phb_regs = NULL; + phb->chip_regs = NULL; + } + + phb->local_number = ((reg_struct.address >> 12) & 0xf) - 0x8; + /*************************************************************** + * Trying to build a known just gets the code in trouble. + ***************************************************************/ + } else { + PPCDBG(PPCDBG_PHBINIT, "\tUnknown PHB Type!\n"); + printk("PCI: Unknown Phb Type!\n"); + return NULL; + } + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + PPCDBG(PPCDBG_PHBINIT, "Can't get bus-range for %s\n", dev->full_name); + kfree(phb); + return(NULL); + } + + /*************************************************************** + * Finished with the initialization + ***************************************************************/ + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + phb->arch_data = dev; + phb->ops = &rtas_pci_ops; + + buid_vals = (int *) get_property(dev, "ibm,fw-phb-id", &len); + if (buid_vals == NULL || len < 2 * sizeof(int)) { + phb->buid = 0; + } else { + /* Big bus system. These systems start new bus numbers under + * each phb. Until pci domains are standard, we depend on a + * patch which makes bus numbers ints and we shift the phb + * number into the upper bits. 
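+ * For example, a PHB with global_number 2 and a bus-range of 0..0xf
+ * ends up with first_busno 0x200 and last_busno 0x20f.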
+ */ + struct pci_bus check; + if (sizeof(check.number) == 1 || sizeof(check.primary) == 1 || + sizeof(check.secondary) == 1 || sizeof(check.subordinate) == 1) { + udbg_printf("pSeries_pci: this system has large bus numbers and the kernel was not\n" + "built with the patch that fixes include/linux/pci.h struct pci_bus so\n" + "number, primary, secondary and subordinate are ints.\n"); + panic("pSeries_pci: this system has large bus numbers and the kernel was not\n" + "built with the patch that fixes include/linux/pci.h struct pci_bus so\n" + "number, primary, secondary and subordinate are ints.\n"); + } + phb->buid = (((unsigned long)buid_vals[0]) << 32UL) | + (((unsigned long)buid_vals[1]) & 0xffffffff); + phb->first_busno += (phb->global_number << 8); + phb->last_busno += (phb->global_number << 8); + } + + /* Dump PHB information for Debug */ + PPCDBGCALL(PPCDBG_PHBINIT,dumpPci_Controller(phb) ); + + return phb; +} + +void +fixup_resources(struct pci_dev *dev) +{ + int i; + struct pci_controller *phb = PCI_GET_PHB_PTR(dev); +#ifdef CONFIG_PPC_EEH + struct device_node *dn; + unsigned long eeh_disable_bit; + + /* Add IBM loc code (slot) as a prefix to the device names for service */ + dn = pci_device_to_OF_node(dev); + if (dn) { + char *loc_code = get_property(dn, "ibm,loc-code", 0); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->name)) { + memmove(dev->name+loc_len+1, dev->name, sizeof(dev->name)-loc_len-1); + memcpy(dev->name, loc_code, loc_len); + dev->name[loc_len] = ' '; + dev->name[sizeof(dev->name)-1] = '\0'; + } + } + } + + if (is_eeh_configured(dev)) { + eeh_disable_bit = 0; + printk("PCI: eeh configured for %s %s\n", dev->slot_name, dev->name); + if (eeh_set_option(dev, EEH_ENABLE) != 0) { + printk("PCI: failed to enable eeh for %s %s\n", dev->slot_name, dev->name); + eeh_disable_bit = EEH_TOKEN_DISABLED; + } + } else { + /* Assume device is by default EEH_DISABLE'd */ + printk("PCI: eeh NOT configured for %s %s\n", dev->slot_name, dev->name); + eeh_disable_bit = EEH_TOKEN_DISABLED; + } +#endif + + PPCDBG(PPCDBG_PHBINIT, "fixup_resources:\n"); + PPCDBG(PPCDBG_PHBINIT, "\tphb = 0x%016LX\n", phb); + PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_io_offset = 0x%016LX\n", phb->pci_io_offset); + PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_mem_offset = 0x%016LX\n", phb->pci_mem_offset); + + PPCDBG(PPCDBG_PHBINIT, "\tdev->name = %s\n", dev->name); + PPCDBG(PPCDBG_PHBINIT, "\tdev->vendor:device = 0x%04X : 0x%04X\n", dev->vendor, dev->device); + + if (phb == NULL) + return; + + for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { + PPCDBG(PPCDBG_PHBINIT, "\tdevice %x.%x[%d] (flags %x) [%lx..%lx]\n", + dev->bus->number, dev->devfn, i, + dev->resource[i].flags, + dev->resource[i].start, + dev->resource[i].end); + + if ((dev->resource[i].start == 0) && (dev->resource[i].end == 0)) { + continue; + } + + if (dev->resource[i].flags & IORESOURCE_IO) { +#ifdef CONFIG_PPC_EEH + unsigned int busno = dev->bus ? 
dev->bus->number : 0;
+			unsigned long size = dev->resource[i].end - dev->resource[i].start;
+			unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->io_base_phys, size, _PAGE_NO_CACHE);
+			if (!addr)
+				panic("fixup_resources: ioremap failed!\n");
+			dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit;
+			dev->resource[i].end = dev->resource[i].start + size;
+#else
+			unsigned long offset = (unsigned long)phb->io_base_virt;
+			dev->resource[i].start += offset;
+			dev->resource[i].end += offset;
+#endif
+			PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx .. %lx]\n",
+			       dev->resource[i].start, dev->resource[i].end);
+		} else if (dev->resource[i].flags & IORESOURCE_MEM) {
+			if (dev->resource[i].start == 0) {
+				/* Bogus.  Probably an unused bridge. */
+				dev->resource[i].end = 0;
+			} else {
+#ifdef CONFIG_PPC_EEH
+				unsigned int busno = dev->bus ? dev->bus->number : 0;
+				unsigned long size = dev->resource[i].end - dev->resource[i].start;
+				unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->pci_mem_offset, size, _PAGE_NO_CACHE);
+				if (!addr)
+					panic("fixup_resources: ioremap failed!\n");
+				dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit;
+				dev->resource[i].end = dev->resource[i].start + size;
+#else
+				dev->resource[i].start += phb->pci_mem_offset;
+				dev->resource[i].end += phb->pci_mem_offset;
+#endif
+			}
+			PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx..%lx]\n",
+			       dev->resource[i].start, dev->resource[i].end);
+
+		} else {
+			continue;
+		}
+
+		/* zap the 2nd function of the winbond chip */
+		if (dev->resource[i].flags & IORESOURCE_IO
+		    && dev->bus->number == 0 && dev->devfn == 0x81)
+			dev->resource[i].flags &= ~IORESOURCE_IO;
+	}
+}
+
+void __init
+pSeries_pcibios_fixup(void)
+{
+	struct pci_dev *dev;
+
+	PPCDBG(PPCDBG_PHBINIT, "pSeries_pcibios_fixup: start\n");
+	pci_assign_all_busses = 0;
+
+	pci_for_each_dev(dev) {
+		pci_read_irq_line(dev);
+		PPCDBGCALL(PPCDBG_PHBINIT, dumpPci_Dev(dev) );
+	}
+
+	if (naca->interrupt_controller == IC_PPC_XIC) {
+		xics_isa_init();
+	}
+}
+
+/***********************************************************************
+ * pci_find_hose_for_OF_device
+ *
+ * This function finds the PHB that matches the device_node from
+ * OpenFirmware by scanning all the pci_controllers.
+ *
+ ***********************************************************************/
+struct pci_controller*
+pci_find_hose_for_OF_device(struct device_node *node)
+{
+	while (node) {
+		struct pci_controller *hose;
+		for (hose=hose_head;hose;hose=hose->next)
+			if (hose->arch_data == node)
+				return hose;
+		node=node->parent;
+	}
+	return NULL;
+}
+
+/***********************************************************************
+ * find_floppy(void)
+ *
+ * Finds the default floppy device, if the system has one, and returns
+ * the pci_dev for the isa bridge for the floppy device.
+ *
+ * Note: This function finds the first "fdc" device and then looks to
+ * the parent device which should be the isa bridge device.  If there
+ * is more than one floppy on the system, it will find the first one
+ * and maybe that is okay.
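+ *
+ * The parent's "reg" property packs the bridge address into its first
+ * word: bus = (reg[0] >> 16) & 0xff, devfn = (reg[0] >> 8) & 0xff,
+ * which is how pci_find_slot() is invoked below.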
+ ***********************************************************************/ +struct pci_dev* +find_floppy(void) +{ + struct device_node *floppy_dn; + struct pci_dev *floppy_dev = NULL; + int *reg; + + floppy_dn = find_type_devices("fdc"); + if (floppy_dn && floppy_dn->parent) { + if ((reg = (unsigned int *)get_property(floppy_dn->parent,"reg", 0)) != NULL) + floppy_dev = pci_find_slot((reg[0] & 0x00ff0000) >> 16, (reg[0] & 0x0000ff00) >> 8); + } + PPCDBG(PPCDBG_BUSWALK,"\tFloppy pci_dev\n"); + PPCDBGCALL(PPCDBG_BUSWALK, dumpPci_Dev(floppy_dev) ); + return floppy_dev; +} + +/*********************************************************************** + * ppc64_pcibios_init + * + * Chance to initialize and structures or variable before PCI Bus walk. + * + ***********************************************************************/ +void +pSeries_pcibios_init(void) +{ + PPCDBG(PPCDBG_PHBINIT, "\tppc64_pcibios_init Entry.\n"); + + if (get_property(find_path_device("/rtas"),"ibm,fw-phb-id",NULL) != NULL) { + PPCDBG(PPCDBG_PHBINIT, "\tFound: ibm,fw-phb-id\n"); + Pci_Large_Bus_System = 1; + } +} + +/* + * This is called very early before the page table is setup. + */ +void +pSeries_pcibios_init_early(void) +{ + ppc_md.pcibios_read_config_byte = rtas_read_config_byte; + ppc_md.pcibios_read_config_word = rtas_read_config_word; + ppc_md.pcibios_read_config_dword = rtas_read_config_dword; + ppc_md.pcibios_write_config_byte = rtas_write_config_byte; + ppc_md.pcibios_write_config_word = rtas_write_config_word; + ppc_md.pcibios_write_config_dword = rtas_write_config_dword; +} +/************************************************************************/ +/* Get a char* of the device physical location(U0.3-P1-I8) */ +/* See the Product Topology in the RS/6000 Architecture. */ +/************************************************************************/ +int device_Location(struct pci_dev *PciDev, char *BufPtr) +{ + struct device_node *DevNode = (struct device_node *)PciDev->sysdata; + return sprintf(BufPtr,"PCI: Bus%3d, Device%3d, Vendor %04X, Location %-12s", + PciDev->bus->number, + PCI_SLOT(PciDev->devfn), + PciDev->vendor, + (char*)get_property(DevNode,"ibm,loc-code",0)); +} +/************************************************************************/ +/* Set the slot reset line to the state passed in. */ +/* This is the platform specific for code for the pci_reset_device */ +/* function. */ +/************************************************************************/ +int pci_set_reset(struct pci_dev *PciDev, int state) +{ + return -1; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pacaData.c linuxppc64_2_4/arch/ppc64/kernel/pacaData.c --- ../kernel.org/linux/arch/ppc64/kernel/pacaData.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pacaData.c Wed Oct 17 12:24:42 2001 @@ -0,0 +1,118 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define __KERNEL__ 1 +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* The Paca is an array with one entry per processor. Each contains an + * ItLpPaca, which contains the information shared between the + * hypervisor and Linux. Each also contains an ItLpRegSave area which + * is used by the hypervisor to save registers. 
+ * On systems with hardware multi-threading, there are two threads + * per processor. The Paca array must contain an entry for each thread. + * The VPD Areas will give a max logical processors = 2 * max physical + * processors. The processor VPD array needs one entry per physical + * processor (not thread). + */ +#define PACAINITDATA(number,start,lpq,asrr,asrv) \ +{ \ + xLpPacaPtr: &xPaca[number].xLpPaca, \ + xLpRegSavePtr: &xPaca[number].xRegSav, \ + xPacaIndex: (number), /* Paca Index */ \ + default_decr: 0x00ff0000, /* Initial Decr */ \ + xStab_data: { \ + real: (asrr), /* Real pointer to segment table */ \ + virt: (asrv), /* Virt pointer to segment table */ \ + next_round_robin: 1 /* Round robin index */ \ + }, \ + lpQueuePtr: (lpq), /* &xItLpQueue, */ \ + xRtas: { \ + lock: SPIN_LOCK_UNLOCKED \ + }, \ + xProcStart: (start), /* Processor start */ \ + xLpPaca: { \ + xDesc: 0xd397d781, /* "LpPa" */ \ + xSize: sizeof(struct ItLpPaca), \ + xFPRegsInUse: 1, \ + xDynProcStatus: 2, \ + xDecrVal: 0x00ff0000, \ + xEndOfQuantum: 0xffffffffffffffff \ + }, \ + xRegSav: { \ + xDesc: 0xd397d9e2, /* "LpRS" */ \ + xSize: sizeof(struct ItLpRegSave) \ + }, \ + exception_sp: \ + (&xPaca[number].exception_stack[0]) - EXC_FRAME_SIZE, \ +} + +struct Paca xPaca[maxPacas] __page_aligned = { +#ifdef CONFIG_PPC_ISERIES + PACAINITDATA( 0, 1, &xItLpQueue, 0, 0xc000000000005000), +#else + PACAINITDATA( 0, 1, 0, 0x5000, 0xc000000000005000), +#endif + PACAINITDATA( 1, 0, 0, 0, 0), + PACAINITDATA( 2, 0, 0, 0, 0), + PACAINITDATA( 3, 0, 0, 0, 0), + PACAINITDATA( 4, 0, 0, 0, 0), + PACAINITDATA( 5, 0, 0, 0, 0), + PACAINITDATA( 6, 0, 0, 0, 0), + PACAINITDATA( 7, 0, 0, 0, 0), + PACAINITDATA( 8, 0, 0, 0, 0), + PACAINITDATA( 9, 0, 0, 0, 0), + PACAINITDATA(10, 0, 0, 0, 0), + PACAINITDATA(11, 0, 0, 0, 0), + PACAINITDATA(12, 0, 0, 0, 0), + PACAINITDATA(13, 0, 0, 0, 0), + PACAINITDATA(14, 0, 0, 0, 0), + PACAINITDATA(15, 0, 0, 0, 0), + PACAINITDATA(16, 0, 0, 0, 0), + PACAINITDATA(17, 0, 0, 0, 0), + PACAINITDATA(18, 0, 0, 0, 0), + PACAINITDATA(19, 0, 0, 0, 0), + PACAINITDATA(20, 0, 0, 0, 0), + PACAINITDATA(21, 0, 0, 0, 0), + PACAINITDATA(22, 0, 0, 0, 0), + PACAINITDATA(23, 0, 0, 0, 0), + PACAINITDATA(24, 0, 0, 0, 0), + PACAINITDATA(25, 0, 0, 0, 0), + PACAINITDATA(26, 0, 0, 0, 0), + PACAINITDATA(27, 0, 0, 0, 0), + PACAINITDATA(28, 0, 0, 0, 0), + PACAINITDATA(29, 0, 0, 0, 0), + PACAINITDATA(30, 0, 0, 0, 0), + PACAINITDATA(31, 0, 0, 0, 0), + PACAINITDATA(32, 0, 0, 0, 0), + PACAINITDATA(33, 0, 0, 0, 0), + PACAINITDATA(34, 0, 0, 0, 0), + PACAINITDATA(35, 0, 0, 0, 0), + PACAINITDATA(36, 0, 0, 0, 0), + PACAINITDATA(37, 0, 0, 0, 0), + PACAINITDATA(38, 0, 0, 0, 0), + PACAINITDATA(39, 0, 0, 0, 0), + PACAINITDATA(40, 0, 0, 0, 0), + PACAINITDATA(41, 0, 0, 0, 0), + PACAINITDATA(42, 0, 0, 0, 0), + PACAINITDATA(43, 0, 0, 0, 0), + PACAINITDATA(44, 0, 0, 0, 0), + PACAINITDATA(45, 0, 0, 0, 0), + PACAINITDATA(46, 0, 0, 0, 0), + PACAINITDATA(47, 0, 0, 0, 0) +}; diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pci.c linuxppc64_2_4/arch/ppc64/kernel/pci.c --- ../kernel.org/linux/arch/ppc64/kernel/pci.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pci.c Mon Dec 3 17:11:42 2001 @@ -0,0 +1,1041 @@ +/* + * + * + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_EEH +#include +#endif + +#include "pci.h" + +/* pci_io_base -- the base address from which io bars are offsets. + * This is the lowest I/O base address (so bar values are always positive), + * and it *must* be the start of ISA space if an ISA bus exists because + * ISA drivers use hard coded offsets. If no ISA bus exists a dummy + * page is mapped and isa_io_limit prevents access to it. + */ +unsigned long isa_io_base = 0; /* NULL if no ISA bus */ +unsigned long pci_io_base = 0; +unsigned long isa_mem_base = 0; +unsigned long pci_dram_offset = 0; + +/****************************************************************** + * Forward declare of prototypes + ******************************************************************/ +static void pcibios_fixup_resources(struct pci_dev* dev); +static void fixup_broken_pcnet32(struct pci_dev* dev); +void fixup_resources(struct pci_dev* dev); + +struct pci_dev *find_floppy(void); +void iSeries_pcibios_init(void); +void pSeries_pcibios_init(void); + + +extern struct Naca *naca; + +int pci_assign_all_busses = 0; + +struct pci_controller* hose_head; +struct pci_controller** hose_tail = &hose_head; + +/******************************************************************* + * Counters and control flags. + *******************************************************************/ +long Pci_Io_Read_Count = 0; +long Pci_Io_Write_Count = 0; +long Pci_Cfg_Read_Count = 0; +long Pci_Cfg_Write_Count= 0; +long Pci_Error_Count = 0; + +int Pci_Retry_Max = 3; /* Only retry 3 times */ +int Pci_Error_Flag = 1; /* Set Retry Error on. */ +int Pci_Trace_Flag = 0; + +/****************************************************************** + * + ******************************************************************/ +int global_phb_number = 0; /* Global phb counter */ +int Pci_Large_Bus_System = 0; +int Pci_Set_IOA_Address = 0; +int Pci_Manage_Phb_Space = 0; +struct pci_controller *phbtab[PCI_MAX_PHB]; + +static int pci_bus_count; + +/* Floppy dev for ppc64_fd_dma_setup(). May be null if no floppy in the system. 
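+ * Editor's note: anything that dereferences this pointer must guard
+ * against the no-floppy case, e.g.
+ *
+ *	if (ppc64_floppy_dev == NULL)
+ *		return -ENODEV;		(no floppy controller present)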
*/ +struct pci_dev *ppc64_floppy_dev = NULL; + +struct pci_fixup pcibios_fixups[] = { + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32 }, + { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources }, + { 0 } +}; + +static void fixup_broken_pcnet32(struct pci_dev* dev) +{ + if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { + dev->vendor = PCI_VENDOR_ID_AMD; + pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); + pci_name_device(dev); + } +} + +void pcibios_fixup_pbus_ranges(struct pci_bus *pbus, + struct pbus_set_ranges_data *pranges) +{ +} + + +void +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + u32 new, check; + int reg; + struct pci_controller* hose = PCI_GET_PHB_PTR(dev); + + new = res->start; + if (hose && res->flags & IORESOURCE_MEM) + new -= hose->pci_mem_offset; + new |= (res->flags & PCI_REGION_FLAG_MASK); + if (resource < 6) { + reg = PCI_BASE_ADDRESS_0 + 4*resource; + } else if (resource == PCI_ROM_RESOURCE) { + res->flags |= PCI_ROM_ADDRESS_ENABLE; + reg = dev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + pci_write_config_dword(dev, reg, new); + pci_read_config_dword(dev, reg, &check); + if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_ERR "PCI: Error while updating region " + "%s/%d (%08x != %08x)\n", dev->slot_name, resource, + new, check); + } +} + +static void +pcibios_fixup_resources(struct pci_dev* dev) +{ + fixup_resources(dev); +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void +pcibios_align_resource(void *data, struct resource *res, unsigned long size) +{ + struct pci_dev *dev = data; + + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + if (size > 0x100) { + printk(KERN_ERR "PCI: Can not align I/O Region %s %s because size %ld is too large.\n", + dev->slot_name, res->name, size); + } + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. 
+ * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void __init +pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct list_head *ln; + struct pci_bus *bus; + int i; + struct resource *res, *pr; + + /* Depth-First Search on bus tree */ + for (ln=bus_list->next; ln != bus_list; ln=ln->next) { + bus = pci_bus_b(ln); + for (i = 0; i < 4; ++i) { + if ((res = bus->resource[i]) == NULL || !res->flags) + continue; + if (bus->parent == NULL) + pr = (res->flags & IORESOURCE_IO)? + &ioport_resource: &iomem_resource; + else + pr = pci_find_parent_resource(bus->self, res); + + if (pr == res) + continue; /* transparent bus or undefined */ + if (pr && request_resource(pr, res) == 0) + continue; + printk(KERN_ERR "PCI: Cannot allocate resource region " + "%d of PCI bridge %x\n", i, bus->number); + printk(KERN_ERR "PCI: resource is %lx..%lx (%lx), parent %p\n", + res->start, res->end, res->flags, pr); + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +static void __init +pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev; + int idx, disabled; + u16 command; + struct resource *r, *pr; + + pci_for_each_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for(idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + PPCDBG(PPCDBG_PHBINIT, + "PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", + r->start, r->end, r->flags, disabled, pass); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + PPCDBG(PPCDBG_PHBINIT, + "PCI: Cannot allocate resource region %d of device %s, pr = 0x%lx\n", idx, dev->slot_name, pr); + if(pr) { + PPCDBG(PPCDBG_PHBINIT, + "PCI: Cannot allocate resource 0x%lx\n", request_resource(pr,r)); + } + /* We'll assign a new address later */ + r->end -= r->start; + r->start = 0; + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & PCI_ROM_ADDRESS_ENABLE) { + /* Turn the ROM off, leave the resource region, but keep it unregistered. 
*/ + u32 reg; + r->flags &= ~PCI_ROM_ADDRESS_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } + } +} + +static void __init +pcibios_assign_resources(void) +{ + struct pci_dev *dev; + int idx; + struct resource *r; + + pci_for_each_dev(dev) { + int class = dev->class >> 8; + + /* Don't touch classless devices and host bridges */ + if (!class || class == PCI_CLASS_BRIDGE_HOST) + continue; + + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + + /* + * Don't touch IDE controllers and I/O ports of video cards! + */ + if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || + (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) + continue; + + /* + * We shall assign a new address to this resource, either because + * the BIOS forgot to do so or because we have decided the old + * address was unusable for some reason. + */ + if (!r->start && r->end && ppc_md.pcibios_enable_device_hook && + !ppc_md.pcibios_enable_device_hook(dev, 1)) + pci_assign_resource(dev, idx); + } + + if (0) { /* don't assign ROMs */ + r = &dev->resource[PCI_ROM_RESOURCE]; + r->end -= r->start; + r->start = 0; + if (r->end) + pci_assign_resource(dev, PCI_ROM_RESOURCE); + } + } +} + + +int +pcibios_enable_resources(struct pci_dev *dev) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * Allocate pci_controller(phb) initialized common variables. + */ +struct pci_controller * __init +pci_alloc_pci_controller(char *model, enum phb_types controller_type) +{ + struct pci_controller *hose; + PPCDBG(PPCDBG_PHBINIT, "PCI: Allocate pci_controller for %s\n",model); + hose = (struct pci_controller *)alloc_bootmem(sizeof(struct pci_controller)); + if(hose == NULL) { + printk(KERN_ERR "PCI: Allocate pci_controller failed.\n"); + return NULL; + } + memset(hose, 0, sizeof(struct pci_controller)); + if(strlen(model) < 8) strcpy(hose->what,model); + else memcpy(hose->what,model,7); + hose->type = controller_type; + hose->global_number = global_phb_number; + phbtab[global_phb_number++] = hose; + + *hose_tail = hose; + hose_tail = &hose->next; + return hose; +} + +/* + * This fixup is arch independent and probably should go somewhere else. + */ +void __init +pcibios_generic_fixup(void) +{ + struct pci_dev *dev; + + /* Fix miss-identified vendor AMD pcnet32 adapters. 
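+	 * (These adapters report the Trident vendor ID paired with the AMD
+	 * Lance device ID; the vendor field is patched back to
+	 * PCI_VENDOR_ID_AMD, matching what fixup_broken_pcnet32() writes
+	 * into config space.)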
*/ + dev = NULL; + while ((dev = pci_find_device(PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, dev)) != NULL && + dev->class == (PCI_CLASS_NETWORK_ETHERNET << 8)) + dev->vendor = PCI_VENDOR_ID_AMD; +} + + + +/*********************************************************************** + * + * + * + ***********************************************************************/ +void __init +pcibios_init(void) +{ + struct pci_controller *hose; + struct pci_bus *bus; + int next_busno; + +#ifndef CONFIG_PPC_ISERIES + pSeries_pcibios_init(); +#else + iSeries_pcibios_init(); +#endif + + printk("PCI: Probing PCI hardware\n"); + PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware\n"); + + + /* Scan all of the recorded PCI controllers. */ + for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + hose->last_busno = 0xff; + bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); + hose->bus = bus; + hose->last_busno = bus->subordinate; + if (pci_assign_all_busses || next_busno <= hose->last_busno) + next_busno = hose->last_busno+1; + } + pci_bus_count = next_busno; + + /* Call machine dependant fixup */ + if (ppc_md.pcibios_fixup) { + ppc_md.pcibios_fixup(); + } + + /* Generic fixups */ + pcibios_generic_fixup(); + + /* Allocate and assign resources */ + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + pcibios_assign_resources(); + +#ifndef CONFIG_PPC_ISERIES + pci_fix_bus_sysdata(); + + create_tce_tables(); + PPCDBG(PPCDBG_BUSWALK,"pSeries create_tce_tables()\n"); +#endif + ppc64_floppy_dev = find_floppy(); + + printk("PCI: Probing PCI hardware done\n"); + PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware done.\n"); + +} + +int __init +pcibios_assign_all_busses(void) +{ + return pci_assign_all_busses; +} + +unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, + unsigned long start, unsigned long size) +{ + return start; +} + +void __init pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_controller *phb = PCI_GET_PHB_PTR(bus); + struct resource *res; + unsigned long io_offset; + int i; + +#ifndef CONFIG_PPC_ISERIES + if (bus->parent == NULL) { + /* This is a host bridge - fill in its resources */ + phb->bus = bus; + bus->resource[0] = res = &phb->io_resource; + if (!res->flags) + BUG(); /* No I/O resource for this PHB? */ + + for (i = 0; i < 3; ++i) { + res = &phb->mem_resources[i]; + if (!res->flags) { + if (i == 0) + BUG(); /* No memory resource for this PHB? */ + } + bus->resource[i+1] = res; + } + } else { + /* This is a subordinate bridge */ + pci_read_bridge_bases(bus); + + for (i = 0; i < 4; ++i) { + if ((res = bus->resource[i]) == NULL) + continue; + if (!res->flags) + continue; + if (res == pci_find_parent_resource(bus->self, res)) { + /* Transparent resource -- don't try to "fix" it. 
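+				 * (A transparent bridge forwards its parent's
+				 * ranges, so pci_find_parent_resource()
+				 * returns this very resource and no offset
+				 * fixup is needed.)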
*/ + continue; + } +#ifdef CONFIG_PPC_EEH + if (res->flags & (IORESOURCE_IO|IORESOURCE_MEM)) { + res->start = eeh_token(phb->global_number, bus->number, 0, 0); + res->end = eeh_token(phb->global_number, bus->number, 0xff, 0xffffffff); + } +#else + if (res->flags & IORESOURCE_IO) { + res->start += (unsigned long)phb->io_base_virt; + res->end += (unsigned long)phb->io_base_virt; + } else if (phb->pci_mem_offset + && (res->flags & IORESOURCE_MEM)) { + if (res->start < phb->pci_mem_offset) { + res->start += phb->pci_mem_offset; + res->end += phb->pci_mem_offset; + } + } +#endif + } + } +#endif + if ( ppc_md.pcibios_fixup_bus ) + ppc_md.pcibios_fixup_bus(bus); +} + +char __init *pcibios_setup(char *str) +{ + return str; +} + +int pcibios_enable_device(struct pci_dev *dev) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + PPCDBG(PPCDBG_BUSWALK,"PCI: "__FUNCTION__" for device %s \n",dev->slot_name); + if (ppc_md.pcibios_enable_device_hook) + if (ppc_md.pcibios_enable_device_hook(dev, 0)) + return -EINVAL; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + dev->slot_name, old_cmd, cmd); + PPCDBG(PPCDBG_BUSWALK,"PCI: Enabling device %s \n",dev->slot_name); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +struct pci_controller* +pci_bus_to_hose(int bus) +{ + struct pci_controller* hose = hose_head; + + for (; hose; hose = hose->next) + if (bus >= hose->first_busno && bus <= hose->last_busno) + return hose; + return NULL; +} + +void* +pci_bus_io_base(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return NULL; + return hose->io_base_virt; +} + +unsigned long +pci_bus_io_base_phys(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return 0; + return hose->io_base_phys; +} + +unsigned long +pci_bus_mem_base_phys(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return 0; + return hose->pci_mem_offset; +} + +/* + * Return the index of the PCI controller for device pdev. + */ +int pci_controller_num(struct pci_dev *dev) +{ + struct pci_controller *hose = PCI_GET_PHB_PTR(dev); + + return hose->global_number; +} + +/* + * Platform support for /proc/bus/pci/X/Y mmap()s, + * modelled on the sparc64 implementation by Dave Miller. + * -- paulus. + */ + +/* + * Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. 
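+ * Editor's sketch of that user-space sequence (illustrative only; the
+ * device path, BAR choice and page index are hypothetical):
+ *
+ *	int fd = open("/proc/bus/pci/00/01.0", O_RDWR);
+ *	unsigned int bar;
+ *	pread(fd, &bar, 4, PCI_BASE_ADDRESS_0);
+ *	ptr = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ *	           (bar & PCI_BASE_ADDRESS_MEM_MASK) + n * PAGE_SIZE);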
+ */ +static __inline__ int +__pci_mmap_make_offset(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = PCI_GET_PHB_PTR(dev); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return -EINVAL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { + offset += hose->pci_mem_offset; + res_bit = IORESOURCE_MEM; + } else { + io_offset = (unsigned long)hose->io_base_virt; + offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + offset += hose->io_base_phys - io_offset; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + return 0; + } + + return -EINVAL; +} + +/* + * Set vm_flags of VMA, as appropriate for this architecture, for a pci device + * mapping. + */ +static __inline__ void +__pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static __inline__ void +__pci_mmap_set_pgprot(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + long prot = pgprot_val(vma->vm_page_prot); + + /* XXX would be nice to have a way to ask for write-through */ + prot |= _PAGE_NO_CACHE; + if (!write_combine) + prot |= _PAGE_GUARDED; + vma->vm_page_prot = __pgprot(prot); +} + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine) +{ + int ret; + + ret = __pci_mmap_make_offset(dev, vma, mmap_state); + if (ret < 0) + return ret; + + __pci_mmap_set_flags(dev, vma, mmap_state); + __pci_mmap_set_pgprot(dev, vma, mmap_state, write_combine); + + ret = remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +/* Provide information on locations of various I/O regions in physical + * memory. Do this on a per-card basis so that we choose the right + * root bridge. 
* Note that the returned IO or memory base is a physical address
+ */
+
+long
+sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
+{
+	struct pci_controller* hose = pci_bus_to_hose(bus);
+	long result = -EOPNOTSUPP;
+
+	if (!hose)
+		return -ENODEV;
+
+	switch (which) {
+	case IOBASE_BRIDGE_NUMBER:
+		return (long)hose->first_busno;
+	case IOBASE_MEMORY:
+		return (long)hose->pci_mem_offset;
+	case IOBASE_IO:
+		return (long)hose->io_base_phys;
+	case IOBASE_ISA_IO:
+		return (long)isa_io_base;
+	case IOBASE_ISA_MEM:
+		return (long)isa_mem_base;
+	}
+
+	return result;
+}
+/************************************************************************/
+/* Formats the device information and location for service.            */
+/* - Pass in a pci_dev* pointer to the device.                         */
+/* - Pass in a buffer to place the data in.  Take care: the buffer     */
+/*   must really be as big as the caller says it is; it should be at   */
+/*   least 128 bytes.                                                  */
+/* Returns the length of the string data put in the buffer.            */
+/* The brand-specific method device_Location is called.                */
+/* Format:                                                              */
+/* PCI: Bus  0, Device 26, Vendor 0x12AE  Frame  1, Card C10  Ethernet */
+/* PCI: Bus  0, Device 26, Vendor 0x12AE  Location U0.3-P1-I8 Ethernet */
+/* For pSeries, see the Product Topology in the RS/6000 Architecture.  */
+/* For iSeries, see the Service Manuals.                               */
+/************************************************************************/
+int format_device_location(struct pci_dev* PciDev, char* BufPtr, int BufferSize)
+{
+	struct device_node* DevNode = (struct device_node*)PciDev->sysdata;
+	int LineLen = 0;
+
+	if (DevNode != NULL && BufferSize >= 128) {
+		LineLen += device_Location(PciDev, BufPtr+LineLen);
+		LineLen += sprintf(BufPtr+LineLen, " %12s",
+				   pci_class_name(PciDev->class >> 8) );
+	}
+	return LineLen;
+}
+/************************************************************************
+ * Saves the config registers for a device.
+ ************************************************************************
+ * Note: This does byte reads, so the data may appear byte swapped.
+ * The data returned in the pci_config_reg_save_area structure can be
+ * used to restore the registers later.  If the save failed, the data
+ * will not be restored.  (Yes, I know: you are most likely toast.)
+ ************************************************************************/
+int pci_save_config_regs(struct pci_dev* PciDev, struct pci_config_reg_save_area* SaveArea)
+{
+	memset(SaveArea, 0x00, sizeof(struct pci_config_reg_save_area) );
+	SaveArea->PciDev   = PciDev;
+	SaveArea->RCode    = 0;
+	SaveArea->Register = 0;
+	/******************************************************************
+	 * Save all the regs.  NOTE: restore skips the first 16 bytes.   *
+	 ******************************************************************/
+	while (SaveArea->Register < REG_SAVE_SIZE && SaveArea->RCode == 0) {
+		SaveArea->RCode = pci_read_config_byte(PciDev, SaveArea->Register,
+						       &SaveArea->Regs[SaveArea->Register]);
+		++SaveArea->Register;
+	}
+	if (SaveArea->RCode != 0) {	/* Ouch */
+		SaveArea->Flags = 0x80;
+		printk("PCI: pci_save_config_regs failed! %p 0x%04X\n", PciDev, SaveArea->RCode);
+		PCIFR( "pci_save_config_regs failed! %p 0x%04X\n", PciDev, SaveArea->RCode);
+	}
+	else {
+		SaveArea->Flags = 0x01;
+	}
+	return SaveArea->RCode;
+}
+
+/************************************************************************
+ * Restores the registers saved via the save function.  See the save
+ * function for details.
+ ************************************************************************/
+int pci_restore_config_regs(struct pci_dev* PciDev, struct pci_config_reg_save_area* SaveArea)
+{
+	if (SaveArea->PciDev != PciDev || SaveArea->Flags == 0x80 || SaveArea->RCode != 0) {
+		printk("PCI: pci_restore_config_regs failed! %p\n", PciDev);
+		return -1;
+	}
+	/******************************************************************
+	 * Don't touch the Cmd or BIST regs; the user must restore those. *
+	 * Restore PCI_VENDOR_ID & PCI_DEVICE_ID                          *
+	 * Restore PCI_CACHE_LINE_SIZE & PCI_LATENCY_TIMER                *
+	 * Restore saved regs from 0x10 to 0x3F                           *
+	 ******************************************************************/
+	SaveArea->Register = 0;
+	while(SaveArea->Register < REG_SAVE_SIZE && SaveArea->RCode == 0) {
+		SaveArea->RCode = pci_write_config_byte(PciDev, SaveArea->Register,
+							SaveArea->Regs[SaveArea->Register]);
+		++SaveArea->Register;
+		if      (SaveArea->Register == PCI_COMMAND)     SaveArea->Register = PCI_CACHE_LINE_SIZE;
+		else if (SaveArea->Register == PCI_HEADER_TYPE) SaveArea->Register = PCI_BASE_ADDRESS_0;
+	}
+	if (SaveArea->RCode != 0) {
+		printk("PCI: pci_restore_config_regs failed! %p 0x%04X\n", PciDev, SaveArea->RCode);
+		PCIFR( "pci_restore_config_regs failed! %p 0x%04X\n", PciDev, SaveArea->RCode);
+	}
+	return SaveArea->RCode;
+}
+
+/************************************************************************/
+/* Interface to toggle the reset line.                                  */
+/* Times are passed in tenths of a second: 10 means one second.        */
+/************************************************************************/
+int pci_reset_device(struct pci_dev* PciDev, int AssertTime, int DelayTime)
+{
+	unsigned long AssertDelay, WaitDelay;
+	int RtnCode;
+	/********************************************************************
+	 * Set defaults: Assert is .5 seconds, Wait is 3 seconds.
+	 ********************************************************************/
+	if (AssertTime == 0) AssertDelay = ( 5 * HZ)/10;
+	else                 AssertDelay = (AssertTime * HZ)/10;
+	if (DelayTime == 0)  WaitDelay   = (30 * HZ)/10;
+	else                 WaitDelay   = (DelayTime * HZ)/10;
+
+	/********************************************************************
+	 * Assert reset, wait, de-assert reset, wait for IOA to reset.
+	 * - Don't waste the CPU time on jiffies.
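+	 * Editor's note (illustrative): a typical caller brackets a reset
+	 * with the config-space helpers above:
+	 *
+	 *	struct pci_config_reg_save_area save;
+	 *	if (pci_save_config_regs(dev, &save) == 0) {
+	 *		pci_reset_device(dev, 0, 0);	(default times)
+	 *		pci_restore_config_regs(dev, &save);
+	 *	}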
+ ********************************************************************/ + RtnCode = pci_set_reset(PciDev,1); + if (RtnCode == 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(AssertDelay); /* Sleep for the time */ + RtnCode = pci_set_reset(PciDev,0); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(WaitDelay); + } + if (RtnCode == 0) { + PCIFR( "Bus%3d, Device%3d, Reset\n",PciDev->bus->number,PCI_SLOT(PciDev->devfn) ); + } + else { + printk("PCI: Bus%3d, Device%3d, Reset Failed:0x%04X\n",PciDev->bus->number,PCI_SLOT(PciDev->devfn),RtnCode ); + PCIFR( "Bus%3d, Device%3d, Reset Failed:0x%04X\n",PciDev->bus->number,PCI_SLOT(PciDev->devfn),RtnCode ); + } + return RtnCode; +} + +/***************************************************** + * Dump Resource information + *****************************************************/ +void dumpResources(struct resource* Resource) +{ + if(Resource != NULL) { + int Flags = 0x00000F00 & Resource->flags; + if(Resource->start == 0 && Resource->end == 0) return; + else if(Resource->start == Resource->end ) return; + else { + if (Flags == IORESOURCE_IO) udbg_printf("IO.:"); + else if(Flags == IORESOURCE_MEM) udbg_printf("MEM:"); + else if(Flags == IORESOURCE_IRQ) udbg_printf("IRQ:"); + else udbg_printf("0x%02X:",Resource->flags); + + } + udbg_printf("0x%016LX / 0x%016LX (0x%08X)\n", + Resource->start, Resource->end, Resource->end - Resource->start); + } +} + +int resourceSize(struct resource* Resource) +{ + if(Resource->start == 0 && Resource->end == 0) return 0; + else if(Resource->start == Resource->end ) return 0; + else return (Resource->end-1)-Resource->start; +} + + +/***************************************************** + * Dump PHB information for Debug + *****************************************************/ +void dumpPci_Controller(struct pci_controller* phb) +{ + udbg_printf("\tpci_controller= 0x%016LX\n", phb); + if (phb != NULL) { + udbg_printf("\twhat & type = %s 0x%02X\n ",phb->what,phb->type); + udbg_printf("\tbus = "); + if (phb->bus != NULL) udbg_printf("0x%02X\n", phb->bus->number); + else udbg_printf("\n"); + udbg_printf("\tarch_data = 0x%016LX\n", phb->arch_data); + udbg_printf("\tfirst_busno = 0x%02X\n", phb->first_busno); + udbg_printf("\tlast_busno = 0x%02X\n", phb->last_busno); + udbg_printf("\tio_base_virt* = 0x%016LX\n", phb->io_base_virt); + udbg_printf("\tio_base_phys = 0x%016LX\n", phb->io_base_phys); + udbg_printf("\tpci_mem_offset= 0x%016LX\n", phb->pci_mem_offset); + udbg_printf("\tpci_io_offset = 0x%016LX\n", phb->pci_io_offset); + + udbg_printf("\tcfg_addr = 0x%016LX\n", phb->cfg_addr); + udbg_printf("\tcfg_data = 0x%016LX\n", phb->cfg_data); + udbg_printf("\tphb_regs = 0x%016LX\n", phb->phb_regs); + udbg_printf("\tchip_regs = 0x%016LX\n", phb->chip_regs); + + + udbg_printf("\tResources\n"); + dumpResources(&phb->io_resource); + if (phb->mem_resource_count > 0) dumpResources(&phb->mem_resources[0]); + if (phb->mem_resource_count > 1) dumpResources(&phb->mem_resources[1]); + if (phb->mem_resource_count > 2) dumpResources(&phb->mem_resources[2]); + + udbg_printf("\tglobal_num = 0x%02X\n", phb->global_number); + udbg_printf("\tlocal_num = 0x%02X\n", phb->local_number); + } +} + +/***************************************************** + * Dump PHB information for Debug + *****************************************************/ +void dumpPci_Bus(struct pci_bus* Pci_Bus) +{ + int i; + udbg_printf("\tpci_bus = 0x%016LX \n",Pci_Bus); + if (Pci_Bus != NULL) { + + udbg_printf("\tnumber = 0x%02X 
\n",Pci_Bus->number); + udbg_printf("\tprimary = 0x%02X \n",Pci_Bus->primary); + udbg_printf("\tsecondary = 0x%02X \n",Pci_Bus->secondary); + udbg_printf("\tsubordinate = 0x%02X \n",Pci_Bus->subordinate); + + for (i=0;i<4;++i) { + if(Pci_Bus->resource[i] == NULL) continue; + if(Pci_Bus->resource[i]->start == 0 && Pci_Bus->resource[i]->end == 0) break; + udbg_printf("\tResources[%d]",i); + dumpResources(Pci_Bus->resource[i]); + } + } +} + +/***************************************************** + * Dump Device information for Debug + *****************************************************/ +void dumpPci_Dev(struct pci_dev* Pci_Dev) +{ + int i; + udbg_printf("\tpci_dev* = 0x%p\n",Pci_Dev); + if ( Pci_Dev == NULL ) return; + udbg_printf("\tname = %s \n",Pci_Dev->name); + udbg_printf("\tbus* = 0x%p\n",Pci_Dev->bus); + udbg_printf("\tsysdata* = 0x%p\n",Pci_Dev->sysdata); + udbg_printf("\tDevice = 0x%4X%02X:%02X.%02X 0x%04X:%04X\n", + PCI_GET_PHB_NUMBER(Pci_Dev), + PCI_GET_BUS_NUMBER(Pci_Dev), + PCI_SLOT(Pci_Dev->devfn), + PCI_FUNC(Pci_Dev->devfn), + Pci_Dev->vendor, + Pci_Dev->device); + udbg_printf("\tHdr/Irq = 0x%02X/0x%02X \n",Pci_Dev->hdr_type,Pci_Dev->irq); + for (i=0;iresource[i].start == 0 && Pci_Dev->resource[i].end == 0) continue; + udbg_printf("\tResources[%d] ",i); + dumpResources(&Pci_Dev->resource[i]); + } + dumpResources(&Pci_Dev->resource[i]); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pci.h linuxppc64_2_4/arch/ppc64/kernel/pci.h --- ../kernel.org/linux/arch/ppc64/kernel/pci.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pci.h Tue Nov 13 10:47:33 2001 @@ -0,0 +1,119 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __PPC_KERNEL_PCI_H__ +#define __PPC_KERNEL_PCI_H__ + +#include +#include + +extern unsigned long isa_io_base; +extern unsigned long isa_mem_base; +extern unsigned long pci_dram_offset; + +/******************************************************************* + * Platform independant variables referenced. + ******************************************************************* + * Set pci_assign_all_busses to 1 if you want the kernel to re-assign + * all PCI bus numbers. + *******************************************************************/ +extern int pci_assign_all_busses; + +extern struct pci_controller* pci_alloc_pci_controller(char *model, enum phb_types controller_type); +extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); + +extern struct pci_controller* hose_head; +extern struct pci_controller** hose_tail; +/* PHB's are also in a table. */ +#define PCI_MAX_PHB 64 +extern int global_phb_number; +extern struct pci_controller *phbtab[]; + +/******************************************************************* + * Platform functions that are brand specific implementation. 
+ *******************************************************************/ +extern unsigned long find_and_init_phbs(void); + +extern void fixup_resources(struct pci_dev *dev); +extern void ppc64_pcibios_init(void); + +extern int pci_set_reset(struct pci_dev*,int); +extern int device_Location(struct pci_dev*,char*); +extern int format_device_location(struct pci_dev*,char*, int ); + +extern struct pci_dev *ppc64_floppy_dev; + +/******************************************************************* + * PCI device_node operations + *******************************************************************/ +struct device_node; +typedef void *(*traverse_func)(struct device_node *me, void *data); +void *traverse_pci_devices(struct device_node *start, traverse_func pre, traverse_func post, void *data); +void *traverse_all_pci_devices(traverse_func pre); + +void pci_devs_phb_init(void); +void pci_fix_bus_sysdata(void); +struct device_node *fetch_dev_dn(struct pci_dev *dev); + +void iSeries_pcibios_init_early(void); +void pSeries_pcibios_init_early(void); +void pSeries_pcibios_init(void); + +/* Get a device_node from a pci_dev. This code must be fast except in the case + * where the sysdata is incorrect and needs to be fixed up (hopefully just once) + */ +static inline struct device_node *pci_device_to_OF_node(struct pci_dev *dev) +{ + struct device_node *dn = (struct device_node *)(dev->sysdata); + if (dn->devfn == dev->devfn && dn->busno == dev->bus->number) + return dn; /* fast path. sysdata is good */ + else + return fetch_dev_dn(dev); +} +/* Use this macro after the PCI bus walk for max performance when it + * is known that sysdata is correct. + */ +#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata)) + + +/******************************************************************* + * Platform configuration flags.. (Live in pci.c) + *******************************************************************/ +extern int Pci_Large_Bus_System; /* System has > 256 buses */ +extern int Pci_Manage_Phb_Space; /* Manage Phb Space for IOAs*/ + +/******************************************************************* + * Helper macros for extracting data from pci structures. + * PCI_GET_PHB_PTR(struct pci_dev*) returns the Phb pointer. + * PCI_GET_PHB_NUMBER(struct pci_dev*) returns the Phb number. + * PCI_GET_BUS_NUMBER(struct pci_dev*) returns the bus number. + *******************************************************************/ +#define PCI_GET_PHB_PTR(dev) (((struct device_node *)(dev)->sysdata)->phb) +#define PCI_GET_PHB_NUMBER(dev) (((dev)->bus->number&0x00FFFF00)>>8) +#define PCI_GET_BUS_NUMBER(dev) ((dev)->bus->number&0x0000FF) + +/******************************************************************* + * Pci Flight Recorder support. + *******************************************************************/ +#define PCIFR(...) fr_Log_Entry(PciFr,__VA_ARGS__); +extern struct flightRecorder* PciFr; +extern int Pci_Trace_Flag; + +/******************************************************************* + * Debugging Routines. 
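+ * Editor's note: these print through udbg_printf(); the usual guarded
+ * call is PPCDBGCALL(PPCDBG_BUSWALK, dumpPci_Dev(pdev)), as in
+ * find_floppy() in pci.c.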
+ *******************************************************************/ +extern void dumpResources(struct resource* Resource); +extern void dumpPci_Controller(struct pci_controller* phb); +extern void dumpPci_Bus(struct pci_bus* Pci_Bus); +extern void dumpPci_Dev(struct pci_dev* Pci_Dev); + +extern void dump_Phb_tree(void); +extern void dump_Bus_tree(void); +extern void dump_Dev_tree(void); + +#endif /* __PPC_KERNEL_PCI_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pci_dma.c linuxppc64_2_4/arch/ppc64/kernel/pci_dma.c --- ../kernel.org/linux/arch/ppc64/kernel/pci_dma.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pci_dma.c Fri Nov 30 07:46:39 2001 @@ -0,0 +1,1447 @@ +/* + * pci_dma.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Dynamic DMA mapping support. + * + * Manages the TCE space assigned to this partition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "pci.h" + +// #define DEBUG_TCE 1 + +/* Initialize so this guy does not end up in the BSS section. + * Only used to pass OF initialization data set in prom.c into the main + * kernel code -- data ultimately copied into tceTables[]. 
+ */ +extern struct _of_tce_table of_tce_table[]; + +extern struct pci_controller* hose_head; +extern struct pci_controller** hose_tail; + +struct TceTable virtBusVethTceTable; /* Tce table for virtual ethernet */ +struct TceTable virtBusVioTceTable; /* Tce table for virtual I/O */ + +struct device_node iSeries_veth_dev_node = { tce_table: &virtBusVethTceTable }; +struct device_node iSeries_vio_dev_node = { tce_table: &virtBusVioTceTable }; + +struct pci_dev iSeries_veth_dev_st = { sysdata: &iSeries_veth_dev_node }; +struct pci_dev iSeries_vio_dev_st = { sysdata: &iSeries_vio_dev_node }; + +struct pci_dev * iSeries_veth_dev = &iSeries_veth_dev_st; +struct pci_dev * iSeries_vio_dev = &iSeries_vio_dev_st; + +struct TceTable * tceTables[256]; /* Tce tables for 256 busses + * Bus 255 is the virtual bus + * zero indicates no bus defined + */ +/* allocates a contiguous range of tces (power-of-2 size) */ +static inline long alloc_tce_range(struct TceTable *, + unsigned order ); + +/* allocates a contiguous range of tces (power-of-2 size) + * assumes lock already held + */ +static long alloc_tce_range_nolock(struct TceTable *, + unsigned order ); + +/* frees a contiguous range of tces (power-of-2 size) */ +static inline void free_tce_range(struct TceTable *, + long tcenum, + unsigned order ); + +/* frees a contiguous rnage of tces (power-of-2 size) + * assumes lock already held + */ +void free_tce_range_nolock(struct TceTable *, + long tcenum, + unsigned order ); + +/* allocates a range of tces and sets them to the pages */ +static inline dma_addr_t get_tces( struct TceTable *, + unsigned order, + void *page, + unsigned numPages, + int direction ); + +static long test_tce_range( struct TceTable *, + long tcenum, + unsigned order ); + +static unsigned fill_scatterlist_sg(struct scatterlist *sg, int nents, + dma_addr_t dma_addr, + unsigned long numTces ); + +static unsigned long num_tces_sg( struct scatterlist *sg, + int nents ); + +static dma_addr_t create_tces_sg( struct TceTable *tbl, + struct scatterlist *sg, + int nents, + unsigned numTces, + int direction ); + +static void getTceTableParmsPSeries( struct pci_controller *phb, + struct device_node *dn, + struct TceTable *tce_table_parms ); + +static void getTceTableParmsPSeriesLP(struct pci_controller *phb, + struct device_node *dn, + struct TceTable *newTceTable ); + +void create_pci_bus_tce_table( unsigned long token ); + +u8 iSeries_Get_Bus( struct pci_dev * dv ) +{ + return 0; +} + +static inline struct TceTable *get_tce_table(struct pci_dev *dev) { + + if ( ( _machine == _MACH_iSeries ) && ( dev->bus ) ) + return tceTables[dev->bus->number]; + /* On the iSeries, the virtual bus will take this path. There is a */ + /* fake pci_dev and dev_node built and used. 
*/ + return PCI_GET_DN(dev)->tce_table; +} + +static unsigned long __inline__ count_leading_zeros64( unsigned long x ) +{ + unsigned long lz; + asm("cntlzd %0,%1" : "=r"(lz) : "r"(x)); + return lz; +} + +static void tce_build_iSeries(struct TceTable *tbl, long tcenum, + unsigned long uaddr, int direction ) +{ + u64 setTceRc; + union Tce tce; + + PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr); + PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n", + tcenum, tbl, tbl->index); + + tce.wholeTce = 0; + tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT; + + /* If for virtual bus */ + if ( tbl->tceType == TCE_VB ) { + tce.tceBits.valid = 1; + tce.tceBits.allIo = 1; + if ( direction != PCI_DMA_TODEVICE ) + tce.tceBits.readWrite = 1; + } else { + /* If for PCI bus */ + tce.tceBits.readWrite = 1; // Read allowed + if ( direction != PCI_DMA_TODEVICE ) + tce.tceBits.pciWrite = 1; + } + + setTceRc = HvCallXm_setTce((u64)tbl->index, + (u64)tcenum, + tce.wholeTce ); + + if(setTceRc) { + printk("PCI: tce_build failed 0x%lx tcenum: 0x%lx\n", setTceRc, (u64)tcenum); + //PPCDBG(PPCDBG_TCE, "setTce failed. rc=%ld\n", setTceRc); + //PPCDBG(PPCDBG_TCE, "\tindex = 0x%lx\n", (u64)tbl->index); + //PPCDBG(PPCDBG_TCE, "\ttce num = 0x%lx\n", (u64)tcenum); + //PPCDBG(PPCDBG_TCE, "\ttce val = 0x%lx\n", tce.wholeTce ); + } +} + +static void tce_build_pSeries(struct TceTable *tbl, long tcenum, + unsigned long uaddr, int direction ) +{ + union Tce tce; + union Tce *tce_addr; + + PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr); + PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n", + tcenum, tbl, tbl->index); + + tce.wholeTce = 0; + tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT; + + tce.tceBits.readWrite = 1; // Read allowed + if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1; + + tce_addr = ((union Tce *)tbl->base) + tcenum; + *tce_addr = (union Tce)tce.wholeTce; + + /* Make sure the update is visible to hardware. */ + __asm__ __volatile__ ("sync" : : : "memory"); +} + +/* + * Build a TceTable structure. This contains a multi-level bit map which + * is used to manage allocation of the tce space. 
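+ * Editor's worked example (not in the original patch): level i tracks
+ * free blocks of 2^i tces, one bit per block, a set bit meaning "free".
+ * For a table covering 1024 tces the level bitmaps hold 1024, 512, 256,
+ * ... bits, and alloc_tce_range(tbl, 3) returns eight contiguous tces
+ * starting at (block << 3).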
+ */
+static struct TceTable *build_tce_table( struct TceTable * tbl )
+{
+	unsigned long bits, bytes, totalBytes;
+	unsigned long numBits[NUM_TCE_LEVELS], numBytes[NUM_TCE_LEVELS];
+	unsigned i, k, m;
+	unsigned char * pos, * p, b;
+
+	PPCDBG(PPCDBG_TCEINIT, "build_tce_table: tbl = 0x%lx\n", tbl);
+	spin_lock_init( &(tbl->lock) );
+
+	tbl->mlbm.maxLevel = 0;
+
+	/* Compute number of bits and bytes for each level of the
+	 * multi-level bit map
+	 */
+	totalBytes = 0;
+	bits = tbl->size * (PAGE_SIZE / sizeof( union Tce ));
+
+	/* (Editor's reconstruction of the level-size loop and the map
+	 * allocation below; these lines were lost in extraction, and the
+	 * kmalloc call is an assumption.)
+	 */
+	for ( i=0; i<NUM_TCE_LEVELS; ++i ) {
+		numBits[i] = bits;
+		bytes = (bits + 7) / 8;
+		numBytes[i] = bytes;
+		totalBytes += bytes;
+		bits /= 2;
+	}
+
+	pos = (unsigned char *)kmalloc( totalBytes, GFP_ATOMIC );
+	memset( pos, 0, totalBytes );
+
+	for ( i=0; i<NUM_TCE_LEVELS; ++i ) {
+		if ( numBytes[i] ) {
+			tbl->mlbm.level[i].map = pos;
+			tbl->mlbm.maxLevel = i;
+
+			if ( numBits[i] & 1 ) {
+				p = pos + numBytes[i] - 1;
+				m = (( numBits[i] % 8) - 1) & 7;
+				*p = 0x80 >> m;
+				PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d last bit %x\n", i, 0x80>>m );
+			}
+		}
+		else
+			tbl->mlbm.level[i].map = 0;
+		pos += numBytes[i];
+		tbl->mlbm.level[i].numBits = numBits[i];
+		tbl->mlbm.level[i].numBytes = numBytes[i];
+	}
+
+	/* For the highest level, turn on all the bits */
+
+	i = tbl->mlbm.maxLevel;
+	p = tbl->mlbm.level[i].map;
+	m = numBits[i];
+	PPCDBG(PPCDBG_TCEINIT, "build_tce_table: highest level (%d) has all bits set\n", i);
+	for (k=0; k<numBytes[i]; ++k) {
+		if ( m >= 8 ) {
+			/* handle full bytes */
+			*p++ = 0xff;
+			m -= 8;
+		}
+		else if(m>0) {
+			/* handle the last partial byte */
+			b = 0x80;
+			*p = 0;
+			while (m) {
+				*p |= b;
+				b >>= 1;
+				--m;
+			}
+		} else {
+			break;
+		}
+	}
+
+	return tbl;
+}
+
+static inline long alloc_tce_range( struct TceTable *tbl, unsigned order )
+{
+	long retval;
+	unsigned long flags;
+
+	/* Lock the tce allocation bitmap */
+	spin_lock_irqsave( &(tbl->lock), flags );
+
+	/* Do the actual work */
+	retval = alloc_tce_range_nolock( tbl, order );
+
+	/* Unlock the tce allocation bitmap */
+	spin_unlock_irqrestore( &(tbl->lock), flags );
+
+	return retval;
+}
+
+static long alloc_tce_range_nolock( struct TceTable *tbl, unsigned order )
+{
+	unsigned long numBits, numBytes;
+	unsigned long i, bit, block, mask;
+	long tcenum;
+	u64 * map;
+
+	/* If the order (power of 2 size) requested is larger than our
+	 * biggest, indicate failure
+	 */
+	if(order >= NUM_TCE_LEVELS) {
+		PPCDBG(PPCDBG_TCE,
+		       "alloc_tce_range_nolock: invalid order: %d\n", order );
+		return -1;
+	}
+
+	numBits = tbl->mlbm.level[order].numBits;
+	numBytes = tbl->mlbm.level[order].numBytes;
+	map = (u64 *)tbl->mlbm.level[order].map;
+
+	/* Initialize return value to -1 (failure) */
+	tcenum = -1;
+
+	/* Loop through the bytes of the bitmap.  (Editor's reconstruction:
+	 * the search loop body was lost in extraction; it scans one 64-bit
+	 * word at a time, bit 0 being the MSB.)
+	 */
+	for (i=0; i<numBytes; i+=8) {
+		if ( *map ) {
+			/* A free block lives in this word -- find its bit */
+			bit = count_leading_zeros64( *map );
+			mask = 1UL << (63 - bit);
+			*map &= ~mask;			/* mark the block allocated */
+			block = (i * 8) + bit;		/* block index in this level */
+			tcenum = block << order;	/* first tce of the block */
+			break;
+		}
+		++map;
+	}
+
+#ifdef DEBUG_TCE
+	if ( tcenum == -1 ) {
+		PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: no free block of order %d\n", order );
+		if ( order < tbl->mlbm.maxLevel )
+			PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: trying next bigger size\n" );
+		else
+			PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: maximum size reached...failing\n");
+	}
+#endif
+
+	/* If no block of the requested size was found, try the next
+	 * size bigger.  If one of those is found, return the second
+	 * half of the block to freespace and keep the first half
+	 */
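+	/* (Editor's example: an order-2 request -- four tces -- that finds
+	 * level 2 empty recurses at order 3; if that returns tcenum 40,
+	 * tces 44-47 go back on the free map at order 2 and 40 is kept.)
+	 */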
+	if((tcenum == -1) && (order < (NUM_TCE_LEVELS - 1))) {
+		tcenum = alloc_tce_range_nolock( tbl, order+1 );
+		if ( tcenum != -1 ) {
+			free_tce_range_nolock( tbl, tcenum+(1<<order), order );
+		}
+	}
+
+	/* Return the first tce of the block, or -1 on failure.  (This
+	 * return and the head of free_tce_range below are the editor's
+	 * reconstruction of lines lost in extraction.)
+	 */
+	return tcenum;
+}
+
+static inline void free_tce_range( struct TceTable *tbl,
+				   long tcenum, unsigned order )
+{
+	unsigned long flags;
+
+	/* Lock the tce allocation bitmap */
+	spin_lock_irqsave( &(tbl->lock), flags );
+
+	/* Do the actual work */
+	free_tce_range_nolock( tbl, tcenum, order );
+
+	/* Unlock the tce allocation bitmap */
+	spin_unlock_irqrestore( &(tbl->lock), flags );
+
+}
+
+void free_tce_range_nolock(struct TceTable *tbl,
+			   long tcenum, unsigned order )
+{
+	unsigned long block;
+	unsigned byte, bit, mask, b;
+	unsigned char * map, * bytep;
+
+	if (order >= NUM_TCE_LEVELS) {
+		PPCDBG(PPCDBG_TCE,
+		       "free_tce_range: invalid order: %d, tcenum = %ld\n",
+		       order, tcenum );
+		return;
+	}
+
+	block = tcenum >> order;
+
+#ifdef DEBUG_TCE
+	if ( tcenum != (block << order ) ) {
+		PPCDBG(PPCDBG_TCE,
+		       "free_tce_range: tcenum %lx misaligned for order %x\n",
+		       tcenum, order );
+		return;
+	}
+
+	if ( block >= tbl->mlbm.level[order].numBits ) {
+		PPCDBG(PPCDBG_TCE,
+		       "free_tce_range: tcenum %lx is outside the range of this map (order %x, numBits %lx)\n",
+		       tcenum, order, tbl->mlbm.level[order].numBits );
+		return;
+	}
+
+	if ( test_tce_range( tbl, tcenum, order ) ) {
+		PPCDBG(PPCDBG_TCE,
+		       "free_tce_range: freeing range not allocated.\n");
+		PPCDBG(PPCDBG_TCE,
+		       "\tTceTable %p, tcenum %lx, order %x\n",
+		       tbl, tcenum, order );
+	}
+#endif
+
+	map = tbl->mlbm.level[order].map;
+	byte = block / 8;
+	bit = block % 8;
+	mask = 0x80 >> bit;
+	bytep = map + byte;
+
+#ifdef DEBUG_TCE
+	PPCDBG(PPCDBG_TCE,
+	       "free_tce_range_nolock: freeing block %ld (byte=%d, bit=%d) of order %d\n",
+	       block, byte, bit, order);
+	if ( *bytep & mask )
+		PPCDBG(PPCDBG_TCE,
+		       "free_tce_range: already free: TceTable %p, tcenum %lx, order %x\n",
+		       tbl, tcenum, order );
+#endif
+
+	*bytep |= mask;
+
+	/* If there is a higher level in the bit map than this we may be
+	 * able to buddy up this block with its partner.
+	 *   If this is the highest level, we can't buddy up.
+	 *   If this level has an odd number of bits and we are freeing
+	 *   the last block, we can't buddy up.
+	 *   Don't buddy up if it's in the first 1/4 of the level.
+	 */
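+	/* (Editor's example: freeing block 6 at order 0 when its buddy,
+	 * block 7, is already free clears both bits and instead frees
+	 * block 3 = 6 >> 1 at order 1, keeping larger runs available.)
+	 */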
+	if (( block > (tbl->mlbm.level[order].numBits/4) ) &&
+	    (( block < tbl->mlbm.level[order].numBits-1 ) ||
+	     ( 0 == ( tbl->mlbm.level[order].numBits & 1)))) {
+		/* See if we can buddy up the block we just freed */
+		bit &= 6;		/* get to the first of the buddy bits */
+		mask = 0xc0 >> bit;	/* build two bit mask */
+		b = *bytep & mask;	/* Get the two bits */
+		if ( 0 == (b ^ mask) ) {	/* If both bits are on */
+			/* both of the buddy blocks are free, we can combine them */
+			*bytep ^= mask;		/* turn off the two bits */
+			block = ( byte * 8 ) + bit;	/* block of first of buddies */
+			tcenum = block << order;
+			/* free the buddied block */
+			PPCDBG(PPCDBG_TCE,
+			       "free_tce_range: buddying blocks %ld & %ld\n",
+			       block, block+1);
+			free_tce_range_nolock( tbl, tcenum, order+1 );
+		}
+	}
+}
+
+static long test_tce_range( struct TceTable *tbl, long tcenum, unsigned order )
+{
+	unsigned long block;
+	unsigned byte, bit, mask, b;
+	long retval, retLeft, retRight;
+	unsigned char * map;
+
+	map = tbl->mlbm.level[order].map;
+	block = tcenum >> order;
+	byte = block / 8;	/* Byte within bitmap */
+	bit = block % 8;	/* Bit within byte */
+	mask = 0x80 >> bit;
+	b = (*(map+byte) & mask );	/* 0 if block is allocated, else free */
+	if ( b )
+		retval = 1;	/* 1 == block is free */
+	else
+		retval = 0;	/* 0 == block is allocated */
+
+	/* Test bits at all levels below this to ensure that all agree */
+	if (order) {
+		retLeft = test_tce_range( tbl, tcenum, order-1 );
+		retRight = test_tce_range( tbl, tcenum+(1<<(order-1)), order-1 );
+		if ( retLeft || retRight ) {
+			retval = 2;
+		}
+	}
+
+	return retval;
+}
+
+static inline dma_addr_t get_tces( struct TceTable *tbl, unsigned order, void *page, unsigned numPages, int direction )
+{
+	long tcenum;
+	unsigned long uaddr;
+	unsigned i;
+	dma_addr_t retTce = NO_TCE;
+
+	uaddr = (unsigned long)page & PAGE_MASK;
+
+	/* Allocate a range of tces */
+	tcenum = alloc_tce_range( tbl, order );
+	if ( tcenum != -1 ) {
+		/* We got the tces we wanted */
+		tcenum += tbl->startOffset;	/* Offset into real TCE table */
+		retTce = tcenum << PAGE_SHIFT;	/* Set the return dma address */
+
+		/* Setup a tce for each page.  (Editor's reconstruction of a
+		 * loop body lost in extraction; the dispatch on _machine is
+		 * an assumption, modelled on get_tce_table() above.)
+		 */
+		for (i=0; i<numPages; ++i) {
+			if ( _machine == _MACH_iSeries )
+				tce_build_iSeries( tbl, tcenum, uaddr, direction );
+			else
+				tce_build_pSeries( tbl, tcenum, uaddr, direction );
+			++tcenum;
+			uaddr += PAGE_SIZE;
+		}
+	}
+	return retTce;
+}
+
+static void tce_free_iSeries(struct TceTable *tbl, dma_addr_t dma_addr,
+			     unsigned order, unsigned numPages)
+{
+	u64 setTceRc;
+	long tcenum, freeTce, maxTcenum;
+	unsigned i;
+	union Tce tce;
+
+	/* (Function head reconstructed -- the name and declarations follow
+	 * the pSeries variant below and the HvCallXm usage in this loop.)
+	 */
+	maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1;
+
+	tcenum = dma_addr >> PAGE_SHIFT;
+
+	freeTce = tcenum - tbl->startOffset;
+
+	if ( freeTce > maxTcenum ) {
+		PPCDBG(PPCDBG_TCE, "free_tces: tcenum > maxTcenum\n");
+		PPCDBG(PPCDBG_TCE, "\ttcenum    = 0x%lx\n", tcenum);
+		PPCDBG(PPCDBG_TCE, "\tmaxTcenum = 0x%lx\n", maxTcenum);
+		PPCDBG(PPCDBG_TCE, "\tTCE Table = 0x%lx\n", (u64)tbl);
+		PPCDBG(PPCDBG_TCE, "\tbus#      = 0x%lx\n", (u64)tbl->busNumber );
+		PPCDBG(PPCDBG_TCE, "\tsize      = 0x%lx\n", (u64)tbl->size);
+		PPCDBG(PPCDBG_TCE, "\tstartOff  = 0x%lx\n", (u64)tbl->startOffset );
+		PPCDBG(PPCDBG_TCE, "\tindex     = 0x%lx\n", (u64)tbl->index);
+		return;
+	}
+
+	for (i=0; i<numPages; ++i) {
+		tce.wholeTce = 0;	/* an all-zero tce is invalid */
+		setTceRc = HvCallXm_setTce((u64)tbl->index,
+					   (u64)tcenum,
+					   tce.wholeTce );
+
+		if ( setTceRc ) {
+			printk("PCI: tce_free failed 0x%lx tcenum: 0x%lx\n", setTceRc, (u64)tcenum);
+			//PPCDBG(PPCDBG_TCE, "tce_free: setTce failed\n");
+			//PPCDBG(PPCDBG_TCE, "\trc      = 0x%lx\n", setTceRc);
+			//PPCDBG(PPCDBG_TCE, "\tindex   = 0x%lx\n", (u64)tbl->index);
+			//PPCDBG(PPCDBG_TCE, "\ttce num = 0x%lx\n", (u64)tcenum);
+			//PPCDBG(PPCDBG_TCE, "\ttce val = 0x%lx\n", tce.wholeTce );
+		}
+
+		++tcenum;
+	}
+
+	free_tce_range( tbl, freeTce, order );
+}
+static void tce_free_pSeries(struct TceTable *tbl, dma_addr_t dma_addr,
+			     unsigned order, unsigned numPages)
+{
+	long tcenum, freeTce, maxTcenum;
+	unsigned i;
+	union Tce tce;
+	union Tce *tce_addr;
+
+	maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1;
+
+	tcenum = dma_addr >> PAGE_SHIFT;
+	// tcenum -= tbl->startOffset;
+
+	freeTce = tcenum - tbl->startOffset;
+
+	if ( freeTce > maxTcenum ) {
+		PPCDBG(PPCDBG_TCE, "free_tces: tcenum > maxTcenum\n");
+		PPCDBG(PPCDBG_TCE, "\ttcenum    = 0x%lx\n", tcenum);
+		PPCDBG(PPCDBG_TCE, "\tmaxTcenum = 0x%lx\n", maxTcenum);
+		PPCDBG(PPCDBG_TCE, "\tTCE Table = 0x%lx\n", (u64)tbl);
+		PPCDBG(PPCDBG_TCE, "\tbus#      = 0x%lx\n",
+		       (u64)tbl->busNumber );
+		PPCDBG(PPCDBG_TCE, "\tsize      = 0x%lx\n", (u64)tbl->size);
+		PPCDBG(PPCDBG_TCE, "\tstartOff  = 0x%lx\n",
+		       (u64)tbl->startOffset );
+		PPCDBG(PPCDBG_TCE, "\tindex     = 0x%lx\n", (u64)tbl->index);
+		return;
+	}
+
+	for (i=0; i<numPages; ++i) {
+		/* (loop header reconstructed -- lost in extraction) */
+		tce.wholeTce = 0;
+		tce_addr = ((union Tce *)tbl->base) + tcenum;
+		*tce_addr = (union Tce)tce.wholeTce;
+
+		++tcenum;
+	}
+
+	/* Make sure the update is visible to hardware. */
+	__asm__ __volatile__ ("sync" : : : "memory");
+
+	free_tce_range( tbl, freeTce, order );
+}
+
+void __init create_virtual_bus_tce_table(void)
+{
+	struct TceTable *t;
+	struct TceTableManagerCB virtBusTceTableParms;
+	u64 absParmsPtr;
+
+	virtBusTceTableParms.busNumber = 255;	/* Bus 255 is the virtual bus */
+	virtBusTceTableParms.virtualBusFlag = 0xff;	/* Ask for virtual bus */
+
+	absParmsPtr = virt_to_absolute( (u64)&virtBusTceTableParms );
+	HvCallXm_getTceTableParms( absParmsPtr );
+
+	virtBusVethTceTable.size = virtBusTceTableParms.size / 2;
+	virtBusVethTceTable.busNumber = virtBusTceTableParms.busNumber;
+	virtBusVethTceTable.startOffset = virtBusTceTableParms.startOffset;
+	virtBusVethTceTable.index = virtBusTceTableParms.index;
+	virtBusVethTceTable.tceType = TCE_VB;
+
+	virtBusVioTceTable.size = virtBusTceTableParms.size - virtBusVethTceTable.size;
+	virtBusVioTceTable.busNumber = virtBusTceTableParms.busNumber;
+	virtBusVioTceTable.startOffset = virtBusTceTableParms.startOffset +
+		virtBusVethTceTable.size * (PAGE_SIZE/sizeof(union Tce));
+	virtBusVioTceTable.index = virtBusTceTableParms.index;
+	virtBusVioTceTable.tceType = TCE_VB;
+
+	t = build_tce_table( &virtBusVethTceTable );
+	if ( t ) {
+		tceTables[255] = t;
+		printk( "Virtual Bus VETH TCE table built successfully.\n");
+		printk( "  TCE table size        = %ld entries\n",
+			(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
+		printk( "  TCE table token       = %d\n",
+			(unsigned)t->index );
+		printk( "  TCE table start entry = 0x%lx\n",
+			(unsigned long)t->startOffset );
+	}
+	else
+		printk( "Virtual Bus VETH TCE table failed.\n");
+
+	t = build_tce_table( &virtBusVioTceTable );
+	if ( t ) {
+		printk( "Virtual Bus VIO TCE table built successfully.\n");
+		printk( "  TCE table size        = %ld entries\n",
+			(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
+		printk( "  TCE table token       = %d\n",
+			(unsigned)t->index );
+		printk( "  TCE table start entry = 0x%lx\n",
+			(unsigned long)t->startOffset );
+	}
+	else
+		printk( "Virtual Bus VIO TCE table failed.\n");
+}
+
+void create_tce_tables_for_buses(struct list_head *bus_list)
+{
+	struct pci_controller* phb;
+	struct device_node *dn, *first_dn;
+	int num_slots, num_slots_ilog2;
+	int first_phb = 1;
+
+	for (phb=hose_head;phb;phb=phb->next) {
+		first_dn = ((struct device_node *)phb->arch_data)->child;
+		/* Carve 2GB into the largest dma_window_size possible */
+		for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
+			num_slots++;
+		num_slots_ilog2 = __ilog2(num_slots);
+		if ((1<<num_slots_ilog2) != num_slots)
+			num_slots_ilog2++;	/* round up to a power of 2 */
+		phb->dma_window_size = 1 << (22 - num_slots_ilog2);
+		/* Reserve 16MB of DMA space on the first PHB.
+		 * We should probably be more careful and use firmware props.
+		 * In reality this space is remapped, not lost.  But we don't
+		 * want to get that smart to handle it -- too much work.
+		 */
+		phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
+		first_phb = 0;
+		for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling) {
+			create_pci_bus_tce_table((unsigned long)dn);
+		}
+	}
+}
+
+void create_tce_tables_for_busesLP(struct list_head *bus_list)
+{
+	struct list_head *ln;
+	struct pci_bus *bus;
+	struct device_node *busdn;
+	u32 *dma_window;
+
+	for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+		bus = pci_bus_b(ln);
+		busdn = PCI_GET_DN(bus);
+		/* NOTE: there should never be a window declared on a bus when
+		 * child devices also have a window.  If this should ever be
+		 * architected, we probably want children to have priority.
+		 * In reality, the PHB containing ISA has the property, but
+		 * otherwise it is the pci-bridges that have the property.
+		 */
+		dma_window = (u32 *)get_property(busdn, "ibm,dma-window", 0);
+		if (dma_window) {
+			/* Busno hasn't been copied yet.
+			 * Do it now because getTceTableParmsPSeriesLP needs it.
+			 */
+			busdn->busno = bus->number;
+			create_pci_bus_tce_table((unsigned long)busdn);
+		} else
+			create_tce_tables_for_busesLP(&bus->children);
+	}
+}
+
+void create_tce_tables(void) {
+	struct pci_dev *dev;
+	struct device_node *dn, *mydn;
+
+	if (_machine == _MACH_pSeriesLP)
+		create_tce_tables_for_busesLP(&pci_root_buses);
+	else
+		create_tce_tables_for_buses(&pci_root_buses);
+
+	/* Now copy the tce_table ptr from the bus devices down to every
+	 * pci device_node.  This means get_tce_table() won't need to search
+	 * up the device tree to find it.
+	 */
+	pci_for_each_dev(dev) {
+		mydn = dn = PCI_GET_DN(dev);
+		while (dn && dn->tce_table == NULL)
+			dn = dn->parent;
+		if (dn) {
+			mydn->tce_table = dn->tce_table;
+		}
+	}
+}
+
+/*
+ * iSeries token = busNumber
+ * pSeries token = pci_controller*
+ */
+void create_pci_bus_tce_table( unsigned long token ) {
+	struct TceTable * builtTceTable;
+	struct TceTable * newTceTable;
+	struct TceTableManagerCB pciBusTceTableParms;
+	u64 parmsPtr;
+
+	PPCDBG(PPCDBG_TCE, "Entering create_pci_bus_tce_table.\n");
+	PPCDBG(PPCDBG_TCE, "\ttoken = 0x%lx\n", token);
+
+	newTceTable = kmalloc( sizeof(struct TceTable), GFP_KERNEL );
+
+	if(_machine == _MACH_iSeries) {
+		if ( token > 254 ) {
+			printk("PCI: Bus TCE table failed, invalid bus number %lu\n", token );
+			return;
+		}
+
+		pciBusTceTableParms.busNumber = token;
+		pciBusTceTableParms.virtualBusFlag = 0;
+		parmsPtr = virt_to_absolute( (u64)&pciBusTceTableParms );
+
+		/*
+		 * Call HV with the architected data structure to get TCE table
+		 * info.  Put the returned data into the Linux representation
+		 * of the TCE table data.
+		 */
+		HvCallXm_getTceTableParms( parmsPtr );
+		printk("PCI: getTceTableParms: Bus: 0x%lx Size: 0x%lx, Start: 0x%lx, Index: 0x%lx\n",
+		       pciBusTceTableParms.busNumber,
+		       pciBusTceTableParms.size,
+		       pciBusTceTableParms.startOffset,
+		       pciBusTceTableParms.index);
+
+		/* Determine if the table identified by the index and startOffset
+		 * returned by the hypervisor for this bus has already been created.
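+		 * (The hypervisor may report the same index and startOffset
+		 * for several busses; such busses then share one TceTable
+		 * rather than building a second map over the same entries.)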
*/ + /* If so, set the tceTable entry to point to the linux shared tceTable.*/ + int BusIndex; + for ( BusIndex=0; BusIndex<255; ++BusIndex) { + if (tceTables[BusIndex] != NULL) { + struct TceTable* CmprTceTable = tceTables[BusIndex]; + if ( ( CmprTceTable->index == pciBusTceTableParms.index ) && + ( CmprTceTable->startOffset == pciBusTceTableParms.startOffset ) ) { + tceTables[token] = CmprTceTable; + printk("PCI: Bus %lu Shares a TCE table with bus %d\n",token,BusIndex); + break; + } + } + } + /* No shared table, build a new table for this bus. */ + if (tceTables[token] == NULL) { + newTceTable->size = pciBusTceTableParms.size; + newTceTable->busNumber = pciBusTceTableParms.busNumber; + newTceTable->startOffset = pciBusTceTableParms.startOffset; + newTceTable->index = pciBusTceTableParms.index; + + builtTceTable = build_tce_table( newTceTable ); + builtTceTable->tceType = TCE_PCI; + tceTables[token] = builtTceTable; + } + else { + /* We're using the shared table, not this new one. */ + kfree(newTceTable); + } + + printk("PCI: Pci bus %lu TceTable: %p\n",token,tceTables[token]); + return; + } else { + struct device_node *dn; + struct pci_controller *phb; + + dn = (struct device_node *)token; + phb = dn->phb; + if (_machine == _MACH_pSeries) + getTceTableParmsPSeries(phb, dn, newTceTable); + else + getTceTableParmsPSeriesLP(phb, dn, newTceTable); + builtTceTable = build_tce_table( newTceTable ); + dn->tce_table = builtTceTable; + } + + if(builtTceTable == NULL ) { + kfree( newTceTable ); + PPCDBG(PPCDBG_TCE, "PCI Bus TCE table failed.\n"); + return; + } +} + +static void getTceTableParmsPSeries(struct pci_controller *phb, + struct device_node *dn, + struct TceTable *newTceTable ) { + phandle node; + unsigned long i; + + node = ((struct device_node *)(phb->arch_data))->node; + + PPCDBG(PPCDBG_TCEINIT, "getTceTableParms: start\n"); + PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table = 0x%lx\n", of_tce_table); + PPCDBG(PPCDBG_TCEINIT, "\tphb = 0x%lx\n", phb); + PPCDBG(PPCDBG_TCEINIT, "\tdn = 0x%lx\n", dn); + PPCDBG(PPCDBG_TCEINIT, "\tdn->name = %s\n", dn->name); + PPCDBG(PPCDBG_TCEINIT, "\tdn->full_name= %s\n", dn->full_name); + PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable = 0x%lx\n", newTceTable); + PPCDBG(PPCDBG_TCEINIT, "\tdma_window_size = 0x%lx\n", phb->dma_window_size); + + i = 0; + while(of_tce_table[i].node) { + PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].node = 0x%lx\n", + i, of_tce_table[i].node); + PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].base = 0x%lx\n", + i, of_tce_table[i].base); + PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].size = 0x%lx\n", + i, of_tce_table[i].size >> PAGE_SHIFT); + PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data->node = 0x%lx\n", + node); + + if(of_tce_table[i].node == node) { + memset((void *)of_tce_table[i].base, + 0, of_tce_table[i].size); + newTceTable->busNumber = phb->bus->number; + + /* Units of tce entries. */ + newTceTable->startOffset = phb->dma_window_base_cur; + + /* Adjust the current table offset to the next */ + /* region. Measured in TCE entries. Force an */ + /* alignment to the size alloted per IOA. This */ + /* makes it easier to remove the 1st 16MB. */ + phb->dma_window_base_cur += (phb->dma_window_size>>3); + phb->dma_window_base_cur &= + ~((phb->dma_window_size>>3)-1); + + /* Set the tce table size - measured in units */ + /* of pages of tce table. */ + newTceTable->size = ((phb->dma_window_base_cur - + newTceTable->startOffset) << 3) + >> PAGE_SHIFT; + + /* Test if we are going over 2GB of DMA space. 
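+		 * (dma_window_base_cur is counted in TCE entries, and with 4K
+		 * pages each entry maps 4K, so 1 << 19 entries * 4K = 2GB;
+		 * crossing that bound means the per-IOA carve-up has
+		 * overflowed the 2GB DMA window.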
*/ + if(phb->dma_window_base_cur > (1 << 19)) { + udbg_printf("Unexpected number of IOAs under this PHB"); + panic("Unexpected number of IOAs under this PHB"); + } + + newTceTable->base = of_tce_table[i].base; + newTceTable->index = 0; + + PPCDBG(PPCDBG_TCEINIT, + "\tnewTceTable->base = 0x%lx\n", + newTceTable->base); + PPCDBG(PPCDBG_TCEINIT, + "\tnewTceTable->startOffset = 0x%lx" + "(# tce entries)\n", + newTceTable->startOffset); + PPCDBG(PPCDBG_TCEINIT, + "\tnewTceTable->size = 0x%lx" + "(# pages of tce table)\n", + newTceTable->size); + } + i++; + } +} + +/* + * getTceTableParmsPSeriesLP + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. + * + * ToDo: properly interpret the ibm,dma-window property. The definition is: + * logical-bus-number (1 word) + * phys-address (#address-cells words) + * size (#cell-size words) + * + * Currently we hard code these sizes (more or less). + */ +static void getTceTableParmsPSeriesLP(struct pci_controller *phb, + struct device_node *dn, + struct TceTable *newTceTable ) { + u32 *dma_window = (u32 *)get_property(dn, "ibm,dma-window", 0); + if (!dma_window) { + panic("getTceTableParmsPSeriesLP: device %s has no ibm,dma-window property!\n", dn->full_name); + } + + newTceTable->busNumber = dn->busno; + newTceTable->size = (((((unsigned long)dma_window[4] << 32) | (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT; + newTceTable->startOffset = ((((unsigned long)dma_window[2] << 32) | (unsigned long)dma_window[3]) >> 12); + newTceTable->base = 0; + newTceTable->index = dma_window[0]; + PPCDBG(PPCDBG_TCEINIT, "getTceTableParmsPSeriesLP for bus 0x%lx:\n", dn->busno); + PPCDBG(PPCDBG_TCEINIT, "\tDevice = %s\n", dn->full_name); + PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->index = 0x%lx\n", newTceTable->index); + PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->startOffset = 0x%lx\n", newTceTable->startOffset); + PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->size = 0x%lx\n", newTceTable->size); +} + +/* Allocates a contiguous real buffer and creates TCEs over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (tce) of the first page. 
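+ *
+ * A rough usage sketch (hypothetical driver code, not part of this
+ * patch; "pdev" stands in for the driver's struct pci_dev):
+ *
+ *	dma_addr_t bus_addr;
+ *	void *buf = pci_alloc_consistent(pdev, 4096, &bus_addr);
+ *	if (buf) {
+ *		... hand bus_addr to the adapter, touch buf from the CPU ...
+ *		pci_free_consistent(pdev, 4096, buf, bus_addr);
+ *	}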
+ */ +void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + struct TceTable * tbl; + void *ret = NULL; + unsigned order, nPages; + dma_addr_t tce; + + PPCDBG(PPCDBG_TCE, "pci_alloc_consistent:\n"); + PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx\n", hwdev); + PPCDBG(PPCDBG_TCE, "\tsize = 0x%16.16lx\n", size); + PPCDBG(PPCDBG_TCE, "\tdma_handle = 0x%16.16lx\n", dma_handle); + + size = PAGE_ALIGN(size); + order = get_order(size); + nPages = 1 << order; + + tbl = get_tce_table(hwdev); + + if ( tbl ) { + /* Alloc enough pages (and possibly more) */ + ret = (void *)__get_free_pages( GFP_ATOMIC, order ); + if ( ret ) { + /* Page allocation succeeded */ + memset(ret, 0, nPages << PAGE_SHIFT); + /* Set up tces to cover the allocated range */ + tce = get_tces( tbl, order, ret, nPages, PCI_DMA_BIDIRECTIONAL ); + if ( tce == NO_TCE ) { + PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tces failed\n" ); + free_pages( (unsigned long)ret, order ); + ret = NULL; + } + else + { + *dma_handle = tce; + } + } + else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: __get_free_pages failed for order = %d\n", order); + } + else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tce_table failed for 0x%016lx\n", hwdev); + + PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle); + PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: return = 0x%16.16lx\n", ret); + return ret; +} + +void pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct TceTable * tbl; + unsigned order, nPages; + + PPCDBG(PPCDBG_TCE, "pci_free_consistent:\n"); + PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, dma_handle, vaddr); + + size = PAGE_ALIGN(size); + order = get_order(size); + nPages = 1 << order; + + if ( order > 10 ) + PPCDBG(PPCDBG_TCE, "pci_free_consistent: order=%d, size=%d, nPages=%d, dma_handle=%016lx, vaddr=%016lx\n", + order, size, nPages, (unsigned long)dma_handle, (unsigned long)vaddr ); + + tbl = get_tce_table(hwdev); + + if ( tbl ) { + ppc_md.tce_free(tbl, dma_handle, order, nPages); + free_pages( (unsigned long)vaddr, order ); + } +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address of the buffer + * passed here is the kernel (virtual) address of the buffer. The buffer + * need not be page aligned, the dma_addr_t returned will point to the same + * byte within the page as vaddr. 
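+ * (The byte offset survives because get_tces() hands back a page-aligned
+ * DMA address and the code below ORs in uaddr & ~PAGE_MASK.  Hypothetical
+ * use: d = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE); ... ;
+ * pci_unmap_single(pdev, d, len, PCI_DMA_TODEVICE); with the same
+ * size and direction on both calls.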
+ */
+dma_addr_t pci_map_single(struct pci_dev *hwdev, void *vaddr,
+			  size_t size, int direction )
+{
+	struct TceTable * tbl;
+	dma_addr_t dma_handle = NO_TCE;
+	unsigned long uaddr;
+	unsigned order, nPages;
+
+	PPCDBG(PPCDBG_TCE, "pci_map_single:\n");
+	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, direction, vaddr);
+	if ( direction == PCI_DMA_NONE )
+		BUG();
+
+	uaddr = (unsigned long)vaddr;
+	nPages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK );
+	order = get_order( nPages & PAGE_MASK );
+	nPages >>= PAGE_SHIFT;
+
+	tbl = get_tce_table(hwdev);
+
+	if ( tbl ) {
+		dma_handle = get_tces( tbl, order, vaddr, nPages, direction );
+		dma_handle |= ( uaddr & ~PAGE_MASK );
+	}
+
+	return dma_handle;
+}
+
+void pci_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction )
+{
+	struct TceTable * tbl;
+	unsigned order, nPages;
+
+	PPCDBG(PPCDBG_TCE, "pci_unmap_single:\n");
+	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, dma_handle = 0x%16.16lx\n", hwdev, size, direction, dma_handle);
+	if ( direction == PCI_DMA_NONE )
+		BUG();
+
+	nPages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK );
+	order = get_order( nPages & PAGE_MASK );
+	nPages >>= PAGE_SHIFT;
+
+	if ( order > 10 )
+		PPCDBG(PPCDBG_TCE, "pci_unmap_single: order=%d, size=%d, nPages=%d, dma_handle=%016lx\n",
+		       order, size, nPages, (unsigned long)dma_handle );
+
+	tbl = get_tce_table(hwdev);
+
+	if ( tbl )
+		ppc_md.tce_free(tbl, dma_handle, order, nPages);
+
+}
+
+/* Figure out how many TCEs are actually going to be required
+ * to map this scatterlist.  This code is not optimal.  It
+ * takes into account the case where entry n ends in the same
+ * page in which entry n+1 starts.  It does not handle the
+ * general case of entry n ending in the same page in which
+ * entry m starts.
+ */
+static unsigned long num_tces_sg( struct scatterlist *sg, int nents )
+{
+	unsigned long nTces, numPages, startPage, endPage, prevEndPage;
+	unsigned i;
+
+	prevEndPage = 0;
+	nTces = 0;
+
+	for (i=0; i<nents; ++i) {
+		startPage = (unsigned long)sg->address >> PAGE_SHIFT;
+		endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
+		numPages = endPage - startPage + 1;
+		/* Simple optimization: if the previous entry ended
+		 * in the same page in which this entry starts
+		 * then we can reduce the required pages by one.
+		 * This matches assumptions in fill_scatterlist_sg and
+		 * create_tces_sg
+		 */
+		if ( startPage == prevEndPage )
+			--numPages;
+		nTces += numPages;
+		prevEndPage = endPage;
+		sg++;
+	}
+	return nTces;
+}
+
+/* Fill in the dma data in the scatterlist
+ * return the number of dma sg entries created
+ */
+static unsigned fill_scatterlist_sg( struct scatterlist *sg, int nents,
+				     dma_addr_t dma_addr , unsigned long numTces)
+{
+	struct scatterlist *dma_sg;
+	u32 cur_start_dma;
+	unsigned long cur_len_dma, cur_end_virt, uaddr;
+	unsigned num_dma_ents;
+
+	dma_sg = sg;
+	num_dma_ents = 1;
+
+	/* Process the first sg entry */
+	cur_start_dma = dma_addr + ((unsigned long)sg->address & (~PAGE_MASK));
+	cur_len_dma = sg->length;
+	/* cur_end_virt holds the address of the byte immediately after the
+	 * end of the current buffer.
+	 */
+	cur_end_virt = (unsigned long)sg->address + cur_len_dma;
+	/* Later code assumes that unused sg->dma_address and sg->dma_length
+	 * fields will be zero.
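+	 * (The zeroes are load-bearing: pci_unmap_sg() below scans the
+	 * list from the tail and treats the last entry with a nonzero
+	 * dma_length as the final mapped entry.)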
+	 * Other archs seem to assume that the user (device driver)
+	 * guarantees that...I don't want to depend on that.
+	 */
+	sg->dma_address = sg->dma_length = 0;
+
+	/* Process the rest of the sg entries */
+	while (--nents) {
+		++sg;
+		/* Clear possibly unused fields. Note: sg >= dma_sg so
+		 * this can't be clearing a field we've already set
+		 */
+		sg->dma_address = sg->dma_length = 0;
+
+		/* Check if it is possible to make this next entry
+		 * contiguous (in dma space) with the previous entry.
+		 */
+
+		/* The entries can be contiguous in dma space if
+		 * the previous entry ends immediately before the
+		 * start of the current entry (in virtual space)
+		 * or if the previous entry ends at a page boundary
+		 * and the current entry starts at a page boundary.
+		 */
+		uaddr = (unsigned long)sg->address;
+		if ( ( uaddr != cur_end_virt ) &&
+		     ( ( ( uaddr | cur_end_virt ) & (~PAGE_MASK) ) ||
+		       ( ( uaddr & PAGE_MASK ) == ( ( cur_end_virt-1 ) & PAGE_MASK ) ) ) ) {
+			/* This entry can not be contiguous in dma space.
+			 * save the previous dma entry and start a new one
+			 */
+			dma_sg->dma_address = cur_start_dma;
+			dma_sg->dma_length  = cur_len_dma;
+
+			++dma_sg;
+			++num_dma_ents;
+
+			cur_start_dma += cur_len_dma-1;
+			/* If the previous entry ends and this entry starts
+			 * in the same page then they share a tce.  In that
+			 * case don't bump cur_start_dma to the next page
+			 * in dma space.  This matches assumptions made in
+			 * num_tces_sg and create_tces_sg.
+			 */
+			if ((uaddr & PAGE_MASK) == ((cur_end_virt-1) & PAGE_MASK))
+				cur_start_dma &= PAGE_MASK;
+			else
+				cur_start_dma = PAGE_ALIGN(cur_start_dma+1);
+			cur_start_dma += ( uaddr & (~PAGE_MASK) );
+			cur_len_dma = 0;
+		}
+		/* Accumulate the length of this entry for the next
+		 * dma entry
+		 */
+		cur_len_dma += sg->length;
+		cur_end_virt = uaddr + sg->length;
+	}
+	/* Fill in the last dma entry */
+	dma_sg->dma_address = cur_start_dma;
+	dma_sg->dma_length  = cur_len_dma;
+
+	if ((((cur_start_dma + cur_len_dma - 1) >> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1) != numTces)
+	{
+		PPCDBG(PPCDBG_TCE, "fill_scatterlist_sg: numTces %ld, used tces %d\n",
+		       numTces,
+		       (unsigned)(((cur_start_dma + cur_len_dma - 1) >> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1));
+	}
+
+	return num_dma_ents;
+}
+
+/* Call the hypervisor to create the TCE entries.
+ * return the dma address of the first page (NO_TCE on failure)
+ */
+static dma_addr_t create_tces_sg( struct TceTable *tbl, struct scatterlist *sg,
+				  int nents, unsigned numTces, int direction )
+{
+	unsigned order, i, j;
+	unsigned long startPage, endPage, prevEndPage, numPages, uaddr;
+	long tcenum, starttcenum;
+	dma_addr_t dmaAddr;
+
+	dmaAddr = NO_TCE;
+
+	order = get_order( numTces << PAGE_SHIFT );
+	/* allocate a block of tces */
+	tcenum = alloc_tce_range( tbl, order );
+	if ( tcenum != -1 ) {
+		tcenum += tbl->startOffset;
+		starttcenum = tcenum;
+		dmaAddr = tcenum << PAGE_SHIFT;
+		prevEndPage = 0;
+		for (j=0; j<nents; ++j) {
+			startPage = (unsigned long)sg->address >> PAGE_SHIFT;
+			endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
+			numPages = endPage - startPage + 1;
+
+			uaddr = (unsigned long)sg->address;
+
+			/* If the previous entry ended in the same page in
+			 * which the current entry starts, then they share
+			 * that tce and we reduce the number of tces we need
+			 * by one.
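+			 * (Sharing is legitimate because one TCE maps a
+			 * whole 4K page; two entries straddling the same
+			 * page simply reuse its entry.)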
This matches assumptions made in + * num_tces_sg and fill_scatterlist_sg + */ + if ( startPage == prevEndPage ) { + --numPages; + uaddr += PAGE_SIZE; + } + + for (i=0; idma_address = pci_map_single( hwdev, sg->address, + sg->length, direction ); + sg->dma_length = sg->length; + return 1; + } + + if ( direction == PCI_DMA_NONE ) + BUG(); + + tbl = get_tce_table(hwdev); + + if ( tbl ) { + /* Compute the number of tces required */ + numTces = num_tces_sg( sg, nents ); + /* Create the tces and get the dma address */ + dma_handle = create_tces_sg( tbl, sg, nents, numTces, direction ); + + /* Fill in the dma scatterlist */ + num_dma = fill_scatterlist_sg( sg, nents, dma_handle, numTces ); + } + + return num_dma; +} + +void pci_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction ) +{ + struct TceTable * tbl; + unsigned order, numTces, i; + dma_addr_t dma_end_page, dma_start_page; + + PPCDBG(PPCDBG_TCE, "pci_unmap_sg:\n"); + PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nelms = 0x%16.16lx\n", hwdev, sg, direction, nelms); + + if ( direction == PCI_DMA_NONE ) + BUG(); + + dma_start_page = sg->dma_address & PAGE_MASK; + for ( i=nelms; i>0; --i ) { + unsigned k = i - 1; + if ( sg[k].dma_length ) { + dma_end_page = ( sg[k].dma_address + + sg[k].dma_length - 1 ) & PAGE_MASK; + break; + } + } + + numTces = ((dma_end_page - dma_start_page ) >> PAGE_SHIFT) + 1; + order = get_order( numTces << PAGE_SHIFT ); + + if ( order > 10 ) + PPCDBG(PPCDBG_TCE, "pci_unmap_sg: order=%d, numTces=%d, nelms=%d, dma_start_page=%016lx, dma_end_page=%016lx\n", + order, numTces, nelms, (unsigned long)dma_start_page, (unsigned long)dma_end_page ); + + tbl = get_tce_table(hwdev); + + if ( tbl ) + ppc_md.tce_free( tbl, dma_start_page, order, numTces ); + +} + +/* + * phb_tce_table_init + * + * Function: Display TCE config registers. Could be easily changed + * to initialize the hardware to use TCEs. 
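+ * (As written, the routine only reads the Python/Speedwagon TCE address
+ * registers and PPCDBG-prints them; nothing is ever written back.)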
+ */ +unsigned long phb_tce_table_init(struct pci_controller *phb) { + unsigned int r, cfg_rw, i; + unsigned long r64; + phandle node; + + PPCDBG(PPCDBG_TCE, "phb_tce_table_init: start.\n"); + + node = ((struct device_node *)(phb->arch_data))->node; + + PPCDBG(PPCDBG_TCEINIT, "\tphb = 0x%lx\n", phb); + PPCDBG(PPCDBG_TCEINIT, "\tphb->type = 0x%lx\n", phb->type); + PPCDBG(PPCDBG_TCEINIT, "\tphb->phb_regs = 0x%lx\n", phb->phb_regs); + PPCDBG(PPCDBG_TCEINIT, "\tphb->chip_regs = 0x%lx\n", phb->chip_regs); + PPCDBG(PPCDBG_TCEINIT, "\tphb: node = 0x%lx\n", node); + PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data = 0x%lx\n", phb->arch_data); + + i = 0; + while(of_tce_table[i].node) { + if(of_tce_table[i].node == node) { + if(phb->type == phb_type_python) { + r = *(((unsigned int *)phb->phb_regs) + (0xf10>>2)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR(low) = 0x%x\n", r); + r = *(((unsigned int *)phb->phb_regs) + (0xf00>>2)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR(high) = 0x%x\n", r); + r = *(((unsigned int *)phb->phb_regs) + (0xfd0>>2)); + PPCDBG(PPCDBG_TCEINIT, "\tPHB cfg(rw) = 0x%x\n", r); + break; + } else if(phb->type == phb_type_speedwagon) { + r64 = *(((unsigned long *)phb->chip_regs) + + (0x800>>3)); + PPCDBG(PPCDBG_TCEINIT, "\tNCFG = 0x%lx\n", r64); + r64 = *(((unsigned long *)phb->chip_regs) + + (0x580>>3)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR0 = 0x%lx\n", r64); + r64 = *(((unsigned long *)phb->chip_regs) + + (0x588>>3)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR1 = 0x%lx\n", r64); + r64 = *(((unsigned long *)phb->chip_regs) + + (0x590>>3)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR2 = 0x%lx\n", r64); + r64 = *(((unsigned long *)phb->chip_regs) + + (0x598>>3)); + PPCDBG(PPCDBG_TCEINIT, "\tTAR3 = 0x%lx\n", r64); + cfg_rw = *(((unsigned int *)phb->chip_regs) + + ((0x160 + + (((phb->local_number)+8)<<12))>>2)); + PPCDBG(PPCDBG_TCEINIT, "\tcfg_rw = 0x%x\n", cfg_rw); + } + } + i++; + } + + PPCDBG(PPCDBG_TCEINIT, "phb_tce_table_init: done\n"); + + return(0); +} + +/* These are called very early. */ +void tce_init_pSeries(void) +{ + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; +} + +void tce_init_iSeries(void) +{ + ppc_md.tce_build = tce_build_iSeries; + ppc_md.tce_free = tce_free_iSeries; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pci_dn.c linuxppc64_2_4/arch/ppc64/kernel/pci_dn.c --- ../kernel.org/linux/arch/ppc64/kernel/pci_dn.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pci_dn.c Fri Oct 26 14:09:56 2001 @@ -0,0 +1,386 @@ +/* + * pci_dn.c + * + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * PCI manipulation via device_nodes. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +/* Traverse_func that inits the PCI fields of the device node. + * NOTE: this *must* be done before read/write config to the device. + */ +static void * __init +update_dn_pci_info(struct device_node *dn, void *data) +{ + struct pci_controller *phb = (struct pci_controller *)data; + u32 *regs; + char *device_type = get_property(dn, "device_type", 0); + + dn->phb = phb; + if (device_type && strcmp(device_type, "pci") == 0 && get_property(dn, "class-code", 0) == 0) { + /* special case for PHB's. Sigh. */ + regs = (u32 *)get_property(dn, "bus-range", 0); + dn->busno = regs[0]; + dn->devfn = 0; /* assumption */ + } else { + regs = (u32 *)get_property(dn, "reg", 0); + if (regs) { + /* First register entry is addr (00BBSS00) */ + dn->busno = (regs[0] >> 16) & 0xff; + dn->devfn = (regs[0] >> 8) & 0xff; + } + } + return NULL; +} + +/* + * Hit all the BARs of all the devices with values from OF. + * This is unnecessary on most systems, but also harmless. + */ +static void * __init +write_OF_bars(struct device_node *dn, void *data) +{ + int i; + u32 oldbar, newbar, newbartest; + u8 config_offset; + char *name = get_property(dn, "name", 0); + char *device_type = get_property(dn, "device_type", 0); + char devname[128]; + sprintf(devname, "%04x:%02x.%x %s (%s)", dn->busno, PCI_SLOT(dn->devfn), PCI_FUNC(dn->devfn), name ? name : "", device_type ? device_type : ""); + + if (device_type && strcmp(device_type, "pci") == 0 && + get_property(dn, "class-code", 0) == 0) + return NULL; /* This is probably a phb. Skip it. */ + + if (dn->n_addrs == 0) + return NULL; /* This is normal for some adapters or bridges */ + + if (dn->addrs == NULL) { + /* This shouldn't happen. */ + printk(KERN_WARNING "write_OF_bars %s: device has %d BARs, but no addrs recorded\n", devname, dn->n_addrs); + return NULL; + } + +#ifndef CONFIG_PPC_ISERIES + for (i = 0; i < dn->n_addrs; i++) { + newbar = dn->addrs[i].address; + config_offset = dn->addrs[i].space & 0xff; + if (ppc_md.pcibios_read_config_dword(dn, config_offset, &oldbar) != PCIBIOS_SUCCESSFUL) { + printk(KERN_WARNING "write_OF_bars %s: read BAR%d failed\n", devname, i); + continue; + } + /* Need to update this BAR. */ + if (ppc_md.pcibios_write_config_dword(dn, config_offset, newbar) != PCIBIOS_SUCCESSFUL) { + printk(KERN_WARNING "write_OF_bars %s: write BAR%d with 0x%08x failed (old was 0x%08x)\n", devname, i, newbar, oldbar); + continue; + } + /* sanity check */ + if (ppc_md.pcibios_read_config_dword(dn, config_offset, &newbartest) != PCIBIOS_SUCCESSFUL) { + printk(KERN_WARNING "write_OF_bars %s: sanity test read BAR%d failed?\n", devname, i); + continue; + } + if ((newbar & PCI_BASE_ADDRESS_MEM_MASK) != (newbartest & PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_WARNING "write_OF_bars %s: oops...BAR%d read back as 0x%08x%s!\n", devname, i, newbartest, (oldbar & PCI_BASE_ADDRESS_MEM_MASK) == (newbartest & PCI_BASE_ADDRESS_MEM_MASK) ? 
" (original value)" : ""); + continue; + } + } +#endif + return NULL; +} + +#if 0 +/* Traverse_func that starts the BIST (self test) */ +static void * __init +startBIST(struct device_node *dn, void *data) +{ + struct pci_controller *phb = (struct pci_controller *)data; + u8 bist; + + char *name = get_property(dn, "name", 0); + udbg_printf("startBIST: %s phb=%p, device=%p\n", name ? name : "", phb, dn); + + if (ppc_md.pcibios_read_config_byte(dn, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL) { + if (bist & PCI_BIST_CAPABLE) { + udbg_printf(" -> is BIST capable!\n", phb, dn); + /* Start bist here */ + } + } + return NULL; +} +#endif + + +/****************************************************************** + * Traverse a device tree stopping each PCI device in the tree. + * This is done depth first. As each node is processed, a "pre" + * function is called, the children are processed recursively, and + * then a "post" function is called. + * + * The "pre" and "post" funcs return a value. If non-zero + * is returned from the "pre" func, the traversal stops and this + * value is returned. The return value from "post" is not used. + * This return value is useful when using traverse as + * a method of finding a device. + * + * NOTE: we do not run the funcs for devices that do not appear to + * be PCI except for the start node which we assume (this is good + * because the start node is often a phb which may be missing PCI + * properties). + * We use the class-code as an indicator. If we run into + * one of these nodes we also assume its siblings are non-pci for + * performance. + * + ******************************************************************/ +void *traverse_pci_devices(struct device_node *start, traverse_func pre, traverse_func post, void *data) +{ + struct device_node *dn, *nextdn; + void *ret; + + if (pre && (ret = pre(start, data)) != NULL) + return ret; + for (dn = start->child; dn; dn = nextdn) { + nextdn = NULL; + if (get_property(dn, "class-code", 0)) { + if (pre && (ret = pre(dn, data)) != NULL) + return ret; + if (dn->child) { + /* Depth first...do children */ + nextdn = dn->child; + } else if (dn->sibling) { + /* ok, try next sibling instead. */ + nextdn = dn->sibling; + } else { + /* no more children or siblings...call "post" */ + if (post) + post(dn, data); + } + } + if (!nextdn) { + /* Walk up to next valid sibling. */ + do { + dn = dn->parent; + if (dn == start) + return NULL; + } while (dn->sibling == NULL); + nextdn = dn->sibling; + } + } + return NULL; +} + +/* Same as traverse_pci_devices except this does it for all phbs. + */ +void *traverse_all_pci_devices(traverse_func pre) +{ + struct pci_controller* phb; + void *ret; + for (phb=hose_head;phb;phb=phb->next) + if ((ret = traverse_pci_devices((struct device_node *)phb->arch_data, pre, NULL, phb)) != NULL) + return ret; + return NULL; +} + + +/* Traversal func that looks for a value. + * If found, the device_node is returned (thus terminating the traversal). + */ +static void * +is_devfn_node(struct device_node *dn, void *data) +{ + int busno = ((unsigned long)data >> 8) & 0xff; + int devfn = ((unsigned long)data) & 0xff; + return (devfn == dn->devfn && busno == dn->busno) ? dn : NULL; +} + +/* Same as is_devfn_node except ignore the "fn" part of the "devfn". + */ +static void * +is_devfn_sub_node(struct device_node *dn, void *data) +{ + int busno = ((unsigned long)data >> 8) & 0xff; + int devfn = ((unsigned long)data) & 0xf8; + return (devfn == (dn->devfn & 0xf8) && busno == dn->busno) ? 
dn : NULL; +} + +/* Given an existing EADs (pci bridge) device node create a fake one + * that will simulate function zero. Make it a sibling of other_eads. + */ +static struct device_node * +create_eads_node(struct device_node *other_eads) +{ + struct device_node *eads = (struct device_node *)kmalloc(sizeof(struct device_node), GFP_KERNEL); + + if (!eads) return NULL; /* huh? */ + *eads = *other_eads; + eads->devfn &= ~7; /* make it function zero */ + eads->tce_table = NULL; + /* NOTE: share properties. We could copy but for now this should suffice. + * The full_name is also incorrect...but seems harmless. + */ + eads->child = NULL; + eads->next = NULL; + other_eads->allnext = eads; + other_eads->sibling = eads; + return eads; +} + +/* This is the "slow" path for looking up a device_node from a + * pci_dev. It will hunt for the device under it's parent's + * phb and then update sysdata for a future fastpath. + * + * It may also do fixups on the actual device since this happens + * on the first read/write. + * + * Note that it also must deal with devices that don't exist. + * In this case it may probe for real hardware ("just in case") + * and add a device_node to the device tree if necessary. + * + */ +struct device_node *fetch_dev_dn(struct pci_dev *dev) +{ + struct device_node *orig_dn = (struct device_node *)dev->sysdata; + struct pci_controller *phb = orig_dn->phb; /* assume same phb as orig_dn */ + struct device_node *phb_dn; + struct device_node *dn; + unsigned long searchval = (dev->bus->number << 8) | dev->devfn; + + phb_dn = (struct device_node *)(phb->arch_data); + dn = (struct device_node *)traverse_pci_devices(phb_dn, is_devfn_node, NULL, (void *)searchval); + if (dn) { + dev->sysdata = dn; + /* ToDo: call some device init hook here */ + } else { + /* Now it is very possible that we can't find the device because it is + * not the zero'th device of a mutifunction device and we don't have + * permission to read the zero'th device. If this is the case, Linux + * would ordinarily skip all the other functions. + */ + if ((searchval & 0x7) == 0) { + struct device_node *thisdevdn; + /* Ok, we are looking for fn == 0. Let's check for other functions. */ + thisdevdn = (struct device_node *)traverse_pci_devices(phb_dn, is_devfn_sub_node, NULL, (void *)searchval); + if (thisdevdn) { + /* Ah ha! There does exist a sub function. Now this isn't an exact + * match for searchval, but in order to get Linux to believe the sub + * functions exist we will need to manufacture a fake device_node + * for this zero'th function. To keept this simple for now we only + * handle pci bridges and we just hand back the found node which + * isn't correct, but Linux won't care. + */ + char *device_type = (char *)get_property(thisdevdn, "device_type", 0); + if (device_type && strcmp(device_type, "pci") == 0) { + return create_eads_node(thisdevdn); + } + } + } + /* ToDo: device not found...probe for it anyway with a fake dn? + struct device_node fake_dn; + memset(&fake_dn, 0, sizeof(fake_dn)); + fake_dn.phb = phb; + fake_dn.busno = dev->bus->number; + fake_dn.devfn = dev->devfn; + ... now do ppc_md.pcibios_read_config_dword(&fake_dn.....) + ... if ok, alloc a real device_node and dn = real_dn; + */ + } + return dn; +} + + +/****************************************************************** + * Actually initialize the phbs. + * The buswalk on this phb has not happened yet. 
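+ * Ordering note: update_dn_pci_info has to run before anything touches
+ * config space, since it supplies the busno/devfn values the config
+ * accessors key on.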
+ ******************************************************************/ +void __init +pci_devs_phb_init(void) +{ + /* This must be done first so the device nodes have valid pci info! */ + traverse_all_pci_devices(update_dn_pci_info); + + /* Hack for regatta which does not init the bars correctly */ + traverse_all_pci_devices(write_OF_bars); +#if 0 + traverse_all_pci_devices(startBIST); + mdelay(5000); + traverse_all_pci_devices(checkBIST); +#endif +} + + +static void __init +pci_fixup_bus_sysdata_list(struct list_head *bus_list) +{ + struct list_head *ln; + struct pci_bus *bus; + struct pci_controller *phb; + int newnum; + + for (ln=bus_list->next; ln != bus_list; ln=ln->next) { + bus = pci_bus_b(ln); + if (bus->self) { + bus->sysdata = bus->self->sysdata; + /* Also fixup the bus number on large bus systems to + * include the PHB# in the next byte + */ + phb = PCI_GET_DN(bus)->phb; + if (phb && phb->buid) { + newnum = (phb->global_number << 8) | bus->number; + bus->number = newnum; + sprintf(bus->name, "PCI Bus #%x", bus->number); + } + } + pci_fixup_bus_sysdata_list(&bus->children); + } +} + + +/****************************************************************** + * Fixup the bus->sysdata ptrs to point to the bus' device_node. + * This is done late in pcibios_init(). We do this mostly for + * sanity, but pci_dma.c uses these at DMA time so they must be + * correct. + * To do this we recurse down the bus hierarchy. Note that PHB's + * have bus->self == NULL, but fortunately bus->sysdata is already + * correct in this case. + ******************************************************************/ +void __init +pci_fix_bus_sysdata(void) +{ + pci_fixup_bus_sysdata_list(&pci_root_buses); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pmac_nvram.c linuxppc64_2_4/arch/ppc64/kernel/pmac_nvram.c --- ../kernel.org/linux/arch/ppc64/kernel/pmac_nvram.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pmac_nvram.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,358 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Miscellaneous procedures for dealing with the PowerMac hardware. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. 
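+ * (On Core99 machines NVRAM is really two 8K flash banks: the bank with
+ * the newer valid generation count is mirrored into a RAM image at init,
+ * and pmac_nvram_update() later flushes that image to the other bank.)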
+ */ +static int nvram_naddrs; +static volatile unsigned char *nvram_addr; +static volatile unsigned char *nvram_data; +static int nvram_mult, is_core_99; +static char* nvram_image; +static int core99_bank = 0; +sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* Core99 nvram is a flash */ +#define CORE99_FLASH_STATUS_DONE 0x80 +#define CORE99_FLASH_STATUS_ERR 0x38 +#define CORE99_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define CORE99_FLASH_CMD_ERASE_SETUP 0x20 +#define CORE99_FLASH_CMD_RESET 0xff +#define CORE99_FLASH_CMD_WRITE_SETUP 0x40 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +static int nvram_partitions[3]; + +static u8 +chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 +core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 +core99_check(u8* datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { +#ifdef DEBUG + printk("Invalid signature\n"); +#endif + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { +#ifdef DEBUG + printk("Invalid checksum\n"); +#endif + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { +#ifdef DEBUG + printk("Invalid adler\n"); +#endif + return 0; + } + return hdr99->generation; +} + +static int +core99_erase_bank(int bank) +{ + int stat, i; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + out_8(base, CORE99_FLASH_CMD_ERASE_SETUP); + out_8(base, CORE99_FLASH_CMD_ERASE_CONFIRM); + do { stat = in_8(base); } + while(!(stat & CORE99_FLASH_STATUS_DONE)); + out_8(base, CORE99_FLASH_CMD_RESET); + if (stat & CORE99_FLASH_STATUS_ERR) { + printk("nvram: flash error 0x%02x on erase !\n", stat); + return -ENXIO; + } + for (i=0; in_addrs; + is_core_99 = device_is_compatible(dp, "nvram,flash"); + if (is_core_99) { + int i; + u32 gen_bank0, gen_bank1; + + if (nvram_naddrs < 1) { + printk(KERN_ERR "nvram: no address\n"); + return; + } + nvram_image = kmalloc(NVRAM_SIZE, GFP_KERNEL); + if (!nvram_image) { + printk(KERN_ERR "nvram: can't allocate image\n"); + return; + } + nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2); +#ifdef DEBUG + printk("nvram: Checking bank 0...\n"); +#endif + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 
1 : 0; +#ifdef DEBUG + printk("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + printk("nvram: Active bank is: %d\n", core99_bank); +#endif + for (i=0; iaddrs[0].address, dp->addrs[0].size); + nvram_mult = 1; + } else if (nvram_naddrs == 1) { + nvram_data = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_mult = (dp->addrs[0].size + NVRAM_SIZE - 1) / NVRAM_SIZE; + } else if (nvram_naddrs == 2) { + nvram_addr = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_data = ioremap(dp->addrs[1].address, dp->addrs[1].size); + } else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) { + nvram_naddrs = -1; + } else { + printk(KERN_ERR "Don't know how to access NVRAM with %d addresses\n", + nvram_naddrs); + } +} + +void +pmac_nvram_update(void) +{ + struct core99_header* hdr99; + + if (!is_core_99 || !nvram_data || !nvram_image) + return; + if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, + NVRAM_SIZE)) + return; +#ifdef DEBUG + printk("Updating nvram...\n"); +#endif + hdr99 = (struct core99_header*)nvram_image; + hdr99->generation++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + return; + } + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); +} + +__openfirmware +unsigned char nvram_read_byte(int addr) +{ + #ifdef CONFIG_ADB_PMU // -aglitke + struct adb_request req; + #endif + + switch (nvram_naddrs) { +#ifdef CONFIG_ADB_PMU + case -1: + if (pmu_request(&req, NULL, 3, PMU_READ_NVRAM, + (addr >> 8) & 0xff, addr & 0xff)) + break; + while (!req.complete) + pmu_poll(); + return req.reply[1]; +#endif + case 1: + if (is_core_99) + return nvram_image[addr]; + return nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]; + case 2: + *nvram_addr = addr >> 5; + eieio(); + return nvram_data[(addr & 0x1f) << 4]; + } + return 0; +} + +__openfirmware +void nvram_write_byte(unsigned char val, int addr) +{ + #ifdef CONFIG_ADB_PMU // -aglitke + struct adb_request req; + #endif + switch (nvram_naddrs) { +#ifdef CONFIG_ADB_PMU + case -1: + if (pmu_request(&req, NULL, 4, PMU_WRITE_NVRAM, + (addr >> 8) & 0xff, addr & 0xff, val)) + break; + while (!req.complete) + pmu_poll(); + break; +#endif + case 1: + if (is_core_99) { + nvram_image[addr] = val; + break; + } + nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult] = val; + break; + case 2: + *nvram_addr = addr >> 5; + eieio(); + nvram_data[(addr & 0x1f) << 4] = val; + break; + } + eieio(); +} + +int +pmac_get_partition(int partition) +{ + return nvram_partitions[partition]; +} + +u8 +pmac_xpram_read(int xpaddr) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return 0; + + return nvram_read_byte(xpaddr + offset); +} + +void +pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return; + + nvram_write_byte(xpaddr + offset, data); +} + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/pmc.c linuxppc64_2_4/arch/ppc64/kernel/pmc.c --- ../kernel.org/linux/arch/ppc64/kernel/pmc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/pmc.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,167 @@ +/* + * pmc.c + * Copyright (C) 2001 Dave Engebretsen & Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it 
under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Change Activity: + * 2001/06/05 : engebret : Created. + * End Change Activity + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern struct Naca *naca; + +struct _pmc_sw pmc_sw_system = { + 0 +}; + +struct _pmc_sw pmc_sw_cpu[NR_CPUS] = { + {0 }, +}; + +/* + * Provide enough storage for either system level counters or + * one cpu's counters. + */ +struct _pmc_sw_text pmc_sw_text; +struct _pmc_hw_text pmc_hw_text; + +char * +ppc64_pmc_stab(int file) +{ + int n; + unsigned long stab_faults, stab_capacity_castouts, stab_invalidations; + unsigned long i; + + stab_faults = stab_capacity_castouts = stab_invalidations = n = 0; + + if (file == -1) { + for (i = 0; i < smp_num_cpus; i++) { + stab_faults += pmc_sw_cpu[i].stab_faults; + stab_capacity_castouts += pmc_sw_cpu[i].stab_capacity_castouts; + stab_invalidations += pmc_sw_cpu[i].stab_invalidations; + } + n += sprintf(pmc_sw_text.buffer + n, + "Faults 0x%lx\n", stab_faults); + n += sprintf(pmc_sw_text.buffer + n, + "Castouts 0x%lx\n", stab_capacity_castouts); + n += sprintf(pmc_sw_text.buffer + n, + "Invalidations 0x%lx\n", stab_invalidations); + } else { + n += sprintf(pmc_sw_text.buffer + n, + "Faults 0x%lx\n", + pmc_sw_cpu[file].stab_faults); + + n += sprintf(pmc_sw_text.buffer + n, + "Castouts 0x%lx\n", + pmc_sw_cpu[file].stab_capacity_castouts); + + n += sprintf(pmc_sw_text.buffer + n, + "Invalidations 0x%lx\n", + pmc_sw_cpu[file].stab_invalidations); + + for (i = 0; i < STAB_ENTRY_MAX; i++) { + if (pmc_sw_cpu[file].stab_entry_use[i]) { + n += sprintf(pmc_sw_text.buffer + n, + "Entry %02ld 0x%lx\n", i, + pmc_sw_cpu[file].stab_entry_use[i]); + } + } + + } + + return(pmc_sw_text.buffer); +} + +char * +ppc64_pmc_htab(int file) +{ + int n; + unsigned long htab_primary_overflows, htab_capacity_castouts; + unsigned long htab_read_to_write_faults; + + htab_primary_overflows = htab_capacity_castouts = 0; + htab_read_to_write_faults = n = 0; + + if (file == -1) { + n += sprintf(pmc_sw_text.buffer + n, + "Primary Overflows 0x%lx\n", + pmc_sw_system.htab_primary_overflows); + n += sprintf(pmc_sw_text.buffer + n, + "Castouts 0x%lx\n", + pmc_sw_system.htab_capacity_castouts); + } else { + n += sprintf(pmc_sw_text.buffer + n, + "Primary Overflows N/A\n"); + + n += sprintf(pmc_sw_text.buffer + n, + "Castouts N/A\n\n"); + + } + + return(pmc_sw_text.buffer); +} + +char * +ppc64_pmc_hw(int file) +{ + int n; + + n = 0; + if (file == -1) { + n += sprintf(pmc_hw_text.buffer + n, "Not Implemented\n"); + } else { + n += sprintf(pmc_hw_text.buffer + n, + "MMCR0 0x%lx\n", mfspr(MMCR0)); + n += sprintf(pmc_hw_text.buffer + n, + "MMCR1 0x%lx\n", mfspr(MMCR1)); +#if 0 + n += sprintf(pmc_hw_text.buffer + n, + "MMCRA 0x%lx\n", mfspr(MMCRA)); +#endif + + n += sprintf(pmc_hw_text.buffer + n, + "PMC1 0x%lx\n", mfspr(PMC1)); + n += sprintf(pmc_hw_text.buffer + n, + 
"PMC2 0x%lx\n", mfspr(PMC2)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC3 0x%lx\n", mfspr(PMC3)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC4 0x%lx\n", mfspr(PMC4)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC5 0x%lx\n", mfspr(PMC5)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC6 0x%lx\n", mfspr(PMC6)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC7 0x%lx\n", mfspr(PMC7)); + n += sprintf(pmc_hw_text.buffer + n, + "PMC8 0x%lx\n", mfspr(PMC8)); + } + + return(pmc_hw_text.buffer); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ppc-stub.c linuxppc64_2_4/arch/ppc64/kernel/ppc-stub.c --- ../kernel.org/linux/arch/ppc64/kernel/ppc-stub.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc-stub.c Fri Sep 14 22:14:50 2001 @@ -0,0 +1,739 @@ +/* + * ppc-stub.c: KGDB support for the Linux kernel. + * + * adapted from arch/sparc/kernel/sparc-stub.c for the PowerPC + * some stuff borrowed from Paul Mackerras' xmon + * Copyright (C) 1998 Michael AK Tesch (tesch@cs.wisc.edu) + * + * Modifications to run under Linux + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * + * This file originally came from the gdb sources, and the + * copyright notices have been retained below. + */ + +/**************************************************************************** + + THIS SOFTWARE IS NOT COPYRIGHTED + + HP offers the following for use in the public domain. HP makes no + warranty with regard to the software or its performance and the + user accepts the software "AS IS" with all faults. + + HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD + TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +****************************************************************************/ + +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for SPARC by Stu Grossman, Cygnus Support. + * + * This code has been extensively tested on the Fujitsu SPARClite demo board. + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing a trap #1. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * qOffsets Get section offsets. Reply is Text=xxx;Data=yyy;Bss=zzz + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? 
SNN (signal NN) + * + * bBB..BB Set baud rate to BB..BB OK or BNN, then sets + * baud rate + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +void breakinst(void); + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound buffers + * at least NUMREGBYTES*2 are needed for register packets + */ +#define BUFMAX 2048 +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; + +static int initialized = 0; +static int kgdb_active = 0; +static int kgdb_started = 0; +static u_int fault_jmp_buf[100]; +static int kdebug; + +static const char hexchars[]="0123456789abcdef"; + +/* Place where we save old trap entries for restoration - sparc*/ +/* struct tt_entry kgdb_savettable[256]; */ +/* typedef void (*trapfunc_t)(void); */ + +#if 0 +/* Install an exception handler for kgdb */ +static void exceptionHandler(int tnum, unsigned int *tfunc) +{ + /* We are dorking with a live trap table, all irqs off */ +} +#endif + +int +kgdb_setjmp(long *buf) +{ + asm ("mflr 0; stw 0,0(%0);" + "stw 1,4(%0); stw 2,8(%0);" + "mfcr 0; stw 0,12(%0);" + "stmw 13,16(%0)" + : : "r" (buf)); + /* XXX should save fp regs as well */ + return 0; +} +void +kgdb_longjmp(long *buf, int val) +{ + if (val == 0) + val = 1; + asm ("lmw 13,16(%0);" + "lwz 0,12(%0); mtcrf 0x38,0;" + "lwz 0,0(%0); lwz 1,4(%0); lwz 2,8(%0);" + "mtlr 0; mr 3,%1" + : : "r" (buf), "r" (val)); +} +/* Convert ch from a hex digit to an int */ +static int +hex(unsigned char ch) +{ + if (ch >= 'a' && ch <= 'f') + return ch-'a'+10; + if (ch >= '0' && ch <= '9') + return ch-'0'; + if (ch >= 'A' && ch <= 'F') + return ch-'A'+10; + return -1; +} + +/* Convert the memory pointed to by mem into hex, placing result in buf. + * Return a pointer to the last char put in buf (null), in case of mem fault, + * return 0. + */ +static unsigned char * +mem2hex(char *mem, char *buf, int count) +{ + unsigned char ch; + + if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { + debugger_fault_handler = kgdb_fault_handler; + while (count-- > 0) { + ch = *mem++; + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch & 0xf]; + } + } else { + /* error condition */ + } + debugger_fault_handler = 0; + *buf = 0; + return buf; +} + +/* convert the hex array pointed to by buf into binary to be placed in mem + * return a pointer to the character AFTER the last byte written. 
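+ * Both converters run with debugger_fault_handler armed via
+ * kgdb_setjmp(), so a bad address longjmps back to the stub instead of
+ * taking a fault.  For example, hex2mem("deadbeef", mem, 4) should
+ * store the bytes 0xde 0xad 0xbe 0xef.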
+*/ +static char * +hex2mem(char *buf, char *mem, int count) +{ + int i; + unsigned char ch; + + if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { + debugger_fault_handler = kgdb_fault_handler; + for (i=0; i# */ +static void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + unsigned char ch; + + do { + /* wait around for the start character, ignore all other + * characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + + if (count >= BUFMAX) + continue; + + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum |= hex(getDebugChar() & 0x7f); + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i=3; i <= count; i++) + buffer[i-3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); +} + +/* send the packet in buffer. */ +static void putpacket(unsigned char *buffer) +{ + unsigned char checksum; + int count; + unsigned char ch, recv; + + /* $#. */ + do { + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum & 0xf]); + recv = getDebugChar(); + } while ((recv & 0x7f) != '+'); +} + +static void kgdb_flush_cache_all(void) +{ + flush_instruction_cache(); +} + + +/* Set up exception handlers for tracing and breakpoints + * [could be called kgdb_init()] + */ +void set_debug_traps(void) +{ +#if 0 + unsigned char c; + + save_and_cli(flags); + + /* In case GDB is started before us, ack any packets (presumably + * "$?#xx") sitting there. + * + * I've found this code causes more problems than it solves, + * so that's why it's commented out. GDB seems to work fine + * now starting either before or after the kernel -bwb + */ + + while((c = getDebugChar()) != '$'); + while((c = getDebugChar()) != '#'); + c = getDebugChar(); /* eat first csum byte */ + c = getDebugChar(); /* eat second csum byte */ + putDebugChar('+'); /* ack it */ +#endif + debugger = kgdb; + debugger_bpt = kgdb_bpt; + debugger_sstep = kgdb_sstep; + debugger_iabr_match = kgdb_iabr_match; + debugger_dabr_match = kgdb_dabr_match; + + initialized = 1; +} + +static void kgdb_fault_handler(struct pt_regs *regs) +{ + kgdb_longjmp((long*)fault_jmp_buf, 1); +} + +int kgdb_bpt(struct pt_regs *regs) +{ + handle_exception(regs); + return 1; +} + +int kgdb_sstep(struct pt_regs *regs) +{ + handle_exception(regs); + return 1; +} + +void kgdb(struct pt_regs *regs) +{ + handle_exception(regs); +} + +int kgdb_iabr_match(struct pt_regs *regs) +{ + printk("kgdb doesn't support iabr, what?!?\n"); + handle_exception(regs); + return 1; +} + +int kgdb_dabr_match(struct pt_regs *regs) +{ + printk("kgdb doesn't support dabr, what?!?\n"); + handle_exception(regs); + return 1; +} + +/* Convert the SPARC hardware trap type code to a unix signal number. 
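+ * (The SPARC wording is a leftover from sparc-stub.c; the table below is
+ * the PowerPC mapping, e.g. vector 0x700, the program check used for
+ * breakpoints, becomes SIGTRAP.)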
*/ +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. + */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x200, SIGSEGV }, /* machine check */ + { 0x300, SIGSEGV }, /* address error (store) */ + { 0x400, SIGBUS }, /* instruction bus error */ + { 0x500, SIGINT }, /* interrupt */ + { 0x600, SIGBUS }, /* alingment */ + { 0x700, SIGTRAP }, /* breakpoint trap */ + { 0x800, SIGFPE }, /* fpu unavail */ + { 0x900, SIGALRM }, /* decrementer */ + { 0xa00, SIGILL }, /* reserved */ + { 0xb00, SIGILL }, /* reserved */ + { 0xc00, SIGCHLD }, /* syscall */ + { 0xd00, SIGTRAP }, /* single-step/watch */ + { 0xe00, SIGFPE }, /* fp assist */ + { 0, 0} /* Must be last */ +}; + +static int computeSignal(unsigned int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +#define PC_REGNUM 64 +#define SP_REGNUM 1 + +/* + * This function does all command processing for interfacing to gdb. + */ +static void +handle_exception (struct pt_regs *regs) +{ + int sigval; + int addr; + int length; + char *ptr; + unsigned long msr; + + if (debugger_fault_handler) { + debugger_fault_handler(regs); + panic("kgdb longjump failed!\n"); + } + if (kgdb_active) { + printk("interrupt while in kgdb, returning\n"); + return; + } + kgdb_active = 1; + kgdb_started = 1; + +#ifdef KGDB_DEBUG + printk("kgdb: entering handle_exception; trap [0x%x]\n", + (unsigned int)regs->trap); +#endif + + kgdb_interruptible(0); + lock_kernel(); + msr = get_msr(); + set_msr(msr & ~MSR_EE); /* disable interrupts */ + + if (regs->nip == (unsigned long)breakinst) { + /* Skip over breakpoint trap insn */ + regs->nip += 4; + } + + /* reply to host that an exception has occurred */ + sigval = computeSignal(regs->trap); + ptr = remcomOutBuffer; + +#if 0 + *ptr++ = 'S'; + *ptr++ = hexchars[sigval >> 4]; + *ptr++ = hexchars[sigval & 0xf]; +#else + *ptr++ = 'T'; + *ptr++ = hexchars[sigval >> 4]; + *ptr++ = hexchars[sigval & 0xf]; + *ptr++ = hexchars[PC_REGNUM >> 4]; + *ptr++ = hexchars[PC_REGNUM & 0xf]; + *ptr++ = ':'; + ptr = mem2hex((char *)®s->nip, ptr, 4); + *ptr++ = ';'; + *ptr++ = hexchars[SP_REGNUM >> 4]; + *ptr++ = hexchars[SP_REGNUM & 0xf]; + *ptr++ = ':'; + ptr = mem2hex(((char *)®s) + SP_REGNUM*4, ptr, 4); + *ptr++ = ';'; +#endif + + *ptr++ = 0; + + putpacket(remcomOutBuffer); + + /* XXX We may want to add some features dealing with poking the + * XXX page tables, ... (look at sparc-stub.c for more info) + * XXX also required hacking to the gdb sources directly... + */ + + while (1) { + remcomOutBuffer[0] = 0; + + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': /* report most recent signal */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[sigval >> 4]; + remcomOutBuffer[2] = hexchars[sigval & 0xf]; + remcomOutBuffer[3] = 0; + break; +#if 0 + case 'q': /* this screws up gdb for some reason...*/ + { + extern long _start, sdata, __bss_start; + + ptr = &remcomInBuffer[1]; + if (strncmp(ptr, "Offsets", 7) != 0) + break; + + ptr = remcomOutBuffer; + sprintf(ptr, "Text=%8.8x;Data=%8.8x;Bss=%8.8x", + &_start, &sdata, &__bss_start); + break; + } +#endif + case 'd': + /* toggle debug flag */ + kdebug ^= 1; + break; + + case 'g': /* return the value of the CPU registers. 
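+			   * (gdb gets everything back as one hex blob in
+			   * its fixed register layout; the FP values are
+			   * faked as zeros below.)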
+ * some of them are non-PowerPC names :( + * they are stored in gdb like: + * struct { + * u32 gpr[32]; + * f64 fpr[32]; + * u32 pc, ps, cnd, lr; (ps=msr) + * u32 cnt, xer, mq; + * } + */ + { + int i; + ptr = remcomOutBuffer; + /* General Purpose Regs */ + ptr = mem2hex((char *)regs, ptr, 32 * 4); + /* Floating Point Regs - FIXME */ + /*ptr = mem2hex((char *), ptr, 32 * 8);*/ + for(i=0; i<(32*8*2); i++) { /* 2chars/byte */ + ptr[i] = '0'; + } + ptr += 32*8*2; + /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ + ptr = mem2hex((char *)®s->nip, ptr, 4); + ptr = mem2hex((char *)®s->msr, ptr, 4); + ptr = mem2hex((char *)®s->ccr, ptr, 4); + ptr = mem2hex((char *)®s->link, ptr, 4); + ptr = mem2hex((char *)®s->ctr, ptr, 4); + ptr = mem2hex((char *)®s->xer, ptr, 4); + } + break; + + case 'G': /* set the value of the CPU registers */ + { + ptr = &remcomInBuffer[1]; + + /* + * If the stack pointer has moved, you should pray. + * (cause only god can help you). + */ + + /* General Purpose Regs */ + hex2mem(ptr, (char *)regs, 32 * 4); + + /* Floating Point Regs - FIXME?? */ + /*ptr = hex2mem(ptr, ??, 32 * 8);*/ + ptr += 32*8*2; + + /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ + ptr = hex2mem(ptr, (char *)®s->nip, 4); + ptr = hex2mem(ptr, (char *)®s->msr, 4); + ptr = hex2mem(ptr, (char *)®s->ccr, 4); + ptr = hex2mem(ptr, (char *)®s->link, 4); + ptr = hex2mem(ptr, (char *)®s->ctr, 4); + ptr = hex2mem(ptr, (char *)®s->xer, 4); + + strcpy(remcomOutBuffer,"OK"); + } + break; + case 'H': + /* don't do anything, yet, just acknowledge */ + hexToInt(&ptr, &addr); + strcpy(remcomOutBuffer,"OK"); + break; + + case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + /* Try to read %x,%x. */ + + ptr = &remcomInBuffer[1]; + + if (hexToInt(&ptr, &addr) + && *ptr++ == ',' + && hexToInt(&ptr, &length)) { + if (mem2hex((char *)addr, remcomOutBuffer,length)) + break; + strcpy (remcomOutBuffer, "E03"); + } else { + strcpy(remcomOutBuffer,"E01"); + } + break; + + case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ + /* Try to read '%x,%x:'. */ + + ptr = &remcomInBuffer[1]; + + if (hexToInt(&ptr, &addr) + && *ptr++ == ',' + && hexToInt(&ptr, &length) + && *ptr++ == ':') { + if (hex2mem(ptr, (char *)addr, length)) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E03"); + } + flush_icache_range(addr, addr+length); + } else { + strcpy(remcomOutBuffer, "E02"); + } + break; + + + case 'k': /* kill the program, actually just continue */ + case 'c': /* cAA..AA Continue; address AA..AA optional */ + /* try to read optional parameter, pc unchanged if no parm */ + + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + regs->nip = addr; + } + +/* Need to flush the instruction cache here, as we may have deposited a + * breakpoint, and the icache probably has no way of knowing that a data ref to + * some location may have changed something that is in the instruction cache. 
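+ * (The 'M' handler already calls flush_icache_range() for each memory
+ * write; this kgdb_flush_cache_all() is the catch-all pass before we
+ * hand control back to the kernel.)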
+ */ + kgdb_flush_cache_all(); + set_msr(msr); + kgdb_interruptible(1); + unlock_kernel(); + kgdb_active = 0; + return; + + case 's': + kgdb_flush_cache_all(); + regs->msr |= MSR_SE; +#if 0 + set_msr(msr | MSR_SE); +#endif + unlock_kernel(); + kgdb_active = 0; + return; + + case 'r': /* Reset (if user process..exit ???)*/ + panic("kgdb reset."); + break; + } /* switch */ + if (remcomOutBuffer[0] && kdebug) { + printk("remcomInBuffer: %s\n", remcomInBuffer); + printk("remcomOutBuffer: %s\n", remcomOutBuffer); + } + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1) */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ + +void +breakpoint(void) +{ + if (!initialized) { + printk("breakpoint() called b4 kgdb init\n"); + return; + } + + asm(" .globl breakinst + breakinst: .long 0x7d821008 + "); +} + +/* Output string in GDB O-packet format if GDB has connected. If nothing + output, returns 0 (caller must then handle output). */ +int +kgdb_output_string (const char* s, unsigned int count) +{ + char buffer[512]; + + if (!kgdb_started) + return 0; + + count = (count <= (sizeof(buffer) / 2 - 2)) + ? count : (sizeof(buffer) / 2 - 2); + + buffer[0] = 'O'; + mem2hex (s, &buffer[1], count); + putpacket(buffer); + + return 1; + } diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ppc_asm.h linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.h --- ../kernel.org/linux/arch/ppc64/kernel/ppc_asm.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.h Wed Sep 26 03:16:27 2001 @@ -0,0 +1,117 @@ +/* + * arch/ppc/kernel/ppc_asm.h + * + * Definitions used by various bits of low-level assembly code on PowerPC. + * + * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include "ppc_asm.tmpl" +#include "ppc_defs.h" + +/* + * Macros for storing registers into and loading registers from + * exception frames. 
+ */ +#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) +#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) +#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) +#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) +#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) +#define REST_GPR(n, base) ld n,GPR0+8*(n)(base) +#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) +#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) +#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) +#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) + +#define SAVE_FPR(n, base) stfd n,THREAD_FPR0+8*(n)(base) +#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) +#define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base) +#define SAVE_8FPRS(n, base) SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base) +#define SAVE_16FPRS(n, base) SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base) +#define SAVE_32FPRS(n, base) SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base) +#define REST_FPR(n, base) lfd n,THREAD_FPR0+8*(n)(base) +#define REST_2FPRS(n, base) REST_FPR(n, base); REST_FPR(n+1, base) +#define REST_4FPRS(n, base) REST_2FPRS(n, base); REST_2FPRS(n+2, base) +#define REST_8FPRS(n, base) REST_4FPRS(n, base); REST_4FPRS(n+4, base) +#define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base) +#define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base) + +#define CHECKANYINT(ra,rb) \ + mfspr rb,SPRG3; /* Get Paca address */\ + ld ra,PACALPPACA+LPPACAANYINT(rb); /* Get pending interrupt flags */\ + cmpldi 0,ra,0; + +/* Macros to adjust thread priority for Iseries hardware multithreading */ +#define HMT_LOW or 1,1,1 +#define HMT_MEDIUM or 2,2,2 +#define HMT_HIGH or 3,3,3 + +/* Insert the high 32 bits of the MSR into what will be the new + MSR (via SRR1 and rfid) This preserves the MSR.SF and MSR.ISF + bits. */ + +#define FIX_SRR1(ra, rb) \ + mr rb,ra; \ + mfmsr ra; \ + rldimi ra,rb,0,32 + +#define CLR_TOP32(r) rlwinm (r),(r),0,0,31 /* clear top 32 bits */ + +/* + * LOADADDR( rn, name ) + * loads the address of 'name' into 'rn' + * + * LOADBASE( rn, name ) + * loads the address (less the low 16 bits) of 'name' into 'rn' + * suitable for base+disp addressing + */ +#define LOADADDR(rn,name) \ + lis rn,name##@highest; \ + ori rn,rn,name##@higher; \ + rldicr rn,rn,32,31; \ + oris rn,rn,name##@h; \ + ori rn,rn,name##@l + +#define LOADBASE(rn,name) \ + lis rn,name@highest; \ + ori rn,rn,name@higher; \ + rldicr rn,rn,32,31; \ + oris rn,rn,name@ha + + +#define SET_REG_TO_CONST(reg, value) \ + lis reg,(((value)>>48)&0xFFFF); \ + ori reg,reg,(((value)>>32)&0xFFFF); \ + rldicr reg,reg,32,31; \ + oris reg,reg,(((value)>>16)&0xFFFF); \ + ori reg,reg,((value)&0xFFFF); + +#define SET_REG_TO_LABEL(reg, label) \ + lis reg,(label)@highest; \ + ori reg,reg,(label)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(label)@h; \ + ori reg,reg,(label)@l; + + +/* PPPBBB - DRENG If KERNELBASE is always 0xC0..., + * Then we can easily do this with one asm insn. 
-Peter + */ +#define tophys(rd,rs) \ + lis rd,((KERNELBASE>>48)&0xFFFF); \ + rldicr rd,rd,32,31; \ + sub rd,rs,rd + +#define tovirt(rd,rs) \ + lis rd,((KERNELBASE>>48)&0xFFFF); \ + rldicr rd,rd,32,31; \ + add rd,rs,rd + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ppc_asm.tmpl linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.tmpl --- ../kernel.org/linux/arch/ppc64/kernel/ppc_asm.tmpl Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.tmpl Fri May 4 17:13:59 2001 @@ -0,0 +1,115 @@ +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + + +/* Floating Point Registers (FPRs) */ + +#define fr0 0 +#define fr1 1 +#define fr2 2 +#define fr3 3 +#define fr4 4 +#define fr5 5 +#define fr6 6 +#define fr7 7 +#define fr8 8 +#define fr9 9 +#define fr10 10 +#define fr11 11 +#define fr12 12 +#define fr13 13 +#define fr14 14 +#define fr15 15 +#define fr16 16 +#define fr17 17 +#define fr18 18 +#define fr19 19 +#define fr20 20 +#define fr21 21 +#define fr22 22 +#define fr23 23 +#define fr24 24 +#define fr25 25 +#define fr26 26 +#define fr27 27 +#define fr28 28 +#define fr29 29 +#define fr30 30 +#define fr31 31 + +#define vr0 0 +#define vr1 1 +#define vr2 2 +#define vr3 3 +#define vr4 4 +#define vr5 5 +#define vr6 6 +#define vr7 7 +#define vr8 8 +#define vr9 9 +#define vr10 10 +#define vr11 11 +#define vr12 12 +#define vr13 13 +#define vr14 14 +#define vr15 15 +#define vr16 16 +#define vr17 17 +#define vr18 18 +#define vr19 19 +#define vr20 20 +#define vr21 21 +#define vr22 22 +#define vr23 23 +#define vr24 24 +#define vr25 25 +#define vr26 26 +#define vr27 27 +#define vr28 28 +#define vr29 29 +#define vr30 30 +#define vr31 31 diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ppc_defs.head linuxppc64_2_4/arch/ppc64/kernel/ppc_defs.head --- ../kernel.org/linux/arch/ppc64/kernel/ppc_defs.head Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_defs.head Fri May 4 17:13:59 2001 @@ -0,0 +1,3 @@ +/* + * WARNING! This file is automatically generated - DO NOT EDIT! + */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ppc_ksyms.c linuxppc64_2_4/arch/ppc64/kernel/ppc_ksyms.c --- ../kernel.org/linux/arch/ppc64/kernel/ppc_ksyms.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_ksyms.c Tue Nov 6 08:12:29 2001 @@ -0,0 +1,302 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_SMP +#include +#endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_ISERIES +#include +#include +#endif + +/* Tell string.h we don't want memcpy etc. as cpp defines */ +#define EXPORT_SYMTAB_STROPS + +extern void syscall_trace(void); +extern void do_IRQ(struct pt_regs *regs, int isfake); +extern void SystemResetException(struct pt_regs *regs); +extern void MachineCheckException(struct pt_regs *regs); +extern void AlignmentException(struct pt_regs *regs); +extern void ProgramCheckException(struct pt_regs *regs); +extern void SingleStepException(struct pt_regs *regs); +extern int sys_sigreturn(struct pt_regs *regs); +extern int do_signal(sigset_t *, struct pt_regs *); +extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); +extern int unregister_ioctl32_conversion(unsigned int cmd); + +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +int abs(int); +extern unsigned long ret_to_user_hook; + +extern struct pci_dev * iSeries_veth_dev; +extern struct pci_dev * iSeries_vio_dev; + +EXPORT_SYMBOL(do_signal); +EXPORT_SYMBOL(syscall_trace); +EXPORT_SYMBOL(do_IRQ); +EXPORT_SYMBOL(SystemResetException); +EXPORT_SYMBOL(MachineCheckException); +EXPORT_SYMBOL(AlignmentException); +EXPORT_SYMBOL(ProgramCheckException); +EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(sys_sigreturn); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(disable_irq_nosync); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(kernel_flag); +EXPORT_SYMBOL(synchronize_irq); +EXPORT_SYMBOL(smp_num_cpus); +#endif /* CONFIG_SMP */ + +EXPORT_SYMBOL(register_ioctl32_conversion); +EXPORT_SYMBOL(unregister_ioctl32_conversion); + +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_io_base); +EXPORT_SYMBOL(pci_dram_offset); + +EXPORT_SYMBOL(find_next_zero_bit); + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(naca); +EXPORT_SYMBOL(__down); + +/* EXPORT_SYMBOL(csum_partial); already in net/netsyms.c */ +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +/* +EXPORT_SYMBOL(inb); +EXPORT_SYMBOL(inw); +EXPORT_SYMBOL(inl); +EXPORT_SYMBOL(outb); +EXPORT_SYMBOL(outw); +EXPORT_SYMBOL(outl); +EXPORT_SYMBOL(outsl);*/ + +#ifdef CONFIG_MSCHUNKS +EXPORT_SYMBOL(msChunks); +#endif +EXPORT_SYMBOL(reloc_offset); + +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(iSeries_proc_callback); +EXPORT_SYMBOL(HvCall0); +EXPORT_SYMBOL(HvCall1); +EXPORT_SYMBOL(HvCall2); +EXPORT_SYMBOL(HvCall3); +EXPORT_SYMBOL(HvCall4); +EXPORT_SYMBOL(HvCall5); +EXPORT_SYMBOL(HvCall6); +EXPORT_SYMBOL(HvCall7); 
+#endif + +EXPORT_SYMBOL(_insb); +EXPORT_SYMBOL(_outsb); +EXPORT_SYMBOL(_insw); +EXPORT_SYMBOL(_outsw); +EXPORT_SYMBOL(_insl); +EXPORT_SYMBOL(_outsl); +EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); + +EXPORT_SYMBOL(ide_insw); +EXPORT_SYMBOL(ide_outsw); +EXPORT_SYMBOL(ppc_ide_md); +#ifdef CONFIG_BLK_DEV_IDE_MODULE +EXPORT_SYMBOL(chrp_ide_irq); +EXPORT_SYMBOL(chrp_ide_ports_known); +EXPORT_SYMBOL(chrp_ide_regbase); +EXPORT_SYMBOL(chrp_ide_probe); +#endif + +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_map_single); +EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci_map_sg); +EXPORT_SYMBOL(pci_unmap_sg); +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(iSeries_Write_Long); +EXPORT_SYMBOL(iSeries_GetLocationData); +EXPORT_SYMBOL(iSeries_Read_Long); +EXPORT_SYMBOL(iSeries_Device_ToggleReset); +EXPORT_SYMBOL(iSeries_Write_Word); +EXPORT_SYMBOL(iSeries_memcpy_fromio); +EXPORT_SYMBOL(iSeries_Read_Word); +EXPORT_SYMBOL(iSeries_Read_Byte); +EXPORT_SYMBOL(iSeries_Write_Byte); + +#endif /* CONFIG_PPC_ISERIES */ +#endif /* CONFIG_PCI */ + +EXPORT_SYMBOL(iSeries_veth_dev); +EXPORT_SYMBOL(iSeries_vio_dev); + +EXPORT_SYMBOL(start_thread); +EXPORT_SYMBOL(kernel_thread); + +EXPORT_SYMBOL(flush_instruction_cache); +EXPORT_SYMBOL(_get_PVR); +EXPORT_SYMBOL(giveup_fpu); +EXPORT_SYMBOL(enable_kernel_fp); +EXPORT_SYMBOL(flush_icache_range); +EXPORT_SYMBOL(flush_dcache_page); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(__global_cli); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_save_flags); +EXPORT_SYMBOL(__global_restore_flags); +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(__no_use_restore_flags); +EXPORT_SYMBOL(__no_use_save_flags); +EXPORT_SYMBOL(__no_use_sti); +EXPORT_SYMBOL(__no_use_cli); +#endif +#endif + +#ifndef CONFIG_MACH_SPECIFIC +EXPORT_SYMBOL(_machine); +#endif +EXPORT_SYMBOL(ppc_md); + +EXPORT_SYMBOL(find_devices); +EXPORT_SYMBOL(find_type_devices); +EXPORT_SYMBOL(find_compatible_devices); +EXPORT_SYMBOL(find_path_device); +EXPORT_SYMBOL(device_is_compatible); +EXPORT_SYMBOL(machine_is_compatible); +EXPORT_SYMBOL(find_all_nodes); +EXPORT_SYMBOL(get_property); + +#ifndef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(kd_mksound); +EXPORT_SYMBOL_NOVERS(sys_ctrler); /* tibit */ +#endif +#ifdef CONFIG_NVRAM +EXPORT_SYMBOL(nvram_read_byte); +EXPORT_SYMBOL(nvram_write_byte); +#endif /* CONFIG_NVRAM */ + +EXPORT_SYMBOL_NOVERS(__ashrdi3); +EXPORT_SYMBOL_NOVERS(__ashldi3); +EXPORT_SYMBOL_NOVERS(__lshrdi3); +EXPORT_SYMBOL_NOVERS(memcpy); +EXPORT_SYMBOL_NOVERS(memset); +EXPORT_SYMBOL_NOVERS(memmove); +EXPORT_SYMBOL_NOVERS(memscan); +EXPORT_SYMBOL_NOVERS(memcmp); + +EXPORT_SYMBOL(abs); + +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(irq_desc); +void ppc_irq_dispatch_handler(struct pt_regs *, int); +EXPORT_SYMBOL(ppc_irq_dispatch_handler); +EXPORT_SYMBOL(get_wchan); +EXPORT_SYMBOL(console_drivers); +#ifdef CONFIG_XMON +EXPORT_SYMBOL(xmon); +#endif + +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) +extern void (*debugger)(struct pt_regs *regs); +extern int (*debugger_bpt)(struct pt_regs *regs); +extern int (*debugger_sstep)(struct pt_regs *regs); +extern int (*debugger_iabr_match)(struct pt_regs *regs); +extern int (*debugger_dabr_match)(struct pt_regs *regs); +extern void (*debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(debugger); +EXPORT_SYMBOL(debugger_bpt); 
+EXPORT_SYMBOL(debugger_sstep); +EXPORT_SYMBOL(debugger_iabr_match); +EXPORT_SYMBOL(debugger_dabr_match); +EXPORT_SYMBOL(debugger_fault_handler); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif + +EXPORT_SYMBOL(ret_to_user_hook); + +EXPORT_SYMBOL(tb_ticks_per_usec); diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/proc_pcifr.c linuxppc64_2_4/arch/ppc64/kernel/proc_pcifr.c --- ../kernel.org/linux/arch/ppc64/kernel/proc_pcifr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/proc_pcifr.c Fri Nov 9 15:49:40 2001 @@ -0,0 +1,252 @@ +/************************************************************************/ +/* File pcifr_proc.c created by Allan Trautman on Thu Aug 2 2001. */ +/************************************************************************/ +/* Supports the ../proc/ppc64/pcifr for the pci flight recorder. */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "pci.h" + +void pci_Fr_TestCode(void); + +static spinlock_t proc_pcifr_lock; +struct flightRecorder* PciFr = NULL; + +extern long Pci_Interrupt_Count; +extern long Pci_Event_Count; +extern long Pci_Io_Read_Count; +extern long Pci_Io_Write_Count; +extern long Pci_Cfg_Read_Count; +extern long Pci_Cfg_Write_Count; +extern long Pci_Error_Count; + +/************************************************************************/ +/* Forward declares. 
*/
+/************************************************************************/
+static struct proc_dir_entry *pciFr_proc_root = NULL;
+int proc_pciFr_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data);
+int proc_pciFr_write_proc(struct file *file, const char *buffer, unsigned long count, void *data);
+
+static struct proc_dir_entry *pciDev_proc_root = NULL;
+int proc_pciDev_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data);
+int proc_pciDev_write_proc(struct file *file, const char *buffer, unsigned long count, void *data);
+
+/************************************************************************/
+/* Create entry ../proc/ppc64/pcifr                                     */
+/************************************************************************/
+void proc_pciFr_init(struct proc_dir_entry *proc_ppc64_root)
+{
+	if (proc_ppc64_root == NULL) return;
+
+	/* Read = User,Group,Other, Write User */
+	printk("PCI: Creating ../proc/ppc64/pcifr \n");
+	spin_lock(&proc_pcifr_lock);
+	pciFr_proc_root = create_proc_entry("pcifr", S_IFREG | S_IRUGO | S_IWUSR, proc_ppc64_root);
+	spin_unlock(&proc_pcifr_lock);
+
+	if (pciFr_proc_root == NULL) return;
+
+	pciFr_proc_root->nlink = 1;
+	pciFr_proc_root->data = (void *)0;
+	pciFr_proc_root->read_proc = proc_pciFr_read_proc;
+	pciFr_proc_root->write_proc = proc_pciFr_write_proc;
+
+	PciFr = alloc_Flight_Recorder(NULL,"PciFr", 4096);
+
+	printk("PCI: Creating ../proc/ppc64/pci \n");
+	spin_lock(&proc_pcifr_lock);
+	pciDev_proc_root = create_proc_entry("pci", S_IFREG | S_IRUGO | S_IWUSR, proc_ppc64_root);
+	spin_unlock(&proc_pcifr_lock);
+
+	if (pciDev_proc_root == NULL) return;
+
+	pciDev_proc_root->nlink = 1;
+	pciDev_proc_root->data = (void *)0;
+	pciDev_proc_root->read_proc = proc_pciDev_read_proc;
+	pciDev_proc_root->write_proc = proc_pciDev_write_proc;
+}
+
+static char* PciFrBuffer = NULL;
+static int   PciFrBufLen = 0;
+static char* PciFrBufPtr = NULL;
+static int   PciFileSize = 0;
+
+/*******************************************************************************/
+/* Read function for ../proc/ppc64/pcifr.                                      */
+/* -> Function grabs a copy of the pcifr (could change) and writes the data to */
+/*    the caller.  Note, it may not all fit in the buffer.  The function       */
+/*    handles the repeated calls until all the data has been read.             */
+/* Tip:                                                                        */
+/*   ./fs/proc/generic.c::proc_file_read is the caller of this routine.        */
+/*******************************************************************************/
+int proc_pciFr_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	/* First call will have offset 0, take a snapshot of the pcifr */
+	if( off == 0) {
+		spin_lock(&proc_pcifr_lock);
+		PciFrBuffer = (char*)kmalloc(PciFr->Size, GFP_KERNEL);
+		PciFrBufLen = fr_Dump(PciFr, PciFrBuffer, PciFr->Size);
+		PciFrBufPtr = PciFrBuffer;
+		PciFileSize = 0;
+	}
+	/* For the persistent folks, set eof and return zero length. */
+	else if( PciFrBuffer == NULL) {
+		*eof = 1;
+		return 0;
+	}
+	/* - If there is more data than will fit, move what will fit.  */
+	/* - The rest will get moved on the next call.                 */
+	int MoveSize = PciFrBufLen;
+	if( MoveSize > count) MoveSize = count;
+
+	/* Move the data into the FileSystem buffer. */
+	memcpy(page+off, PciFrBufPtr, MoveSize);
+	PciFrBufPtr  += MoveSize;
+	PciFileSize  += MoveSize;
+	PciFrBufLen  -= MoveSize;
+
+	/* If all the data has been moved, free the buffer and set EOF.
 */
+	if( PciFrBufLen == 0) {
+		kfree(PciFrBuffer);
+		PciFrBuffer = NULL;
+		spin_unlock(&proc_pcifr_lock);
+		*eof = 1;
+	}
+	return PciFileSize;
+}
+/*******************************************************************************/
+/* Gets called when client writes to ../proc/ppc64/pcifr                       */
+/*******************************************************************************/
+int proc_pciFr_write_proc(struct file *file, const char *buffer, unsigned long count, void *data)
+{
+	return count;
+}
+static spinlock_t ProcBufferLock;
+static char* ProcBuffer = NULL;
+static int   ProcBufSize = 0;
+static char* ProcBufPtr = NULL;
+static int   ProcFileSize = 0;
+
+/*******************************************************************************/
+/* Build Device Buffer for /proc/ppc64/pci                                     */
+/*******************************************************************************/
+static int build_PciDev_Buffer(int BufferSize)
+{
+	ProcBuffer = (char*)kmalloc(BufferSize, GFP_KERNEL);
+	ProcBufPtr = ProcBuffer;
+
+	int BufLen = 0;
+
+	BufLen += sprintf(ProcBuffer+BufLen,"Pci I/O Reads. %8ld  ",Pci_Io_Read_Count);
+	BufLen += sprintf(ProcBuffer+BufLen,"Pci I/O Writes %8ld\n",Pci_Io_Write_Count);
+
+	BufLen += sprintf(ProcBuffer+BufLen,"Pci Cfg Reads. %8ld  ",Pci_Cfg_Read_Count);
+	BufLen += sprintf(ProcBuffer+BufLen,"Pci Cfg Writes %8ld\n",Pci_Cfg_Write_Count);
+
+	BufLen += sprintf(ProcBuffer+BufLen,"Pci I/O Errors %8ld\n",Pci_Error_Count);
+	BufLen += sprintf(ProcBuffer+BufLen,"\n");
+
+	/***************************************************************************/
+	/* List the devices                                                        */
+	/***************************************************************************/
+	struct pci_dev*    PciDev;	/* Device pointer     */
+	struct net_device* dev;		/* net_device pointer */
+	int    DeviceCount = 0;
+	pci_for_each_dev(PciDev) {
+		if ( BufLen > BufferSize-128) {	/* Room for another line? */
+			BufLen += sprintf(ProcBuffer+BufLen,"Buffer Full\n");
+			break;
+		}
+		if( PCI_SLOT(PciDev->devfn) != 0) {
+			++DeviceCount;
+			BufLen += sprintf(ProcBuffer+BufLen,"%3d. ",DeviceCount);
+			if ( PciDev->sysdata != NULL ) {
+				BufLen += format_device_location(PciDev,ProcBuffer+BufLen,128);
+			}
+			else {
+				BufLen += sprintf(ProcBuffer+BufLen,"No Device Node!\n");
+			}
+			BufLen += sprintf(ProcBuffer+BufLen,"\n");
+
+			/* look for a matching net device */
+			for (dev = dev_base; dev != NULL; dev = dev->next) {
+				if (dev->base_addr == PciDev->resource[0].start ) {
+					BufLen += sprintf(ProcBuffer+BufLen, " - Net device: %s\n", dev->name);
+					break;
+				}	/* if  */
+			}	/* for */
+		}	/* if(PCI_SLOT(PciDev->devfn) != 0) */
+	}
+	return BufLen;
+}
+/*******************************************************************************/
+/* Gets called when client reads ../proc/ppc64/pci.                            */
+/*******************************************************************************/
+int proc_pciDev_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	/* First call will have offset 0 */
+	if( off == 0) {
+		spin_lock(&ProcBufferLock);
+		ProcBufSize = build_PciDev_Buffer(4096);
+		ProcFileSize = 0;
+	}
+	/* For the persistent folks, set eof and return zero length. */
+	else if( ProcBuffer == NULL) {
+		*eof = 1;
+		return 0;
+	}
+	/* How much data can be moved */
+	int MoveSize = ProcBufSize;
+	if( MoveSize > count) MoveSize = count;
+
+	/* Move the data into the FileSystem buffer.
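+	   (Illustrative call sequence, a sketch only: for a 6000 byte
+	   snapshot and 4096 byte user reads, proc_file_read first calls in
+	   with off=0/count=4096 and 4096 bytes move; the next call comes in
+	   with off=4096, the remaining 1904 bytes move and *eof is set below.)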
*/ + memcpy(page+off,ProcBufPtr,MoveSize); + ProcBufPtr += MoveSize; + ProcBufSize -= MoveSize; + ProcFileSize += MoveSize; + + /* If all the data has been moved, free the buffer and set EOF. */ + if( ProcBufSize == 0) { + kfree(ProcBuffer ); + ProcBuffer = NULL; + spin_unlock(&ProcBufferLock); + *eof = 1; + } + return ProcFileSize; +} +/*******************************************************************************/ +/* Gets called when client writes to ../proc/ppc64/pcifr */ +/*******************************************************************************/ +int proc_pciDev_write_proc(struct file *file, const char *buffer, unsigned long count, void *data) +{ + return count; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/proc_pmc.c linuxppc64_2_4/arch/ppc64/kernel/proc_pmc.c --- ../kernel.org/linux/arch/ppc64/kernel/proc_pmc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/proc_pmc.c Tue Nov 13 10:47:33 2001 @@ -0,0 +1,798 @@ +/* + * proc_pmc.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +/* Change Activity: + * 2001 : mikec : Created + * 2001/06/05 : engebret : Software event count support. 
+ * 2001/08/03 : trautman  : Added PCI Flight Recorder
+ * End Change Activity
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* pci Flight Recorder AHT */
+extern void proc_pciFr_init(struct proc_dir_entry *proc_ppc64_root);
+
+static int proc_pmc_control_mode = 0;
+
+static struct proc_dir_entry *proc_ppc64_root = NULL;
+static struct proc_dir_entry *proc_ppc64_pmc_root = NULL;
+static struct proc_dir_entry *proc_ppc64_pmc_system_root = NULL;
+static struct proc_dir_entry *proc_ppc64_pmc_cpu_root[NR_CPUS] = {NULL, };
+
+static spinlock_t proc_ppc64_lock;
+
+extern struct Naca *naca;
+
+int proc_ppc64_pmc_find_file(void *data);
+int proc_ppc64_pmc_read(char *page, char **start, off_t off,
+			int count, int *eof, char *buffer);
+int proc_ppc64_pmc_stab_read(char *page, char **start, off_t off,
+			     int count, int *eof, void *data);
+int proc_ppc64_pmc_htab_read(char *page, char **start, off_t off,
+			     int count, int *eof, void *data);
+int proc_ppc64_pmc_hw_read(char *page, char **start, off_t off,
+			   int count, int *eof, void *data);
+
+static struct proc_dir_entry *pmc_proc_root = NULL;
+
+int proc_get_lpevents( char *page, char **start, off_t off, int count, int *eof, void *data);
+int proc_reset_lpevents( struct file *file, const char *buffer, unsigned long count, void *data);
+
+int proc_get_titanTod( char *page, char **start, off_t off, int count, int *eof, void *data);
+
+int proc_pmc_get_control( char *page, char **start, off_t off, int count, int *eof, void *data);
+
+int proc_pmc_set_control( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_mmcr0( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_mmcr1( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_mmcra( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc1( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc2( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc3( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc4( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc5( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc6( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc7( struct file *file, const char *buffer, unsigned long count, void *data);
+int proc_pmc_set_pmc8( struct file *file, const char *buffer, unsigned long count, void *data);
+
+
+void proc_ppc64_init(void)
+{
+	unsigned long i;
+	struct proc_dir_entry *ent = NULL;
+	char buf[256];
+
+	printk("proc_ppc64: Creating /proc/ppc64/pmc\n");
+
+	/*
+	 * Create the root, system, and cpu directories as follows:
+	 *   /proc/ppc64/pmc/system
+	 *   /proc/ppc64/pmc/cpu0
+	 */
+	spin_lock(&proc_ppc64_lock);
+	proc_ppc64_root = proc_mkdir("ppc64", 0);
+	spin_unlock(&proc_ppc64_lock);
+	if (!proc_ppc64_root) return;
+
+	/* Create the /proc/ppc64/pcifr for the Pci Flight Recorder.
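+	 * (Illustrative layout once everything below has run, a sketch for
+	 * a two processor box:
+	 *	/proc/ppc64/pcifr
+	 *	/proc/ppc64/pci
+	 *	/proc/ppc64/pmc/system/{stab,htab,hardware}
+	 *	/proc/ppc64/pmc/cpu0/{stab,htab,hardware}
+	 *	/proc/ppc64/pmc/cpu1/{stab,htab,hardware})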
*/ + proc_pciFr_init(proc_ppc64_root); + +#ifdef CONFIG_PPC_EEH + eeh_init_proc(proc_ppc64_root); +#endif + + proc_ppc64_pmc_root = proc_mkdir("pmc", proc_ppc64_root); + + proc_ppc64_pmc_system_root = proc_mkdir("system", proc_ppc64_pmc_root); + for (i = 0; i < naca->processorCount; i++) { + sprintf(buf, "cpu%ld", i); + proc_ppc64_pmc_cpu_root[i] = proc_mkdir(buf, proc_ppc64_pmc_root); + } + + + /* Create directories for the software counters. */ + for (i = 0; i < naca->processorCount; i++) { + ent = create_proc_entry("stab", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_cpu_root[i]); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_cpu_root[i]; + ent->read_proc = (void *)proc_ppc64_pmc_stab_read; + ent->write_proc = (void *)proc_ppc64_pmc_stab_read; + } + + ent = create_proc_entry("htab", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_cpu_root[i]); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_cpu_root[i]; + ent->read_proc = (void *)proc_ppc64_pmc_htab_read; + ent->write_proc = (void *)proc_ppc64_pmc_htab_read; + } + } + + ent = create_proc_entry("stab", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_system_root); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_system_root; + ent->read_proc = (void *)proc_ppc64_pmc_stab_read; + ent->write_proc = (void *)proc_ppc64_pmc_stab_read; + } + + ent = create_proc_entry("htab", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_system_root); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_system_root; + ent->read_proc = (void *)proc_ppc64_pmc_htab_read; + ent->write_proc = (void *)proc_ppc64_pmc_htab_read; + } + + /* Create directories for the hardware counters. */ + for (i = 0; i < naca->processorCount; i++) { + ent = create_proc_entry("hardware", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_cpu_root[i]); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_cpu_root[i]; + ent->read_proc = (void *)proc_ppc64_pmc_hw_read; + ent->write_proc = (void *)proc_ppc64_pmc_hw_read; + } + } + + ent = create_proc_entry("hardware", S_IRUGO | S_IWUSR, + proc_ppc64_pmc_system_root); + if (ent) { + ent->nlink = 1; + ent->data = (void *)proc_ppc64_pmc_system_root; + ent->read_proc = (void *)proc_ppc64_pmc_hw_read; + ent->write_proc = (void *)proc_ppc64_pmc_hw_read; + } +} + +/* + * Find the requested 'file' given a proc token. + * + * Inputs: void * data: proc token + * Output: int : (0, ..., +N) = CPU number. + * -1 = System. + */ +int proc_ppc64_pmc_find_file(void *data) +{ + int i; + + if ((unsigned long)data == + (unsigned long) proc_ppc64_pmc_system_root) { + return(-1); + } else { + for (i = 0; i < naca->processorCount; i++) { + if ((unsigned long)data == + (unsigned long)proc_ppc64_pmc_cpu_root[i]) { + return(i); + } + } + } + + /* On error, just default to a type of system. 
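+	 * (E.g. on a two processor box the token mapping is:
+	 *	data == proc_ppc64_pmc_system_root  -> -1  (system)
+	 *	data == proc_ppc64_pmc_cpu_root[0]  ->  0  (cpu0)
+	 *	data == proc_ppc64_pmc_cpu_root[1]  ->  1  (cpu1)
+	 * anything else falls through to here and is treated as system.)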
 */
+	printk("proc_ppc64_pmc_find_file: failed to find file token.\n");
+	return(-1);
+}
+
+int
+proc_ppc64_pmc_read(char *page, char **start, off_t off,
+		    int count, int *eof, char *buffer)
+{
+	int buffer_size, n;
+
+	if (count < 0) return 0;
+
+	if (buffer == NULL) {
+		*eof = 1;
+		return 0;
+	}
+
+	/* Check for read beyond EOF */
+	buffer_size = n = strlen(buffer);
+	if (off >= buffer_size) {
+		*eof = 1;
+		return 0;
+	}
+	if (n > (buffer_size - off)) n = buffer_size - off;
+
+	/* Never return more than was requested */
+	if (n > count) {
+		n = count;
+	} else {
+		*eof = 1;
+	}
+
+	memcpy(page, buffer + off, n);
+
+	*start = page;
+
+	return n;
+}
+
+int
+proc_ppc64_pmc_stab_read(char *page, char **start, off_t off,
+			 int count, int *eof, void *data)
+{
+	int n, file;
+	char *buffer = NULL;
+
+	if (count < 0) return 0;
+	spin_lock(&proc_ppc64_lock);
+
+	/* Figure out which file is being requested. */
+	file = proc_ppc64_pmc_find_file(data);
+
+	/* Update the counters and the text buffer representation. */
+	buffer = ppc64_pmc_stab(file);
+
+	/* Put the data into the requestor's buffer. */
+	n = proc_ppc64_pmc_read(page, start, off, count, eof, buffer);
+
+	spin_unlock(&proc_ppc64_lock);
+	return n;
+}
+
+int
+proc_ppc64_pmc_htab_read(char *page, char **start, off_t off,
+			 int count, int *eof, void *data)
+{
+	int n, file;
+	char *buffer = NULL;
+
+	if (count < 0) return 0;
+	spin_lock(&proc_ppc64_lock);
+
+	/* Figure out which file is being requested. */
+	file = proc_ppc64_pmc_find_file(data);
+
+	/* Update the counters and the text buffer representation. */
+	buffer = ppc64_pmc_htab(file);
+
+	/* Put the data into the requestor's buffer. */
+	n = proc_ppc64_pmc_read(page, start, off, count, eof, buffer);
+
+	spin_unlock(&proc_ppc64_lock);
+	return n;
+}
+
+int
+proc_ppc64_pmc_hw_read(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	int n, file;
+	char *buffer = NULL;
+
+	if (count < 0) return 0;
+	spin_lock(&proc_ppc64_lock);
+
+	/* Figure out which file is being requested. */
+	file = proc_ppc64_pmc_find_file(data);
+
+	/* Update the counters and the text buffer representation. */
+	buffer = ppc64_pmc_hw(file);
+
+	/* Put the data into the requestor's buffer. */
+	n = proc_ppc64_pmc_read(page, start, off, count, eof, buffer);
+
+	spin_unlock(&proc_ppc64_lock);
+	return n;
+}
+
+/*
+ * DRENG the remainder of these functions still need work ...
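+ * (pmc_proc_init() below hangs its entries off whatever directory the
+ * caller passes in; on iSeries that is expected to be /proc/iSeries, which
+ * would give roughly this sketch:
+ *	.../lpevents     read: LP event counts,   write: reset counts
+ *	.../titanTod     read: titan vs. timebase clock comparison
+ *	.../pmc/control  read: current PMC mode,  write: stop/start/reset/cpi/tlb)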
+ */ +void pmc_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent = NULL; + + ent = create_proc_entry("lpevents", S_IFREG|S_IRUGO, iSeries_proc); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_get_lpevents; + ent->write_proc = proc_reset_lpevents; + + ent = create_proc_entry("titanTod", S_IFREG|S_IRUGO, iSeries_proc); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_get_titanTod; + ent->write_proc = NULL; + + pmc_proc_root = proc_mkdir("pmc", iSeries_proc); + if (!pmc_proc_root) return; + + ent = create_proc_entry("control", S_IFREG|S_IRUSR|S_IWUSR, pmc_proc_root); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_pmc_get_control; + ent->write_proc = proc_pmc_set_control; + +} + +static int pmc_calc_metrics( char *page, char **start, off_t off, int count, int *eof, int len) +{ + if ( len <= off+count) + *eof = 1; + *start = page+off; + len -= off; + if ( len > count ) + len = count; + if ( len < 0 ) + len = 0; + return len; +} + +static char * lpEventTypes[9] = { + "Hypervisor\t\t", + "Machine Facilities\t", + "Session Manager\t", + "SPD I/O\t\t", + "Virtual Bus\t\t", + "PCI I/O\t\t", + "RIO I/O\t\t", + "Virtual Lan\t\t", + "Virtual I/O\t\t" + }; + + +int proc_get_lpevents +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + unsigned i; + int len = 0; + + len += sprintf( page+len, "LpEventQueue 0\n" ); + len += sprintf( page+len, " events processed:\t%lu\n", + (unsigned long)xItLpQueue.xLpIntCount ); + for (i=0; i<9; ++i) { + len += sprintf( page+len, " %s %10lu\n", + lpEventTypes[i], + (unsigned long)xItLpQueue.xLpIntCountByType[i] ); + } + len += sprintf( page+len, "\n events processed by processor:\n" ); + for (i=0; iprocessorCount; ++i) { + len += sprintf( page+len, " CPU%02d %10u\n", + i, xPaca[i].lpEvent_count ); + } + + return pmc_calc_metrics( page, start, off, count, eof, len ); + +} + +int proc_reset_lpevents( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + return count; +} + +extern unsigned long procFreqHz; +static unsigned long startTitan = 0; +static unsigned long startTb = 0; + + +int proc_get_titanTod +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = 0; + unsigned long tb0, titan_tod; + + tb0 = get_tb(); + titan_tod = HvCallXm_loadTod(); + + len += sprintf( page+len, "Titan\n" ); + len += sprintf( page+len, " time base = %016lx\n", tb0 ); + len += sprintf( page+len, " titan tod = %016lx\n", titan_tod ); + len += sprintf( page+len, " xProcFreq = %016x\n", xIoHriProcessorVpd[0].xProcFreq ); + len += sprintf( page+len, " procFreqHz = %016lx\n", procFreqHz ); + len += sprintf( page+len, " xTimeBaseFreq = %016x\n", xIoHriProcessorVpd[0].xTimeBaseFreq ); + len += sprintf( page+len, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy ); + len += sprintf( page+len, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec ); + + if ( !startTitan ) { + startTitan = titan_tod; + startTb = tb0; + } + else { + unsigned long titan_usec = (titan_tod - startTitan) >> 12; + unsigned long tb_ticks = (tb0 - startTb); + unsigned long titan_jiffies = titan_usec / (1000000/HZ); + unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ); + unsigned long titan_jiff_rem_usec = titan_usec - titan_jiff_usec; + unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy; + unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy; + unsigned long tb_jiff_rem_ticks = tb_ticks - 
tb_jiff_ticks; + unsigned long tb_jiff_rem_usec = tb_jiff_rem_ticks / tb_ticks_per_usec; + unsigned long new_tb_ticks_per_jiffy = (tb_ticks * (1000000/HZ))/titan_usec; + + len += sprintf( page+len, " titan elapsed = %lu uSec\n", titan_usec); + len += sprintf( page+len, " tb elapsed = %lu ticks\n", tb_ticks); + len += sprintf( page+len, " titan jiffies = %lu.%04lu \n", titan_jiffies, titan_jiff_rem_usec ); + len += sprintf( page+len, " tb jiffies = %lu.%04lu\n", tb_jiffies, tb_jiff_rem_usec ); + len += sprintf( page+len, " new tb_ticks_per_jiffy = %lu\n", new_tb_ticks_per_jiffy ); + + } + + return pmc_calc_metrics( page, start, off, count, eof, len ); +} + +int proc_pmc_get_control +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = 0; + + if ( proc_pmc_control_mode == PMC_CONTROL_CPI ) { + unsigned long mach_cycles = mfspr( PMC5 ); + unsigned long inst_complete = mfspr( PMC4 ); + unsigned long inst_dispatch = mfspr( PMC3 ); + unsigned long thread_active_run = mfspr( PMC1 ); + unsigned long thread_active = mfspr( PMC2 ); + unsigned long cpi = 0; + unsigned long cpithou = 0; + unsigned long remain; + + if ( inst_complete ) { + cpi = thread_active_run / inst_complete; + remain = thread_active_run % inst_complete; + if ( inst_complete > 1000000 ) + cpithou = remain / ( inst_complete / 1000 ); + else + cpithou = ( remain * 1000 ) / inst_complete; + } + len += sprintf( page+len, "PMC CPI Mode\nRaw Counts\n" ); + len += sprintf( page+len, "machine cycles : %12lu\n", mach_cycles ); + len += sprintf( page+len, "thread active cycles : %12lu\n\n", thread_active ); + + len += sprintf( page+len, "instructions completed : %12lu\n", inst_complete ); + len += sprintf( page+len, "instructions dispatched : %12lu\n", inst_dispatch ); + len += sprintf( page+len, "thread active run cycles : %12lu\n", thread_active_run ); + + len += sprintf( page+len, "thread active run cycles/instructions completed\n" ); + len += sprintf( page+len, "CPI = %lu.%03lu\n", cpi, cpithou ); + + } + else if ( proc_pmc_control_mode == PMC_CONTROL_TLB ) { + len += sprintf( page+len, "PMC TLB Mode\n" ); + len += sprintf( page+len, "I-miss count : %12lu\n", mfspr( PMC1 ) ); + len += sprintf( page+len, "I-miss latency : %12lu\n", mfspr( PMC2 ) ); + len += sprintf( page+len, "D-miss count : %12lu\n", mfspr( PMC3 ) ); + len += sprintf( page+len, "D-miss latency : %12lu\n", mfspr( PMC4 ) ); + len += sprintf( page+len, "IERAT miss count : %12lu\n", mfspr( PMC5 ) ); + len += sprintf( page+len, "D-reference count : %12lu\n", mfspr( PMC6 ) ); + len += sprintf( page+len, "miss PTEs searched : %12lu\n", mfspr( PMC7 ) ); + len += sprintf( page+len, "miss >8 PTEs searched : %12lu\n", mfspr( PMC8 ) ); + } + /* IMPLEMENT ME */ + return pmc_calc_metrics( page, start, off, count, eof, len ); +} + +unsigned long proc_pmc_conv_int( const char *buf, unsigned count ) +{ + const char * p; + char b0, b1; + unsigned v, multiplier, mult, i; + unsigned long val; + multiplier = 10; + p = buf; + if ( count >= 3 ) { + b0 = buf[0]; + b1 = buf[1]; + if ( ( b0 == '0' ) && + ( ( b1 == 'x' ) || ( b1 == 'X' ) ) ) { + p = buf + 2; + count -= 2; + multiplier = 16; + } + + } + val = 0; + for ( i=0; i= '0' ) && ( b0 <= '9' ) ) + v = b0 - '0'; + else if ( multiplier == 16 ) { + if ( ( b0 >= 'a' ) && ( b0 <= 'f' ) ) + v = b0 - 'a' + 10; + else if ( ( b0 >= 'A' ) && ( b0 <= 'F' ) ) + v = b0 - 'A' + 10; + else + mult = 1; + } + else + mult = 1; + val *= mult; + val += v; + } + + return val; + +} + +static inline void proc_pmc_stop(void) +{ + /* 
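	   (MMCR0 bit 0, mask 0x80000000, is the FC freeze-counters bit.)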
Freeze all counters, leave everything else alone */ + mtspr( MMCR0, mfspr( MMCR0 ) | 0x80000000 ); +} + +static inline void proc_pmc_start(void) +{ + /* Unfreeze all counters, leave everything else alone */ + mtspr( MMCR0, mfspr( MMCR0 ) & ~0x80000000 ); + +} + +static inline void proc_pmc_reset(void) +{ + /* Clear all the PMCs to zeros + * Assume a "stop" has already frozen the counters + * Clear all the PMCs + */ + mtspr( PMC1, 0 ); + mtspr( PMC2, 0 ); + mtspr( PMC3, 0 ); + mtspr( PMC4, 0 ); + mtspr( PMC5, 0 ); + mtspr( PMC6, 0 ); + mtspr( PMC7, 0 ); + mtspr( PMC8, 0 ); + +} + +static inline void proc_pmc_cpi(void) +{ + /* Configure the PMC registers to count cycles and instructions */ + /* so we can compute cpi */ + /* + * MMCRA[30] = 1 Don't count in wait state (CTRL[31]=0) + * MMCR0[6] = 1 Freeze counters when any overflow + * MMCR0[19:25] = 0x01 PMC1 counts Thread Active Run Cycles + * MMCR0[26:31] = 0x05 PMC2 counts Thread Active Cycles + * MMCR1[0:4] = 0x07 PMC3 counts Instructions Dispatched + * MMCR1[5:9] = 0x03 PMC4 counts Instructions Completed + * MMCR1[10:14] = 0x06 PMC5 counts Machine Cycles + * + */ + + proc_pmc_control_mode = PMC_CONTROL_CPI; + + // Indicate to hypervisor that we are using the PMCs + ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + + // Freeze all counters + mtspr( MMCR0, 0x80000000 ); + mtspr( MMCR1, 0x00000000 ); + + // Clear all the PMCs + mtspr( PMC1, 0 ); + mtspr( PMC2, 0 ); + mtspr( PMC3, 0 ); + mtspr( PMC4, 0 ); + mtspr( PMC5, 0 ); + mtspr( PMC6, 0 ); + mtspr( PMC7, 0 ); + mtspr( PMC8, 0 ); + + /* Freeze counters in Wait State (CTRL[31]=0) */ + mtspr( MMCRA, 0x00000002 ); + + /* PMC3<-0x07, PMC4<-0x03, PMC5<-0x06 */ + mtspr( MMCR1, 0x38cc0000 ); + + mb(); + + /* PMC1<-0x01, PMC2<-0x05 + * Start all counters + */ + mtspr( MMCR0, 0x02000045 ); + +} + +static inline void proc_pmc_tlb(void) +{ + /* Configure the PMC registers to count tlb misses */ + /* + * MMCR0[6] = 1 Freeze counters when any overflow + * MMCR0[19:25] = 0x55 Group count + * PMC1 counts I misses + * PMC2 counts I miss duration (latency) + * PMC3 counts D misses + * PMC4 counts D miss duration (latency) + * PMC5 counts IERAT misses + * PMC6 counts D references (including PMC7) + * PMC7 counts miss PTEs searched + * PMC8 counts miss >8 PTEs searched + * + */ + + proc_pmc_control_mode = PMC_CONTROL_TLB; + + /* Indicate to hypervisor that we are using the PMCs */ + ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + + /* Freeze all counters */ + mtspr( MMCR0, 0x80000000 ); + mtspr( MMCR1, 0x00000000 ); + + /* Clear all the PMCs */ + mtspr( PMC1, 0 ); + mtspr( PMC2, 0 ); + mtspr( PMC3, 0 ); + mtspr( PMC4, 0 ); + mtspr( PMC5, 0 ); + mtspr( PMC6, 0 ); + mtspr( PMC7, 0 ); + mtspr( PMC8, 0 ); + + mtspr( MMCRA, 0x00000000 ); + + mb(); + + /* PMC1<-0x55 + * Start all counters + */ + mtspr( MMCR0, 0x02001540 ); + +} + +int proc_pmc_set_control( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + if ( ! strncmp( buffer, "stop", 4 ) ) + proc_pmc_stop(); + else if ( ! strncmp( buffer, "start", 5 ) ) + proc_pmc_start(); + else if ( ! strncmp( buffer, "reset", 5 ) ) + proc_pmc_reset(); + else if ( ! strncmp( buffer, "cpi", 3 ) ) + proc_pmc_cpi(); + else if ( ! 
strncmp( buffer, "tlb", 3 ) ) + proc_pmc_tlb(); + + /* IMPLEMENT ME */ + return count; +} + +int proc_pmc_set_mmcr0( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + v = v & ~0x04000000; /* Don't allow interrupts for now */ + if ( v & ~0x80000000 ) /* Inform hypervisor we are using PMCs */ + ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + else + ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 0; + mtspr( MMCR0, v ); + + return count; +} + +int proc_pmc_set_mmcr1( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( MMCR1, v ); + + return count; +} + +int proc_pmc_set_mmcra( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + v = v & ~0x00008000; /* Don't allow interrupts for now */ + mtspr( MMCRA, v ); + + return count; +} + + +int proc_pmc_set_pmc1( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC1, v ); + + return count; +} + +int proc_pmc_set_pmc2( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC2, v ); + + return count; +} + +int proc_pmc_set_pmc3( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC3, v ); + + return count; +} + +int proc_pmc_set_pmc4( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC4, v ); + + return count; +} + +int proc_pmc_set_pmc5( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC5, v ); + + return count; +} + +int proc_pmc_set_pmc6( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC6, v ); + + return count; +} + +int proc_pmc_set_pmc7( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC7, v ); + + return count; +} + +int proc_pmc_set_pmc8( struct file *file, const char *buffer, unsigned long count, void *data ) +{ + unsigned long v; + v = proc_pmc_conv_int( buffer, count ); + mtspr( PMC8, v ); + + return count; +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/process.c linuxppc64_2_4/arch/ppc64/kernel/process.c --- ../kernel.org/linux/arch/ppc64/kernel/process.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/process.c Thu Nov 29 01:39:31 2001 @@ -0,0 +1,613 @@ +/* + * + * + * linux/arch/ppc/kernel/process.c + * + * Derived from "arch/i386/kernel/process.c" + * Copyright (C) 1995 Linus Torvalds + * + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and + * Paul Mackerras (paulus@cs.anu.edu.au) + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs);
+extern unsigned long _get_SP(void);
+
+struct task_struct *last_task_used_math = NULL;
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+struct mm_struct ioremap_mm = { pgd             : ioremap_dir
+			       ,page_table_lock : SPIN_LOCK_UNLOCKED };
+
+/* this is 16-byte aligned because it has a stack in it */
+union task_union __attribute((aligned(16))) init_task_union = {
+	INIT_TASK(init_task_union.task)
+};
+
+#ifdef CONFIG_SMP
+struct current_set_struct current_set[NR_CPUS] = {{&init_task, 0}, };
+#endif
+
+char *sysmap = NULL;
+unsigned long sysmap_size = 0;
+
+extern char __toc_start;
+
+#undef SHOW_TASK_SWITCHES
+#undef CHECK_STACK
+
+#if defined(CHECK_STACK)
+unsigned long
+kernel_stack_top(struct task_struct *tsk)
+{
+	return ((unsigned long)tsk) + sizeof(union task_union);
+}
+
+unsigned long
+task_top(struct task_struct *tsk)
+{
+	return ((unsigned long)tsk) + sizeof(struct task_struct);
+}
+
+/* check to make sure the kernel stack is healthy */
+int check_stack(struct task_struct *tsk)
+{
+	unsigned long stack_top = kernel_stack_top(tsk);
+	unsigned long tsk_top = task_top(tsk);
+	int ret = 0;
+
+#if 0
+	/* check thread magic */
+	if ( tsk->thread.magic != THREAD_MAGIC )
+	{
+		ret |= 1;
+		printk("thread.magic bad: %08x\n", tsk->thread.magic);
+	}
+#endif
+
+	if ( !tsk )
+		printk("check_stack(): tsk bad tsk %p\n",tsk);
+
+	/* check if stored ksp is bad */
+	if ( (tsk->thread.ksp > stack_top) || (tsk->thread.ksp < tsk_top) )
+	{
+		printk("stack out of bounds: %s/%d\n"
+		       " tsk_top %08lx ksp %08lx stack_top %08lx\n",
+		       tsk->comm,tsk->pid,
+		       tsk_top, tsk->thread.ksp, stack_top);
+		ret |= 2;
+	}
+
+	/* check if stack ptr RIGHT NOW is bad */
+	if ( (tsk == current) && ((_get_SP() > stack_top ) || (_get_SP() < tsk_top)) )
+	{
+		printk("current stack ptr out of bounds: %s/%d\n"
+		       " tsk_top %08lx sp %08lx stack_top %08lx\n",
+		       current->comm,current->pid,
+		       tsk_top, _get_SP(), stack_top);
+		ret |= 4;
+	}
+
+#if 0
+	/* check amount of free stack */
+	for ( i = (unsigned long *)task_top(tsk) ; i < kernel_stack_top(tsk) ; i++ )
+	{
+		if ( !i )
+			printk("check_stack(): i = %p\n", i);
+		if ( *i != 0 )
+		{
+			/* only notify if it's less than 900 bytes */
+			if ( (i - (unsigned long *)task_top(tsk)) < 900 )
+				printk("%d bytes free on stack\n",
+				       i - task_top(tsk));
+			break;
+		}
+	}
+#endif
+
+	if (ret)
+	{
+		panic("bad kernel stack");
+	}
+	return(ret);
+}
+#endif /* defined(CHECK_STACK) */
+
+void
+enable_kernel_fp(void)
+{
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
+		giveup_fpu(current);
+	else
+		giveup_fpu(NULL);	/* just enables FP for kernel */
+#else
+	giveup_fpu(last_task_used_math);
+#endif /* CONFIG_SMP */
+}
+
+int
+dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
+{
+	if (regs->msr & MSR_FP)
+		giveup_fpu(current);
+	memcpy(fpregs, &current->thread.fpr[0], sizeof(*fpregs));
+	return 1;
+}
+
+void
+_switch_to(struct task_struct *prev, struct task_struct *new,
+	   struct task_struct **last)
+{
+	struct thread_struct *new_thread, *old_thread;
+	unsigned long s;
+
+	__save_flags(s);
+	__cli();
+#if CHECK_STACK
+	check_stack(prev);
+	check_stack(new);
+#endif
+
+#ifdef SHOW_TASK_SWITCHES
+	printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n",
+	       prev->comm,prev->pid,
+	       new->comm,new->pid,new->thread.regs->nip,new->processor,
+	       new->fs->root,prev->fs->root);
+#endif
+#ifdef CONFIG_SMP
+	/* avoid complexity of lazy save/restore of fpu
+	 * by just saving it every time we switch out if
+	 * this task used the fpu during the last quantum.
+	 *
+	 * If it tries to use the fpu again, it'll trap and
+	 * reload its fp regs.  So we don't have to do a restore
+	 * every switch, just a save.
+	 *  -- Cort
+	 */
+	if ( prev->thread.regs && (prev->thread.regs->msr & MSR_FP) )
+		giveup_fpu(prev);
+
+	/* prev->last_processor = prev->processor; */
+	current_set[smp_processor_id()].task = new;
+#endif /* CONFIG_SMP */
+	new_thread = &new->thread;
+	old_thread = &current->thread;
+	*last = _switch(old_thread, new_thread);
+	__restore_flags(s);
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	int i;
+
+	printk("NIP: %016lX XER: %016lX LR: %016lX REGS: %p TRAP: %04lx    %s\n",
+	       regs->nip, regs->xer, regs->link, regs,regs->trap, print_tainted());
+	printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
+	       regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
+	       regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
+	       regs->msr&MSR_IR ? 1 : 0,
+	       regs->msr&MSR_DR ? 1 : 0);
+	printk("TASK = %p[%d] '%s' ",
+	       current, current->pid, current->comm);
+	printk("Last syscall: %ld ", current->thread.last_syscall);
+	printk("\nlast math %p ", last_task_used_math);
+
+#ifdef CONFIG_SMP
+	/* printk(" CPU: %d last CPU: %d", current->processor,current->last_processor); */
+#endif /* CONFIG_SMP */
+
+	printk("\n");
+	for (i = 0;  i < 32;  i++)
+	{
+		long r;
+		if ((i % 4) == 0)
+		{
+			printk("GPR%02d: ", i);
+		}
+
+		if ( __get_user(r, &(regs->gpr[i])) )
+			return;
+
+		printk("%016lX ", r);
+		if ((i % 4) == 3)
+		{
+			printk("\n");
+		}
+	}
+}
+
+void exit_thread(void)
+{
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+}
+
+void flush_thread(void)
+{
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+}
+
+void
+release_thread(struct task_struct *t)
+{
+}
+
+/*
+ * Copy a thread..
+ */
+int
+copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+	    unsigned long unused,
+	    struct task_struct * p, struct pt_regs * regs)
+{
+	unsigned long msr;
+	struct pt_regs * childregs, *kregs;
+	extern void ret_from_fork(void);
+
+	/* Copy registers */
+	childregs = ((struct pt_regs *)
+		     ((unsigned long)p + sizeof(union task_union)
+		      - STACK_FRAME_OVERHEAD)) - 2;
+	*childregs = *regs;
+	childregs->gpr[3] = 0;  /* Result from fork() */
+	p->thread.regs = childregs;
+	p->thread.ksp = (unsigned long) childregs - STACK_FRAME_OVERHEAD;
+	p->thread.ksp -= sizeof(struct pt_regs ) + STACK_FRAME_OVERHEAD;
+	kregs = (struct pt_regs *)(p->thread.ksp + STACK_FRAME_OVERHEAD);
+	/* The PPC64 compiler makes use of a TOC to contain function
+	 * pointers.  The function (ret_from_fork) is actually a pointer
+	 * to the TOC entry.  The first entry is a pointer to the actual
+	 * function.
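+	 * (Illustrative layout, a sketch of the ELF ABI descriptor:
+	 *	struct func_desc {
+	 *		unsigned long entry;	first instruction, what we want
+	 *		unsigned long toc;	TOC base to load into r2
+	 *		unsigned long env;	environment ptr, unused from C
+	 *	};
+	 * so *(unsigned long *)ret_from_fork below picks out the entry point.)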
+ */ + kregs->nip = *((unsigned long *)ret_from_fork); + asm volatile("mfmsr %0" : "=r" (msr):); + kregs->msr = msr; + kregs->gpr[1] = (unsigned long)childregs - STACK_FRAME_OVERHEAD; + kregs->gpr[2] = (((unsigned long)&__toc_start) + 0x8000); + + if (usp >= (unsigned long) regs) { + /* Stack is in kernel space - must adjust */ + childregs->gpr[1] = (unsigned long)(childregs + 1); + *((unsigned long *) childregs->gpr[1]) = 0; + childregs->gpr[13] = (unsigned long) p; + } else { + /* Provided stack is in user space */ + childregs->gpr[1] = usp; + } + p->thread.last_syscall = -1; + + /* + * copy fpu info - assume lazy fpu switch now always + * -- Cort + */ + if (regs->msr & MSR_FP) { + giveup_fpu(current); + childregs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + } + memcpy(&p->thread.fpr, ¤t->thread.fpr, sizeof(p->thread.fpr)); + p->thread.fpscr = current->thread.fpscr; + + return 0; +} + +/* + * Set up a thread for executing a new program + */ +void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp) +{ + /* NIP is *really* a pointer to the function descriptor for + * the elf _start routine. The first entry in the function + * descriptor is the entry address of _start and the second + * entry is the TOC value we need to use. + */ + unsigned long *entry = (unsigned long *)nip; + unsigned long *toc = entry + 1; + + set_fs(USER_DS); + memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->ctr, 0, 4 * sizeof(regs->ctr)); + __get_user(regs->nip, entry); + regs->gpr[1] = sp; + __get_user(regs->gpr[2], toc); + regs->msr = MSR_USER64; + if (last_task_used_math == current) + last_task_used_math = 0; + current->thread.fpscr = 0; +} + +asmlinkage int sys_clone(int p1, int p2, int p3, int p4, int p5, int p6, + struct pt_regs *regs) +{ + unsigned long clone_flags = p1; + int res; + + PPCDBG(PPCDBG_SYS64, "sys_clone - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + res = do_fork(clone_flags, regs->gpr[1], regs, 0); +#ifdef CONFIG_SMP + /* When we clone the idle task we keep the same pid but + * the return value of 0 for both causes problems. + * -- Cort + */ + if ((current->pid == 0) && (current == &init_task)) + res = 1; +#endif /* CONFIG_SMP */ + + PPCDBG(PPCDBG_SYS64, "sys_clone - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return res; +} + +asmlinkage int sys_fork(int p1, int p2, int p3, int p4, int p5, int p6, + struct pt_regs *regs) +{ + int res; + + PPCDBG(PPCDBG_SYS64, "sys_fork - entered - pid=%ld comm=%s \n", current->pid, current->comm); + + res = do_fork(SIGCHLD, regs->gpr[1], regs, 0); + +#ifdef CONFIG_SMP + /* When we clone the idle task we keep the same pid but + * the return value of 0 for both causes problems. 
+
+asmlinkage int sys_vfork(int p1, int p2, int p3, int p4, int p5, int p6,
+			 struct pt_regs *regs)
+{
+	PPCDBG(PPCDBG_SYS64, "sys_vfork - running - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0);
+}
+
+asmlinkage int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
+			  unsigned long a3, unsigned long a4, unsigned long a5,
+			  struct pt_regs *regs)
+{
+	int error;
+	char * filename;
+
+	PPCDBG(PPCDBG_SYS64, "sys_execve - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+	filename = getname((char *) a0);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	if (regs->msr & MSR_FP)
+		giveup_fpu(current);
+
+	PPCDBG(PPCDBG_SYS64, "sys_execve - before do_execve : filename = %s\n", filename);
+
+	error = do_execve(filename, (char **) a1, (char **) a2, regs);
+
+	if (error == 0)
+		current->ptrace &= ~PT_DTRACE;
+	putname(filename);
+
+ out:
+	PPCDBG(PPCDBG_SYS64, "sys_execve - exited - pid=%ld current=%lx comm=%s error = %lx\n", current->pid, current, current->comm, error);
+
+	return error;
+}
+
+struct task_struct * alloc_task_struct(void)
+{
+	struct task_struct * new_task_ptr;
+
+	new_task_ptr = ((struct task_struct *)
+			__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)));
+
+	return new_task_ptr;
+}
+
+void free_task_struct(struct task_struct * task_ptr)
+{
+	free_pages((unsigned long)(task_ptr), get_order(THREAD_SIZE));
+}
+
+void initialize_paca_hardware_interrupt_stack(void)
+{
+	extern struct Naca *naca;
+
+	int i;
+	unsigned long stack;
+	unsigned long end_of_stack = 0;
+
+	for (i=1; i < naca->processorCount; i++) {
+		/* Carve out storage for the hardware interrupt stack */
+		stack = __get_free_pages(GFP_KERNEL, get_order(8*PAGE_SIZE));
+
+		if ( !stack ) {
+			printk("ERROR, cannot find space for hardware stack.\n");
+			panic(" no hardware stack ");
+		}
+
+
+		/* Store the stack value in the PACA for the processor */
+		xPaca[i].xHrdIntStack = stack + (8*PAGE_SIZE) - STACK_FRAME_OVERHEAD;
+		xPaca[i].xHrdIntCount = 0;
+
+	}
+
+	/*
+	 * __get_free_pages() might give us a page > KERNBASE+256M which
+	 * is mapped with large ptes so we can't set up the guard page.
+	 */
+	if (__is_processor(PV_POWER4))
+		return;
+
+	for (i=0; i < naca->processorCount; i++) {
+		/* set page at the top of stack to be protected - prevent overflow */
+		end_of_stack = xPaca[i].xHrdIntStack - (8*PAGE_SIZE - STACK_FRAME_OVERHEAD);
+		ppc_md.hpte_updateboltedpp(PP_RXRX,end_of_stack);
+	}
+}
+
+extern char _stext[], _etext[];
+
+char * ppc_find_proc_name( unsigned * p, char * buf, unsigned buflen )
+{
+	unsigned long tb_flags;
+	unsigned short name_len;
+	unsigned long tb_start, code_start, code_ptr, code_offset;
+	unsigned code_len;
+	strcpy( buf, "Unknown" );
+	code_ptr = (unsigned long)p;
+	code_offset = 0;
+	if ( ( (unsigned long)p >= (unsigned long)_stext ) && ( (unsigned long)p <= (unsigned long)_etext ) ) {
+		while ( (unsigned long)p <= (unsigned long)_etext ) {
+			if ( *p == 0 ) {
+				tb_start = (unsigned long)p;
+				++p;	/* Point to traceback flags */
+				tb_flags = *((unsigned long *)p);
+				p += 2;	/* Skip over traceback flags */
+				if ( tb_flags & TB_NAME_PRESENT ) {
+					if ( tb_flags & TB_PARMINFO )
+						++p;	/* skip over parminfo data */
+					if ( tb_flags & TB_HAS_TBOFF ) {
+						code_len = *p;	/* get code length */
+						code_start = tb_start - code_len;
+						code_offset = code_ptr - code_start + 1;
+						if ( code_offset > 0x100000 )
+							break;
+						++p;	/* skip over code size */
+					}
+					name_len = *((unsigned short *)p);
+					if ( name_len > (buflen-20) )
+						name_len = buflen-20;
+					memcpy( buf, ((char *)p)+2, name_len );
+					buf[name_len] = 0;
+					if ( code_offset )
+						sprintf( buf+name_len, "+0x%lx", code_offset-1 );
+				}
+				break;
+			}
+			++p;
+		}
+	}
+	return buf;
+}
+
+void
+print_backtrace(unsigned long *sp)
+{
+	int cnt = 0;
+	unsigned long i;
+	char name_buf[256];
+
+	printk("Call backtrace: \n");
+	while (sp) {
+		if (__get_user( i, &sp[2] ))
+			break;
+		printk("%016lX ", i);
+		printk("%s\n", ppc_find_proc_name( (unsigned *)i, name_buf, 256 ));
+		if (++cnt > 32) break;
+		if (__get_user(sp, (unsigned long **)sp))
+			break;
+	}
+	printk("\n");
+}
+
+/*
+ * These bracket the sleeping functions..
+ */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched (*(unsigned long *)scheduling_functions_start_here) +#define last_sched (*(unsigned long *)scheduling_functions_end_here) + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ip, sp; + unsigned long stack_page = (unsigned long)p; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + sp = p->thread.ksp; + do { + sp = *(unsigned long *)sp; + if (sp < (stack_page + (2 * PAGE_SIZE)) || + sp >= (stack_page + (THREAD_SIZE * PAGE_SIZE))) + return 0; + if (count > 0) { + ip = *(unsigned long *)(sp + 16); + if (ip < first_sched || ip >= last_sched) + return (ip & 0xFFFFFFFF); + } + } while (count++ < 16); + return 0; +} + +void show_trace_task(struct task_struct *p) +{ + unsigned long ip, sp; + unsigned long stack_page = (unsigned long)p; + int count = 0; + + if (!p) + return; + + printk("Call Trace: "); + sp = p->thread.ksp; + do { + sp = *(unsigned long *)sp; + if (sp < (stack_page + (2 * PAGE_SIZE)) || + sp >= (stack_page + (THREAD_SIZE * PAGE_SIZE))) + break; + if (count > 0) { + ip = *(unsigned long *)(sp + 16); + printk("[%016lx] ", ip); + } + } while (count++ < 16); + printk("\n"); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/prom.c linuxppc64_2_4/arch/ppc64/kernel/prom.c --- ../kernel.org/linux/arch/ppc64/kernel/prom.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/prom.c Sun Nov 4 00:02:18 2001 @@ -0,0 +1,2239 @@ +/* + * + * + * Procedures for interfacing to Open Firmware. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#if 0 +#define DEBUG_YABOOT +#endif + +#if 0 +#define DEBUG_PROM +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG_YABOOT +#define call_yaboot(FUNC,...) \ + do { \ + if (FUNC) { \ + struct prom_t *_prom = PTRRELOC(&prom); \ + unsigned long prom_entry = _prom->entry;\ + _prom->entry = (unsigned long)(FUNC); \ + enter_prom(__VA_ARGS__); \ + _prom->entry = prom_entry; \ + } \ + } while (0) +#else +#define call_yaboot(FUNC,...) do { ; } while (0) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "open_pic.h" +#include +#include + +#ifdef CONFIG_FB +#include +#endif + +extern char _end[]; + +/* + * prom_init() is called very early on, before the kernel text + * and data have been mapped to KERNELBASE. At this point the code + * is running at whatever address it has been loaded at, so + * references to extern and static variables must be relocated + * explicitly. The procedure reloc_offset() returns the address + * we're currently running at minus the address we were linked at. + * (Note that strings count as static variables.) + * + * Because OF may have mapped I/O devices into the area starting at + * KERNELBASE, particularly on CHRP machines, we can't safely call + * OF once the kernel has been mapped to KERNELBASE. 
Therefore all + * OF calls should be done within prom_init(), and prom_init() + * and all routines called within it must be careful to relocate + * references as necessary. + * + * Note that the bss is cleared *after* prom_init runs, so we have + * to make sure that any static or extern variables it accesses + * are put in the data segment. + */ + + +#define PROM_BUG() do { \ + prom_print(RELOC("kernel BUG at ")); \ + prom_print(RELOC(__FILE__)); \ + prom_print(RELOC(":")); \ + prom_print_hex(__LINE__); \ + prom_print(RELOC("!\n")); \ + __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ +} while (0) + + + +struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; +}; + + +struct isa_reg_property { + u32 space; + u32 address; + u32 size; +}; + +struct pci_intr_map { + struct pci_address addr; + u32 dunno; + phandle int_ctrler; + u32 intr; +}; + + +typedef unsigned long interpret_func(struct device_node *, unsigned long, + int, int); +#if 0 +static interpret_func interpret_pci_props; +#endif +static unsigned long interpret_pci_props(struct device_node *, unsigned long, + int, int); + +static interpret_func interpret_isa_props; +static interpret_func interpret_root_props; + +#ifndef FB_MAX /* avoid pulling in all of the fb stuff */ +#define FB_MAX 8 +#endif + + +struct prom_t prom = { + 0, /* entry */ + 0, /* chosen */ + 0, /* cpu */ + 0, /* stdout */ + 0, /* disp_node */ + {0,0,0,{0},NULL}, /* args */ + 0, /* version */ + 32, /* encode_phys_size */ + 0 /* bi_rec pointer */ +#ifdef DEBUG_YABOOT + ,NULL /* yaboot */ +#endif +}; + + +char *prom_display_paths[FB_MAX] __initdata = { 0, }; +unsigned int prom_num_displays = 0; +char *of_stdout_device = 0; + +extern struct rtas_t rtas; +extern unsigned long klimit; +extern unsigned long embedded_sysmap_end; +extern struct Naca *naca; +extern struct lmb lmb; +#ifdef CONFIG_MSCHUNKS +extern struct msChunks msChunks; +#endif /* CONFIG_MSCHUNKS */ + +#define MAX_PHB 16 * 3 // 16 Towers * 3 PHBs/tower +struct _of_tce_table of_tce_table[MAX_PHB + 1] = {{0, 0, 0}}; + +char *bootpath = 0; +char *bootdevice = 0; + +struct device_node *allnodes = 0; + +#define UNDEFINED_IRQ 0xffff +unsigned short real_irq_to_virt_map[NR_HW_IRQS]; +unsigned short virt_irq_to_real_map[NR_IRQS]; +int last_virt_irq = 2; /* index of last virt_irq. 
Skip through IPI */ + +static unsigned long call_prom(const char *service, int nargs, int nret, ...); +static void prom_exit(void); +static unsigned long copy_device_tree(unsigned long); +static unsigned long inspect_node(phandle, struct device_node *, unsigned long, + unsigned long, struct device_node ***); +static unsigned long finish_node(struct device_node *, unsigned long, + interpret_func *, int, int); +static unsigned long finish_node_interrupts(struct device_node *, unsigned long); +static unsigned long check_display(unsigned long); +static int prom_next_node(phandle *); +static struct bi_record * prom_bi_rec_verify(struct bi_record *); +static unsigned long prom_bi_rec_reserve(unsigned long); +static struct device_node *find_phandle(phandle); + +#ifdef CONFIG_MSCHUNKS +static unsigned long prom_initialize_mschunks(unsigned long); +#endif /* CONFIG_MSCHUNKS */ + +extern unsigned long reloc_offset(void); + +extern void enter_prom(void *dummy,...); + +void cacheable_memzero(void *, unsigned int); + +extern char cmd_line[512]; /* XXX */ +unsigned long dev_tree_size; + +#ifdef CONFIG_HMT +struct { + unsigned int pir; + unsigned int threadid; +} hmt_thread_data[NR_CPUS] = {0}; +#endif /* CONFIG_HMT */ + +char testString[] = "LINUX\n"; + + +/* This is the one and *ONLY* place where we actually call open + * firmware from, since we need to make sure we're running in 32b + * mode when we do. We switch back to 64b mode upon return. + */ + +static unsigned long __init +call_prom(const char *service, int nargs, int nret, ...) +{ + int i; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + va_list list; + + _prom->args.service = (u32)LONG_LSW(service); + _prom->args.nargs = nargs; + _prom->args.nret = nret; + _prom->args.rets = (prom_arg_t *)&(_prom->args.args[nargs]); + + va_start(list, nret); + for (i=0; i < nargs ;i++) + _prom->args.args[i] = (prom_arg_t)LONG_LSW(va_arg(list, unsigned long)); + va_end(list); + + for (i=0; i < nret ;i++) + _prom->args.rets[i] = 0; + + enter_prom(&_prom->args); + + return (unsigned long)((nret > 0) ? 
_prom->args.rets[0] : 0); +} + + +static void __init +prom_exit() +{ + unsigned long offset = reloc_offset(); + + call_prom(RELOC("exit"), 0, 0); + + for (;;) /* should never get here */ + ; +} + +void __init +prom_enter(void) +{ + unsigned long offset = reloc_offset(); + + call_prom(RELOC("enter"), 0, 0); +} + + +void __init +prom_print(const char *msg) +{ + const char *p, *q; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + if (_prom->stdout == 0) + return; + + for (p = msg; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n'; ++q) + ; + if (q > p) + call_prom(RELOC("write"), 3, 1, _prom->stdout, + p, q - p); + if (*q != 0) { + ++q; + call_prom(RELOC("write"), 3, 1, _prom->stdout, + RELOC("\r\n"), 2); + } + } +} + +void +prom_print_hex(unsigned long val) +{ + int i, nibbles = sizeof(val)*2; + char buf[sizeof(val)*2+1]; + + for (i = nibbles-1; i >= 0; i--) { + buf[i] = (val & 0xf) + '0'; + if (buf[i] > '9') + buf[i] += ('a'-'0'-10); + val >>= 4; + } + buf[nibbles] = '\0'; + prom_print(buf); +} + +void +prom_print_nl(void) +{ + unsigned long offset = reloc_offset(); + prom_print(RELOC("\n")); +} + + +static unsigned long +prom_initialize_naca(unsigned long mem) +{ + phandle node; + char type[64]; + unsigned long num_cpus = 0; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + struct Naca *_naca = RELOC(naca); + +#ifdef DEBUG_PROM + prom_print(RELOC("prom_initialize_naca: start...\n")); +#endif + + _naca->pftSize = 0; /* ilog2 of htab size. computed below. */ + + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), + type, sizeof(type)); + + if (!strcmp(type, RELOC("cpu"))) { + num_cpus += 1; + + /* We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... 
-Peter
+			 */
+			if ( num_cpus == 1 ) {
+				u32 size;
+
+				call_prom(RELOC("getprop"), 4, 1, node,
+					  RELOC("d-cache-line-size"),
+					  &size, sizeof(size));
+
+				_naca->dCacheL1LineSize = size;
+				_naca->dCacheL1LogLineSize = __ilog2(size);
+				_naca->dCacheL1LinesPerPage = PAGE_SIZE / size;
+
+				call_prom(RELOC("getprop"), 4, 1, node,
+					  RELOC("i-cache-line-size"),
+					  &size, sizeof(size));
+
+				_naca->iCacheL1LineSize = size;
+				_naca->iCacheL1LogLineSize = __ilog2(size);
+				_naca->iCacheL1LinesPerPage = PAGE_SIZE / size;
+
+				if (RELOC(_machine) == _MACH_pSeriesLP) {
+					u32 pft_size[2];
+					call_prom(RELOC("getprop"), 4, 1, node,
+						  RELOC("ibm,pft-size"),
+						  &pft_size, sizeof(pft_size));
+					/* pft_size[0] is the NUMA CEC cookie */
+					_naca->pftSize = pft_size[1];
+				}
+			}
+		} else if (!strcmp(type, RELOC("serial"))) {
+			phandle isa, pci;
+			struct isa_reg_property reg;
+			union pci_range ranges;
+
+			type[0] = 0;
+			call_prom(RELOC("getprop"), 4, 1, node,
+				  RELOC("ibm,aix-loc"), type, sizeof(type));
+
+			if (strcmp(type, RELOC("S1")))
+				continue;
+
+			call_prom(RELOC("getprop"), 4, 1, node, RELOC("reg"),
+				  &reg, sizeof(reg));
+
+			isa = call_prom(RELOC("parent"), 1, 1, node);
+			if (!isa)
+				PROM_BUG();
+			pci = call_prom(RELOC("parent"), 1, 1, isa);
+			if (!pci)
+				PROM_BUG();
+
+			call_prom(RELOC("getprop"), 4, 1, pci, RELOC("ranges"),
+				  &ranges, sizeof(ranges));
+
+			if ( _prom->encode_phys_size == 32 )
+				_naca->serialPortAddr = ranges.pci32.phys+reg.address;
+			else {
+				_naca->serialPortAddr =
+					((((unsigned long)ranges.pci64.phys_hi) << 32) |
+					 (ranges.pci64.phys_lo)) + reg.address;
+			}
+		}
+	}
+
+	_naca->interrupt_controller = IC_INVALID;
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("name"),
+			  type, sizeof(type));
+		if (strcmp(type, RELOC("interrupt-controller"))) {
+			continue;
+		}
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("compatible"),
+			  type, sizeof(type));
+		if (strstr(type, RELOC("open-pic"))) {
+			_naca->interrupt_controller = IC_OPEN_PIC;
+		} else if (strstr(type, RELOC("ppc-xicp"))) {
+			_naca->interrupt_controller = IC_PPC_XIC;
+		} else {
+			prom_print(RELOC("prom: failed to recognize interrupt-controller\n"));
+		}
+		break;
+	}
+
+	if (_naca->interrupt_controller == IC_INVALID) {
+		prom_print(RELOC("prom: failed to find interrupt-controller\n"));
+		PROM_BUG();
+	}
+
+	/* We gotta have at least 1 cpu... */
+	if ( (_naca->processorCount = num_cpus) < 1 )
+		PROM_BUG();
+
+	_naca->physicalMemorySize = lmb_phys_mem_size();
+
+	if (RELOC(_machine) == _MACH_pSeries) {
+		unsigned long rnd_mem_size, pteg_count;
+
+		/* round mem_size up to next power of 2 */
+		rnd_mem_size = 1UL << __ilog2(_naca->physicalMemorySize);
+		if (rnd_mem_size < _naca->physicalMemorySize)
+			rnd_mem_size <<= 1;
+
+		/* # pages / 2 */
+		pteg_count = (rnd_mem_size >> (12 + 1));
+
+		_naca->pftSize = __ilog2(pteg_count << 7);
+	}
+
+	if (_naca->pftSize == 0) {
+		prom_print(RELOC("prom: failed to compute pftSize!\n"));
+		PROM_BUG();
+	}
+
+	/*
+	 * Hardcode to GP size. I am not sure where to get this info
+	 * in general, as there does not appear to be a slb-size OF
+	 * entry. At least in Condor and earlier. DRENG
+	 */
+	_naca->slb_size = 64;
+
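To make the pftSize computation above concrete: a PTEG (page table entry group) occupies 128 bytes, and the code sizes the hash table at one PTEG per two 4K pages of power-of-two-rounded memory. A sketch of the same arithmetic, with ilog2() standing in for the kernel's __ilog2():

/* e.g. 512MB: rnd = 2^29, pteg_count = 2^29 >> 13 = 2^16,
 * pftSize = log2(2^16 << 7) = 23, i.e. an 8MB hash table.
 */
static unsigned long pft_size_for(unsigned long mem_bytes)
{
	unsigned long rnd = 1UL << ilog2(mem_bytes);
	unsigned long pteg_count;

	if (rnd < mem_bytes)		/* round up to a power of two */
		rnd <<= 1;
	pteg_count = rnd >> (12 + 1);	/* half the number of 4K pages */
	return ilog2(pteg_count << 7);	/* 128 bytes per PTEG */
}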
+#ifdef DEBUG_PROM
+	prom_print(RELOC("naca->processorCount = 0x"));
+	prom_print_hex(_naca->processorCount);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->physicalMemorySize = 0x"));
+	prom_print_hex(_naca->physicalMemorySize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->pftSize = 0x"));
+	prom_print_hex(_naca->pftSize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->dCacheL1LineSize = 0x"));
+	prom_print_hex(_naca->dCacheL1LineSize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->dCacheL1LogLineSize = 0x"));
+	prom_print_hex(_naca->dCacheL1LogLineSize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->dCacheL1LinesPerPage = 0x"));
+	prom_print_hex(_naca->dCacheL1LinesPerPage);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->iCacheL1LineSize = 0x"));
+	prom_print_hex(_naca->iCacheL1LineSize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->iCacheL1LogLineSize = 0x"));
+	prom_print_hex(_naca->iCacheL1LogLineSize);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->iCacheL1LinesPerPage = 0x"));
+	prom_print_hex(_naca->iCacheL1LinesPerPage);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->serialPortAddr = 0x"));
+	prom_print_hex(_naca->serialPortAddr);
+	prom_print_nl();
+
+	prom_print(RELOC("naca->interrupt_controller = 0x"));
+	prom_print_hex(_naca->interrupt_controller);
+	prom_print_nl();
+
+	prom_print(RELOC("_machine = 0x"));
+	prom_print_hex(RELOC(_machine));
+	prom_print_nl();
+
+	prom_print(RELOC("prom_initialize_naca: end...\n"));
+#endif
+
+	return mem;
+}
+
+
+static unsigned long __init
+prom_initialize_lmb(unsigned long mem)
+{
+	phandle node;
+	char type[64];
+	unsigned long i, offset = reloc_offset();
+	struct prom_t *_prom = PTRRELOC(&prom);
+	union lmb_reg_property reg;
+	unsigned long mem_size = 0, lmb_base, lmb_size;
+	unsigned long num_regs, bytes_per_reg = (_prom->encode_phys_size*2)/8;
+
+#ifdef CONFIG_MSCHUNKS
+#if 1
+	/* Fix me: 630 3G-4G IO hack here... -Peter (PPPBBB) */
+	unsigned long io_base = 3UL<<30;
+	unsigned long io_size = 1UL<<30;
+	unsigned long have_630 = 1;	/* assume we have a 630 */
+
+#else
+	unsigned long io_base = ;
+	unsigned long io_size = ;
+#endif
+#endif
+
+	lmb_init();
+
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"),
+			  type, sizeof(type));
+
+		if (strcmp(type, RELOC("memory")))
+			continue;
+
+		num_regs = call_prom(RELOC("getprop"), 4, 1, node, RELOC("reg"),
+			&reg, sizeof(reg)) / bytes_per_reg;
+
+		for (i=0; i < num_regs ;i++) {
+			if (_prom->encode_phys_size == 32) {
+				lmb_base = reg.addr32[i].address;
+				lmb_size = reg.addr32[i].size;
+			} else {
+				lmb_base = reg.addr64[i].address;
+				lmb_size = reg.addr64[i].size;
+			}
+
+#ifdef CONFIG_MSCHUNKS
+			if ( lmb_addrs_overlap(lmb_base,lmb_size,
+					       io_base,io_size) ) {
+				/* If we really have dram here, then we don't
+				 * have a 630! -Peter
+				 */
+				have_630 = 0;
+			}
+#endif
+			if ( lmb_add(lmb_base, lmb_size) < 0 )
+				prom_print(RELOC("Too many LMB's, discarding this one...\n"));
+			else
+				mem_size += lmb_size;
+		}
+
+	}
+
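The 630 detection in the loop above turns on lmb_addrs_overlap(), which is defined elsewhere in this patch set and is used here as an intersection test on (base, size) extents. Assuming that semantics, a minimal equivalent would be:

/* nonzero when [b1, b1+s1) and [b2, b2+s2) intersect */
static int extents_overlap(unsigned long b1, unsigned long s1,
			   unsigned long b2, unsigned long s2)
{
	return (b1 < b2 + s2) && (b2 < b1 + s1);
}

Any memory region the firmware reports inside the assumed 3G-4G I/O window clears have_630, and the window is only registered as I/O space below when nothing contradicted that assumption.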
+#ifdef CONFIG_MSCHUNKS
+	if ( have_630 && lmb_addrs_overlap(0,mem_size,io_base,io_size) )
+		lmb_add_io(io_base, io_size);
+#endif
+
+	lmb_analyze();
+
+#ifdef CONFIG_MSCHUNKS
+	mem = prom_initialize_mschunks(mem);
+#endif /* CONFIG_MSCHUNKS */
+
+	return mem;
+}
+
+
+static unsigned long __init
+prom_instantiate_rtas(unsigned long mem)
+{
+	unsigned long offset = reloc_offset();
+	struct prom_t *_prom = PTRRELOC(&prom);
+	struct rtas_t *_rtas = PTRRELOC(&rtas);
+	ihandle prom_rtas;
+	u32 getprop_rval;
+
+#ifdef DEBUG_PROM
+	prom_print(RELOC("prom_instantiate_rtas: start...\n"));
+#endif
+	prom_rtas = (ihandle)call_prom(RELOC("finddevice"), 1, 1, RELOC("/rtas"));
+	if (prom_rtas != (ihandle) -1) {
+		char hypertas_funcs[1024];
+		int rc;
+
+		if ((rc = call_prom(RELOC("getprop"),
+				    4, 1, prom_rtas,
+				    RELOC("ibm,hypertas-functions"),
+				    hypertas_funcs,
+				    sizeof(hypertas_funcs))) > 0) {
+			RELOC(_machine) = _MACH_pSeriesLP;
+		}
+
+		call_prom(RELOC("getprop"),
+			  4, 1, prom_rtas,
+			  RELOC("rtas-size"),
+			  &getprop_rval,
+			  sizeof(getprop_rval));
+		_rtas->size = getprop_rval;
+		prom_print(RELOC("instantiating rtas"));
+		if (_rtas->size != 0) {
+			/*
+			 * Ask OF for some space for RTAS.
+			 * Actually OF has bugs so we just arbitrarily
+			 * use memory at the 6MB point.
+			 */
+			// The new code...
+			mem = PAGE_ALIGN(mem);
+			_rtas->base = mem + offset - KERNELBASE;
+
+			mem += _rtas->size;
+			prom_print(RELOC(" at 0x"));
+			prom_print_hex(_rtas->base);
+
+			prom_rtas = (ihandle)call_prom(RELOC("open"),
+						       1, 1, RELOC("/rtas"));
+			prom_print(RELOC("..."));
+
+			if ((long)call_prom(RELOC("call-method"), 3, 2,
+					    RELOC("instantiate-rtas"),
+					    prom_rtas,
+					    _rtas->base) >= 0) {
+				_rtas->entry = (long)_prom->args.rets[1];
+			}
+		}
+
+		if (_rtas->entry <= 0) {
+			prom_print(RELOC(" failed\n"));
+		} else {
+			prom_print(RELOC(" done\n"));
+		}
+
+#ifdef DEBUG_PROM
+		prom_print(RELOC("rtas->base = 0x"));
+		prom_print_hex(_rtas->base);
+		prom_print_nl();
+		prom_print(RELOC("rtas->entry = 0x"));
+		prom_print_hex(_rtas->entry);
+		prom_print_nl();
+		prom_print(RELOC("rtas->size = 0x"));
+		prom_print_hex(_rtas->size);
+		prom_print_nl();
+#endif
+	}
+#ifdef DEBUG_PROM
+	prom_print(RELOC("prom_instantiate_rtas: end...\n"));
+#endif
+
+	return mem;
+}
+
+unsigned long prom_strtoul(const char *cp)
+{
+	unsigned long result = 0,value;
+
+	while (*cp) {
+		value = *cp-'0';
+		result = result*10 + value;
+		cp++;
+	}
+
+	return result;
+}
+
+
+#ifdef CONFIG_MSCHUNKS
+static unsigned long
+prom_initialize_mschunks(unsigned long mem)
+{
+	unsigned long offset = reloc_offset();
+	struct lmb *_lmb = PTRRELOC(&lmb);
+	struct msChunks *_msChunks = PTRRELOC(&msChunks);
+	unsigned long i, pchunk = 0;
+	unsigned long mem_size = _lmb->memory.size;
+	unsigned long chunk_size = _lmb->memory.lcd_size;
+
+#if 1
+	/* Fix me: 630 3G-4G IO hack here... -Peter (PPPBBB) */
+	unsigned long io_base = 3UL<<30;
+	unsigned long io_size = 1UL<<30;
+
+	for (i=0; i < _lmb->memory.cnt ;i++) {
+		unsigned long base = _lmb->memory.region[i].base;
+		unsigned long size = _lmb->memory.region[i].size;
+		if ( lmb_addrs_overlap(base,size,io_base,io_size) ) {
+			/* If we really have dram here, then we don't
+			 * have a 630! 
-Peter + */ + io_base = mem_size; + io_size = 1; + break; + } + } +#else + unsigned long io_base = ; + unsigned long io_size = ; +#endif + + if ( lmb_addrs_overlap(0,mem_size,io_base,io_size) ) { + lmb_add(io_base, io_size); + lmb_reserve(io_base, io_size); + } + + mem = msChunks_alloc(mem, mem_size / chunk_size, chunk_size); + + for (i=0; i < _lmb->memory.cnt ;i++) { + unsigned long base = _lmb->memory.region[i].base; + unsigned long size = _lmb->memory.region[i].size; + unsigned long achunk = addr_to_chunk(base); + unsigned long end_achunk = addr_to_chunk(base+size); + _lmb->memory.region[i].physbase = chunk_to_addr(pchunk); + for (; achunk < end_achunk ;) { + PTRRELOC(_msChunks->abs)[pchunk++] = achunk++; + } + } + + return mem; +} +#endif /* CONFIG_MSCHUNKS */ + +void +prom_initialize_tce_table(void) +{ + phandle node; + ihandle phb_node; + unsigned long offset = reloc_offset(); + char compatible[64], path[64], type[64]; + unsigned long i, table = 0; + unsigned long base, vbase, align; + unsigned int minalign, minsize; + struct _of_tce_table *prom_tce_table = RELOC(of_tce_table); + unsigned long tce_entry, *tce_entryp; + +#ifdef DEBUG_PROM + prom_print(RELOC("starting prom_initialize_tce_table\n")); +#endif + + /* Search all nodes looking for PHBs. */ + for (node = 0; prom_next_node(&node); ) { + compatible[0] = 0; + type[0] = 0; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("compatible"), + compatible, sizeof(compatible)); + call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), + type, sizeof(type)); + + if ((compatible[0] == 0) || + ((strstr(compatible, RELOC("python")) == NULL) && + (strstr(compatible, RELOC("Speedwagon")) == NULL))) { + continue; + } + if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) { + continue; + } + + if (call_prom(RELOC("getprop"), 4, 1, node, + RELOC("tce-table-minalign"), &minalign, + sizeof(minalign)) < 0) { + minalign = 0; + } + + if (call_prom(RELOC("getprop"), 4, 1, node, + RELOC("tce-table-minsize"), &minsize, + sizeof(minsize)) < 0) { + minsize = 4UL << 20; + } + + /* Even though we read what OF wants, we just set the table + * size to 4 MB. This is enough to map 2GB of PCI DMA space. + * By doing this, we avoid the pitfalls of trying to DMA to + * MMIO space and the DMA alias hole. + */ + minsize = 4UL << 20; + + /* Align to the greater of the align or size */ + align = (minalign < minsize) ? minsize : minalign; + + /* Carve out storage for the TCE table. */ + base = lmb_alloc(minsize, align); + + if ( !base ) { + prom_print(RELOC("ERROR, cannot find space for TCE table.\n")); + prom_exit(); + } + + vbase = absolute_to_virt(base); + + /* Save away the TCE table attributes for later use. */ + prom_tce_table[table].node = node; + prom_tce_table[table].base = vbase; + prom_tce_table[table].size = minsize; + +#ifdef DEBUG_PROM + prom_print(RELOC("TCE table: 0x")); + prom_print_hex(table); + prom_print_nl(); + + prom_print(RELOC("\tnode = 0x")); + prom_print_hex(node); + prom_print_nl(); + + prom_print(RELOC("\tbase = 0x")); + prom_print_hex(vbase); + prom_print_nl(); + + prom_print(RELOC("\tsize = 0x")); + prom_print_hex(minsize); + prom_print_nl(); +#endif + + /* Initialize the table to have a one-to-one mapping + * over the allocated size. 
+ */ + tce_entryp = (unsigned long *)base; + for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { + tce_entry = (i << PAGE_SHIFT); + tce_entry |= 0x3; + *tce_entryp = tce_entry; + } + + /* Call OF to setup the TCE hardware */ + if (call_prom(RELOC("package-to-path"), 3, 1, node, + path, 255) <= 0) { + prom_print(RELOC("package-to-path failed\n")); + } else { + prom_print(RELOC("opened ")); + prom_print(path); + prom_print_nl(); + } + + phb_node = (ihandle)call_prom(RELOC("open"), 1, 1, path); + if ( (long)phb_node <= 0) { + prom_print(RELOC("open failed\n")); + } else { + prom_print(RELOC("open success\n")); + } + call_prom(RELOC("call-method"), 6, 0, + RELOC("set-64-bit-addressing"), + phb_node, + -1, + minsize, + base & 0xffffffff, + (base >> 32) & 0xffffffff); + call_prom(RELOC("close"), 1, 0, phb_node); + + table++; + } + + /* Flag the first invalid entry */ + prom_tce_table[table].node = 0; +#ifdef DEBUG_PROM + prom_print(RELOC("ending prom_initialize_tce_table\n")); +#endif +} + +/* + * With CHRP SMP we need to use the OF to start the other + * processors so we can't wait until smp_boot_cpus (the OF is + * trashed by then) so we have to put the processors into + * a holding pattern controlled by the kernel (not OF) before + * we destroy the OF. + * + * This uses a chunk of low memory, puts some holding pattern + * code there and sends the other processors off to there until + * smp_boot_cpus tells them to do something. The holding pattern + * checks that address until its cpu # is there, when it is that + * cpu jumps to __secondary_start(). smp_boot_cpus() takes care + * of setting those values. + * + * We also use physical address 0x4 here to tell when a cpu + * is in its holding pattern code. + * + * Fixup comment... DRENG / PPPBBB - Peter + * + * -- Cort + */ +static void +prom_hold_cpus(unsigned long mem) +{ + unsigned long i; + unsigned int reg; + phandle node; + unsigned long offset = reloc_offset(); + char type[64], *path; + int cpuid = 0; + extern void __secondary_hold(void); + extern unsigned long __secondary_hold_spinloop; + extern unsigned long __secondary_hold_acknowledge; + unsigned long *spinloop = __v2a(&__secondary_hold_spinloop); + unsigned long *acknowledge = __v2a(&__secondary_hold_acknowledge); + unsigned long secondary_hold = (unsigned long)__v2a(*PTRRELOC((unsigned long *)__secondary_hold)); + struct Naca *_naca = RELOC(naca); + struct Paca *_xPaca = PTRRELOC(&xPaca[0]); + struct prom_t *_prom = PTRRELOC(&prom); + + /* Initially, we must have one active CPU. */ + _naca->processorCount = 1; + +#ifdef DEBUG_PROM + prom_print(RELOC("prom_hold_cpus: start...\n")); + prom_print(RELOC(" 1) spinloop = 0x")); + prom_print_hex(spinloop); + prom_print_nl(); + prom_print(RELOC(" 1) *spinloop = 0x")); + prom_print_hex(*spinloop); + prom_print_nl(); + prom_print(RELOC(" 1) acknowledge = 0x")); + prom_print_hex(acknowledge); + prom_print_nl(); + prom_print(RELOC(" 1) *acknowledge = 0x")); + prom_print_hex(*acknowledge); + prom_print_nl(); + prom_print(RELOC(" 1) secondary_hold = 0x")); + prom_print_hex(secondary_hold); + prom_print_nl(); +#endif + + /* Set the common spinloop variable, so all of the secondary cpus + * will block when they are awakened from their OF spinloop. + * This must occur for both SMP and non SMP kernels, since OF will + * be trashed when we move the kernel. 
+	 */
+	*spinloop = 0;
+
+#ifdef CONFIG_HMT
+	for (i=0; i < NR_CPUS; i++) {
+		RELOC(hmt_thread_data)[i].pir = 0xdeadbeef;
+	}
+#endif
+	/* look for cpus */
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"),
+			  type, sizeof(type));
+		if (strcmp(type, RELOC("cpu")) != 0)
+			continue;
+
+		/* Skip non-configured cpus. */
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("status"),
+			  type, sizeof(type));
+		if (strcmp(type, RELOC("okay")) != 0)
+			continue;
+
+		reg = -1;
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("reg"),
+			  &reg, sizeof(reg));
+
+		/* Only need to start secondary procs, not ourself. */
+		if ( reg == _prom->cpu )
+			continue;
+
+		path = (char *) mem;
+		memset(path, 0, 256);
+		if ((long) call_prom(RELOC("package-to-path"), 3, 1,
+				     node, path, 255) < 0)
+			continue;
+
+		cpuid++;
+
+#ifdef DEBUG_PROM
+		prom_print_nl();
+		prom_print(RELOC("cpuid = 0x"));
+		prom_print_hex(cpuid);
+		prom_print_nl();
+		prom_print(RELOC("cpu hw idx = 0x"));
+		prom_print_hex(reg);
+		prom_print_nl();
+#endif
+		_xPaca[cpuid].xHwProcNum = reg;
+
+		prom_print(RELOC("starting cpu "));
+		prom_print(path);
+
+		/* Init the acknowledge var which will be reset by
+		 * the secondary cpu when it awakens from its OF
+		 * spinloop.
+		 */
+		*acknowledge = (unsigned long)-1;
+
+#ifdef DEBUG_PROM
+		prom_print(RELOC(" 3) spinloop = 0x"));
+		prom_print_hex(spinloop);
+		prom_print_nl();
+		prom_print(RELOC(" 3) *spinloop = 0x"));
+		prom_print_hex(*spinloop);
+		prom_print_nl();
+		prom_print(RELOC(" 3) acknowledge = 0x"));
+		prom_print_hex(acknowledge);
+		prom_print_nl();
+		prom_print(RELOC(" 3) *acknowledge = 0x"));
+		prom_print_hex(*acknowledge);
+		prom_print_nl();
+		prom_print(RELOC(" 3) secondary_hold = 0x"));
+		prom_print_hex(secondary_hold);
+		prom_print_nl();
+		prom_print(RELOC(" 3) cpuid = 0x"));
+		prom_print_hex(cpuid);
+		prom_print_nl();
+#endif
+		call_prom(RELOC("start-cpu"), 3, 0, node, secondary_hold, cpuid);
+		prom_print(RELOC("..."));
+		for ( i = 0 ; (i < 100000000) &&
+			      (*acknowledge == ((unsigned long)-1)); i++ ) ;
+#ifdef DEBUG_PROM
+		{
+			unsigned long *p = 0x0;
+			prom_print(RELOC(" 4) 0x0 = 0x"));
+			prom_print_hex(*p);
+			prom_print_nl();
+		}
+#endif
+		if (*acknowledge == cpuid) {
+			prom_print(RELOC("ok\n"));
+			/* Set the number of active processors. */
+			_naca->processorCount++;
+		} else {
+			prom_print(RELOC("failed: "));
+			prom_print_hex(*acknowledge);
+			prom_print_nl();
+		}
+	}
+#ifdef CONFIG_HMT
+	/* Only enable HMT on processors that provide support. */
+	if (__is_processor(PV_PULSAR) ||
+	    __is_processor(PV_ICESTAR) ||
+	    __is_processor(PV_SSTAR)) {
+		prom_print(RELOC("    starting secondary threads\n"));
+
+		for (i=0; i < _naca->processorCount ;i++) {
+			unsigned long threadid = _naca->processorCount*2-1-i;
+
+			if (i == 0) {
+				unsigned long pir = _get_PIR();
+				if (__is_processor(PV_PULSAR)) {
+					RELOC(hmt_thread_data)[i].pir =
+						pir & 0x1f;
+				} else {
+					RELOC(hmt_thread_data)[i].pir =
+						pir & 0x3ff;
+				}
+			}
+
+			RELOC(hmt_thread_data)[i].threadid = threadid;
+#ifdef DEBUG_PROM
+			prom_print(RELOC("    cpuid 0x"));
+			prom_print_hex(i);
+			prom_print(RELOC(" maps to threadid 0x"));
+			prom_print_hex(threadid);
+			prom_print_nl();
+			prom_print(RELOC("    pir 0x"));
+			prom_print_hex(RELOC(hmt_thread_data)[i].pir);
+			prom_print_nl();
+#endif
+			_xPaca[threadid].xHwProcNum = _xPaca[i].xHwProcNum+1;
+		}
+		_naca->processorCount *= 2;
+	} else {
+		prom_print(RELOC("Processor is not HMT capable\n"));
+	}
+#endif
+
+#ifdef DEBUG_PROM
+	prom_print(RELOC("prom_hold_cpus: end...\n"));
+#endif
+}
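The hold pattern set up by prom_hold_cpus() amounts to a two-word mailbox handshake between the boot CPU and each secondary. A reduced model of the protocol, illustrative only (the real holding loop is the assembly at __secondary_hold, not this C):

static volatile unsigned long hold_spinloop;	/* 0 until a cpu is released */
static volatile unsigned long hold_ack = -1UL;	/* set to a parked cpu's id */

static void secondary_hold_model(unsigned long my_cpuid)
{
	hold_ack = my_cpuid;			/* seen by the boot cpu's poll */
	while (hold_spinloop != my_cpuid)
		;				/* park until smp_boot_cpus() */
	/* ...would now branch to __secondary_start()... */
}

The bounded *acknowledge poll in the loop above is the boot CPU's side of the same handshake: it spins until the started processor has stored its cpuid.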
+
+
+/*
+ * We enter here early on, when the Open Firmware prom is still
+ * handling exceptions and the MMU hash table for us.
+ */
+
+unsigned long __init
+prom_init(unsigned long r3, unsigned long r4, unsigned long pp,
+	  unsigned long r6, unsigned long r7, yaboot_debug_t *yaboot)
+{
+	int chrp = 0;
+	unsigned long mem;
+	ihandle prom_mmu, prom_op, prom_root, prom_cpu;
+	phandle cpu_pkg;
+	unsigned long offset = reloc_offset();
+	long l;
+	char *p, *d;
+	unsigned long phys;
+	u32 getprop_rval;
+	struct Naca *_naca = RELOC(naca);
+	struct Paca *_xPaca = PTRRELOC(&xPaca[0]);
+	struct prom_t *_prom = PTRRELOC(&prom);
+
+	/* Default machine type. */
+	RELOC(_machine) = _MACH_pSeries;
+	/* Reset klimit to take into account the embedded system map */
+	if (RELOC(embedded_sysmap_end))
+		RELOC(klimit) = __va(PAGE_ALIGN(RELOC(embedded_sysmap_end)));
+
+	/* Get a handle to the prom entry point before anything else */
+	_prom->entry = pp;
+	_prom->bi_recs = prom_bi_rec_verify((struct bi_record *)r6);
+	if ( _prom->bi_recs != NULL ) {
+		RELOC(klimit) = PTRUNRELOC((unsigned long)_prom->bi_recs + _prom->bi_recs->data[1]);
+	}
+
+#ifdef DEBUG_YABOOT
+	call_yaboot(yaboot->dummy,offset>>32,offset&0xffffffff);
+	call_yaboot(yaboot->printf, RELOC("offset = 0x%08x%08x\n"), LONG_MSW(offset), LONG_LSW(offset));
+#endif
+
+	/* Default */
+	phys = KERNELBASE - offset;
+
+#ifdef DEBUG_YABOOT
+	call_yaboot(yaboot->printf, RELOC("phys = 0x%08x%08x\n"), LONG_MSW(phys), LONG_LSW(phys));
+#endif
+
+
+#ifdef DEBUG_YABOOT
+	_prom->yaboot = yaboot;
+	call_yaboot(yaboot->printf, RELOC("pp = 0x%08x%08x\n"), LONG_MSW(pp), LONG_LSW(pp));
+	call_yaboot(yaboot->printf, RELOC("prom = 0x%08x%08x\n"), LONG_MSW(_prom->entry), LONG_LSW(_prom->entry));
+#endif
+
+	/* First get a handle for the stdout device */
+	_prom->chosen = (ihandle)call_prom(RELOC("finddevice"), 1, 1,
+					   RELOC("/chosen"));
+
+#ifdef DEBUG_YABOOT
+	call_yaboot(yaboot->printf, RELOC("prom->chosen = 0x%08x%08x\n"), LONG_MSW(_prom->chosen), LONG_LSW(_prom->chosen));
+#endif
+
+	if ((long)_prom->chosen <= 0)
+		prom_exit();
+
+	if ((long)call_prom(RELOC("getprop"), 4, 1, _prom->chosen,
+			    RELOC("stdout"), &getprop_rval,
+			    sizeof(getprop_rval)) <= 0)
+		prom_exit();
+
+	_prom->stdout = (ihandle)(unsigned long)getprop_rval;
+
+#ifdef DEBUG_YABOOT
+	if (_prom->stdout == 0) {
+		call_yaboot(yaboot->printf, RELOC("prom->stdout = 0x%08x%08x\n"), LONG_MSW(_prom->stdout), LONG_LSW(_prom->stdout));
+	}
+
+	call_yaboot(yaboot->printf, RELOC("prom->stdout = 
0x%08x%08x\n"), LONG_MSW(_prom->stdout), LONG_LSW(_prom->stdout)); +#endif + +#ifdef DEBUG_YABOOT + call_yaboot(yaboot->printf, RELOC("Location: 0x11\n")); +#endif + + mem = RELOC(klimit) - offset; +#ifdef DEBUG_YABOOT + call_yaboot(yaboot->printf, RELOC("Location: 0x11b\n")); +#endif + + /* Get the full OF pathname of the stdout device */ + p = (char *) mem; + memset(p, 0, 256); + call_prom(RELOC("instance-to-path"), 3, 1, _prom->stdout, p, 255); + RELOC(of_stdout_device) = PTRUNRELOC(p); + mem += strlen(p) + 1; + + getprop_rval = 1; + prom_root = (ihandle)call_prom(RELOC("finddevice"), 1, 1, RELOC("/")); + if (prom_root != (ihandle)-1) { + call_prom(RELOC("getprop"), 4, 1, + prom_root, RELOC("#size-cells"), + &getprop_rval, sizeof(getprop_rval)); + } + _prom->encode_phys_size = (getprop_rval==1) ? 32 : 64; + +#ifdef DEBUG_PROM + prom_print(RELOC("DRENG: Detect OF version...\n")); +#endif + /* Find the OF version */ + prom_op = (ihandle)call_prom(RELOC("finddevice"), 1, 1, RELOC("/openprom")); + if (prom_op != (ihandle)-1) { + char model[64]; + long sz; + sz = (long)call_prom(RELOC("getprop"), 4, 1, prom_op, + RELOC("model"), model, 64); + if (sz > 0) { + char *c; + /* hack to skip the ibm chrp firmware # */ + if ( strncmp(model,RELOC("IBM"),3) ) { + for (c = model; *c; c++) + if (*c >= '0' && *c <= '9') { + _prom->version = *c - '0'; + break; + } + } + else + chrp = 1; + } + } + if (_prom->version >= 3) + prom_print(RELOC("OF Version 3 detected.\n")); + + + /* Determine which cpu is actually running right _now_ */ + if ((long)call_prom(RELOC("getprop"), 4, 1, _prom->chosen, + RELOC("cpu"), &getprop_rval, + sizeof(getprop_rval)) <= 0) + prom_exit(); + + prom_cpu = (ihandle)(unsigned long)getprop_rval; + cpu_pkg = call_prom(RELOC("instance-to-package"), 1, 1, prom_cpu); + call_prom(RELOC("getprop"), 4, 1, + cpu_pkg, RELOC("reg"), + &getprop_rval, sizeof(getprop_rval)); + _prom->cpu = (int)(unsigned long)getprop_rval; + _xPaca[0].xHwProcNum = _prom->cpu; + +#ifdef DEBUG_PROM + prom_print(RELOC("Booting CPU hw index = 0x")); + prom_print_hex(_prom->cpu); + prom_print_nl(); +#endif + + /* Get the boot device and translate it to a full OF pathname. */ + p = (char *) mem; + l = (long) call_prom(RELOC("getprop"), 4, 1, _prom->chosen, + RELOC("bootpath"), p, 1<<20); + if (l > 0) { + p[l] = 0; /* should already be null-terminated */ + RELOC(bootpath) = PTRUNRELOC(p); + mem += l + 1; + d = (char *) mem; + *d = 0; + call_prom(RELOC("canon"), 3, 1, p, d, 1<<20); + RELOC(bootdevice) = PTRUNRELOC(d); + mem = DOUBLEWORD_ALIGN(mem + strlen(d) + 1); + } + + mem = prom_initialize_lmb(mem); + + mem = prom_bi_rec_reserve(mem); + + mem = prom_instantiate_rtas(mem); + + /* Initialize some system info into the Naca early... */ + mem = prom_initialize_naca(mem); + + /* If we are on an SMP machine, then we *MUST* do the + * following, regardless of whether we have an SMP + * kernel or not. + */ + if ( _naca->processorCount > 1 ) + prom_hold_cpus(mem); + + mem = check_display(mem); + +#ifdef DEBUG_PROM + prom_print(RELOC("copying OF device tree...\n")); +#endif + mem = copy_device_tree(mem); + + RELOC(klimit) = mem + offset; + + lmb_reserve(0, __pa(RELOC(klimit))); + + if (RELOC(_machine) == _MACH_pSeries) + prom_initialize_tce_table(); + + if ((long) call_prom(RELOC("getprop"), 4, 1, + _prom->chosen, + RELOC("mmu"), + &getprop_rval, + sizeof(getprop_rval)) <= 0) { + prom_print(RELOC(" no MMU found\n")); + prom_exit(); + } + + /* We assume the phys. 
address size is 3 cells */
+	RELOC(prom_mmu) = (ihandle)(unsigned long)getprop_rval;
+
+	if ((long)call_prom(RELOC("call-method"), 4, 4,
+			    RELOC("translate"),
+			    prom_mmu,
+			    (void *)(KERNELBASE - offset),
+			    (void *)1) != 0) {
+		prom_print(RELOC(" (translate failed) "));
+	} else {
+		prom_print(RELOC(" (translate ok) "));
+		phys = (unsigned long)_prom->args.rets[3];
+	}
+
+	/* If OpenFirmware version >= 3, then use quiesce call */
+	if (_prom->version >= 3) {
+		prom_print(RELOC("Calling quiesce ...\n"));
+		call_prom(RELOC("quiesce"), 0, 0);
+		phys = KERNELBASE - offset;
+	}
+
+	prom_print(RELOC("returning from prom_init\n"));
+	return phys;
+}
+
+
+static int
+prom_set_color(ihandle ih, int i, int r, int g, int b)
+{
+	unsigned long offset = reloc_offset();
+
+	return (int)(long)call_prom(RELOC("call-method"), 6, 1,
+				    RELOC("color!"),
+				    ih,
+				    (void *)(long) i,
+				    (void *)(long) b,
+				    (void *)(long) g,
+				    (void *)(long) r );
+}
+
+/*
+ * If we have a display that we don't know how to drive,
+ * we will want to try to execute OF's open method for it
+ * later. However, OF will probably fall over if we do that
+ * after we've taken over the MMU.
+ * So we check whether we will need to open the display,
+ * and if so, open it now.
+ */
+static unsigned long __init
+check_display(unsigned long mem)
+{
+	phandle node;
+	ihandle ih;
+	int i;
+	unsigned long offset = reloc_offset();
+	struct prom_t *_prom = PTRRELOC(&prom);
+	char type[64], *path;
+	static unsigned char default_colors[] = {
+		0x00, 0x00, 0x00,
+		0x00, 0x00, 0xaa,
+		0x00, 0xaa, 0x00,
+		0x00, 0xaa, 0xaa,
+		0xaa, 0x00, 0x00,
+		0xaa, 0x00, 0xaa,
+		0xaa, 0xaa, 0x00,
+		0xaa, 0xaa, 0xaa,
+		0x55, 0x55, 0x55,
+		0x55, 0x55, 0xff,
+		0x55, 0xff, 0x55,
+		0x55, 0xff, 0xff,
+		0xff, 0x55, 0x55,
+		0xff, 0x55, 0xff,
+		0xff, 0xff, 0x55,
+		0xff, 0xff, 0xff
+	};
+
+	_prom->disp_node = 0;
+
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"),
+			  type, sizeof(type));
+		if (strcmp(type, RELOC("display")) != 0)
+			continue;
+		/* It seems OF doesn't null-terminate the path :-( */
+		path = (char *) mem;
+		memset(path, 0, 256);
+		if ((long) call_prom(RELOC("package-to-path"), 3, 1,
+				     node, path, 255) < 0)
+			continue;
+		prom_print(RELOC("opening display "));
+		prom_print(path);
+		ih = (ihandle)call_prom(RELOC("open"), 1, 1, path);
+		if (ih == (ihandle)0 || ih == (ihandle)-1) {
+			prom_print(RELOC("... failed\n"));
+			continue;
+		}
+		prom_print(RELOC("... ok\n"));
+
+		if (_prom->disp_node == 0)
+			_prom->disp_node = (ihandle)(unsigned long)node;
+
+		/* Set up a usable color table when the appropriate
+		 * method is available. Should update this to set-colors */
+		for (i = 0; i < 32; i++)
+			if (prom_set_color(ih, i, RELOC(default_colors)[i*3],
+					   RELOC(default_colors)[i*3+1],
+					   RELOC(default_colors)[i*3+2]) != 0)
+				break;
+
+#ifdef CONFIG_FB
+		for (i = 0; i < LINUX_LOGO_COLORS; i++)
+			if (prom_set_color(ih, i + 32,
+					   RELOC(linux_logo_red)[i],
+					   RELOC(linux_logo_green)[i],
+					   RELOC(linux_logo_blue)[i]) != 0)
+				break;
+#endif /* CONFIG_FB */
+
+		/*
+		 * If this display is the device that OF is using for stdout,
+		 * move it to the front of the list. 
+		 */
+		mem += strlen(path) + 1;
+		i = RELOC(prom_num_displays)++;
+		if (RELOC(of_stdout_device) != 0 && i > 0
+		    && strcmp(PTRRELOC(RELOC(of_stdout_device)), path) == 0) {
+			for (; i > 0; --i)
+				RELOC(prom_display_paths[i]) = RELOC(prom_display_paths[i-1]);
+		}
+		RELOC(prom_display_paths[i]) = PTRUNRELOC(path);
+		if (RELOC(prom_num_displays) >= FB_MAX)
+			break;
+	}
+	return DOUBLEWORD_ALIGN(mem);
+}
+
+void
+virt_irq_init(void)
+{
+	int i;
+	for (i = 0; i < NR_IRQS; i++)
+		virt_irq_to_real_map[i] = UNDEFINED_IRQ;
+	for (i = 0; i < NR_HW_IRQS; i++)
+		real_irq_to_virt_map[i] = UNDEFINED_IRQ;
+}
+
+/* Create a mapping for a real_irq if it doesn't already exist.
+ * Return the virtual irq as a convenience.
+ */
+unsigned long
+virt_irq_create_mapping(unsigned long real_irq)
+{
+	unsigned long virq;
+	if (naca->interrupt_controller == IC_OPEN_PIC)
+		return real_irq;	/* no mapping for openpic (for now) */
+	virq = real_irq_to_virt(real_irq);
+	if (virq == UNDEFINED_IRQ) {
+		/* Assign a virtual IRQ number */
+		if (real_irq < NR_IRQS && virt_irq_to_real(real_irq) == UNDEFINED_IRQ) {
+			/* A 1-1 mapping will work. */
+			virq = real_irq;
+		} else {
+			while (last_virt_irq < NR_IRQS &&
+			       virt_irq_to_real(++last_virt_irq) != UNDEFINED_IRQ)
+				/* skip irq's in use */;
+			if (last_virt_irq >= NR_IRQS)
+				panic("Too many IRQs are required on this system. NR_IRQS=%d\n", NR_IRQS);
+			virq = last_virt_irq;
+		}
+		virt_irq_to_real_map[virq] = real_irq;
+		real_irq_to_virt_map[real_irq] = virq;
+	}
+	return virq;
+}
+
+
+static int __init
+prom_next_node(phandle *nodep)
+{
+	phandle node;
+	unsigned long offset = reloc_offset();
+
+	if ((node = *nodep) != 0
+	    && (*nodep = call_prom(RELOC("child"), 1, 1, node)) != 0)
+		return 1;
+	if ((*nodep = call_prom(RELOC("peer"), 1, 1, node)) != 0)
+		return 1;
+	for (;;) {
+		if ((node = call_prom(RELOC("parent"), 1, 1, node)) == 0)
+			return 0;
+		if ((*nodep = call_prom(RELOC("peer"), 1, 1, node)) != 0)
+			return 1;
+	}
+}
+
+/*
+ * Make a copy of the device tree from the PROM.
+ */
+static unsigned long __init
+copy_device_tree(unsigned long mem_start)
+{
+	phandle root;
+	unsigned long new_start;
+	struct device_node **allnextp;
+	unsigned long offset = reloc_offset();
+	unsigned long mem_end = mem_start + (8<<20);
+
+	root = call_prom(RELOC("peer"), 1, 1, (phandle)0);
+	if (root == (phandle)0) {
+		prom_print(RELOC("couldn't get device tree root\n"));
+		prom_exit();
+	}
+	allnextp = &RELOC(allnodes);
+	mem_start = DOUBLEWORD_ALIGN(mem_start);
+	new_start = inspect_node(root, 0, mem_start, mem_end, &allnextp);
+	*allnextp = 0;
+	return new_start;
+}
+
+__init
+static unsigned long
+inspect_node(phandle node, struct device_node *dad,
+	     unsigned long mem_start, unsigned long mem_end,
+	     struct device_node ***allnextpp)
+{
+	int l;
+	phandle child;
+	struct device_node *np;
+	struct property *pp, **prev_propp;
+	char *prev_name, *namep;
+	unsigned char *valp;
+	unsigned long offset = reloc_offset();
+
+	np = (struct device_node *) mem_start;
+	mem_start += sizeof(struct device_node);
+	memset(np, 0, sizeof(*np));
+	np->node = node;
+	**allnextpp = PTRUNRELOC(np);
+	*allnextpp = &np->allnext;
+	if (dad != 0) {
+		np->parent = PTRUNRELOC(dad);
+		/* we temporarily use the `next' field as `last_child'. 
*/
+		if (dad->next == 0)
+			dad->child = PTRUNRELOC(np);
+		else
+			dad->next->sibling = PTRUNRELOC(np);
+		dad->next = np;
+	}
+
+	/* get and store all properties */
+	prev_propp = &np->properties;
+	prev_name = RELOC("");
+	for (;;) {
+		pp = (struct property *) mem_start;
+		namep = (char *) (pp + 1);
+		pp->name = PTRUNRELOC(namep);
+		if ((long) call_prom(RELOC("nextprop"), 3, 1, node, prev_name,
+				     namep) <= 0)
+			break;
+		mem_start = DOUBLEWORD_ALIGN((unsigned long)namep + strlen(namep) + 1);
+		prev_name = namep;
+		valp = (unsigned char *) mem_start;
+		pp->value = PTRUNRELOC(valp);
+		pp->length = (int)(long)
+			call_prom(RELOC("getprop"), 4, 1, node, namep,
+				  valp, mem_end - mem_start);
+		if (pp->length < 0)
+			continue;
+		mem_start = DOUBLEWORD_ALIGN(mem_start + pp->length);
+		*prev_propp = PTRUNRELOC(pp);
+		prev_propp = &pp->next;
+	}
+	*prev_propp = 0;
+
+	/* get the node's full name */
+	l = (long) call_prom(RELOC("package-to-path"), 3, 1, node,
+			     (char *) mem_start, mem_end - mem_start);
+	if (l >= 0) {
+		np->full_name = PTRUNRELOC((char *) mem_start);
+		*(char *)(mem_start + l) = 0;
+		mem_start = DOUBLEWORD_ALIGN(mem_start + l + 1);
+	}
+
+	/* do all our children */
+	child = call_prom(RELOC("child"), 1, 1, node);
+	while (child != (phandle)0) {
+		mem_start = inspect_node(child, np, mem_start, mem_end,
+					 allnextpp);
+		child = call_prom(RELOC("peer"), 1, 1, child);
+	}
+
+	return mem_start;
+}
+
+/*
+ * finish_device_tree is called once things are running normally
+ * (i.e. with text and data mapped to the address they were linked at).
+ * It traverses the device tree and fills in the name, type,
+ * {n_}addrs and {n_}intrs fields of each node.
+ */
+void __init
+finish_device_tree(void)
+{
+	unsigned long mem = klimit;
+
+	virt_irq_init();
+
+	mem = finish_node(allnodes, mem, NULL, 0, 0);
+	dev_tree_size = mem - (unsigned long) allnodes;
+
+	mem = _ALIGN(mem, PAGE_SIZE);
+	lmb_reserve(__pa(klimit), mem-klimit);
+
+	klimit = mem;
+
+	rtas.dev = find_devices("rtas");
+}
+
+static unsigned long __init
+finish_node(struct device_node *np, unsigned long mem_start,
+	    interpret_func *ifunc, int naddrc, int nsizec)
+{
+	struct device_node *child;
+	int *ip;
+
+	np->name = get_property(np, "name", 0);
+	np->type = get_property(np, "device_type", 0);
+
+	/* get the device addresses and interrupts */
+	if (ifunc != NULL) {
+		mem_start = ifunc(np, mem_start, naddrc, nsizec);
+	}
+	mem_start = finish_node_interrupts(np, mem_start);
+
+	/* Look for #address-cells and #size-cells properties. */
+	ip = (int *) get_property(np, "#address-cells", 0);
+	if (ip != NULL)
+		naddrc = *ip;
+	ip = (int *) get_property(np, "#size-cells", 0);
+	if (ip != NULL)
+		nsizec = *ip;
+
+	/* the f50 sets the name to 'display' and 'compatible' to what we
+	 * expect for the name -- Cort
+	 */
+	ifunc = NULL;
+	if (!strcmp(np->name, "display"))
+		np->name = get_property(np, "compatible", 0);
+
+	if (!strcmp(np->name, "device-tree") || np->parent == NULL)
+		ifunc = interpret_root_props;
+	else if (np->type == 0)
+		ifunc = NULL;
+	else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci"))
+		ifunc = interpret_pci_props;
+	else if (!strcmp(np->type, "isa"))
+		ifunc = interpret_isa_props;
+
+	for (child = np->child; child != NULL; child = child->sibling)
+		mem_start = finish_node(child, mem_start, ifunc,
+					naddrc, nsizec);
+
+	return mem_start;
+}
+
+/* This routine walks the interrupt tree for a given device node and gathers
+ * all necessary information according to the draft interrupt mapping
+ * for CHRP. 
The current version was only tested on Apple "Core99" machines
+ * and may not handle cascaded controllers correctly.
+ */
+__init
+static unsigned long
+finish_node_interrupts(struct device_node *np, unsigned long mem_start)
+{
+	/* Finish this node */
+	unsigned int *isizep, *asizep, *interrupts, *map, *map_mask, *reg;
+	phandle *parent, map_parent;
+	struct device_node *node, *parent_node;
+	int l, isize, ipsize, asize, map_size, regpsize;
+
+	/* Currently, we don't look at all nodes with no "interrupts" property */
+
+	interrupts = (unsigned int *)get_property(np, "interrupts", &l);
+	if (interrupts == NULL)
+		return mem_start;
+	ipsize = l>>2;
+
+	reg = (unsigned int *)get_property(np, "reg", &l);
+	regpsize = l>>2;
+
+	/* We assume default interrupt cell size is 1 (bogus ?) */
+	isize = 1;
+	node = np;
+
+	do {
+		/* We adjust the cell size if the current parent contains an #interrupt-cells
+		 * property */
+		isizep = (unsigned int *)get_property(node, "#interrupt-cells", &l);
+		if (isizep)
+			isize = *isizep;
+
+		/* We don't do interrupt cascade (ISA) for now, we stop on the first
+		 * controller found
+		 */
+		if (get_property(node, "interrupt-controller", &l)) {
+			int i,j;
+
+			np->intrs = (struct interrupt_info *) mem_start;
+			np->n_intrs = ipsize / isize;
+			mem_start += np->n_intrs * sizeof(struct interrupt_info);
+			for (i = 0; i < np->n_intrs; ++i) {
+				np->intrs[i].line = openpic_to_irq(virt_irq_create_mapping(*interrupts++));
+				np->intrs[i].sense = 1;
+				if (isize > 1)
+					np->intrs[i].sense = *interrupts++;
+				for (j=2; j<isize; j++)
+					interrupts++;
+			}
+			return mem_start;
+		}
+		/* We look for an interrupt-map. This code can only handle
+		 * one interrupt per device entry in the map.
+		 */
+		map = (unsigned int *)get_property(node, "interrupt-map", &l);
+		if (map != NULL) {
+			int i, found, pisize;
+			unsigned int mask;
+
+			map_size = l>>2;
+			map_mask = (unsigned int *)get_property(node, "interrupt-map-mask", &l);
+			asizep = (unsigned int *)get_property(node, "#address-cells", &l);
+			if (asizep && l == sizeof(unsigned int))
+				asize = *asizep;
+			else
+				asize = 0;
+			found = 0;
+			pisize = 1;
+			parent_node = NULL;
+			while (map_size>0 && !found) {
+				found = 1;
+				for (i=0; i<asize; i++) {
+					mask = map_mask ? map_mask[i] : 0xffffffff;
+					if ((i>=regpsize) || ((mask & *map) != (mask & reg[i])))
+						found = 0;
+					map++;
+					map_size--;
+				}
+				for (i=0; i<isize; i++) {
+					mask = map_mask ? map_mask[i+asize] : 0xffffffff;
+					if ((mask & *map) != (mask & interrupts[i]))
+						found = 0;
+					map++;
+					map_size--;
+				}
+				/* The parent phandle and the parent's
+				 * interrupt specifier complete this entry.
+				 */
+				map_parent = *((phandle *)map);
+				map++;
+				map_size--;
+				parent_node = find_phandle(map_parent);
+				pisize = 1;
+				if (parent_node) {
+					isizep = (unsigned int *)get_property(parent_node,
+							"#interrupt-cells", &l);
+					if (isizep)
+						pisize = *isizep;
+				}
+				if (!found) {
+					/* skip this entry's parent specifier */
+					map += pisize;
+					map_size -= pisize;
+				}
+			}
+			if (found && parent_node != NULL) {
+				/* Resume resolution at the map's parent,
+				 * using the parent interrupt specifier
+				 * this entry supplies.
+				 */
+				interrupts = map;
+				ipsize = pisize;
+				isize = pisize;
+				node = parent_node;
+				continue;
+			}
+		}
+		/* No controller and no usable map here: try an explicit
+		 * interrupt-parent, else walk up the tree.
+		 */
+		parent = (phandle *)get_property(node, "interrupt-parent", &l);
+		if (parent && (l == sizeof(phandle)) &&
+		    (parent_node = find_phandle(*parent)) != NULL) {
+			node = parent_node;
+			continue;
+		}
+		node = node->parent;
+	} while (node);
+
+	return mem_start;
+}
+
+int
+prom_n_addr_cells(struct device_node* np)
+{
+	int* ip;
+	do {
+		if (np->parent)
+			np = np->parent;
+		ip = (int *) get_property(np, "#address-cells", 0);
+		if (ip != NULL)
+			return *ip;
+	} while (np->parent);
+	/* No #address-cells property for the root node, default to 1 */
+	return 1;
+}
+
+int
+prom_n_size_cells(struct device_node* np)
+{
+	int* ip;
+	do {
+		if (np->parent)
+			np = np->parent;
+		ip = (int *) get_property(np, "#size-cells", 0);
+		if (ip != NULL)
+			return *ip;
+	} while (np->parent);
+	/* No #size-cells property for the root node, default to 1 */
+	return 1;
+}
+
+static unsigned long __init
+interpret_pci_props(struct device_node *np, unsigned long mem_start,
+		    int naddrc, int nsizec)
+{
+	struct address_range *adr;
+	struct pci_reg_property *pci_addrs;
+	int i, l;
+
+	pci_addrs = (struct pci_reg_property *)
+		get_property(np, "assigned-addresses", &l);
+	if (pci_addrs != 0 && l >= sizeof(struct pci_reg_property)) {
+		i = 0;
+		adr = (struct address_range *) mem_start;
+		while ((l -= sizeof(struct pci_reg_property)) >= 0) {
+			adr[i].space = pci_addrs[i].addr.a_hi;
+			adr[i].address = pci_addrs[i].addr.a_lo;
+			adr[i].size = pci_addrs[i].size_lo;
+			++i;
+		}
+		np->addrs = adr;
+		np->n_addrs = i;
+		mem_start += i * sizeof(struct address_range);
+	}
+	return mem_start;
+}
+
+static unsigned long __init
+interpret_isa_props(struct device_node *np, unsigned long mem_start,
+		    int naddrc, int nsizec)
+{
+	struct isa_reg_property *rp;
+	struct address_range *adr;
+	int i, l;
+
+	rp = (struct isa_reg_property 
*) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct isa_reg_property)) { + i = 0; + adr = (struct address_range *) mem_start; + while ((l -= sizeof(struct reg_property)) >= 0) { + adr[i].space = rp[i].space; + adr[i].address = rp[i].address + + (adr[i].space? 0: _ISA_MEM_BASE); + adr[i].size = rp[i].size; + ++i; + } + np->addrs = adr; + np->n_addrs = i; + mem_start += i * sizeof(struct address_range); + } + + return mem_start; +} + +static unsigned long __init +interpret_root_props(struct device_node *np, unsigned long mem_start, + int naddrc, int nsizec) +{ + struct address_range *adr; + int i, l; + unsigned int *rp; + int rpsize = (naddrc + nsizec) * sizeof(unsigned int); + + rp = (unsigned int *) get_property(np, "reg", &l); + if (rp != 0 && l >= rpsize) { + i = 0; + adr = (struct address_range *) mem_start; + while ((l -= rpsize) >= 0) { + adr[i].space = 0; + adr[i].address = rp[naddrc - 1]; + adr[i].size = rp[naddrc + nsizec - 1]; + ++i; + rp += naddrc + nsizec; + } + np->addrs = adr; + np->n_addrs = i; + mem_start += i * sizeof(struct address_range); + } + + return mem_start; +} + +/* + * Work out the sense (active-low level / active-high edge) + * of each interrupt from the device tree. + */ +void __init +prom_get_irq_senses(unsigned char *senses, int off, int max) +{ + struct device_node *np; + int i, j; + + /* default to level-triggered */ + memset(senses, 1, max - off); + + for (np = allnodes; np != 0; np = np->allnext) { + for (j = 0; j < np->n_intrs; j++) { + i = np->intrs[j].line; + if (i >= off && i < max) + senses[i-off] = np->intrs[j].sense; + } + } +} + +/* + * Construct and return a list of the device_nodes with a given name. + */ +struct device_node * +find_devices(const char *name) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->name != 0 && strcasecmp(np->name, name) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = 0; + return head; +} + +/* + * Construct and return a list of the device_nodes with a given type. + */ +struct device_node * +find_type_devices(const char *type) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->type != 0 && strcasecmp(np->type, type) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = 0; + return head; +} + +/* + * Returns all nodes linked together + */ +struct device_node * __openfirmware +find_all_nodes(void) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + *prevp = np; + prevp = &np->next; + } + *prevp = 0; + return head; +} + +/* Checks if the given "compat" string matches one of the strings in + * the device's "compatible" property + */ +int +device_is_compatible(struct device_node *device, const char *compat) +{ + const char* cp; + int cplen, l; + + cp = (char *) get_property(device, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} + + +/* + * Indicates whether the root node has a given value in its + * compatible property. + */ +int +machine_is_compatible(const char *compat) +{ + struct device_node *root; + + root = find_path_device("/"); + if (root == 0) + return 0; + return device_is_compatible(root, compat); +} + +/* + * Construct and return a list of the device_nodes with a given type + * and compatible property. 
+ */ +struct device_node * +find_compatible_devices(const char *type, const char *compat) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (type != NULL + && !(np->type != 0 && strcasecmp(np->type, type) == 0)) + continue; + if (device_is_compatible(np, compat)) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = 0; + return head; +} + +/* + * Find the device_node with a given full_name. + */ +struct device_node * +find_path_device(const char *path) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0) + return np; + return NULL; +} + +/* + * Find the device_node with a given phandle. + */ +static struct device_node * __init +find_phandle(phandle ph) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->node == ph) + return np; + return NULL; +} + +/* + * Find a property with a given name for a given node + * and return the value. + */ +unsigned char * +get_property(struct device_node *np, const char *name, int *lenp) +{ + struct property *pp; + + for (pp = np->properties; pp != 0; pp = pp->next) + if (strcmp(pp->name, name) == 0) { + if (lenp != 0) + *lenp = pp->length; + return pp->value; + } + return 0; +} + +/* + * Add a property to a node + */ +void __openfirmware +prom_add_property(struct device_node* np, struct property* prop) +{ + struct property **next = &np->properties; + + prop->next = NULL; + while (*next) + next = &(*next)->next; + *next = prop; +} + +#if 0 +void __openfirmware +print_properties(struct device_node *np) +{ + struct property *pp; + char *cp; + int i, n; + + for (pp = np->properties; pp != 0; pp = pp->next) { + printk(KERN_INFO "%s", pp->name); + for (i = strlen(pp->name); i < 16; ++i) + printk(" "); + cp = (char *) pp->value; + for (i = pp->length; i > 0; --i, ++cp) + if ((i > 1 && (*cp < 0x20 || *cp > 0x7e)) + || (i == 1 && *cp != 0)) + break; + if (i == 0 && pp->length > 1) { + /* looks like a string */ + printk(" %s\n", (char *) pp->value); + } else { + /* dump it in hex */ + n = pp->length; + if (n > 64) + n = 64; + if (pp->length % 4 == 0) { + unsigned int *p = (unsigned int *) pp->value; + + n /= 4; + for (i = 0; i < n; ++i) { + if (i != 0 && (i % 4) == 0) + printk("\n "); + printk(" %08x", *p++); + } + } else { + unsigned char *bp = pp->value; + + for (i = 0; i < n; ++i) { + if (i != 0 && (i % 16) == 0) + printk("\n "); + printk(" %02x", *bp++); + } + } + printk("\n"); + if (pp->length > 64) + printk(" ... 
(length = %d)\n", + pp->length); + } + } +} +#endif + + +void __init +abort() +{ +#ifdef CONFIG_XMON + xmon(NULL); +#endif + for (;;) + prom_exit(); +} + + +/* Verify bi_recs are good */ +static struct bi_record * +prom_bi_rec_verify(struct bi_record *bi_recs) +{ + struct bi_record *first, *last; + + if ( bi_recs == NULL || bi_recs->tag != BI_FIRST ) + return NULL; + + last = (struct bi_record *)bi_recs->data[0]; + if ( last == NULL || last->tag != BI_LAST ) + return NULL; + + first = (struct bi_record *)last->data[0]; + if ( first == NULL || first != bi_recs ) + return NULL; + + return bi_recs; +} + +static unsigned long +prom_bi_rec_reserve(unsigned long mem) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + struct bi_record *rec; + + if ( _prom->bi_recs != NULL) { + + for ( rec=_prom->bi_recs; + rec->tag != BI_LAST; + rec=bi_rec_next(rec) ) { + switch (rec->tag) { +#ifdef CONFIG_BLK_DEV_INITRD + case BI_INITRD: + lmb_reserve(rec->data[0], rec->data[1]); + break; +#endif /* CONFIG_BLK_DEV_INITRD */ + } + } + /* The next use of this field will be after relocation + * is enabled, so convert this physical address into a + * virtual address. + */ + _prom->bi_recs = PTRUNRELOC(_prom->bi_recs); + } + + return mem; +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ptrace.c linuxppc64_2_4/arch/ppc64/kernel/ptrace.c --- ../kernel.org/linux/arch/ppc64/kernel/ptrace.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ptrace.c Mon Oct 29 22:26:22 2001 @@ -0,0 +1,341 @@ +/* + * linux/arch/ppc/kernel/ptrace.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@linuxcare.com.au). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Set of msr bits that gdb can change on behalf of a process. + */ +#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Get contents of register REGNO in task TASK. + */ +static inline unsigned long get_reg(struct task_struct *task, int regno) +{ + if (regno < sizeof(struct pt_regs) / sizeof(unsigned long)) + return ((unsigned long *)task->thread.regs)[regno]; + return (0); +} + +/* + * Write contents of register REGNO in task TASK. 
+ */ +static inline int put_reg(struct task_struct *task, int regno, + unsigned long data) +{ + if (regno < PT_SOFTE) { + if (regno == PT_MSR) + data = (data & MSR_DEBUGCHANGE) + | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static inline void +set_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr |= MSR_SE; +} + +static inline void +clear_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr &= ~MSR_SE; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* make sure the single step bit is not set. */ + clear_single_step(child); +} + +int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + int ret = -EPERM; + + lock_kernel(); + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + ret = -ESRCH; + if (!(child->ptrace & PT_PTRACED)) + goto out_tsk; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + goto out_tsk; + } + if (child->p_pptr != current) + goto out_tsk; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long index, tmp; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 3; + if ((addr & 7) || index > PT_FPSCR) + break; + + if (index < PT_FPR0) { + tmp = get_reg(child, (int) index); + } else { + if (child->thread.regs->msr & MSR_FP) + giveup_fpu(child); + tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; + } + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* If I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. 
*/
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+			break;
+		ret = -EIO;
+		break;
+
+	/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR: {
+		unsigned long index;
+
+		ret = -EIO;
+		/* convert to index and check */
+		index = (unsigned long) addr >> 3;
+		if ((addr & 7) || index > PT_FPSCR)
+			break;
+
+		if (index == PT_ORIG_R3)
+			break;
+		if (index < PT_FPR0) {
+			ret = put_reg(child, index, data);
+		} else {
+			if (child->thread.regs->msr & MSR_FP)
+				giveup_fpu(child);
+			((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
+			ret = 0;
+		}
+		break;
+	}
+
+	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+	case PTRACE_CONT: { /* restart after signal. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL)
+			child->ptrace |= PT_TRACESYS;
+		else
+			child->ptrace &= ~PT_TRACESYS;
+		child->exit_code = data;
+		/* make sure the single step bit is not set. */
+		clear_single_step(child);
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+/*
+ * make the child exit. Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+	case PTRACE_KILL: {
+		ret = 0;
+		if (child->state == TASK_ZOMBIE) /* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		/* make sure the single step bit is not set. */
+		clear_single_step(child);
+		wake_up_process(child);
+		break;
+	}
+
+	case PTRACE_SINGLESTEP: { /* set the trap flag. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		child->ptrace &= ~PT_TRACESYS;
+		set_single_step(child);
+		child->exit_code = data;
+		/* give it a chance to run. */
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_DETACH:
+		ret = ptrace_detach(child, data);
+		break;
+
+	case PPC_PTRACE_GETREGS:
+	{ /* Get GPRs 0 - 31. */
+		u64 tmp;
+		u64 cntr;
+		ret = 0;
+		for (cntr=0; cntr<32 && ret==0; ++cntr)
+		{
+			tmp = ((u64*)child->thread.regs)[cntr];
+			ret = put_user(tmp, (u64*)data + cntr);
+		}
+		break;
+	}
+
+	case PPC_PTRACE_SETREGS:
+	{ /* Set GPRs 0 - 31. */
+		u64 cntr;
+		ret = 0;
+		for (cntr=0; cntr<32 && ret==0; ++cntr)
+		{
+			ret = put_reg(child, cntr, *((u64*)data + cntr));
+		}
+		break;
+	}
+
+	case PPC_PTRACE_GETFPREGS:
+	{ /* Get FPRs 0 - 31. */
+		u64 tmp;
+		u64 cntr;
+		ret = -EIO;
+		if (child->thread.regs->msr & MSR_FP)
+			giveup_fpu(child);
+		ret = 0;
+		for (cntr=0; cntr<32 && ret==0; ++cntr)
+		{
+			tmp = ((u64*)child->thread.fpr)[cntr];
+			ret = put_user(tmp, (u64*)data + cntr);
+		}
+		break;
+	}
+
+	case PPC_PTRACE_SETFPREGS:
+	{ /* Set FPRs 0 - 31. */
+		u64 cntr;
+		ret = -EIO;
+		if (child->thread.regs->msr & MSR_FP)
+			giveup_fpu(child);
+		for (cntr=0; cntr<32; ++cntr)
+		{
+			((u64*)child->thread.fpr)[cntr] = *((u64*)data + cntr);
+		}
+		ret = 0;
+		break;
+	}
+
+	default:
+		ret = -EIO;
+		break;
+	}
+out_tsk:
+	free_task_struct(child);
+out:
+	unlock_kernel();
+	return ret;
+}
+
+void syscall_trace(void)
+{
+	if ((current->ptrace & (PT_PTRACED|PT_TRACESYS))
+			!= (PT_PTRACED|PT_TRACESYS))
+		return;
+	current->exit_code = SIGTRAP;
+	current->state = TASK_STOPPED;
+	notify_parent(current, SIGCHLD);
+	schedule();
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use. strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.
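
From user space, the PEEKUSR/POKEUSR layout implemented above is an array of 8-byte slots: the pt_regs words first, then the FPRs from PT_FPR0 through PT_FPSCR. A hedged sketch of a native 64-bit tracer reading one register of a stopped child (glibc spells the request PTRACE_PEEKUSER; the PT_* indices come from asm/ptrace.h):

/* Sketch: fetch register regno of a stopped child.  The handler
 * above divides addr by 8, so register N lives at offset N*8. */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <errno.h>

static long peek_reg(pid_t pid, int regno)
{
	errno = 0;	/* -1 can be a legitimate register value */
	return ptrace(PTRACE_PEEKUSER, pid, (void *)(long)(regno * 8), NULL);
}

A caller distinguishes a genuine value of -1 from a failure by checking errno afterwards.
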
-brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ptrace32.c linuxppc64_2_4/arch/ppc64/kernel/ptrace32.c --- ../kernel.org/linux/arch/ppc64/kernel/ptrace32.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/ptrace32.c Mon Oct 29 22:26:22 2001 @@ -0,0 +1,401 @@ +/* + * linux/arch/ppc/kernel/ptrace32.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@linuxcare.com.au). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Set of msr bits that gdb can change on behalf of a process. + */ +#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Get contents of register REGNO in task TASK. + */ +static inline unsigned long get_reg(struct task_struct *task, int regno) +{ + if (regno < sizeof(struct pt_regs) / sizeof(unsigned long)) + return ((unsigned long *)task->thread.regs)[regno]; + return (0); +} + +/* + * Write contents of register REGNO in task TASK. + * (Put DATA into task TASK's register REGNO.) + */ +static inline int put_reg(struct task_struct *task, int regno, unsigned long data) +{ + if (regno < PT_SOFTE) + { + if (regno == PT_MSR) + data = (data & MSR_DEBUGCHANGE) | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static inline void +set_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr |= MSR_SE; +} + +static inline void +clear_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr &= ~MSR_SE; +} + +int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) +{ + struct task_struct *child; + int ret = -EPERM; + + lock_kernel(); + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + ret = -ESRCH; + if (!(child->ptrace & PT_PTRACED)) + goto out_tsk; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + goto out_tsk; + } + if (child->p_pptr != current) + goto out_tsk; + + switch (request) + { + /* Read word at location ADDR */ + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ + case PTRACE_PEEKDATA: + { + unsigned int tmp_mem_value; + int copied; + + copied = access_process_vm(child, addr, &tmp_mem_value, sizeof(tmp_mem_value), 0); + ret = -EIO; + if (copied != sizeof(tmp_mem_value)) + break; + ret = put_user(tmp_mem_value, (u32*)data); // copy 4 bytes of data into the user location specified by the 8 byte pointer in "data". + break; + } + + /* Read 4 bytes of the other process' storage */ + /* data is a pointer specifying where the user wants the 4 bytes copied into */ + /* addr is a pointer in the user's storage that contains an 8 byte address in the other process of the 4 bytes that is to be read */ + /* (this is run in a 32-bit process looking at a 64-bit process) */ + /* when I and D space are separate, these will need to be fixed. */ + case PPC_PTRACE_PEEKTEXT_3264: + case PPC_PTRACE_PEEKDATA_3264: + { + u32 tmp_mem_value; + int copied; + u32* addrOthers; + + ret = -EIO; + + /* Get the addr in the other process that we want to read */ + if (get_user(addrOthers,(u32**)addr) != 0) + break; + + copied = access_process_vm(child, (u64)addrOthers, &tmp_mem_value, sizeof(tmp_mem_value), 0); + if (copied != sizeof(tmp_mem_value)) + break; + ret = put_user(tmp_mem_value, (u32*)data); // copy 4 bytes of data into the user location specified by the 8 byte pointer in "data". + break; + } + + /* Read a register (specified by ADDR) out of the "user area" */ + case PTRACE_PEEKUSR: { + int index; + unsigned int reg32bits; + unsigned long tmp_reg_value; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || index > PT_FPSCR32) + break; + + if (index < PT_FPR0) { + tmp_reg_value = get_reg(child, index); + } else { + if (child->thread.regs->msr & MSR_FP) + giveup_fpu(child); + /* the user space code considers the floating point to be + * an array of unsigned int (32 bits) - the index passed + * in is based on this assumption. + */ + tmp_reg_value = ((unsigned int *)child->thread.fpr)[index - PT_FPR0]; + } + reg32bits = tmp_reg_value; + ret = put_user(reg32bits, (u32*)data); // copy 4 bytes of data into the user location specified by the 8 byte pointer in "data". + break; + } + + /* Read 4 bytes out of the other process' pt_regs area */ + /* data is a pointer specifying where the user wants the 4 bytes copied into */ + /* addr is the offset into the other process' pt_regs structure that is to be read */ + /* (this is run in a 32-bit process looking at a 64-bit process) */ + case PPC_PTRACE_PEEKUSR_3264: + { + u32 index; + u32 reg32bits; + u64 tmp_reg_value; + u32 numReg; + u32 part; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; /* Divide addr by 4 */ + numReg = index / 2; + /* Determine which part of the register the user wants */ + if (index % 2) + part = 1; /* want the 2nd half of the register (right-most). */ + else + part = 0; /* want the 1st half of the register (left-most). */ + + /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */ + if ((addr & 3) || numReg > PT_FPSCR) + break; + + if (numReg >= PT_FPR0) + { + if (child->thread.regs->msr & MSR_FP) + giveup_fpu(child); + } + tmp_reg_value = get_reg(child, numReg); + reg32bits = ((u32*)&tmp_reg_value)[part]; + ret = put_user(reg32bits, (u32*)data); /* copy 4 bytes of data into the user location specified by the 8 byte pointer in "data". */ + break; + } + + /* Write the word at location ADDR */ + /* If I and D space are separate, this will have to be fixed. 
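
The *_3264 requests let a 32-bit debugger view the 64-bit child's register file as an array of 32-bit words: addr/4 is the word index, index/2 the register number, and index%2 the half. A small sketch of the inverse mapping a 32-bit tracer would use (hypothetical helper, mirroring the index/numReg/part decoding above):

/* Sketch: build the "addr" argument for PPC_PTRACE_PEEKUSR_3264.
 * part 0 requests the most-significant word of 64-bit register numReg,
 * part 1 the least-significant word, matching index % 2 above. */
static unsigned long user_area_addr_3264(unsigned int numReg, unsigned int part)
{
	unsigned int index = numReg * 2 + part;	/* 32-bit word index */

	return index * 4;	/* the handler recovers index via addr >> 2 */
}
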
*/
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA: {
+		unsigned int tmp_value_to_write;
+		tmp_value_to_write = data;
+		ret = 0;
+		if (access_process_vm(child, addr, &tmp_value_to_write, sizeof(tmp_value_to_write), 1) == sizeof(tmp_value_to_write))
+			break;
+		ret = -EIO;
+		break;
+	}
+
+	/* Write 4 bytes into the other process' storage */
+	/* data is the 4 bytes that the user wants written */
+	/* addr is a pointer in the user's storage that contains an 8 byte
+	   address in the other process where the 4 bytes are to be written */
+	/* (this is run in a 32-bit process looking at a 64-bit process) */
+	/* when I and D space are separate, these will need to be fixed. */
+	case PPC_PTRACE_POKETEXT_3264:
+	case PPC_PTRACE_POKEDATA_3264:
+	{
+		u32 tmp_value_to_write = data;
+		u32* addrOthers;
+		int bytesWritten;
+
+		/* Get the addr in the other process that we want to write into */
+		ret = -EIO;
+		if (get_user(addrOthers,(u32**)addr) != 0)
+			break;
+
+		ret = 0;
+		bytesWritten = access_process_vm(child, (u64)addrOthers, &tmp_value_to_write, sizeof(tmp_value_to_write), 1);
+		if (bytesWritten == sizeof(tmp_value_to_write))
+			break;
+		ret = -EIO;
+		break;
+	}
+
+	/* Write DATA into location ADDR within the USER area */
+	case PTRACE_POKEUSR: {
+		unsigned long index;
+
+		ret = -EIO;
+
+		/* convert to index and check */
+		index = (unsigned long) addr >> 2;
+		if ((addr & 3) || index > PT_FPSCR32)
+			break;
+
+		if (index == PT_ORIG_R3)
+			break;
+
+		if (index < PT_FPR0) {
+			ret = put_reg(child, index, data);
+		} else {
+			if (child->thread.regs->msr & MSR_FP)
+				giveup_fpu(child);
+			/* the user space code considers the floating point to be
+			 * an array of unsigned int (32 bits) - the index passed
+			 * in is based on this assumption.
+			 */
+			((unsigned int *)child->thread.fpr)[index - PT_FPR0] = data;
+			ret = 0;
+		}
+		break;
+	}
+
+	/* Write 4 bytes into the other process' pt_regs area */
+	/* data is the 4 bytes that the user wants written */
+	/* addr is the offset into the other process' pt_regs structure that is to be written into */
+	/* (this is run in a 32-bit process looking at a 64-bit process) */
+	case PPC_PTRACE_POKEUSR_3264:
+	{
+		u32 index;
+		u32 numReg;
+
+		ret = -EIO;
+
+		/* Determine which register the user wants */
+		index = (u64)addr >> 2;	/* Divide addr by 4 */
+		numReg = index / 2;
+
+		/* Validate the input - check to see if address is on the wrong
+		   boundary or beyond the end of the user area */
+		if ((addr & 3) || numReg > PT_FPSCR)
+			break;
+		/* Ensure it is a register we let them change */
+		if ((numReg == PT_ORIG_R3) || ((numReg > PT_CCR) && (numReg < PT_FPR0)))
+			break;
+
+		if (numReg >= PT_FPR0)
+		{
+			if (child->thread.regs->msr & MSR_FP)
+				giveup_fpu(child);
+		}
+
+		if (numReg == PT_MSR)
+			data = (data & MSR_DEBUGCHANGE) | (child->thread.regs->msr & ~MSR_DEBUGCHANGE);
+
+		((u32*)child->thread.regs)[index] = data;
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+	case PTRACE_CONT: { /* restart after signal. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL)
+			child->ptrace |= PT_TRACESYS;
+		else
+			child->ptrace &= ~PT_TRACESYS;
+		child->exit_code = data;
+		/* make sure the single step bit is not set. */
+		clear_single_step(child);
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+	/*
+	 * make the child exit. Best I can do is send it a sigkill.
+	 * perhaps it should be put in the status that it wants to
+	 * exit.
+	 */
+	case PTRACE_KILL: {
+		ret = 0;
+		if (child->state == TASK_ZOMBIE) /* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		/* make sure the single step bit is not set. */
+		clear_single_step(child);
+		wake_up_process(child);
+		break;
+	}
+
+	case PTRACE_SINGLESTEP: { /* set the trap flag. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		child->ptrace &= ~PT_TRACESYS;
+		set_single_step(child);
+		child->exit_code = data;
+		/* give it a chance to run. */
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_DETACH:
+		ret = ptrace_detach(child, data);
+		break;
+
+	default:
+		ret = -EIO;
+		break;
+	}
+out_tsk:
+	free_task_struct(child);
+out:
+	unlock_kernel();
+	return ret;
+}
+
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/ras.c linuxppc64_2_4/arch/ppc64/kernel/ras.c
--- ../kernel.org/linux/arch/ppc64/kernel/ras.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/ras.c	Wed Nov 7 13:05:40 2001
@@ -0,0 +1,166 @@
+
+/*
+ * ras.c
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Change Activity:
+ * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
+ * End Change Activity
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+static void ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+void init_ras_IRQ(void);
+
+/* #define DEBUG */
+
+/*
+ * Initialize handlers for the set of interrupts caused by hardware errors
+ * and power system events.
+ */
+void init_ras_IRQ(void) {
+	struct device_node *np;
+	unsigned int *ireg, len, i;
+
+	if((np = find_path_device("/event-sources/internal-errors")) &&
+	   (ireg = (unsigned int *)get_property(np, "open-pic-interrupt",
+						&len))) {
+		for(i=0; i<(len / sizeof(*ireg)); i++) {
+			request_irq(virt_irq_create_mapping(*(ireg)) + NUM_8259_INTERRUPTS,
+				    &ras_error_interrupt, 0,
+				    "RAS_ERROR", NULL);
+			ireg++;
+		}
+	}
+
+	if((np = find_path_device("/event-sources/epow-events")) &&
+	   (ireg = (unsigned int *)get_property(np, "open-pic-interrupt",
+						&len))) {
+		for(i=0; i<(len / sizeof(*ireg)); i++) {
+			request_irq(virt_irq_create_mapping(*(ireg)) + NUM_8259_INTERRUPTS,
+				    &ras_epow_interrupt, 0,
+				    "RAS_EPOW", NULL);
+			ireg++;
+		}
+	}
+}
+
+/*
+ * Handle power subsystem events (EPOW).
+ *
+ * Presently we just log that the event has occurred.  This should be
+ * fixed to examine the type of power failure and take appropriate action
+ * where the time horizon permits something useful to be done.
+ */ +static void +ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct rtas_error_log log_entry; + unsigned int size = sizeof(log_entry); + long status = 0xdeadbeef; + + status = rtas_call(rtas_token("check-exception"), 6, 1, NULL, + 0x500, irq, + EPOW_WARNING | POWERMGM_EVENTS, + 1, /* Time Critical */ + __pa(&log_entry), size); + + udbg_printf("EPOW <0x%lx 0x%lx>\n", + *((unsigned long *)&log_entry), status); + printk(KERN_WARNING + "EPOW <0x%lx 0x%lx>\n",*((unsigned long *)&log_entry), status); +} + +/* + * Handle hardware error interrupts. + * + * RTAS check-exception is called to collect data on the exception. If + * the error is deemed recoverable, we log a warning and return. + * For nonrecoverable errors, an error is logged and we stop all processing + * as quickly as possible in order to prevent propagation of the failure. + */ +static void +ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct rtas_error_log log_entry; + unsigned int size = sizeof(log_entry); + long status = 0xdeadbeef; + + status = rtas_call(rtas_token("check-exception"), 6, 1, NULL, + 0x500, irq, + INTERNAL_ERROR, + 1, /* Time Critical */ + __pa(&log_entry), size); + + if((status != 1) && + (log_entry.severity >= SEVERITY_ERROR_SYNC)) { + udbg_printf("HW Error <0x%lx 0x%lx>\n", + *((unsigned long *)&log_entry), status); + printk(KERN_EMERG + "Error: Fatal hardware error <0x%lx 0x%lx>\n", + *((unsigned long *)&log_entry), status); + +#ifndef DEBUG + /* Don't actually power off when debugging so we can test + * without actually failing while injecting errors. + */ + ppc_md.power_off(); +#endif + } else { + udbg_printf("Recoverable HW Error <0x%lx 0x%lx>\n", + *((unsigned long *)&log_entry), status); + printk(KERN_WARNING + "Warning: Recoverable hardware error <0x%lx 0x%lx>\n", + *((unsigned long *)&log_entry), status); + + return; + } +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/rtas-proc.c linuxppc64_2_4/arch/ppc64/kernel/rtas-proc.c --- ../kernel.org/linux/arch/ppc64/kernel/rtas-proc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/rtas-proc.c Mon Nov 19 22:54:35 2001 @@ -0,0 +1,796 @@ +/* + * arch/ppc64/kernel/rtas-proc.c + * Copyright (C) 2000 Tilmann Bitterberg + * (tilmann@bitterberg.de) + * + * RTAS (Runtime Abstraction Services) stuff + * Intention is to provide a clean user interface + * to use the RTAS. + * + * TODO: + * Split off a header file and maybe move it to a different + * location. Write Documentation on what the /proc/rtas/ entries + * actually do. 
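
The /proc/rtas files registered below speak plain decimal strings in both directions, so they can be driven from a trivial program or from cat and echo. A hedged sketch against the "frequency" entry (pure illustration; the write is parsed with simple_strtoul and forwarded to the set-indicator RTAS call):

/* Sketch: query the saved tone frequency, then request 440 Hz. */
#include <stdio.h>

int main(void)
{
	unsigned long freq;
	FILE *f = fopen("/proc/rtas/frequency", "r+");

	if (f == NULL)
		return 1;
	if (fscanf(f, "%lu", &freq) == 1)
		printf("tone frequency: %lu Hz\n", freq);
	rewind(f);
	fprintf(f, "440\n");	/* decimal digits plus optional newline */
	fclose(f);
	return 0;
}
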
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include /* for ppc_md */ +#include + +/* Token for Sensors */ +#define KEY_SWITCH 0x0001 +#define ENCLOSURE_SWITCH 0x0002 +#define THERMAL_SENSOR 0x0003 +#define LID_STATUS 0x0004 +#define POWER_SOURCE 0x0005 +#define BATTERY_VOLTAGE 0x0006 +#define BATTERY_REMAINING 0x0007 +#define BATTERY_PERCENTAGE 0x0008 +#define EPOW_SENSOR 0x0009 +#define BATTERY_CYCLESTATE 0x000a +#define BATTERY_CHARGING 0x000b + +/* IBM specific sensors */ +#define IBM_SURVEILLANCE 0x2328 /* 9000 */ +#define IBM_FANRPM 0x2329 /* 9001 */ +#define IBM_VOLTAGE 0x232a /* 9002 */ +#define IBM_DRCONNECTOR 0x232b /* 9003 */ +#define IBM_POWERSUPPLY 0x232c /* 9004 */ +#define IBM_INTQUEUE 0x232d /* 9005 */ + +/* Status return values */ +#define SENSOR_CRITICAL_HIGH 13 +#define SENSOR_WARNING_HIGH 12 +#define SENSOR_NORMAL 11 +#define SENSOR_WARNING_LOW 10 +#define SENSOR_CRITICAL_LOW 9 +#define SENSOR_SUCCESS 0 +#define SENSOR_HW_ERROR -1 +#define SENSOR_BUSY -2 +#define SENSOR_NOT_EXIST -3 +#define SENSOR_DR_ENTITY -9000 + +/* Location Codes */ +#define LOC_SCSI_DEV_ADDR 'A' +#define LOC_SCSI_DEV_LOC 'B' +#define LOC_CPU 'C' +#define LOC_DISKETTE 'D' +#define LOC_ETHERNET 'E' +#define LOC_FAN 'F' +#define LOC_GRAPHICS 'G' +/* reserved / not used 'H' */ +#define LOC_IO_ADAPTER 'I' +/* reserved / not used 'J' */ +#define LOC_KEYBOARD 'K' +#define LOC_LCD 'L' +#define LOC_MEMORY 'M' +#define LOC_NV_MEMORY 'N' +#define LOC_MOUSE 'O' +#define LOC_PLANAR 'P' +#define LOC_OTHER_IO 'Q' +#define LOC_PARALLEL 'R' +#define LOC_SERIAL 'S' +#define LOC_DEAD_RING 'T' +#define LOC_RACKMOUNTED 'U' /* for _u_nit is rack mounted */ +#define LOC_VOLTAGE 'V' +#define LOC_SWITCH_ADAPTER 'W' +#define LOC_OTHER 'X' +#define LOC_FIRMWARE 'Y' +#define LOC_SCSI 'Z' + +/* Tokens for indicators */ +#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/ +#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */ +#define SYSTEM_POWER_STATE 0x0003 +#define WARNING_LIGHT 0x0004 +#define DISK_ACTIVITY_LIGHT 0x0005 +#define HEX_DISPLAY_UNIT 0x0006 +#define BATTERY_WARNING_TIME 0x0007 +#define CONDITION_CYCLE_REQUEST 0x0008 +#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */ +#define DR_ACTION 0x2329 /* 9001 */ +#define DR_INDICATOR 0x232a /* 9002 */ +/* 9003 - 9004: Vendor specific */ +#define GLOBAL_INTERRUPT_QUEUE 0x232d /* 9005 */ +/* 9006 - 9999: Vendor specific */ + +/* other */ +#define MAX_SENSORS 17 /* I only know of 17 sensors */ +#define MAX_LINELENGTH 256 +#define SENSOR_PREFIX "ibm,sensor-" +#define cel_to_fahr(x) ((x*9/5)+32) + + +/* Globals */ +static struct proc_dir_entry *proc_rtas; +static struct rtas_sensors sensors; +static struct device_node *rtas_node; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + +/* ****************STRUCTS******************************************* */ +struct individual_sensor { + unsigned int token; + unsigned int quant; +}; + +struct rtas_sensors { + struct individual_sensor sensor[MAX_SENSORS]; + unsigned int quant; +}; + +/* ****************************************************************** */ +/* Declarations */ +static int ppc_rtas_sensor_read(char * buf, char ** start, off_t off, + int count, int *eof, void *data); +static ssize_t ppc_rtas_clock_read(struct file * file, char * buf, + size_t count, loff_t *ppos); +static 
ssize_t ppc_rtas_clock_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_progress_read(struct file * file, char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_progress_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_poweron_read(struct file * file, char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_poweron_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); + +static ssize_t ppc_rtas_tone_freq_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_tone_freq_read(struct file * file, char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_tone_volume_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); +static ssize_t ppc_rtas_tone_volume_read(struct file * file, char * buf, + size_t count, loff_t *ppos); + +struct file_operations ppc_rtas_poweron_operations = { + read: ppc_rtas_poweron_read, + write: ppc_rtas_poweron_write +}; +struct file_operations ppc_rtas_progress_operations = { + read: ppc_rtas_progress_read, + write: ppc_rtas_progress_write +}; + +struct file_operations ppc_rtas_clock_operations = { + read: ppc_rtas_clock_read, + write: ppc_rtas_clock_write +}; + +struct file_operations ppc_rtas_tone_freq_operations = { + read: ppc_rtas_tone_freq_read, + write: ppc_rtas_tone_freq_write +}; +struct file_operations ppc_rtas_tone_volume_operations = { + read: ppc_rtas_tone_volume_read, + write: ppc_rtas_tone_volume_write +}; + +int ppc_rtas_find_all_sensors (void); +int ppc_rtas_process_sensor(struct individual_sensor s, int state, + int error, char * buf); +char * ppc_rtas_process_error(int error); +int get_location_code(struct individual_sensor s, char * buf); +int check_location_string (char *c, char * buf); +int check_location (char *c, int idx, char * buf); + +/* ****************************************************************** */ +/* MAIN */ +/* ****************************************************************** */ +void proc_rtas_init(void) +{ + struct proc_dir_entry *entry; + + rtas_node = find_devices("rtas"); + if ((rtas_node == 0) || (_machine == _MACH_iSeries)) { + return; + } + + proc_rtas = proc_mkdir("rtas", 0); + if (proc_rtas == 0) + return; + + /* /proc/rtas entries */ + + entry = create_proc_entry("progress", S_IRUGO|S_IWUSR, proc_rtas); + if (entry) entry->proc_fops = &ppc_rtas_progress_operations; + + entry = create_proc_entry("clock", S_IRUGO|S_IWUSR, proc_rtas); + if (entry) entry->proc_fops = &ppc_rtas_clock_operations; + + entry = create_proc_entry("poweron", S_IWUSR|S_IRUGO, proc_rtas); + if (entry) entry->proc_fops = &ppc_rtas_poweron_operations; + + create_proc_read_entry("sensors", S_IRUGO, proc_rtas, + ppc_rtas_sensor_read, NULL); + + entry = create_proc_entry("frequency", S_IWUSR|S_IRUGO, proc_rtas); + if (entry) entry->proc_fops = &ppc_rtas_tone_freq_operations; + + entry = create_proc_entry("volume", S_IWUSR|S_IRUGO, proc_rtas); + if (entry) entry->proc_fops = &ppc_rtas_tone_volume_operations; +} + +/* ****************************************************************** */ +/* POWER-ON-TIME */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_poweron_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + char *dest; + int error; + + nowtime = simple_strtoul(buf, &dest, 10); + if (*dest != '\0' && *dest != 
'\n') {
+		printk("ppc_rtas_poweron_write: Invalid time\n");
+		return count;
+	}
+	power_on_time = nowtime; /* save the time */
+
+	to_tm(nowtime, &tm);
+
+	error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL,
+			tm.tm_year, tm.tm_mon, tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
+	if (error != 0)
+		printk(KERN_WARNING "error: setting poweron time returned: %s\n",
+				ppc_rtas_process_error(error));
+	return count;
+}
+/* ****************************************************************** */
+static ssize_t ppc_rtas_poweron_read(struct file * file, char * buf,
+		size_t count, loff_t *ppos)
+{
+	int n;
+	if (power_on_time == 0)
+		n = sprintf(buf, "Power on time not set\n");
+	else
+		n = sprintf(buf, "%lu\n", power_on_time);
+
+	if (*ppos >= strlen(buf))
+		return 0;
+	if (n > strlen(buf) - *ppos)
+		n = strlen(buf) - *ppos;
+	if (n > count)
+		n = count;
+	*ppos += n;
+	return n;
+}
+
+/* ****************************************************************** */
+/* PROGRESS                                                           */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_progress_write(struct file * file, const char * buf,
+		size_t count, loff_t *ppos)
+{
+	unsigned long hex;
+
+	strcpy(progress_led, buf); /* save the string */
+	/* Let's see if the user passed hex digits */
+	hex = simple_strtoul(buf, NULL, 10);
+
+	ppc_md.progress ((char *)buf, hex);
+	return count;
+
+	/* clear the line */ /* ppc_md.progress(" ", 0xffff);*/
+}
+/* ****************************************************************** */
+static ssize_t ppc_rtas_progress_read(struct file * file, char * buf,
+		size_t count, loff_t *ppos)
+{
+	int n = 0;
+	if (progress_led != NULL)
+		n = sprintf (buf, "%s\n", progress_led);
+	if (*ppos >= strlen(buf))
+		return 0;
+	if (n > strlen(buf) - *ppos)
+		n = strlen(buf) - *ppos;
+	if (n > count)
+		n = count;
+	*ppos += n;
+	return n;
+}
+
+/* ****************************************************************** */
+/* CLOCK                                                              */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_clock_write(struct file * file, const char * buf,
+		size_t count, loff_t *ppos)
+{
+	struct rtc_time tm;
+	unsigned long nowtime;
+	char *dest;
+	int error;
+
+	nowtime = simple_strtoul(buf, &dest, 10);
+	if (*dest != '\0' && *dest != '\n') {
+		printk("ppc_rtas_clock_write: Invalid time\n");
+		return count;
+	}
+
+	to_tm(nowtime, &tm);
+	error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+			tm.tm_year, tm.tm_mon, tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
+	if (error != 0)
+		printk(KERN_WARNING "error: setting the clock returned: %s\n",
+				ppc_rtas_process_error(error));
+	return count;
+}
+/* ****************************************************************** */
+static ssize_t ppc_rtas_clock_read(struct file * file, char * buf,
+		size_t count, loff_t *ppos)
+{
+	unsigned int year, mon, day, hour, min, sec;
+	unsigned long *ret = kmalloc(8*sizeof(unsigned long), GFP_KERNEL);
+	int n, error;
+
+	error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+
+	year = ret[0]; mon = ret[1]; day = ret[2];
+	hour = ret[3]; min = ret[4]; sec = ret[5];
+
+	if (error != 0){
+		printk(KERN_WARNING "error: reading the clock returned: %s\n",
+				ppc_rtas_process_error(error));
+		n = sprintf (buf, "0");
+	} else {
+		n = sprintf (buf, "%lu\n", mktime(year, mon, day, hour, min, sec));
+	}
+	kfree(ret);
+
+	if (*ppos >= strlen(buf))
+		return 0;
+	if (n > strlen(buf) - *ppos)
+		n = strlen(buf) - *ppos;
+	if (n > count)
+		n = count;
+	*ppos += n;
+	return n;
+}
+
+/* ****************************************************************** */
+/* SENSOR STUFF                                                       */
+/* ****************************************************************** */
+static int ppc_rtas_sensor_read(char * buf, char ** start, off_t off,
+		int count, int *eof, void *data)
+{
+	int i,j,n;
+	unsigned long ret;
+	int state, error;
+	char *buffer;
+	int get_sensor_state = rtas_token("get-sensor-state");
+
+	if (count < 0)
+		return -EINVAL;
+
+	/* May not be enough */
+	buffer = kmalloc(MAX_LINELENGTH*MAX_SENSORS, GFP_KERNEL);
+
+	if (!buffer)
+		return -ENOMEM;
+
+	memset(buffer, 0, MAX_LINELENGTH*MAX_SENSORS);
+
+	n  = sprintf ( buffer  , "RTAS (RunTime Abstraction Services) Sensor Information\n");
+	n += sprintf ( buffer+n, "Sensor\t\tValue\t\tCondition\tLocation\n");
+	n += sprintf ( buffer+n, "********************************************************\n");
+
+	if (ppc_rtas_find_all_sensors() != 0) {
+		n += sprintf ( buffer+n, "\nNo sensors are available\n");
+		goto return_string;
+	}
+
+	for (i=0; i<sensors.quant; i++) {
+		j = sensors.sensor[i].quant;
+		while (j >= 0) {
+			error = rtas_call(get_sensor_state, 2, 2, &ret,
+					sensors.sensor[i].token, sensors.sensor[i].quant-j);
+			state = (int) ret;
+			n += ppc_rtas_process_sensor(sensors.sensor[i], state, error, buffer+n );
+			n += sprintf (buffer+n, "\n");
+			j--;
+		} /* while */
+	} /* for */
+
+return_string:
+	if (off >= strlen(buffer)) {
+		*eof = 1;
+		kfree(buffer);
+		return 0;
+	}
+	if (n > strlen(buffer) - off)
+		n = strlen(buffer) - off;
+	if (n > count)
+		n = count;
+	else
+		*eof = 1;
+	memcpy(buf, buffer + off, n);
+	*start = buf;
+	kfree(buffer);
+	return n;
+}
+
+/* ****************************************************************** */
+
+int ppc_rtas_find_all_sensors (void)
+{
+	unsigned long *utmp;
+	int len, i, j;
+
+	utmp = (unsigned long *) get_property(rtas_node, "rtas-sensors", &len);
+	if (utmp == NULL) {
+		printk (KERN_ERR "error: could not get rtas-sensors\n");
+		return 1;
+	}
+
+	sensors.quant = len / 8; /* int + int */
+
+	for (i=0, j=0; j<sensors.quant; i++, j+=2) {
+		sensors.sensor[i].token = utmp[j];
+		sensors.sensor[i].quant = utmp[j+1];
+	}
+	return 0;
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Frequency */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_freq_write(struct file * file, const char * buf,
+		size_t count, loff_t *ppos)
+{
+	unsigned long freq;
+	char *dest;
+	int error;
+	freq = simple_strtoul(buf, &dest, 10);
+	if (*dest != '\0' && *dest != '\n') {
+		printk("ppc_rtas_tone_freq_write: Invalid tone frequency\n");
+		return count;
+	}
+	if (freq < 0) freq = 0;
+	rtas_tone_frequency = freq; /* save it for later */
+	error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
+			TONE_FREQUENCY, 0, freq);
+	if (error != 0)
+		printk(KERN_WARNING "error: setting tone frequency returned: %s\n",
+				ppc_rtas_process_error(error));
+	return count;
+}
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_freq_read(struct file * file, char * buf,
+		size_t count, loff_t *ppos)
+{
+	int n;
+	n = sprintf(buf, "%lu\n", rtas_tone_frequency);
+
+	if (*ppos >= strlen(buf))
+		return 0;
+	if (n > strlen(buf) - *ppos)
+		n = strlen(buf) - *ppos;
+	if (n > count)
+		n = count;
+	*ppos += n;
+	return n;
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Volume */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_volume_write(struct file * file, const char * buf,
+		size_t count, loff_t *ppos)
+{
+	unsigned long volume;
+	char *dest;
+	int
error; + volume = simple_strtoul(buf, &dest, 10); + if (*dest != '\0' && *dest != '\n') { + printk("ppc_rtas_tone_volume_write: Invalid tone volume\n"); + return count; + } + if (volume < 0) volume = 0; + if (volume > 100) volume = 100; + + rtas_tone_volume = volume; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_VOLUME, 0, volume); + if (error != 0) + printk(KERN_WARNING "error: setting tone volume returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_volume_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + int n; + n = sprintf(buf, "%lu\n", rtas_tone_volume); + + if (*ppos >= strlen(buf)) + return 0; + if (n > strlen(buf) - *ppos) + n = strlen(buf) - *ppos; + if (n > count) + n = count; + *ppos += n; + return n; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/rtas.c linuxppc64_2_4/arch/ppc64/kernel/rtas.c --- ../kernel.org/linux/arch/ppc64/kernel/rtas.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/rtas.c Wed Nov 7 13:05:40 2001 @@ -0,0 +1,208 @@ +/* + * + * Procedures for interfacing to the RTAS on CHRP machines. + * + * Peter Bergner, IBM March 2001. + * Copyright (C) 2001 IBM. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * prom_init() is called very early on, before the kernel text + * and data have been mapped to KERNELBASE. At this point the code + * is running at whatever address it has been loaded at, so + * references to extern and static variables must be relocated + * explicitly. The procedure reloc_offset() returns the address + * we're currently running at minus the address we were linked at. + * (Note that strings count as static variables.) + * + * Because OF may have mapped I/O devices into the area starting at + * KERNELBASE, particularly on CHRP machines, we can't safely call + * OF once the kernel has been mapped to KERNELBASE. Therefore all + * OF calls should be done within prom_init(), and prom_init() + * and all routines called within it must be careful to relocate + * references as necessary. + * + * Note that the bss is cleared *after* prom_init runs, so we have + * to make sure that any static or extern variables it accesses + * are put in the data segment. + */ + +struct rtas_t rtas = { + lock: SPIN_LOCK_UNLOCKED +}; + +extern unsigned long reloc_offset(void); + +void +phys_call_rtas(int token, int nargs, int nret, ...) 
+{ + va_list list; + unsigned long offset = reloc_offset(); + struct rtas_args *rtas = PTRRELOC(&(get_paca()->xRtas)); + int i; + + rtas->token = token; + rtas->nargs = nargs; + rtas->nret = nret; + rtas->rets = (rtas_arg_t *)PTRRELOC(&(rtas->args[nargs])); + + va_start(list, nret); + for (i = 0; i < nargs; i++) + rtas->args[i] = (rtas_arg_t)LONG_LSW(va_arg(list, ulong)); + va_end(list); + + enter_rtas(rtas); +} + +void +phys_call_rtas_display_status(char c) +{ + unsigned long offset = reloc_offset(); + struct rtas_args *rtas = PTRRELOC(&(get_paca()->xRtas)); + + rtas->token = 10; + rtas->nargs = 1; + rtas->nret = 1; + rtas->rets = (rtas_arg_t *)PTRRELOC(&(rtas->args[1])); + rtas->args[0] = (int)c; + + enter_rtas(rtas); +} + +void +call_rtas_display_status(char c) +{ + struct rtas_args *rtas = &(get_paca()->xRtas); + + rtas->token = 10; + rtas->nargs = 1; + rtas->nret = 1; + rtas->rets = (rtas_arg_t *)&(rtas->args[1]); + rtas->args[0] = (int)c; + + enter_rtas((void *)__pa((unsigned long)rtas)); +} + +#if 0 +#define DEBUG_RTAS +#endif +__openfirmware +int +rtas_token(const char *service) +{ + int *tokp; + if (rtas.dev == NULL) { +#ifdef DEBUG_RTAS + udbg_printf("\tNo rtas device in device-tree...\n"); +#endif /* DEBUG_RTAS */ + return RTAS_UNKNOWN_SERVICE; + } + tokp = (int *) get_property(rtas.dev, service, NULL); + return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; +} + +__openfirmware +long +rtas_call(int token, int nargs, int nret, + unsigned long *outputs, ...) +{ + va_list list; + int i; + unsigned long s; + struct rtas_args *rtas_args = &(get_paca()->xRtas); + +#ifdef DEBUG_RTAS + udbg_printf("Entering rtas_call\n"); + udbg_printf("\ttoken = 0x%x\n", token); + udbg_printf("\tnargs = %d\n", nargs); + udbg_printf("\tnret = %d\n", nret); + udbg_printf("\t&outputs = 0x%lx\n", outputs); +#endif /* DEBUG_RTAS */ + if (token == RTAS_UNKNOWN_SERVICE) + return -1; + + rtas_args->token = token; + rtas_args->nargs = nargs; + rtas_args->nret = nret; + rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]); + va_start(list, outputs); + for (i = 0; i < nargs; ++i) { + rtas_args->args[i] = (rtas_arg_t)LONG_LSW(va_arg(list, ulong)); +#ifdef DEBUG_RTAS + udbg_printf("\tnarg[%d] = 0x%lx\n", i, rtas_args->args[i]); +#endif /* DEBUG_RTAS */ + } + va_end(list); + + for (i = 0; i < nret; ++i) + rtas_args->rets[i] = 0; + +#if 0 /* Gotta do something different here, use global lock for now... */ + spin_lock_irqsave(&rtas_args->lock, s); +#else + spin_lock_irqsave(&rtas.lock, s); +#endif +#ifdef DEBUG_RTAS + udbg_printf("\tentering rtas with 0x%lx\n", (void *)__pa((unsigned long)rtas_args)); +#endif /* DEBUG_RTAS */ + enter_rtas((void *)__pa((unsigned long)rtas_args)); +#ifdef DEBUG_RTAS + udbg_printf("\treturned from rtas ...\n"); +#endif /* DEBUG_RTAS */ +#if 0 /* Gotta do something different here, use global lock for now... */ + spin_unlock_irqrestore(&rtas_args->lock, s); +#else + spin_unlock_irqrestore(&rtas.lock, s); +#endif +#ifdef DEBUG_RTAS + for(i=0; i < nret ;i++) + udbg_printf("\tnret[%d] = 0x%lx\n", i, (ulong)rtas_args->rets[i]); +#endif /* DEBUG_RTAS */ + + if (nret > 1 && outputs != NULL) + for (i = 0; i < nret-1; ++i) + outputs[i] = rtas_args->rets[i+1]; + return (ulong)((nret > 0) ? 
rtas_args->rets[0] : 0); +} + +void __chrp +rtas_restart(char *cmd) +{ + printk("RTAS system-reboot returned %ld\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + for (;;); +} + +void __chrp +rtas_power_off(void) +{ + /* allow power on only with power button press */ + printk("RTAS power-off returned %ld\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL,0xffffffff,0xffffffff)); + for (;;); +} + +void __chrp +rtas_halt(void) +{ + rtas_power_off(); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/rtasd.c linuxppc64_2_4/arch/ppc64/kernel/rtasd.c --- ../kernel.org/linux/arch/ppc64/kernel/rtasd.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/rtasd.c Wed Nov 7 13:05:40 2001 @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Communication to userspace based on kernel/printk.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if 0 +#define DEBUG(A...) printk(KERN_ERR A) +#else +#define DEBUG(A...) +#endif + +static spinlock_t rtas_log_lock = SPIN_LOCK_UNLOCKED; + +DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); + +#define LOG_NUMBER 64 /* must be a power of two */ +#define LOG_NUMBER_MASK (LOG_NUMBER-1) + +static char *rtas_log_buf; +static unsigned long rtas_log_start; +static unsigned long rtas_log_size; + +static int surveillance_requested; +static unsigned int rtas_event_scan_rate; +static unsigned int rtas_error_log_max; + +#define EVENT_SCAN_ALL_EVENTS 0xf0000000 +#define SURVEILLANCE_TOKEN 9000 +#define SURVEILLANCE_TIMEOUT 1 +#define SURVEILLANCE_SCANRATE 1 + +/* + * Since we use 32 bit RTAS, the physical address of this must be below + * 4G or else bad things happen. Allocate this in the kernel data and + * make it big enough. 
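
The event log kept by rtasd below is a fixed ring of LOG_NUMBER records; since LOG_NUMBER is a power of two, anding with LOG_NUMBER-1 stands in for the modulo in both log_rtas() and rtas_log_read(). A standalone sketch of the index arithmetic:

/* Sketch of the ring indexing: byte offset of the slot for sequence
 * number idx, given a fixed per-record size. */
#include <assert.h>

#define LOG_NUMBER	64		/* must be a power of two */
#define LOG_NUMBER_MASK	(LOG_NUMBER-1)

static unsigned long slot_offset(unsigned long idx, unsigned int rec_size)
{
	return rec_size * (idx & LOG_NUMBER_MASK);	/* == idx % LOG_NUMBER */
}

int main(void)
{
	assert(slot_offset(64, 1024) == slot_offset(0, 1024));	/* wraps */
	assert(slot_offset(65, 1024) == 1024);
	return 0;
}
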
+ */ +#define RTAS_ERROR_LOG_MAX 1024 +static unsigned char logdata[RTAS_ERROR_LOG_MAX]; + +static int rtas_log_open(struct inode * inode, struct file * file) +{ + return 0; +} + +static int rtas_log_release(struct inode * inode, struct file * file) +{ + return 0; +} + +static ssize_t rtas_log_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + int error; + char *tmp; + unsigned long offset; + + if (!buf || count < rtas_error_log_max) + return -EINVAL; + + count = rtas_error_log_max; + + error = verify_area(VERIFY_WRITE, buf, count); + if (error) + return -EINVAL; + + tmp = kmalloc(rtas_error_log_max, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + error = wait_event_interruptible(rtas_log_wait, rtas_log_size); + if (error) + goto out; + + spin_lock(&rtas_log_lock); + offset = rtas_error_log_max * (rtas_log_start & LOG_NUMBER_MASK); + memcpy(tmp, &rtas_log_buf[offset], count); + rtas_log_start += 1; + rtas_log_size -= 1; + spin_unlock(&rtas_log_lock); + + copy_to_user(buf, tmp, count); + error = count; + +out: + kfree(tmp); + return error; +} + +static unsigned int rtas_log_poll(struct file *file, poll_table * wait) +{ + poll_wait(file, &rtas_log_wait, wait); + if (rtas_log_size) + return POLLIN | POLLRDNORM; + return 0; +} + +struct file_operations proc_rtas_log_operations = { + read: rtas_log_read, + poll: rtas_log_poll, + open: rtas_log_open, + release: rtas_log_release, +}; + +static void log_rtas(char *buf) +{ + unsigned long offset; + + DEBUG("logging rtas event\n"); + + spin_lock(&rtas_log_lock); + + offset = rtas_error_log_max * + ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); + + memcpy(&rtas_log_buf[offset], buf, rtas_error_log_max); + + if (rtas_log_size < LOG_NUMBER) + rtas_log_size += 1; + else + rtas_log_start += 1; + + spin_unlock(&rtas_log_lock); + wake_up_interruptible(&rtas_log_wait); +} + +static int enable_surveillance(void) +{ + int error; + + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, SURVEILLANCE_TOKEN, + 0, SURVEILLANCE_TIMEOUT); + + if (error) { + printk(KERN_ERR "rtasd: could not enable surveillance\n"); + return -1; + } + + rtas_event_scan_rate = SURVEILLANCE_SCANRATE; + + return 0; +} + +static int get_eventscan_parms(void) +{ + struct device_node *node; + int *ip; + + node = find_path_device("/rtas"); + + ip = (int *)get_property(node, "rtas-event-scan-rate", NULL); + if (ip == NULL) { + printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n"); + return -1; + } + rtas_event_scan_rate = *ip; + DEBUG("rtas-event-scan-rate %d\n", rtas_event_scan_rate); + + ip = (int *)get_property(node, "rtas-error-log-max", NULL); + if (ip == NULL) { + printk(KERN_ERR "rtasd: no rtas-error-log-max\n"); + return -1; + } + rtas_error_log_max = *ip; + DEBUG("rtas-error-log-max %d\n", rtas_error_log_max); + + if (rtas_error_log_max > RTAS_ERROR_LOG_MAX) { + printk(KERN_ERR "rtasd: truncated error log from %d to %d bytes\n", rtas_error_log_max, RTAS_ERROR_LOG_MAX); + rtas_error_log_max = RTAS_ERROR_LOG_MAX; + } + + return 0; +} + +extern long sys_sched_get_priority_max(int policy); + +static int rtasd(void *unused) +{ + int cpu = 0; + int error; + int first_pass = 1; + int event_scan = rtas_token("event-scan"); + + if (event_scan == RTAS_UNKNOWN_SERVICE || get_eventscan_parms() == -1) + goto error; + + rtas_log_buf = vmalloc(rtas_error_log_max*LOG_NUMBER); + if (!rtas_log_buf) { + printk(KERN_ERR "rtasd: no memory\n"); + goto error; + } + + DEBUG("will sleep for %d jiffies\n", (HZ*60/rtas_event_scan_rate) / 2); + + daemonize(); + 
sigfillset(&current->blocked);
+	sprintf(current->comm, "rtasd");
+
+	/* Rusty unreal time task */
+	current->policy = SCHED_FIFO;
+	current->nice = sys_sched_get_priority_max(SCHED_FIFO) + 1;
+
+	cpu = 0;
+	current->cpus_allowed = 1UL << cpu_logical_map(cpu);
+	schedule();
+
+	while(1) {
+		do {
+			memset(logdata, 0, rtas_error_log_max);
+			error = rtas_call(event_scan, 4, 1, NULL,
+					EVENT_SCAN_ALL_EVENTS, 0,
+					__pa(logdata), rtas_error_log_max);
+			if (error == -1) {
+				printk(KERN_ERR "event-scan failed\n");
+				break;
+			}
+
+			if (error == 0)
+				log_rtas(logdata);
+
+		} while(error == 0);
+
+		DEBUG("watchdog scheduled on cpu %d\n", smp_processor_id());
+
+		cpu++;
+		if (cpu >= smp_num_cpus) {
+
+			if (first_pass && surveillance_requested) {
+				DEBUG("enabling surveillance\n");
+				if (enable_surveillance())
+					goto error_vfree;
+				DEBUG("surveillance enabled\n");
+			}
+
+			first_pass = 0;
+			cpu = 0;
+		}
+
+		current->cpus_allowed = 1UL << cpu_logical_map(cpu);
+
+		/* Check all cpus for pending events before sleeping */
+		if (first_pass) {
+			schedule();
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout((HZ*60/rtas_event_scan_rate) / 2);
+		}
+	}
+
+error_vfree:
+	vfree(rtas_log_buf);
+error:
+	/* Should delete proc entries */
+	return -EINVAL;
+}
+
+static void __init rtas_init(void)
+{
+	struct proc_dir_entry *rtas_dir, *entry;
+
+	rtas_dir = proc_mkdir("rtas", 0);
+	if (!rtas_dir) {
+		printk(KERN_ERR "Failed to create rtas proc directory\n");
+	} else {
+		entry = create_proc_entry("error_log", S_IRUSR, rtas_dir);
+		if (entry)
+			entry->proc_fops = &proc_rtas_log_operations;
+		else
+			printk(KERN_ERR "Failed to create rtas/error_log proc entry\n");
+	}
+
+	if (kernel_thread(rtasd, 0, CLONE_FS) < 0)
+		printk(KERN_ERR "Failed to start RTAS daemon\n");
+	else
+		printk(KERN_INFO "RTAS daemon started\n");
+}
+
+static int __init surveillance_setup(char *str)
+{
+	int i;
+
+	if (get_option(&str,&i)) {
+		if (i == 1)
+			surveillance_requested = 1;
+	}
+
+	return 1;
+}
+
+__initcall(rtas_init);
+__setup("surveillance=", surveillance_setup);
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/rtc.c linuxppc64_2_4/arch/ppc64/kernel/rtc.c
--- ../kernel.org/linux/arch/ppc64/kernel/rtc.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/rtc.c	Wed Nov 7 13:05:40 2001
@@ -0,0 +1,381 @@
+/*
+ * Real Time Clock interface for PPC64.
+ *
+ * Based on rtc.c by Paul Gortmaker
+ *
+ * This driver allows use of the real time clock
+ * from user space. It exports the /dev/rtc
+ * interface supporting various ioctl() and also the
+ * /proc/driver/rtc pseudo-file for status information.
+ *
+ * Interface does not support RTC interrupts nor an alarm.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 1.0	Mike Corrigan: IBM iSeries rtc support
+ * 1.1	Dave Engebretsen: IBM pSeries rtc support
+ */
+
+#define RTC_VERSION		"1.1"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+extern int piranha_simulator;
+
+/*
+ * We sponge a minor off of the misc major. No need slurping
+ * up another valuable major dev number for this. If you add
+ * an ioctl, make sure you don't conflict with SPARC's RTC
+ * ioctls.
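
A user-space consumer of the /proc/rtas/error_log file created above would follow the read/poll semantics of rtas_log_read(): each read() blocks until an event arrives and returns one record. A hedged sketch (1024 mirrors the kernel's RTAS_ERROR_LOG_MAX cap; the real record size comes from the rtas-error-log-max device-tree property):

/* Sketch: block on the proc file and report each logged RTAS event. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char log[1024];
	int fd = open("/proc/rtas/error_log", O_RDONLY);

	if (fd < 0)
		return 1;
	while (read(fd, log, sizeof(log)) > 0)
		fprintf(stderr, "rtas event received\n");
	close(fd);
	return 0;
}
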
+ */ + +static loff_t rtc_llseek(struct file *file, loff_t offset, int origin); + +static ssize_t rtc_read(struct file *file, char *buf, + size_t count, loff_t *ppos); + +static int rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +static int rtc_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data); + +/* + * If this driver ever becomes modularised, it will be really nice + * to make the epoch retain its value across module reload... + */ + +static unsigned long epoch = 1900; /* year corresponding to 0x00 */ + +static const unsigned char days_in_mo[] = +{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +/* + * Now all the various file operations that we export. + */ + +static loff_t rtc_llseek(struct file *file, loff_t offset, int origin) +{ + return -ESPIPE; +} + +static ssize_t rtc_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + return -EIO; +} + +static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct rtc_time wtime; + + switch (cmd) { + case RTC_RD_TIME: /* Read the time/date from RTC */ + { + ppc_md.get_rtc_time(&wtime); + break; + } + case RTC_SET_TIME: /* Set the RTC */ + { + struct rtc_time rtc_tm; + unsigned char mon, day, hrs, min, sec, leap_yr; + unsigned int yrs; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&rtc_tm, (struct rtc_time*)arg, + sizeof(struct rtc_time))) + return -EFAULT; + + yrs = rtc_tm.tm_year; + mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ + day = rtc_tm.tm_mday; + hrs = rtc_tm.tm_hour; + min = rtc_tm.tm_min; + sec = rtc_tm.tm_sec; + + if (yrs < 70) + return -EINVAL; + + leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); + + if ((mon > 12) || (day == 0)) + return -EINVAL; + + if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) + return -EINVAL; + + if ((hrs >= 24) || (min >= 60) || (sec >= 60)) + return -EINVAL; + + if ( yrs > 169 ) + return -EINVAL; + + ppc_md.set_rtc_time(&rtc_tm); + + return 0; + } + case RTC_EPOCH_READ: /* Read the epoch. */ + { + return put_user (epoch, (unsigned long *)arg); + } + case RTC_EPOCH_SET: /* Set the epoch. */ + { + /* + * There were no RTC clocks before 1900. + */ + if (arg < 1900) + return -EINVAL; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + epoch = arg; + return 0; + } + default: + return -EINVAL; + } + return copy_to_user((void *)arg, &wtime, sizeof wtime) ? -EFAULT : 0; +} + +static int rtc_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int rtc_release(struct inode *inode, struct file *file) +{ + return 0; +} + +/* + * The various file operations we support. + */ +static struct file_operations rtc_fops = { + owner: THIS_MODULE, + llseek: rtc_llseek, + read: rtc_read, + ioctl: rtc_ioctl, + open: rtc_open, + release: rtc_release, +}; + +static struct miscdevice rtc_dev= +{ + RTC_MINOR, + "rtc", + &rtc_fops +}; + +static int __init rtc_init(void) +{ + misc_register(&rtc_dev); + create_proc_read_entry ("driver/rtc", 0, 0, rtc_read_proc, NULL); + + printk(KERN_INFO "i/pSeries Real Time Clock Driver v" RTC_VERSION "\n"); + + return 0; +} + +static void __exit rtc_exit (void) +{ + remove_proc_entry ("driver/rtc", NULL); + misc_deregister(&rtc_dev); +} + +module_init(rtc_init); +module_exit(rtc_exit); +EXPORT_NO_SYMBOLS; + +/* + * Info exported via "/proc/driver/rtc". 
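 *
 * A representative sample of the output (the fields come straight from
 * the sprintf() calls in rtc_proc_output() below; actual values will
 * differ):
 *
 *	rtc_time	: 13:05:40
 *	rtc_date	: 2001-11-07
 *	rtc_epoch	: 1900
 *	DST_enable	: no
 *	BCD		: yes
 *	24hr		: yes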
+ */ + +static int rtc_proc_output (char *buf) +{ + + char *p; + struct rtc_time tm; + + p = buf; + + ppc_md.get_rtc_time(&tm); + + /* + * There is no way to tell if the luser has the RTC set for local + * time or for Universal Standard Time (GMT). Probably local though. + */ + p += sprintf(p, + "rtc_time\t: %02d:%02d:%02d\n" + "rtc_date\t: %04d-%02d-%02d\n" + "rtc_epoch\t: %04lu\n", + tm.tm_hour, tm.tm_min, tm.tm_sec, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); + + p += sprintf(p, + "DST_enable\t: no\n" + "BCD\t\t: yes\n" + "24hr\t\t: yes\n" ); + + return p - buf; +} + +static int rtc_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = rtc_proc_output (page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +/* + * Get the RTC from the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +void iSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + if (piranha_simulator) + return; + + mf_getRtc(rtc_tm); + rtc_tm->tm_mon--; +} + + +void pSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + unsigned long ret[8]; + int error; + int count; + + /* + * error -2 is clock busy, we keep retrying a few times to see + * if it will come good -- paulus + */ + count = 0; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, (void *)&ret); + } while (error == -2 && ++count < 1000); + + if (error != 0) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +int pSeries_set_rtc_time(struct rtc_time *tm) +{ + int error; + int count; + + /* + * error -2 is clock busy, we keep retrying a few times to see + * if it will come good -- paulus + */ + count = 0; + do { + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec, 0); + } while (error == -2 && ++count < 1000); + + if (error != 0) + printk(KERN_WARNING "error: setting the clock failed (%d)\n", + error); + + return 0; +} + +/* + * Set the RTC in the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +int iSeries_set_rtc_time(struct rtc_time *tm) +{ + mf_setRtc(tm); + return 0; +} + +void iSeries_get_boot_time(struct rtc_time *tm) +{ + unsigned long time; + static unsigned long lastsec = 1; + + u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart)); + u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1); + int year = 1970; + int year1 = ( dataWord1 >> 24 ) & 0x000000FF; + int year2 = ( dataWord1 >> 16 ) & 0x000000FF; + int sec = ( dataWord1 >> 8 ) & 0x000000FF; + int min = dataWord1 & 0x000000FF; + int hour = ( dataWord2 >> 24 ) & 0x000000FF; + int day = ( dataWord2 >> 8 ) & 0x000000FF; + int mon = dataWord2 & 0x000000FF; + + if ( piranha_simulator ) + return; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year1); + BCD_TO_BIN(year2); + year = year1 * 100 + year2; + + time = mktime(year, mon, day, hour, min, sec); + time += ( jiffies / HZ ); + + /* Now THIS is a nasty hack! + * It ensures that the first two calls get different answers. + * That way the loop in init_time (time.c) will not think + * the clock is stuck. 
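 * Concretely: lastsec starts at 1, so the first call returns
 * (time - 1) and drops lastsec to 0; every later call returns the
 * unadjusted time, so the first two samples are guaranteed to differ
 * by at least a second.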
+ */ + if ( lastsec ) { + time -= lastsec; + --lastsec; + } + + to_tm(time, tm); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/semaphore.c linuxppc64_2_4/arch/ppc64/kernel/semaphore.c --- ../kernel.org/linux/arch/ppc64/kernel/semaphore.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/semaphore.c Mon Jun 18 13:47:18 2001 @@ -0,0 +1,130 @@ +/* + * + * + * PowerPC-specific semaphore code. + * + * Copyright (C) 1999 Cort Dougan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * April 2001 - Reworked by Paul Mackerras + * to eliminate the SMP races in the old version between the updates + * of `count' and `waking'. Now we use negative `count' values to + * indicate that some process(es) are waiting for the semaphore. + */ + +#include +#include +#include + +/* + * Atomically update sem->count. + * This does the equivalent of the following: + * + * old_count = sem->count; + * tmp = MAX(old_count, 0) + incr; + * sem->count = tmp; + * return old_count; + */ +static inline int __sem_update_count(struct semaphore *sem, int incr) +{ + int old_count, tmp; + + __asm__ __volatile__("\n" +"1: lwarx %0,0,%3\n" +" srawi %1,%0,31\n" +" andc %1,%0,%1\n" +" add %1,%1,%4\n" +" stwcx. %1,0,%3\n" +" bne 1b" + : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) + : "r" (&sem->count), "r" (incr), "m" (sem->count) + : "cc"); + + return old_count; +} + +void __up(struct semaphore *sem) +{ + /* + * Note that we incremented count in up() before we came here, + * but that was ineffective since the result was <= 0, and + * any negative value of count is equivalent to 0. + * This ends up setting count to 1, unless count is now > 0 + * (i.e. because some other cpu has called up() in the meantime), + * in which case we just increment count. + */ + __sem_update_count(sem, 1); + wake_up(&sem->wait); +} + +/* + * Note that when we come in to __down or __down_interruptible, + * we have already decremented count, but that decrement was + * ineffective since the result was < 0, and any negative value + * of count is equivalent to 0. + * Thus it is only when we decrement count from some value > 0 + * that we have actually got the semaphore. + */ +void __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + smp_wmb(); + + /* + * Try to get the semaphore. If the count is > 0, then we've + * got the semaphore; we decrement count and exit the loop. + * If the count is 0 or negative, we set it to -1, indicating + * that we are asleep, and then sleep. + */ + while (__sem_update_count(sem, -1) <= 0) { + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE; + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + + /* + * If there are any more sleepers, wake one of them up so + * that it can either get the semaphore, or set count to -1 + * indicating that there are still processes sleeping. 
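 *
 * A worked example with two tasks sleeping on a semaphore whose count
 * is 0: each __down() drove count to -1 and slept.  up() raises count
 * to 1; the first sleeper's __sem_update_count(sem, -1) then returns
 * 1 (> 0), so it owns the semaphore and count is 0 again, and the
 * wake_up() below lets the second sleeper re-assert count = -1 and go
 * back to sleep.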
+ */ + wake_up(&sem->wait); +} + +int __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + smp_wmb(); + + while (__sem_update_count(sem, -1) <= 0) { + if (signal_pending(current)) { + /* + * A signal is pending - give up trying. + * Set sem->count to 0 if it is negative, + * since we are no longer sleeping. + */ + __sem_update_count(sem, 0); + retval = -EINTR; + break; + } + schedule(); + tsk->state = TASK_INTERRUPTIBLE; + } + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/setup.c linuxppc64_2_4/arch/ppc64/kernel/setup.c --- ../kernel.org/linux/arch/ppc64/kernel/setup.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/setup.c Thu Nov 29 23:28:49 2001 @@ -0,0 +1,633 @@ +/* + * + * Common boot and setup code. + * + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned long klimit; +/* extern void *stab; */ +extern HTAB htab_data; +extern unsigned long loops_per_jiffy; + +extern unsigned long embedded_sysmap_start; +extern unsigned long embedded_sysmap_end; + +int have_of = 1; + +extern void chrp_init(unsigned long r3, + unsigned long r4, + unsigned long r5, + unsigned long r6, + unsigned long r7); + +extern void chrp_init_map_io_space( void ); +extern void iSeries_init( void ); +extern void iSeries_init_early( void ); +extern void pSeries_init_early( void ); +extern void mm_init_ppc64( void ); + +unsigned long decr_overclock = 1; +unsigned long decr_overclock_proc0 = 1; +unsigned long decr_overclock_set = 0; +unsigned long decr_overclock_proc0_set = 0; + +#ifdef CONFIG_XMON +extern void xmon_map_scc(void); +#endif + +char saved_command_line[256]; +unsigned char aux_device_present; +struct ide_machdep_calls ppc_ide_md; + +void parse_cmd_line(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7); +int parse_bootinfo(void); + +unsigned long DMA_MODE_READ, DMA_MODE_WRITE; +int _machine = _MACH_unknown; + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned long SYSRQ_KEY; +#endif /* CONFIG_MAGIC_SYSRQ */ + +struct machdep_calls ppc_md; +struct Naca *naca; + +/* + * Perhaps we can put the pmac screen_info[] here + * on pmac as well so we don't need the ifdef's. + * Until we get multiple-console support in here + * that is. -- Cort + * Maybe tie it to serial consoles, since this is really what + * these processors use on existing boards. -- Dan + */ +struct screen_info screen_info = { + 0, 25, /* orig-x, orig-y */ + 0, /* unused */ + 0, /* orig-video-page */ + 0, /* orig-video-mode */ + 80, /* orig-video-cols */ + 0,0,0, /* ega_ax, ega_bx, ega_cx */ + 25, /* orig-video-lines */ + 1, /* orig-video-isVGA */ + 16 /* orig-video-points */ +}; + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. 
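 * (For example, glibc can read the cache-block sizes from the aux
 * vector - the AT_DCACHEBSIZE/AT_ICACHEBSIZE tags, assuming those are
 * the tags binfmt_elf emits for these fields - rather than probing
 * the hardware itself.)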
+ */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +/* + * Initialize the PPCDBG state. Called before relocation has been enabled. + */ +void ppcdbg_initialize(void) { + unsigned long offset = reloc_offset(); + struct Naca *_naca = RELOC(naca); + + _naca->debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */; +} + +/* + * Initialize a set of PACA's, one for each processor. + * + * At this point, relocation is on, but we have not done any other + * setup of the mm subsystem. + */ +void paca_init(void) { +#if 0 + int processorCount = naca->processorCount, i; + struct Paca *paca[]; + + /* Put the array of paca's on a page boundary & allocate 1/2 page of */ + /* storage for each. */ + klimit += (PAGE_SIZE-1) & PAGE_MASK; + naca->xPaca = paca[0] = klimit; + klimit += ((PAGE_SIZE>>1) * processorCount); + + for(i=0; ixPacaIndex = i; + } +#endif +} + +/* + * Do some initial setup of the system. The paramters are those which + * were passed in from the bootloader. + */ +void setup_system(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* This should be fixed properly in kernel/resource.c */ + iomem_resource.end = MEM_SPACE_LIMIT; + + /* pSeries systems are identified in prom.c via OF. */ + if ( itLpNaca.xLparInstalled == 1 ) + _machine = _MACH_iSeries; + switch (_machine) { + case _MACH_iSeries: + iSeries_init_early(); + break; + +#ifdef CONFIG_PPC_PSERIES + case _MACH_pSeries: + pSeries_init_early(); +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = initrd_end = 0; +#endif + parse_bootinfo(); + break; + + case _MACH_pSeriesLP: + pSeriesLP_init_early(); +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = initrd_end = 0; +#endif + parse_bootinfo(); + break; +#endif + } + + udbg_puts("\n-----------------------------------------------------\n"); + udbg_puts("Naca Info...\n\n"); + udbg_puts("naca = 0x"); + udbg_puthex((unsigned long)naca); + udbg_putc('\n'); + + udbg_puts("naca->processorCount = 0x"); + udbg_puthex(naca->processorCount); + udbg_putc('\n'); + + udbg_puts("naca->physicalMemorySize = 0x"); + udbg_puthex(naca->physicalMemorySize); + udbg_putc('\n'); + + udbg_puts("naca->dCacheL1LineSize = 0x"); + udbg_puthex(naca->dCacheL1LineSize); + udbg_putc('\n'); + + udbg_puts("naca->dCacheL1LogLineSize = 0x"); + udbg_puthex(naca->dCacheL1LogLineSize); + udbg_putc('\n'); + + udbg_puts("naca->dCacheL1LinesPerPage = 0x"); + udbg_puthex(naca->dCacheL1LinesPerPage); + udbg_putc('\n'); + + udbg_puts("naca->iCacheL1LineSize = 0x"); + udbg_puthex(naca->iCacheL1LineSize); + udbg_putc('\n'); + + udbg_puts("naca->iCacheL1LogLineSize = 0x"); + udbg_puthex(naca->iCacheL1LogLineSize); + udbg_putc('\n'); + + udbg_puts("naca->iCacheL1LinesPerPage = 0x"); + udbg_puthex(naca->iCacheL1LinesPerPage); + udbg_putc('\n'); + + udbg_puts("naca->pftSize = 0x"); + udbg_puthex(naca->pftSize); + udbg_putc('\n'); + + udbg_puts("naca->serialPortAddr = 0x"); + udbg_puthex(naca->serialPortAddr); + udbg_putc('\n'); + + udbg_puts("naca->interrupt_controller = 0x"); + udbg_puthex(naca->interrupt_controller); + udbg_putc('\n'); + + udbg_printf("\nHTAB Info ...\n\n"); + udbg_puts("htab_data.htab = 0x"); + udbg_puthex((unsigned long)htab_data.htab); + udbg_putc('\n'); + udbg_puts("htab_data.num_ptegs = 0x"); + udbg_puthex(htab_data.htab_num_ptegs); + udbg_putc('\n'); + + udbg_puts("\n-----------------------------------------------------\n"); + + + if ( _machine & _MACH_pSeries ) { + finish_device_tree(); + 
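		/* Device tree is final at this point; fall through to the
		   common CHRP/pSeries platform initialization. */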
chrp_init(r3, r4, r5, r6, r7); + } + + mm_init_ppc64(); + + switch (_machine) { + case _MACH_iSeries: + iSeries_init(); + break; + default: + /* The following relies on the device tree being */ + /* fully configured. */ + parse_cmd_line(r3, r4, r5, r6, r7); + } +} + +void machine_restart(char *cmd) +{ + ppc_md.restart(cmd); +} + +void machine_power_off(void) +{ + ppc_md.power_off(); +} + +void machine_halt(void) +{ + ppc_md.halt(); +} + +int get_cpuinfo(char *buffer) +{ + unsigned long len = 0; + unsigned long bogosum = 0; + unsigned long i; + unsigned int pvr; + unsigned short maj, min; + +#ifdef CONFIG_SMP +#define CPU_PRESENT(x) (cpu_callin_map[(x)]) +#else +#define CPU_PRESENT(x) ((x)==0) +#define smp_num_cpus 1 +#endif + + for ( i = 0; i < smp_num_cpus ; i++ ) + { + if ( !CPU_PRESENT(i) ) + continue; + if ( i ) + len += sprintf(len+buffer,"\n"); + len += sprintf(len+buffer,"processor\t: %lu\n",i); + len += sprintf(len+buffer,"cpu\t\t: "); + + pvr = xPaca[i].pvr; + + switch (PVR_VER(pvr)) + { + case PV_PULSAR: + len += sprintf(len+buffer, "RS64-III (pulsar)\n"); + break; + case PV_POWER4: + len += sprintf(len+buffer, "POWER4 (gp)\n"); + break; + case PV_ICESTAR: + len += sprintf(len+buffer, "RS64-III (icestar)\n"); + break; + case PV_SSTAR: + len += sprintf(len+buffer, "RS64-IV (sstar)\n"); + break; + case PV_630: + len += sprintf(len+buffer, "POWER3 (630)\n"); + break; + case PV_630p: + len += sprintf(len+buffer, "POWER3 (630+)\n"); + break; + default: + len += sprintf(len+buffer, "Unknown (%08x)\n", pvr); + break; + } + + /* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ + if ( _machine != _MACH_iSeries ) { + struct device_node *cpu_node; + int *fp; + + cpu_node = find_type_devices("cpu"); + if ( !cpu_node ) break; + { + int s; + for ( s = 0; (s < i) && cpu_node->next ; + s++, cpu_node = cpu_node->next ) + /* nothing */ ; + } + fp = (int *) get_property(cpu_node, "clock-frequency", NULL); + if ( !fp ) break; + len += sprintf(len+buffer, "clock\t\t: %dMHz\n", + *fp / 1000000); + } + + if (ppc_md.setup_residual != NULL) + { + len += ppc_md.setup_residual(buffer + len); + } + + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + + len += sprintf(len+buffer, "revision\t: %hd.%hd\n", maj, min); + + len += sprintf(buffer+len, "bogomips\t: %lu.%02lu\n", + (loops_per_jiffy+2500)/(500000/HZ), + (loops_per_jiffy+2500)/(5000/HZ) % 100); + bogosum += loops_per_jiffy; + } + +#ifdef CONFIG_SMP + if ( i ) + len += sprintf(buffer+len, "\n"); + len += sprintf(buffer+len,"total bogomips\t: %lu.%02lu\n", + (bogosum+2500)/(500000/HZ), + (bogosum+2500)/(5000/HZ) % 100); +#endif /* CONFIG_SMP */ + + + if (ppc_md.get_cpuinfo != NULL) + { + len += ppc_md.get_cpuinfo(buffer+len); + } + + return len; +} + +/* + * Fetch the cmd_line from open firmware. */ +void parse_cmd_line(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + struct device_node *chosen; + char *p; + +#ifdef CONFIG_BLK_DEV_INITRD + if ((initrd_start == 0) && r3 && r4 && r4 != 0xdeadbeef) { + initrd_start = (r3 >= KERNELBASE) ? 
r3 : (unsigned long)__va(r3); + initrd_end = initrd_start + r4; + ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0); + initrd_below_start_ok = 1; + } +#endif + + cmd_line[0] = 0; + chosen = find_devices("chosen"); + if (chosen != NULL) { + p = get_property(chosen, "bootargs", NULL); + if (p != NULL) + strncpy(cmd_line, p, sizeof(cmd_line)); + } + cmd_line[sizeof(cmd_line) - 1] = 0; + + /* Look for mem= option on command line */ + if (strstr(cmd_line, "mem=")) { + char *p, *q; + unsigned long maxmem = 0; + extern unsigned long __max_memory; + + for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { + q = p + 4; + if (p > cmd_line && p[-1] != ' ') + continue; + maxmem = simple_strtoul(q, &q, 0); + if (*q == 'k' || *q == 'K') { + maxmem <<= 10; + ++q; + } else if (*q == 'm' || *q == 'M') { + maxmem <<= 20; + ++q; + } + } + __max_memory = maxmem; + } + ppc_md.progress("id mach: done", 0x200); +} + + +char *bi_tag2str(unsigned long tag) +{ + switch (tag) { + case BI_FIRST: + return "BI_FIRST"; + case BI_LAST: + return "BI_LAST"; + case BI_CMD_LINE: + return "BI_CMD_LINE"; + case BI_BOOTLOADER_ID: + return "BI_BOOTLOADER_ID"; + case BI_INITRD: + return "BI_INITRD"; + case BI_SYSMAP: + return "BI_SYSMAP"; + case BI_MACHTYPE: + return "BI_MACHTYPE"; + default: + return "BI_UNKNOWN"; + } +} + +int parse_bootinfo(void) +{ + struct bi_record *rec; + extern char *sysmap; + extern unsigned long sysmap_size; + + rec = prom.bi_recs; + + if ( rec == NULL || rec->tag != BI_FIRST ) + return -1; + + for ( ; rec->tag != BI_LAST ; rec = bi_rec_next(rec) ) { + switch (rec->tag) { + case BI_CMD_LINE: + memcpy(cmd_line, (void *)rec->data, rec->size); + break; + case BI_SYSMAP: + sysmap = (char *)((rec->data[0] >= (KERNELBASE)) + ? rec->data[0] : (unsigned long)__va(rec->data[0])); + sysmap_size = rec->data[1]; + break; +#ifdef CONFIG_BLK_DEV_INITRD + case BI_INITRD: + initrd_start = (unsigned long)__va(rec->data[0]); + initrd_end = initrd_start + rec->data[1]; + ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0); + initrd_below_start_ok = 1; + break; +#endif /* CONFIG_BLK_DEV_INITRD */ + } + } + + return 0; +} + +void __init ppc_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + + if (ppc_md.init != NULL) { + ppc_md.init(); + } +} + +/* + * Called into from start_kernel, after lock_kernel has been called. + * Initializes bootmem, which is unsed to manage page allocation until + * mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + extern int panic_timeout; + extern char _etext[], _edata[]; + extern void do_init_bootmem(void); + +#ifdef CONFIG_XMON + xmon_map_scc(); + if (strstr(cmd_line, "xmon")) + xmon(0); +#endif /* CONFIG_XMON */ +#ifdef CONFIG_KDB + xmon_map_scc(); /* in kdb/start.c --need to rename TAI */ +#endif + ppc_md.progress("setup_arch:enter", 0x3eab); + +#if defined(CONFIG_KGDB) + kgdb_map_scc(); + set_debug_traps(); + breakpoint(); +#endif + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. 
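 *
 * A sketch of that refinement (assuming the firmware exposes the
 * usual "d-cache-line-size" property; not wired up here):
 *
 *	struct device_node *np = find_type_devices("cpu");
 *	int *lp;
 *
 *	if (np && (lp = (int *)get_property(np, "d-cache-line-size", NULL)))
 *		dcache_bsize = *lp;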
+ */ + dcache_bsize = naca->dCacheL1LineSize; + icache_bsize = naca->iCacheL1LineSize; + + /* reboot on panic */ + panic_timeout = 180; + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) klimit; + + /* Save unparsed command line copy for /proc/cmdline */ + strcpy(saved_command_line, cmd_line); + *cmdline_p = cmd_line; + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + ppc_md.progress("setup_arch:bootmem", 0x3eab); + + ppc_md.setup_arch(); + + paging_init(); + ppc_md.progress("setup_arch: exit", 0x3eab); +} + +void exception_trace(unsigned long trap) +{ + unsigned long x, srr0, srr1, reg20, reg1, reg21; + + asm("mflr %0" : "=r" (x) :); + asm("mfspr %0,0x1a" : "=r" (srr0) :); + asm("mfspr %0,0x1b" : "=r" (srr1) :); + asm("mr %0,1" : "=r" (reg1) :); + asm("mr %0,20" : "=r" (reg20) :); + asm("mr %0,21" : "=r" (reg21) :); + + udbg_puts("\n"); + udbg_puts("Took an exception : "); udbg_puthex(x); udbg_puts("\n"); + udbg_puts(" "); udbg_puthex(reg1); udbg_puts("\n"); + udbg_puts(" "); udbg_puthex(reg20); udbg_puts("\n"); + udbg_puts(" "); udbg_puthex(reg21); udbg_puts("\n"); + udbg_puts(" "); udbg_puthex(srr0); udbg_puts("\n"); + udbg_puts(" "); udbg_puthex(srr1); udbg_puts("\n"); +} + +int set_spread_lpevents( char * str ) +{ + /* The parameter is the number of processors to share in processing lp events */ + unsigned long i; + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val > 0 ) && ( val <= maxPacas ) ) { + for ( i=1; idefault_decr = tb_ticks_per_jiffy / decr_overclock_proc0; + paca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; +} + +int set_decr_overclock_proc0( char * str ) +{ + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val >= 1 ) && ( val <= 48 ) ) { + decr_overclock_proc0_set = 1; + decr_overclock_proc0 = val; + printk("proc 0 decrementer overclock factor of %ld\n", val); + } + else + printk("invalid proc 0 decrementer overclock factor of %ld\n", val); + return 1; +} + +int set_decr_overclock( char * str ) +{ + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val >= 1 ) && ( val <= 48 ) ) { + decr_overclock_set = 1; + decr_overclock = val; + printk("decrementer overclock factor of %ld\n", val); + } + else + printk("invalid decrementer overclock factor of %ld\n", val); + return 1; + +} + +__setup("spread_lpevents=", set_spread_lpevents ); +__setup("decr_overclock_proc0=", set_decr_overclock_proc0 ); +__setup("decr_overclock=", set_decr_overclock ); diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/signal.c linuxppc64_2_4/arch/ppc64/kernel/signal.c --- ../kernel.org/linux/arch/ppc64/kernel/signal.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/signal.c Sat Oct 20 06:53:25 2001 @@ -0,0 +1,822 @@ +/* + * linux/arch/ppc64/kernel/signal.c + * + * + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/signal.c" + * Copyright (C) 1991, 1992 Linus Torvalds + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#define GP_REGS_SIZE MIN(sizeof(elf_gregset_t), sizeof(struct pt_regs)) + +/* + * These are the flags in the MSR that the user is allowed to change + * by modifying the saved value of the MSR on the stack. SE and BE + * should not be in this list since gdb may want to change these. I.e, + * you should be able to step out of a signal handler to see what + * instruction executes next after the signal handler completes. + * Alternately, if you stepped into a signal handler, you should be + * able to continue 'til the next breakpoint from within the signal + * handler, even if the handler returns. + */ +#define MSR_USERCHANGE (MSR_FE0 | MSR_FE1) + +int do_signal(sigset_t *oldset, struct pt_regs *regs); +extern long sys_wait4(pid_t pid, unsigned int *stat_addr, + int options, /*unsigned long*/ struct rusage *ru); + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. */ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + /* case __SI_RT: This is not generated by the kernel as of now. */ + } + return err; + } +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +long sys_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7, + struct pt_regs *regs) +{ + sigset_t saveset; + + PPCDBG(PPCDBG_SYS64X, "sys_sigsuspend - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->gpr[3] = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs)) + /* + * If a signal handler needs to be called, + * do_signal() has set R3 to the signal number (the + * first argument of the signal handler), so don't + * overwrite that with EINTR ! + * In the other cases, do_signal() doesn't touch + * R3, so it's still set to -EINTR (see above). 
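 *
 * In short: sys_sigsuspend() only returns once do_signal() reports a
 * delivered signal, and whatever value is then sitting in R3 (the
 * signal number, or the untouched -EINTR) becomes the syscall's
 * return value.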
+ */ + return regs->gpr[3]; + } +} + +long sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, int p3, int p4, int p6, + int p7, struct pt_regs *regs) +{ + sigset_t saveset, newset; + + + PPCDBG(PPCDBG_SYS64X, "sys_rt_sigsuspend - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->gpr[3] = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs)) + return regs->gpr[3]; + } +} + + + +asmlinkage long sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + struct pt_regs *regs = (struct pt_regs *) &uss; + + PPCDBG(PPCDBG_SYS64X, "sys_sigaltstack - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + return do_sigaltstack(uss, uoss, regs->gpr[1]); +} + +long sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + PPCDBG(PPCDBG_SYS64X, "sys_sigaction - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + + + + return ret; +} + +/* + * When we have signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * a sigregs struct + * one or more sigcontext structs + * a gap of __SIGNAL_FRAMESIZE bytes + * + * Each of these things must be a multiple of 16 bytes in size. + * + * XXX ultimately we will have to stack up a siginfo and ucontext + * for each rt signal. + */ +struct sigregs { + elf_gregset_t gp_regs; + double fp_regs[ELF_NFPREG]; + unsigned int tramp[2]; + /* 64 bit API allows for 288 bytes below sp before + decrementing it. */ + int abigap[72]; +}; + + + +struct rt_sigframe +{ + unsigned long _unused[2]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; +}; + + +/* + * When we have rt signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * a sigregs struct + * one rt_sigframe struct (siginfo + ucontext) + * a gap of __SIGNAL_FRAMESIZE bytes + * + * Each of these things must be a multiple of 16 bytes in size. 
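 *
 * Pictorially, once a frame is set up (higher addresses first):
 *
 *	old GPR1 ->	caller's stack frame
 *			struct sigregs (saved registers + trampoline)
 *			struct rt_sigframe (siginfo + ucontext)
 *			gap of __SIGNAL_FRAMESIZE bytes
 *	new GPR1 ->	back chain to the old GPR1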
+ * + */ + +extern long sys32_rt_sigreturn(struct pt_regs *regs); + +int sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe *rt_sf; + struct sigcontext_struct sigctx; + struct sigregs *sr; + int ret; + elf_gregset_t saved_regs; /* an array of ELF_NGREG unsigned longs */ + sigset_t set; + stack_t st; + unsigned long prevsp; + + + PPCDBG(PPCDBG_SYS64X, "sys_rt_sigreturn - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + + /* If runnining a 32 bit process, then execute a 32 bit signal return */ + if (current->thread.flags & PPC_FLAG_32BIT) { + ret = sys32_rt_sigreturn(regs); + PPCDBG(PPCDBG_SYS64NI, "sys_sigreturn - returned from sys32_sigreturn w/ %ld \n", ret); + return ret; + } + + rt_sf = (struct rt_sigframe *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + if (copy_from_user(&sigctx, &rt_sf->uc.uc_mcontext, sizeof(sigctx)) + || copy_from_user(&set, &rt_sf->uc.uc_sigmask, sizeof(set)) + || copy_from_user(&st, &rt_sf->uc.uc_stack, sizeof(st))) + goto badframe; + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + rt_sf++; /* Look at next rt_sigframe */ + if (rt_sf == (struct rt_sigframe *)(sigctx.regs)) { + /* Last stacked signal - restore registers - + * sigctx is initialized to point to the + * preamble frame (where registers are stored) + * see handle_signal() + */ + sr = (struct sigregs *) sigctx.regs; + if (regs->msr & MSR_FP ) + giveup_fpu(current); + if (copy_from_user(saved_regs, &sr->gp_regs, + sizeof(sr->gp_regs))) + goto badframe; + saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE) + | (saved_regs[PT_MSR] & MSR_USERCHANGE); + saved_regs[PT_SOFTE] = regs->softe; + memcpy(regs, saved_regs, GP_REGS_SIZE); + if (copy_from_user(current->thread.fpr, &sr->fp_regs, + sizeof(sr->fp_regs))) + goto badframe; + /* This function sets back the stack flags into + the current task structure. */ + sys_sigaltstack(&st, NULL); + + ret = regs->result; + } else { + /* More signals to go */ + /* Set up registers for next signal handler */ + regs->gpr[1] = (unsigned long)rt_sf - __SIGNAL_FRAMESIZE; + if (copy_from_user(&sigctx, &rt_sf->uc.uc_mcontext, sizeof(sigctx))) + goto badframe; + sr = (struct sigregs *) sigctx.regs; + regs->gpr[3] = ret = sigctx.signal; + /* Get the siginfo */ + get_user(regs->gpr[4], (unsigned long *)&rt_sf->pinfo); + /* Get the ucontext */ + get_user(regs->gpr[5], (unsigned long *)&rt_sf->puc); + regs->gpr[6] = (unsigned long) rt_sf; + + regs->link = (unsigned long) &sr->tramp; + regs->nip = sigctx.handler; + if (get_user(prevsp, &sr->gp_regs[PT_R1]) + || put_user(prevsp, (unsigned long *) regs->gpr[1])) + goto badframe; + } + return ret; + +badframe: + do_exit(SIGSEGV); +} + +static void +setup_rt_frame(struct pt_regs *regs, struct sigregs *frame, + signed long newsp) +{ + struct rt_sigframe *rt_sf = (struct rt_sigframe *) newsp; + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. 
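 *
 * (This is standard 64-bit ELF ABI behaviour: a call through a
 * function pointer must fetch the entry point from the descriptor and
 * load the callee's TOC base into r2.  The ABI descriptor also
 * carries a third, environment doubleword, but signal delivery has no
 * use for it, so the struct below models only the first two fields.)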
+ */ + struct funct_descr_entry { + unsigned long entry; + unsigned long toc; + }; + + + struct funct_descr_entry * funct_desc_ptr; + unsigned long temp_ptr; + + + /* Set up preamble frame */ + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + if (regs->msr & MSR_FP) + giveup_fpu(current); + if (__copy_to_user(&frame->gp_regs, regs, GP_REGS_SIZE) + || __copy_to_user(&frame->fp_regs, current->thread.fpr, + ELF_NFPREG * sizeof(double)) + /* Set up to return from user space. + It calls the sc exception at offset 0x9999 + for sys_rt_sigreturn(). + */ + || __put_user(0x38006666UL, &frame->tramp[0]) /* li r0,0x6666 */ + || __put_user(0x44000002UL, &frame->tramp[1])) /* sc */ + goto badframe; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + + /* Retrieve rt_sigframe from stack and + set up registers for signal handler + */ + newsp -= __SIGNAL_FRAMESIZE; + + if ( get_user(temp_ptr, &rt_sf->uc.uc_mcontext.handler)) { + goto badframe; + } + + funct_desc_ptr = ( struct funct_descr_entry *) temp_ptr; + + if (put_user(regs->gpr[1], (unsigned long *)newsp) + || get_user(regs->nip, &funct_desc_ptr->entry) + || get_user(regs->gpr[2], &funct_desc_ptr->toc) + || get_user(regs->gpr[3], &rt_sf->uc.uc_mcontext.signal) + || get_user(regs->gpr[4], (unsigned long *)&rt_sf->pinfo) + || get_user(regs->gpr[5], (unsigned long *)&rt_sf->puc)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[6] = (unsigned long) rt_sf; + regs->link = (unsigned long) frame->tramp; + + + return; + +badframe: +#if DEBUG_SIG + printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + do_exit(SIGSEGV); +} + +/* + * Do a signal return; undo the signal stack. + */ +extern long sys32_sigreturn(struct pt_regs *regs); +asmlinkage long sys_sigreturn(struct pt_regs *regs) +{ + struct sigcontext_struct *sc, sigctx; + struct sigregs *sr; + long ret; + elf_gregset_t saved_regs; /* an array of ELF_NGREG unsigned longs */ + sigset_t set; + unsigned long prevsp; + + PPCDBG(PPCDBG_SYS64NI, "sys_sigreturn - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + /* If runnining a 32 bit process, then execute a 32 bit signal return */ + if (current->thread.flags & PPC_FLAG_32BIT) { + ret = sys32_sigreturn(regs); + PPCDBG(PPCDBG_SYS64NI, "sys_sigreturn - returned from sys32_sigreturn w/ %ld \n", ret); + return ret; + } + + sc = (struct sigcontext_struct *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + if (copy_from_user(&sigctx, sc, sizeof(sigctx))) + goto badframe; + + set.sig[0] = sigctx.oldmask; +#if _NSIG_WORDS > 1 + set.sig[1] = sigctx._unused[3]; +#endif + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + sc++; /* Look at next sigcontext */ + if (sc == (struct sigcontext_struct *)(sigctx.regs)) { + /* Last stacked signal - restore registers */ + sr = (struct sigregs *) sigctx.regs; + if (regs->msr & MSR_FP ) + giveup_fpu(current); + if (copy_from_user(saved_regs, &sr->gp_regs, + sizeof(sr->gp_regs))) + goto badframe; + saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE) + | (saved_regs[PT_MSR] & MSR_USERCHANGE); + saved_regs[PT_SOFTE] = regs->softe; + memcpy(regs, saved_regs, GP_REGS_SIZE); + + if (copy_from_user(current->thread.fpr, &sr->fp_regs, + sizeof(sr->fp_regs))) + goto badframe; + + ret = regs->result; + + } else { + /* More signals to go */ + regs->gpr[1] = (unsigned long)sc - 
__SIGNAL_FRAMESIZE; + if (copy_from_user(&sigctx, sc, sizeof(sigctx))) + goto badframe; + sr = (struct sigregs *) sigctx.regs; + regs->gpr[3] = ret = sigctx.signal; + regs->gpr[4] = (unsigned long) sc; + regs->link = (unsigned long) &sr->tramp; + regs->nip = sigctx.handler; + + if (get_user(prevsp, &sr->gp_regs[PT_R1]) + || put_user(prevsp, (unsigned long *) regs->gpr[1])) + goto badframe; + } + return ret; + +badframe: + do_exit(SIGSEGV); +} + +/* + * Set up a signal frame. + */ +static void +setup_frame(struct pt_regs *regs, struct sigregs *frame, + unsigned long newsp) +{ + + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + struct funct_descr_entry { + unsigned long entry; + unsigned long toc; + }; + + + struct funct_descr_entry * funct_desc_ptr; + unsigned long temp_ptr; + + struct sigcontext_struct *sc = (struct sigcontext_struct *) newsp; + + PPCDBG(PPCDBG_SIGNAL, "setup_frame - entered - regs=%p, frame=%p, newsp=%lx \n", regs, frame, newsp); + + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + if (regs->msr & MSR_FP) + giveup_fpu(current); + if (__copy_to_user(&frame->gp_regs, regs, GP_REGS_SIZE) + || __copy_to_user(&frame->fp_regs, current->thread.fpr, + ELF_NFPREG * sizeof(double)) + || __put_user(0x38007777UL, &frame->tramp[0]) /* li r0,0x7777 */ + || __put_user(0x44000002UL, &frame->tramp[1])) /* sc */ + goto badframe; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + + newsp -= __SIGNAL_FRAMESIZE; + if ( get_user(temp_ptr, &sc->handler)) + goto badframe; + + funct_desc_ptr = ( struct funct_descr_entry *) temp_ptr; + + if (put_user(regs->gpr[1], (unsigned long *)newsp) + || get_user(regs->nip, & funct_desc_ptr ->entry) + || get_user(regs->gpr[2],& funct_desc_ptr->toc) + || get_user(regs->gpr[3], &sc->signal)) + goto badframe; + regs->gpr[1] = newsp; + regs->gpr[4] = (unsigned long) sc; + regs->link = (unsigned long) frame->tramp; + + + PPCDBG(PPCDBG_SIGNAL, "setup_frame - returning - regs->gpr[1]=%lx, regs->gpr[4]=%lx, regs->link=%lx \n", + regs->gpr[1], regs->gpr[4], regs->link); + + return; + + badframe: + PPCDBG(PPCDBG_SIGNAL, "setup_frame - badframe in setup_frame, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp); PPCDBG_ENTER_DEBUGGER(); +#if DEBUG_SIG + printk("badframe in setup_frame, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + do_exit(SIGSEGV); +} + +/* + * OK, we're invoking a handler + */ +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs, + unsigned long *newspp, unsigned long frame) +{ + struct sigcontext_struct *sc; + struct rt_sigframe *rt_sf; + + if (regs->trap == 0x0C00 /* System Call! 
*/
+	    && ((int)regs->result == -ERESTARTNOHAND ||
+		((int)regs->result == -ERESTARTSYS &&
+		 !(ka->sa.sa_flags & SA_RESTART))))
+		regs->result = -EINTR;
+	/* Set up Signal Frame */
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		/* Put a Real Time Context onto stack */
+		*newspp -= sizeof(*rt_sf);
+		rt_sf = (struct rt_sigframe *) *newspp;
+		if (verify_area(VERIFY_WRITE, rt_sf, sizeof(*rt_sf)))
+			goto badframe;
+
+
+		if (__put_user((unsigned long) ka->sa.sa_handler, &rt_sf->uc.uc_mcontext.handler)
+		    || __put_user(&rt_sf->info, &rt_sf->pinfo)
+		    || __put_user(&rt_sf->uc, &rt_sf->puc)
+		    /* Put the siginfo */
+		    || __copy_to_user(&rt_sf->info, info, sizeof(*info))
+		    /* Create the ucontext */
+		    || __put_user(0, &rt_sf->uc.uc_flags)
+		    || __put_user(0, &rt_sf->uc.uc_link)
+		    || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
+		    || __put_user(sas_ss_flags(regs->gpr[1]),
+				  &rt_sf->uc.uc_stack.ss_flags)
+		    || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size)
+		    || __copy_to_user(&rt_sf->uc.uc_sigmask, oldset, sizeof(*oldset))
+		    /* mcontext.regs points to preamble register frame */
+		    || __put_user((struct pt_regs *)frame, &rt_sf->uc.uc_mcontext.regs)
+		    || __put_user(sig, &rt_sf->uc.uc_mcontext.signal))
+			goto badframe;
+
+	} else {
+		/* Put another sigcontext on the stack */
+		*newspp -= sizeof(*sc);
+		sc = (struct sigcontext_struct *) *newspp;
+		if (verify_area(VERIFY_WRITE, sc, sizeof(*sc)))
+			goto badframe;
+
+		if (__put_user((unsigned long) ka->sa.sa_handler, &sc->handler)
+		    || __put_user(oldset->sig[0], &sc->oldmask)
+#if _NSIG_WORDS > 1
+		    || __put_user(oldset->sig[1], &sc->_unused[3])
+#endif
+		    || __put_user((struct pt_regs *)frame, &sc->regs)
+		    || __put_user(sig, &sc->signal))
+			goto badframe;
+	}
+
+	if (ka->sa.sa_flags & SA_ONESHOT)
+		ka->sa.sa_handler = SIG_DFL;
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sigmask_lock);
+		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+		sigaddset(&current->blocked, sig);
+		recalc_sigpending(current);
+		spin_unlock_irq(&current->sigmask_lock);
+	}
+	return;
+
+badframe:
+#if DEBUG_SIG
+	printk("badframe in handle_signal, regs=%p frame=%lx newsp=%lx\n",
+	       regs, frame, *newspp);
+	printk("sc=%p sig=%d ka=%p info=%p oldset=%p\n", sc, sig, ka, info, oldset);
+#endif
+	do_exit(SIGSEGV);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle.  Thus you cannot kill init even with a SIGKILL, even by
+ * mistake.
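 *
 * On the 64-bit kernel this logic exists twice: do_signal() below
 * serves 64-bit tasks and hands 32-bit tasks (PPC_FLAG_32BIT) off to
 * do_signal32() in signal32.c, which lays out 32-bit-sized frames.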
+ */ +extern int do_signal32(sigset_t *oldset, struct pt_regs *regs); +int do_signal(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + struct k_sigaction *ka; + unsigned long frame, newsp; + + /* + * If the current thread is 32 bit - invoke the + * 32 bit signal handling code + */ + if (current->thread.flags & PPC_FLAG_32BIT) + return do_signal32(oldset, regs); + + PPCDBG(PPCDBG_SIGNAL, "do_signal - running - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + + + if (!oldset) + oldset = ¤t->blocked; + + newsp = frame = 0; + + for (;;) { + unsigned long signr; + + PPCDBG(PPCDBG_SIGNAL, "do_signal - (pre) dequeueing signal - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + PPCDBG(PPCDBG_SIGNAL, "do_signal - (aft) dequeueing signal - signal=%lx - pid=%ld current=%lx comm=%s \n", signr, current->pid, current, current->comm); + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr-1]; + + + PPCDBG(PPCDBG_SIGNAL, "do_signal - ka=%p, action handler=%lx \n", ka, ka->sa.sa_handler); + + if (ka->sa.sa_handler == SIG_IGN) { + PPCDBG(PPCDBG_SIGNAL, "do_signal - into SIG_IGN logic \n"); + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + PPCDBG(PPCDBG_SIGNAL, "do_signal - into SIG_DFL logic \n"); + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: + current->state = TASK_STOPPED; + current->exit_code = signr; + if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sigaddset(¤t->pending.signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + if ( (ka->sa.sa_flags & SA_ONSTACK) + && (! on_sig_stack(regs->gpr[1]))) + newsp = (current->sas_ss_sp + current->sas_ss_size); + else + newsp = regs->gpr[1]; + newsp = frame = newsp - sizeof(struct sigregs); + + /* Whee! Actually deliver the signal. 
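	   handle_signal() pushes the sigcontext or rt_sigframe just
	   below "frame" (moving newsp down), and setup_frame() or
	   setup_rt_frame() afterwards fills in the sigregs preamble at
	   "frame" itself.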
*/ + + PPCDBG(PPCDBG_SIGNAL, "do_signal - GOING TO RUN SIGNAL HANDLER - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + handle_signal(signr, ka, &info, oldset, regs, &newsp, frame); + PPCDBG(PPCDBG_SIGNAL, "do_signal - after running signal handler - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + break; + } + + if (regs->trap == 0x0C00 /* System Call! */ && + ((int)regs->result == -ERESTARTNOHAND || + (int)regs->result == -ERESTARTSYS || + (int)regs->result == -ERESTARTNOINTR)) { + PPCDBG(PPCDBG_SIGNAL, "do_signal - going to back up & retry system call \n"); + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; /* Back up & retry system call */ + regs->result = 0; + } + + if (newsp == frame) + { + PPCDBG(PPCDBG_SIGNAL, "do_signal - returning w/ no signal delivered \n"); + return 0; /* no signals delivered */ + } + + + + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(regs, (struct sigregs *) frame, newsp); + else + setup_frame(regs, (struct sigregs *) frame, newsp); + PPCDBG(PPCDBG_SIGNAL, "do_signal - returning a signal was delivered \n"); + return 1; +} + + + + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/signal32.c linuxppc64_2_4/arch/ppc64/kernel/signal32.c --- ../kernel.org/linux/arch/ppc64/kernel/signal32.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/signal32.c Fri Nov 30 10:30:45 2001 @@ -0,0 +1,1517 @@ +/* + * signal32.c: Support 32bit signal syscalls. + * + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +/* + * These are the flags in the MSR that the user is allowed to change + * by modifying the saved value of the MSR on the stack. SE and BE + * should not be in this list since gdb may want to change these. I.e, + * you should be able to step out of a signal handler to see what + * instruction executes next after the signal handler completes. + * Alternately, if you stepped into a signal handler, you should be + * able to continue 'til the next breakpoint from within the signal + * handler, even if the handler returns. + */ +#define MSR_USERCHANGE (MSR_FE0 | MSR_FE1) + + + + +/* Use this to get at 32-bit user passed pointers. */ +/* Things to consider: the low-level assembly stub does + srl x, 0, x for first four arguments, so if you have + pointer to something in the first four arguments, just + declare it as a pointer, not u32. On the other side, + arguments from 5th onwards should be declared as u32 + for pointers, and need AA() around each usage. + A() macro should be used for places where you e.g. + have some internal variable u32 and just want to get + rid of a compiler warning. 
AA() has to be used in + places where you want to convert a function argument + to 32bit pointer or when you e.g. access pt_regs + structure and want to consider 32bit registers only. + - + */ +#define A(__x) ((unsigned long)(__x)) +#define AA(__x) \ +({ unsigned long __ret; \ + __asm__ ("clrldi %0, %0, 32" \ + : "=r" (__ret) \ + : "0" (__x)); \ + __ret; \ +}) + + + +struct timespec32 { + s32 tv_sec; + s32 tv_nsec; +}; + + + + +struct sigregs32 { + /***********************************************************************/ + /* the gp_regs array is 32 bit representation of the pt_regs structure */ + /* that was stored on the kernle stack during the system call that */ + /* was interrupted for the signal. */ + /* */ + /* Note that the entire pt_regs regs structure will fit in the gp_regs */ + /* structure because the ELF_NREG value is 48 for PPC and the pt_regs*/ + /* structure contains 44 registers */ + /* */ + /***********************************************************************/ + elf_gregset_t32 gp_regs; + double fp_regs[ELF_NFPREG]; + unsigned int tramp[2]; + /* Programs using the rs6000/xcoff abi can save up to 19 gp regs + and 18 fp regs below sp before decrementing it. */ + int abigap[56]; +}; + + +struct rt_sigframe_32 { + /* Unused space at start of frame to allow for storing of stack pointers */ + unsigned long _unused; + /* This is a 32 bit pointer in user address space + * it is a pointer to the siginfo stucture in the rt stack frame + */ + u32 pinfo; + /* This is a 32 bit pointer in user address space */ + /* it is a pointer to the user context in the rt stack frame */ + u32 puc; + struct siginfo32 info; + struct ucontext32 uc; +}; + + + + + +extern asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); + + +/****************************************************************************/ +/* Start of nonRT signal support */ +/* */ +/* sigset_t is 32 bits for non-rt signals */ +/* */ +/* System Calls */ +/* sigaction sys32_sigaction */ +/* sigpending sys32_sigpending */ +/* sigprocmask sys32_sigprocmask */ +/* sigreturn sys32_sigreturn */ +/* */ +/* Note sigsuspend has no special 32 bit routine - uses the 64 bit routine */ +/* */ +/* Other routines */ +/* setup_frame32 */ +/* */ +/****************************************************************************/ + + +asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + PPCDBG(PPCDBG_SYS32, "sys32_sigaction - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + if (sig < 0) + { + sig = -sig; + } + + if (act) + { + old_sigset_t32 mask; + + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + PPCDBG(PPCDBG_SIGNAL, "sys32_sigaction flags =%lx \n", new_ka.sa.sa_flags); + + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) + { + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __put_user((long)old_ka.sa.sa_restorer, &oact->sa_restorer); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + + PPCDBG(PPCDBG_SYS32, "sys32_sigaction - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return ret; +} + + + + +extern asmlinkage long sys_sigpending(old_sigset_t *set); + +asmlinkage long sys32_sigpending(old_sigset_t32 *set) +{ + old_sigset_t s; + int ret; + mm_segment_t old_fs = get_fs(); + + PPCDBG(PPCDBG_SYS32, "sys32_sigpending - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + set_fs (KERNEL_DS); + ret = sys_sigpending(&s); + set_fs (old_fs); + if (put_user (s, set)) return -EFAULT; + + PPCDBG(PPCDBG_SYS32, "sys32_sigpending - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return ret; +} + + + + +extern asmlinkage long sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset); + +/* Note: it is necessary to treat how as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sigprocmask(u32 how, old_sigset_t32 *set, old_sigset_t32 *oset) +{ + old_sigset_t s; + int ret; + mm_segment_t old_fs = get_fs(); + + PPCDBG(PPCDBG_SYS32, "sys32_sigprocmask - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + if (set && get_user (s, set)) return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_sigprocmask((int)how, set ? &s : NULL, oset ? &s : NULL); + set_fs (old_fs); + if (ret) return ret; + if (oset && put_user (s, oset)) return -EFAULT; + + PPCDBG(PPCDBG_SYS32, "sys32_sigprocmask - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return 0; +} + + + +/* + * When we have signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * a sigregs struct + * one or more sigcontext structs + * a gap of __SIGNAL_FRAMESIZE32 bytes + * + * Each of these things must be a multiple of 16 bytes in size. + * +*/ + + +/* + * Do a signal return; undo the signal stack. 
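 *
 * The 32-bit frame splits the 64-bit sigmask across two fields, so
 * sys32_sigreturn() below reassembles it as, in effect:
 *
 *	set.sig[0] = (u64)sigctx.oldmask
 *		   | ((u64)sigctx._unused[3] << 32);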
+ */
+long sys32_sigreturn(struct pt_regs *regs)
+{
+ struct sigcontext32_struct *sc, sigctx;
+ struct sigregs32 *sr;
+ int i, ret;
+ elf_gregset_t32 saved_regs; /* an array of ELF_NGREG unsigned ints (32 bits) */
+ sigset_t set;
+ unsigned int prevsp;
+
+ PPCDBG(PPCDBG_SIGNAL, "sys32_sigreturn - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+ sc = (struct sigcontext32_struct *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
+ if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
+ goto badframe;
+
+ /* Note that PPC32 puts the upper 32 bits of the sigmask in the */
+ /* unused part of the signal stackframe */
+ set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ sc++; /* Look at next sigcontext */
+ /* If the next sigcontext is actually the sigregs (frame) */
+ /* - then no more sigcontexts on the user stack */
+ if (sc == (struct sigcontext32_struct*)(u64)sigctx.regs)
+ {
+ /* Last stacked signal - restore registers */
+ sr = (struct sigregs32*)(u64)sigctx.regs;
+ if (regs->msr & MSR_FP)
+ giveup_fpu(current);
+ /* copy the 32 bit register values off the user stack */
+ /* into the 32 bit register area */
+ if (copy_from_user(saved_regs, &sr->gp_regs, sizeof(sr->gp_regs)))
+ goto badframe;
+ /**********************************************************************/
+ /* The saved reg structure in the frame is an elf_gregset_t32, it is */
+ /* a 32 bit register save of the registers in the pt_regs structure */
+ /* that was stored on the kernel stack during the system call */
+ /* when the system call was interrupted for the signal. Only 32 bits */
+ /* are saved because the sigcontext contains a pointer to the regs */
+ /* and the sig context address is passed as a pointer to the signal */
+ /* handler. */
+ /* */
+ /* The entries in the elf_gregset have the same index as the elements */
+ /* in the pt_regs structure. */
+ /* */
+ /**********************************************************************/
+
+ saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE)
+ | (saved_regs[PT_MSR] & MSR_USERCHANGE);
+ /* Restore gprs 0-31 from their saved low words. Register 2 is */
+ /* the kernel toc and is reset on any call into the kernel, so */
+ /* restoring it here is harmless. */
+ for (i = 0; i <= 31; i++)
+ regs->gpr[i] = (u64)(saved_regs[i]) & 0xFFFFFFFF;
+ /****************************************************/
+ /* restore the non gpr registers */
+ /****************************************************/
+ regs->msr = (u64)(saved_regs[PT_MSR]) & 0xFFFFFFFF;
+ /* Ensure that the interrupt mode is 64 bit, during 32 bit execution.
+ * (This is necessary because we only saved the lower 32 bits of the msr.)
+ */
+ regs->msr = regs->msr | MSR_ISF; /* When this thread is interrupted it should run in 64 bit mode. */
+
+ regs->nip = (u64)(saved_regs[PT_NIP]) & 0xFFFFFFFF;
+ regs->orig_gpr3 = (u64)(saved_regs[PT_ORIG_R3]) & 0xFFFFFFFF;
+ regs->ctr = (u64)(saved_regs[PT_CTR]) & 0xFFFFFFFF;
+ regs->link = (u64)(saved_regs[PT_LNK]) & 0xFFFFFFFF;
+ regs->xer = (u64)(saved_regs[PT_XER]) & 0xFFFFFFFF;
+ regs->ccr = (u64)(saved_regs[PT_CCR]) & 0xFFFFFFFF;
+ /* regs->softe is left unchanged (like the MSR.EE bit) */
+ /******************************************************/
+ /* the DAR and the DSISR are only relevant during a */
+ /* data or instruction storage interrupt. The value */
+ /* will be set to zero.
*/ + /******************************************************/ + regs->dar = 0; + regs->dsisr = 0; + regs->result = (u64)(saved_regs[PT_RESULT]) & 0xFFFFFFFF; + + if (copy_from_user(current->thread.fpr, &sr->fp_regs, sizeof(sr->fp_regs))) + goto badframe; + + ret = regs->result; + } else { + /* More signals to go */ + regs->gpr[1] = (unsigned long)sc - __SIGNAL_FRAMESIZE32; + if (copy_from_user(&sigctx, sc, sizeof(sigctx))) + goto badframe; + sr = (struct sigregs32*)(u64)sigctx.regs; + regs->gpr[3] = ret = sigctx.signal; + regs->gpr[4] = (unsigned long) sc; + regs->link = (unsigned long) &sr->tramp; + regs->nip = sigctx.handler; + + if (get_user(prevsp, &sr->gp_regs[PT_R1]) + || put_user(prevsp, (unsigned int*) regs->gpr[1])) + goto badframe; + } + + PPCDBG(PPCDBG_SIGNAL, "sys32_sigreturn - normal exit returning %ld - pid=%ld current=%lx comm=%s \n", ret, current->pid, current, current->comm); + return ret; + +badframe: + PPCDBG(PPCDBG_SYS32NI, "sys32_sigreturn - badframe - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + do_exit(SIGSEGV); +} + +/* + * Set up a signal frame. + */ +static void +setup_frame32(struct pt_regs *regs, struct sigregs32 *frame, + unsigned int newsp) +{ + struct sigcontext32_struct *sc = (struct sigcontext32_struct *)(u64)newsp; + + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + if (regs->msr & MSR_FP) + giveup_fpu(current); + + /***************************************************************/ + /* */ + /* Copy the register contents for the pt_regs structure on the */ + /* kernel stack to the elf_gregset_t32 structure on the user */ + /* stack. This is a copy of 64 bit register values to 32 bit */ + /* register values. The high order 32 bits of the 64 bit */ + /* registers are not needed since a 32 bit application is */ + /* running and the saved registers are the contents of the */ + /* user registers at the time of a system call. 
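+ (for example - an illustrative value, not taken from this patch -
+ a 64 bit register holding 0x0000000012345678 has its low word
+ 0x12345678 at ((u32*)&reg)[1] on this big-endian layout, which is
+ exactly the word that the +1 below selects)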
*/
+ /* */
+ /* The values saved on the user stack will be restored into */
+ /* the registers during the signal return processing */
+ /* */
+ /* Note the +1 is needed in order to get the lower 32 bits */
+ /* of a 64 bit register */
+ /***************************************************************/
+ if (__copy_to_user(&frame->gp_regs[0], (u32*)(&regs->gpr[0])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[1], (u32*)(&regs->gpr[1])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[2], (u32*)(&regs->gpr[2])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[3], (u32*)(&regs->gpr[3])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[4], (u32*)(&regs->gpr[4])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[5], (u32*)(&regs->gpr[5])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[6], (u32*)(&regs->gpr[6])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[7], (u32*)(&regs->gpr[7])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[8], (u32*)(&regs->gpr[8])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[9], (u32*)(&regs->gpr[9])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[10], (u32*)(&regs->gpr[10])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[11], (u32*)(&regs->gpr[11])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[12], (u32*)(&regs->gpr[12])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[13], (u32*)(&regs->gpr[13])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[14], (u32*)(&regs->gpr[14])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[15], (u32*)(&regs->gpr[15])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[16], (u32*)(&regs->gpr[16])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[17], (u32*)(&regs->gpr[17])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[18], (u32*)(&regs->gpr[18])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[19], (u32*)(&regs->gpr[19])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[20], (u32*)(&regs->gpr[20])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[21], (u32*)(&regs->gpr[21])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[22], (u32*)(&regs->gpr[22])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[23], (u32*)(&regs->gpr[23])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[24], (u32*)(&regs->gpr[24])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[25], (u32*)(&regs->gpr[25])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[26], (u32*)(&regs->gpr[26])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[27], (u32*)(&regs->gpr[27])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[28], (u32*)(&regs->gpr[28])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[29], (u32*)(&regs->gpr[29])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[30], (u32*)(&regs->gpr[30])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[31], (u32*)(&regs->gpr[31])+1, sizeof(u32)))
+ goto badframe;
+
+ /*****************************************************************************/
+ /* Copy the non gpr registers to the user stack */
+ /*****************************************************************************/
+
+ if (__copy_to_user(&frame->gp_regs[PT_NIP], (u32*)(&regs->gpr[PT_NIP])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_MSR], (u32*)(&regs->gpr[PT_MSR])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_ORIG_R3], (u32*)(&regs->gpr[PT_ORIG_R3])+1,
+ sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_CTR], (u32*)(&regs->gpr[PT_CTR])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_LNK], (u32*)(&regs->gpr[PT_LNK])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_XER], (u32*)(&regs->gpr[PT_XER])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_CCR], (u32*)(&regs->gpr[PT_CCR])+1, sizeof(u32))
+#if 0
+ || __copy_to_user(&frame->gp_regs[PT_MQ], (u32*)(&regs->gpr[PT_MQ])+1, sizeof(u32))
+#endif
+ || __copy_to_user(&frame->gp_regs[PT_RESULT], (u32*)(&regs->gpr[PT_RESULT])+1,
+ sizeof(u32)))
+ goto badframe;
+
+
+ /*****************************************************************************/
+ /* Now copy the floating point registers onto the user stack */
+ /* */
+ /* Also set up so on the completion of the signal handler, the sys_sigreturn */
+ /* will get control to reset the stack */
+ /*****************************************************************************/
+ if (__copy_to_user(&frame->fp_regs, current->thread.fpr,
+ ELF_NFPREG * sizeof(double))
+ || __put_user(0x38007777U, &frame->tramp[0]) /* li r0,0x7777 */
+ || __put_user(0x44000002U, &frame->tramp[1])) /* sc */
+ goto badframe;
+
+ flush_icache_range((unsigned long) &frame->tramp[0],
+ (unsigned long) &frame->tramp[2]);
+
+ newsp -= __SIGNAL_FRAMESIZE32;
+ if (put_user(regs->gpr[1], (u32*)(u64)newsp)
+ || get_user(regs->nip, &sc->handler)
+ || get_user(regs->gpr[3], &sc->signal))
+ goto badframe;
+
+ regs->gpr[1] = newsp & 0xFFFFFFFF;
+ /**************************************************************/
+ /* first parameter to the signal handler is the signal number */
+ /* - the value is in gpr3 */
+ /* second parameter to the signal handler is the sigcontext */
+ /* - set the value into gpr4 */
+ /**************************************************************/
+ regs->gpr[4] = (unsigned long) sc;
+ regs->link = (unsigned long) frame->tramp;
+ return;
+
+ badframe:
+ udbg_printf("setup_frame32 - badframe, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp);
+ PPCDBG_ENTER_DEBUGGER();
+#if DEBUG_SIG
+ printk("badframe in setup_frame32, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ do_exit(SIGSEGV);
+}
+
+
+/****************************************************************************/
+/* Start of RT signal support */
+/* */
+/* sigset_t is 64 bits for rt signals */
+/* */
+/* System Calls */
+/* sigaction sys32_rt_sigaction */
+/* sigpending sys32_rt_sigpending */
+/* sigprocmask sys32_rt_sigprocmask */
+/* sigreturn sys32_rt_sigreturn */
+/* sigtimedwait sys32_rt_sigtimedwait */
+/* sigqueueinfo sys32_rt_sigqueueinfo */
+/* sigsuspend sys32_rt_sigsuspend */
+/* */
+/* Other routines */
+/* setup_rt_frame32 */
+/* siginfo64to32 */
+/* siginfo32to64 */
+/* */
+/****************************************************************************/
+
+
+/* This code executes after the rt signal handler in 32 bit mode has */
+/* completed and returned */
+long sys32_rt_sigreturn(struct pt_regs * regs)
+{
+ struct rt_sigframe_32 *rt_stack_frame;
+ struct sigcontext32_struct sigctx;
+ struct sigregs32 *signalregs;
+
+ int i, ret;
+ elf_gregset_t32 saved_regs; /* an array of 32 bit register values */
+ sigset_t signal_set;
+ stack_t stack;
+ unsigned int previous_stack;
+
+ ret = 0;
+ /* Adjust the incoming reg1 to point to the first rt signal frame */
+ rt_stack_frame = (struct rt_sigframe_32 *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
+ /* Copy the information from the user stack */
+ if (copy_from_user(&sigctx, &rt_stack_frame->uc.uc_mcontext, sizeof(sigctx))
+ || copy_from_user(&signal_set, &rt_stack_frame->uc.uc_sigmask, sizeof(signal_set))
+ || copy_from_user(&stack, &rt_stack_frame->uc.uc_stack, sizeof(stack)))
+ {
+ /* unable to copy from user storage */
+ goto badframe;
+ }
+
+ /* Unblock the signal that was processed
+ * After a signal handler runs -
+ * if the
signal is blockable - the signal will be unblocked
+ * (sigkill and sigstop are not blockable)
+ */
+ sigdelsetmask(&signal_set, ~_BLOCKABLE);
+ /* update current's blocked set from the sigmask found in the rt stack frame */
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = signal_set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ /* Set to point to the next rt_sigframe - this is used to determine whether this
+ * is the last signal to process
+ */
+ rt_stack_frame++;
+
+ if (rt_stack_frame == (struct rt_sigframe_32 *)(u64)(sigctx.regs))
+ {
+ signalregs = (struct sigregs32 *) (u64)sigctx.regs;
+ /* If currently owning the floating point - give it up */
+ if (regs->msr & MSR_FP)
+ {
+ giveup_fpu(current);
+ }
+ if (copy_from_user(saved_regs, &signalregs->gp_regs, sizeof(signalregs->gp_regs)))
+ {
+ goto badframe;
+ }
+ /**********************************************************************/
+ /* The saved reg structure in the frame is an elf_gregset_t32, it is */
+ /* a 32 bit register save of the registers in the pt_regs structure */
+ /* that was stored on the kernel stack during the system call */
+ /* when the system call was interrupted for the signal. Only 32 bits */
+ /* are saved because the sigcontext contains a pointer to the regs */
+ /* and the sig context address is passed as a pointer to the signal */
+ /* handler. */
+ /* */
+ /* The entries in the elf_gregset have the same index as the elements */
+ /* in the pt_regs structure. */
+ /* */
+ /**********************************************************************/
+
+ saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE)
+ | (saved_regs[PT_MSR] & MSR_USERCHANGE);
+ /* Restore gprs 0-31 from their saved low words. Register 2 is */
+ /* the kernel toc and is reset on any call into the kernel, so */
+ /* restoring it here is harmless. */
+ for (i = 0; i <= 31; i++)
+ regs->gpr[i] = (u64)(saved_regs[i]) & 0xFFFFFFFF;
+ /****************************************************/
+ /* restore the non gpr registers */
+ /****************************************************/
+ regs->msr = (u64)(saved_regs[PT_MSR]) & 0xFFFFFFFF;
+
+ regs->nip = (u64)(saved_regs[PT_NIP]) & 0xFFFFFFFF;
+ regs->orig_gpr3 = (u64)(saved_regs[PT_ORIG_R3]) & 0xFFFFFFFF;
+ regs->ctr = (u64)(saved_regs[PT_CTR]) & 0xFFFFFFFF;
+ regs->link = (u64)(saved_regs[PT_LNK]) & 0xFFFFFFFF;
+ regs->xer = (u64)(saved_regs[PT_XER]) & 0xFFFFFFFF;
+ regs->ccr = (u64)(saved_regs[PT_CCR]) & 0xFFFFFFFF;
+ /* regs->softe is left unchanged (like MSR.EE) */
+ /******************************************************/
+ /* the DAR and the DSISR are only relevant during a */
+ /* data or instruction storage interrupt. The value */
+ /* will be set to zero. */
+ /******************************************************/
+ regs->dar = 0;
+ regs->dsisr = 0;
+ regs->result = (u64)(saved_regs[PT_RESULT]) & 0xFFFFFFFF;
+ ret = regs->result;
+ }
+ else /* more signals to go */
+ {
+ regs->gpr[1] = (u64)rt_stack_frame - __SIGNAL_FRAMESIZE32;
+ if (copy_from_user(&sigctx, &rt_stack_frame->uc.uc_mcontext, sizeof(sigctx)))
+ {
+ goto badframe;
+ }
+ signalregs = (struct sigregs32 *) (u64)sigctx.regs;
+ /* first parm to signal handler is the signal number */
+ regs->gpr[3] = ret = sigctx.signal;
+ /* second parm is a pointer to sig info */
+ get_user(regs->gpr[4], &rt_stack_frame->pinfo);
+ /* third parm is a pointer to the ucontext */
+ get_user(regs->gpr[5], &rt_stack_frame->puc);
+ /* fourth parm is the stack frame */
+ regs->gpr[6] = (u64)rt_stack_frame;
+ /* Set up link register to return to sigreturn when the */
+ /* signal handler completes */
+ regs->link = (u64)&signalregs->tramp;
+ /* Set next instruction to the start of the signal handler */
+ regs->nip = sigctx.handler;
+ /* Set reg1 to look like a call to the signal handler */
+ if (get_user(previous_stack, &signalregs->gp_regs[PT_R1])
+ || put_user(previous_stack, (unsigned long *)regs->gpr[1]))
+ {
+ goto badframe;
+ }
+ }
+
+ return ret;
+
+ badframe:
+ do_exit(SIGSEGV);
+}
+
+
+
+asmlinkage long sys32_rt_sigaction(int sig, const struct sigaction32 *act, struct sigaction32 *oact, size_t sigsetsize)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+ sigset32_t set32;
+
+ PPCDBG(PPCDBG_SIGNAL, "sys32_rt_sigaction - entered - sig=%x \n", sig);
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset32_t))
+ return -EINVAL;
+
+ if (act) {
+ ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
+ ret |= __copy_from_user(&set32, &act->sa_mask,
+ sizeof(sigset32_t));
+ switch (_NSIG_WORDS) {
+ case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
+ | (((long)set32.sig[7]) << 32);
+ case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
+ | (((long)set32.sig[5]) << 32);
+ case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
+ | (((long)set32.sig[3]) << 32);
+ case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
+ | (((long)set32.sig[1]) << 32);
+ }
+
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+
+ if (ret)
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ?
&old_ka : NULL); + + if (!ret && oact) { + switch (_NSIG_WORDS) { + case 4: + set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); + set32.sig[6] = old_ka.sa.sa_mask.sig[3]; + case 3: + set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); + set32.sig[4] = old_ka.sa.sa_mask.sig[2]; + case 2: + set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); + set32.sig[2] = old_ka.sa.sa_mask.sig[1]; + case 1: + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; + } + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __copy_to_user(&oact->sa_mask, &set32, + sizeof(sigset32_t)); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + + + PPCDBG(PPCDBG_SIGNAL, "sys32_rt_sigaction - exiting - sig=%x \n", sig); + return ret; +} + + +extern asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, + size_t sigsetsize); + +/* Note: it is necessary to treat how as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_rt_sigprocmask(u32 how, sigset32_t *set, sigset32_t *oset, size_t sigsetsize) +{ + sigset_t s; + sigset32_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + PPCDBG(PPCDBG_SIGNAL, "sys32_rt_sigprocmask - entered how=%x \n", (int)how); + + if (set) { + if (copy_from_user (&s32, set, sizeof(sigset32_t))) + return -EFAULT; + + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + } + + set_fs (KERNEL_DS); + ret = sys_rt_sigprocmask((int)how, set ? &s : NULL, oset ? 
&s : NULL, + sigsetsize); + set_fs (old_fs); + if (ret) return ret; + if (oset) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (oset, &s32, sizeof(sigset32_t))) + return -EFAULT; + } + return 0; +} + + +extern asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); + + + +asmlinkage long sys32_rt_sigpending(sigset32_t *set, __kernel_size_t32 sigsetsize) +{ + + sigset_t s; + sigset32_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_rt_sigpending(&s, sigsetsize); + set_fs (old_fs); + if (!ret) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (set, &s32, sizeof(sigset32_t))) + return -EFAULT; + } + return ret; +} + + + +siginfo_t32 * +siginfo64to32(siginfo_t32 *d, siginfo_t *s) +{ + memset (d, 0, sizeof(siginfo_t32)); + d->si_signo = s->si_signo; + d->si_errno = s->si_errno; + d->si_code = s->si_code; + if (s->si_signo >= SIGRTMIN) { + d->si_pid = s->si_pid; + d->si_uid = s->si_uid; + + d->si_int = s->si_int; + } else switch (s->si_signo) { + /* XXX: What about POSIX1.b timers */ + case SIGCHLD: + d->si_pid = s->si_pid; + d->si_status = s->si_status; + d->si_utime = s->si_utime; + d->si_stime = s->si_stime; + break; + case SIGSEGV: + case SIGBUS: + case SIGFPE: + case SIGILL: + d->si_addr = (long)(s->si_addr); + break; + case SIGPOLL: + d->si_band = s->si_band; + d->si_fd = s->si_fd; + break; + default: + d->si_pid = s->si_pid; + d->si_uid = s->si_uid; + break; + } + return d; +} + +extern asmlinkage long +sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, + const struct timespec *uts, size_t sigsetsize); + +asmlinkage long +sys32_rt_sigtimedwait(sigset32_t *uthese, siginfo_t32 *uinfo, + struct timespec32 *uts, __kernel_size_t32 sigsetsize) +{ + sigset_t s; + sigset32_t s32; + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + siginfo_t info; + siginfo_t32 info32; + + if (copy_from_user (&s32, uthese, sizeof(sigset32_t))) + return -EFAULT; + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + if (uts) { + ret = get_user (t.tv_sec, &uts->tv_sec); + ret |= __get_user (t.tv_nsec, &uts->tv_nsec); + if (ret) + return -EFAULT; + } + set_fs (KERNEL_DS); + if (uts) + { + ret = sys_rt_sigtimedwait(&s, &info, &t, sigsetsize); + } else { + ret = sys_rt_sigtimedwait(&s, &info, (struct timespec *)uts, sigsetsize); + } + + set_fs (old_fs); + if (ret >= 0 && uinfo) { + if (copy_to_user (uinfo, siginfo64to32(&info32, &info), + sizeof(siginfo_t32))) + return -EFAULT; + } + return ret; +} + + + +siginfo_t * +siginfo32to64(siginfo_t *d, siginfo_t32 *s) +{ + d->si_signo = s->si_signo; + d->si_errno = s->si_errno; + d->si_code = s->si_code; + if (s->si_signo >= SIGRTMIN) { + d->si_pid = s->si_pid; + d->si_uid = s->si_uid; + d->si_int = s->si_int; + + } else switch (s->si_signo) { + /* XXX: What about POSIX1.b 
timers */ + case SIGCHLD: + d->si_pid = s->si_pid; + d->si_status = s->si_status; + d->si_utime = s->si_utime; + d->si_stime = s->si_stime; + break; + case SIGSEGV: + case SIGBUS: + case SIGFPE: + case SIGILL: + d->si_addr = (void *)A(s->si_addr); + break; + case SIGPOLL: + d->si_band = s->si_band; + d->si_fd = s->si_fd; + break; + default: + d->si_pid = s->si_pid; + d->si_uid = s->si_uid; + break; + } + return d; +} + + +extern asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); + +/* Note: it is necessary to treat pid and sig as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_rt_sigqueueinfo(u32 pid, u32 sig, siginfo_t32 *uinfo) +{ + siginfo_t info; + siginfo_t32 info32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_from_user (&info32, uinfo, sizeof(siginfo_t32))) + return -EFAULT; + /* XXX: Is this correct? */ + siginfo32to64(&info, &info32); + + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo((int)pid, (int)sig, &info); + set_fs (old_fs); + return ret; +} + + +int do_signal(sigset_t *oldset, struct pt_regs *regs); +int sys32_rt_sigsuspend(sigset32_t* unewset, size_t sigsetsize, int p3, int p4, int p6, int p7, struct pt_regs *regs) +{ + sigset_t saveset, newset; + + sigset32_t s32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&s32, unewset, sizeof(s32))) + return -EFAULT; + + /* Swap the 2 words of the 64-bit sigset_t (they are stored in the "wrong" endian in 32-bit user storage). */ + switch (_NSIG_WORDS) + { + case 4: newset.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: newset.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: newset.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: newset.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->gpr[3] = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs)) + return regs->gpr[3]; + } +} + + + + + + + + +/* + * Set up a rt signal frame. + */ +static void +setup_rt_frame32(struct pt_regs *regs, struct sigregs32 *frame, + unsigned int newsp) +{ + unsigned int copyreg4,copyreg5; + struct rt_sigframe_32 * rt_sf = (struct rt_sigframe_32 *) (u64)newsp; + + + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + if (regs->msr & MSR_FP) + giveup_fpu(current); + /***************************************************************/ + /* */ + /* Copy the register contents for the pt_regs structure on the */ + /* kernel stack to the elf_gregset_t32 structure on the user */ + /* stack. This is a copy of 64 bit register values to 32 bit */ + /* register values. The high order 32 bits of the 64 bit */ + /* registers are not needed since a 32 bit application is */ + /* running and the saved registers are the contents of the */ + /* user registers at the time of a system call. 
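+ (this mirrors setup_frame32 above: only the low word of each 64 bit
+ register is copied out, and the same trampoline/sigreturn convention
+ applies when the handler returns)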
*/
+ /* */
+ /* The values saved on the user stack will be restored into */
+ /* the registers during the signal return processing */
+ /* */
+ /* Note the +1 is needed in order to get the lower 32 bits */
+ /* of a 64 bit register */
+ /***************************************************************/
+ if (__copy_to_user(&frame->gp_regs[0], (u32*)(&regs->gpr[0])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[1], (u32*)(&regs->gpr[1])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[2], (u32*)(&regs->gpr[2])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[3], (u32*)(&regs->gpr[3])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[4], (u32*)(&regs->gpr[4])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[5], (u32*)(&regs->gpr[5])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[6], (u32*)(&regs->gpr[6])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[7], (u32*)(&regs->gpr[7])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[8], (u32*)(&regs->gpr[8])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[9], (u32*)(&regs->gpr[9])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[10], (u32*)(&regs->gpr[10])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[11], (u32*)(&regs->gpr[11])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[12], (u32*)(&regs->gpr[12])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[13], (u32*)(&regs->gpr[13])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[14], (u32*)(&regs->gpr[14])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[15], (u32*)(&regs->gpr[15])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[16], (u32*)(&regs->gpr[16])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[17], (u32*)(&regs->gpr[17])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[18], (u32*)(&regs->gpr[18])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[19], (u32*)(&regs->gpr[19])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[20], (u32*)(&regs->gpr[20])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[21], (u32*)(&regs->gpr[21])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[22], (u32*)(&regs->gpr[22])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[23], (u32*)(&regs->gpr[23])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[24], (u32*)(&regs->gpr[24])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[25], (u32*)(&regs->gpr[25])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[26], (u32*)(&regs->gpr[26])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[27], (u32*)(&regs->gpr[27])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[28], (u32*)(&regs->gpr[28])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[29], (u32*)(&regs->gpr[29])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[30], (u32*)(&regs->gpr[30])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[31], (u32*)(&regs->gpr[31])+1, sizeof(u32)))
+ goto badframe;
+
+ /*****************************************************************************/
+ /* Copy the non gpr registers to the user stack */
+ /*****************************************************************************/
+
+ if (__copy_to_user(&frame->gp_regs[PT_NIP], (u32*)(&regs->gpr[PT_NIP])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_MSR], (u32*)(&regs->gpr[PT_MSR])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_ORIG_R3], (u32*)(&regs->gpr[PT_ORIG_R3])+1,
+ sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_CTR], (u32*)(&regs->gpr[PT_CTR])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_LNK], (u32*)(&regs->gpr[PT_LNK])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_XER], (u32*)(&regs->gpr[PT_XER])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_CCR], (u32*)(&regs->gpr[PT_CCR])+1, sizeof(u32))
+ || __copy_to_user(&frame->gp_regs[PT_RESULT], (u32*)(&regs->gpr[PT_RESULT])+1,
+ sizeof(u32)))
+ goto badframe;
+
+
+ /*****************************************************************************/
+ /* Now copy the floating point registers onto the user stack */
+ /* */
+ /* Also set up so on the completion of the signal handler, the sys_sigreturn */
+ /* will get control to reset the stack */
+ /*****************************************************************************/
+ if (__copy_to_user(&frame->fp_regs, current->thread.fpr,
+ ELF_NFPREG * sizeof(double))
+ || __put_user(0x38006666U, &frame->tramp[0]) /* li r0,0x6666 */
+ || __put_user(0x44000002U, &frame->tramp[1])) /* sc */
+ goto badframe;
+
+ flush_icache_range((unsigned long) &frame->tramp[0],
+ (unsigned long) &frame->tramp[2]);
+
+ /* Retrieve rt_sigframe from stack and
+ * set up registers for the signal handler
+ */
+ newsp -= __SIGNAL_FRAMESIZE32;
+
+ if (put_user((u32)(regs->gpr[1]), (unsigned int *)(u64)newsp)
+ || get_user(regs->nip, &rt_sf->uc.uc_mcontext.handler)
+ || get_user(regs->gpr[3], &rt_sf->uc.uc_mcontext.signal)
+ || get_user(copyreg4, &rt_sf->pinfo)
+ || get_user(copyreg5, &rt_sf->puc))
+ goto badframe;
+
+ regs->gpr[4] = copyreg4;
+ regs->gpr[5] = copyreg5;
+ regs->gpr[1] = newsp;
+ regs->gpr[6] = (unsigned long) rt_sf;
+ regs->link = (unsigned long) frame->tramp;
+
+ return;
+
+ badframe:
+ udbg_printf("setup_rt_frame32 - badframe, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp);
+ PPCDBG_ENTER_DEBUGGER();
+#if DEBUG_SIG
+ printk("badframe in setup_rt_frame32, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ do_exit(SIGSEGV);
+}
+
+
+/*
+ * OK, we're invoking a handler
+ */
+static void
+handle_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset, struct pt_regs * regs,
+ unsigned int *newspp, unsigned int frame)
+{
+ struct sigcontext32_struct *sc;
+ struct rt_sigframe_32 *rt_stack_frame;
+ siginfo_t32 siginfo32bit;
+
+ if (regs->trap == 0x0C00 /* System Call!
*/
+ && ((int)regs->result == -ERESTARTNOHAND ||
+ ((int)regs->result == -ERESTARTSYS &&
+ !(ka->sa.sa_flags & SA_RESTART))))
+ regs->result = -EINTR;
+
+ /* Set up the signal frame */
+ /* Determine if a real time frame - siginfo required */
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ {
+ siginfo64to32(&siginfo32bit, info);
+ *newspp -= sizeof(*rt_stack_frame);
+ rt_stack_frame = (struct rt_sigframe_32 *) (u64)(*newspp);
+
+ if (verify_area(VERIFY_WRITE, rt_stack_frame, sizeof(*rt_stack_frame)))
+ {
+ goto badframe;
+ }
+ if (__put_user((u32)(u64)ka->sa.sa_handler, &rt_stack_frame->uc.uc_mcontext.handler)
+ || __put_user((u32)(u64)&rt_stack_frame->info, &rt_stack_frame->pinfo)
+ || __put_user((u32)(u64)&rt_stack_frame->uc, &rt_stack_frame->puc)
+ /* put the siginfo on the user stack */
+ || __copy_to_user(&rt_stack_frame->info, &siginfo32bit, sizeof(siginfo32bit))
+ /* set the ucontext on the user stack */
+ || __put_user(0, &rt_stack_frame->uc.uc_flags)
+ || __put_user(0, &rt_stack_frame->uc.uc_link)
+ || __put_user(current->sas_ss_sp, &rt_stack_frame->uc.uc_stack.ss_sp)
+ || __put_user(sas_ss_flags(regs->gpr[1]),
+ &rt_stack_frame->uc.uc_stack.ss_flags)
+ || __put_user(current->sas_ss_size, &rt_stack_frame->uc.uc_stack.ss_size)
+ || __copy_to_user(&rt_stack_frame->uc.uc_sigmask, oldset, sizeof(*oldset))
+ /* point the mcontext.regs to the preamble register frame */
+ || __put_user(frame, &rt_stack_frame->uc.uc_mcontext.regs)
+ || __put_user(sig, &rt_stack_frame->uc.uc_mcontext.signal))
+ {
+ goto badframe;
+ }
+ } else {
+ /* Put another sigcontext on the stack */
+ *newspp -= sizeof(*sc);
+ sc = (struct sigcontext32_struct *)(u64)*newspp;
+ if (verify_area(VERIFY_WRITE, sc, sizeof(*sc)))
+ goto badframe;
+
+ /* Note the upper 32 bits of the signal mask are stored in the */
+ /* unused part of the signal stack frame */
+ if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler)
+ || __put_user(oldset->sig[0], &sc->oldmask)
+ || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
+ || __put_user((unsigned int)frame, &sc->regs)
+ || __put_user(sig, &sc->signal))
+ goto badframe;
+
+ if (ka->sa.sa_flags & SA_ONESHOT)
+ ka->sa.sa_handler = SIG_DFL;
+
+ if (!(ka->sa.sa_flags & SA_NODEFER)) {
+ spin_lock_irq(&current->sigmask_lock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ }
+ }
+ return;
+
+badframe:
+#if DEBUG_SIG
+ printk("badframe in handle_signal32, regs=%p frame=%lx newsp=%lx\n",
+ regs, frame, *newspp);
+ printk("sc=%p sig=%d ka=%p info=%p oldset=%p\n", sc, sig, ka, info, oldset);
+#endif
+ do_exit(SIGSEGV);
+}
+
+
+/****************************************************************************/
+/* Start Alternate signal stack support */
+/* */
+/* System Calls */
+/* sigaltstack sys32_sigaltstack */
+/* */
+/****************************************************************************/
+
+
+asmlinkage int sys32_sigaltstack(u32 newstack, u32 oldstack, int p3, int p4, int p6,
+ int p7, struct pt_regs *regs)
+{
+ stack_t uss, uoss;
+ int ret;
+ mm_segment_t old_fs;
+ unsigned long sp;
+
+ /* set sp to the user stack on entry to the system call */
+ /* the system call router sets R9 to the saved registers */
+ sp = regs->gpr[1];
+
+ /* Put new stack info in local 64 bit stack struct */
+ if (newstack && (get_user((long)uss.ss_sp, &((stack_32_t *)(long)newstack)->ss_sp) ||
+ __get_user(uss.ss_flags, &((stack_32_t *)(long)newstack)->ss_flags) ||
+ __get_user(uss.ss_size, &((stack_32_t
*)(long)newstack)->ss_size))) + return -EFAULT; + + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(newstack ? &uss : NULL, oldstack ? &uoss : NULL, sp); + set_fs(old_fs); + /* Copy the stack information to the user output buffer */ + if (!ret && oldstack && (put_user((long)uoss.ss_sp, &((stack_32_t *)(long)oldstack)->ss_sp) || + __put_user(uoss.ss_flags, &((stack_32_t *)(long)oldstack)->ss_flags) || + __put_user(uoss.ss_size, &((stack_32_t *)(long)oldstack)->ss_size))) + return -EFAULT; + return ret; +} + + + +/****************************************************************************/ +/* Start of do_signal32 routine */ +/* */ +/* This routine gets control when a pemding signal needs to be processed */ +/* in the 32 bit target thread - */ +/* */ +/* It handles both rt and non-rt signals */ +/* */ +/****************************************************************************/ + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ + +int do_signal32(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + struct k_sigaction *ka; + unsigned int frame, newsp; + + if (!oldset) + oldset = ¤t->blocked; + + newsp = frame = 0; + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + ifppcdebug(PPCDBG_SYS32) { + if (signr) + udbg_printf("do_signal32 - processing signal=%2lx - pid=%ld, comm=%s \n", signr, current->pid, current->comm); + } + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr-1]; + + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. 
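+ (pid 1 is simply skipped below, so even a fatal signal delivered by
+ mistake cannot take the default action against init)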
*/ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: + current->state = TASK_STOPPED; + current->exit_code = signr; + if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sigaddset(¤t->pending.signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + PPCDBG(PPCDBG_SIGNAL, " do signal :sigaction flags = %lx \n" ,ka->sa.sa_flags); + PPCDBG(PPCDBG_SIGNAL, " do signal :on sig stack = %lx \n" ,on_sig_stack(regs->gpr[1])); + PPCDBG(PPCDBG_SIGNAL, " do signal :reg1 = %lx \n" ,regs->gpr[1]); + PPCDBG(PPCDBG_SIGNAL, " do signal :alt stack = %lx \n" ,current->sas_ss_sp); + PPCDBG(PPCDBG_SIGNAL, " do signal :alt stack size = %lx \n" ,current->sas_ss_size); + + + + if ( (ka->sa.sa_flags & SA_ONSTACK) + && (! on_sig_stack(regs->gpr[1]))) + { + newsp = (current->sas_ss_sp + current->sas_ss_size); + } else + newsp = regs->gpr[1]; + newsp = frame = newsp - sizeof(struct sigregs32); + + /* Whee! Actually deliver the signal. */ + handle_signal32(signr, ka, &info, oldset, regs, &newsp, frame); + break; + } + + if (regs->trap == 0x0C00 /* System Call! */ && + ((int)regs->result == -ERESTARTNOHAND || + (int)regs->result == -ERESTARTSYS || + (int)regs->result == -ERESTARTNOINTR)) { + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; /* Back up & retry system call */ + regs->result = 0; + } + + if (newsp == frame) + { + return 0; /* no signals delivered */ + } + // Invoke correct stack setup routine + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame32(regs, (struct sigregs32*)(u64)frame, newsp); + else + setup_frame32(regs, (struct sigregs32*)(u64)frame, newsp); + + return 1; + +} + + + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/smp.c linuxppc64_2_4/arch/ppc64/kernel/smp.c --- ../kernel.org/linux/arch/ppc64/kernel/smp.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/smp.c Mon Dec 3 17:14:05 2001 @@ -0,0 +1,763 @@ +/* + * + * + * SMP support for ppc. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. + * + * Copyright (C) 1999 Cort Dougan + * + * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include +#include +/* #include */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "open_pic.h" +#include + +int smp_threads_ready = 0; +volatile int smp_commenced = 0; +int smp_num_cpus = 1; +int smp_tb_synchronized = 0; +extern atomic_t ipi_recv; +extern atomic_t ipi_sent; +spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED; +cycles_t cacheflush_time; +static int max_cpus __initdata = NR_CPUS; + +unsigned long cpu_online_map; + +volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; + +#define TB_SYNC_PASSES 4 +volatile unsigned long __initdata tb_sync_flag = 0; +volatile unsigned long __initdata tb_offset = 0; + +extern unsigned char stab_array[]; + +int start_secondary(void *); +extern int cpu_idle(void *unused); +void smp_call_function_interrupt(void); +void smp_message_pass(int target, int msg, unsigned long data, int wait); +static unsigned long iSeries_smp_message[NR_CPUS]; +extern struct Naca *naca; +extern struct Paca xPaca[]; + +void xics_setup_cpu(void); +void xics_cause_IPI(int cpu); + +/* + * XICS only has a single IPI, so encode the messages per CPU + */ +volatile unsigned long xics_ipi_message[NR_CPUS] = {0}; + +#define smp_message_pass(t,m,d,w) \ + do { atomic_inc(&ipi_sent); \ + ppc_md.smp_message_pass((t),(m),(d),(w)); \ + } while(0) + +#ifdef CONFIG_KDB +void smp_kdb_stop(void) +{ +} +#endif + +static inline void set_tb(unsigned int upper, unsigned int lower) +{ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, upper); + mtspr(SPRN_TBWL, lower); +} + +void iSeries_smp_message_recv( struct pt_regs * regs ) +{ + int cpu = smp_processor_id(); + int msg; + + if ( smp_num_cpus < 2 ) + return; + + for ( msg = 0; msg < 4; ++msg ) + if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) ) + smp_message_recv( msg, regs ); + +} + +static void smp_iSeries_message_pass(int target, int msg, unsigned long data, int wait) +{ + int i; + for (i = 0; i < smp_num_cpus; ++i) { + if ( (target == MSG_ALL) || + (target == i) || + ((target == MSG_ALL_BUT_SELF) && (i != smp_processor_id())) ) { + set_bit( msg, &iSeries_smp_message[i] ); + HvCall_sendIPI(&(xPaca[i])); + } + } +} + +static int smp_iSeries_numProcs(void) +{ + unsigned np, i; + struct ItLpPaca * lpPaca; + + np = 0; + for (i=0; i < maxPacas; ++i) { + lpPaca = xPaca[i].xLpPacaPtr; + if ( lpPaca->xDynProcStatus < 2 ) { + ++np; + } + } + return np; +} + +static int smp_iSeries_probe(void) +{ + unsigned i; + unsigned np; + struct ItLpPaca * lpPaca; + + np = 0; + for (i=0; i < maxPacas; ++i) { + lpPaca = xPaca[i].xLpPacaPtr; + if ( lpPaca->xDynProcStatus < 2 ) { + ++np; + xPaca[i].next_jiffy_update_tb = xPaca[0].next_jiffy_update_tb; + } + } + + smp_tb_synchronized = 1; + return np; +} + +static void smp_iSeries_kick_cpu(int nr) +{ + struct ItLpPaca * lpPaca; + /* Verify we have a Paca for processor nr */ + if ( ( nr <= 0 ) || + ( nr >= maxPacas ) ) + return; + /* Verify that our partition has a processor nr */ + lpPaca = xPaca[nr].xLpPacaPtr; + if ( lpPaca->xDynProcStatus >= 2 ) + return; + /* The processor is currently spinning, waiting + * for the xProcStart field to become non-zero + * After we set xProcStart, the processor will + * continue on to secondary_start in iSeries_head.S + */ + xPaca[nr].xProcStart = 1; +} + +static void 
smp_iSeries_setup_cpu(int nr) +{ +} + +/* This is called very early. */ +void smp_init_iSeries(void) +{ + ppc_md.smp_message_pass = smp_iSeries_message_pass; + ppc_md.smp_probe = smp_iSeries_probe; + ppc_md.smp_kick_cpu = smp_iSeries_kick_cpu; + ppc_md.smp_setup_cpu = smp_iSeries_setup_cpu; + + naca->processorCount = smp_iSeries_numProcs(); +} + + +static void +smp_openpic_message_pass(int target, int msg, unsigned long data, int wait) +{ + /* make sure we're sending something that translates to an IPI */ + if ( msg > 0x3 ){ + printk("SMP %d: smp_message_pass: unknown msg %d\n", + smp_processor_id(), msg); + return; + } + switch ( target ) + { + case MSG_ALL: + openpic_cause_IPI(msg, 0xffffffff); + break; + case MSG_ALL_BUT_SELF: + openpic_cause_IPI(msg, + 0xffffffff & ~(1 << smp_processor_id())); + break; + default: + openpic_cause_IPI(msg, 1<processorCount > 1) + openpic_request_IPIs(); + + return naca->processorCount; +} + +static void +smp_kick_cpu(int nr) +{ + /* Verify we have a Paca for processor nr */ + if ( ( nr <= 0 ) || + ( nr >= maxPacas ) ) + return; + + /* The processor is currently spinning, waiting + * for the xProcStart field to become non-zero + * After we set xProcStart, the processor will + * continue on to secondary_start in iSeries_head.S + */ + xPaca[nr].xProcStart = 1; +} + +extern struct gettimeofday_struct do_gtod; + +static void +smp_chrp_setup_cpu(int cpu_nr) +{ + static atomic_t ready = ATOMIC_INIT(1); + static volatile int frozen = 0; + + if (_machine == _MACH_pSeriesLP) { + /* timebases already synced under the hypervisor. */ + xPaca[cpu_nr].next_jiffy_update_tb = tb_last_stamp = get_tb(); + if (cpu_nr == 0) { + do_gtod.tb_orig_stamp = tb_last_stamp; + /* Should update do_gtod.stamp_xsec. + * For now we leave it which means the time can be some + * number of msecs off until someone does a settimeofday() + */ + } + smp_tb_synchronized = 1; + } else { + if (cpu_nr == 0) { + /* wait for all the others */ + while (atomic_read(&ready) < smp_num_cpus) + barrier(); + atomic_set(&ready, 1); + /* freeze the timebase */ + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + mb(); + frozen = 1; + set_tb(0, 0); + xPaca[0].next_jiffy_update_tb = 0; + while (atomic_read(&ready) < smp_num_cpus) + barrier(); + /* thaw the timebase again */ + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + mb(); + frozen = 0; + tb_last_stamp = get_tb(); + do_gtod.tb_orig_stamp = tb_last_stamp; + smp_tb_synchronized = 1; + } else { + atomic_inc(&ready); + while (!frozen) + barrier(); + set_tb(0, 0); + xPaca[cpu_nr].next_jiffy_update_tb = 0; + mb(); + atomic_inc(&ready); + while (frozen) + barrier(); + } + } + + if (OpenPIC_Addr) { + do_openpic_setup_cpu(); + } else { + if (cpu_nr > 0) + xics_setup_cpu(); + } +} + +static void +smp_xics_message_pass(int target, int msg, unsigned long data, int wait) +{ + int i; + + for (i = 0; i < smp_num_cpus; ++i) { + if (target == MSG_ALL || target == i + || (target == MSG_ALL_BUT_SELF + && i != smp_processor_id())) { + set_bit(msg, &xics_ipi_message[i]); + mb(); + xics_cause_IPI(i); + } + } +} + +static int +smp_xics_probe(void) +{ + return naca->processorCount; +} + +/* This is called very early */ +void smp_init_pSeries(void) +{ + if(naca->interrupt_controller == IC_OPEN_PIC) { + ppc_md.smp_message_pass = smp_openpic_message_pass; + ppc_md.smp_probe = smp_chrp_probe; + ppc_md.smp_kick_cpu = smp_kick_cpu; + ppc_md.smp_setup_cpu = smp_chrp_setup_cpu; + } else { + ppc_md.smp_message_pass = smp_xics_message_pass; + ppc_md.smp_probe = 
smp_xics_probe; + ppc_md.smp_kick_cpu = smp_kick_cpu; + ppc_md.smp_setup_cpu = smp_chrp_setup_cpu; + } +} + + +void smp_local_timer_interrupt(struct pt_regs * regs) +{ + if (!--(get_paca()->prof_counter)) { + update_process_times(user_mode(regs)); + (get_paca()->prof_counter)=get_paca()->prof_multiplier; + } +} + +void smp_message_recv(int msg, struct pt_regs *regs) +{ + atomic_inc(&ipi_recv); + + switch( msg ) { + case PPC_MSG_CALL_FUNCTION: + smp_call_function_interrupt(); + break; + case PPC_MSG_RESCHEDULE: + current->need_resched = 1; + break; +#ifdef CONFIG_XMON + case PPC_MSG_XMON_BREAK: + xmon(regs); + break; +#endif /* CONFIG_XMON */ +#ifdef CONFIG_KDB + case PPC_MSG_XMON_BREAK: + /* This isn't finished yet, obviously -TAI */ + kdb(KDB_REASON_KEYBOARD,0, (kdb_eframe_t) regs); + break; +#endif + default: + printk("SMP %d: smp_message_recv(): unknown msg %d\n", + smp_processor_id(), msg); + break; + } +} + +void smp_send_reschedule(int cpu) +{ + /* + * This is only used if `cpu' is running an idle task, + * so it will reschedule itself anyway... + * + * This isn't the case anymore since the other CPU could be + * sleeping and won't reschedule until the next interrupt (such + * as the timer). + * -- Cort + */ + /* This is only used if `cpu' is running an idle task, + so it will reschedule itself anyway... */ + smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0); +} + +#ifdef CONFIG_XMON +void smp_send_xmon_break(int cpu) +{ + smp_message_pass(cpu, PPC_MSG_XMON_BREAK, 0, 0); +} +#endif /* CONFIG_XMON */ + +static void stop_this_cpu(void *dummy) +{ + __cli(); + while (1) + ; +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_num_cpus = 1; +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + * Stolen from the i386 version. + */ +static spinlock_t call_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + +static struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +} *call_data; + +/* + * This function sends a 'generic call function' IPI to all other CPUs + * in the system. + * + * [SUMMARY] Run a function on all other CPUs. + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * currently unused. + * If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, you may call it from a bottom half handler. 
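+ *
+ * A hypothetical usage sketch (drain_local_queue is an illustrative
+ * name, not something defined in this patch):
+ *
+ *	static void drain_local_queue(void *unused) { ... fast, non-blocking ... }
+ *	...
+ *	if (smp_call_function(drain_local_queue, NULL, 0, 1) != 0)
+ *		printk("drain: some cpus did not respond\n");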
+ */ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) + +{ + struct call_data_struct data; + int ret = -1, cpus = smp_num_cpus-1; + int timeout; + + if (!cpus) + return 0; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock_bh(&call_lock); + call_data = &data; + /* Send a message to all other CPUs and wait for them to respond */ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION, 0, 0); + + /* Wait for response */ + timeout = 8000000; + while (atomic_read(&data.started) != cpus) { + HMT_low(); + if (--timeout == 0) { + printk("smp_call_function on cpu %d: other cpus not responding (%d)\n", + smp_processor_id(), atomic_read(&data.started)); +#ifdef CONFIG_XMON + xmon(0); +#endif +#ifdef CONFIG_PPC_ISERIES + HvCall_terminateMachineSrc(); +#endif + goto out; + } + barrier(); + udelay(1); + } + + if (wait) { + timeout = 1000000; + while (atomic_read(&data.finished) != cpus) { + HMT_low(); + if (--timeout == 0) { + printk("smp_call_function on cpu %d: other cpus not finishing (%d/%d)\n", + smp_processor_id(), atomic_read(&data.finished), atomic_read(&data.started)); +#ifdef CONFIG_PPC_ISERIES + HvCall_terminateMachineSrc(); +#endif + goto out; + } + barrier(); + udelay(1); + } + } + ret = 0; + + out: + HMT_medium(); + spin_unlock_bh(&call_lock); + return ret; +} + +void smp_call_function_interrupt(void) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) + atomic_inc(&call_data->finished); +} + +static void smp_space_timers( unsigned nr ) +{ + unsigned long offset, i; + + offset = tb_ticks_per_jiffy / nr; + for ( i=1; iprocessor = 0; + + init_idle(); + + for (i = 0; i < NR_CPUS; i++) { + paca = &xPaca[i]; + paca->prof_counter=1; + paca->prof_multiplier = 1; + if(i != 0) { + /* + * Processor 0's segment table is statically + * initialized to real address 0x5000. The + * Other processor's tables are created and + * initialized here. + */ + paca->xStab_data.virt = (unsigned long)&stab_array[PAGE_SIZE * (i-1)]; + memset((void *)paca->xStab_data.virt, 0, PAGE_SIZE); + paca->xStab_data.real = __v2a(paca->xStab_data.virt); + paca->default_decr = tb_ticks_per_jiffy / decr_overclock; + } + } + + /* + * XXX very rough, assumes 20 bus cycles to read a cache line, + * timebase increments every 4 bus cycles, 32kB L1 data cache. + */ + cacheflush_time = 5 * 1024; + + /* Probe arch for CPUs */ + cpu_nr = ppc_md.smp_probe(); + + printk("Probe found %d CPUs\n", cpu_nr); + + /* + * only check for cpus we know exist. We keep the callin map + * with cpus at the bottom -- Cort + */ + if (cpu_nr > max_cpus) + cpu_nr = max_cpus; + + smp_space_timers( cpu_nr ); + + printk("Waiting for %d CPUs\n", cpu_nr-1); + + for ( i = 1 ; i < cpu_nr; i++ ) { + int c; + struct pt_regs regs; + + /* create a process for the processor */ + /* we don't care about the values in regs since we'll + never reschedule the forked task. */ + /* We DO care about one bit in the pt_regs we + pass to do_fork. That is the MSR_FP bit in + regs.msr. If that bit is on, then do_fork + (via copy_thread) will call giveup_fpu. 
giveup_fpu will get a pointer to our (current's) + last register savearea via current->thread.regs + and using that pointer will turn off the MSR_FP, + MSR_FE0 and MSR_FE1 bits. At this point, this + pointer is pointing to some arbitrary point within + our stack */ + + memset(&regs, 0, sizeof(struct pt_regs)); + + if (do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0) < 0) + panic("failed fork for CPU %d", i); + p = init_task.prev_task; + if (!p) + panic("No idle task for CPU %d", i); + + PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p); + + del_from_runqueue(p); + unhash_process(p); + init_tasks[i] = p; + + p->processor = i; + p->has_cpu = 1; + current_set[i].task = p; + sp = ((unsigned long)p) + sizeof(union task_union) + - STACK_FRAME_OVERHEAD; + current_set[i].sp_real = (void *)__v2a(sp); + + /* wake up cpus */ + ppc_md.smp_kick_cpu(i); + + /* + * wait to see if the cpu made a callin (is actually up). + * use this value that I found through experimentation. + * -- Cort + */ + for ( c = 5000; c && !cpu_callin_map[i] ; c-- ) { + udelay(100); + } + + if ( cpu_callin_map[i] ) + { + printk("Processor %d found.\n", i); + PPCDBG(PPCDBG_SMP, "\tProcessor %d found.\n", i); + /* this sync's the decr's -- Cort */ + smp_num_cpus++; + } else { + printk("Processor %d is stuck.\n", i); + PPCDBG(PPCDBG_SMP, "\tProcessor %d is stuck.\n", i); + } + } + + /* Setup CPU 0 last (important) */ + ppc_md.smp_setup_cpu(0); + + if (smp_num_cpus < 2) { + tb_last_stamp = get_tb(); + smp_tb_synchronized = 1; + } +} + +void __init smp_commence(void) +{ + /* + * Lets the callin's below out of their loop. + */ + PPCDBG(PPCDBG_SMP, "smp_commence: start\n"); + wmb(); + smp_commenced = 1; +} + +void __init smp_callin(void) +{ + int cpu = current->processor; + + smp_store_cpu_info(cpu); + set_dec(xPaca[cpu].default_decr); + cpu_callin_map[cpu] = 1; + + ppc_md.smp_setup_cpu(cpu); + + init_idle(); + + set_bit(smp_processor_id(), &cpu_online_map); + + while(!smp_commenced) { + barrier(); + } + __sti(); +} + +/* intel needs this */ +void __init initialize_secondary(void) +{ +} + +/* Activate a secondary processor. */ +int start_secondary(void *unused) +{ + int cpu; + + cpu = current->processor; + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + smp_callin(); + + /* Go into the idle loop. */ + return cpu_idle(NULL); +} + +void __init smp_setup(char *str, int *ints) +{ +} + +int __init setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +/* this function is called for each processor + */ +void __init smp_store_cpu_info(int id) +{ + xPaca[id].pvr = _get_PVR(); +} + +static int __init maxcpus(char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/stab.c linuxppc64_2_4/arch/ppc64/kernel/stab.c --- ../kernel.org/linux/arch/ppc64/kernel/stab.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/stab.c Thu Nov 8 23:13:41 2001 @@ -0,0 +1,357 @@ +/* + * PowerPC64 Segment Translation Support. + * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
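 + * + * The code below keeps two paths: on pre-POWER4 processors, ESID-to-VSID + * translations are cached in the software-managed segment table (STAB), + * while POWER4 uses the SLB instead, so each primitive comes in a + * make_ste/make_slbe pair selected by __is_processor(PV_POWER4).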
 + */ + +#include +#include +#include +#include +#include +#include + +inline int make_ste(unsigned long stab, + unsigned long esid, unsigned long vsid); +inline void make_slbe(unsigned long esid, unsigned long vsid, + int large); +extern struct Naca *naca; + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long esid, vsid; + + esid = GET_ESID(KERNELBASE); + vsid = get_kernel_vsid(esid << SID_SHIFT); + + if (!__is_processor(PV_POWER4)) { + __asm__ __volatile__("isync; slbia; isync":::"memory"); + make_ste(stab, esid, vsid); + } else { + /* Invalidate the entire SLB & all the ERATS */ + __asm__ __volatile__("isync" : : : "memory"); +#ifndef CONFIG_PPC_ISERIES + __asm__ __volatile__("slbmte %0,%0" + : : "r" (0) : "memory"); +#endif + __asm__ __volatile__("isync; slbia; isync":::"memory"); + make_slbe(esid, vsid, 0); + } +} + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +inline int +make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + STE *ste, *castout_ste; + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + + /* + * Find an empty entry, if one exists. + */ + for(group = 0; group < 2; group++) { + for(entry = 0; entry < 8; entry++, ste++) { + if(!(ste->dw0.dw0.v)) { + ste->dw1.dw1.vsid = vsid; + /* Order VSID update */ + __asm__ __volatile__ ("eieio" : : : "memory"); + ste->dw0.dw0.esid = esid; + ste->dw0.dw0.v = 1; + ste->dw0.dw0.kp = 1; + /* Order update */ + __asm__ __volatile__ ("sync" : : : "memory"); + + return(global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. Note that the first time + * we get here, we start with entry 1 so the initializer + * can be common with the SLB castout code. + */ + + /* This assumes we never castout when initializing the stab. */ + PMC_SW_PROCESSOR(stab_capacity_castouts); + + castout_entry = get_paca()->xStab_data.next_round_robin; + for(i = 0; i < 16; i++) { + if(castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + if((((castout_ste->dw0.dw0.esid) >> 32) == 0) || + (((castout_ste->dw0.dw0.esid) & 0xffffffff) > 0)) { + /* Found an entry to castout. It is either a user */ + /* region, or a secondary kernel segment. */ + break; + } + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->xStab_data.next_round_robin = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete.
DRENG */ + __asm__ __volatile__ ("isync" : : : "memory" ); + + castout_ste->dw0.dw0.v = 0; + __asm__ __volatile__ ("sync" : : : "memory" ); /* Order update */ + castout_ste->dw1.dw1.vsid = vsid; + __asm__ __volatile__ ("eieio" : : : "memory" ); /* Order update */ + old_esid = castout_ste->dw0.dw0.esid; + castout_ste->dw0.dw0.esid = esid; + castout_ste->dw0.dw0.v = 1; + castout_ste->dw0.dw0.kp = 1; + __asm__ __volatile__ ("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + __asm__ __volatile__ ("sync" : : : "memory" ); + + return(global_entry | (castout_entry & 0x7)); +} + +/* + * Create a segment buffer entry for the given esid/vsid pair. + */ +inline void make_slbe(unsigned long esid, unsigned long vsid, int large) +{ + unsigned long entry, castout_entry; + slb_dword0 castout_esid_data; + union { + unsigned long word0; + slb_dword0 data; + } esid_data; + union { + unsigned long word0; + slb_dword1 data; + } vsid_data; + + /* + * Find an empty entry, if one exists. + */ + for(entry = 0; entry < naca->slb_size; entry++) { + __asm__ __volatile__("slbmfee %0,%1" + : "=r" (esid_data) : "r" (entry)); + if(!esid_data.data.v) { + /* + * Write the new SLB entry. + */ + vsid_data.word0 = 0; + vsid_data.data.vsid = vsid; + vsid_data.data.kp = 1; + if (large) + vsid_data.data.l = 1; + + esid_data.word0 = 0; + esid_data.data.esid = esid; + esid_data.data.v = 1; + esid_data.data.index = entry; + + /* slbie not needed as no previous mapping existed. */ + /* Order update */ + __asm__ __volatile__ ("isync" : : : "memory"); + __asm__ __volatile__ ("slbmte %0,%1" + : : "r" (vsid_data), + "r" (esid_data)); + /* Order update */ + __asm__ __volatile__ ("isync" : : : "memory"); + return; + } + } + + /* + * Could not find empty entry, pick one with a round robin selection. + */ + + PMC_SW_PROCESSOR(stab_capacity_castouts); + + castout_entry = get_paca()->xStab_data.next_round_robin; + __asm__ __volatile__("slbmfee %0,%1" + : "=r" (castout_esid_data) + : "r" (castout_entry)); + + entry = castout_entry; + castout_entry++; + if(castout_entry >= naca->slb_size) { + castout_entry = 1; + } + get_paca()->xStab_data.next_round_robin = castout_entry; + + /* Invalidate the old entry. */ + castout_esid_data.v = 0; /* Set the class to 0 */ + /* slbie is needed here, since the previous mapping is still valid. */ + __asm__ __volatile__("slbie %0" : : "r" (castout_esid_data)); + + /* + * Write the new SLB entry. + */ + vsid_data.word0 = 0; + vsid_data.data.vsid = vsid; + vsid_data.data.kp = 1; + if (large) + vsid_data.data.l = 1; + + esid_data.word0 = 0; + esid_data.data.esid = esid; + esid_data.data.v = 1; + esid_data.data.index = entry; + + __asm__ __volatile__ ("isync" : : : "memory"); /* Order update */ + __asm__ __volatile__ ("slbmte %0,%1" + : : "r" (vsid_data), "r" (esid_data)); + __asm__ __volatile__ ("isync" : : : "memory" ); /* Order update */ +} + +/* + * Allocate a segment table entry for the given ea. + */ +int ste_allocate ( unsigned long ea, + unsigned long trap) +{ + unsigned long vsid, esid; + int kernel_segment = 0; + + PMC_SW_PROCESSOR(stab_faults); + + /* Check for invalid effective addresses. */ + if (!IS_VALID_EA(ea)) { + return 1; + } + + /* Kernel or user address?
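 For example, with the usual ppc64 KERNELBASE of 0xC000000000000000 and 256MB segments (SID_SHIFT = 28), ea = 0xC000000000001000 gives GET_ESID(ea) = ea >> 28 = 0xC00000000, and REGION_ID(ea) places it in the kernel region, so the get_kernel_vsid() path below is taken.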
*/ + if (REGION_ID(ea) >= KERNEL_REGION_ID) { + kernel_segment = 1; + vsid = get_kernel_vsid( ea ); + } else { + struct mm_struct *mm = current->mm; + if ( mm ) { + vsid = get_vsid(mm->context, ea ); + } else { + return 1; + } + } + + esid = GET_ESID(ea); + if (trap == 0x380 || trap == 0x480) { +#ifndef CONFIG_PPC_ISERIES + if (REGION_ID(ea) == KERNEL_REGION_ID) + make_slbe(esid, vsid, 1); + else +#endif + make_slbe(esid, vsid, 0); + } else { + unsigned char top_entry, stab_entry, *segments; + + stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid); + PMC_SW_PROCESSOR_A(stab_entry_use, stab_entry & 0xf); + + segments = get_paca()->xSegments; + top_entry = segments[0]; + if(!kernel_segment && top_entry < (STAB_CACHE_SIZE - 1)) { + top_entry++; + segments[top_entry] = stab_entry; + if(top_entry == STAB_CACHE_SIZE - 1) top_entry = 0xff; + segments[0] = top_entry; + } + } + + return(0); +} + +/* + * Flush all entries from the segment table of the current processor. + * Kernel and Bolted entries are not removed as we cannot tolerate + * faults on those addresses. + */ + +#define STAB_PRESSURE 0 + +void flush_stab(void) +{ + STE *stab = (STE *) get_paca()->xStab_data.virt; + unsigned char *segments = get_paca()->xSegments; + unsigned long flags, i; + + if(!__is_processor(PV_POWER4)) { + unsigned long entry; + STE *ste; + + /* Force previous translations to complete. DRENG */ + __asm__ __volatile__ ("isync" : : : "memory"); + + __save_and_cli(flags); + if(segments[0] != 0xff && !STAB_PRESSURE) { + for(i = 1; i <= segments[0]; i++) { + ste = stab + segments[i]; + ste->dw0.dw0.v = 0; + PMC_SW_PROCESSOR(stab_invalidations); + } + } else { + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for(entry = 1; + entry < (PAGE_SIZE / sizeof(STE)); + entry++, ste++) { + unsigned long ea; + ea = ste->dw0.dw0.esid << SID_SHIFT; + if (STAB_PRESSURE || ea < KERNELBASE) { + ste->dw0.dw0.v = 0; + PMC_SW_PROCESSOR(stab_invalidations); + } + } + } + + *((unsigned long *)segments) = 0; + __restore_flags(flags); + + /* Invalidate the SLB. */ + /* Force invals to complete. */ + __asm__ __volatile__ ("sync" : : : "memory"); + /* Flush the SLB. */ + __asm__ __volatile__ ("slbia" : : : "memory"); + /* Force flush to complete. */ + __asm__ __volatile__ ("sync" : : : "memory"); + } else { + unsigned long flags; + + PMC_SW_PROCESSOR(stab_invalidations); + + __save_and_cli(flags); + __asm__ __volatile__("isync; slbia; isync":::"memory"); + __restore_flags(flags); + } +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/sys32.S linuxppc64_2_4/arch/ppc64/kernel/sys32.S --- ../kernel.org/linux/arch/ppc64/kernel/sys32.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/sys32.S Mon Jun 18 13:47:18 2001 @@ -0,0 +1,243 @@ +/* + * sys32.S: I-cache tricks for 32-bit compatibility layer simple + * conversions. + * + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) + * For the PPC ABI, the convention is parms in regs 3-10. + * The router in entry.S clears the high 32 bits in the first + * 4 arguments (R3-R6). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
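 + * + * Since entry.S only cleans r3-r6, a stub whose 5th or later argument is a + * pointer or unsigned value must zero-extend it, and one taking a signed + * 32-bit value must sign-extend it; those are the two fixups used + * throughout this file: + * + *     clrldi  rN, rN, 32     (zero-extend: pointers, unsigned ints) + *     extsw   rN, rN         (sign-extend: int, off_t)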
+ */ + +#include "ppc_asm.h" +#include +#include + +/* NOTE: call as jump breaks return stack, we have to avoid that */ + + .text + +_GLOBAL(sys32_mmap) + clrldi r7, r7, 32 /* int fd parm */ + clrldi r8, r8, 32 /* off_t offset parm */ + b .sys_mmap + +_GLOBAL(sys32_lseek) + extsw r4,r4 /* sign extend off_t offset parm */ + b .sys_lseek + +_GLOBAL(sys32_chmod) +/* Ken Aaker.. hmmm maybe I don't need to do anything here */ + b .sys_chmod + +_GLOBAL(sys32_mknod) +/* Ken Aaker.. hmmm maybe I don't need to do anything here */ + b .sys_mknod + +_GLOBAL(sys32_sendto) + clrldi r7, r7, 32 /* struct sockaddr *addr parm */ + clrldi r8, r8, 32 /* int addr_len parm */ + b .sys_sendto + +_GLOBAL(sys32_recvfrom) + clrldi r7, r7, 32 /* struct sockaddr *addr parm */ + clrldi r8, r8, 32 /* int *addr_len parm */ + b .sys_recvfrom + +_GLOBAL(sys32_getsockopt) + clrldi r7, r7, 32 /* int *optlen parm */ + b .sys_getsockopt + +_GLOBAL(sys32_bdflush) + extsw r4,r4 /* sign extend long data parm */ + b .sys_bdflush + +_GLOBAL(sys32_mmap2) + clrldi r7, r7, 32 /* unsigned long fd parm */ + extsw r8, r8 /* off_t offset */ + b .sys_mmap + +_GLOBAL(sys32_socketcall) /* r3=call, r4=args */ + cmpwi r3, 1 + blt- .do_einval + cmpwi r3, 17 + bgt- .do_einval + subi r3, r3, 1 /* index into socketcall_table vectors and jmp */ + sldi r3, r3, 3 /* each entry is 8 bytes */ + LOADADDR(r10,.socketcall_table_begin) + ldx r10, r10, r3 + mtctr r10 + bctr + +/* Socket function vectored fix ups for 32 bit */ +_STATIC(do_sys_socket) /* sys_socket(int, int, int) */ + mr r10,r4 + lwa r3,0(r10) + lwa r4,4(r10) + lwa r5,8(r10) + b .sys_socket + +_STATIC(do_sys_bind) /* sys_bind(int fd, struct sockaddr *, int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwa r5,8(r10) + b .sys_bind + +_STATIC(do_sys_connect) /* sys_connect(int, struct sockaddr *, int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwa r5,8(r10) + b .sys_connect + +_STATIC(do_sys_listen) /* sys_listen(int, int) */ + mr r10,r4 + lwa r3,0(r10) + lwa r4,4(r10) + b .sys_listen + +_STATIC(do_sys_accept) /* sys_accept(int, struct sockaddr *, int *) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + b .sys_accept + +_STATIC(do_sys_getsockname) /* sys_getsockname(int, struct sockaddr *, int *) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + b .sys_getsockname + +_STATIC(do_sys_getpeername) /* sys_getpeername(int, struct sockaddr *, int *) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + b .sys_getpeername + +_STATIC(do_sys_socketpair) /* sys_socketpair(int, int, int, int *) */ + mr r10,r4 + lwa r3,0(r10) + lwa r4,4(r10) + lwa r5,8(r10) + lwz r6,12(r10) + b .sys_socketpair + +_STATIC(do_sys_send) /* sys_send(int, void *, size_t, unsigned int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + lwz r6,12(r10) + b .sys_send + +_STATIC(do_sys_recv) /* sys_recv(int, void *, size_t, unsigned int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + lwz r6,12(r10) + b .sys_recv + +_STATIC(do_sys_sendto) /* sys32_sendto(int, u32, __kernel_size_t32, unsigned int, u32, int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + lwz r6,12(r10) + lwz r7,16(r10) + lwa r8,20(r10) + b .sys32_sendto + +_STATIC(do_sys_recvfrom) /* sys32_recvfrom(int, u32, __kernel_size_t32, unsigned int, u32, u32) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwz r5,8(r10) + lwz r6,12(r10) + lwz r7,16(r10) + lwz r8,20(r10) + b .sys32_recvfrom + +_STATIC(do_sys_shutdown) /* sys_shutdown(int, int) */ + mr r10,r4 + lwa r3,0(r10) + 
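/* each socketcall argument occupies one 32-bit slot in the args block: lwa sign-extends int arguments, lwz zero-extends pointer arguments */ + 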
lwa r4,4(r10) + b .sys_shutdown + +_STATIC(do_sys_setsockopt) /* sys32_setsockopt(int, int, int, char *, int) */ + mr r10,r4 + lwa r3,0(r10) + lwa r4,4(r10) + lwa r5,8(r10) + lwz r6,12(r10) + lwa r7,16(r10) + b .sys32_setsockopt + +_STATIC(do_sys_getsockopt) /* sys32_getsockopt(int, int, int, u32, u32) */ + mr r10,r4 + lwa r3,0(r10) + lwa r4,4(r10) + lwa r5,8(r10) + lwz r6,12(r10) + lwz r7,16(r10) + b .sys32_getsockopt + +_STATIC(do_sys_sendmsg) /* sys32_sendmsg(int, struct msghdr32 *, unsigned int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwa r5,8(r10) + b .sys32_sendmsg + +_STATIC(do_sys_recvmsg) /* sys32_recvmsg(int, struct msghdr32 *, unsigned int) */ + mr r10,r4 + lwa r3,0(r10) + lwz r4,4(r10) + lwa r5,8(r10) + b .sys32_recvmsg + +_STATIC(do_einval) + li r3,-EINVAL + b .ret_from_syscall_1 +_STATIC(do_efault) + li r3,-EFAULT + b .ret_from_syscall_1 + + .data + .align 8 +_GLOBAL(socketcall_table_begin) + .llong .do_sys_socket + .llong .do_sys_bind + .llong .do_sys_connect + .llong .do_sys_listen + .llong .do_sys_accept + .llong .do_sys_getsockname + .llong .do_sys_getpeername + .llong .do_sys_socketpair + .llong .do_sys_send + .llong .do_sys_recv + .llong .do_sys_sendto + .llong .do_sys_recvfrom + .llong .do_sys_shutdown + .llong .do_sys_setsockopt + .llong .do_sys_getsockopt + .llong .do_sys_sendmsg + .llong .do_sys_recvmsg +_GLOBAL(socketcall_table_end) + .section __ex_table,"a" + .align 3 + .llong .socketcall_table_begin + .llong 0 + .llong .socketcall_table_end + .llong .do_efault + .previous diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/sys_ppc32.c linuxppc64_2_4/arch/ppc64/kernel/sys_ppc32.c --- ../kernel.org/linux/arch/ppc64/kernel/sys_ppc32.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/sys_ppc32.c Wed Nov 28 19:03:54 2001 @@ -0,0 +1,4809 @@ +/* + * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls. + * + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + +extern unsigned long wall_jiffies; +#define USEC_PER_SEC (1000000) + +/* + * These are the flags in the MSR that the user is allowed to change + * by modifying the saved value of the MSR on the stack. SE and BE + * should not be in this list since gdb may want to change these. I.e., + * you should be able to step out of a signal handler to see what + * instruction executes next after the signal handler completes. + * Alternately, if you stepped into a signal handler, you should be + * able to continue 'til the next breakpoint from within the signal + * handler, even if the handler returns.
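 + * In practice that leaves only the floating-point exception mode bits FE0 + * and FE1, which is exactly the mask defined below.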
 + */ +#define MSR_USERCHANGE (MSR_FE0 | MSR_FE1) + +/* Use this to get at 32-bit user passed pointers. */ +/* Things to consider: the low-level assembly stub does + srl x, 0, x for first four arguments, so if you have + pointer to something in the first four arguments, just + declare it as a pointer, not u32. On the other side, + arguments from 5th onwards should be declared as u32 + for pointers, and need AA() around each usage. + A() macro should be used for places where you e.g. + have some internal variable u32 and just want to get + rid of a compiler warning. AA() has to be used in + places where you want to convert a function argument + to 32bit pointer or when you e.g. access pt_regs + structure and want to consider 32bit registers only. + - + */ +#define A(__x) ((unsigned long)(__x)) +#define AA(__x) \ +({ unsigned long __ret; \ + __asm__ ("clrldi %0, %0, 32" \ + : "=r" (__ret) \ + : "0" (__x)); \ + __ret; \ +}) + + + +/* In order to reduce some races, while at the same time doing additional + * checking and hopefully speeding things up, we copy filenames to the + * kernel data space before using them.. + * + * POSIX.1 2.4: an empty pathname is invalid (ENOENT). + */ +static inline int do_getname32(const char *filename, char *page) +{ + int retval; + + /* 32bit pointer will be always far below TASK_SIZE :)) */ + retval = strncpy_from_user((char *)page, (char *)filename, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) + return 0; + return -ENAMETOOLONG; + } else if (!retval) + retval = -ENOENT; + return retval; +} + +char * getname32(const char *filename) +{ + char *tmp, *result; + + result = ERR_PTR(-ENOMEM); + tmp = __getname(); + if (tmp) { + int retval = do_getname32(filename, tmp); + + result = tmp; + if (retval < 0) { + putname(tmp); + result = ERR_PTR(retval); + } + } + return result; +} + + + +extern asmlinkage long sys_utime(char * filename, struct utimbuf * times); + +struct utimbuf32 { + __kernel_time_t32 actime, modtime; +}; + +asmlinkage long sys32_utime(char * filename, struct utimbuf32 *times) +{ + struct utimbuf t; + mm_segment_t old_fs; + int ret; + char *filenam; + + PPCDBG(PPCDBG_SYS32NI, "sys32_utime - running - filename=%s, times=%p - pid=%ld, comm=%s \n", filename, times, current->pid, current->comm); + + if (!times) + return sys_utime(filename, NULL); + if (get_user(t.actime, &times->actime) || __get_user(t.modtime, &times->modtime)) + return -EFAULT; + filenam = getname32(filename); + + ret = PTR_ERR(filenam); + if (!IS_ERR(filenam)) { + old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = sys_utime(filenam, &t); + set_fs (old_fs); + putname (filenam); + } + + return ret; +} + + + +struct iovec32 { u32 iov_base; __kernel_size_t32 iov_len; }; + +typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); + +static long do_readv_writev32(int type, struct file *file, + const struct iovec32 *vector, u32 count) +{ + unsigned long tot_len; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov=iovstack, *ivp; + struct inode *inode; + long retval, i; + IO_fn_t fn; + + /* First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (!count) + return 0; + if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count)) + return -EFAULT; + if (count > UIO_MAXIOV) + return -EINVAL; + if (count > UIO_FASTIOV) { + iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); + if (!iov) + return -ENOMEM; + } + + tot_len = 0; + i = count; + ivp = iov; + while(i > 0) { + u32 len; + u32 buf; + + __get_user(len, &vector->iov_len); + 
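/* widen the 32-bit iovec in place: len above, base below, stored into a native struct iovec */ + 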
__get_user(buf, &vector->iov_base); + tot_len += len; + ivp->iov_base = (void *)A(buf); + ivp->iov_len = (__kernel_size_t) len; + vector++; + ivp++; + i--; + } + + inode = file->f_dentry->d_inode; + /* VERIFY_WRITE actually means a read, as we write to user space */ + retval = locks_verify_area((type == VERIFY_WRITE + ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), + inode, file, file->f_pos, tot_len); + if (retval) { + if (iov != iovstack) + kfree(iov); + return retval; + } + + /* Then do the actual IO. Note that sockets need to be handled + * specially as they have atomicity guarantees and can handle + * iovec's natively + */ + if (inode->i_sock) { + int err; + err = sock_readv_writev(type, inode, file, iov, count, tot_len); + if (iov != iovstack) + kfree(iov); + return err; + } + + if (!file->f_op) { + if (iov != iovstack) + kfree(iov); + return -EINVAL; + } + /* VERIFY_WRITE actually means a read, as we write to user space */ + fn = file->f_op->read; + if (type == VERIFY_READ) + fn = (IO_fn_t) file->f_op->write; + ivp = iov; + while (count > 0) { + void * base; + int len, nr; + + base = ivp->iov_base; + len = ivp->iov_len; + ivp++; + count--; + nr = fn(file, base, len, &file->f_pos); + if (nr < 0) { + if (retval) + break; + retval = nr; + break; + } + retval += nr; + if (nr != len) + break; + } + if (iov != iovstack) + kfree(iov); + return retval; +} + +asmlinkage long sys32_readv(u32 fd, struct iovec32 *vector, u32 count) +{ + struct file *file; + long ret = -EBADF; + + PPCDBG(PPCDBG_SYS32, "sys32_readv - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + file = fget(fd); + if(!file) + goto bad_file; + + if (file->f_op && (file->f_mode & FMODE_READ) && + (file->f_op->readv || file->f_op->read)) + ret = do_readv_writev32(VERIFY_WRITE, file, vector, count); + fput(file); + +bad_file: + PPCDBG(PPCDBG_SYS32, "sys32_readv - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return ret; +} + +asmlinkage long sys32_writev(u32 fd, struct iovec32 *vector, u32 count) +{ + struct file *file; + int ret = -EBADF; + + PPCDBG(PPCDBG_SYS32, "sys32_writev - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + file = fget(fd); + if(!file) + goto bad_file; + if (file->f_op && (file->f_mode & FMODE_WRITE) && + (file->f_op->writev || file->f_op->write)) + ret = do_readv_writev32(VERIFY_READ, file, vector, count); + fput(file); + +bad_file: + PPCDBG(PPCDBG_SYS32, "sys32_writev - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return ret; +} + + + +static inline int get_flock(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int put_flock(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); +asmlinkage long sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + { + struct flock f; 
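 + /* plan: widen the user's flock32 into f, run sys_fcntl on the kernel copy under KERNEL_DS, then narrow the result back to the user */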
+ mm_segment_t old_fs; + long ret; + + if(get_flock(&f, (struct flock32 *)arg)) + return -EFAULT; + old_fs = get_fs(); set_fs (KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs (old_fs); + if(put_flock(&f, (struct flock32 *)arg)) + return -EFAULT; + return ret; + } + default: + return sys_fcntl(fd, cmd, (unsigned long)arg); + } +} + +struct ncp_mount_data32 { + int version; + unsigned int ncp_fd; + __kernel_uid_t32 mounted_uid; + __kernel_pid_t32 wdog_pid; + unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; + unsigned int time_out; + unsigned int retry_count; + unsigned int flags; + __kernel_uid_t32 uid; + __kernel_gid_t32 gid; + __kernel_mode_t32 file_mode; + __kernel_mode_t32 dir_mode; +}; + +static void *do_ncp_super_data_conv(void *raw_data) +{ + struct ncp_mount_data *n = (struct ncp_mount_data *)raw_data; + struct ncp_mount_data32 *n32 = (struct ncp_mount_data32 *)raw_data; + + n->dir_mode = n32->dir_mode; + n->file_mode = n32->file_mode; + n->gid = n32->gid; + n->uid = n32->uid; + memmove (n->mounted_vol, n32->mounted_vol, (sizeof (n32->mounted_vol) + 3 * sizeof (unsigned int))); + n->wdog_pid = n32->wdog_pid; + n->mounted_uid = n32->mounted_uid; + return raw_data; +} + +struct smb_mount_data32 { + int version; + __kernel_uid_t32 mounted_uid; + __kernel_uid_t32 uid; + __kernel_gid_t32 gid; + __kernel_mode_t32 file_mode; + __kernel_mode_t32 dir_mode; +}; + +static void *do_smb_super_data_conv(void *raw_data) +{ + struct smb_mount_data *s = (struct smb_mount_data *)raw_data; + struct smb_mount_data32 *s32 = (struct smb_mount_data32 *)raw_data; + + s->version = s32->version; + s->mounted_uid = s32->mounted_uid; + s->uid = s32->uid; + s->gid = s32->gid; + s->file_mode = s32->file_mode; + s->dir_mode = s32->dir_mode; + return raw_data; +} + +static int copy_mount_stuff_to_kernel(const void *user, unsigned long *kernel) +{ + int i; + unsigned long page; + struct vm_area_struct *vma; + + *kernel = 0; + if(!user) + return 0; + vma = find_vma(current->mm, (unsigned long)user); + if(!vma || (unsigned long)user < vma->vm_start) + return -EFAULT; + if(!(vma->vm_flags & VM_READ)) + return -EFAULT; + i = vma->vm_end - (unsigned long) user; + if(PAGE_SIZE <= (unsigned long) i) + i = PAGE_SIZE - 1; + if(!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + if(copy_from_user((void *) page, user, i)) { + free_page(page); + return -EFAULT; + } + *kernel = page; + return 0; +} + +#define SMBFS_NAME "smbfs" +#define NCPFS_NAME "ncpfs" + +asmlinkage long sys32_mount(char *dev_name, char *dir_name, char *type, unsigned long new_flags, u32 data) +{ + unsigned long type_page = 0; + unsigned long data_page = 0; + unsigned long dev_page = 0; + unsigned long dir_page = 0; + int err, is_smb, is_ncp; + + PPCDBG(PPCDBG_SYS32, "sys32_mount - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + is_smb = is_ncp = 0; + + err = copy_mount_stuff_to_kernel((const void *)type, &type_page); + if (err) + goto out; + + if (!type_page) { + err = -EINVAL; + goto out; + } + + is_smb = !strcmp((char *)type_page, SMBFS_NAME); + is_ncp = !strcmp((char *)type_page, NCPFS_NAME); + + err = copy_mount_stuff_to_kernel((const void *)AA(data), &data_page); + if (err) + goto type_out; + + err = copy_mount_stuff_to_kernel(dev_name, &dev_page); + if (err) + goto data_out; + + err = copy_mount_stuff_to_kernel(dir_name, &dir_page); + if (err) + goto dev_out; + + if (!is_smb && !is_ncp) { + lock_kernel(); + err = do_mount((char*)dev_page, (char*)dir_page, + (char*)type_page, new_flags, 
(char*)data_page); + unlock_kernel(); + } else { + if (is_ncp) + do_ncp_super_data_conv((void *)data_page); + else + do_smb_super_data_conv((void *)data_page); + + lock_kernel(); + err = do_mount((char*)dev_page, (char*)dir_page, + (char*)type_page, new_flags, (char*)data_page); + unlock_kernel(); + } + free_page(dir_page); + +dev_out: + free_page(dev_page); + +data_out: + free_page(data_page); + +type_out: + free_page(type_page); + +out: + + PPCDBG(PPCDBG_SYS32, "sys32_mount - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return err; +} + +struct dqblk32 { + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u32 dqb_curblocks; + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; + + +extern asmlinkage long sys_quotactl(int cmd, const char *special, int id, caddr_t addr); + +/* Note: it is necessary to treat cmd and id as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_quotactl(u32 cmd_parm, const char *special, u32 id_parm, unsigned long addr) +{ + int cmd = (int)cmd_parm; + int id = (int)id_parm; + int cmds = cmd >> SUBCMDSHIFT; + int err; + struct dqblk d; + mm_segment_t old_fs; + char *spec; + + PPCDBG(PPCDBG_SYS32, "sys32_quotactl - entered - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + switch (cmds) { + case Q_GETQUOTA: + break; + case Q_SETQUOTA: + case Q_SETUSE: + case Q_SETQLIM: + if (copy_from_user (&d, (struct dqblk32 *)addr, + sizeof (struct dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime; + break; + default: + return sys_quotactl(cmd, special, + id, (caddr_t)addr); + } + spec = getname32 (special); + err = PTR_ERR(spec); + if (IS_ERR(spec)) return err; + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d); + set_fs (old_fs); + putname (spec); + if (cmds == Q_GETQUOTA) { + __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; + ((struct dqblk32 *)&d)->dqb_itime = i; + ((struct dqblk32 *)&d)->dqb_btime = b; + if (copy_to_user ((struct dqblk32 *)addr, &d, + sizeof (struct dqblk32))) + return -EFAULT; + } + + PPCDBG(PPCDBG_SYS32, "sys32_quotactl - exited - pid=%ld current=%lx comm=%s \n", + current->pid, current, current->comm); + + return err; +} + + + +/* readdir & getdents */ +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1)) + +struct old_linux_dirent32 { + u32 d_ino; + u32 d_offset; + unsigned short d_namlen; + /* unsigned char d_type; */ + char d_name[1]; +}; + +struct readdir_callback32 { + struct old_linux_dirent32 * dirent; + int count; +}; + +static int fillonedir(void * __buf, const char * name, int namlen, + off_t offset, ino_t ino, unsigned int d_type) +{ + struct readdir_callback32 * buf = (struct readdir_callback32 *) __buf; + struct old_linux_dirent32 * dirent; + + if (buf->count) + return -EINVAL; + buf->count++; + dirent = buf->dirent; + put_user(ino, &dirent->d_ino); + put_user(offset, &dirent->d_offset); + put_user(namlen, &dirent->d_namlen); + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + return 0; +} + 
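The callback above is the recurring shape in this file: the VFS hands back native-width fields (ino_t, off_t) and the callback repacks them into a packed 32-bit layout with put_user/copy_to_user. A minimal user-space model of that narrowing step, with hypothetical type and function names used only for illustration:

	#include <stdint.h>
	#include <string.h>

	struct dirent64_like { uint64_t d_ino; uint64_t d_off; char d_name[256]; };
	struct dirent32_like { uint32_t d_ino; uint32_t d_off; uint16_t d_namlen; char d_name[256]; };

	/* Narrow one native-width record into the packed 32-bit layout.
	 * Returns 0 on success, -1 when a value cannot be represented;
	 * the kernel callbacks above simply store the truncated value. */
	static int narrow_dirent(struct dirent32_like *dst, const struct dirent64_like *src)
	{
		size_t len = strnlen(src->d_name, sizeof(src->d_name) - 1);

		if (src->d_ino > 0xffffffffULL || src->d_off > 0xffffffffULL)
			return -1;
		dst->d_ino = (uint32_t)src->d_ino;
		dst->d_off = (uint32_t)src->d_off;
		dst->d_namlen = (uint16_t)len;
		memcpy(dst->d_name, src->d_name, len);
		dst->d_name[len] = '\0';
		return 0;
	}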
+asmlinkage int old32_readdir(unsigned int fd, struct old_linux_dirent32 *dirent, unsigned int count) +{ + int error = -EBADF; + struct file * file; + struct readdir_callback32 buf; + + file = fget(fd); + if (!file) + goto out; + + buf.count = 0; + buf.dirent = dirent; + + error = vfs_readdir(file, (filldir_t)fillonedir, &buf); + if (error < 0) + goto out_putf; + error = buf.count; + +out_putf: + fput(file); +out: + return error; +} + +#if 0 +struct linux_dirent32 { + u32 d_ino; + u32 d_off; + unsigned short d_reclen; + char d_name[1]; +}; +#else +struct linux_dirent32 { + u32 d_ino; + u32 d_off; + unsigned short d_reclen; + /* unsigned char d_type; */ + char d_name[256]; +}; +#endif + +struct getdents_callback32 { + struct linux_dirent32 * current_dir; + struct linux_dirent32 * previous; + int count; + int error; +}; + +static int +filldir(void * __buf, const char * name, int namlen, off_t offset, ino_t ino, + unsigned int d_type) +{ + struct linux_dirent32 * dirent; + struct getdents_callback32 * buf = (struct getdents_callback32 *) __buf; + int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 1); + + buf->error = -EINVAL; /* only used if we fail.. */ + if (reclen > buf->count) + return -EINVAL; + dirent = buf->previous; + if (dirent) + put_user(offset, &dirent->d_off); + dirent = buf->current_dir; + buf->previous = dirent; + put_user(ino, &dirent->d_ino); + put_user(reclen, &dirent->d_reclen); + /* put_user(d_type, &dirent->d_type); */ + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + ((char *) dirent) += reclen; + buf->current_dir = dirent; + buf->count -= reclen; + return 0; +} + +asmlinkage long sys32_getdents(unsigned int fd, struct linux_dirent32 *dirent, unsigned int count) +{ + struct file * file; + struct linux_dirent32 * lastdirent; + struct getdents_callback32 buf; + int error = -EBADF; + + PPCDBG(PPCDBG_SYS32NI, "sys32_getdents - running - fd=%x, pid=%ld, comm=%s \n", fd, current->pid, current->comm); + + file = fget(fd); + if (!file) + goto out; + + buf.current_dir = dirent; + buf.previous = NULL; + buf.count = count; + buf.error = 0; + + error = vfs_readdir(file, (filldir_t)filldir, &buf); + if (error < 0) + goto out_putf; + lastdirent = buf.previous; + error = buf.error; + if(lastdirent) { + put_user(file->f_pos, &lastdirent->d_off); + error = count - buf.count; + } + out_putf: + fput(file); + + out: + return error; +} +/* end of readdir & getdents */ + + + +/* 32-bit timeval and related flotsam. */ + +struct timeval32 +{ + int tv_sec, tv_usec; +}; + +struct itimerval32 +{ + struct timeval32 it_interval; + struct timeval32 it_value; +}; + + + + +/* + * Ooo, nasty. We need here to frob 32-bit unsigned longs to + * 64-bit unsigned longs. + */ +static inline int +get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset) +{ + if (ufdset) { + unsigned long odd; + + if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32))) + return -EFAULT; + + odd = n & 1UL; + n &= ~1UL; + while (n) { + unsigned long h, l; + __get_user(l, ufdset); + __get_user(h, ufdset+1); + ufdset += 2; + *fdset++ = h << 32 | l; + n -= 2; + } + if (odd) + __get_user(*fdset, ufdset); + } else { + /* Tricky, must clear full unsigned long in the + * kernel fdset at the end, this makes sure that + * actually happens. 
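 + * + * A worked instance of the packing loop above: with n = 2, the user words + * ufdset[0] = 0x00000001 (fd 0) and ufdset[1] = 0x80000000 (fd 63) merge + * into the single kernel word 0x8000000000000001.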
+ */ + memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32)); + } + return 0; +} + +static inline void +set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) +{ + unsigned long odd; + + if (!ufdset) + return; + + odd = n & 1UL; + n &= ~1UL; + while (n) { + unsigned long h, l; + l = *fdset++; + h = l >> 32; + __put_user(l, ufdset); + __put_user(h, ufdset+1); + ufdset += 2; + n -= 2; + } + if (odd) + __put_user(*fdset, ufdset); +} + + + +#define MAX_SELECT_SECONDS ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) + +asmlinkage long sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) +{ + fd_set_bits fds; + struct timeval32 *tvp = (struct timeval32 *)AA(tvp_x); + char *bits; + unsigned long nn; + long timeout; + int ret, size; + + PPCDBG(PPCDBG_SYS32X, "sys32_select - entered - n=%x, inp=%p, outp=%p - pid=%ld comm=%s \n", n, inp, outp, current->pid, current->comm); + + timeout = MAX_SCHEDULE_TIMEOUT; + if (tvp) { + time_t sec, usec; + if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) + || (ret = __get_user(sec, &tvp->tv_sec)) + || (ret = __get_user(usec, &tvp->tv_usec))) + goto out_nofds; + + ret = -EINVAL; + if(sec < 0 || usec < 0) + goto out_nofds; + + if ((unsigned long) sec < MAX_SELECT_SECONDS) { + timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); + timeout += sec * (unsigned long) HZ; + } + } + + ret = -EINVAL; + if (n < 0) + goto out_nofds; + if (n > current->files->max_fdset) + n = current->files->max_fdset; + + /* + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), + * since we used fdset we need to allocate memory in units of + * long-words. + */ + ret = -ENOMEM; + size = FDS_BYTES(n); + bits = kmalloc(6 * size, GFP_KERNEL); + if (!bits) + goto out_nofds; + fds.in = (unsigned long *) bits; + fds.out = (unsigned long *) (bits + size); + fds.ex = (unsigned long *) (bits + 2*size); + fds.res_in = (unsigned long *) (bits + 3*size); + fds.res_out = (unsigned long *) (bits + 4*size); + fds.res_ex = (unsigned long *) (bits + 5*size); + + nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); + if ((ret = get_fd_set32(nn, fds.in, inp)) || + (ret = get_fd_set32(nn, fds.out, outp)) || + (ret = get_fd_set32(nn, fds.ex, exp))) + goto out; + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); + + ret = do_select(n, &fds, &timeout); + + if (tvp && !(current->personality & STICKY_TIMEOUTS)) { + time_t sec = 0, usec = 0; + if (timeout) { + sec = timeout / HZ; + usec = timeout % HZ; + usec *= (1000000/HZ); + } + put_user(sec, &tvp->tv_sec); + put_user(usec, &tvp->tv_usec); + } + + if (ret < 0) + goto out; + if (!ret) { + ret = -ERESTARTNOHAND; + if (signal_pending(current)) + goto out; + ret = 0; + } + + set_fd_set32(nn, inp, fds.res_in); + set_fd_set32(nn, outp, fds.res_out); + set_fd_set32(nn, exp, fds.res_ex); + +out: + kfree(bits); + +out_nofds: + PPCDBG(PPCDBG_SYS32X, "sys32_select - exited - pid=%ld, comm=%s \n", current->pid, current->comm); + return ret; +} + + + + +/* + * Due to some executables calling the wrong select we sometimes + * get wrong args. This determines how the args are being passed + * (a single ptr to them all args passed) then calls + * sys_select() with the appropriate args. -- Cort + */ +/* Note: it is necessary to treat n as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
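 + * Concretely: a 32-bit -1 arrives as the register image 0x00000000ffffffff; + * taking the parameter as u32 and casting, (int)n, yields the properly + * sign-extended 64-bit value -1 rather than 4294967295.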
+ */ +asmlinkage int ppc32_select(u32 n, u32* inp, u32* outp, u32* exp, u32 tvp_x) +{ + if ((unsigned int)n >= 4096) + panic("ppc32_select - wrong arguments were passed in \n"); + + return sys32_select((int)n, inp, outp, exp, tvp_x); +} + + + +static int cp_new_stat32(struct inode *inode, struct stat32 *statbuf) +{ + unsigned long ino, blksize, blocks; + kdev_t dev, rdev; + umode_t mode; + nlink_t nlink; + uid_t uid; + gid_t gid; + off_t size; + time_t atime, mtime, ctime; + int err; + + /* Stream the loads of inode data into the load buffer, + * then we push it all into the store buffer below. This + * should give optimal cache performance. + */ + ino = inode->i_ino; + dev = inode->i_dev; + mode = inode->i_mode; + nlink = inode->i_nlink; + uid = inode->i_uid; + gid = inode->i_gid; + rdev = inode->i_rdev; + size = inode->i_size; + atime = inode->i_atime; + mtime = inode->i_mtime; + ctime = inode->i_ctime; + blksize = inode->i_blksize; + blocks = inode->i_blocks; + + err = put_user(kdev_t_to_nr(dev), &statbuf->st_dev); + err |= put_user(ino, &statbuf->st_ino); + err |= put_user(mode, &statbuf->st_mode); + err |= put_user(nlink, &statbuf->st_nlink); + err |= put_user(uid, &statbuf->st_uid); + err |= put_user(gid, &statbuf->st_gid); + err |= put_user(kdev_t_to_nr(rdev), &statbuf->st_rdev); + err |= put_user(size, &statbuf->st_size); + err |= put_user(atime, &statbuf->st_atime); + err |= put_user(0, &statbuf->__unused1); + err |= put_user(mtime, &statbuf->st_mtime); + err |= put_user(0, &statbuf->__unused2); + err |= put_user(ctime, &statbuf->st_ctime); + err |= put_user(0, &statbuf->__unused3); + if (blksize) { + err |= put_user(blksize, &statbuf->st_blksize); + err |= put_user(blocks, &statbuf->st_blocks); + } else { + unsigned int tmp_blocks; + +#define D_B 7 +#define I_B (BLOCK_SIZE / sizeof(unsigned short)) + tmp_blocks = (size + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (tmp_blocks > D_B) { + unsigned int indirect; + + indirect = (tmp_blocks - D_B + I_B - 1) / I_B; + tmp_blocks += indirect; + if (indirect > 1) { + indirect = (indirect - 1 + I_B - 1) / I_B; + tmp_blocks += indirect; + if (indirect > 1) + tmp_blocks++; + } + } + err |= put_user(BLOCK_SIZE, &statbuf->st_blksize); + err |= put_user((BLOCK_SIZE / 512) * tmp_blocks, &statbuf->st_blocks); +#undef D_B +#undef I_B + } + err |= put_user(0, &statbuf->__unused4[0]); + err |= put_user(0, &statbuf->__unused4[1]); + + return err; +} + +static __inline__ int +do_revalidate(struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + if (inode->i_op && inode->i_op->revalidate) + return inode->i_op->revalidate(dentry); + return 0; +} + +asmlinkage long sys32_newstat(char* filename, struct stat32* statbuf) +{ + struct nameidata nd; + int error; + + PPCDBG(PPCDBG_SYS32X, "sys32_newstat - running - filename=%s, statbuf=%p, pid=%ld, comm=%s\n", filename, statbuf, current->pid, current->comm); + + error = user_path_walk(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); + if (!error) + error = cp_new_stat32(nd.dentry->d_inode, statbuf); + path_release(&nd); + } + return error; +} + +asmlinkage long sys32_newlstat(char * filename, struct stat32 *statbuf) +{ + struct nameidata nd; + int error; + + PPCDBG(PPCDBG_SYS32X, "sys32_newlstat - running - fn=%s, pid=%ld, comm=%s\n", filename, current->pid, current->comm); + + error = user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); + if (!error) + error = cp_new_stat32(nd.dentry->d_inode, statbuf); + + path_release(&nd); + } + return error; +} + 
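When the filesystem supplies no i_blksize, cp_new_stat32 above estimates st_blocks from the file size alone, assuming 7 direct blocks (D_B) and BLOCK_SIZE/sizeof(unsigned short) pointers per indirect block (I_B). A worked instance of that arithmetic for a 1 MiB file, assuming BLOCK_SIZE = 1024 (the helper name is invented for illustration):

	static unsigned int example_st_blocks(void)
	{
		unsigned int tmp_blocks = (1048576 + 1023) / 1024; /* 1024 data blocks */
		unsigned int indirect = (1024 - 7 + 511) / 512;    /* 2 single-indirect */
		tmp_blocks += indirect;                            /* 1026 */
		indirect = (2 - 1 + 511) / 512;                    /* 1 double-indirect */
		tmp_blocks += indirect;                            /* 1027 */
		return (1024 / 512) * tmp_blocks;                  /* 2054 512-byte units */
	}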
+asmlinkage long sys32_newfstat(unsigned int fd, struct stat32 *statbuf) +{ + struct file *f; + int err = -EBADF; + + PPCDBG(PPCDBG_SYS32X, "sys32_newfstat - running - fd=%x, pid=%ld, comm=%s\n", fd, current->pid, current->comm); + + f = fget(fd); + if (f) { + struct dentry * dentry = f->f_dentry; + + err = do_revalidate(dentry); + if (!err) + err = cp_new_stat32(dentry->d_inode, statbuf); + fput(f); + } + return err; +} + +static inline int put_statfs (struct statfs32 *ubuf, struct statfs *kbuf) +{ + int err; + + err = put_user (kbuf->f_type, &ubuf->f_type); + err |= __put_user (kbuf->f_bsize, &ubuf->f_bsize); + err |= __put_user (kbuf->f_blocks, &ubuf->f_blocks); + err |= __put_user (kbuf->f_bfree, &ubuf->f_bfree); + err |= __put_user (kbuf->f_bavail, &ubuf->f_bavail); + err |= __put_user (kbuf->f_files, &ubuf->f_files); + err |= __put_user (kbuf->f_ffree, &ubuf->f_ffree); + err |= __put_user (kbuf->f_namelen, &ubuf->f_namelen); + err |= __put_user (kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]); + err |= __put_user (kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]); + return err; +} + +extern asmlinkage int sys_statfs(const char * path, struct statfs * buf); + +asmlinkage long sys32_statfs(const char * path, struct statfs32 *buf) +{ + int ret; + struct statfs s; + mm_segment_t old_fs = get_fs(); + char *pth; + + PPCDBG(PPCDBG_SYS32X, "sys32_statfs - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + pth = getname32 (path); + ret = PTR_ERR(pth); + if (!IS_ERR(pth)) { + set_fs (KERNEL_DS); + ret = sys_statfs((const char *)pth, &s); + set_fs (old_fs); + putname (pth); + if (put_statfs(buf, &s)) + return -EFAULT; + } + + PPCDBG(PPCDBG_SYS32X, "sys32_statfs - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return ret; +} + +extern asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf); + +asmlinkage long sys32_fstatfs(unsigned int fd, struct statfs32 *buf) +{ + int ret; + struct statfs s; + mm_segment_t old_fs = get_fs(); + + PPCDBG(PPCDBG_SYS32X, "sys32_fstatfs - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + set_fs (KERNEL_DS); + ret = sys_fstatfs(fd, &s); + set_fs (old_fs); + if (put_statfs(buf, &s)) + return -EFAULT; + + PPCDBG(PPCDBG_SYS32X, "sys32_fstatfs - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return ret; +} + + + +extern asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
 + */ +asmlinkage long sys32_sysfs(u32 option, u32 arg1, u32 arg2) +{ + PPCDBG(PPCDBG_SYS32, "sys32_sysfs - running - pid=%ld, comm=%s\n", current->pid, current->comm); + return sys_sysfs((int)option, arg1, arg2); +} + + + + +extern unsigned long do_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); + +asmlinkage unsigned long sys32_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, + unsigned long flags, u32 __new_addr) +{ + unsigned long ret = -EINVAL; + unsigned long new_addr = AA(__new_addr); + + PPCDBG(PPCDBG_SYS32, "sys32_mremap - entered - pid=%ld current=%lx comm=%s\n", + current->pid, current, current->comm); + + + if (old_len > 0xf0000000UL || new_len > 0xf0000000UL) + goto out; + if (addr > 0xf0000000UL - old_len) + goto out; + down_write(&current->mm->mmap_sem); + if (flags & MREMAP_FIXED) { + if (new_addr > 0xf0000000UL - new_len) + goto out_sem; + } else if (addr > 0xf0000000UL - new_len) { + ret = -ENOMEM; + if (!(flags & MREMAP_MAYMOVE)) + goto out_sem; + new_addr = get_unmapped_area (NULL, addr, new_len, 0, 0); + if (!new_addr) + goto out_sem; + flags |= MREMAP_FIXED; + } + ret = do_mremap(addr, old_len, new_len, flags, new_addr); +out_sem: + up_write(&current->mm->mmap_sem); +out: + + PPCDBG(PPCDBG_SYS32, "sys32_mremap - exited - pid=%ld current=%lx comm=%s\n", + current->pid, current, current->comm); + + return ret; +} + + + +/* Handle adjtimex compatibility. */ +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct timeval32 time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); + +asmlinkage long sys32_adjtimex(struct timex32 *utp) +{ + struct timex txc; + int ret; + + PPCDBG(PPCDBG_SYS32, "sys32_adjtimex - running - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + memset(&txc, 0, sizeof(struct timex)); + + if(get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if(put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, 
&utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} + + + +#ifdef CONFIG_MODULES + +extern asmlinkage unsigned long sys_create_module(const char *name_user, size_t size); + +asmlinkage unsigned long sys32_create_module(const char *name_user, __kernel_size_t32 size) +{ + + PPCDBG(PPCDBG_SYS32M, "sys32_create_module - running - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return sys_create_module(name_user, (size_t)size); +} + + + +extern asmlinkage long sys_init_module(const char *name_user, struct module *mod_user); + +asmlinkage long sys32_init_module(const char *name_user, struct module *mod_user) +{ + + PPCDBG(PPCDBG_SYS32, "sys32_init_module - running - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return sys_init_module(name_user, mod_user); +} + + + +extern asmlinkage long sys_delete_module(const char *name_user); + +asmlinkage long sys32_delete_module(const char *name_user) +{ + + PPCDBG(PPCDBG_SYS32, "sys32_delete_module - running - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return sys_delete_module(name_user); +} + + + +struct module_info32 { + u32 addr; + u32 size; + u32 flags; + s32 usecount; +}; + +/* Query various bits about modules. */ + +static inline long +get_mod_name(const char *user_name, char **buf) +{ + unsigned long page; + long retval; + + if ((unsigned long)user_name >= TASK_SIZE + && !segment_eq(get_fs (), KERNEL_DS)) + return -EFAULT; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) { + *buf = (char *)page; + return retval; + } + retval = -ENAMETOOLONG; + } else if (!retval) + retval = -EINVAL; + + free_page(page); + return retval; +} + +static inline void +put_mod_name(char *buf) +{ + free_page((unsigned long)buf); +} + +static __inline__ struct module *find_module(const char *name) +{ + struct module *mod; + + for (mod = module_list; mod ; mod = mod->next) { + if (mod->flags & MOD_DELETED) + continue; + if (!strcmp(mod->name, name)) + break; + } + + return mod; +} + +static int +qm_modules(char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + struct module *mod; + size_t nmod, space, len; + + nmod = space = 0; + + for (mod = module_list; mod->next != NULL; mod = mod->next, ++nmod) { + len = strlen(mod->name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, mod->name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nmod, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((mod = mod->next)->next != NULL) + space += strlen(mod->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_deps(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + size_t i, space, len; + + if (mod->next == NULL) + return -EINVAL; + if (!MOD_CAN_QUERY(mod)) + return put_user(0, ret); + + space = 0; + for (i = 0; i < mod->ndeps; ++i) { + const char *dep_name = mod->deps[i].dep->name; + + len = strlen(dep_name)+1; + if 
(len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, dep_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + return put_user(i, ret); + +calc_space_needed: + space += len; + while (++i < mod->ndeps) + space += strlen(mod->deps[i].dep->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_refs(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + size_t nrefs, space, len; + struct module_ref *ref; + + if (mod->next == NULL) + return -EINVAL; + if (!MOD_CAN_QUERY(mod)) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) { + const char *ref_name = ref->ref->name; + + len = strlen(ref_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, ref_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nrefs, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((ref = ref->next_ref) != NULL) + space += strlen(ref->ref->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static inline int +qm_symbols(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + size_t i, space, len; + struct module_symbol *s; + char *strings; + unsigned *vals; + + if (!MOD_CAN_QUERY(mod)) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = mod->nsyms * 2*sizeof(u32); + + i = len = 0; + s = mod->syms; + + if (space > bufsize) + goto calc_space_needed; + + if (!access_ok(VERIFY_WRITE, buf, space)) + return -EFAULT; + + bufsize -= space; + vals = (unsigned *)buf; + strings = buf+space; + + for (; i < mod->nsyms ; ++i, ++s, vals += 2) { + len = strlen(s->name)+1; + if (len > bufsize) + goto calc_space_needed; + + if (copy_to_user(strings, s->name, len) + || __put_user(s->value, vals+0) + || __put_user(space, vals+1)) + return -EFAULT; + + strings += len; + bufsize -= len; + space += len; + } + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + for (; i < mod->nsyms; ++i, ++s) + space += strlen(s->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static inline int +qm_info(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + int error = 0; + + if (mod->next == NULL) + return -EINVAL; + + if (sizeof(struct module_info32) <= bufsize) { + struct module_info32 info; + info.addr = (unsigned long)mod; + info.size = mod->size; + info.flags = mod->flags; + info.usecount = + ((mod_member_present(mod, can_unload) + && mod->can_unload) + ? -1 : atomic_read(&mod->uc.usecount)); + + if (copy_to_user(buf, &info, sizeof(struct module_info32))) + return -EFAULT; + } else + error = -ENOSPC; + + if (put_user(sizeof(struct module_info32), ret)) + return -EFAULT; + + return error; +} + +/* Note: it is necessary to treat which as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long sys32_query_module(char *name_user, u32 which, char *buf, __kernel_size_t32 bufsize, u32 ret) +{ + struct module *mod; + int err; + + PPCDBG(PPCDBG_SYS32M, "sys32_query_module - entered - pid=%ld current=%lx comm=%s\n", + current->pid, current, current->comm); + + lock_kernel(); + if (name_user == 0) { + /* This finds "kernel_module" which is not exported. */ + for(mod = module_list; mod->next != NULL; mod = mod->next) + ; + } else { + long namelen; + char *name; + + if ((namelen = get_mod_name(name_user, &name)) < 0) { + err = namelen; + goto out; + } + err = -ENOENT; + if (namelen == 0) { + /* This finds "kernel_module" which is not exported. */ + for(mod = module_list; mod->next != NULL; mod = mod->next) + ; + } else if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; + } + put_mod_name(name); + } + + switch ((int)which) + { + case 0: + err = 0; + break; + case QM_MODULES: + err = qm_modules(buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_DEPS: + err = qm_deps(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_REFS: + err = qm_refs(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_SYMBOLS: + err = qm_symbols(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_INFO: + err = qm_info(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + default: + err = -EINVAL; + break; + } +out: + unlock_kernel(); + + PPCDBG(PPCDBG_SYS32, "sys32_query_module - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + return err; +} + + + +struct kernel_sym32 { + u32 value; + char name[60]; +}; + +extern asmlinkage long sys_get_kernel_syms(struct kernel_sym *table); + +asmlinkage long sys32_get_kernel_syms(struct kernel_sym32 *table) +{ + int len, i; + struct kernel_sym *tbl; + mm_segment_t old_fs; + + PPCDBG(PPCDBG_SYS32, "sys32_get_kernel_syms - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + + len = sys_get_kernel_syms(NULL); + if (!table) return len; + tbl = kmalloc (len * sizeof (struct kernel_sym), GFP_KERNEL); + if (!tbl) return -ENOMEM; + old_fs = get_fs(); + set_fs (KERNEL_DS); + sys_get_kernel_syms(tbl); + set_fs (old_fs); + for (i = 0; i < len; i++, table += sizeof (struct kernel_sym32)) { + if (put_user (tbl[i].value, &table->value) || + copy_to_user (table->name, tbl[i].name, 60)) + break; + } + kfree (tbl); + + PPCDBG(PPCDBG_SYS32, "sys32_get_kernel_syms - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return i; +} + +#else /* CONFIG_MODULES */ + +asmlinkage unsigned long sys32_create_module(const char *name_user, size_t size) +{ + + PPCDBG(PPCDBG_SYS32, "sys32_create_module - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + return -ENOSYS; +} + +asmlinkage long sys32_init_module(const char *name_user, struct module *mod_user) +{ + PPCDBG(PPCDBG_SYS32, "sys32_init_module - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + return -ENOSYS; +} + +asmlinkage long sys32_delete_module(const char *name_user) +{ + PPCDBG(PPCDBG_SYS32, "sys32_delete_module - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + return -ENOSYS; +} + +/* Note: it is necessary to treat which as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) 
is performed. + */ +asmlinkage long sys32_query_module(const char *name_user, u32 which, char *buf, size_t bufsize, size_t *ret) +{ + PPCDBG(PPCDBG_SYS32, "sys32_query_module - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + /* Let the program know about the new interface. Not that it'll do them much good. */ + if ((int)which == 0) + return 0; + + PPCDBG(PPCDBG_SYS32, "sys32_query_module - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + return -ENOSYS; +} + +asmlinkage long sys32_get_kernel_syms(struct kernel_sym *table) +{ + PPCDBG(PPCDBG_SYS32, "sys32_get_kernel_syms - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + return -ENOSYS; +} + +#endif /* CONFIG_MODULES */ + + + +/* Stuff for NFS server syscalls... */ +struct nfsctl_svc32 { + u16 svc32_port; + s32 svc32_nthreads; +}; + +struct nfsctl_client32 { + s8 cl32_ident[NFSCLNT_IDMAX+1]; + s32 cl32_naddr; + struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; + s32 cl32_fhkeytype; + s32 cl32_fhkeylen; + u8 cl32_fhkey[NFSCLNT_KEYMAX]; +}; + +struct nfsctl_export32 { + s8 ex32_client[NFSCLNT_IDMAX+1]; + s8 ex32_path[NFS_MAXPATHLEN+1]; + __kernel_dev_t32 ex32_dev; + __kernel_ino_t32 ex32_ino; + s32 ex32_flags; + __kernel_uid_t32 ex32_anon_uid; + __kernel_gid_t32 ex32_anon_gid; +}; + +struct nfsctl_uidmap32 { + u32 ug32_ident; /* char * */ + __kernel_uid_t32 ug32_uidbase; + s32 ug32_uidlen; + u32 ug32_udimap; /* uid_t * */ + __kernel_uid_t32 ug32_gidbase; + s32 ug32_gidlen; + u32 ug32_gdimap; /* gid_t * */ +}; + +struct nfsctl_fhparm32 { + struct sockaddr gf32_addr; + __kernel_dev_t32 gf32_dev; + __kernel_ino_t32 gf32_ino; + s32 gf32_version; +}; + +struct nfsctl_fdparm32 { + struct sockaddr gd32_addr; + s8 gd32_path[NFS_MAXPATHLEN+1]; + s32 gd32_version; +}; + +struct nfsctl_fsparm32 { + struct sockaddr gd32_addr; + s8 gd32_path[NFS_MAXPATHLEN+1]; + s32 gd32_maxlen; +}; + +struct nfsctl_arg32 { + s32 ca32_version; /* safeguard */ + union { + struct nfsctl_svc32 u32_svc; + struct nfsctl_client32 u32_client; + struct nfsctl_export32 u32_export; + struct nfsctl_uidmap32 u32_umap; + struct nfsctl_fhparm32 u32_getfh; + struct nfsctl_fdparm32 u32_getfd; + struct nfsctl_fsparm32 u32_getfs; + } u; +#define ca32_svc u.u32_svc +#define ca32_client u.u32_client +#define ca32_export u.u32_export +#define ca32_umap u.u32_umap +#define ca32_getfh u.u32_getfh +#define ca32_getfd u.u32_getfd +#define ca32_getfs u.u32_getfs +#define ca32_authd u.u32_authd +}; + +union nfsctl_res32 { + __u8 cr32_getfh[NFS_FHSIZE]; + struct knfsd_fh cr32_getfs; +}; + +static int nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); + err |= __get_user(karg->ca_svc.svc_nthreads, &arg32->ca32_svc.svc32_nthreads); + return err; +} + +static int nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_client.cl_ident[0], + &arg32->ca32_client.cl32_ident[0], + NFSCLNT_IDMAX); + err |= __get_user(karg->ca_client.cl_naddr, &arg32->ca32_client.cl32_naddr); + err |= copy_from_user(&karg->ca_client.cl_addrlist[0], + &arg32->ca32_client.cl32_addrlist[0], + (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); + err |= __get_user(karg->ca_client.cl_fhkeytype, + &arg32->ca32_client.cl32_fhkeytype); + err |= 
__get_user(karg->ca_client.cl_fhkeylen, + &arg32->ca32_client.cl32_fhkeylen); + err |= copy_from_user(&karg->ca_client.cl_fhkey[0], + &arg32->ca32_client.cl32_fhkey[0], + NFSCLNT_KEYMAX); + return err; +} + +static int nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_export.ex_client[0], + &arg32->ca32_export.ex32_client[0], + NFSCLNT_IDMAX); + err |= copy_from_user(&karg->ca_export.ex_path[0], + &arg32->ca32_export.ex32_path[0], + NFS_MAXPATHLEN); + err |= __get_user(karg->ca_export.ex_dev, + &arg32->ca32_export.ex32_dev); + err |= __get_user(karg->ca_export.ex_ino, + &arg32->ca32_export.ex32_ino); + err |= __get_user(karg->ca_export.ex_flags, + &arg32->ca32_export.ex32_flags); + err |= __get_user(karg->ca_export.ex_anon_uid, + &arg32->ca32_export.ex32_anon_uid); + err |= __get_user(karg->ca_export.ex_anon_gid, + &arg32->ca32_export.ex32_anon_gid); + karg->ca_export.ex_anon_uid = karg->ca_export.ex_anon_uid; + karg->ca_export.ex_anon_gid = karg->ca_export.ex_anon_gid; + return err; +} + +static int nfs_uud32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + u32 uaddr; + int i; + int err; + + memset(karg, 0, sizeof(*karg)); + if(__get_user(karg->ca_version, &arg32->ca32_version)) + return -EFAULT; + karg->ca_umap.ug_ident = (char *)get_free_page(GFP_USER); + if(!karg->ca_umap.ug_ident) + return -ENOMEM; + err = __get_user(uaddr, &arg32->ca32_umap.ug32_ident); + if(strncpy_from_user(karg->ca_umap.ug_ident, + (char *)A(uaddr), PAGE_SIZE) <= 0) + return -EFAULT; + err |= __get_user(karg->ca_umap.ug_uidbase, + &arg32->ca32_umap.ug32_uidbase); + err |= __get_user(karg->ca_umap.ug_uidlen, + &arg32->ca32_umap.ug32_uidlen); + err |= __get_user(uaddr, &arg32->ca32_umap.ug32_udimap); + if (err) + return -EFAULT; + karg->ca_umap.ug_udimap = kmalloc((sizeof(uid_t) * karg->ca_umap.ug_uidlen), + GFP_USER); + if(!karg->ca_umap.ug_udimap) + return -ENOMEM; + for(i = 0; i < karg->ca_umap.ug_uidlen; i++) + err |= __get_user(karg->ca_umap.ug_udimap[i], + &(((__kernel_uid_t32 *)A(uaddr))[i])); + err |= __get_user(karg->ca_umap.ug_gidbase, + &arg32->ca32_umap.ug32_gidbase); + err |= __get_user(karg->ca_umap.ug_uidlen, + &arg32->ca32_umap.ug32_gidlen); + err |= __get_user(uaddr, &arg32->ca32_umap.ug32_gdimap); + if (err) + return -EFAULT; + karg->ca_umap.ug_gdimap = kmalloc((sizeof(gid_t) * karg->ca_umap.ug_uidlen), + GFP_USER); + if(!karg->ca_umap.ug_gdimap) + return -ENOMEM; + for(i = 0; i < karg->ca_umap.ug_gidlen; i++) + err |= __get_user(karg->ca_umap.ug_gdimap[i], + &(((__kernel_gid_t32 *)A(uaddr))[i])); + + return err; +} + +static int nfs_getfh32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_getfh.gf_addr, + &arg32->ca32_getfh.gf32_addr, + (sizeof(struct sockaddr))); + err |= __get_user(karg->ca_getfh.gf_dev, + &arg32->ca32_getfh.gf32_dev); + err |= __get_user(karg->ca_getfh.gf_ino, + &arg32->ca32_getfh.gf32_ino); + err |= __get_user(karg->ca_getfh.gf_version, + &arg32->ca32_getfh.gf32_version); + return err; +} + +static int nfs_getfd32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_getfd.gd_addr, + &arg32->ca32_getfd.gd32_addr, + (sizeof(struct sockaddr))); + err |= copy_from_user(&karg->ca_getfd.gd_path, + 
&arg32->ca32_getfd.gd32_path, + (NFS_MAXPATHLEN+1)); + err |= __get_user(karg->ca_getfd.gd_version, + &arg32->ca32_getfd.gd32_version); + return err; +} + +static int nfs_getfs32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_getfs.gd_addr, + &arg32->ca32_getfs.gd32_addr, + (sizeof(struct sockaddr))); + err |= copy_from_user(&karg->ca_getfs.gd_path, + &arg32->ca32_getfs.gd32_path, + (NFS_MAXPATHLEN+1)); + err |= __get_user(karg->ca_getfs.gd_maxlen, + &arg32->ca32_getfs.gd32_maxlen); + return err; +} + +/* This really doesn't need translations, we are only passing + * back a union which contains opaque nfs file handle data. + */ +static int nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) +{ + return copy_to_user(res32, kres, sizeof(*res32)); +} + +/* Note: it is necessary to treat cmd_parm as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +int asmlinkage sys32_nfsservctl(u32 cmd_parm, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) +{ + int cmd = (int)cmd_parm; + struct nfsctl_arg *karg = NULL; + union nfsctl_res *kres = NULL; + mm_segment_t oldfs; + int err; + + karg = kmalloc(sizeof(*karg), GFP_USER); + if(!karg) + return -ENOMEM; + if(res32) { + kres = kmalloc(sizeof(*kres), GFP_USER); + if(!kres) { + kfree(karg); + return -ENOMEM; + } + } + switch(cmd) { + case NFSCTL_SVC: + err = nfs_svc32_trans(karg, arg32); + break; + case NFSCTL_ADDCLIENT: + err = nfs_clnt32_trans(karg, arg32); + break; + case NFSCTL_DELCLIENT: + err = nfs_clnt32_trans(karg, arg32); + break; + case NFSCTL_EXPORT: + case NFSCTL_UNEXPORT: + err = nfs_exp32_trans(karg, arg32); + break; + /* This one is unimplemented, be we're ready for it. */ + case NFSCTL_UGIDUPDATE: + err = nfs_uud32_trans(karg, arg32); + break; + case NFSCTL_GETFH: + err = nfs_getfh32_trans(karg, arg32); + break; + case NFSCTL_GETFD: + err = nfs_getfd32_trans(karg, arg32); + break; + case NFSCTL_GETFS: + err = nfs_getfs32_trans(karg, arg32); + break; + default: + err = -EINVAL; + break; + } + if(err) + goto done; + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = sys_nfsservctl(cmd, karg, kres); + set_fs(oldfs); + + if (err) + goto done; + + if((cmd == NFSCTL_GETFH) || + (cmd == NFSCTL_GETFD) || + (cmd == NFSCTL_GETFS)) + err = nfs_getfh32_res_trans(kres, res32); + +done: + if(karg) { + if(cmd == NFSCTL_UGIDUPDATE) { + if(karg->ca_umap.ug_ident) + kfree(karg->ca_umap.ug_ident); + if(karg->ca_umap.ug_udimap) + kfree(karg->ca_umap.ug_udimap); + if(karg->ca_umap.ug_gdimap) + kfree(karg->ca_umap.ug_gdimap); + } + kfree(karg); + } + if(kres) + kfree(kres); + return err; +} + + + +struct timespec32 { + s32 tv_sec; + s32 tv_nsec; +}; + +extern asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp); + +asmlinkage long sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + PPCDBG(PPCDBG_SYS32NI, "sys32_nanosleep - running - pid=%ld, comm=%s \n", current->pid, current->comm); + + if (get_user (t.tv_sec, &rqtp->tv_sec) || + __get_user (t.tv_nsec, &rqtp->tv_nsec)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_nanosleep(&t, rmtp ? 
&t : NULL); + set_fs (old_fs); + if (rmtp && ret == -EINTR) { + if (__put_user (t.tv_sec, &rmtp->tv_sec) || + __put_user (t.tv_nsec, &rmtp->tv_nsec)) + return -EFAULT; + } + + return ret; +} + + + + +/* These are here just in case some old sparc32 binary calls it. */ +asmlinkage long sys32_pause(void) +{ + + PPCDBG(PPCDBG_SYS32, "sys32_pause - running - pid=%ld, comm=%s \n", current->pid, current->comm); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + + return -ERESTARTNOHAND; +} + + + +static inline long get_it32(struct itimerval *o, struct itimerval32 *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) | + __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) | + __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) | + __get_user(o->it_value.tv_usec, &i->it_value.tv_usec))); +} + +static inline long put_it32(struct itimerval32 *o, struct itimerval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) | + __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) | + __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) | + __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); +} + +static inline long get_tv32(struct timeval *o, struct timeval32 *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->tv_sec, &i->tv_sec) | + __get_user(o->tv_usec, &i->tv_usec))); +} + +static inline long put_tv32(struct timeval32 *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | + __put_user(i->tv_usec, &o->tv_usec))); +} + + + + +extern int do_getitimer(int which, struct itimerval *value); + +/* Note: it is necessary to treat which as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getitimer(u32 which, struct itimerval32 *it) +{ + struct itimerval kit; + int error; + + PPCDBG(PPCDBG_SYS32, "sys32_getitimer - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + error = do_getitimer((int)which, &kit); + if (!error && put_it32(it, &kit)) + error = -EFAULT; + + + PPCDBG(PPCDBG_SYS32, "sys32_getitimer - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + return error; +} + + + +extern int do_setitimer(int which, struct itimerval *, struct itimerval *); + +/* Note: it is necessary to treat which as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setitimer(u32 which, struct itimerval32 *in, struct itimerval32 *out) +{ + struct itimerval kin, kout; + int error; + + PPCDBG(PPCDBG_SYS32, "sys32_setitimer - entered - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + + if (in) { + if (get_it32(&kin, in)) + return -EFAULT; + } else + memset(&kin, 0, sizeof(kin)); + + error = do_setitimer((int)which, &kin, out ? 
&kout : NULL); + if (error || !out) + return error; + if (put_it32(out, &kout)) + return -EFAULT; + + + PPCDBG(PPCDBG_SYS32, "sys32_setitimer - exited - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm); + return 0; +} + +#define RLIM_INFINITY32 0xffffffff +#define RESOURCE32(x) ((x > RLIM_INFINITY32) ? RLIM_INFINITY32 : x) + +struct rlimit32 { + u32 rlim_cur; + u32 rlim_max; +}; + +extern asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim); +asmlinkage long sys32_getrlimit(unsigned int resource, struct rlimit32 *rlim) +{ + struct rlimit r; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_getrlimit(resource, &r); + set_fs(old_fs); + if (!ret) { + ret = put_user(RESOURCE32(r.rlim_cur), &rlim->rlim_cur); + ret |= __put_user(RESOURCE32(r.rlim_max), &rlim->rlim_max); + } + + return ret; +} + +/* Back compatibility for getrlimit. Needed for some apps. */ +asmlinkage long sys32_old_getrlimit(unsigned int resource, struct rlimit32* rlim) +{ + struct rlimit x; // 64-bit version of the resource limits. + struct rlimit32 x32; // 32-bit version of the resource limits. + long rc = 0; + + if (resource >= RLIM_NLIMITS) { + PPCDBG(PPCDBG_SYS32, "sys32_old_getrlimit - specified resource is too large (%x) - pid=%ld, comm=%s\n", resource, current->pid, current->comm); + return -EINVAL; + } + + memcpy(&x, current->rlim+resource, sizeof(struct rlimit)); + + if(x.rlim_cur > RLIM_INFINITY32) + x32.rlim_cur = RLIM_INFINITY32; + else + x32.rlim_cur = x.rlim_cur; + + if(x.rlim_max > RLIM_INFINITY32) + x32.rlim_max = RLIM_INFINITY32; + else + x32.rlim_max = x.rlim_max; + + rc = (copy_to_user(rlim, &x32, sizeof(x32))) ? (-EFAULT) : 0; + if (rc == 0) { + PPCDBG(PPCDBG_SYS32, "sys32_old_getrlimit - current=%x, maximum=%x - pid=%ld, comm=%s\n", x32.rlim_cur, x32.rlim_max, current->pid, current->comm); + } else { + PPCDBG(PPCDBG_SYS32, "sys32_old_getrlimit - unable to copy into user's storage - pid=%ld, comm=%s\n", current->pid, current->comm); + } + return rc; +} + +extern asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim); +asmlinkage long sys32_setrlimit(unsigned int resource, struct rlimit32 *rlim) +{ + struct rlimit r; + long ret; + mm_segment_t old_fs = get_fs (); + + PPCDBG(PPCDBG_SYS32, "sys32_setrlimit - entered - resource=%x, rlim=%p - pid=%ld, comm=%s\n", resource, rlim, current->pid, current->comm); + + if (resource >= RLIM_NLIMITS) return -EINVAL; + if (get_user (r.rlim_cur, &rlim->rlim_cur) || + __get_user (r.rlim_max, &rlim->rlim_max)) + return -EFAULT; + if (r.rlim_cur >= RLIM_INFINITY32) + r.rlim_cur = RLIM_INFINITY; + if (r.rlim_max >= RLIM_INFINITY32) + r.rlim_max = RLIM_INFINITY; + set_fs (KERNEL_DS); + ret = sys_setrlimit(resource, &r); + set_fs (old_fs); + + PPCDBG(PPCDBG_SYS32, "sys32_setrlimit - exited w/ ret=%x - pid=%ld, comm=%s\n", ret, current->pid, current->comm); + return ret; +} + + +struct rusage32 { + struct timeval32 ru_utime; + struct timeval32 ru_stime; + s32 ru_maxrss; + s32 ru_ixrss; + s32 ru_idrss; + s32 ru_isrss; + s32 ru_minflt; + s32 ru_majflt; + s32 ru_nswap; + s32 ru_inblock; + s32 ru_oublock; + s32 ru_msgsnd; + s32 ru_msgrcv; + s32 ru_nsignals; + s32 ru_nvcsw; + s32 ru_nivcsw; +}; + +static int put_rusage (struct rusage32 *ru, struct rusage *r) +{ + int err; + + err = put_user (r->ru_utime.tv_sec, &ru->ru_utime.tv_sec); + err |= __put_user (r->ru_utime.tv_usec, &ru->ru_utime.tv_usec); + err |= __put_user (r->ru_stime.tv_sec, &ru->ru_stime.tv_sec); + err |= __put_user 
(r->ru_stime.tv_usec, &ru->ru_stime.tv_usec); + err |= __put_user (r->ru_maxrss, &ru->ru_maxrss); + err |= __put_user (r->ru_ixrss, &ru->ru_ixrss); + err |= __put_user (r->ru_idrss, &ru->ru_idrss); + err |= __put_user (r->ru_isrss, &ru->ru_isrss); + err |= __put_user (r->ru_minflt, &ru->ru_minflt); + err |= __put_user (r->ru_majflt, &ru->ru_majflt); + err |= __put_user (r->ru_nswap, &ru->ru_nswap); + err |= __put_user (r->ru_inblock, &ru->ru_inblock); + err |= __put_user (r->ru_oublock, &ru->ru_oublock); + err |= __put_user (r->ru_msgsnd, &ru->ru_msgsnd); + err |= __put_user (r->ru_msgrcv, &ru->ru_msgrcv); + err |= __put_user (r->ru_nsignals, &ru->ru_nsignals); + err |= __put_user (r->ru_nvcsw, &ru->ru_nvcsw); + err |= __put_user (r->ru_nivcsw, &ru->ru_nivcsw); + return err; +} + + +extern asmlinkage long sys_getrusage(int who, struct rusage *ru); + +/* Note: it is necessary to treat who as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getrusage(u32 who, struct rusage32 *ru) +{ + struct rusage r; + int ret; + mm_segment_t old_fs = get_fs(); + + PPCDBG(PPCDBG_SYS32X, "sys32_getrusage - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + set_fs (KERNEL_DS); + ret = sys_getrusage((int)who, &r); + set_fs (old_fs); + if (put_rusage (ru, &r)) + return -EFAULT; + + return ret; +} + + + + +struct sysinfo32 { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + unsigned short procs; + char _f[22]; +}; + +extern asmlinkage long sys_sysinfo(struct sysinfo *info); + +asmlinkage long sys32_sysinfo(struct sysinfo32 *info) +{ + struct sysinfo s; + int ret, err; + mm_segment_t old_fs = get_fs (); + + PPCDBG(PPCDBG_SYS32, "sys32_sysinfo - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + set_fs (KERNEL_DS); + ret = sys_sysinfo(&s); + set_fs (old_fs); + err = put_user (s.uptime, &info->uptime); + err |= __put_user (s.loads[0], &info->loads[0]); + err |= __put_user (s.loads[1], &info->loads[1]); + err |= __put_user (s.loads[2], &info->loads[2]); + err |= __put_user (s.totalram, &info->totalram); + err |= __put_user (s.freeram, &info->freeram); + err |= __put_user (s.sharedram, &info->sharedram); + err |= __put_user (s.bufferram, &info->bufferram); + err |= __put_user (s.totalswap, &info->totalswap); + err |= __put_user (s.freeswap, &info->freeswap); + err |= __put_user (s.procs, &info->procs); + if (err) + return -EFAULT; + + PPCDBG(PPCDBG_SYS32, "sys32_sysinfo - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return ret; +} + + + + +/* Translations due to time_t size differences. Which affects all + sorts of things, like timeval and itimerval. 
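+ * A 32-bit timeval carries 32-bit tv_sec/tv_usec fields, so every
+ * structure embedding one (itimerval, rusage, ...) is translated
+ * member by member through helpers such as get_tv32/put_tv32 rather
+ * than copied as a block.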
*/ +extern struct timezone sys_tz; +extern int do_sys_settimeofday(struct timeval *tv, struct timezone *tz); + +asmlinkage long sys32_gettimeofday(struct timeval32 *tv, struct timezone *tz) +{ + + PPCDBG(PPCDBG_SYS32X, "sys32_gettimeofday - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + + return 0; +} + + + +asmlinkage long sys32_settimeofday(struct timeval32 *tv, struct timezone *tz) +{ + struct timeval ktv; + struct timezone ktz; + + PPCDBG(PPCDBG_SYS32, "sys32_settimeofday - running - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + if (tv) { + if (get_tv32(&ktv, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &ktv : NULL, tz ? &ktz : NULL); +} + + + + +struct tms32 { + __kernel_clock_t32 tms_utime; + __kernel_clock_t32 tms_stime; + __kernel_clock_t32 tms_cutime; + __kernel_clock_t32 tms_cstime; +}; + +extern asmlinkage long sys_times(struct tms * tbuf); + +asmlinkage long sys32_times(struct tms32 *tbuf) +{ + struct tms t; + long ret; + mm_segment_t old_fs = get_fs (); + int err; + + PPCDBG(PPCDBG_SYS32, "sys32_times - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + set_fs (KERNEL_DS); + ret = sys_times(tbuf ? &t : NULL); + set_fs (old_fs); + if (tbuf) { + err = put_user (t.tms_utime, &tbuf->tms_utime); + err |= __put_user (t.tms_stime, &tbuf->tms_stime); + err |= __put_user (t.tms_cutime, &tbuf->tms_cutime); + err |= __put_user (t.tms_cstime, &tbuf->tms_cstime); + if (err) + ret = -EFAULT; + } + + PPCDBG(PPCDBG_SYS32, "sys32_times - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return ret; +} + +struct msgbuf32 { s32 mtype; char mtext[1]; }; + +struct semid_ds32 { + struct ipc_perm sem_perm; + __kernel_time_t32 sem_otime; + __kernel_time_t32 sem_ctime; + u32 sem_base; + u32 sem_pending; + u32 sem_pending_last; + u32 undo; + unsigned short sem_nsems; +}; + +struct semid64_ds32 { + struct ipc64_perm sem_perm; + unsigned int __unused1; + __kernel_time_t32 sem_otime; + unsigned int __unused2; + __kernel_time_t32 sem_ctime; + u32 sem_nsems; + u32 __unused3; + u32 __unused4; +}; + +struct msqid_ds32 +{ + struct ipc_perm msg_perm; + u32 msg_first; + u32 msg_last; + __kernel_time_t32 msg_stime; + __kernel_time_t32 msg_rtime; + __kernel_time_t32 msg_ctime; + u32 msg_lcbytes; + u32 msg_lqbytes; + unsigned short msg_cbytes; + unsigned short msg_qnum; + unsigned short msg_qbytes; + __kernel_ipc_pid_t32 msg_lspid; + __kernel_ipc_pid_t32 msg_lrpid; +}; + +struct msqid64_ds32 { + struct ipc64_perm msg_perm; + unsigned int __unused1; + __kernel_time_t32 msg_stime; + unsigned int __unused2; + __kernel_time_t32 msg_rtime; + unsigned int __unused3; + __kernel_time_t32 msg_ctime; + unsigned int msg_cbytes; + unsigned int msg_qnum; + unsigned int msg_qbytes; + __kernel_pid_t32 msg_lspid; + __kernel_pid_t32 msg_lrpid; + unsigned int __unused4; + unsigned int __unused5; +}; + +struct shmid_ds32 { + struct ipc_perm shm_perm; + int shm_segsz; + __kernel_time_t32 shm_atime; + __kernel_time_t32 shm_dtime; + __kernel_time_t32 shm_ctime; + __kernel_ipc_pid_t32 shm_cpid; + __kernel_ipc_pid_t32 shm_lpid; + unsigned short shm_nattch; + unsigned short __unused; + unsigned int __unused2; + unsigned int __unused3; +}; + 
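+/* Layout note: the *64_ds32 structures reproduce the 32-bit PowerPC
+ * ABI layout, which leaves a pad word in front of each 32-bit time
+ * field - space reserved for a future 64-bit time_t (y2038) whose
+ * high-order half would land in the pad on this big-endian machine.
+ * That is also why the translation helpers below copy field by field
+ * instead of using a single memcpy() per structure.  A minimal sketch
+ * of the per-field pattern (illustrative only, not used by the code):
+ */
+#if 0
+static inline int put_time32(__kernel_time_t32 *dst, __kernel_time_t src)
+{
+	/* Truncation to the low 32 bits is the defined 32-bit ABI view. */
+	return __put_user((__kernel_time_t32)src, dst);
+}
+#endif
+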
+struct shmid64_ds32 { + struct ipc64_perm shm_perm; + unsigned int __unused1; + __kernel_time_t32 shm_atime; + unsigned int __unused2; + __kernel_time_t32 shm_dtime; + unsigned int __unused3; + __kernel_time_t32 shm_ctime; + unsigned int __unused4; + __kernel_size_t32 shm_segsz; + __kernel_pid_t32 shm_cpid; + __kernel_pid_t32 shm_lpid; + unsigned int shm_nattch; + unsigned int __unused5; + unsigned int __unused6; +}; + +/* + * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit + * emulation.. + * + * This is really horribly ugly. + */ +static long do_sys32_semctl(int first, int second, int third, void *uptr) +{ + union semun fourth; + u32 pad; + int err, err2; + mm_segment_t old_fs; + + if (!uptr) + return -EINVAL; + err = -EFAULT; + if (get_user(pad, (u32 *)uptr)) + return err; + if (third == SETVAL) + fourth.val = (int)pad; + else + fourth.__pad = (void *)A(pad); + switch (third & (~IPC_64)) { + + case IPC_INFO: + case IPC_RMID: + case SEM_INFO: + case GETVAL: + case GETPID: + case GETNCNT: + case GETZCNT: + case GETALL: + case SETALL: + case SETVAL: + err = sys_semctl(first, second, third, fourth); + break; + + case IPC_STAT: + case SEM_STAT: + if (third & IPC_64) { + struct semid64_ds s64; + struct semid64_ds32 *usp; + + usp = (struct semid64_ds32 *)A(pad); + fourth.__pad = &s64; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_semctl(first, second, third, fourth); + set_fs(old_fs); + err2 = copy_to_user(&usp->sem_perm, &s64.sem_perm, + sizeof(struct ipc64_perm)); + err2 |= __put_user(s64.sem_otime, &usp->sem_otime); + err2 |= __put_user(s64.sem_ctime, &usp->sem_ctime); + err2 |= __put_user(s64.sem_nsems, &usp->sem_nsems); + if (err2) + err = -EFAULT; + } else { + struct semid_ds s; + struct semid_ds32 *usp; + + usp = (struct semid_ds32 *)A(pad); + fourth.__pad = &s; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_semctl(first, second, third, fourth); + set_fs(old_fs); + err2 = copy_to_user(&usp->sem_perm, &s.sem_perm, + sizeof(struct ipc_perm)); + err2 |= __put_user(s.sem_otime, &usp->sem_otime); + err2 |= __put_user(s.sem_ctime, &usp->sem_ctime); + err2 |= __put_user(s.sem_nsems, &usp->sem_nsems); + if (err2) + err = -EFAULT; + } + break; + + case IPC_SET: + if (third & IPC_64) { + struct semid64_ds s64; + struct semid64_ds32 *usp; + + usp = (struct semid64_ds32 *)A(pad); + + err = get_user(s64.sem_perm.uid, &usp->sem_perm.uid); + err |= __get_user(s64.sem_perm.gid, + &usp->sem_perm.gid); + err |= __get_user(s64.sem_perm.mode, + &usp->sem_perm.mode); + if (err) + goto out; + fourth.__pad = &s64; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_semctl(first, second, third, fourth); + set_fs(old_fs); + + } else { + struct semid_ds s; + struct semid_ds32 *usp; + + usp = (struct semid_ds32 *)A(pad); + + err = get_user(s.sem_perm.uid, &usp->sem_perm.uid); + err |= __get_user(s.sem_perm.gid, + &usp->sem_perm.gid); + err |= __get_user(s.sem_perm.mode, + &usp->sem_perm.mode); + if (err) + goto out; + fourth.__pad = &s; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_semctl(first, second, third, fourth); + set_fs(old_fs); + } + break; + } +out: + return err; +} + +static int +do_sys32_msgsnd(int first, int second, int third, void *uptr) +{ + struct msgbuf *p; + struct msgbuf32 *up = (struct msgbuf32 *)uptr; + mm_segment_t old_fs; + int err; + + if (second < 0) + return -EINVAL; + + p = kmalloc(second + sizeof(struct msgbuf) + 4, GFP_USER); + if (!p) + return -ENOMEM; + err = get_user(p->mtype, &up->mtype); + err |= __copy_from_user(p->mtext, 
&up->mtext, second); + if (err) { + err = -EFAULT; + goto out; + } + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgsnd(first, p, second, third); + set_fs(old_fs); +out: + kfree(p); + return err; +} + +static int +do_sys32_msgrcv(int first, int second, int msgtyp, int third, + int version, void *uptr) +{ + struct msgbuf32 *up; + struct msgbuf *p; + mm_segment_t old_fs; + int err; + + if (second < 0) + return -EINVAL; + + if (!version) { + struct ipc_kludge *uipck = (struct ipc_kludge *)uptr; + struct ipc_kludge ipck; + + err = -EINVAL; + if (!uptr) + goto out; + err = -EFAULT; + if (copy_from_user(&ipck, uipck, sizeof(struct ipc_kludge))) + goto out; + uptr = (void *)A(ipck.msgp); + msgtyp = ipck.msgtyp; + } + err = -ENOMEM; + p = kmalloc(second + sizeof (struct msgbuf) + 4, GFP_USER); + if (!p) + goto out; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgrcv(first, p, second + 4, msgtyp, third); + set_fs(old_fs); + if (err < 0) + goto free_then_out; + up = (struct msgbuf32 *)uptr; + if (put_user(p->mtype, &up->mtype) || + __copy_to_user(&up->mtext, p->mtext, err)) + err = -EFAULT; +free_then_out: + kfree(p); +out: + return err; +} + +static int +do_sys32_msgctl(int first, int second, void *uptr) +{ + int err = -EINVAL, err2; + mm_segment_t old_fs; + + switch (second & (~IPC_64)) { + + case IPC_INFO: + case IPC_RMID: + case MSG_INFO: + err = sys_msgctl(first, second, (struct msqid_ds *)uptr); + break; + + case IPC_SET: + if (second & IPC_64) { + struct msqid64_ds m64; + struct msqid64_ds32 *up = (struct msqid64_ds32 *)uptr; + + err = copy_from_user(&m64.msg_perm, &up->msg_perm, + sizeof(struct ipc64_perm)); + err |= __get_user(m64.msg_qbytes, &up->msg_qbytes); + if (err) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgctl(first, second, + (struct msqid_ds *)&m64); + set_fs(old_fs); + } else { + struct msqid_ds m; + struct msqid_ds32 *up = (struct msqid_ds32 *)uptr; + + err = copy_from_user(&m.msg_perm, &up->msg_perm, + sizeof(struct ipc_perm)); + err |= __get_user(m.msg_qbytes, &up->msg_qbytes); + if (err) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgctl(first, second, &m); + set_fs(old_fs); + } + break; + + case IPC_STAT: + case MSG_STAT: + if (second & IPC_64) { + struct msqid64_ds m64; + struct msqid64_ds32 *up = (struct msqid64_ds32 *)uptr; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgctl(first, second, + (struct msqid_ds *)&m64); + set_fs(old_fs); + + err2 = copy_to_user(&up->msg_perm, &m64.msg_perm, + sizeof(struct ipc64_perm)); + err2 |= __put_user(m64.msg_stime, &up->msg_stime); + err2 |= __put_user(m64.msg_rtime, &up->msg_rtime); + err2 |= __put_user(m64.msg_ctime, &up->msg_ctime); + err2 |= __put_user(m64.msg_cbytes, &up->msg_cbytes); + err2 |= __put_user(m64.msg_qnum, &up->msg_qnum); + err2 |= __put_user(m64.msg_qbytes, &up->msg_qbytes); + err2 |= __put_user(m64.msg_lspid, &up->msg_lspid); + err2 |= __put_user(m64.msg_lrpid, &up->msg_lrpid); + if (err2) + err = -EFAULT; + } else { + struct msqid64_ds m; + struct msqid_ds32 *up = (struct msqid_ds32 *)uptr; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_msgctl(first, second, (struct msqid_ds *)&m); + set_fs(old_fs); + + err2 = copy_to_user(&up->msg_perm, &m.msg_perm, + sizeof(struct ipc_perm)); + err2 |= __put_user(m.msg_stime, &up->msg_stime); + err2 |= __put_user(m.msg_rtime, &up->msg_rtime); + err2 |= __put_user(m.msg_ctime, &up->msg_ctime); + err2 |= __put_user(m.msg_cbytes, &up->msg_cbytes); + err2 |= __put_user(m.msg_qnum, &up->msg_qnum); + err2 |= 
__put_user(m.msg_qbytes, &up->msg_qbytes); + err2 |= __put_user(m.msg_lspid, &up->msg_lspid); + err2 |= __put_user(m.msg_lrpid, &up->msg_lrpid); + if (err2) + err = -EFAULT; + } + break; + } + return err; +} + +static int +do_sys32_shmat(int first, int second, int third, int version, void *uptr) +{ + unsigned long raddr; + u32 *uaddr = (u32 *)A((u32)third); + int err = -EINVAL; + + if (version == 1) + return err; + err = sys_shmat(first, uptr, second, &raddr); + if (err) + return err; + err = put_user(raddr, uaddr); + return err; +} + +static int +do_sys32_shmctl(int first, int second, void *uptr) +{ + int err = -EFAULT, err2; + mm_segment_t old_fs; + + switch (second & (~IPC_64)) { + + case IPC_INFO: + case IPC_RMID: + case SHM_LOCK: + case SHM_UNLOCK: + err = sys_shmctl(first, second, (struct shmid_ds *)uptr); + break; + case IPC_SET: + if (second & IPC_64) { + struct shmid64_ds32 *up = (struct shmid64_ds32 *)uptr; + struct shmid64_ds s64; + + err = get_user(s64.shm_perm.uid, &up->shm_perm.uid); + err |= __get_user(s64.shm_perm.gid, &up->shm_perm.gid); + err |= __get_user(s64.shm_perm.mode, + &up->shm_perm.mode); + if (err) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_shmctl(first, second, + (struct shmid_ds *)&s64); + set_fs(old_fs); + } else { + struct shmid_ds32 *up = (struct shmid_ds32 *)uptr; + struct shmid_ds s; + + err = get_user(s.shm_perm.uid, &up->shm_perm.uid); + err |= __get_user(s.shm_perm.gid, &up->shm_perm.gid); + err |= __get_user(s.shm_perm.mode, &up->shm_perm.mode); + if (err) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_shmctl(first, second, &s); + set_fs(old_fs); + } + break; + + case IPC_STAT: + case SHM_STAT: + if (second & IPC_64) { + struct shmid64_ds32 *up = (struct shmid64_ds32 *)uptr; + struct shmid64_ds s64; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_shmctl(first, second, + (struct shmid_ds *)&s64); + set_fs(old_fs); + if (err < 0) + break; + + err2 = copy_to_user(&up->shm_perm, &s64.shm_perm, + sizeof(struct ipc64_perm)); + err2 |= __put_user(s64.shm_atime, &up->shm_atime); + err2 |= __put_user(s64.shm_dtime, &up->shm_dtime); + err2 |= __put_user(s64.shm_ctime, &up->shm_ctime); + err2 |= __put_user(s64.shm_segsz, &up->shm_segsz); + err2 |= __put_user(s64.shm_nattch, &up->shm_nattch); + err2 |= __put_user(s64.shm_cpid, &up->shm_cpid); + err2 |= __put_user(s64.shm_lpid, &up->shm_lpid); + if (err2) + err = -EFAULT; + } else { + struct shmid_ds32 *up = (struct shmid_ds32 *)uptr; + struct shmid_ds s; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_shmctl(first, second, &s); + set_fs(old_fs); + if (err < 0) + break; + + err2 = copy_to_user(&up->shm_perm, &s.shm_perm, + sizeof(struct ipc_perm)); + err2 |= __put_user (s.shm_atime, &up->shm_atime); + err2 |= __put_user (s.shm_dtime, &up->shm_dtime); + err2 |= __put_user (s.shm_ctime, &up->shm_ctime); + err2 |= __put_user (s.shm_segsz, &up->shm_segsz); + err2 |= __put_user (s.shm_nattch, &up->shm_nattch); + err2 |= __put_user (s.shm_cpid, &up->shm_cpid); + err2 |= __put_user (s.shm_lpid, &up->shm_lpid); + if (err2) + err = -EFAULT; + } + break; + + case SHM_INFO: { + struct shm_info si; + struct shm_info32 { + int used_ids; + u32 shm_tot, shm_rss, shm_swp; + u32 swap_attempts, swap_successes; + } *uip = (struct shm_info32 *)uptr; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_shmctl(first, second, (struct shmid_ds *)&si); + set_fs(old_fs); + if (err < 0) + break; + err2 = put_user(si.used_ids, &uip->used_ids); + err2 |= __put_user(si.shm_tot, 
&uip->shm_tot); + err2 |= __put_user(si.shm_rss, &uip->shm_rss); + err2 |= __put_user(si.shm_swp, &uip->shm_swp); + err2 |= __put_user(si.swap_attempts, &uip->swap_attempts); + err2 |= __put_user(si.swap_successes, &uip->swap_successes); + if (err2) + err = -EFAULT; + break; + } + } + return err; +} + +/* + * Note: it is necessary to treat first_parm, second_parm, and + * third_parm as unsigned ints, with the corresponding cast to a + * signed int to insure that the proper conversion (sign extension) + * between the register representation of a signed int (msr in 32-bit + * mode) and the register representation of a signed int (msr in + * 64-bit mode) is performed. + */ +asmlinkage long sys32_ipc(u32 call, u32 first_parm, u32 second_parm, u32 third_parm, u32 ptr, u32 fifth) +{ + int first = (int)first_parm; + int second = (int)second_parm; + int third = (int)third_parm; + int version, err; + + PPCDBG(PPCDBG_SYS32, "sys32_ipc - entered - call=%x, parm1=%x, parm2=%x, parm3=%x, parm4=%x, parm5=%x \n", + call, first_parm, second_parm, third_parm, ptr, fifth); + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + err = sys_semop(first, (struct sembuf *)AA(ptr), + second); + break; + case SEMGET: + err = sys_semget(first, second, third); + break; + case SEMCTL: + err = do_sys32_semctl(first, second, third, + (void *)AA(ptr)); + break; + + case MSGSND: + err = do_sys32_msgsnd(first, second, third, + (void *)AA(ptr)); + break; + case MSGRCV: + err = do_sys32_msgrcv(first, second, fifth, third, + version, (void *)AA(ptr)); + break; + case MSGGET: + err = sys_msgget((key_t)first, second); + break; + case MSGCTL: + err = do_sys32_msgctl(first, second, (void *)AA(ptr)); + break; + + case SHMAT: + err = do_sys32_shmat(first, second, third, + version, (void *)AA(ptr)); + break; + case SHMDT: + err = sys_shmdt((char *)AA(ptr)); + break; + case SHMGET: + err = sys_shmget(first, second, third); + break; + case SHMCTL: + err = do_sys32_shmctl(first, second, (void *)AA(ptr)); + break; + default: + err = -EINVAL; + break; + } + + + PPCDBG(PPCDBG_SYS32, "sys32_ipc - exited w/ %d/0x%x \n", err, err); + return err; +} + +/* stat syscall methods. */ +extern asmlinkage int sys_stat(char* filename, struct __old_kernel_stat* statbuf); + +static int cp_old_stat32(struct inode* inode, struct __old_kernel_stat32* statbuf) +{ + static int warncount = 5; + struct __old_kernel_stat32 tmp; + + if (warncount) { + warncount--; + printk("VFS: Warning: %s using old stat() call. Recompile your binary.\n", + current->comm); + } + + tmp.st_dev = kdev_t_to_nr(inode->i_dev); + tmp.st_ino = inode->i_ino; + tmp.st_mode = inode->i_mode; + tmp.st_nlink = inode->i_nlink; + SET_OLDSTAT_UID(tmp, inode->i_uid); + SET_OLDSTAT_GID(tmp, inode->i_gid); + tmp.st_rdev = kdev_t_to_nr(inode->i_rdev); + tmp.st_size = inode->i_size; + tmp.st_atime = inode->i_atime; + tmp.st_mtime = inode->i_mtime; + tmp.st_ctime = inode->i_ctime; + return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; +} + +asmlinkage long sys32_stat(char* filename, struct __old_kernel_stat32* statbuf) +{ + struct nameidata nd; + int error; + + PPCDBG(PPCDBG_SYS32X, "sys32_stat - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + error = user_path_walk(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); + if (!error) + error = cp_old_stat32(nd.dentry->d_inode, statbuf); + path_release(&nd); + } + + PPCDBG(PPCDBG_SYS32X, "sys32_stat - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return error; +} + +asmlinkage long sys32_fstat(unsigned int fd, struct __old_kernel_stat32* statbuf) +{ + struct file *f; + int err = -EBADF; + + PPCDBG(PPCDBG_SYS32X, "sys32_fstat - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + f = fget(fd); + if (f) { + struct dentry * dentry = f->f_dentry; + + err = do_revalidate(dentry); + if (!err) + err = cp_old_stat32(dentry->d_inode, statbuf); + fput(f); + } + + PPCDBG(PPCDBG_SYS32X, "sys32_fstat - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return err; +} + +asmlinkage long sys32_lstat(char* filename, struct __old_kernel_stat32* statbuf) +{ + struct nameidata nd; + int error; + + PPCDBG(PPCDBG_SYS32X, "sys32_lstat - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + error = user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); + if (!error) + error = cp_old_stat32(nd.dentry->d_inode, statbuf); + + path_release(&nd); + } + + PPCDBG(PPCDBG_SYS32X, "sys32_lstat - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + return error; +} + +extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t* offset, size_t count); + +/* Note: it is necessary to treat out_fd and in_fd as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sendfile(u32 out_fd, u32 in_fd, __kernel_off_t32* offset, u32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile((int)out_fd, (int)in_fd, offset ? 
&of : NULL, count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +extern asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen); + +asmlinkage long sys32_setsockopt(int fd, int level, int optname, char* optval, int optlen) +{ + + PPCDBG(PPCDBG_SYS32,"sys32_setsockopt - running - pid=%ld, comm=%s\n", current->pid, current->comm); + + if (optname == SO_ATTACH_FILTER) { + struct sock_fprog32 { + __u16 len; + __u32 filter; + } *fprog32 = (struct sock_fprog32 *)optval; + struct sock_fprog kfprog; + struct sock_filter *kfilter; + unsigned int fsize; + mm_segment_t old_fs; + __u32 uptr; + int ret; + + if (get_user(kfprog.len, &fprog32->len) || + __get_user(uptr, &fprog32->filter)) + return -EFAULT; + kfprog.filter = (struct sock_filter *)A(uptr); + fsize = kfprog.len * sizeof(struct sock_filter); + kfilter = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL); + if (kfilter == NULL) + return -ENOMEM; + if (copy_from_user(kfilter, kfprog.filter, fsize)) { + kfree(kfilter); + return -EFAULT; + } + kfprog.filter = kfilter; + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_setsockopt(fd, level, optname, + (char *)&kfprog, sizeof(kfprog)); + set_fs(old_fs); + kfree(kfilter); + return ret; + } + return sys_setsockopt(fd, level, optname, optval, optlen); +} + + + + +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, 24 for IPv6, about 80 for AX.25 */ +#define __CMSG32_NXTHDR(ctl, len, cmsg, cmsglen) __cmsg32_nxthdr((ctl),(len),(cmsg),(cmsglen)) +#define CMSG32_NXTHDR(mhdr, cmsg, cmsglen) cmsg32_nxthdr((mhdr), (cmsg), (cmsglen)) + +#define CMSG32_ALIGN(len) ( ((len)+sizeof(int)-1) & ~(sizeof(int)-1) ) + +#define CMSG32_DATA(cmsg) ((void *)((char *)(cmsg) + CMSG32_ALIGN(sizeof(struct cmsghdr32)))) +#define CMSG32_SPACE(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + CMSG32_ALIGN(len)) +#define CMSG32_LEN(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + (len)) +#define __CMSG32_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr32) ? 
\ + (struct cmsghdr32 *)(ctl) : \ + (struct cmsghdr32 *)NULL) +#define CMSG32_FIRSTHDR(msg) __CMSG32_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) + +struct msghdr32 +{ + u32 msg_name; + int msg_namelen; + u32 msg_iov; + __kernel_size_t32 msg_iovlen; + u32 msg_control; + __kernel_size_t32 msg_controllen; + unsigned msg_flags; +}; + +struct cmsghdr32 +{ + __kernel_size_t32 cmsg_len; + int cmsg_level; + int cmsg_type; +}; + +__inline__ struct cmsghdr32 *__cmsg32_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr32 *__cmsg, int __cmsg_len) +{ + struct cmsghdr32 * __ptr; + + __ptr = (struct cmsghdr32 *)(((unsigned char *) __cmsg) + + CMSG32_ALIGN(__cmsg_len)); + if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) + return NULL; + + return __ptr; +} + +__inline__ struct cmsghdr32 *cmsg32_nxthdr (struct msghdr *__msg, + struct cmsghdr32 *__cmsg, + int __cmsg_len) +{ + return __cmsg32_nxthdr(__msg->msg_control, __msg->msg_controllen, + __cmsg, __cmsg_len); +} + +extern __inline__ struct socket *socki_lookup(struct inode *inode) +{ + return &inode->u.socket_i; +} + +extern __inline__ struct socket *sockfd_lookup(int fd, int *err) +{ + struct file *file; + struct inode *inode; + + if (!(file = fget(fd))) + { + *err = -EBADF; + return NULL; + } + + inode = file->f_dentry->d_inode; + if (!inode || !inode->i_sock || !socki_lookup(inode)) + { + *err = -ENOTSOCK; + fput(file); + return NULL; + } + + return socki_lookup(inode); +} + +extern __inline__ void sockfd_put(struct socket *sock) +{ + fput(sock->file); +} + +static inline int msghdr_from_user32_to_kern(struct msghdr *kmsg, struct msghdr32 *umsg) +{ + u32 tmp1, tmp2, tmp3; + int err; + + err = get_user(tmp1, &umsg->msg_name); + err |= __get_user(tmp2, &umsg->msg_iov); + err |= __get_user(tmp3, &umsg->msg_control); + if (err) + return -EFAULT; + + kmsg->msg_name = (void *)A(tmp1); + kmsg->msg_iov = (struct iovec *)A(tmp2); + kmsg->msg_control = (void *)A(tmp3); + + err = get_user(kmsg->msg_namelen, &umsg->msg_namelen); + err |= get_user(kmsg->msg_iovlen, &umsg->msg_iovlen); + err |= get_user(kmsg->msg_controllen, &umsg->msg_controllen); + err |= get_user(kmsg->msg_flags, &umsg->msg_flags); + + return err; +} + +static inline int iov_from_user32_to_kern(struct iovec *kiov, + struct iovec32 *uiov32, + int niov) +{ + int tot_len = 0; + + while(niov > 0) { + u32 len, buf; + + if(get_user(len, &uiov32->iov_len) || + get_user(buf, &uiov32->iov_base)) { + tot_len = -EFAULT; + break; + } + tot_len += len; + kiov->iov_base = (void *)A(buf); + kiov->iov_len = (__kernel_size_t) len; + uiov32++; + kiov++; + niov--; + } + return tot_len; +} + +/* I've named the args so it is easy to tell whose space the pointers are in. 
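+ * On entry kern_msg is a kernel copy of the 32-bit msghdr, but its
+ * msg_name and msg_iov members still hold (widened) user-space
+ * pointers; this routine copies/verifies the user data and then
+ * repoints them at kern_address and kern_iov.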
*/ +static int verify_iovec32(struct msghdr *kern_msg, struct iovec *kern_iov, + char *kern_address, int mode) +{ + int tot_len; + + if(kern_msg->msg_namelen) { + if(mode==VERIFY_READ) { + int err = move_addr_to_kernel(kern_msg->msg_name, + kern_msg->msg_namelen, + kern_address); + if(err < 0) + return err; + } + kern_msg->msg_name = kern_address; + } else + kern_msg->msg_name = NULL; + + if(kern_msg->msg_iovlen > UIO_FASTIOV) { + kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec), + GFP_KERNEL); + if(!kern_iov) + return -ENOMEM; + } + + tot_len = iov_from_user32_to_kern(kern_iov, + (struct iovec32 *)kern_msg->msg_iov, + kern_msg->msg_iovlen); + if(tot_len >= 0) + kern_msg->msg_iov = kern_iov; + else if(kern_msg->msg_iovlen > UIO_FASTIOV) + kfree(kern_iov); + + return tot_len; +} + +/* There is a lot of hair here because the alignment rules (and + * thus placement) of cmsg headers and length are different for + * 32-bit apps. -DaveM + */ +static int cmsghdr_from_user32_to_kern(struct msghdr *kmsg, + unsigned char *stackbuf, int stackbuf_size) +{ + struct cmsghdr32 *ucmsg; + struct cmsghdr *kcmsg, *kcmsg_base; + __kernel_size_t32 ucmlen; + __kernel_size_t kcmlen, tmp; + + kcmlen = 0; + kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; + ucmsg = CMSG32_FIRSTHDR(kmsg); + while(ucmsg != NULL) { + if(get_user(ucmlen, &ucmsg->cmsg_len)) + return -EFAULT; + + /* Catch bogons. */ + if(CMSG32_ALIGN(ucmlen) < + CMSG32_ALIGN(sizeof(struct cmsghdr32))) + return -EINVAL; + if((unsigned long)(((char *)ucmsg - (char *)kmsg->msg_control) + + ucmlen) > kmsg->msg_controllen) + return -EINVAL; + + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmlen += tmp; + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + if (kcmlen == 0) + return -EINVAL; + + /* The kcmlen holds the 64-bit version of the control length. + * It may not be modified as we do not stick it into the kmsg + * until we have successfully copied over all of the data + * from the user. + */ + if (kcmlen > stackbuf_size) + kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); + if (kcmsg == NULL) + return -ENOBUFS; + + /* Now copy them over neatly. */ + memset(kcmsg, 0, kcmlen); + ucmsg = CMSG32_FIRSTHDR(kmsg); + while (ucmsg != NULL) { + __get_user(ucmlen, &ucmsg->cmsg_len); + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmsg->cmsg_len = tmp; + __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); + __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); + + /* Copy over the data. */ + if(copy_from_user(CMSG_DATA(kcmsg), + CMSG32_DATA(ucmsg), + (ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))))) + goto out_free_efault; + + /* Advance. */ + kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + + /* Ok, looks like we made it. Hook it up and return success. 
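+ * From here on kmsg->msg_control refers to the kernel-format buffer;
+ * when it was kmalloc()ed (kcmlen > stackbuf_size) the caller must
+ * free it, as sys32_sendmsg does via its ctl_buf != ctl check.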
*/ + kmsg->msg_control = kcmsg_base; + kmsg->msg_controllen = kcmlen; + return 0; + +out_free_efault: + if(kcmsg_base != (struct cmsghdr *)stackbuf) + kfree(kcmsg_base); + return -EFAULT; +} + +asmlinkage long sys32_sendmsg(int fd, struct msghdr32* user_msg, unsigned int user_flags) +{ + struct socket *sock; + char address[MAX_SOCK_ADDR]; + struct iovec iov[UIO_FASTIOV]; + unsigned char ctl[sizeof(struct cmsghdr) + 20]; + unsigned char *ctl_buf = ctl; + struct msghdr kern_msg; + int err, total_len; + + PPCDBG(PPCDBG_SYS32, "sys32_sendmsg - entered - fd=%x, user_msg@=%p, user_flags=%x \n", fd, user_msg, user_flags); + + if(msghdr_from_user32_to_kern(&kern_msg, user_msg)) + return -EFAULT; + if(kern_msg.msg_iovlen > UIO_MAXIOV) + return -EINVAL; + err = verify_iovec32(&kern_msg, iov, address, VERIFY_READ); + if (err < 0) + goto out; + total_len = err; + + if(kern_msg.msg_controllen) { + err = cmsghdr_from_user32_to_kern(&kern_msg, ctl, sizeof(ctl)); + if(err) + goto out_freeiov; + ctl_buf = kern_msg.msg_control; + } + kern_msg.msg_flags = user_flags; + + sock = sockfd_lookup(fd, &err); + if (sock != NULL) { + if (sock->file->f_flags & O_NONBLOCK) + kern_msg.msg_flags |= MSG_DONTWAIT; + err = sock_sendmsg(sock, &kern_msg, total_len); + sockfd_put(sock); + } + + /* N.B. Use kfree here, as kern_msg.msg_controllen might change? */ + if(ctl_buf != ctl) + kfree(ctl_buf); +out_freeiov: + if(kern_msg.msg_iov != iov) + kfree(kern_msg.msg_iov); +out: + + PPCDBG(PPCDBG_SYS32, "sys32_sendmsg - exited w/ %lx \n", err); + return err; +} + +static void put_cmsg32(struct msghdr *kmsg, int level, int type, + int len, void *data) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + struct cmsghdr32 cmhdr; + int cmlen = CMSG32_LEN(len); + + if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { + kmsg->msg_flags |= MSG_CTRUNC; + return; + } + + if (kmsg->msg_controllen < cmlen) { + kmsg->msg_flags |= MSG_CTRUNC; + cmlen = kmsg->msg_controllen; + } + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + + if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) + return; + if (copy_to_user(CMSG32_DATA(cm), data, cmlen - sizeof(struct cmsghdr32))) + return; + cmlen = CMSG32_SPACE(len); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; +} + + +static void scm_detach_fds32(struct msghdr *kmsg, struct scm_cookie *scm) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + int fdmax = (kmsg->msg_controllen - sizeof(struct cmsghdr32)) / sizeof(int); + int fdnum = scm->fp->count; + struct file **fp = scm->fp->fp; + int *cmfptr; + int err = 0, i; + + if (fdnum < fdmax) + fdmax = fdnum; + + for (i = 0, cmfptr = (int *) CMSG32_DATA(cm); i < fdmax; i++, cmfptr++) { + int new_fd; + err = get_unused_fd(); + if (err < 0) + break; + new_fd = err; + err = put_user(new_fd, cmfptr); + if (err) { + put_unused_fd(new_fd); + break; + } + /* Bump the usage count and install the file. */ + get_file(fp[i]); + fd_install(new_fd, fp[i]); + } + + if (i > 0) { + int cmlen = CMSG32_LEN(i * sizeof(int)); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SCM_RIGHTS, &cm->cmsg_type); + if (!err) + err = put_user(cmlen, &cm->cmsg_len); + if (!err) { + cmlen = CMSG32_SPACE(i * sizeof(int)); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; + } + } + if (i < fdnum) + kmsg->msg_flags |= MSG_CTRUNC; + + /* + * All of the files that fit in the message have had their + * usage counts incremented, so we just free the list. 
+ */ + __scm_destroy(scm); +} + +/* In these cases we (currently) can just copy to data over verbatim + * because all CMSGs created by the kernel have well defined types which + * have the same layout in both the 32-bit and 64-bit API. One must add + * some special cased conversions here if we start sending control messages + * with incompatible types. + * + * SCM_RIGHTS and SCM_CREDENTIALS are done by hand in recvmsg32 right after + * we do our work. The remaining cases are: + * + * SOL_IP IP_PKTINFO struct in_pktinfo 32-bit clean + * IP_TTL int 32-bit clean + * IP_TOS __u8 32-bit clean + * IP_RECVOPTS variable length 32-bit clean + * IP_RETOPTS variable length 32-bit clean + * (these last two are clean because the types are defined + * by the IPv4 protocol) + * IP_RECVERR struct sock_extended_err + + * struct sockaddr_in 32-bit clean + * SOL_IPV6 IPV6_RECVERR struct sock_extended_err + + * struct sockaddr_in6 32-bit clean + * IPV6_PKTINFO struct in6_pktinfo 32-bit clean + * IPV6_HOPLIMIT int 32-bit clean + * IPV6_FLOWINFO u32 32-bit clean + * IPV6_HOPOPTS ipv6 hop exthdr 32-bit clean + * IPV6_DSTOPTS ipv6 dst exthdr(s) 32-bit clean + * IPV6_RTHDR ipv6 routing exthdr 32-bit clean + * IPV6_AUTHHDR ipv6 auth exthdr 32-bit clean + */ +static void cmsg32_recvmsg_fixup(struct msghdr *kmsg, unsigned long orig_cmsg_uptr) +{ + unsigned char *workbuf, *wp; + unsigned long bufsz, space_avail; + struct cmsghdr *ucmsg; + + bufsz = ((unsigned long)kmsg->msg_control) - orig_cmsg_uptr; + space_avail = kmsg->msg_controllen + bufsz; + wp = workbuf = kmalloc(bufsz, GFP_KERNEL); + if(workbuf == NULL) + goto fail; + + /* To make this more sane we assume the kernel sends back properly + * formatted control messages. Because of how the kernel will truncate + * the cmsg_len for MSG_TRUNC cases, we need not check that case either. + */ + ucmsg = (struct cmsghdr *) orig_cmsg_uptr; + while(((unsigned long)ucmsg) <= + (((unsigned long)kmsg->msg_control) - sizeof(struct cmsghdr))) { + struct cmsghdr32 *kcmsg32 = (struct cmsghdr32 *) wp; + int clen64, clen32; + + /* UCMSG is the 64-bit format CMSG entry in user-space. + * KCMSG32 is within the kernel space temporary buffer + * we use to convert into a 32-bit style CMSG. + */ + __get_user(kcmsg32->cmsg_len, &ucmsg->cmsg_len); + __get_user(kcmsg32->cmsg_level, &ucmsg->cmsg_level); + __get_user(kcmsg32->cmsg_type, &ucmsg->cmsg_type); + + clen64 = kcmsg32->cmsg_len; + copy_from_user(CMSG32_DATA(kcmsg32), CMSG_DATA(ucmsg), + clen64 - CMSG_ALIGN(sizeof(*ucmsg))); + clen32 = ((clen64 - CMSG_ALIGN(sizeof(*ucmsg))) + + CMSG32_ALIGN(sizeof(struct cmsghdr32))); + kcmsg32->cmsg_len = clen32; + + switch (kcmsg32->cmsg_type) { + /* + * The timestamp type's data needs to be converted + * from 64-bit time values to 32-bit time values + */ + case SO_TIMESTAMP: { + __kernel_time_t32* ptr_time32 = CMSG32_DATA(kcmsg32); + __kernel_time_t* ptr_time = CMSG_DATA(ucmsg); + *ptr_time32 = *ptr_time; + *(ptr_time32+1) = *(ptr_time+1); + kcmsg32->cmsg_len -= 2*(sizeof(__kernel_time_t) - + sizeof(__kernel_time_t32)); + } + default:; + } + + ucmsg = (struct cmsghdr *) (((char *)ucmsg) + CMSG_ALIGN(clen64)); + wp = (((char *)kcmsg32) + CMSG32_ALIGN(kcmsg32->cmsg_len)); + } + + /* Copy back fixed up data, and adjust pointers. 
+
+asmlinkage long sys32_recvmsg(int fd, struct msghdr32* user_msg, unsigned int user_flags)
+{
+	struct iovec iovstack[UIO_FASTIOV];
+	struct msghdr kern_msg;
+	char addr[MAX_SOCK_ADDR];
+	struct socket *sock;
+	struct iovec *iov = iovstack;
+	struct sockaddr *uaddr;
+	int *uaddr_len;
+	unsigned long cmsg_ptr;
+	int err, total_len, len = 0;
+
+	PPCDBG(PPCDBG_SYS32, "sys32_recvmsg - entered - fd=%x, user_msg@=%p, user_flags=%x \n", fd, user_msg, user_flags);
+
+	if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+		return -EFAULT;
+	if(kern_msg.msg_iovlen > UIO_MAXIOV)
+		return -EINVAL;
+
+	uaddr = kern_msg.msg_name;
+	uaddr_len = &user_msg->msg_namelen;
+	err = verify_iovec32(&kern_msg, iov, addr, VERIFY_WRITE);
+	if (err < 0)
+		goto out;
+	total_len = err;
+
+	cmsg_ptr = (unsigned long) kern_msg.msg_control;
+	kern_msg.msg_flags = 0;
+
+	sock = sockfd_lookup(fd, &err);
+	if (sock != NULL) {
+		struct scm_cookie scm;
+
+		if (sock->file->f_flags & O_NONBLOCK)
+			user_flags |= MSG_DONTWAIT;
+		memset(&scm, 0, sizeof(scm));
+		err = sock->ops->recvmsg(sock, &kern_msg, total_len,
+					 user_flags, &scm);
+		if(err >= 0) {
+			len = err;
+			if(!kern_msg.msg_control) {
+				if(sock->passcred || scm.fp)
+					kern_msg.msg_flags |= MSG_CTRUNC;
+				if(scm.fp)
+					__scm_destroy(&scm);
+			} else {
+				/* If recvmsg processing itself placed some
+				 * control messages into user space, it is
+				 * using 64-bit CMSG processing, so we need
+				 * to fix it up before we tack on more stuff.
+				 */
+				if((unsigned long) kern_msg.msg_control != cmsg_ptr)
+					cmsg32_recvmsg_fixup(&kern_msg, cmsg_ptr);
+
+				/* Wheee... */
+				if(sock->passcred)
+					put_cmsg32(&kern_msg,
+						   SOL_SOCKET, SCM_CREDENTIALS,
+						   sizeof(scm.creds), &scm.creds);
+				if(scm.fp != NULL)
+					scm_detach_fds32(&kern_msg, &scm);
+			}
+		}
+		sockfd_put(sock);
+	}
+
+	if (uaddr != NULL && err >= 0 && kern_msg.msg_namelen)
+		err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr, uaddr_len);
+	if(cmsg_ptr != 0 && err >= 0) {
+		unsigned long ucmsg_ptr = ((unsigned long)kern_msg.msg_control);
+		__kernel_size_t32 uclen = (__kernel_size_t32) (ucmsg_ptr - cmsg_ptr);
+		err |= __put_user(uclen, &user_msg->msg_controllen);
+	}
+	if(err >= 0)
+		err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags);
+	if(kern_msg.msg_iov != iov)
+		kfree(kern_msg.msg_iov);
+out:
+	if(err < 0)
+		return err;
+
+	PPCDBG(PPCDBG_SYS32, "sys32_recvmsg - exited w/ %lx \n", len);
+	return len;
+}
+
+/*
+ * count32() counts the number of arguments/envelopes
+ */
+static int count32(u32 * argv, int max)
+{
+	int i = 0;
+
+	if (argv != NULL) {
+		for (;;) {
+			u32 p; int error;
+
+			error = get_user(p,argv);
+			if (error)
+				return error;
+			if (!p)
+				break;
+			argv++;
+			if (++i > max)
+				return -E2BIG;
+		}
+	}
+	return i;
+}
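+
+/*
+ * Editorial note -- not part of the original patch.  A() and AA(), used
+ * below to turn 32-bit user-space "pointers" (u32 values) back into
+ * dereferenceable 64-bit addresses, are assumed to be the usual compat
+ * helpers defined elsewhere in this patch, e.g.:
+ *
+ *	#define A(__x)	((unsigned long)(__x))
+ *	#define AA(__x)	((unsigned long)(__x))
+ */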
+ */ +static int copy_strings32(int argc, u32 * argv, struct linux_binprm *bprm) +{ + while (argc-- > 0) { + u32 str; + int len; + unsigned long pos; + + if (get_user(str, argv + argc) || + !str || + !(len = strnlen_user((char *)A(str), bprm->p))) + return -EFAULT; + + if (bprm->p < len) + return -E2BIG; + + bprm->p -= len; + + pos = bprm->p; + while (len) { + char *kaddr; + struct page *page; + int offset, bytes_to_copy, new, err; + + offset = pos % PAGE_SIZE; + page = bprm->page[pos / PAGE_SIZE]; + new = 0; + if (!page) { + page = alloc_page(GFP_USER); + bprm->page[pos / PAGE_SIZE] = page; + if (!page) + return -ENOMEM; + new = 1; + } + kaddr = (char *)kmap(page); + + if (new && offset) + memset(kaddr, 0, offset); + bytes_to_copy = PAGE_SIZE - offset; + if (bytes_to_copy > len) { + bytes_to_copy = len; + if (new) + memset(kaddr+offset+len, 0, + PAGE_SIZE-offset-len); + } + + err = copy_from_user(kaddr + offset, (char *)A(str), + bytes_to_copy); + flush_page_to_ram(page); + kunmap((unsigned long)kaddr); + + if (err) + return -EFAULT; + + pos += bytes_to_copy; + str += bytes_to_copy; + len -= bytes_to_copy; + } + } + return 0; +} + +/* + * sys32_execve() executes a new program. + */ +static int do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) +{ + struct linux_binprm bprm; + struct file * file; + int retval; + int i; + + bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); + + file = open_exec(filename); + + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + bprm.file = file; + bprm.filename = filename; + bprm.sh_bang = 0; + bprm.loader = 0; + bprm.exec = 0; + if ((bprm.argc = count32(argv, bprm.p / sizeof(u32))) < 0) { + allow_write_access(file); + fput(file); + return bprm.argc; + } + if ((bprm.envc = count32(envp, bprm.p / sizeof(u32))) < 0) { + allow_write_access(file); + fput(file); + return bprm.argc; + } + + retval = prepare_binprm(&bprm); + if (retval < 0) + goto out; + + retval = copy_strings_kernel(1, &bprm.filename, &bprm); + if (retval < 0) + goto out; + + bprm.exec = bprm.p; + retval = copy_strings32(bprm.envc, envp, &bprm); + if (retval < 0) + goto out; + + retval = copy_strings32(bprm.argc, argv, &bprm); + if (retval < 0) + goto out; + + retval = search_binary_handler(&bprm, regs); + if (retval >= 0) + /* execve success */ + return retval; + +out: + /* Something went wrong, return the inode and free the argument pages*/ + allow_write_access(bprm.file); + if (bprm.file) + fput(bprm.file); + + for (i=0 ; ipid, current->comm); + //PPCDBG(PPCDBG_SYS32NI, " a0=%lx, a1=%lx, a2=%lx, a3=%lx, a4=%lx, a5=%lx, regs=%p \n", a0, a1, a2, a3, a4, a5, regs); + } + + filename = getname((char *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + if (regs->msr & MSR_FP) + giveup_fpu(current); + + error = do_execve32(filename, (u32*) a1, (u32*) a2, regs); + + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + +out: + ifppcdebug(PPCDBG_SYS32) { + udbg_printf("sys32_execve - exited - returning %x - pid=%ld \n", error, current->pid); + //udbg_printf("sys32_execve - at exit - regs->gpr[1]=%lx, gpr[3]=%lx, gpr[4]=%lx, gpr[5]=%lx, gpr[6]=%lx \n", regs->gpr[1], regs->gpr[3], regs->gpr[4], regs->gpr[5], regs->gpr[6]); + } + return error; +} + +/* Set up a thread for executing a new program. 
+/* Set up a thread for executing a new program. */
+void start_thread32(struct pt_regs* regs, unsigned long nip, unsigned long sp)
+{
+	set_fs(USER_DS);
+	memset(regs->gpr, 0, sizeof(regs->gpr));
+	/* clear ctr, link, xer and ccr, which are adjacent in pt_regs */
+	memset(&regs->ctr, 0, 4 * sizeof(regs->ctr));
+	regs->nip = nip;
+	regs->gpr[1] = sp;
+	regs->msr = MSR_USER32;
+	if (last_task_used_math == current)
+		last_task_used_math = 0;
+	current->thread.fpscr = 0;
+}
+
+extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+				unsigned long arg4, unsigned long arg5);
+
+/* Note: it is necessary to treat option as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long sys32_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
+{
+	PPCDBG(PPCDBG_SYS32, "sys32_prctl - running - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm);
+
+	return sys_prctl((int)option,
+			 (unsigned long) arg2,
+			 (unsigned long) arg3,
+			 (unsigned long) arg4,
+			 (unsigned long) arg5);
+}
+
+extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval);
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage int sys32_sched_rr_get_interval(u32 pid, struct timespec32 *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs ();
+
+	PPCDBG(PPCDBG_SYS32, "sys32_sched_rr_get_interval - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+	set_fs (KERNEL_DS);
+	ret = sys_sched_rr_get_interval((int)pid, &t);
+	set_fs (old_fs);
+	if (put_user (t.tv_sec, &interval->tv_sec) ||
+	    __put_user (t.tv_nsec, &interval->tv_nsec))
+		return -EFAULT;
+
+	PPCDBG(PPCDBG_SYS32, "sys32_sched_rr_get_interval - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+	return ret;
+}
+
+extern asmlinkage int sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off,
+					 unsigned long len, unsigned char *buf);
+
+asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+	PPCDBG(PPCDBG_SYS32, "sys32_pciconfig_read - running - pid=%ld current=%lx comm=%s\n", current->pid, current, current->comm);
+
+	return sys_pciconfig_read((unsigned long) bus,
+				  (unsigned long) dfn,
+				  (unsigned long) off,
+				  (unsigned long) len,
+				  (unsigned char *)AA(ubuf));
+}
+
+extern asmlinkage int sys_pciconfig_write(unsigned long bus, unsigned long dfn, unsigned long off,
+					  unsigned long len, unsigned char *buf);
+
+asmlinkage int sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+	PPCDBG(PPCDBG_SYS32, "sys32_pciconfig_write - running - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+	return sys_pciconfig_write((unsigned long) bus,
+				   (unsigned long) dfn,
+				   (unsigned long) off,
+				   (unsigned long) len,
+				   (unsigned char *)AA(ubuf));
+}
+
+extern asmlinkage int sys_newuname(struct new_utsname * name);
+
+asmlinkage int ppc64_newuname(struct new_utsname * name)
+{
+	int ret = sys_newuname(name);
+
+	if (current->personality == PER_LINUX32 && !ret) {
+		ret = copy_to_user(name->machine, "ppc\0\0\0\0", 8);
+	}
+	return ret;
+}
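+
+/*
+ * Editorial sketch -- not part of the original patch.  The effect of
+ * ppc64_newuname() above is visible from a PER_LINUX32 process (user
+ * space, illustrative only):
+ *
+ *	struct utsname u;
+ *	uname(&u);
+ *	printf("%s\n", u.machine);	// "ppc" instead of "ppc64"
+ *
+ * sys32_personality() below keeps up the same fiction when a 32-bit
+ * task asks for PER_LINUX.
+ */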
+
+extern asmlinkage long sys_personality(unsigned long);
+
+asmlinkage int sys32_personality(unsigned long personality)
+{
+	int ret;
+	if (current->personality == PER_LINUX32 && personality == PER_LINUX)
+		personality = PER_LINUX32;
+	ret = sys_personality(personality);
+	if (ret == PER_LINUX32)
+		ret = PER_LINUX;
+	return ret;
+}
+
+extern asmlinkage long sys_access(const char * filename, int mode);
+
+/* Note: it is necessary to treat mode as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long sys32_access(const char * filename, u32 mode)
+{
+	return sys_access(filename, (int)mode);
+}
+
+extern asmlinkage int sys_clone(int p1, int p2, int p3, int p4, int p5, int p6, struct pt_regs *regs);
+
+/* Note: it is necessary to treat p1, p2, p3, p4, p5, and p6 as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage int sys32_clone(u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, struct pt_regs *regs)
+{
+	return sys_clone((int)p1, (int)p2, (int)p3, (int)p4, (int)p5, (int)p6, regs);
+}
+
+extern asmlinkage long sys_creat(const char * pathname, int mode);
+
+/* Note: it is necessary to treat mode as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long sys32_creat(const char * pathname, u32 mode)
+{
+	return sys_creat(pathname, (int)mode);
+}
+
+extern asmlinkage long sys_exit(int error_code);
+
+/* Note: it is necessary to treat error_code as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long sys32_exit(u32 error_code)
+{
+	return sys_exit((int)error_code);
+}
+
+extern asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, struct rusage * ru);
+
+/* Note: it is necessary to treat pid and options as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long sys32_wait4(u32 pid, unsigned int * stat_addr, u32 options, struct rusage * ru)
+{
+	PPCDBG(PPCDBG_SYS32, "sys32_wait4 - running - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm);
+
+	if (!ru)
+		return sys_wait4((int)pid, stat_addr, options, NULL);
+	else {
+		struct rusage r;
+		int ret;
+		unsigned int status;
+		mm_segment_t old_fs = get_fs();
+
+		set_fs (KERNEL_DS);
+		ret = sys_wait4((int)pid, stat_addr ?
&status : NULL, options, &r); + set_fs (old_fs); + if (put_rusage ((struct rusage32 *)ru, &r)) return -EFAULT; + if (stat_addr && put_user (status, stat_addr)) + return -EFAULT; + return ret; + } + +} + + +extern asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); + +/* Note: it is necessary to treat pid and options as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_waitpid(u32 pid, unsigned int * stat_addr, u32 options) +{ + return sys_waitpid((int)pid, stat_addr, (int)options); +} + + +extern asmlinkage int sys_fork(int p1, int p2, int p3, int p4, int p5, int p6, struct pt_regs *regs); + +/* Note: it is necessary to treat p1, p2, p3, p4, p5, and p6 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage int sys32_fork(u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, struct pt_regs *regs) +{ + return sys_fork((int)p1, (int)p2, (int)p3, (int)p4, (int)p5, (int)p6, regs); +} + + +extern asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist); + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getgroups(u32 gidsetsize, gid_t *grouplist) +{ + return sys_getgroups((int)gidsetsize, grouplist); +} + + +extern asmlinkage long sys_getpgid(pid_t pid); + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getpgid(u32 pid) +{ + return sys_getpgid((int)pid); +} + + +extern asmlinkage long sys_getpriority(int which, int who); + +/* Note: it is necessary to treat which and who as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getpriority(u32 which, u32 who) +{ + return sys_getpriority((int)which, (int)who); +} + + +extern asmlinkage long sys_getsid(pid_t pid); + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
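+ */
+
+/*
+ * Editorial note -- not part of the original patch.  put_rusage(), used
+ * by sys32_wait4() above, narrows the 64-bit struct rusage into a 32-bit
+ * layout assumed to mirror it field for field, e.g.:
+ *
+ *	struct rusage32 {
+ *		struct timeval32 ru_utime;	// 32-bit tv_sec/tv_usec
+ *		struct timeval32 ru_stime;
+ *		s32 ru_maxrss;
+ *		s32 ru_ixrss;
+ *		...				// remaining longs as s32
+ *	};
+ */
+
+/* As noted above, pid is sign-extended here as well.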
+ */ +asmlinkage long sys32_getsid(u32 pid) +{ + return sys_getsid((int)pid); +} + + +extern asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); + +/* Note: it is necessary to treat on as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_ioperm(unsigned long from, unsigned long num, u32 on) +{ + return sys_ioperm(from, num, (int)on); +} + + +extern asmlinkage int sys_iopl(int a1, int a2, int a3, int a4); + +/* Note: it is necessary to treat a1, a2, a3, and a4 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage int sys32_iopl(u32 a1, u32 a2, u32 a3, u32 a4) +{ + return sys_iopl((int)a1, (int)a2, (int)a3, (int)a4); +} + + +extern asmlinkage long sys_kill(int pid, int sig); + +/* Note: it is necessary to treat pid and sig as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_kill(u32 pid, u32 sig) +{ + return sys_kill((int)pid, (int)sig); +} + + +extern asmlinkage long sys_mkdir(const char * pathname, int mode); + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_mkdir(const char * pathname, u32 mode) +{ + return sys_mkdir(pathname, (int)mode); +} + + +extern asmlinkage long sys_mlockall(int flags); + +/* Note: it is necessary to treat flags as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_mlockall(u32 flags) +{ + return sys_mlockall((int)flags); +} + + +extern asmlinkage int sys_modify_ldt(int a1, int a2, int a3, int a4); + +/* Note: it is necessary to treat a1, a2, a3, and a4 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage int sys32_modify_ldt(u32 a1, u32 a2, u32 a3, u32 a4) +{ + return sys_modify_ldt((int)a1, (int)a2, (int)a3, (int)a4); +} + + +extern asmlinkage long sys_msync(unsigned long start, size_t len, int flags); + +/* Note: it is necessary to treat flags as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_msync(unsigned long start, size_t len, u32 flags) +{ + return sys_msync(start, len, (int)flags); +} + + +extern asmlinkage long sys_nice(int increment); + +/* Note: it is necessary to treat increment as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_nice(u32 increment) +{ + return sys_nice((int)increment); +} + + +extern asmlinkage long sys_open(const char * filename, int flags, int mode); + +/* Note: it is necessary to treat flags and mode as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_open(const char * filename, int flags, int mode) +{ + return sys_open(filename, (int)flags, (int)mode); +} + + +extern asmlinkage long sys_readlink(const char * path, char * buf, int bufsiz); + +/* Note: it is necessary to treat bufsiz as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_readlink(const char * path, char * buf, u32 bufsiz) +{ + return sys_readlink(path, buf, (int)bufsiz); +} + + +extern asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg); + +/* Note: it is necessary to treat magic1 and magic2 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_reboot(u32 magic1, u32 magic2, unsigned int cmd, void * arg) +{ + return sys_reboot((int)magic1, (int)magic2, cmd, arg); +} + + +extern asmlinkage long sys_sched_get_priority_max(int policy); + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long sys32_sched_get_priority_max(u32 policy) +{ + return sys_sched_get_priority_max((int)policy); +} + + +extern asmlinkage long sys_sched_get_priority_min(int policy); + +/* Note: it is necessary to treat policy as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_get_priority_min(u32 policy) +{ + return sys_sched_get_priority_min((int)policy); +} + + +extern asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param); + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_getparam(u32 pid, struct sched_param *param) +{ + return sys_sched_getparam((int)pid, param); +} + + +extern asmlinkage long sys_sched_getscheduler(pid_t pid); + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_getscheduler(u32 pid) +{ + return sys_sched_getscheduler((int)pid); +} + + +extern asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param); + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_setparam(u32 pid, struct sched_param *param) +{ + return sys_sched_setparam((int)pid, param); +} + + +extern asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param); + +/* Note: it is necessary to treat pid and policy as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_setscheduler(u32 pid, u32 policy, struct sched_param *param) +{ + return sys_sched_setscheduler((int)pid, (int)policy, param); +} + + +extern asmlinkage long sys_setdomainname(char *name, int len); + +/* Note: it is necessary to treat len as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long sys32_setdomainname(char *name, u32 len) +{ + return sys_setdomainname(name, (int)len); +} + + +extern asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist); + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setgroups(u32 gidsetsize, gid_t *grouplist) +{ + return sys_setgroups((int)gidsetsize, grouplist); +} + + +extern asmlinkage long sys_sethostname(char *name, int len); + +/* Note: it is necessary to treat len as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sethostname(char *name, u32 len) +{ + return sys_sethostname(name, (int)len); +} + + +extern asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); + +/* Note: it is necessary to treat pid and pgid as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setpgid(u32 pid, u32 pgid) +{ + return sys_setpgid((int)pid, (int)pgid); +} + + +extern asmlinkage long sys_setpriority(int which, int who, int niceval); + +/* Note: it is necessary to treat which, who, and niceval as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setpriority(u32 which, u32 who, u32 niceval) +{ + return sys_setpriority((int)which, (int)who, (int)niceval); +} + + +extern asmlinkage long sys_ssetmask(int newmask); + +/* Note: it is necessary to treat newmask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_ssetmask(u32 newmask) +{ + return sys_ssetmask((int) newmask); +} + + +extern asmlinkage long sys_swapon(const char * specialfile, int swap_flags); + +/* Note: it is necessary to treat swap_flags as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long sys32_swapon(const char * specialfile, u32 swap_flags) +{ + return sys_swapon(specialfile, (int)swap_flags); +} + + +extern asmlinkage long sys_syslog(int type, char * buf, int len); + +/* Note: it is necessary to treat type and len as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_syslog(u32 type, char * buf, u32 len) +{ + return sys_syslog((int)type, buf, (int)len); +} + + +extern asmlinkage long sys_umask(int mask); + +/* Note: it is necessary to treat mask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_umask(u32 mask) +{ + return sys_umask((int)mask); +} + + +extern asmlinkage long sys_umount(char * name, int flags); + +/* Note: it is necessary to treat flags as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_umount(char * name, u32 flags) +{ + return sys_umount(name, (int)flags); +} + + +extern asmlinkage int sys_vfork(int p1, int p2, int p3, int p4, int p5, int p6, struct pt_regs *regs); + +/* Note: it is necessary to treat p1, p2, p3, p4, p5, and p6 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage int sys32_vfork(u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, struct pt_regs *regs) +{ + return sys_vfork((int)p1, (int)p2, (int)p3, (int)p4, (int)p5, (int)p6, regs); +} + + +extern asmlinkage int sys_vm86(int a1, int a2, int a3, int a4); + +/* Note: it is necessary to treat a1, a2, a3, and a4 as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage int sys32_vm86(u32 a1, u32 a2, u32 a3, u32 a4) +{ + return sys_vm86((int)a1, (int)a2, (int)a3, (int)a4); +} + + + + + +extern asmlinkage ssize_t sys_pread(unsigned int fd, char * buf, + size_t count, loff_t pos); + +extern asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf, + size_t count, loff_t pos); + +typedef __kernel_ssize_t32 ssize_t32; + +asmlinkage ssize_t32 sys32_pread(unsigned int fd, char *ubuf, + __kernel_size_t32 count, u32 reg6, u32 poshi, u32 poslo) +{ + return sys_pread(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +asmlinkage ssize_t32 sys32_pwrite(unsigned int fd, char *ubuf, + __kernel_size_t32 count, u32 reg6 ,u32 poshi, u32 poslo) +{ + return sys_pwrite(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + + +extern asmlinkage long sys_truncate(const char * path, unsigned long length); +extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); + +asmlinkage int sys32_truncate64(const char * path, u32 reg4, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage int sys32_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_ftruncate(fd, (high << 32) | low); +} + + + +asmlinkage long sys32_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + if (cmd >= F_GETLK64 && cmd <= F_SETLKW64) + return sys_fcntl(fd, cmd + F_GETLK - F_GETLK64, arg); + return sys32_fcntl(fd, cmd, arg); +} + + + + +struct __sysctl_args32 { + u32 name; + int nlen; + u32 oldval; + u32 oldlenp; + u32 newval; + u32 newlen; + u32 __unused[4]; +}; + +extern asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) +{ + struct __sysctl_args32 tmp; + int error; + size_t oldlen, *oldlenp = NULL; + unsigned long addr = (((long)&args->__unused[0]) + 7) & ~7; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + if (tmp.oldval && tmp.oldlenp) { + /* Duh, this is ugly and might not work if sysctl_args + is in read-only memory, but do_sysctl does indirectly + a lot of uaccess in both directions and we'd have to + basically copy the whole sysctl.c here, and + glibc's __sysctl uses rw memory for the structure + anyway. 
*/ + if (get_user(oldlen, (u32 *)A(tmp.oldlenp)) || + put_user(oldlen, (size_t *)addr)) + return -EFAULT; + oldlenp = (size_t *)addr; + } + + lock_kernel(); + error = do_sysctl((int *)A(tmp.name), tmp.nlen, (void *)A(tmp.oldval), + oldlenp, (void *)A(tmp.newval), tmp.newlen); + unlock_kernel(); + if (oldlenp) { + if (!error) { + if (get_user(oldlen, (size_t *)addr) || + put_user(oldlen, (u32 *)A(tmp.oldlenp))) + error = -EFAULT; + } + copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); + } + return error; +} + +asmlinkage long sys32_time(__kernel_time_t32* tloc) +{ + __kernel_time_t32 secs; + __kernel_time_t32 usecs; + + long tb_delta = tb_ticks_since(tb_last_stamp); + tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy; + + secs = xtime.tv_sec; + usecs = xtime.tv_usec + tb_delta / tb_ticks_per_usec; + while (usecs >= USEC_PER_SEC) { + ++secs; + usecs -= USEC_PER_SEC; + } + + if (tloc) { + if (put_user(secs,tloc)) + secs = -EFAULT; + } + + return secs; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/syscalls.c linuxppc64_2_4/arch/ppc64/kernel/syscalls.c --- ../kernel.org/linux/arch/ppc64/kernel/syscalls.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/syscalls.c Tue Nov 20 17:02:58 2001 @@ -0,0 +1,316 @@ +/* + * linux/arch/ppc/kernel/sys_ppc.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * Adapted from the i386 version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@cs.anu.edu.au). + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/PPC + * platform. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +extern unsigned long wall_jiffies; +#define USEC_PER_SEC (1000000) + +void +check_bugs(void) +{ +} + +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +{ + printk(KERN_ERR "sys_ioperm()\n"); + return -EIO; +} + +int sys_iopl(int a1, int a2, int a3, int a4) +{ + printk(KERN_ERR "sys_iopl(%x, %x, %x, %x)!\n", a1, a2, a3, a4); + return (-ENOSYS); +} + +int sys_vm86(int a1, int a2, int a3, int a4) +{ + printk(KERN_ERR "sys_vm86(%x, %x, %x, %x)!\n", a1, a2, a3, a4); + return (-ENOSYS); +} + +int sys_modify_ldt(int a1, int a2, int a3, int a4) +{ + printk(KERN_ERR "sys_modify_ldt(%x, %x, %x, %x)!\n", a1, a2, a3, a4); + return (-ENOSYS); +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. 
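+ * (A small usage sketch follows.)
+ */
+
+/*
+ * Editorial sketch -- not part of the original patch.  On 2.4, libc
+ * funnels all SysV IPC calls through this one entry point; e.g. a
+ * semop(2) call arrives here roughly as (user space, illustrative):
+ *
+ *	struct sembuf op = { 0, -1, 0 };	// sem 0, down by 1
+ *	syscall(SYS_ipc, SEMOP, semid, 1, 0, &op, 0);
+ *
+ * The high 16 bits of 'call' carry a version number used to pick
+ * between the old and new argument layouts for MSGRCV and SHMAT.
+ */
+
+/* The dispatcher itself: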
+ */ +asmlinkage int +sys_ipc (uint call, int first, int second, long third, void *ptr, long fifth) +{ + int version, ret; + + PPCDBG(PPCDBG_SYS64X, "sys_ipc - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + ret = -EINVAL; + switch (call) { + case SEMOP: + ret = sys_semop (first, (struct sembuf *)ptr, second); + break; + case SEMGET: + ret = sys_semget (first, second, third); + break; + case SEMCTL: { + union semun fourth; + + if (!ptr) + break; + if ((ret = verify_area (VERIFY_READ, ptr, sizeof(long))) + || (ret = get_user(fourth.__pad, (void **)ptr))) + break; + ret = sys_semctl (first, second, third, fourth); + break; + } + case MSGSND: + ret = sys_msgsnd (first, (struct msgbuf *) ptr, second, third); + break; + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + + if (!ptr) + break; + if ((ret = verify_area (VERIFY_READ, ptr, sizeof(tmp))) + || (ret = copy_from_user(&tmp, + (struct ipc_kludge *) ptr, + sizeof (tmp)))) + break; + ret = sys_msgrcv (first, (struct msgbuf *)(unsigned long)tmp.msgp, + second, tmp.msgtyp, third); + break; + } + default: + ret = sys_msgrcv (first, (struct msgbuf *) ptr, + second, fifth, third); + break; + } + break; + case MSGGET: + ret = sys_msgget ((key_t) first, second); + break; + case MSGCTL: + ret = sys_msgctl (first, second, (struct msqid_ds *) ptr); + break; + case SHMAT: + switch (version) { + default: { + ulong raddr; + + if ((ret = verify_area(VERIFY_WRITE, (ulong*) third, + sizeof(ulong)))) + break; + ret = sys_shmat (first, (char *) ptr, second, &raddr); + if (ret) + break; + ret = put_user (raddr, (ulong *) third); + break; + } + case 1: /* iBCS2 emulator entry point */ + if (!segment_eq(get_fs(), get_ds())) + break; + ret = sys_shmat (first, (char *) ptr, second, + (ulong *) third); + break; + } + break; + case SHMDT: + ret = sys_shmdt ((char *)ptr); + break; + case SHMGET: + ret = sys_shmget (first, second, third); + break; + case SHMCTL: + ret = sys_shmctl (first, second, (struct shmid_ds *) ptr); + break; + } + + PPCDBG(PPCDBG_SYS64X, "sys_ipc - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return ret; +} + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way unix traditionally does this, though. 
+ */ +asmlinkage int sys_pipe(int *fildes) +{ + int fd[2]; + int error; + + PPCDBG(PPCDBG_SYS64X, "sys_pipe - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, 2*sizeof(int))) + error = -EFAULT; + } + + PPCDBG(PPCDBG_SYS64X, "sys_pipe - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return error; +} + +asmlinkage unsigned long sys_mmap(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset) +{ + struct file * file = NULL; + unsigned long ret = -EBADF; + + PPCDBG(PPCDBG_SYS64X, "sys_mmap - entered - addr=%lx, len=%lx - pid=%ld, comm=%s \n", addr, len, current->pid, current->comm); + + if (!(flags & MAP_ANONYMOUS)) { + if (!(file = fget(fd))) + goto out; + } + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + down_write(¤t->mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flags, offset); + up_write(¤t->mm->mmap_sem); + if (file) + fput(file); + +out: + + PPCDBG(PPCDBG_SYS64X, "sys_mmap - exited - ret=%x \n", ret); + + return ret; +} + +asmlinkage int sys_pause(void) +{ + + PPCDBG(PPCDBG_SYS64X, "sys_pause - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + current->state = TASK_INTERRUPTIBLE; + schedule(); + + PPCDBG(PPCDBG_SYS64X, "sys_pause - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return -ERESTARTNOHAND; +} + +static int __init set_fakeppc(char *str) +{ + if (*str) + return 0; + init_task.personality = PER_LINUX32; + return 1; +} +__setup("fakeppc", set_fakeppc); + +asmlinkage int sys_uname(struct old_utsname * name) +{ + int err = -EFAULT; + + PPCDBG(PPCDBG_SYS64X, "sys_uname - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + down_read(&uts_sem); + if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + err = 0; + up_read(&uts_sem); + + PPCDBG(PPCDBG_SYS64X, "sys_uname - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return err; +} + +asmlinkage int sys_olduname(struct oldold_utsname * name) +{ + int error; + + PPCDBG(PPCDBG_SYS64X, "sys_olduname - entered - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + error -= __put_user(0,name->sysname+__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __put_user(0,name->nodename+__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __put_user(0,name->release+__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __put_user(0,name->version+__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error = __put_user(0,name->machine+__OLD_UTS_LEN); + up_read(&uts_sem); + + error = error ? 
-EFAULT : 0; + + PPCDBG(PPCDBG_SYS64X, "sys_olduname - exited - pid=%ld current=%lx comm=%s \n", current->pid, current, current->comm); + return error; +} + +asmlinkage time_t sys64_time(time_t* tloc) +{ + time_t secs; + time_t usecs; + + long tb_delta = tb_ticks_since(tb_last_stamp); + tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy; + + secs = xtime.tv_sec; + usecs = xtime.tv_usec + tb_delta / tb_ticks_per_usec; + while (usecs >= USEC_PER_SEC) { + ++secs; + usecs -= USEC_PER_SEC; + } + + if (tloc) { + if (put_user(secs,tloc)) + secs = -EFAULT; + } + + return secs; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/time.c linuxppc64_2_4/arch/ppc64/kernel/time.c --- ../kernel.org/linux/arch/ppc64/kernel/time.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/time.c Sat Nov 24 06:48:48 2001 @@ -0,0 +1,576 @@ +/* + * + * Common time routines among all ppc machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) to merge + * Paul Mackerras' version and mine for PReP and Pmac. + * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net). + * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com) + * + * First round of bugfixes by Gabriel Paubert (paubert@iram.es) + * to make clock more stable (2.4.0-test5). The only thing + * that this code assumes is that the timebases have been synchronized + * by firmware on SMP and are never stopped (never do sleep + * on SMP then, nap and doze are OK). + * + * Speeded up do_gettimeofday by getting rid of references to + * xtime (which required locks for consistency). (mikejc@us.ibm.com) + * + * TODO (not necessarily in this file): + * - improve precision and reproducibility of timebase frequency + * measurement at boot time. (for iSeries, we calibrate the timebase + * against the Titan chip's clock.) + * - for astronomical applications: add a new function to get + * non ambiguous timestamps even around leap seconds. This needs + * a new timestamp format and a good name. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
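+ */
+
+/*
+ * Editorial sketch -- not part of the original patch.  The code below
+ * avoids divides on the gettimeofday path by keeping time in "xsec"
+ * units of 1/2^20 second (XSEC_PER_SEC = 1024*1024) and using a 0.64
+ * fixed-point conversion factor tb_to_xs, roughly:
+ *
+ *	xsec = stamp_xsec + mulhdu(tb_now - tb_orig_stamp, tb_to_xs);
+ *	sec  = xsec / XSEC_PER_SEC;
+ *	usec = ((xsec % XSEC_PER_SEC) * USEC_PER_SEC) / XSEC_PER_SEC;
+ *
+ * See do_gettimeofday() and div128_by_32() below for the real thing.
+ */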
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_ISERIES +#include +#endif + +#include +#include + +void smp_local_timer_interrupt(struct pt_regs *); + +/* keep track of when we need to update the rtc */ +time_t last_rtc_update; +extern rwlock_t xtime_lock; +extern int piranha_simulator; +#ifdef CONFIG_PPC_ISERIES +unsigned long iSeries_recal_titan = 0; +unsigned long iSeries_recal_tb = 0; +static unsigned long first_settimeofday = 1; +#endif + +#define XSEC_PER_SEC (1024*1024) +#define USEC_PER_SEC (1000000) + +unsigned long tb_ticks_per_jiffy; +unsigned long tb_ticks_per_usec; +unsigned long tb_ticks_per_sec; +unsigned long tb_to_xs; +unsigned tb_to_us; +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; + +struct gettimeofday_struct do_gtod; + +extern unsigned long wall_jiffies; +extern unsigned long lpEvent_count; +extern int smp_tb_synchronized; + +static long time_offset = 0; + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; +extern char _stext; + +static inline void ppc_do_profile (unsigned long nip) +{ + if (!prof_buffer) + return; + + /* + * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. + * (default is all CPUs.) + */ + if (!((1<>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (nip > prof_len-1) + nip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[nip]); +} + + +static __inline__ void timer_check_rtc(void) +{ + /* + * update the rtc when needed, this should be performed on the + * right fraction of a second. Half or full second ? + * Full second works on mk48t59 clocks, others need testing. + * Note that this update is basically only used through + * the adjtimex system calls. Setting the HW clock in + * any other way is a /dev/rtc and userland business. + * This is still wrong by -0.5/+1.5 jiffies because of the + * timer interrupt resolution and possible delay, but here we + * hit a quantization limit which can only be solved by higher + * resolution timers and decoupling time management from timer + * interrupts. This is also wrong on the clocks + * which require being written at the half second boundary. + * We should have an rtc call that only sets the minutes and + * seconds like on Intel to avoid problems with non UTC clocks. + */ + if ( (time_status & STA_UNSYNC) == 0 && + xtime.tv_sec - last_rtc_update >= 659 && + abs(xtime.tv_usec - (1000000-1000000/HZ)) < 500000/HZ && + jiffies - wall_jiffies == 1) { + struct rtc_time tm; + to_tm(xtime.tv_sec+1 + time_offset, &tm); + tm.tm_year -= 1900; + tm.tm_mon -= 1; + if (ppc_md.set_rtc_time(&tm) == 0) + last_rtc_update = xtime.tv_sec+1; + else + /* Try again one minute later */ + last_rtc_update += 60; + } +} + +#ifdef CONFIG_PPC_ISERIES + +/* + * This function recalibrates the timebase based on the 49-bit time-of-day value in the Titan chip. + * The Titan is much more accurate than the value returned by the service processor for the + * timebase frequency. 
+ */ + +static void iSeries_tb_recal(void) +{ + struct div_result divres; + unsigned long titan, tb; + tb = get_tb(); + titan = HvCallXm_loadTod(); + if ( iSeries_recal_titan ) { + unsigned long tb_ticks = tb - iSeries_recal_tb; + unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; + unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; + unsigned long new_tb_ticks_per_jiffy = (new_tb_ticks_per_sec+(HZ/2))/HZ; + long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; + char sign = '+'; + /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ + new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ; + + if ( tick_diff < 0 ) { + tick_diff = -tick_diff; + sign = '-'; + } + if ( tick_diff ) { + if ( tick_diff < tb_ticks_per_jiffy/25 ) { + printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n", + new_tb_ticks_per_jiffy, sign, tick_diff ); + tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; + tb_ticks_per_sec = new_tb_ticks_per_sec; + div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); + do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; + tb_to_xs = divres.result_low; + do_gtod.tb_to_xs = tb_to_xs; + } + else { + printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" + " new tb_ticks_per_jiffy = %lu\n" + " old tb_ticks_per_jiffy = %lu\n", + new_tb_ticks_per_jiffy, tb_ticks_per_jiffy ); + } + } + } + iSeries_recal_titan = titan; + iSeries_recal_tb = tb; +} +#endif + +/* + * For iSeries shared processors, we have to let the hypervisor + * set the hardware decrementer. We set a virtual decrementer + * in the ItLpPaca and call the hypervisor if the virtual + * decrementer is less than the current value in the hardware + * decrementer. (almost always the new decrementer value will + * be greater than the current hardware decementer so the hypervisor + * call will not be needed) + */ + +unsigned long tb_last_stamp=0; + +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + int next_dec; + unsigned long cur_tb; + struct Paca * paca = (struct Paca *)mfspr(SPRG3); + unsigned long cpu = paca->xPacaIndex; + struct ItLpQueue * lpq; + + irq_enter(cpu); + +#ifndef CONFIG_PPC_ISERIES + if (!user_mode(regs)) + ppc_do_profile(instruction_pointer(regs)); +#endif + + paca->xLpPaca.xIntDword.xFields.xDecrInt = 0; + + while (paca->next_jiffy_update_tb <= (cur_tb = get_tb())) { + +#ifdef CONFIG_SMP + smp_local_timer_interrupt(regs); +#endif + if (cpu == 0) { + write_lock(&xtime_lock); + tb_last_stamp = paca->next_jiffy_update_tb; + do_timer(regs); + timer_check_rtc(); + write_unlock(&xtime_lock); + } + paca->next_jiffy_update_tb += tb_ticks_per_jiffy; + } + + next_dec = paca->next_jiffy_update_tb - cur_tb; + if (next_dec > paca->default_decr) + next_dec = paca->default_decr; + set_dec(next_dec); + + lpq = paca->lpQueuePtr; + if (lpq && ItLpQueue_isLpIntPending(lpq)) + lpEvent_count += ItLpQueue_process(lpq, regs); + + irq_exit(cpu); + + if (softirq_pending(cpu)) + do_softirq(); + + return 1; +} + +/* + * This version of gettimeofday has microsecond resolution. + */ +void do_gettimeofday(struct timeval *tv) +{ + unsigned long sec, usec, tb_ticks; + unsigned long xsec, tb_xsec; + + /* These calculations are faster (gets rid of divides) + * if done in units of 1/2^20 rather than microseconds. 
+/*
+ * This version of gettimeofday has microsecond resolution.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+	unsigned long sec, usec, tb_ticks;
+	unsigned long xsec, tb_xsec;
+
+	/* These calculations are faster (gets rid of divides)
+	 * if done in units of 1/2^20 rather than microseconds.
+	 * The conversion to microseconds at the end is done
+	 * without a divide (and in fact, without a multiply) */
+	tb_ticks = get_tb() - do_gtod.tb_orig_stamp;
+	tb_xsec = mulhdu( tb_ticks, do_gtod.tb_to_xs );
+	xsec = do_gtod.stamp_xsec + tb_xsec;
+	sec = xsec / XSEC_PER_SEC;
+	xsec -= sec * XSEC_PER_SEC;
+	usec = (xsec * USEC_PER_SEC)/XSEC_PER_SEC;
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+void do_settimeofday(struct timeval *tv)
+{
+	unsigned long flags;
+	unsigned long delta_xsec;
+	long int tb_delta, new_usec, new_sec;
+	unsigned long new_xsec;
+
+	write_lock_irqsave(&xtime_lock, flags);
+	/* Updating the RTC is not the job of this code.  If the time is
+	 * stepped under NTP, the RTC will be updated after STA_UNSYNC
+	 * is cleared.  Tools like clock/hwclock either copy the RTC
+	 * to the system time, in which case there is no point in writing
+	 * to the RTC again, or write to the RTC but then they don't call
+	 * settimeofday to perform this operation.
+	 */
+#ifdef CONFIG_PPC_ISERIES
+	if ( first_settimeofday ) {
+		iSeries_tb_recal();
+		first_settimeofday = 0;
+	}
+#endif
+	tb_delta = tb_ticks_since(tb_last_stamp);
+	tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy;
+
+	new_sec = tv->tv_sec;
+	new_usec = tv->tv_usec - tb_delta / tb_ticks_per_usec;
+	while (new_usec < 0) {
+		new_sec--;
+		new_usec += USEC_PER_SEC;
+	}
+	xtime.tv_usec = new_usec;
+	xtime.tv_sec = new_sec;
+
+	/* In case of a large backwards jump in time with NTP, we want the
+	 * clock to be updated as soon as the PLL is again in lock.
+	 */
+	last_rtc_update = new_sec - 658;
+
+	time_adjust = 0;		/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_state = TIME_ERROR;	/* p. 24, (a) */
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+
+	delta_xsec = mulhdu( (tb_last_stamp-do_gtod.tb_orig_stamp), do_gtod.tb_to_xs );
+	new_xsec = (new_usec * XSEC_PER_SEC) / USEC_PER_SEC;
+	new_xsec += new_sec * XSEC_PER_SEC;
+	if ( new_xsec > delta_xsec ) {
+		do_gtod.stamp_xsec = new_xsec - delta_xsec;
+	}
+	else {
+		/* This is only for the case where the user is setting the time
+		 * way back to a time such that the boot time would have been
+		 * before 1970 ... eg. we booted ten days ago, and we are setting
+		 * the time to Jan 5, 1970 */
+		do_gtod.stamp_xsec = new_xsec;
+		do_gtod.tb_orig_stamp = tb_last_stamp;
+	}
+
+	write_unlock_irqrestore(&xtime_lock, flags);
+}
+
+/*
+ * This function is a copy of the architecture-independent function,
+ * except that it calls do_settimeofday rather than setting the xtime
+ * fields itself.  This way, the fields which are used for
+ * do_settimeofday get updated too.
+ */
+long ppc64_sys_stime(int * tptr)
+{
+	int value;
+	struct timeval myTimeval;
+
+	PPCDBG(PPCDBG_SYS32, "ppc64_sys_stime - entered - tptr=%p, *tptr=0x%x \n", tptr, *tptr);
+
+	if (!capable(CAP_SYS_TIME))
+		return -EPERM;
+
+	if (get_user(value, tptr))
+		return -EFAULT;
+
+	myTimeval.tv_sec = value;
+	myTimeval.tv_usec = 0;
+
+	do_settimeofday(&myTimeval);
+
+	PPCDBG(PPCDBG_SYS32, "ppc64_sys_stime - exiting w/ 0 \n");
+	return 0;
+}
+
+void __init time_init(void)
+{
+	/* This function is only called on the boot processor */
+	unsigned long flags;
+	struct rtc_time tm;
+
+	ppc_md.calibrate_decr();
+
+	if ( !
piranha_simulator ) { + ppc_md.get_boot_time(&tm); + } + write_lock_irqsave(&xtime_lock, flags); + xtime.tv_sec = mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + tb_last_stamp = get_tb(); + do_gtod.tb_orig_stamp = tb_last_stamp; + do_gtod.stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; + do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; + do_gtod.tb_to_xs = tb_to_xs; + do_gtod.tb_to_us = tb_to_us; + + xtime.tv_usec = 0; + last_rtc_update = xtime.tv_sec; + write_unlock_irqrestore(&xtime_lock, flags); + +#ifdef CONFIG_PPC_ISERIES + /* HACK HACK This allows the iSeries profiling to use /proc/profile */ + prof_shift = 0; +#endif + + /* Not exact, but the timer interrupt takes care of this */ + set_dec(tb_ticks_per_jiffy); + + do_get_fast_time = do_gettimeofday; +} + +#define TICK_SIZE tick +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) + */ +void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear=tm->tm_year-1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear/4 - lastYear/100 + lastYear/400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 will be + */ + if((tm->tm_year%4==0) && + ((tm->tm_year%100!=0) || (tm->tm_year%400==0)) && + (tm->tm_mon>2)) + { + /* + * We are past Feb. 29 in a leap year + */ + day=1; + } + else + { + day=0; + } + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday=day%7; +} + +void to_tm(int tim, struct rtc_time * tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* Auxiliary function to compute scaling factors */ +/* Actually the choice of a timebase running at 1/4 the of the bus + * frequency giving resolution of a few tens of nanoseconds is quite nice. + * It makes this computation very precise (27-28 bits typically) which + * is optimistic considering the stability of most processor clock + * oscillators and the precision with which the timebase frequency + * is measured but does not harm. + */ +unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) { + unsigned mlt=0, tmp, err; + /* No concern for performance, it's done once: use a stupid + * but safe and compact method to find the multiplier. 
+	 */
+
+	for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
+		if (mulhwu(inscale, mlt|tmp) < outscale) mlt|=tmp;
+	}
+
+	/* We might still be off by 1 for the best approximation.
+	 * A side effect of this is that if outscale is too large
+	 * the returned value will be zero.
+	 * Many corner cases have been checked and seem to work,
+	 * some might have been forgotten in the test however.
+	 */
+
+	err = inscale*(mlt+1);
+	if (err <= inscale/2) mlt++;
+	return mlt;
+}
+
+/*
+ * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128-bit
+ * result.
+ */
+
+void div128_by_32( unsigned long dividend_high, unsigned long dividend_low,
+		   unsigned divisor, struct div_result *dr )
+{
+	unsigned long a,b,c,d, w,x,y,z, ra,rb,rc;
+
+	a = dividend_high >> 32;
+	b = dividend_high & 0xffffffff;
+	c = dividend_low >> 32;
+	d = dividend_low & 0xffffffff;
+
+	w = a/divisor;
+	ra = (a - (w * divisor)) << 32;
+
+	x = (ra + b)/divisor;
+	rb = ((ra + b) - (x * divisor)) << 32;
+
+	y = (rb + c)/divisor;
+	rc = ((rb + c) - (y * divisor)) << 32;
+
+	z = (rc + d)/divisor;
+
+	dr->result_high = (w << 32) + x;
+	dr->result_low = (y << 32) + z;
+
+}
+
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/traps.c linuxppc64_2_4/arch/ppc64/kernel/traps.c
--- ../kernel.org/linux/arch/ppc64/kernel/traps.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/kernel/traps.c	Fri Sep 21 16:01:03 2001
@@ -0,0 +1,281 @@
+/*
+ * linux/arch/ppc/kernel/traps.c
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras (paulus@cs.anu.edu.au)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of hardware exceptions
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_KDB
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern int fix_alignment(struct pt_regs *);
+extern void bad_page_fault(struct pt_regs *, unsigned long);
+
+#ifdef CONFIG_XMON
+extern void xmon(struct pt_regs *regs);
+extern int xmon_bpt(struct pt_regs *regs);
+extern int xmon_sstep(struct pt_regs *regs);
+extern int xmon_iabr_match(struct pt_regs *regs);
+extern int xmon_dabr_match(struct pt_regs *regs);
+extern void (*xmon_fault_handler)(struct pt_regs *regs);
+#endif
+
+#ifdef CONFIG_XMON
+void (*debugger)(struct pt_regs *regs) = xmon;
+int (*debugger_bpt)(struct pt_regs *regs) = xmon_bpt;
+int (*debugger_sstep)(struct pt_regs *regs) = xmon_sstep;
+int (*debugger_iabr_match)(struct pt_regs *regs) = xmon_iabr_match;
+int (*debugger_dabr_match)(struct pt_regs *regs) = xmon_dabr_match;
+void (*debugger_fault_handler)(struct pt_regs *regs);
+#else
+#ifdef CONFIG_KGDB
+void (*debugger)(struct pt_regs *regs);
+int (*debugger_bpt)(struct pt_regs *regs);
+int (*debugger_sstep)(struct pt_regs *regs);
+int (*debugger_iabr_match)(struct pt_regs *regs);
+int (*debugger_dabr_match)(struct pt_regs *regs);
+void (*debugger_fault_handler)(struct pt_regs *regs);
+#endif
+#endif
+/*
+ * Trap & Exception support
+ */
+
+void
+_exception(int signr, struct pt_regs *regs)
+{
+	if (!user_mode(regs))
+	{
+		show_regs(regs);
+#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
+		debugger(regs);
+#endif
+#if
defined(CONFIG_KDB) + kdb(KDB_REASON_OOPS, 0, (kdb_eframe_t) regs); +#endif + print_backtrace((unsigned long *)regs->gpr[1]); + panic("Exception in kernel pc %lx signal %d",regs->nip,signr); +#if defined(CONFIG_PPCDBG) && (defined(CONFIG_XMON) || defined(CONFIG_KGDB)) + /* Allow us to catch SIGILLs for 64-bit app/glibc debugging. -Peter */ + } else if (signr == SIGILL) { + ifppcdebug(PPCDBG_SIGNALXMON) + debugger(regs); +#endif + } + force_sig(signr, current); +} + +void +SystemResetException(struct pt_regs *regs) +{ + udbg_printf("System Reset in kernel mode.\n"); + printk("System Reset in kernel mode.\n"); +#if defined(CONFIG_XMON) + xmon(regs); +#endif + for(;;); +} + + +void +MachineCheckException(struct pt_regs *regs) +{ + if ( !user_mode(regs) ) + { +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_fault_handler) { + debugger_fault_handler(regs); + return; + } +#endif +#ifdef CONFIG_KDB + if (kdb(KDB_REASON_FAULT, 0, regs)) + return; +#endif + printk("Machine check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", regs->msr); + show_regs(regs); +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + debugger(regs); +#endif +#ifdef CONFIG_KDB + if (kdb(KDB_REASON_FAULT, 0, regs)) + return ; +#endif + print_backtrace((unsigned long *)regs->gpr[1]); + panic("machine check"); + } + _exception(SIGSEGV, regs); +} + +void +SMIException(struct pt_regs *regs) +{ +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + { + debugger(regs); + return; + } +#endif +#ifdef CONFIG_KDB + { + kdb(KDB_REASON_OOPS, 0, regs); + return; + } +#endif + show_regs(regs); + print_backtrace((unsigned long *)regs->gpr[1]); + panic("System Management Interrupt"); +} + +void +UnknownException(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + _exception(SIGTRAP, regs); +} + +void +InstructionBreakpointException(struct pt_regs *regs) +{ +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_iabr_match(regs)) + return; +#endif +#ifdef CONFIG_KDB + if (kdb(KDB_REASON_BREAK, 0, regs)) + return ; +#endif + _exception(SIGTRAP, regs); +} + +void +ProgramCheckException(struct pt_regs *regs) +{ + if (regs->msr & 0x100000) { + /* IEEE FP exception */ + _exception(SIGFPE, regs); + } else if (regs->msr & 0x20000) { + /* trap exception */ +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_bpt(regs)) + return; +#endif +#ifdef CONFIG_KDB + if (kdb(KDB_REASON_BREAK, 0, regs)) + return; +#endif + _exception(SIGTRAP, regs); + } else { + _exception(SIGILL, regs); + } +} + +void +SingleStepException(struct pt_regs *regs) +{ + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_sstep(regs)) + return; +#endif +#ifdef CONFIG_KDB + if (kdb(KDB_REASON_DEBUG, 0, regs)) + return; +#endif + _exception(SIGTRAP, regs); +} + +/* Dummy handler for Performance Monitor */ + +void +PerformanceMonitorException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs); +} + +void +AlignmentException(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + if (fixed == 1) { + ifppcdebug(PPCDBG_ALIGNFIXUP) + if (!user_mode(regs)) + PPCDBG(PPCDBG_ALIGNFIXUP, "fix alignment at %lx\n", regs->nip); + regs->nip += 4; /* skip over emulated instruction */ + return; + } + if (fixed == -EFAULT) { + /* fixed == -EFAULT means the operand address was bad */ + if (user_mode(regs)) + force_sig(SIGSEGV, current); + else + bad_page_fault(regs, regs->dar); + return; + } + _exception(SIGBUS, 
regs); +} + +void +StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + debugger(regs); +#endif +#if defined(CONFIG_KDB) + kdb(KDB_REASON_OOPS, 0, regs); +#endif + show_regs(regs); + print_backtrace((unsigned long *)regs->gpr[1]); + panic("kernel stack overflow"); +} + +void +trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld\n", + current, current->pid, regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3]); +} + +void __init trap_init(void) +{ +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/udbg.c linuxppc64_2_4/arch/ppc64/kernel/udbg.c --- ../kernel.org/linux/arch/ppc64/kernel/udbg.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/udbg.c Mon Nov 19 21:43:25 2001 @@ -0,0 +1,242 @@ +/* + * NS16550 Serial Port (uart) debugging stuff. + * + * c 2001 PPC 64 Team, IBM Corp + * + * NOTE: I am trying to make this code avoid any static data references to + * simplify debugging early boot. We'll see how that goes... + * + * To use this call udbg_init() first. It will init the uart to 9600 8N1. + * You may need to update the COM1 define if your uart is at a different addr. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#define WANT_PPCDBG_TAB /* Only defined here */ +#include +#include +#include + +extern struct Naca *naca; +extern int _machine; + +struct NS16550 { + /* this struct must be packed */ + unsigned char rbr; /* 0 */ + unsigned char ier; /* 1 */ + unsigned char fcr; /* 2 */ + unsigned char lcr; /* 3 */ + unsigned char mcr; /* 4 */ + unsigned char lsr; /* 5 */ + unsigned char msr; /* 6 */ + unsigned char scr; /* 7 */ +}; + +#define thr rbr +#define iir fcr +#define dll rbr +#define dlm ier +#define dlab lcr + +#define LSR_DR 0x01 /* Data ready */ +#define LSR_OE 0x02 /* Overrun */ +#define LSR_PE 0x04 /* Parity error */ +#define LSR_FE 0x08 /* Framing error */ +#define LSR_BI 0x10 /* Break */ +#define LSR_THRE 0x20 /* Xmit holding register empty */ +#define LSR_TEMT 0x40 /* Xmitter empty */ +#define LSR_ERR 0x80 /* Error */ + +volatile struct NS16550 *udbg_comport; + +void +udbg_init_uart(void *comport) +{ + if (comport) { + udbg_comport = (struct NS16550 *)comport; + udbg_comport->lcr = 0x00; eieio(); + udbg_comport->ier = 0xFF; eieio(); + udbg_comport->ier = 0x00; eieio(); + udbg_comport->lcr = 0x80; eieio(); /* Access baud rate */ + udbg_comport->dll = 12; eieio(); /* 1 = 115200, 2 = 57600, 3 = 38400, 12 = 9600 baud */ + udbg_comport->dlm = 0; eieio(); /* dll >> 8 which should be zero for fast rates; */ + udbg_comport->lcr = 0x03; eieio(); /* 8 data, 1 stop, no parity */ + udbg_comport->mcr = 0x03; eieio(); /* RTS/DTR */ + udbg_comport->fcr = 0x07; eieio(); /* Clear & enable FIFOs */ + } +} + +void +udbg_putc(unsigned char c) +{ + if ( udbg_comport ) { + while ((udbg_comport->lsr & LSR_THRE) == 0) + /* wait for idle */; + udbg_comport->thr = c; eieio(); + if (c == '\n') { + /* Also put a CR. This is for convenience. 
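+		 * Most terminals want "\r\n", so sending the CR here lets
+		 * callers print a plain '\n' and still get the cursor back
+		 * to column 0 on the serial console.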
*/ + while ((udbg_comport->lsr & LSR_THRE) == 0) + /* wait for idle */; + udbg_comport->thr = '\r'; eieio(); + } + } else if ( _machine == _MACH_iSeries ) { + /* ToDo: switch this via ppc_md */ + printk("%c", c); + } +} + +int udbg_getc_poll(void) +{ + if (udbg_comport) { + if ((udbg_comport->lsr & LSR_DR) != 0) + return udbg_comport->rbr; + else + return -1; + } + return -1; +} + +unsigned char +udbg_getc(void) +{ + if ( udbg_comport ) { + while ((udbg_comport->lsr & LSR_DR) == 0) + /* wait for char */; + return udbg_comport->rbr; + } + return 0; +} + +void +udbg_puts(const char *s) +{ + if (ppc_md.udbg_putc) { + char c; + + if (s && *s != '\0') { + while ((c = *s++) != '\0') + ppc_md.udbg_putc(c); + } else { + udbg_puts("NULL"); + } + } else { + printk("%s", s); + } +} + +int +udbg_write(const char *s, int n) +{ + int remain = n; + char c; + if (!ppc_md.udbg_putc) + for (;;); /* stop here for cpuctl */ + if ( s && *s != '\0' ) { + while ( (( c = *s++ ) != '\0') && (remain-- > 0)) { + ppc_md.udbg_putc(c); + } + } else + udbg_puts("NULL"); + return n - remain; +} + +int +udbg_read(char *buf, int buflen) { + char c, *p = buf; + int i; + if (!ppc_md.udbg_putc) + for (;;); /* stop here for cpuctl */ + for (i = 0; i < buflen; ++i) { + do { + c = ppc_md.udbg_getc(); + } while (c == 0x11 || c == 0x13); + *p++ = c; + } + return i; +} + +void +udbg_puthex(unsigned long val) +{ + int i, nibbles = sizeof(val)*2; + unsigned char buf[sizeof(val)*2+1]; + for (i = nibbles-1; i >= 0; i--) { + buf[i] = (val & 0xf) + '0'; + if (buf[i] > '9') + buf[i] += ('a'-'0'-10); + val >>= 4; + } + buf[nibbles] = '\0'; + udbg_puts(buf); +} + +void +udbg_printSP(const char *s) +{ + if (_machine == _MACH_pSeries) { + unsigned long sp; + asm("mr %0,1" : "=r" (sp) :); + if (s) + udbg_puts(s); + udbg_puthex(sp); + } +} + +void +udbg_printf(const char *fmt, ...) +{ + unsigned char buf[256]; + + va_list args; + va_start(args, fmt); + + vsprintf(buf, fmt, args); + udbg_puts(buf); + + va_end(args); +} + +/* Special print used by PPCDBG() macro */ +void +udbg_ppcdbg(unsigned long flags, const char *fmt, ...) +{ + unsigned long active_debugs = flags & naca->debug_switch; + if ( active_debugs ) { + va_list ap; + unsigned char buf[256]; + unsigned long i, len = 0; + for(i=0; i < PPCDBG_NUM_FLAGS ;i++) { + if (((1U << i) & active_debugs) && + trace_names[i]) { + len += strlen(trace_names[i]); + udbg_puts(trace_names[i]); + break; + } + } + sprintf(buf, " [%s]: ", current->comm); + len += strlen(buf); + udbg_puts(buf); + + while(len < 18) { + udbg_puts(" "); + len++; + } + + va_start(ap, fmt); + vsprintf(buf, fmt, ap); + udbg_puts(buf); + va_end(ap); + } +} + +unsigned long +udbg_ifdebug(unsigned long flags) +{ + return (flags & naca->debug_switch); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/xics.c linuxppc64_2_4/arch/ppc64/kernel/xics.c --- ../kernel.org/linux/arch/ppc64/kernel/xics.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/xics.c Wed Nov 7 13:05:40 2001 @@ -0,0 +1,469 @@ +/* + * arch/ppc/kernel/xics.c + * + * Copyright 2000 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
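+ *
+ * XICS is the external interrupt controller architecture used on
+ * pSeries.  Each cpu has a "presentation" area whose XIRR register
+ * packs the current priority and the interrupt source; an accepted
+ * interrupt decodes roughly as
+ *
+ *	cppr = xirr >> 24;		(current processor priority)
+ *	xisr = xirr & 0x00ffffff;	(interrupt source number)
+ *
+ * which is how xics_get_irq() below picks apart the value it reads.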
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "i8259.h" +#include "xics.h" +#include + +extern struct Naca *naca; + +void xics_enable_irq(u_int irq); +void xics_disable_irq(u_int irq); +void xics_mask_and_ack_irq(u_int irq); +void xics_end_irq(u_int irq); +void xics_set_affinity(unsigned int irq_nr, unsigned long cpumask); + +struct hw_interrupt_type xics_pic = { + " XICS ", + NULL, + NULL, + xics_enable_irq, + xics_disable_irq, + xics_mask_and_ack_irq, + xics_end_irq, + xics_set_affinity +}; + +struct hw_interrupt_type xics_8259_pic = { + " XICS/8259", + NULL, + NULL, + NULL, + NULL, + xics_mask_and_ack_irq, + NULL +}; + +#define XICS_IPI 2 +#define XICS_IRQ_OFFSET 0x10 +#define XICS_IRQ_SPURIOUS 0 + +/* Want a priority other than 0. Various HW issues require this. */ +#define DEFAULT_PRIORITY 5 + +struct xics_ipl { + union { + u32 word; + u8 bytes[4]; + } xirr_poll; + union { + u32 word; + u8 bytes[4]; + } xirr; + u32 dummy; + union { + u32 word; + u8 bytes[4]; + } qirr; +}; + +struct xics_info { + volatile struct xics_ipl * per_cpu[NR_CPUS]; +}; + +struct xics_info xics_info; + +unsigned long long intr_base = 0; +int xics_irq_8259_cascade = 0; +int xics_irq_8259_cascade_real = 0; +unsigned int default_server = 0; +unsigned int default_distrib_server = 0; + +/* RTAS service tokens */ +int ibm_get_xive; +int ibm_set_xive; +int ibm_int_off; + +struct xics_interrupt_node { + unsigned long long addr; + unsigned long long size; +} inodes[NR_CPUS*2]; + +typedef struct { + int (*xirr_info_get)(int cpu); + void (*xirr_info_set)(int cpu, int val); + void (*cppr_info)(int cpu, u8 val); + void (*qirr_info)(int cpu, u8 val); +} xics_ops; + + +static int pSeries_xirr_info_get(int n_cpu) +{ + return (xics_info.per_cpu[n_cpu]->xirr.word); +} + +static void pSeries_xirr_info_set(int n_cpu, int value) +{ + xics_info.per_cpu[n_cpu]->xirr.word = value; +} + +static void pSeries_cppr_info(int n_cpu, u8 value) +{ + xics_info.per_cpu[n_cpu]->xirr.bytes[0] = value; +} + +static void pSeries_qirr_info(int n_cpu , u8 value) +{ + xics_info.per_cpu[n_cpu]->qirr.bytes[0] = value; +} + +static xics_ops pSeries_ops = { + pSeries_xirr_info_get, + pSeries_xirr_info_set, + pSeries_cppr_info, + pSeries_qirr_info +}; + +static xics_ops *ops = &pSeries_ops; +extern xics_ops pSeriesLP_ops; + + +void +xics_enable_irq( + u_int virq + ) +{ + u_int irq; + unsigned long status; + long call_status; + + virq -= XICS_IRQ_OFFSET; + irq = virt_irq_to_real(virq); + if (irq == XICS_IPI) + return; +#ifdef CONFIG_IRQ_ALL_CPUS + call_status = rtas_call(ibm_set_xive, 3, 1, (unsigned long*)&status, + irq, smp_threads_ready ? 
default_distrib_server : default_server, DEFAULT_PRIORITY); +#else + call_status = rtas_call(ibm_set_xive, 3, 1, (unsigned long*)&status, + irq, default_server, DEFAULT_PRIORITY); +#endif + if( call_status != 0 ) { + printk("xics_enable_irq: irq=%x: rtas_call failed; retn=%lx, status=%lx\n", + irq, call_status, status); + return; + } +} + +void +xics_disable_irq( + u_int virq + ) +{ + u_int irq; + unsigned long status; + long call_status; + + virq -= XICS_IRQ_OFFSET; + irq = virt_irq_to_real(virq); + call_status = rtas_call(ibm_int_off, 1, 1, (unsigned long*)&status, + irq); + if( call_status != 0 ) { + printk("xics_disable_irq: irq=%x: rtas_call failed, retn=%lx\n", + irq, call_status); + return; + } +} + +void +xics_end_irq( + u_int irq + ) +{ + int cpu = smp_processor_id(); + + ops->cppr_info(cpu, 0); /* actually the value overwritten by ack */ + iosync(); + ops->xirr_info_set(cpu, ((0xff<<24) | (virt_irq_to_real(irq-XICS_IRQ_OFFSET)))); + iosync(); +} + +void +xics_mask_and_ack_irq( + u_int irq + ) +{ + int cpu = smp_processor_id(); + + if( irq < XICS_IRQ_OFFSET ) { + i8259_pic.ack(irq); + iosync(); + ops->xirr_info_set(cpu, ((0xff<<24) | xics_irq_8259_cascade_real)); + iosync(); + } + else { + ops->cppr_info(cpu, 0xff); + iosync(); + } +} + +int +xics_get_irq(struct pt_regs *regs) +{ + u_int cpu = smp_processor_id(); + u_int vec; + int irq; + + vec = ops->xirr_info_get(cpu); + /* (vec >> 24) == old priority */ + vec &= 0x00ffffff; + /* for sanity, this had better be < NR_IRQS - 16 */ + if( vec == xics_irq_8259_cascade_real ) { + irq = i8259_irq(cpu); + if(irq == -1) { + /* Spurious cascaded interrupt. Still must ack xics */ + xics_end_irq(XICS_IRQ_OFFSET + xics_irq_8259_cascade); + irq = -1; + } + } else if( vec == XICS_IRQ_SPURIOUS ) { + irq = -1; + printk("spurious PPC interrupt!\n"); + } else + irq = real_irq_to_virt(vec) + XICS_IRQ_OFFSET; + return irq; +} + + +#ifdef CONFIG_SMP +void xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + extern volatile unsigned long xics_ipi_message[]; + int cpu = smp_processor_id(); + + ops->qirr_info(cpu, 0xff); + while (xics_ipi_message[cpu]) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, &xics_ipi_message[cpu])) { + mb(); + smp_message_recv(PPC_MSG_CALL_FUNCTION, regs); + } + if (test_and_clear_bit(PPC_MSG_RESCHEDULE, &xics_ipi_message[cpu])) { + mb(); + smp_message_recv(PPC_MSG_RESCHEDULE, regs); + } + } +} + +void xics_cause_IPI(int cpu) +{ + ops->qirr_info(cpu,0) ; +} + +void xics_setup_cpu(void) +{ + int cpu = smp_processor_id(); + + ops->cppr_info(cpu, 0xff); + iosync(); +} +#endif /* CONFIG_SMP */ + +void +xics_init_IRQ( void ) +{ + int i; + unsigned long intr_size = 0; + struct device_node *np; + uint *ireg, ilen, indx=0; + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_off = rtas_token("ibm,int-off"); + + np = find_type_devices("PowerPC-External-Interrupt-Presentation"); + if (!np) { + printk(KERN_WARNING "Can't find Interrupt Presentation\n"); + udbg_printf("Can't find Interrupt Presentation\n"); + while (1); + } +nextnode: + ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", 0); + if (ireg) { + /* + * set node starting index for this node + */ + indx = *ireg; + } + + ireg = (uint *)get_property(np, "reg", &ilen); + if (!ireg) { + printk(KERN_WARNING "Can't find Interrupt Reg Property\n"); + udbg_printf("Can't find Interrupt Reg Property\n"); + while (1); + } + + while (ilen) { + inodes[indx].addr = (unsigned long long)*ireg++ << 32; + ilen -= 
sizeof(uint); + inodes[indx].addr |= *ireg++; + ilen -= sizeof(uint); + inodes[indx].size = (unsigned long long)*ireg++ << 32; + ilen -= sizeof(uint); + inodes[indx].size |= *ireg++; + ilen -= sizeof(uint); + indx++; + if (indx >= NR_CPUS) break; + } + + np = np->next; + if ((indx < NR_CPUS) && np) goto nextnode; + + /* Find the server numbers for the boot cpu. */ + for (np = find_type_devices("cpu"); np; np = np->next) { + ireg = (uint *)get_property(np, "reg", &ilen); + if (ireg && ireg[0] == hard_smp_processor_id()) { + ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); + i = ilen / sizeof(int); + if (ireg && i > 0) { + default_server = ireg[0]; + default_distrib_server = ireg[i-1]; /* take last element */ + } + break; + } + } + + intr_base = inodes[0].addr; + intr_size = (ulong)inodes[0].size; + + np = find_type_devices("interrupt-controller"); + if (!np) { + printk(KERN_WARNING "xics: no ISA Interrupt Controller\n"); + xics_irq_8259_cascade = -1; + } else { + ireg = (uint *) get_property(np, "interrupts", 0); + if (!ireg) { + printk(KERN_WARNING "Can't find ISA Interrupts Property\n"); + udbg_printf("Can't find ISA Interrupts Property\n"); + while (1); + } + xics_irq_8259_cascade_real = *ireg; + xics_irq_8259_cascade = virt_irq_create_mapping(xics_irq_8259_cascade_real); + } + + if (_machine == _MACH_pSeries) { +#ifdef CONFIG_SMP + for (i = 0; i < naca->processorCount; ++i) { + xics_info.per_cpu[i] = + __ioremap((ulong)inodes[get_hard_smp_processor_id(i)].addr, + (ulong)inodes[get_hard_smp_processor_id(i)].size, _PAGE_NO_CACHE); + } +#else + xics_info.per_cpu[0] = __ioremap((ulong)intr_base, intr_size, _PAGE_NO_CACHE); +#endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_PSERIES + /* actually iSeries does not use any of xics...but it has link dependencies + * for now, except this new one... 
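+	 * On a logically partitioned (LPAR) system the presentation
+	 * registers cannot be reached through the __ioremap'ed
+	 * addresses set up above; pSeriesLP_ops (defined elsewhere) is
+	 * assumed to route the xirr/cppr/qirr accesses through
+	 * hypervisor calls instead.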
+ */ + } else if (_machine == _MACH_pSeriesLP) { + ops = &pSeriesLP_ops; +#endif + } + + xics_8259_pic.enable = i8259_pic.enable; + xics_8259_pic.disable = i8259_pic.disable; + for (i = 0; i < 16; ++i) + irq_desc[i].handler = &xics_8259_pic; + for (; i < NR_IRQS; ++i) + irq_desc[i].handler = &xics_pic; + + ops->cppr_info(0, 0xff); + iosync(); + if (xics_irq_8259_cascade != -1) { + if (request_irq(xics_irq_8259_cascade + XICS_IRQ_OFFSET, no_action, + 0, "8259 cascade", 0)) + printk(KERN_ERR "xics_init_IRQ: couldn't get 8259 cascade\n"); + i8259_init(); + } + +#ifdef CONFIG_SMP + real_irq_to_virt_map[XICS_IPI] = virt_irq_to_real_map[XICS_IPI] = XICS_IPI; + request_irq(XICS_IPI + XICS_IRQ_OFFSET, xics_ipi_action, 0, "IPI", 0); + irq_desc[XICS_IPI+XICS_IRQ_OFFSET].status |= IRQ_PER_CPU; +#endif +} + +void xics_isa_init(void) +{ + return; + if (request_irq(xics_irq_8259_cascade + XICS_IRQ_OFFSET, no_action, + 0, "8259 cascade", 0)) + printk(KERN_ERR "xics_init_IRQ: couldn't get 8259 cascade\n"); + i8259_init(); +} + +/* + * Find first logical cpu and return its physical cpu number + */ +static inline u32 physmask(u32 cpumask) +{ + int i; + + for (i = 0; i < smp_num_cpus; ++i, cpumask >>= 1) { + if (cpumask & 1) + return get_hard_smp_processor_id(i); + } + + printk(KERN_ERR "xics_set_affinity: invalid irq mask\n"); + + return default_distrib_server; +} + +void xics_set_affinity(unsigned int virq, unsigned long cpumask) +{ + irq_desc_t *desc = irq_desc + virq; + unsigned int irq; + unsigned long flags; + long status; + unsigned long xics_status[2]; + u32 newmask; + + virq -= XICS_IRQ_OFFSET; + irq = virt_irq_to_real(virq); + if (irq == XICS_IPI) + return; + + spin_lock_irqsave(&desc->lock, flags); + + status = rtas_call(ibm_get_xive, 1, 3, (void *)&xics_status, irq); + + if (status) { + printk("xics_set_affinity: irq=%d ibm,get-xive returns %ld\n", + irq, status); + goto out; + } + + /* For the moment only implement delivery to all cpus or one cpu */ + if (cpumask == 0xffffffff) + newmask = default_distrib_server; + else + newmask = physmask(cpumask); + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + irq, newmask, xics_status[1]); + + if (status) { + printk("xics_set_affinity irq=%d ibm,set-xive returns %ld\n", + irq, status); + goto out; + } + +out: + spin_unlock_irqrestore(&desc->lock, flags); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/kernel/xics.h linuxppc64_2_4/arch/ppc64/kernel/xics.h --- ../kernel.org/linux/arch/ppc64/kernel/xics.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/xics.h Tue Jun 26 17:16:50 2001 @@ -0,0 +1,24 @@ +/* + * arch/ppc/kernel/xics.h + * + * Copyright 2000 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
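+ *
+ * A platform is expected to hook this controller up from its setup
+ * code, roughly as follows (a sketch only; the real ppc_md wiring
+ * lives in the platform setup files, not here):
+ *
+ *	ppc_md.init_IRQ = xics_init_IRQ;
+ *	ppc_md.get_irq = xics_get_irq;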
+ */ + +#ifndef _PPC_KERNEL_XICS_H +#define _PPC_KERNEL_XICS_H + +#include "local_irq.h" + +extern struct hw_interrupt_type xics_pic; +extern struct hw_interrupt_type xics_8259_pic; + +void xics_init_IRQ(void); +int xics_get_irq(struct pt_regs *); +void xics_isa_init(void); + +#endif /* _PPC_KERNEL_XICS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/lib/Makefile linuxppc64_2_4/arch/ppc64/lib/Makefile --- ../kernel.org/linux/arch/ppc64/lib/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/lib/Makefile Wed Oct 24 00:50:20 2001 @@ -0,0 +1,11 @@ +# +# Makefile for ppc64-specific library files.. +# + +USE_STANDARD_AS_RULE := true + +O_TARGET = lib.o + +obj-y := checksum.o dec_and_lock.o string.o strcase.o + +include $(TOPDIR)/Rules.make diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/lib/checksum.S linuxppc64_2_4/arch/ppc64/lib/checksum.S --- ../kernel.org/linux/arch/ppc64/lib/checksum.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/lib/checksum.S Fri May 4 17:13:58 2001 @@ -0,0 +1,231 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include "../kernel/ppc_asm.tmpl" + + .text + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? 
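+			(only halfword alignment is guaranteed on
+			entry, so bit 1 of the address decides whether
+			the 2-byte prologue below runs before the
+			doubleword loop)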
*/ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? */ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. 
r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/lib/dec_and_lock.c linuxppc64_2_4/arch/ppc64/lib/dec_and_lock.c --- ../kernel.org/linux/arch/ppc64/lib/dec_and_lock.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/lib/dec_and_lock.c Wed Oct 24 00:50:20 2001 @@ -0,0 +1,38 @@ +/* + * ppc64 version of atomic_dec_and_lock() using cmpxchg + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + +repeat: + counter = atomic_read(atomic); + newcount = counter-1; + + if (!newcount) + goto slow_path; + + newcount = cmpxchg(&atomic->counter, counter, newcount); + + if (newcount != counter) + goto repeat; + return 0; + +slow_path: + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/lib/strcase.c linuxppc64_2_4/arch/ppc64/lib/strcase.c --- ../kernel.org/linux/arch/ppc64/lib/strcase.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/lib/strcase.c Mon May 7 12:48:05 2001 @@ -0,0 +1,31 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} + +int strncasecmp(const char *s1, const char *s2, int n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/lib/string.S linuxppc64_2_4/arch/ppc64/lib/string.S --- ../kernel.org/linux/arch/ppc64/lib/string.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/lib/string.S Fri May 4 17:13:58 2001 @@ -0,0 +1,661 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
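+ *
+ * The bulk loops below move data with 32-bit lwz/stw, which remains
+ * correct on 64-bit PowerPC; the performance comes from the
+ * cache-line sized dcbt/dcbz hints (128-byte lines here).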
+ */ +#include "../kernel/ppc_asm.tmpl" +#include +#include +#include + +#define CACHE_LINE_SIZE 128 +#define LG_CACHE_LINE_SIZE 7 +#define MAX_COPY_PREFETCH 1 + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 3; \ + .llong 8 ## n ## 0b,9 ## n ## 0b; \ + .llong 8 ## n ## 1b,9 ## n ## 0b; \ + .llong 8 ## n ## 2b,9 ## n ## 0b; \ + .llong 8 ## n ## 3b,9 ## n ## 0b; \ + .llong 8 ## n ## 4b,9 ## n ## 1b; \ + .llong 8 ## n ## 5b,9 ## n ## 1b; \ + .llong 8 ## n ## 6b,9 ## n ## 1b; \ + .llong 8 ## n ## 7b,9 ## n ## 1b; \ +.text + +CACHELINE_BYTES = CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE +CACHELINE_MASK = (CACHE_LINE_SIZE-1) + +_GLOBAL(strcpy) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strncpy) + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + +_GLOBAL(strcat) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strcmp) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + +_GLOBAL(strlen) + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwi r9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1 /* total number of complete cachelines */ + ble 2f + xori r0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwu r4,4(r6) + bdnz 4b +3: mtctr r9 + li r7,4 +10: dcbz r7,r6 + addi r6,r6,CACHELINE_BYTES + bdnz 10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addi r5,r5,4 +2: srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(memset) + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(bcopy) + mr r6,r3 + mr r3,r4 + mr r4,r6 + b .memcpy + +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. 
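+ *
+ * dcbz establishes a zeroed cache line without first fetching its
+ * old contents from memory, so issuing it on each destination line
+ * before the stores avoids the read-for-ownership traffic that
+ * plain stores would generate.  That is also why the destination
+ * must be cacheable and why overlapping regions fall back to the
+ * plain memcpy.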
+ */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt .memcpy /* if regions overlap */ + + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: + dcbz r11,r6 + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt .backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(memcmp) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr +2: li r3,0 + blr + +_GLOBAL(memchr) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r3,r3,-1 +1: lbzu r0,1(r3) + cmpw 0,r0,r4 + bdnzf 2,1b + beqlr +2: li r3,0 + blr + +_GLOBAL(__copy_tofrom_user) + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ +71: stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ +73: stwu r9,4(r6) + bdnz 72b + +58: srwi. 
r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + + /* Here we decide how far ahead to prefetch the source */ +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. */ + cmpwi r0,MAX_COPY_PREFETCH + li r7,1 + li r3,4 + ble 111f + li r7,MAX_COPY_PREFETCH +111: mtctr r7 +112: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 112b +#else /* MAX_COPY_PREFETCH == 1 */ + li r3,CACHELINE_BYTES + 4 + dcbt r11,r4 +#endif /* MAX_COPY_PREFETCH */ + + mtctr r0 +53: + dcbt r3,r4 + dcbz r11,r6 +/* had to move these to keep extable in order */ + .section __ex_table,"a" + .align 3 + .llong 70b,100f + .llong 71b,101f + .llong 72b,102f + .llong 73b,103f + .llong 53b,105f + .text +/* the main body of the cacheline loop */ + COPY_16_BYTES_WITHEX(0) +#if CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_WITHEX(1) +#if CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_WITHEX(2) + COPY_16_BYTES_WITHEX(3) +#if CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_WITHEX(4) + COPY_16_BYTES_WITHEX(5) + COPY_16_BYTES_WITHEX(6) + COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) +41: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: li r3,0 + blr + +/* read fault, initial single-byte copy */ +100: li r4,0 + b 90f +/* write fault, initial single-byte copy */ +101: li r4,1 +90: subf r5,r8,r5 + li r3,0 + b 99f +/* read fault, initial word copy */ +102: li r4,0 + b 91f +/* write fault, initial word copy */ +103: li r4,1 +91: li r3,2 + b 99f + +/* + * this stuff handles faults in the cacheline loop and branches to either + * 104f (if in read part) or 105f (if in write part), after updating r5 + */ + COPY_16_BYTES_EXCODE(0) +#if CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_EXCODE(1) +#if CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_EXCODE(2) + COPY_16_BYTES_EXCODE(3) +#if CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_EXCODE(4) + COPY_16_BYTES_EXCODE(5) + COPY_16_BYTES_EXCODE(6) + COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + +/* read fault in cacheline loop */ +104: li r4,0 + b 92f +/* fault on dcbz (effectively a write fault) */ +/* or write fault in cacheline loop */ +105: li r4,1 +92: li r3,LG_CACHELINE_BYTES + b 99f +/* read fault in final word loop */ +108: li r4,0 + b 93f +/* write fault in final word loop */ +109: li r4,1 +93: andi. r5,r5,3 + li r3,2 + b 99f +/* read fault in final byte loop */ +110: li r4,0 + b 94f +/* write fault in final byte loop */ +111: li r4,1 +94: li r5,0 + li r3,0 +/* + * At this stage the number of bytes not copied is + * r5 + (ctr << r3), and r4 is 0 for read or 1 for write. + */ +99: mfctr r0 + slw r3,r0,r3 + add r3,r3,r5 + cmpwi 0,r4,0 + bne 120f +/* for read fault, clear out the destination: r3 bytes starting at 4(r6) */ + srwi. r0,r3,2 + li r9,0 + mtctr r0 + beq 113f +112: stwu r9,4(r6) + bdnz 112b +113: andi. r0,r3,3 + mtctr r0 + beq 120f +114: stb r9,4(r6) + addi r6,r6,1 + bdnz 114b +120: blr + + .section __ex_table,"a" + .align 3 + .llong 30b,108b + .llong 31b,109b + .llong 40b,110b + .llong 41b,111b + .llong 112b,120b + .llong 114b,120b + .text + +_GLOBAL(__clear_user) + addi r6,r3,-4 + li r3,0 + li r5,0 + cmplwi 0,r4,4 + blt 7f + /* clear a single word */ +11: stwu r5,4(r6) + beqlr + /* clear word sized chunks */ + andi. 
r0,r6,3 + add r4,r0,r4 + subf r6,r0,r6 + srwi r0,r4,2 + mtctr r0 + bdz 6f +1: stwu r5,4(r6) + bdnz 1b +6: andi. r4,r4,3 + /* clear byte sized chunks */ +7: cmpwi 0,r4,0 + beqlr + mtctr r4 + addi r6,r6,3 +8: stbu r5,1(r6) + bdnz 8b + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + .align 3 + .llong 11b,99b + .llong 1b,99b + .llong 8b,99b + .text + +_GLOBAL(__strncpy_from_user) + addi r6,r3,-1 + addi r4,r4,-1 + cmpwi 0,r5,0 + beq 2f + mtctr r5 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + beq 3f +2: addi r6,r6,1 +3: subf r3,r3,r6 + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + .align 3 + .llong 1b,99b + .text + +/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ +_GLOBAL(__strnlen_user) + addi r7,r3,-1 + subf r6,r7,r5 /* top+1 - str */ + cmplw 0,r4,r6 + bge 0f + mr r6,r4 +0: mtctr r6 /* ctr = min(len, top - str) */ +1: lbzu r0,1(r7) /* get next byte */ + cmpwi 0,r0,0 + bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ + addi r7,r7,1 + subf r3,r3,r7 /* number of bytes we have looked at */ + beqlr /* return if we found a 0 byte */ + cmpw 0,r3,r4 /* did we look at all len bytes? */ + blt 99f /* if not, must have hit top */ + addi r3,r4,1 /* return len + 1 to indicate no null found */ + blr +99: li r3,0 /* bad address, return 0 */ + blr + + .section __ex_table,"a" + .align 3 + .llong 1b,99b diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/mm/Makefile linuxppc64_2_4/arch/ppc64/mm/Makefile --- ../kernel.org/linux/arch/ppc64/mm/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/mm/Makefile Thu Aug 30 05:08:46 2001 @@ -0,0 +1,16 @@ +# +# Makefile for the linux ppc-specific parts of the memory manager. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +EXTRA_CFLAGS = -mno-minimal-toc + +O_TARGET := mm.o + +obj-y := fault.o init.o extable.o imalloc.o + +include $(TOPDIR)/Rules.make diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/mm/extable.c linuxppc64_2_4/arch/ppc64/mm/extable.c --- ../kernel.org/linux/arch/ppc64/mm/extable.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/mm/extable.c Mon May 7 12:48:27 2001 @@ -0,0 +1,48 @@ +/* + * linux/arch/ppc/mm/extable.c + * + * from linux/arch/i386/mm/extable.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
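+ *
+ * The __ex_table section is an array of (faulting insn, fixup)
+ * address pairs; care is taken (see the notes in lib/string.S) to
+ * emit the entries in address order, which is what allows
+ * search_one_table() below to do a plain binary search.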
+ */ + +#include +#include + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long +search_one_table(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + long diff; + + mid = (last - first) / 2 + first; + diff = mid->insn - value; + if (diff == 0) + return mid->fixup; + else if (diff < 0) + first = mid+1; + else + last = mid-1; + } + return 0; +} + +unsigned long +search_exception_table(unsigned long addr) +{ + unsigned long ret; + + ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); + if (ret) return ret; + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/mm/fault.c linuxppc64_2_4/arch/ppc64/mm/fault.c --- ../kernel.org/linux/arch/ppc64/mm/fault.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/mm/fault.c Wed Nov 7 00:49:28 2001 @@ -0,0 +1,231 @@ +/* + * arch/ppc/mm/fault.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) +extern void (*debugger)(struct pt_regs *); +extern void (*debugger_fault_handler)(struct pt_regs *); +extern int (*debugger_dabr_match)(struct pt_regs *); +int debugger_kernel_faults = 1; +#endif + +extern void die_if_kernel(char *, struct pt_regs *, long); +void bad_page_fault(struct pt_regs *, unsigned long); +void do_page_fault(struct pt_regs *, unsigned long, unsigned long); + +#ifdef CONFIG_PPCDBG +extern unsigned long get_srr0(void); +extern unsigned long get_srr1(void); +#endif + +/* + * For 600- and 800-family processors, the error_code parameter is DSISR + * for a data fault, SRR1 for an instruction fault. + */ +void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + unsigned long code = SEGV_MAPERR; + unsigned long is_write = error_code & 0x02000000; + unsigned long mm_fault_return; + + PPCDBG(PPCDBG_MM, "Entering do_page_fault: addr = 0x%16.16lx, error_code = %lx\n\tregs_trap = %lx, srr0 = %lx, srr1 = %lx\n", address, error_code, regs->trap, get_srr0(), get_srr1()); + /* + * Fortunately the bit assignments in SRR1 for an instruction + * fault and DSISR for a data fault are mostly the same for the + * bits we are interested in. But there are some bits which + * indicate errors in DSISR but can validly be set in SRR1. + */ + if (regs->trap == 0x400) + error_code &= 0x48200000; + +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) +#if 1 /* Crap??? 
PPPBBB */ + if (debugger_fault_handler && regs->trap == 0x300) { + debugger_fault_handler(regs); + return; + } +#endif + if (error_code & 0x00400000) { + /* DABR match */ + if (debugger_dabr_match(regs)) + return; + } +#endif /* CONFIG_XMON || CONFIG_KGDB */ + + if (in_interrupt() || mm == NULL) { + bad_page_fault(regs, address); + return; + } + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + PPCDBG(PPCDBG_MM, "\tdo_page_fault: vma = 0x%16.16lx\n", vma); + if (!vma) { + PPCDBG(PPCDBG_MM, "\tdo_page_fault: !vma\n"); + goto bad_area; + } + PPCDBG(PPCDBG_MM, "\tdo_page_fault: vma->vm_start = 0x%16.16lx, vma->vm_flags = 0x%16.16lx\n", vma->vm_start, vma->vm_flags); + if (vma->vm_start <= address) { + goto good_area; + } + if (!(vma->vm_flags & VM_GROWSDOWN)) { + PPCDBG(PPCDBG_MM, "\tdo_page_fault: vma->vm_flags = %lx, %lx\n", vma->vm_flags, VM_GROWSDOWN); + goto bad_area; + } + if (expand_stack(vma, address)) { + PPCDBG(PPCDBG_MM, "\tdo_page_fault: expand_stack\n"); + goto bad_area; + } + +good_area: + code = SEGV_ACCERR; + + /* a write */ + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + PPCDBG(PPCDBG_MM, "\tdo_page_fault: calling handle_mm_fault\n"); + mm_fault_return = handle_mm_fault(mm, vma, address, is_write); + PPCDBG(PPCDBG_MM, "\tdo_page_fault: handle_mm_fault = 0x%lx\n", + mm_fault_return); + switch(mm_fault_return) { + case 1: + current->min_flt++; + break; + case 2: + current->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (void *) address; + PPCDBG(PPCDBG_SIGNAL, "Bad addr in user: 0x%lx\n", address); +#ifdef CONFIG_XMON + ifppcdebug(PPCDBG_SIGNALXMON) + PPCDBG_ENTER_DEBUGGER_REGS(regs); +#endif + + force_sig_info(SIGSEGV, &info, current); + return; + } + + bad_page_fault(regs, address); + return; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + bad_page_fault(regs, address); + return; + +do_sigbus: + up_read(&mm->mmap_sem); + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + force_sig_info (SIGBUS, &info, current); + if (!user_mode(regs)) + bad_page_fault(regs, address); +} + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from do_page_fault above and from some of the procedures + * in traps.c. + */ +void +bad_page_fault(struct pt_regs *regs, unsigned long address) +{ + unsigned long fixup; + + /* Are we prepared to handle this fault? 
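+	 * That is: did the fault happen in a user-access helper such as
+	 * __copy_tofrom_user, which registered a fixup address in the
+	 * __ex_table section?  If so, resume at the fixup rather than
+	 * treating this as a fatal kernel access.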
*/ + if ((fixup = search_exception_table(regs->nip)) != 0) { + regs->nip = fixup; + return; + } + + /* kernel has accessed a bad area */ + show_regs(regs); +#if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_kernel_faults) + debugger(regs); +#endif + print_backtrace( (unsigned long *)regs->gpr[1] ); + panic("kernel access of bad area pc %lx lr %lx address %lX tsk %s/%d", + regs->nip,regs->link,address,current->comm,current->pid); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/mm/imalloc.c linuxppc64_2_4/arch/ppc64/mm/imalloc.c --- ../kernel.org/linux/arch/ppc64/mm/imalloc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/mm/imalloc.c Mon Jun 4 20:06:03 2001 @@ -0,0 +1,69 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include + +rwlock_t imlist_lock = RW_LOCK_UNLOCKED; +struct vm_struct * imlist = NULL; + +struct vm_struct *get_im_area(unsigned long size) { + unsigned long addr; + struct vm_struct **p, *tmp, *area; + + area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + addr = IMALLOC_START; + write_lock(&imlist_lock); + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (size + addr < (unsigned long) tmp->addr) + break; + addr = tmp->size + (unsigned long) tmp->addr; + if (addr > IMALLOC_END-size) { + write_unlock(&imlist_lock); + kfree(area); + return NULL; + } + } + area->flags = 0; + area->addr = (void *)addr; + area->size = size; + area->next = *p; + *p = area; + write_unlock(&imlist_lock); + return area; +} + +void ifree(void * addr) { + struct vm_struct **p, *tmp; + + if (!addr) + return; + if ((PAGE_SIZE-1) & (unsigned long) addr) { + printk(KERN_ERR "Trying to ifree() bad address (%p)\n", addr); + return; + } + write_lock(&imlist_lock); + for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { + if (tmp->addr == addr) { + *p = tmp->next; + kfree(tmp); + write_unlock(&imlist_lock); + return; + } + } + write_unlock(&imlist_lock); + printk(KERN_ERR "Trying to ifree() nonexistent area (%p)\n", addr); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/mm/init.c linuxppc64_2_4/arch/ppc64/mm/init.c --- ../kernel.org/linux/arch/ppc64/mm/init.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/mm/init.c Mon Nov 19 22:52:57 2001 @@ -0,0 +1,710 @@ +/* + * + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
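+ *
+ * A note on I/O mappings: until mem_init_done is set, __ioremap()
+ * below hands out virtual space by bumping ioremap_bot; after that
+ * it asks the first-fit imalloc allocator (mm/imalloc.c) for a
+ * range between IMALLOC_START and IMALLOC_END.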
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_INITRD +#include /* for initrd_* */ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_EEH +#include +#endif + +#include + +#define PGTOKB(pages) (((pages) * PAGE_SIZE) >> 10) + +#ifdef CONFIG_PPC_ISERIES +#include +#endif + +struct mmu_context_queue_t mmu_context_queue; +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; + +static int boot_mapsize; +static unsigned long totalram_pages; + +extern pgd_t swapper_pg_dir[]; +extern char __init_begin, __init_end; +extern char __chrp_begin, __chrp_end; +extern char __openfirmware_begin, __openfirmware_end; +extern struct _of_tce_table of_tce_table[]; +extern char _start[], _end[]; +extern char _stext[], etext[]; +extern struct task_struct *current_set[NR_CPUS]; +extern struct Naca *naca; + +void mm_init_ppc64(void); + +unsigned long *pmac_find_end_of_memory(void); +extern unsigned long *find_end_of_memory(void); + +extern pgd_t ioremap_dir[]; +pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir; + +static void map_io_page(unsigned long va, unsigned long pa, int flags); +extern void die_if_kernel(char *,struct pt_regs *,long); + +unsigned long klimit = (unsigned long)_end; + +HPTE *Hash=0; +unsigned long Hash_size=0; +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* This is declared as we are using the more or less generic + * include/asm-ppc64/tlb.h file -- tgall + */ +mmu_gather_t mmu_gathers[NR_CPUS]; + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + + if (pgtable_cache_size > high) { + do { + if (pgd_quicklist) + free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed; + if (pmd_quicklist) + free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed; + if (pte_quicklist) + free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed; + } while (pgtable_cache_size > low); + } + return freed; +} + +void show_mem(void) +{ + int i,free = 0,total = 0,reserved = 0; + int shared = 0, cached = 0; + struct task_struct *p; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (!atomic_read(&mem_map[i].count)) + free++; + else + shared += atomic_read(&mem_map[i].count) - 1; + } + printk("%d pages of RAM\n",total); + printk("%d free pages\n",free); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%d pages in page table cache\n",(int)pgtable_cache_size); + show_buffers(); + printk("%-8s %3s %8s %8s %8s %9s %8s", "Process", "Pid", + "Ctx", "Ctx<<4", "Last Sys", "pc", "task"); +#ifdef CONFIG_SMP + printk(" %3s", "CPU"); +#endif /* CONFIG_SMP */ + printk("\n"); + for_each_task(p) + { + printk("%-8.8s %3d %8ld %8ld %8ld %c%08lx %08lx ", + p->comm,p->pid, + (p->mm)?p->mm->context:0, + (p->mm)?(p->mm->context<<4):0, + p->thread.last_syscall, + (p->thread.regs)?user_mode(p->thread.regs) ? 
'u' : 'k' : '?', + (p->thread.regs)?p->thread.regs->nip:0, + (ulong)p); + { + int iscur = 0; +#ifdef CONFIG_SMP + printk("%3d ", p->processor); + if ( (p->processor != NO_PROC_ID) && + (p == current_set[p->processor]) ) + { + iscur = 1; + printk("current"); + } +#else + if ( p == current ) + { + iscur = 1; + printk("current"); + } + + if ( p == last_task_used_math ) + { + if ( iscur ) + printk(","); + printk("last math"); + } +#endif /* CONFIG_SMP */ + printk("\n"); + } + } +} + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = 0; + val->freehigh = 0; + val->mem_unit = PAGE_SIZE; +} + +void * +ioremap(unsigned long addr, unsigned long size) +{ +#ifdef CONFIG_PPC_ISERIES + return (void*)addr; +#else +#ifdef CONFIG_PPC_EEH + if(mem_init_done && (addr >> 60UL)) { + if (IS_EEH_TOKEN_DISABLED(addr)) + return IO_TOKEN_TO_ADDR(addr); + return (void*)addr; /* already mapped address or EEH token. */ + } +#endif + return __ioremap(addr, size, _PAGE_NO_CACHE); +#endif +} + +extern struct vm_struct * get_im_area( unsigned long size ); + +void * +__ioremap(unsigned long addr, unsigned long size, unsigned long flags) +{ + unsigned long pa, ea, i; + + /* + * Choose an address to map it to. + * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END (0xE000001fffffffff) + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = get_im_area(size); + if (area == 0) + return NULL; + ea = (unsigned long)(area->addr); + } + else { + ea = ioremap_bot; + ioremap_bot += size; + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU)) + flags |= _PAGE_GUARDED; + + for (i = 0; i < size; i += PAGE_SIZE) { + map_io_page(ea+i, pa+i, flags); + } + + return (void *) (ea + (addr & ~PAGE_MASK)); +} + +void iounmap(void *addr) +{ +#ifdef CONFIG_PPC_ISERIES + /* iSeries I/O Remap is a noop */ + return; +#else + /* DRENG / PPPBBB todo */ + return; +#endif +} + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static void map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&ioremap_mm.page_table_lock); + pgdp = pgd_offset_i(ea); + pmdp = pmd_alloc(&ioremap_mm, pgdp, ea); + ptep = pte_alloc(&ioremap_mm, pmdp, ea); + set_pte(ptep, mk_pte_phys(pa & PAGE_MASK, __pgprot(flags))); + spin_unlock(&ioremap_mm.page_table_lock); + } else { + /* If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + make_pte(htab_data.htab, + (vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea) + pa, + _PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX, + htab_data.htab_hash_mask, 0); + } +} + +void +local_flush_tlb_all(void) +{ + /* Implemented to just flush the vmalloc area. + * vmalloc is the only user of flush_tlb_all. 
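+ * Restricting the walk to VMALLOC_START..VMALLOC_END spares the user
+ * and I/O regions, which are handled by the more targeted flush
+ * routines below.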
+ */ + local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END ); +} + +void +local_flush_tlb_mm(struct mm_struct *mm) +{ + if ( mm->map_count ) { + struct vm_area_struct *mp; + for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next ) + local_flush_tlb_range( mm, mp->vm_start, mp->vm_end ); + } + else /* MIKEC: It is not clear why this is needed */ + /* paulus: it is needed to clear out stale HPTEs + * when an address space (represented by an mm_struct) + * is being destroyed. */ + local_flush_tlb_range( mm, USER_START, USER_END ); +} + + +/* + * Callers should hold the mm->page_table_lock + */ +void +local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + unsigned long context = 0; + pgd_t *pgd; + pmd_t *pmd; + pte_t *ptep; + pte_t pte; + + switch( REGION_ID(vmaddr) ) { + case VMALLOC_REGION_ID: + pgd = pgd_offset_k( vmaddr ); + break; + case IO_REGION_ID: + pgd = pgd_offset_i( vmaddr ); + break; + case USER_REGION_ID: + pgd = pgd_offset( vma->vm_mm, vmaddr ); + context = vma->vm_mm->context; + break; + default: + panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr); + + } + + + if (!pgd_none(*pgd)) { + pmd = pmd_offset(pgd, vmaddr); + if (!pmd_none(*pmd)) { + ptep = pte_offset(pmd, vmaddr); + /* Check if HPTE might exist and flush it if so */ + pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); + if ( pte_val(pte) & _PAGE_HASHPTE ) { + flush_hash_page(context, vmaddr, pte); + } + } + } +} + +void +local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *ptep; + pte_t pte; + unsigned long pgd_end, pmd_end; + unsigned long context; + + if ( start >= end ) + panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end ); + + if ( REGION_ID(start) != REGION_ID(end) ) + panic("flush_tlb_range: start (%016lx) and end (%016lx) not in same region\n", start, end ); + + context = 0; + + switch( REGION_ID(start) ) { + case VMALLOC_REGION_ID: + pgd = pgd_offset_k( start ); + break; + case IO_REGION_ID: + pgd = pgd_offset_i( start ); + break; + case USER_REGION_ID: + pgd = pgd_offset( mm, start ); + context = mm->context; + break; + default: + panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end); + + } + + do { + pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK; + if ( pgd_end > end ) + pgd_end = end; + if ( !pgd_none( *pgd ) ) { + pmd = pmd_offset( pgd, start ); + do { + pmd_end = ( start + PMD_SIZE ) & PMD_MASK; + if ( pmd_end > end ) + pmd_end = end; + if ( !pmd_none( *pmd ) ) { + ptep = pte_offset( pmd, start ); + do { + if ( pte_val(*ptep) & _PAGE_HASHPTE ) { + pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); + if ( pte_val(pte) & _PAGE_HASHPTE ) + flush_hash_page( context, start, pte ); + } + start += PAGE_SIZE; + ++ptep; + } while ( start < pmd_end ); + } + else + start = pmd_end; + ++pmd; + } while ( start < pgd_end ); + } + else + start = pgd_end; + ++pgd; + } while ( start < end ); +} + + +void __init free_initmem(void) +{ + unsigned long a; + unsigned long num_freed_pages = 0; +#define FREESEC(START,END,CNT) do { \ + a = (unsigned long)(&START); \ + for (; a < (unsigned long)(&END); a += PAGE_SIZE) { \ + clear_bit(PG_reserved, &mem_map[MAP_NR(a)].flags); \ + set_page_count(mem_map+MAP_NR(a), 1); \ + free_page(a); \ + CNT++; \ + } \ +} while (0) + + FREESEC(__init_begin,__init_end,num_freed_pages); + + printk ("Freeing unused kernel memory: %ldk init\n", + PGTOKB(num_freed_pages)); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned 
long start, unsigned long end) +{ + unsigned long freed = end - start; /* record the size before the loop advances start */ + + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(mem_map + MAP_NR(start)); + set_page_count(mem_map+MAP_NR(start), 1); + free_page(start); + totalram_pages++; + } + printk ("Freeing initrd memory: %ldk freed\n", freed >> 10); +} +#endif + + + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) { + struct Paca *paca; + unsigned long guard_page, index; + + ppc_md.progress("MM:init", 0); + + /* Reserve all contexts < FIRST_USER_CONTEXT for kernel use. + * The range of contexts [FIRST_USER_CONTEXT, NUM_USER_CONTEXT) + * are stored on a stack/queue for easy allocation and deallocation. + */ + mmu_context_queue.lock = SPIN_LOCK_UNLOCKED; + mmu_context_queue.head = 0; + mmu_context_queue.tail = NUM_USER_CONTEXT-1; + mmu_context_queue.size = NUM_USER_CONTEXT; + for(index=0; index < NUM_USER_CONTEXT ;index++) { + mmu_context_queue.elements[index] = index+FIRST_USER_CONTEXT; + } + + /* Setup guard pages for the Paca's */ + for (index = 0; index < NR_CPUS; index++) { + paca = &xPaca[index]; + guard_page = ((unsigned long)paca) + 0x1000; + ppc_md.hpte_updateboltedpp(PP_RXRX, guard_page); + } + + ppc_md.progress("MM:exit", 0x211); +} + + + +/* + * Initialize the bootmem system and give it all the memory we + * have available. + */ +void __init do_init_bootmem(void) +{ + unsigned long i; + unsigned long start, bootmap_pages; + unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; + + PPCDBG(PPCDBG_MMINIT, "do_init_bootmem: start\n"); + /* + * Find an area to use for the bootmem bitmap. Calculate the size of + * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. + * Add 1 additional page in case the address isn't page-aligned. + */ + bootmap_pages = bootmem_bootmap_pages(total_pages); + + start = (unsigned long)__a2p(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE)); + PPCDBG(PPCDBG_MMINIT, "\tstart = 0x%lx\n", start); + PPCDBG(PPCDBG_MMINIT, "\tphysicalMemorySize = 0x%lx\n", naca->physicalMemorySize); + + boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + PPCDBG(PPCDBG_MMINIT, "\tboot_mapsize = 0x%lx\n", boot_mapsize); + + /* add all physical memory to the bootmem map */ + for (i=0; i < lmb.memory.cnt ;i++) { + unsigned long physbase = lmb.memory.region[i].physbase; + unsigned long size = lmb.memory.region[i].size; + free_bootmem(physbase, size); + } + /* reserve the sections we're already using */ + for (i=0; i < lmb.reserved.cnt ;i++) { + unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long size = lmb.reserved.region[i].size; +#if 0 /* PPPBBB */ + if ( (physbase == 0) && (size < (16<<20)) ) { + size = 16 << 20; + } +#endif + reserve_bootmem(physbase, size); + } + + PPCDBG(PPCDBG_MMINIT, "do_init_bootmem: end\n"); +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES], i; + + /* + * All pages are DMA-able so we put them all in the DMA zone. 
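+ * ZONE_DMA therefore covers every page of DRAM; the remaining zone
+ * sizes are zeroed just below.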
+ */ + zones_size[0] = lmb_end_of_DRAM() >> PAGE_SHIFT; + for (i = 1; i < MAX_NR_ZONES; i++) + zones_size[i] = 0; + free_area_init(zones_size); +} + +extern unsigned long prof_shift; +extern unsigned long prof_len; +extern unsigned int * prof_buffer; +extern unsigned long dprof_shift; +extern unsigned long dprof_len; +extern unsigned int * dprof_buffer; + +void __init mem_init(void) +{ + extern char *sysmap; + extern unsigned long sysmap_size; + unsigned long addr; + int codepages = 0; + int datapages = 0; + int initpages = 0; + unsigned long va_rtas_base = (unsigned long)__va(rtas.base); + max_mapnr = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + num_physpages = max_mapnr; /* RAM is assumed contiguous */ + + totalram_pages += free_all_bootmem(); + + ifppcdebug(PPCDBG_MMINIT) { + udbg_printf("mem_init: totalram_pages = 0x%lx\n", totalram_pages); + udbg_printf("mem_init: va_rtas_base = 0x%lx\n", va_rtas_base); + udbg_printf("mem_init: va_rtas_end = 0x%lx\n", PAGE_ALIGN(va_rtas_base+rtas.size)); + udbg_printf("mem_init: pinned start = 0x%lx\n", __va(0)); + udbg_printf("mem_init: pinned end = 0x%lx\n", PAGE_ALIGN(klimit)); + } + + if ( sysmap_size ) + for (addr = (unsigned long)sysmap; + addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ; + addr += PAGE_SIZE) + SetPageReserved(mem_map + MAP_NR(addr)); + + for (addr = KERNELBASE; addr <= (unsigned long)__va(lmb_end_of_DRAM()); + addr += PAGE_SIZE) { + if (!PageReserved(mem_map + MAP_NR(addr))) + continue; + if (addr < (ulong) etext) + codepages++; + + else if (addr >= (unsigned long)&__init_begin + && addr < (unsigned long)&__init_end) + initpages++; + else if (addr < klimit) + datapages++; + } + + printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08lx,%08lx]\n", + (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), + codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), + initpages<< (PAGE_SHIFT-10), + PAGE_OFFSET, (unsigned long)__va(lmb_end_of_DRAM())); + mem_init_done = 1; + + /* set the last page of each hardware interrupt stack to be protected */ + initialize_paca_hardware_interrupt_stack(); + +#ifdef CONFIG_PPC_ISERIES + create_virtual_bus_tce_table(); + /* HACK HACK This allows the iSeries profiling to use /proc/profile */ + prof_shift = dprof_shift; + prof_len = dprof_len; + prof_buffer = dprof_buffer; +#endif +} + + + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + clear_bit(PG_arch_1, &page->flags); +} + +/* + * set_pte stores a linux PTE into the linux page table. + * On machines which use an MMU hash table we avoid changing the + * _PAGE_HASHPTE bit. + * If the new PTE has _PAGE_EXEC set, meaning that the user wants + * to be able to execute out of the page, we check if the page is + * i-cache dirty and flush it if so, and mark it clean. 
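+ * The PG_arch_1 page flag records that state: flush_dcache_page() above
+ * clears it, and set_pte() sets it again once the i-cache has been
+ * flushed.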
+ */ +void set_pte(pte_t *ptep, pte_t pte) +{ + pte_update(ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS); + if (mem_init_done && (pte_val(pte) & _PAGE_EXEC) + && pte_pagenr(pte) < max_mapnr) { + struct page *page = pte_page(pte); + if (!test_bit(PG_arch_1, &page->flags)) { + __flush_dcache_icache((unsigned long)page_address(page)); + set_bit(PG_arch_1, &page->flags); + } + } +} + +void clear_user_page(struct page *page, unsigned long vaddr) +{ + prefetchw(&page->flags); + clear_mem_page(page); + clear_bit(PG_arch_1, &page->flags); +} + +void copy_user_page(struct page *to, struct page *from, unsigned long vaddr) +{ + prefetchw(&to->flags); + copy_mem_page(to, from); + clear_bit(PG_arch_1, &to->flags); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/vmlinux.lds linuxppc64_2_4/arch/ppc64/vmlinux.lds --- ../kernel.org/linux/arch/ppc64/vmlinux.lds Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/vmlinux.lds Tue Sep 11 08:55:40 2001 @@ -0,0 +1,140 @@ +OUTPUT_ARCH(powerpc) +SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/local/powerpc-any-elf/lib); +/* Do we need any of these for elf? + __DYNAMIC = 0; */ +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } +/* .init : { *(.init) } =0*/ + .plt : { *(.plt) } + .text : + { + *(.text) + *(.fixup) + *(.got1) + } + . = ALIGN(4096); + _etext = .; + PROVIDE (etext = .); + .rodata : + { + *(.rodata) + *(.rodata1) + } + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + /* Read-write section, merged into data segment: */ + . = (. + 0x0FFF) & 0xFFFFFFFFFFFFF000; + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + . = ALIGN(4096); + _edata = .; + PROVIDE (edata = .); + + .fixup : { *(.fixup) } + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + + + . = ALIGN(4096); + .data.page_aligned : { *(.data.page_aligned) } + + . = ALIGN(128); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + . = ALIGN(4096); + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { + *(.data.init); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + + + . = ALIGN(4096); + __init_end = .; + + __chrp_begin = .; + .text.chrp : { *(.text.chrp) } + .data.chrp : { *(.data.chrp) } + . = ALIGN(4096); + __chrp_end = .; + + . 
= ALIGN(4096); + __openfirmware_begin = .; + .text.openfirmware : { *(.text.openfirmware) } + .data.openfirmware : { *(.data.openfirmware) } + . = ALIGN(4096); + __openfirmware_end = .; + + __toc_start = .; + .toc : + { + *(.toc) + } + . = ALIGN(4096); + __toc_end = .; + + __bss_start = .; + .bss : + { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + } + + . = ALIGN(4096); + _end = . ; + PROVIDE (end = .); +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/Makefile linuxppc64_2_4/arch/ppc64/xmon/Makefile --- ../kernel.org/linux/arch/ppc64/xmon/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/Makefile Fri May 4 17:13:59 2001 @@ -0,0 +1,9 @@ +# Makefile for xmon + +EXTRA_CFLAGS = -mno-minimal-toc + +O_TARGET = x.o + +obj-y := start.o xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o + +include $(TOPDIR)/Rules.make diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/adb.c linuxppc64_2_4/arch/ppc64/xmon/adb.c --- ../kernel.org/linux/arch/ppc64/xmon/adb.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/adb.c Mon May 7 12:48:56 2001 @@ -0,0 +1,217 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "nonstdio.h" +#include "privinst.h" + +#define scanhex xmon_scanhex +#define skipbl xmon_skipbl + +#define ADB_B (*(volatile unsigned char *)0xf3016000) +#define ADB_SR (*(volatile unsigned char *)0xf3017400) +#define ADB_ACR (*(volatile unsigned char *)0xf3017600) +#define ADB_IFR (*(volatile unsigned char *)0xf3017a00) + +static inline void eieio(void) { asm volatile ("eieio" : :); } + +#define N_ADB_LOG 1000 +struct adb_log { + unsigned char b; + unsigned char ifr; + unsigned char acr; + unsigned int time; +} adb_log[N_ADB_LOG]; +int n_adb_log; + +void +init_adb_log(void) +{ + adb_log[0].b = ADB_B; + adb_log[0].ifr = ADB_IFR; + adb_log[0].acr = ADB_ACR; + adb_log[0].time = get_dec(); + n_adb_log = 0; +} + +void +dump_adb_log(void) +{ + unsigned t, t0; + struct adb_log *ap; + int i; + + ap = adb_log; + t0 = ap->time; + for (i = 0; i <= n_adb_log; ++i, ++ap) { + t = t0 - ap->time; + printf("b=%x ifr=%x acr=%x at %d.%.7d\n", ap->b, ap->ifr, ap->acr, + t / 1000000000, (t % 1000000000) / 100); + } +} + +void +adb_chklog(void) +{ + struct adb_log *ap = &adb_log[n_adb_log + 1]; + + ap->b = ADB_B; + ap->ifr = ADB_IFR; + ap->acr = ADB_ACR; + if (ap->b != ap[-1].b || (ap->ifr & 4) != (ap[-1].ifr & 4) + || ap->acr != ap[-1].acr) { + ap->time = get_dec(); + ++n_adb_log; + } +} + +int +adb_bitwait(int bmask, int bval, int fmask, int fval) +{ + int i; + struct adb_log *ap; + + for (i = 10000; i > 0; --i) { + adb_chklog(); + ap = &adb_log[n_adb_log]; + if ((ap->b & bmask) == bval && (ap->ifr & fmask) == fval) + return 0; + } + return -1; +} + +int +adb_wait(void) +{ + if (adb_bitwait(0, 0, 4, 4) < 0) { + printf("adb: ready wait timeout\n"); + return -1; + } + return 0; +} + +void +adb_readin(void) +{ + int i, j; + unsigned char d[64]; + + if (ADB_B & 8) { + printf("ADB_B: %x\n", ADB_B); + return; + } + i = 0; + adb_wait(); + j = ADB_SR; + eieio(); + ADB_B &= ~0x20; + eieio(); + for (;;) { + if (adb_wait() < 0) + break; + d[i++] = ADB_SR; + eieio(); + if (ADB_B & 8) + break; + ADB_B ^= 0x10; + eieio(); + } + ADB_B |= 0x30; + if (adb_wait() == 0) + j = ADB_SR; + for (j = 0; j < i; ++j) + 
printf("%.2x ", d[j]); + printf("\n"); +} + +int +adb_write(unsigned char *d, int i) +{ + int j; + unsigned x; + + if ((ADB_B & 8) == 0) { + printf("r: "); + adb_readin(); + } + for (;;) { + ADB_ACR = 0x1c; + eieio(); + ADB_SR = d[0]; + eieio(); + ADB_B &= ~0x20; + eieio(); + if (ADB_B & 8) + break; + ADB_ACR = 0xc; + eieio(); + ADB_B |= 0x20; + eieio(); + adb_readin(); + } + adb_wait(); + for (j = 1; j < i; ++j) { + ADB_SR = d[j]; + eieio(); + ADB_B ^= 0x10; + eieio(); + if (adb_wait() < 0) + break; + } + ADB_ACR = 0xc; + eieio(); + x = ADB_SR; + eieio(); + ADB_B |= 0x30; + return j; +} + +void +adbcmds(void) +{ + char cmd; + unsigned rtcu, rtcl, dec, pdec, x; + int i, j; + unsigned char d[64]; + + cmd = skipbl(); + switch (cmd) { + case 't': + for (;;) { + rtcl = get_rtcl(); + rtcu = get_rtcu(); + dec = get_dec(); + printf("rtc u=%u l=%u dec=%x (%d = %d.%.7d)\n", + rtcu, rtcl, dec, pdec - dec, (pdec - dec) / 1000000000, + ((pdec - dec) % 1000000000) / 100); + pdec = dec; + if (cmd == 'x') + break; + while (xmon_read(stdin, &cmd, 1) != 1) + ; + } + break; + case 'r': + init_adb_log(); + while (adb_bitwait(8, 0, 0, 0) == 0) + adb_readin(); + break; + case 'w': + i = 0; + while (scanhex(&x)) + d[i++] = x; + init_adb_log(); + j = adb_write(d, i); + printf("sent %d bytes\n", j); + while (adb_bitwait(8, 0, 0, 0) == 0) + adb_readin(); + break; + case 'l': + dump_adb_log(); + break; + } +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/ansidecl.h linuxppc64_2_4/arch/ppc64/xmon/ansidecl.h --- ../kernel.org/linux/arch/ppc64/xmon/ansidecl.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/ansidecl.h Fri May 4 17:13:59 2001 @@ -0,0 +1,141 @@ +/* ANSI and traditional C compatability macros + Copyright 1991, 1992 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ANSI and traditional C compatibility macros + + ANSI C is assumed if __STDC__ is #defined. + + Macro ANSI C definition Traditional C definition + ----- ---- - ---------- ----------- - ---------- + PTR `void *' `char *' + LONG_DOUBLE `long double' `double' + VOLATILE `volatile' `' + SIGNED `signed' `' + PTRCONST `void *const' `char *' + ANSI_PROTOTYPES 1 not defined + + CONST is also defined, but is obsolete. Just use const. + + DEFUN (name, arglist, args) + + Defines function NAME. + + ARGLIST lists the arguments, separated by commas and enclosed in + parentheses. ARGLIST becomes the argument list in traditional C. + + ARGS list the arguments with their types. It becomes a prototype in + ANSI C, and the type declarations in traditional C. Arguments should + be separated with `AND'. For functions with a variable number of + arguments, the last thing listed should be `DOTS'. + + DEFUN_VOID (name) + + Defines a function NAME, which takes no arguments. + + obsolete -- EXFUN (name, (prototype)) -- obsolete. 
+ + Replaced by PARAMS. Do not use; will disappear someday soon. + Was used in external function declarations. + In ANSI C it is `NAME PROTOTYPE' (so PROTOTYPE should be enclosed in + parentheses). In traditional C it is `NAME()'. + For a function that takes no arguments, PROTOTYPE should be `(void)'. + + PARAMS ((args)) + + We could use the EXFUN macro to handle prototype declarations, but + the name is misleading and the result is ugly. So we just define a + simple macro to handle the parameter lists, as in: + + static int foo PARAMS ((int, char)); + + This produces: `static int foo();' or `static int foo (int, char);' + + EXFUN would have done it like this: + + static int EXFUN (foo, (int, char)); + + but the function is not external...and it's hard to visually parse + the function name out of the mess. EXFUN should be considered + obsolete; new code should be written to use PARAMS. + + For example: + extern int printf PARAMS ((CONST char *format DOTS)); + int DEFUN(fprintf, (stream, format), + FILE *stream AND CONST char *format DOTS) { ... } + void DEFUN_VOID(abort) { ... } +*/ + +#ifndef _ANSIDECL_H + +#define _ANSIDECL_H 1 + + +/* Every source file includes this file, + so they will all get the switch for lint. */ +/* LINTLIBRARY */ + + +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(WIN32) +/* All known AIX compilers implement these things (but don't always + define __STDC__). The RISC/OS MIPS compiler defines these things + in SVR4 mode, but does not define __STDC__. */ + +#define PTR void * +#define PTRCONST void *CONST +#define LONG_DOUBLE long double + +#define AND , +#define NOARGS void +#define CONST const +#define VOLATILE volatile +#define SIGNED signed +#define DOTS , ... + +#define EXFUN(name, proto) name proto +#define DEFUN(name, arglist, args) name(args) +#define DEFUN_VOID(name) name(void) + +#define PROTO(type, name, arglist) type name arglist +#define PARAMS(paramlist) paramlist +#define ANSI_PROTOTYPES 1 + +#else /* Not ANSI C. */ + +#define PTR char * +#define PTRCONST PTR +#define LONG_DOUBLE double + +#define AND ; +#define NOARGS +#define CONST +#ifndef const /* some systems define it in header files for non-ansi mode */ +#define const +#endif +#define VOLATILE +#define SIGNED +#define DOTS + +#define EXFUN(name, proto) name() +#define DEFUN(name, arglist, args) name arglist args; +#define DEFUN_VOID(name) name() +#define PROTO(type, name, arglist) type name () +#define PARAMS(paramlist) () + +#endif /* ANSI C. 
*/ + +#endif /* ansidecl.h */ diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/nonstdio.h linuxppc64_2_4/arch/ppc64/xmon/nonstdio.h --- ../kernel.org/linux/arch/ppc64/xmon/nonstdio.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/nonstdio.h Fri May 4 17:13:59 2001 @@ -0,0 +1,22 @@ +typedef int FILE; +extern FILE *xmon_stdin, *xmon_stdout; +#define EOF (-1) +#define stdin xmon_stdin +#define stdout xmon_stdout +#define printf xmon_printf +#define fprintf xmon_fprintf +#define fputs xmon_fputs +#define fgets xmon_fgets +#define putchar xmon_putchar +#define getchar xmon_getchar +#define putc xmon_putc +#define getc xmon_getc +#define fopen(n, m) NULL +#define fflush(f) do {} while (0) +#define fclose(f) do {} while (0) +extern char *fgets(char *, int, void *); +extern void xmon_printf(const char *, ...); +extern void xmon_fprintf(void *, const char *, ...); +extern void xmon_sprintf(char *, const char *, ...); + +#define perror(s) printf("%s: no files!\n", (s)) diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/ppc-dis.c linuxppc64_2_4/arch/ppc64/xmon/ppc-dis.c --- ../kernel.org/linux/arch/ppc64/xmon/ppc-dis.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/ppc-dis.c Fri May 4 17:13:59 2001 @@ -0,0 +1,190 @@ +/* ppc-dis.c -- Disassemble PowerPC instructions + Copyright 1994 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + +This file is part of GDB, GAS, and the GNU binutils. + +GDB, GAS, and the GNU binutils are free software; you can redistribute +them and/or modify them under the terms of the GNU General Public +License as published by the Free Software Foundation; either version +2, or (at your option) any later version. + +GDB, GAS, and the GNU binutils are distributed in the hope that they +will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this file; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "nonstdio.h" +#include "ansidecl.h" +#include "ppc.h" + +static int print_insn_powerpc PARAMS ((FILE *, unsigned long insn, + unsigned long memaddr, int dialect)); + +extern void print_address PARAMS((unsigned long memaddr)); + +/* Print a big endian PowerPC instruction. For convenience, also + disassemble instructions supported by the Motorola PowerPC 601. */ + +int +print_insn_big_powerpc (FILE *out, unsigned long insn, unsigned long memaddr) +{ + return print_insn_powerpc (out, insn, memaddr, + PPC_OPCODE_PPC | PPC_OPCODE_601); +} + +/* Print a PowerPC or POWER instruction. */ + +static int +print_insn_powerpc (FILE *out, unsigned long insn, unsigned long memaddr, + int dialect) +{ + const struct powerpc_opcode *opcode; + const struct powerpc_opcode *opcode_end; + unsigned long op; + + /* Get the major opcode of the instruction. */ + op = PPC_OP (insn); + + /* Find the first match in the opcode table. We could speed this up + a bit by doing a binary search on the major opcode. 
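The table is sorted by major opcode, which is also what lets the loop below stop scanning early as soon as op < table_op.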
*/ + opcode_end = powerpc_opcodes + powerpc_num_opcodes; + for (opcode = powerpc_opcodes; opcode < opcode_end; opcode++) + { + unsigned long table_op; + const unsigned char *opindex; + const struct powerpc_operand *operand; + int invalid; + int need_comma; + int need_paren; + + table_op = PPC_OP (opcode->opcode); + if (op < table_op) + break; + if (op > table_op) + continue; + + if ((insn & opcode->mask) != opcode->opcode + || (opcode->flags & dialect) == 0) + continue; + + /* Make two passes over the operands. First see if any of them + have extraction functions, and, if they do, make sure the + instruction is valid. */ + invalid = 0; + for (opindex = opcode->operands; *opindex != 0; opindex++) + { + operand = powerpc_operands + *opindex; + if (operand->extract) + (*operand->extract) (insn, &invalid); + } + if (invalid) + continue; + + /* The instruction is valid. */ + fprintf(out, "%s", opcode->name); + if (opcode->operands[0] != 0) + fprintf(out, "\t"); + + /* Now extract and print the operands. */ + need_comma = 0; + need_paren = 0; + for (opindex = opcode->operands; *opindex != 0; opindex++) + { + long value; + + operand = powerpc_operands + *opindex; + + /* Operands that are marked FAKE are simply ignored. We + already made sure that the extract function considered + the instruction to be valid. */ + if ((operand->flags & PPC_OPERAND_FAKE) != 0) + continue; + + /* Extract the value from the instruction. */ + if (operand->extract) + value = (*operand->extract) (insn, (int *) 0); + else + { + value = (insn >> operand->shift) & ((1 << operand->bits) - 1); + if ((operand->flags & PPC_OPERAND_SIGNED) != 0 + && (value & (1 << (operand->bits - 1))) != 0) + value -= 1 << operand->bits; + } + + /* If the operand is optional, and the value is zero, don't + print anything. */ + if ((operand->flags & PPC_OPERAND_OPTIONAL) != 0 + && (operand->flags & PPC_OPERAND_NEXT) == 0 + && value == 0) + continue; + + if (need_comma) + { + fprintf(out, ","); + need_comma = 0; + } + + /* Print the operand as directed by the flags. */ + if ((operand->flags & PPC_OPERAND_GPR) != 0) + fprintf(out, "r%ld", value); + else if ((operand->flags & PPC_OPERAND_FPR) != 0) + fprintf(out, "f%ld", value); + else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0) + print_address (memaddr + value); + else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0) + print_address (value & 0xffffffff); + else if ((operand->flags & PPC_OPERAND_CR) == 0 + || (dialect & PPC_OPCODE_PPC) == 0) + fprintf(out, "%ld", value); + else + { + if (operand->bits == 3) + fprintf(out, "cr%d", value); + else + { + static const char *cbnames[4] = { "lt", "gt", "eq", "so" }; + int cr; + int cc; + + cr = value >> 2; + if (cr != 0) + fprintf(out, "4*cr%d", cr); + cc = value & 3; + if (cc != 0) + { + if (cr != 0) + fprintf(out, "+"); + fprintf(out, "%s", cbnames[cc]); + } + } + } + + if (need_paren) + { + fprintf(out, ")"); + need_paren = 0; + } + + if ((operand->flags & PPC_OPERAND_PARENS) == 0) + need_comma = 1; + else + { + fprintf(out, "("); + need_paren = 1; + } + } + + /* We have found and printed an instruction; return. */ + return 4; + } + + /* We could not find a match. 
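Emit the raw word as a .long directive and still advance four bytes, so the surrounding disassembly stays aligned.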
*/ + fprintf(out, ".long 0x%lx", insn); + + return 4; +} diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/ppc-opc.c linuxppc64_2_4/arch/ppc64/xmon/ppc-opc.c --- ../kernel.org/linux/arch/ppc64/xmon/ppc-opc.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/ppc-opc.c Fri May 4 17:13:59 2001 @@ -0,0 +1,2816 @@ +/* ppc-opc.c -- PowerPC opcode list + Copyright 1994 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + +This file is part of GDB, GAS, and the GNU binutils. + +GDB, GAS, and the GNU binutils are free software; you can redistribute +them and/or modify them under the terms of the GNU General Public +License as published by the Free Software Foundation; either version +2, or (at your option) any later version. + +GDB, GAS, and the GNU binutils are distributed in the hope that they +will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this file; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include +#include "ansidecl.h" +#include "ppc.h" + +/* This file holds the PowerPC opcode table. The opcode table + includes almost all of the extended instruction mnemonics. This + permits the disassembler to use them, and simplifies the assembler + logic, at the cost of increasing the table size. The table is + strictly constant data, so the compiler should be able to put it in + the .text section. + + This file also holds the operand table. All knowledge about + inserting operands into instructions and vice-versa is kept in this + file. */ + +/* Local insertion and extraction functions. 
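Each insert_* routine merges an operand value into an instruction word, reporting bad values through *errmsg; the matching extract_* routine recovers the value and flags impossible encodings through *invalid.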
*/ + +static unsigned long insert_bat PARAMS ((unsigned long, long, const char **)); +static long extract_bat PARAMS ((unsigned long, int *)); +static unsigned long insert_bba PARAMS ((unsigned long, long, const char **)); +static long extract_bba PARAMS ((unsigned long, int *)); +static unsigned long insert_bd PARAMS ((unsigned long, long, const char **)); +static long extract_bd PARAMS ((unsigned long, int *)); +static unsigned long insert_bdm PARAMS ((unsigned long, long, const char **)); +static long extract_bdm PARAMS ((unsigned long, int *)); +static unsigned long insert_bdp PARAMS ((unsigned long, long, const char **)); +static long extract_bdp PARAMS ((unsigned long, int *)); +static unsigned long insert_bo PARAMS ((unsigned long, long, const char **)); +static long extract_bo PARAMS ((unsigned long, int *)); +static unsigned long insert_boe PARAMS ((unsigned long, long, const char **)); +static long extract_boe PARAMS ((unsigned long, int *)); +static unsigned long insert_ds PARAMS ((unsigned long, long, const char **)); +static long extract_ds PARAMS ((unsigned long, int *)); +static unsigned long insert_li PARAMS ((unsigned long, long, const char **)); +static long extract_li PARAMS ((unsigned long, int *)); +static unsigned long insert_mbe PARAMS ((unsigned long, long, const char **)); +static long extract_mbe PARAMS ((unsigned long, int *)); +static unsigned long insert_mb6 PARAMS ((unsigned long, long, const char **)); +static long extract_mb6 PARAMS ((unsigned long, int *)); +static unsigned long insert_nb PARAMS ((unsigned long, long, const char **)); +static long extract_nb PARAMS ((unsigned long, int *)); +static unsigned long insert_nsi PARAMS ((unsigned long, long, const char **)); +static long extract_nsi PARAMS ((unsigned long, int *)); +static unsigned long insert_ral PARAMS ((unsigned long, long, const char **)); +static unsigned long insert_ram PARAMS ((unsigned long, long, const char **)); +static unsigned long insert_ras PARAMS ((unsigned long, long, const char **)); +static unsigned long insert_rbs PARAMS ((unsigned long, long, const char **)); +static long extract_rbs PARAMS ((unsigned long, int *)); +static unsigned long insert_sh6 PARAMS ((unsigned long, long, const char **)); +static long extract_sh6 PARAMS ((unsigned long, int *)); +static unsigned long insert_spr PARAMS ((unsigned long, long, const char **)); +static long extract_spr PARAMS ((unsigned long, int *)); +static unsigned long insert_tbr PARAMS ((unsigned long, long, const char **)); +static long extract_tbr PARAMS ((unsigned long, int *)); + +/* The operands table. + + The fields are bits, shift, signed, insert, extract, flags. */ + +const struct powerpc_operand powerpc_operands[] = +{ + /* The zero index is used to indicate the end of the list of + operands. */ +#define UNUSED (0) + { 0, 0, 0, 0, 0 }, + + /* The BA field in an XL form instruction. */ +#define BA (1) +#define BA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_CR }, + + /* The BA field in an XL form instruction when it must be the same + as the BT field in the same instruction. */ +#define BAT (2) + { 5, 16, insert_bat, extract_bat, PPC_OPERAND_FAKE }, + + /* The BB field in an XL form instruction. */ +#define BB (3) +#define BB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_CR }, + + /* The BB field in an XL form instruction when it must be the same + as the BA field in the same instruction. */ +#define BBA (4) + { 5, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE }, + + /* The BD field in a B form instruction. 
The lower two bits are + forced to zero. */ +#define BD (5) + { 16, 0, insert_bd, extract_bd, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when absolute addressing is + used. */ +#define BDA (6) + { 16, 0, insert_bd, extract_bd, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the - modifier is used. + This sets the y bit of the BO field appropriately. */ +#define BDM (7) + { 16, 0, insert_bdm, extract_bdm, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the - modifier is used + and absolute address is used. */ +#define BDMA (8) + { 16, 0, insert_bdm, extract_bdm, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the + modifier is used. + This sets the y bit of the BO field appropriately. */ +#define BDP (9) + { 16, 0, insert_bdp, extract_bdp, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the + modifier is used + and absolute addressing is used. */ +#define BDPA (10) + { 16, 0, insert_bdp, extract_bdp, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BF field in an X or XL form instruction. */ +#define BF (11) + { 3, 23, 0, 0, PPC_OPERAND_CR }, + + /* An optional BF field. This is used for comparison instructions, + in which an omitted BF field is taken as zero. */ +#define OBF (12) + { 3, 23, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The BFA field in an X or XL form instruction. */ +#define BFA (13) + { 3, 18, 0, 0, PPC_OPERAND_CR }, + + /* The BI field in a B form or XL form instruction. */ +#define BI (14) +#define BI_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_CR }, + + /* The BO field in a B form instruction. Certain values are + illegal. */ +#define BO (15) +#define BO_MASK (0x1f << 21) + { 5, 21, insert_bo, extract_bo, 0 }, + + /* The BO field in a B form instruction when the + or - modifier is + used. This is like the BO field, but it must be even. */ +#define BOE (16) + { 5, 21, insert_boe, extract_boe, 0 }, + + /* The BT field in an X or XL form instruction. */ +#define BT (17) + { 5, 21, 0, 0, PPC_OPERAND_CR }, + + /* The condition register number portion of the BI field in a B form + or XL form instruction. This is used for the extended + conditional branch mnemonics, which set the lower two bits of the + BI field. This field is optional. */ +#define CR (18) + { 3, 18, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The D field in a D form instruction. This is a displacement off + a register, and implies that the next operand is a register in + parentheses. */ +#define D (19) + { 16, 0, 0, 0, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The DS field in a DS form instruction. This is like D, but the + lower two bits are forced to zero. */ +#define DS (20) + { 16, 0, insert_ds, extract_ds, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The FL1 field in a POWER SC form instruction. */ +#define FL1 (21) + { 4, 12, 0, 0, 0 }, + + /* The FL2 field in a POWER SC form instruction. */ +#define FL2 (22) + { 3, 2, 0, 0, 0 }, + + /* The FLM field in an XFL form instruction. */ +#define FLM (23) + { 8, 17, 0, 0, 0 }, + + /* The FRA field in an X or A form instruction. */ +#define FRA (24) +#define FRA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_FPR }, + + /* The FRB field in an X or A form instruction. 
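Like FRA, it names one of the 32 floating-point registers, so the disassembler prints it as f0..f31.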
*/ +#define FRB (25) +#define FRB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_FPR }, + + /* The FRC field in an A form instruction. */ +#define FRC (26) +#define FRC_MASK (0x1f << 6) + { 5, 6, 0, 0, PPC_OPERAND_FPR }, + + /* The FRS field in an X form instruction or the FRT field in a D, X + or A form instruction. */ +#define FRS (27) +#define FRT (FRS) + { 5, 21, 0, 0, PPC_OPERAND_FPR }, + + /* The FXM field in an XFX instruction. */ +#define FXM (28) +#define FXM_MASK (0xff << 12) + { 8, 12, 0, 0, 0 }, + + /* The L field in a D or X form instruction. */ +#define L (29) + { 1, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + + /* The LEV field in a POWER SC form instruction. */ +#define LEV (30) + { 7, 5, 0, 0, 0 }, + + /* The LI field in an I form instruction. The lower two bits are + forced to zero. */ +#define LI (31) + { 26, 0, insert_li, extract_li, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The LI field in an I form instruction when used as an absolute + address. */ +#define LIA (32) + { 26, 0, insert_li, extract_li, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The MB field in an M form instruction. */ +#define MB (33) +#define MB_MASK (0x1f << 6) + { 5, 6, 0, 0, 0 }, + + /* The ME field in an M form instruction. */ +#define ME (34) +#define ME_MASK (0x1f << 1) + { 5, 1, 0, 0, 0 }, + + /* The MB and ME fields in an M form instruction expressed a single + operand which is a bitmask indicating which bits to select. This + is a two operand form using PPC_OPERAND_NEXT. See the + description in opcode/ppc.h for what this means. */ +#define MBE (35) + { 5, 6, 0, 0, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, + { 32, 0, insert_mbe, extract_mbe, 0 }, + + /* The MB or ME field in an MD or MDS form instruction. The high + bit is wrapped to the low end. */ +#define MB6 (37) +#define ME6 (MB6) +#define MB6_MASK (0x3f << 5) + { 6, 5, insert_mb6, extract_mb6, 0 }, + + /* The NB field in an X form instruction. The value 32 is stored as + 0. */ +#define NB (38) + { 6, 11, insert_nb, extract_nb, 0 }, + + /* The NSI field in a D form instruction. This is the same as the + SI field, only negated. */ +#define NSI (39) + { 16, 0, insert_nsi, extract_nsi, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, + + /* The RA field in an D, DS, X, XO, M, or MDS form instruction. */ +#define RA (40) +#define RA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_GPR }, + + /* The RA field in a D or X form instruction which is an updating + load, which means that the RA field may not be zero and may not + equal the RT field. */ +#define RAL (41) + { 5, 16, insert_ral, 0, PPC_OPERAND_GPR }, + + /* The RA field in an lmw instruction, which has special value + restrictions. */ +#define RAM (42) + { 5, 16, insert_ram, 0, PPC_OPERAND_GPR }, + + /* The RA field in a D or X form instruction which is an updating + store or an updating floating point load, which means that the RA + field may not be zero. */ +#define RAS (43) + { 5, 16, insert_ras, 0, PPC_OPERAND_GPR }, + + /* The RB field in an X, XO, M, or MDS form instruction. */ +#define RB (44) +#define RB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_GPR }, + + /* The RB field in an X form instruction when it must be the same as + the RS field in the instruction. This is used for extended + mnemonics like mr. */ +#define RBS (45) + { 5, 1, insert_rbs, extract_rbs, PPC_OPERAND_FAKE }, + + /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form + instruction or the RT field in a D, DS, X, XFX or XO form + instruction. 
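Both fields occupy the same bit positions, which is why RT is simply defined as RS below.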
*/ +#define RS (46) +#define RT (RS) +#define RT_MASK (0x1f << 21) + { 5, 21, 0, 0, PPC_OPERAND_GPR }, + + /* The SH field in an X or M form instruction. */ +#define SH (47) +#define SH_MASK (0x1f << 11) + { 5, 11, 0, 0, 0 }, + + /* The SH field in an MD form instruction. This is split. */ +#define SH6 (48) +#define SH6_MASK ((0x1f << 11) | (1 << 1)) + { 6, 1, insert_sh6, extract_sh6, 0 }, + + /* The SI field in a D form instruction. */ +#define SI (49) + { 16, 0, 0, 0, PPC_OPERAND_SIGNED }, + + /* The SI field in a D form instruction when we accept a wide range + of positive values. */ +#define SISIGNOPT (50) + { 16, 0, 0, 0, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + + /* The SPR field in an XFX form instruction. This is flipped--the + lower 5 bits are stored in the upper 5 and vice- versa. */ +#define SPR (51) +#define SPR_MASK (0x3ff << 11) + { 10, 11, insert_spr, extract_spr, 0 }, + + /* The BAT index number in an XFX form m[ft]ibat[lu] instruction. */ +#define SPRBAT (52) +#define SPRBAT_MASK (0x3 << 17) + { 2, 17, 0, 0, 0 }, + + /* The SPRG register number in an XFX form m[ft]sprg instruction. */ +#define SPRG (53) +#define SPRG_MASK (0x3 << 16) + { 2, 16, 0, 0, 0 }, + + /* The SR field in an X form instruction. */ +#define SR (54) + { 4, 16, 0, 0, 0 }, + + /* The SV field in a POWER SC form instruction. */ +#define SV (55) + { 14, 2, 0, 0, 0 }, + + /* The TBR field in an XFX form instruction. This is like the SPR + field, but it is optional. */ +#define TBR (56) + { 10, 11, insert_tbr, extract_tbr, PPC_OPERAND_OPTIONAL }, + + /* The TO field in a D or X form instruction. */ +#define TO (57) +#define TO_MASK (0x1f << 21) + { 5, 21, 0, 0, 0 }, + + /* The U field in an X form instruction. */ +#define U (58) + { 4, 12, 0, 0, 0 }, + + /* The UI field in a D form instruction. */ +#define UI (59) + { 16, 0, 0, 0, 0 }, +}; + +/* The functions used to insert and extract complicated operands. */ + +/* The BA field in an XL form instruction when it must be the same as + the BT field in the same instruction. This operand is marked FAKE. + The insertion function just copies the BT field into the BA field, + and the extraction function just checks that the fields are the + same. */ + +/*ARGSUSED*/ +static unsigned long +insert_bat (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | (((insn >> 21) & 0x1f) << 16); +} + +static long +extract_bat (insn, invalid) + unsigned long insn; + int *invalid; +{ + if (invalid != (int *) NULL + && ((insn >> 21) & 0x1f) != ((insn >> 16) & 0x1f)) + *invalid = 1; + return 0; +} + +/* The BB field in an XL form instruction when it must be the same as + the BA field in the same instruction. This operand is marked FAKE. + The insertion function just copies the BA field into the BB field, + and the extraction function just checks that the fields are the + same. */ + +/*ARGSUSED*/ +static unsigned long +insert_bba (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | (((insn >> 16) & 0x1f) << 11); +} + +static long +extract_bba (insn, invalid) + unsigned long insn; + int *invalid; +{ + if (invalid != (int *) NULL + && ((insn >> 16) & 0x1f) != ((insn >> 11) & 0x1f)) + *invalid = 1; + return 0; +} + +/* The BD field in a B form instruction. The lower two bits are + forced to zero. 
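Instructions are word aligned, so the low two bits of a branch displacement carry no information; insert_bd() below masks them off with 0xfffc (a displacement of 0x1f7, for example, is encoded as 0x1f4).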
*/ + +/*ARGSUSED*/ +static unsigned long +insert_bd (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | (value & 0xfffc); +} + +/*ARGSUSED*/ +static long +extract_bd (insn, invalid) + unsigned long insn; + int *invalid; +{ + if ((insn & 0x8000) != 0) + return (insn & 0xfffc) - 0x10000; + else + return insn & 0xfffc; +} + +/* The BD field in a B form instruction when the - modifier is used. + This modifier means that the branch is not expected to be taken. + We must set the y bit of the BO field to 1 if the offset is + negative. When extracting, we require that the y bit be 1 and that + the offset be positive, since if the y bit is 0 we just want to + print the normal form of the instruction. */ + +/*ARGSUSED*/ +static unsigned long +insert_bdm (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if ((value & 0x8000) != 0) + insn |= 1 << 21; + return insn | (value & 0xfffc); +} + +static long +extract_bdm (insn, invalid) + unsigned long insn; + int *invalid; +{ + if (invalid != (int *) NULL + && ((insn & (1 << 21)) == 0 + || (insn & (1 << 15)) == 0)) + *invalid = 1; + if ((insn & 0x8000) != 0) + return (insn & 0xfffc) - 0x10000; + else + return insn & 0xfffc; +} + +/* The BD field in a B form instruction when the + modifier is used. + This is like BDM, above, except that the branch is expected to be + taken. */ + +/*ARGSUSED*/ +static unsigned long +insert_bdp (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if ((value & 0x8000) == 0) + insn |= 1 << 21; + return insn | (value & 0xfffc); +} + +static long +extract_bdp (insn, invalid) + unsigned long insn; + int *invalid; +{ + if (invalid != (int *) NULL + && ((insn & (1 << 21)) == 0 + || (insn & (1 << 15)) != 0)) + *invalid = 1; + if ((insn & 0x8000) != 0) + return (insn & 0xfffc) - 0x10000; + else + return insn & 0xfffc; +} + +/* Check for legal values of a BO field. */ + +static int +valid_bo (long value) +{ + /* Certain encodings have bits that are required to be zero. These + are (z must be zero, y may be anything): + 001zy + 011zy + 1z00y + 1z01y + 1z1zz + */ + switch (value & 0x14) + { + default: + case 0: + return 1; + case 0x4: + return (value & 0x2) == 0; + case 0x10: + return (value & 0x8) == 0; + case 0x14: + return value == 0x14; + } +} + +/* The BO field in a B form instruction. Warn about attempts to set + the field to an illegal value. */ + +static unsigned long +insert_bo (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if (errmsg != (const char **) NULL + && ! valid_bo (value)) + *errmsg = "invalid conditional option"; + return insn | ((value & 0x1f) << 21); +} + +static long +extract_bo (insn, invalid) + unsigned long insn; + int *invalid; +{ + long value; + + value = (insn >> 21) & 0x1f; + if (invalid != (int *) NULL + && ! valid_bo (value)) + *invalid = 1; + return value; +} + +/* The BO field in a B form instruction when the + or - modifier is + used. This is like the BO field, but it must be even. When + extracting it, we force it to be even. */ + +static unsigned long +insert_boe (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if (errmsg != (const char **) NULL) + { + if (! 
valid_bo (value)) + *errmsg = "invalid conditional option"; + else if ((value & 1) != 0) + *errmsg = "attempt to set y bit when using + or - modifier"; + } + return insn | ((value & 0x1f) << 21); +} + +static long +extract_boe (insn, invalid) + unsigned long insn; + int *invalid; +{ + long value; + + value = (insn >> 21) & 0x1f; + if (invalid != (int *) NULL + && ! valid_bo (value)) + *invalid = 1; + return value & 0x1e; +} + +/* The DS field in a DS form instruction. This is like D, but the + lower two bits are forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_ds (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | (value & 0xfffc); +} + +/*ARGSUSED*/ +static long +extract_ds (insn, invalid) + unsigned long insn; + int *invalid; +{ + if ((insn & 0x8000) != 0) + return (insn & 0xfffc) - 0x10000; + else + return insn & 0xfffc; +} + +/* The LI field in an I form instruction. The lower two bits are + forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_li (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | (value & 0x3fffffc); +} + +/*ARGSUSED*/ +static long +extract_li (insn, invalid) + unsigned long insn; + int *invalid; +{ + if ((insn & 0x2000000) != 0) + return (insn & 0x3fffffc) - 0x4000000; + else + return insn & 0x3fffffc; +} + +/* The MB and ME fields in an M form instruction expressed as a single + operand which is itself a bitmask. The extraction function always + marks it as invalid, since we never want to recognize an + instruction which uses a field of this type. */ + +static unsigned long +insert_mbe (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + unsigned long uval; + int mb, me; + + uval = value; + + if (uval == 0) + { + if (errmsg != (const char **) NULL) + *errmsg = "illegal bitmask"; + return insn; + } + + me = 31; + while ((uval & 1) == 0) + { + uval >>= 1; + --me; + } + + mb = me; + uval >>= 1; + while ((uval & 1) != 0) + { + uval >>= 1; + --mb; + } + + if (uval != 0) + { + if (errmsg != (const char **) NULL) + *errmsg = "illegal bitmask"; + } + + return insn | (mb << 6) | (me << 1); +} + +static long +extract_mbe (insn, invalid) + unsigned long insn; + int *invalid; +{ + long ret; + int mb, me; + int i; + + if (invalid != (int *) NULL) + *invalid = 1; + + ret = 0; + mb = (insn >> 6) & 0x1f; + me = (insn >> 1) & 0x1f; + for (i = mb; i < me; i++) + ret |= 1 << (31 - i); + return ret; +} + +/* The MB or ME field in an MD or MDS form instruction. The high bit + is wrapped to the low end. */ + +/*ARGSUSED*/ +static unsigned long +insert_mb6 (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + return insn | ((value & 0x1f) << 6) | (value & 0x20); +} + +/*ARGSUSED*/ +static long +extract_mb6 (insn, invalid) + unsigned long insn; + int *invalid; +{ + return ((insn >> 6) & 0x1f) | (insn & 0x20); +} + +/* The NB field in an X form instruction. The value 32 is stored as + 0. */ + +static unsigned long +insert_nb (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if (value < 0 || value > 32) + *errmsg = "value out of range"; + if (value == 32) + value = 0; + return insn | ((value & 0x1f) << 11); +} + +/*ARGSUSED*/ +static long +extract_nb (insn, invalid) + unsigned long insn; + int *invalid; +{ + long ret; + + ret = (insn >> 11) & 0x1f; + if (ret == 0) + ret = 32; + return ret; +} + +/* The NSI field in a D form instruction. 
+
+/* The MB or ME field in an MD or MDS form instruction.  The high bit
+   is wrapped to the low end.  */
+
+/*ARGSUSED*/
+static unsigned long
+insert_mb6 (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  return insn | ((value & 0x1f) << 6) | (value & 0x20);
+}
+
+/*ARGSUSED*/
+static long
+extract_mb6 (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  return ((insn >> 6) & 0x1f) | (insn & 0x20);
+}
+
+/* The NB field in an X form instruction.  The value 32 is stored as
+   0.  */
+
+static unsigned long
+insert_nb (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  if (value < 0 || value > 32)
+    *errmsg = "value out of range";
+  if (value == 32)
+    value = 0;
+  return insn | ((value & 0x1f) << 11);
+}
+
+/*ARGSUSED*/
+static long
+extract_nb (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  long ret;
+
+  ret = (insn >> 11) & 0x1f;
+  if (ret == 0)
+    ret = 32;
+  return ret;
+}
+
+/* The NSI field in a D form instruction.  This is the same as the SI
+   field, only negated.  The extraction function always marks it as
+   invalid, since we never want to recognize an instruction which uses
+   a field of this type.  */
+
+/*ARGSUSED*/
+static unsigned long
+insert_nsi (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  return insn | ((- value) & 0xffff);
+}
+
+static long
+extract_nsi (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  if (invalid != (int *) NULL)
+    *invalid = 1;
+  if ((insn & 0x8000) != 0)
+    return - ((insn & 0xffff) - 0x10000);
+  else
+    return - (insn & 0xffff);
+}
+
+/* The RA field in a D or X form instruction which is an updating
+   load, which means that the RA field may not be zero and may not
+   equal the RT field.  */
+
+static unsigned long
+insert_ral (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  if (value == 0
+      || value == ((insn >> 21) & 0x1f))
+    *errmsg = "invalid register operand when updating";
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RA field in an lmw instruction, which has special value
+   restrictions.  */
+
+static unsigned long
+insert_ram (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  if (value >= ((insn >> 21) & 0x1f))
+    *errmsg = "index register in load range";
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RA field in a D or X form instruction which is an updating
+   store or an updating floating point load, which means that the RA
+   field may not be zero.  */
+
+static unsigned long
+insert_ras (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  if (value == 0)
+    *errmsg = "invalid register operand when updating";
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RB field in an X form instruction when it must be the same as
+   the RS field in the instruction.  This is used for extended
+   mnemonics like mr.  This operand is marked FAKE.  The insertion
+   function just copies the RS field into the RB field, and the
+   extraction function just checks that the fields are the same.  */
+
+/*ARGSUSED*/
+static unsigned long
+insert_rbs (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  return insn | (((insn >> 21) & 0x1f) << 11);
+}
+
+static long
+extract_rbs (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  if (invalid != (int *) NULL
+      && ((insn >> 21) & 0x1f) != ((insn >> 11) & 0x1f))
+    *invalid = 1;
+  return 0;
+}
+
+/* The SH field in an MD form instruction.  This is split.  */
+
+/*ARGSUSED*/
+static unsigned long
+insert_sh6 (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  return insn | ((value & 0x1f) << 11) | ((value & 0x20) >> 4);
+}
+
+/*ARGSUSED*/
+static long
+extract_sh6 (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  return ((insn >> 11) & 0x1f) | ((insn << 4) & 0x20);
+}
+
+/* The SPR field in an XFX form instruction.  This is flipped--the
+   lower 5 bits are stored in the upper 5 and vice versa.  */
+
+static unsigned long
+insert_spr (insn, value, errmsg)
+     unsigned long insn;
+     long value;
+     const char **errmsg;
+{
+  return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6);
+}
+
+static long
+extract_spr (insn, invalid)
+     unsigned long insn;
+     int *invalid;
+{
+  return ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0);
+}
+
+/* The TBR field in an XFX instruction.  This is just like SPR, but it
+   is optional.  When TBR is omitted, it must be inserted as 268 (the
+   magic number of the TB register).
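+   (Encoding 268 reads the lower 32 bits of the time base; 269, used
+   by mftbu, reads the upper 32 bits.)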
These functions treat 0 + (indicating an omitted optional operand) as 268. This means that + ``mftb 4,0'' is not handled correctly. This does not matter very + much, since the architecture manual does not define mftb as + accepting any values other than 268 or 269. */ + +#define TB (268) + +static unsigned long +insert_tbr (insn, value, errmsg) + unsigned long insn; + long value; + const char **errmsg; +{ + if (value == 0) + value = TB; + return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); +} + +static long +extract_tbr (insn, invalid) + unsigned long insn; + int *invalid; +{ + long ret; + + ret = ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0); + if (ret == TB) + ret = 0; + return ret; +} + +/* Macros used to form opcodes. */ + +/* The main opcode. */ +#define OP(x) (((x) & 0x3f) << 26) +#define OP_MASK OP (0x3f) + +/* The main opcode combined with a trap code in the TO field of a D + form instruction. Used for extended mnemonics for the trap + instructions. */ +#define OPTO(x,to) (OP (x) | (((to) & 0x1f) << 21)) +#define OPTO_MASK (OP_MASK | TO_MASK) + +/* The main opcode combined with a comparison size bit in the L field + of a D form or X form instruction. Used for extended mnemonics for + the comparison instructions. */ +#define OPL(x,l) (OP (x) | (((l) & 1) << 21)) +#define OPL_MASK OPL (0x3f,1) + +/* An A form instruction. */ +#define A(op, xop, rc) (OP (op) | (((xop) & 0x1f) << 1) | ((rc) & 1)) +#define A_MASK A (0x3f, 0x1f, 1) + +/* An A_MASK with the FRB field fixed. */ +#define AFRB_MASK (A_MASK | FRB_MASK) + +/* An A_MASK with the FRC field fixed. */ +#define AFRC_MASK (A_MASK | FRC_MASK) + +/* An A_MASK with the FRA and FRC fields fixed. */ +#define AFRAFRC_MASK (A_MASK | FRA_MASK | FRC_MASK) + +/* A B form instruction. */ +#define B(op, aa, lk) (OP (op) | (((aa) & 1) << 1) | ((lk) & 1)) +#define B_MASK B (0x3f, 1, 1) + +/* A B form instruction setting the BO field. */ +#define BBO(op, bo, aa, lk) (B ((op), (aa), (lk)) | (((bo) & 0x1f) << 21)) +#define BBO_MASK BBO (0x3f, 0x1f, 1, 1) + +/* A BBO_MASK with the y bit of the BO field removed. This permits + matching a conditional branch regardless of the setting of the y + bit. */ +#define Y_MASK (1 << 21) +#define BBOY_MASK (BBO_MASK &~ Y_MASK) + +/* A B form instruction setting the BO field and the condition bits of + the BI field. */ +#define BBOCB(op, bo, cb, aa, lk) \ + (BBO ((op), (bo), (aa), (lk)) | (((cb) & 0x3) << 16)) +#define BBOCB_MASK BBOCB (0x3f, 0x1f, 0x3, 1, 1) + +/* A BBOCB_MASK with the y bit of the BO field removed. */ +#define BBOYCB_MASK (BBOCB_MASK &~ Y_MASK) + +/* A BBOYCB_MASK in which the BI field is fixed. */ +#define BBOYBI_MASK (BBOYCB_MASK | BI_MASK) + +/* The main opcode mask with the RA field clear. */ +#define DRA_MASK (OP_MASK | RA_MASK) + +/* A DS form instruction. */ +#define DSO(op, xop) (OP (op) | ((xop) & 0x3)) +#define DS_MASK DSO (0x3f, 3) + +/* An M form instruction. */ +#define M(op, rc) (OP (op) | ((rc) & 1)) +#define M_MASK M (0x3f, 1) + +/* An M form instruction with the ME field specified. */ +#define MME(op, me, rc) (M ((op), (rc)) | (((me) & 0x1f) << 1)) + +/* An M_MASK with the MB and ME fields fixed. */ +#define MMBME_MASK (M_MASK | MB_MASK | ME_MASK) + +/* An M_MASK with the SH and ME fields fixed. */ +#define MSHME_MASK (M_MASK | SH_MASK | ME_MASK) + +/* An MD form instruction. */ +#define MD(op, xop, rc) (OP (op) | (((xop) & 0x7) << 2) | ((rc) & 1)) +#define MD_MASK MD (0x3f, 0x7, 1) + +/* An MD_MASK with the MB field fixed. 
*/ +#define MDMB_MASK (MD_MASK | MB6_MASK) + +/* An MD_MASK with the SH field fixed. */ +#define MDSH_MASK (MD_MASK | SH6_MASK) + +/* An MDS form instruction. */ +#define MDS(op, xop, rc) (OP (op) | (((xop) & 0xf) << 1) | ((rc) & 1)) +#define MDS_MASK MDS (0x3f, 0xf, 1) + +/* An MDS_MASK with the MB field fixed. */ +#define MDSMB_MASK (MDS_MASK | MB6_MASK) + +/* An SC form instruction. */ +#define SC(op, sa, lk) (OP (op) | (((sa) & 1) << 1) | ((lk) & 1)) +#define SC_MASK (OP_MASK | (0x3ff << 16) | (1 << 1) | 1) + +/* An X form instruction. */ +#define X(op, xop) (OP (op) | (((xop) & 0x3ff) << 1)) + +/* An X form instruction with the RC bit specified. */ +#define XRC(op, xop, rc) (X ((op), (xop)) | ((rc) & 1)) + +/* The mask for an X form instruction. */ +#define X_MASK XRC (0x3f, 0x3ff, 1) + +/* An X_MASK with the RA field fixed. */ +#define XRA_MASK (X_MASK | RA_MASK) + +/* An X_MASK with the RB field fixed. */ +#define XRB_MASK (X_MASK | RB_MASK) + +/* An X_MASK with the RT field fixed. */ +#define XRT_MASK (X_MASK | RT_MASK) + +/* An X_MASK with the RA and RB fields fixed. */ +#define XRARB_MASK (X_MASK | RA_MASK | RB_MASK) + +/* An X_MASK with the RT and RA fields fixed. */ +#define XRTRA_MASK (X_MASK | RT_MASK | RA_MASK) + +/* An X form comparison instruction. */ +#define XCMPL(op, xop, l) (X ((op), (xop)) | (((l) & 1) << 21)) + +/* The mask for an X form comparison instruction. */ +#define XCMP_MASK (X_MASK | (1 << 22)) + +/* The mask for an X form comparison instruction with the L field + fixed. */ +#define XCMPL_MASK (XCMP_MASK | (1 << 21)) + +/* An X form trap instruction with the TO field specified. */ +#define XTO(op, xop, to) (X ((op), (xop)) | (((to) & 0x1f) << 21)) +#define XTO_MASK (X_MASK | TO_MASK) + +/* An XFL form instruction. */ +#define XFL(op, xop, rc) (OP (op) | (((xop) & 0x3ff) << 1) | ((rc) & 1)) +#define XFL_MASK (XFL (0x3f, 0x3ff, 1) | (1 << 25) | (1 << 16)) + +/* An XL form instruction with the LK field set to 0. */ +#define XL(op, xop) (OP (op) | (((xop) & 0x3ff) << 1)) + +/* An XL form instruction which uses the LK field. */ +#define XLLK(op, xop, lk) (XL ((op), (xop)) | ((lk) & 1)) + +/* The mask for an XL form instruction. */ +#define XL_MASK XLLK (0x3f, 0x3ff, 1) + +/* An XL form instruction which explicitly sets the BO field. */ +#define XLO(op, bo, xop, lk) \ + (XLLK ((op), (xop), (lk)) | (((bo) & 0x1f) << 21)) +#define XLO_MASK (XL_MASK | BO_MASK) + +/* An XL form instruction which explicitly sets the y bit of the BO + field. */ +#define XLYLK(op, xop, y, lk) (XLLK ((op), (xop), (lk)) | (((y) & 1) << 21)) +#define XLYLK_MASK (XL_MASK | Y_MASK) + +/* An XL form instruction which sets the BO field and the condition + bits of the BI field. */ +#define XLOCB(op, bo, cb, xop, lk) \ + (XLO ((op), (bo), (xop), (lk)) | (((cb) & 3) << 16)) +#define XLOCB_MASK XLOCB (0x3f, 0x1f, 0x3, 0x3ff, 1) + +/* An XL_MASK or XLYLK_MASK or XLOCB_MASK with the BB field fixed. */ +#define XLBB_MASK (XL_MASK | BB_MASK) +#define XLYBB_MASK (XLYLK_MASK | BB_MASK) +#define XLBOCBBB_MASK (XLOCB_MASK | BB_MASK) + +/* An XL_MASK with the BO and BB fields fixed. */ +#define XLBOBB_MASK (XL_MASK | BO_MASK | BB_MASK) + +/* An XL_MASK with the BO, BI and BB fields fixed. */ +#define XLBOBIBB_MASK (XL_MASK | BO_MASK | BI_MASK | BB_MASK) + +/* An XO form instruction. */ +#define XO(op, xop, oe, rc) \ + (OP (op) | (((xop) & 0x1ff) << 1) | (((oe) & 1) << 10) | ((rc) & 1)) +#define XO_MASK XO (0x3f, 0x1ff, 1, 1) + +/* An XO_MASK with the RB field fixed. 
*/
+#define XORB_MASK (XO_MASK | RB_MASK)
+
+/* An XS form instruction.  */
+#define XS(op, xop, rc) (OP (op) | (((xop) & 0x1ff) << 2) | ((rc) & 1))
+#define XS_MASK XS (0x3f, 0x1ff, 1)
+
+/* A mask for the FXM version of an XFX form instruction.  */
+#define XFXFXM_MASK (X_MASK | (1 << 20) | (1 << 11))
+
+/* An XFX form instruction with the FXM field filled in.  */
+#define XFXM(op, xop, fxm) \
+  (X ((op), (xop)) | (((fxm) & 0xff) << 12))
+
+/* An XFX form instruction with the SPR field filled in.  */
+#define XSPR(op, xop, spr) \
+  (X ((op), (xop)) | (((spr) & 0x1f) << 16) | (((spr) & 0x3e0) << 6))
+#define XSPR_MASK (X_MASK | SPR_MASK)
+
+/* An XFX form instruction with the SPR field filled in except for the
+   SPRBAT field.  */
+#define XSPRBAT_MASK (XSPR_MASK &~ SPRBAT_MASK)
+
+/* An XFX form instruction with the SPR field filled in except for the
+   SPRG field.  */
+#define XSPRG_MASK (XSPR_MASK &~ SPRG_MASK)
+
+/* The BO encodings used in extended conditional branch mnemonics.  */
+#define BODNZF  (0x0)
+#define BODNZFP (0x1)
+#define BODZF   (0x2)
+#define BODZFP  (0x3)
+#define BOF     (0x4)
+#define BOFP    (0x5)
+#define BODNZT  (0x8)
+#define BODNZTP (0x9)
+#define BODZT   (0xa)
+#define BODZTP  (0xb)
+#define BOT     (0xc)
+#define BOTP    (0xd)
+#define BODNZ   (0x10)
+#define BODNZP  (0x11)
+#define BODZ    (0x12)
+#define BODZP   (0x13)
+#define BOU     (0x14)
+
+/* The BI condition bit encodings used in extended conditional branch
+   mnemonics.  */
+#define CBLT    (0)
+#define CBGT    (1)
+#define CBEQ    (2)
+#define CBSO    (3)
+
+/* The TO encodings used in extended trap mnemonics.  */
+#define TOLGT   (0x1)
+#define TOLLT   (0x2)
+#define TOEQ    (0x4)
+#define TOLGE   (0x5)
+#define TOLNL   (0x5)
+#define TOLLE   (0x6)
+#define TOLNG   (0x6)
+#define TOGT    (0x8)
+#define TOGE    (0xc)
+#define TONL    (0xc)
+#define TOLT    (0x10)
+#define TOLE    (0x14)
+#define TONG    (0x14)
+#define TONE    (0x18)
+#define TOU     (0x1f)
+
+/* Smaller names for the flags so each entry in the opcodes table will
+   fit on a single line.  */
+#undef PPC
+#define PPC     PPC_OPCODE_PPC
+#define POWER   PPC_OPCODE_POWER
+#define POWER2  PPC_OPCODE_POWER2
+#define B32     PPC_OPCODE_32
+#define B64     PPC_OPCODE_64
+#define M601    PPC_OPCODE_601
+
+/* The opcode table.
+
+   The format of the opcode table is:
+
+   NAME     OPCODE     MASK     FLAGS     { OPERANDS }
+
+   NAME is the name of the instruction.
+   OPCODE is the instruction opcode.
+   MASK is the opcode mask; this is used to tell the disassembler
+     which bits in the actual opcode must match OPCODE.
+   FLAGS are flags indicating which processors support the instruction.
+   OPERANDS is the list of operands.
+
+   The disassembler reads the table in order and prints the first
+   instruction which matches, so this table is sorted to put more
+   specific instructions before more general instructions.  It is also
+   sorted by major opcode.
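+
+   For example, the entry
+
+     { "subfic",  OP(8),  OP_MASK,  PPC,  { RT, RA, SI } }
+
+   says that "subfic" assembles to primary opcode 8, that only the
+   six primary opcode bits take part in matching (OP_MASK), that the
+   instruction is PowerPC rather than POWER, and that it takes a
+   target register, a source register and a signed 16 bit immediate.
+   A disassembler match is then the single test
+
+     (insn & entry->mask) == entry->opcode
+
+   where the member names are assumed to mirror the column names
+   above.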
*/ + +const struct powerpc_opcode powerpc_opcodes[] = { +{ "tdlgti", OPTO(2,TOLGT), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdllti", OPTO(2,TOLLT), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdeqi", OPTO(2,TOEQ), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdlgei", OPTO(2,TOLGE), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdlnli", OPTO(2,TOLNL), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdllei", OPTO(2,TOLLE), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdlngi", OPTO(2,TOLNG), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdgti", OPTO(2,TOGT), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdgei", OPTO(2,TOGE), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdnli", OPTO(2,TONL), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdlti", OPTO(2,TOLT), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdlei", OPTO(2,TOLE), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdngi", OPTO(2,TONG), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdnei", OPTO(2,TONE), OPTO_MASK, PPC|B64, { RA, SI } }, +{ "tdi", OP(2), OP_MASK, PPC|B64, { TO, RA, SI } }, + +{ "twlgti", OPTO(3,TOLGT), OPTO_MASK, PPC, { RA, SI } }, +{ "tlgti", OPTO(3,TOLGT), OPTO_MASK, POWER, { RA, SI } }, +{ "twllti", OPTO(3,TOLLT), OPTO_MASK, PPC, { RA, SI } }, +{ "tllti", OPTO(3,TOLLT), OPTO_MASK, POWER, { RA, SI } }, +{ "tweqi", OPTO(3,TOEQ), OPTO_MASK, PPC, { RA, SI } }, +{ "teqi", OPTO(3,TOEQ), OPTO_MASK, POWER, { RA, SI } }, +{ "twlgei", OPTO(3,TOLGE), OPTO_MASK, PPC, { RA, SI } }, +{ "tlgei", OPTO(3,TOLGE), OPTO_MASK, POWER, { RA, SI } }, +{ "twlnli", OPTO(3,TOLNL), OPTO_MASK, PPC, { RA, SI } }, +{ "tlnli", OPTO(3,TOLNL), OPTO_MASK, POWER, { RA, SI } }, +{ "twllei", OPTO(3,TOLLE), OPTO_MASK, PPC, { RA, SI } }, +{ "tllei", OPTO(3,TOLLE), OPTO_MASK, POWER, { RA, SI } }, +{ "twlngi", OPTO(3,TOLNG), OPTO_MASK, PPC, { RA, SI } }, +{ "tlngi", OPTO(3,TOLNG), OPTO_MASK, POWER, { RA, SI } }, +{ "twgti", OPTO(3,TOGT), OPTO_MASK, PPC, { RA, SI } }, +{ "tgti", OPTO(3,TOGT), OPTO_MASK, POWER, { RA, SI } }, +{ "twgei", OPTO(3,TOGE), OPTO_MASK, PPC, { RA, SI } }, +{ "tgei", OPTO(3,TOGE), OPTO_MASK, POWER, { RA, SI } }, +{ "twnli", OPTO(3,TONL), OPTO_MASK, PPC, { RA, SI } }, +{ "tnli", OPTO(3,TONL), OPTO_MASK, POWER, { RA, SI } }, +{ "twlti", OPTO(3,TOLT), OPTO_MASK, PPC, { RA, SI } }, +{ "tlti", OPTO(3,TOLT), OPTO_MASK, POWER, { RA, SI } }, +{ "twlei", OPTO(3,TOLE), OPTO_MASK, PPC, { RA, SI } }, +{ "tlei", OPTO(3,TOLE), OPTO_MASK, POWER, { RA, SI } }, +{ "twngi", OPTO(3,TONG), OPTO_MASK, PPC, { RA, SI } }, +{ "tngi", OPTO(3,TONG), OPTO_MASK, POWER, { RA, SI } }, +{ "twnei", OPTO(3,TONE), OPTO_MASK, PPC, { RA, SI } }, +{ "tnei", OPTO(3,TONE), OPTO_MASK, POWER, { RA, SI } }, +{ "twi", OP(3), OP_MASK, PPC, { TO, RA, SI } }, +{ "ti", OP(3), OP_MASK, POWER, { TO, RA, SI } }, + +{ "mulli", OP(7), OP_MASK, PPC, { RT, RA, SI } }, +{ "muli", OP(7), OP_MASK, POWER, { RT, RA, SI } }, + +{ "subfic", OP(8), OP_MASK, PPC, { RT, RA, SI } }, +{ "sfi", OP(8), OP_MASK, POWER, { RT, RA, SI } }, + +{ "dozi", OP(9), OP_MASK, POWER|M601, { RT, RA, SI } }, + +{ "cmplwi", OPL(10,0), OPL_MASK, PPC, { OBF, RA, UI } }, +{ "cmpldi", OPL(10,1), OPL_MASK, PPC|B64, { OBF, RA, UI } }, +{ "cmpli", OP(10), OP_MASK, PPC, { BF, L, RA, UI } }, +{ "cmpli", OP(10), OP_MASK, POWER, { BF, RA, UI } }, + +{ "cmpwi", OPL(11,0), OPL_MASK, PPC, { OBF, RA, SI } }, +{ "cmpdi", OPL(11,1), OPL_MASK, PPC|B64, { OBF, RA, SI } }, +{ "cmpi", OP(11), OP_MASK, PPC, { BF, L, RA, SI } }, +{ "cmpi", OP(11), OP_MASK, POWER, { BF, RA, SI } }, + +{ "addic", OP(12), OP_MASK, PPC, { RT, RA, SI } }, +{ "ai", OP(12), OP_MASK, POWER, { RT, RA, SI } }, +{ "subic", OP(12), OP_MASK, PPC, { RT, RA, NSI } }, + 
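+/* Mnemonics ending in "." are the condition-register-recording
+   forms.  For most instructions this is the rc argument of the
+   opcode macros above; addic., andi. and andis. instead have
+   dedicated primary opcodes (13, 28 and 29).  */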
+{ "addic.", OP(13), OP_MASK, PPC, { RT, RA, SI } }, +{ "ai.", OP(13), OP_MASK, POWER, { RT, RA, SI } }, +{ "subic.", OP(13), OP_MASK, PPC, { RT, RA, NSI } }, + +{ "li", OP(14), DRA_MASK, PPC, { RT, SI } }, +{ "lil", OP(14), DRA_MASK, POWER, { RT, SI } }, +{ "addi", OP(14), OP_MASK, PPC, { RT, RA, SI } }, +{ "cal", OP(14), OP_MASK, POWER, { RT, D, RA } }, +{ "subi", OP(14), OP_MASK, PPC, { RT, RA, NSI } }, +{ "la", OP(14), OP_MASK, PPC, { RT, D, RA } }, + +{ "lis", OP(15), DRA_MASK, PPC, { RT, SISIGNOPT } }, +{ "liu", OP(15), DRA_MASK, POWER, { RT, SISIGNOPT } }, +{ "addis", OP(15), OP_MASK, PPC, { RT,RA,SISIGNOPT } }, +{ "cau", OP(15), OP_MASK, POWER, { RT,RA,SISIGNOPT } }, +{ "subis", OP(15), OP_MASK, PPC, { RT, RA, NSI } }, + +{ "bdnz-", BBO(16,BODNZ,0,0), BBOYBI_MASK, PPC, { BDM } }, +{ "bdnz+", BBO(16,BODNZ,0,0), BBOYBI_MASK, PPC, { BDP } }, +{ "bdnz", BBO(16,BODNZ,0,0), BBOYBI_MASK, PPC, { BD } }, +{ "bdn", BBO(16,BODNZ,0,0), BBOYBI_MASK, POWER, { BD } }, +{ "bdnzl-", BBO(16,BODNZ,0,1), BBOYBI_MASK, PPC, { BDM } }, +{ "bdnzl+", BBO(16,BODNZ,0,1), BBOYBI_MASK, PPC, { BDP } }, +{ "bdnzl", BBO(16,BODNZ,0,1), BBOYBI_MASK, PPC, { BD } }, +{ "bdnl", BBO(16,BODNZ,0,1), BBOYBI_MASK, POWER, { BD } }, +{ "bdnza-", BBO(16,BODNZ,1,0), BBOYBI_MASK, PPC, { BDMA } }, +{ "bdnza+", BBO(16,BODNZ,1,0), BBOYBI_MASK, PPC, { BDPA } }, +{ "bdnza", BBO(16,BODNZ,1,0), BBOYBI_MASK, PPC, { BDA } }, +{ "bdna", BBO(16,BODNZ,1,0), BBOYBI_MASK, POWER, { BDA } }, +{ "bdnzla-", BBO(16,BODNZ,1,1), BBOYBI_MASK, PPC, { BDMA } }, +{ "bdnzla+", BBO(16,BODNZ,1,1), BBOYBI_MASK, PPC, { BDPA } }, +{ "bdnzla", BBO(16,BODNZ,1,1), BBOYBI_MASK, PPC, { BDA } }, +{ "bdnla", BBO(16,BODNZ,1,1), BBOYBI_MASK, POWER, { BDA } }, +{ "bdz-", BBO(16,BODZ,0,0), BBOYBI_MASK, PPC, { BDM } }, +{ "bdz+", BBO(16,BODZ,0,0), BBOYBI_MASK, PPC, { BDP } }, +{ "bdz", BBO(16,BODZ,0,0), BBOYBI_MASK, PPC|POWER, { BD } }, +{ "bdzl-", BBO(16,BODZ,0,1), BBOYBI_MASK, PPC, { BDM } }, +{ "bdzl+", BBO(16,BODZ,0,1), BBOYBI_MASK, PPC, { BDP } }, +{ "bdzl", BBO(16,BODZ,0,1), BBOYBI_MASK, PPC|POWER, { BD } }, +{ "bdza-", BBO(16,BODZ,1,0), BBOYBI_MASK, PPC, { BDMA } }, +{ "bdza+", BBO(16,BODZ,1,0), BBOYBI_MASK, PPC, { BDPA } }, +{ "bdza", BBO(16,BODZ,1,0), BBOYBI_MASK, PPC|POWER, { BDA } }, +{ "bdzla-", BBO(16,BODZ,1,1), BBOYBI_MASK, PPC, { BDMA } }, +{ "bdzla+", BBO(16,BODZ,1,1), BBOYBI_MASK, PPC, { BDPA } }, +{ "bdzla", BBO(16,BODZ,1,1), BBOYBI_MASK, PPC|POWER, { BDA } }, +{ "blt-", BBOCB(16,BOT,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "blt+", BBOCB(16,BOT,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "blt", BBOCB(16,BOT,CBLT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bltl-", BBOCB(16,BOT,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bltl+", BBOCB(16,BOT,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bltl", BBOCB(16,BOT,CBLT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "blta-", BBOCB(16,BOT,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "blta+", BBOCB(16,BOT,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "blta", BBOCB(16,BOT,CBLT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bltla-", BBOCB(16,BOT,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bltla+", BBOCB(16,BOT,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bltla", BBOCB(16,BOT,CBLT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bgt-", BBOCB(16,BOT,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bgt+", BBOCB(16,BOT,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bgt", BBOCB(16,BOT,CBGT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bgtl-", BBOCB(16,BOT,CBGT,0,1), BBOYCB_MASK, PPC, { 
CR, BDM } }, +{ "bgtl+", BBOCB(16,BOT,CBGT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bgtl", BBOCB(16,BOT,CBGT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bgta-", BBOCB(16,BOT,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bgta+", BBOCB(16,BOT,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bgta", BBOCB(16,BOT,CBGT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bgtla-", BBOCB(16,BOT,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bgtla+", BBOCB(16,BOT,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bgtla", BBOCB(16,BOT,CBGT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "beq-", BBOCB(16,BOT,CBEQ,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "beq+", BBOCB(16,BOT,CBEQ,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "beq", BBOCB(16,BOT,CBEQ,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "beql-", BBOCB(16,BOT,CBEQ,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "beql+", BBOCB(16,BOT,CBEQ,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "beql", BBOCB(16,BOT,CBEQ,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "beqa-", BBOCB(16,BOT,CBEQ,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "beqa+", BBOCB(16,BOT,CBEQ,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "beqa", BBOCB(16,BOT,CBEQ,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "beqla-", BBOCB(16,BOT,CBEQ,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "beqla+", BBOCB(16,BOT,CBEQ,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "beqla", BBOCB(16,BOT,CBEQ,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bso-", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bso+", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bso", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bsol-", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bsol+", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bsol", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bsoa-", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bsoa+", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bsoa", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bsola-", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bsola+", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bsola", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bun-", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bun+", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bun", BBOCB(16,BOT,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BD } }, +{ "bunl-", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bunl+", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bunl", BBOCB(16,BOT,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BD } }, +{ "buna-", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "buna+", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "buna", BBOCB(16,BOT,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDA } }, +{ "bunla-", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bunla+", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bunla", BBOCB(16,BOT,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDA } }, +{ "bge-", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bge+", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bge", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bgel-", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bgel+", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bgel", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bgea-", 
BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bgea+", BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bgea", BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bgela-", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bgela+", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bgela", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bnl-", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnl+", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnl", BBOCB(16,BOF,CBLT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnll-", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnll+", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnll", BBOCB(16,BOF,CBLT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnla-", BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnla+", BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnla", BBOCB(16,BOF,CBLT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bnlla-", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnlla+", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnlla", BBOCB(16,BOF,CBLT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "ble-", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "ble+", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "ble", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "blel-", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "blel+", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "blel", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "blea-", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "blea+", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "blea", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "blela-", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "blela+", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "blela", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bng-", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bng+", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bng", BBOCB(16,BOF,CBGT,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bngl-", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bngl+", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bngl", BBOCB(16,BOF,CBGT,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnga-", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnga+", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnga", BBOCB(16,BOF,CBGT,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bngla-", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bngla+", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bngla", BBOCB(16,BOF,CBGT,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bne-", BBOCB(16,BOF,CBEQ,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bne+", BBOCB(16,BOF,CBEQ,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bne", BBOCB(16,BOF,CBEQ,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnel-", BBOCB(16,BOF,CBEQ,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnel+", BBOCB(16,BOF,CBEQ,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnel", BBOCB(16,BOF,CBEQ,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnea-", BBOCB(16,BOF,CBEQ,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnea+", BBOCB(16,BOF,CBEQ,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnea", 
BBOCB(16,BOF,CBEQ,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bnela-", BBOCB(16,BOF,CBEQ,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnela+", BBOCB(16,BOF,CBEQ,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnela", BBOCB(16,BOF,CBEQ,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bns-", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bns+", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bns", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnsl-", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnsl+", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnsl", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC|POWER, { CR, BD } }, +{ "bnsa-", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnsa+", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnsa", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bnsla-", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnsla+", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnsla", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC|POWER, { CR, BDA } }, +{ "bnu-", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnu+", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnu", BBOCB(16,BOF,CBSO,0,0), BBOYCB_MASK, PPC, { CR, BD } }, +{ "bnul-", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDM } }, +{ "bnul+", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BDP } }, +{ "bnul", BBOCB(16,BOF,CBSO,0,1), BBOYCB_MASK, PPC, { CR, BD } }, +{ "bnua-", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnua+", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnua", BBOCB(16,BOF,CBSO,1,0), BBOYCB_MASK, PPC, { CR, BDA } }, +{ "bnula-", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDMA } }, +{ "bnula+", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDPA } }, +{ "bnula", BBOCB(16,BOF,CBSO,1,1), BBOYCB_MASK, PPC, { CR, BDA } }, +{ "bdnzt-", BBO(16,BODNZT,0,0), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdnzt+", BBO(16,BODNZT,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdnzt", BBO(16,BODNZT,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bdnztl-", BBO(16,BODNZT,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdnztl+", BBO(16,BODNZT,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdnztl", BBO(16,BODNZT,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bdnzta-", BBO(16,BODNZT,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdnzta+", BBO(16,BODNZT,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdnzta", BBO(16,BODNZT,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdnztla-",BBO(16,BODNZT,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdnztla+",BBO(16,BODNZT,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdnztla", BBO(16,BODNZT,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdnzf-", BBO(16,BODNZF,0,0), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdnzf+", BBO(16,BODNZF,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdnzf", BBO(16,BODNZF,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bdnzfl-", BBO(16,BODNZF,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdnzfl+", BBO(16,BODNZF,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdnzfl", BBO(16,BODNZF,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bdnzfa-", BBO(16,BODNZF,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdnzfa+", BBO(16,BODNZF,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdnzfa", BBO(16,BODNZF,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdnzfla-",BBO(16,BODNZF,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdnzfla+",BBO(16,BODNZF,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdnzfla", BBO(16,BODNZF,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bt-", BBO(16,BOT,0,0), BBOY_MASK, PPC, { 
BI, BDM } }, +{ "bt+", BBO(16,BOT,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bt", BBO(16,BOT,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bbt", BBO(16,BOT,0,0), BBOY_MASK, POWER, { BI, BD } }, +{ "btl-", BBO(16,BOT,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "btl+", BBO(16,BOT,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "btl", BBO(16,BOT,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bbtl", BBO(16,BOT,0,1), BBOY_MASK, POWER, { BI, BD } }, +{ "bta-", BBO(16,BOT,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bta+", BBO(16,BOT,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bta", BBO(16,BOT,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bbta", BBO(16,BOT,1,0), BBOY_MASK, POWER, { BI, BDA } }, +{ "btla-", BBO(16,BOT,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "btla+", BBO(16,BOT,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "btla", BBO(16,BOT,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bbtla", BBO(16,BOT,1,1), BBOY_MASK, POWER, { BI, BDA } }, +{ "bf-", BBO(16,BOF,0,0), BBOY_MASK, PPC, { BI, BDM } }, +{ "bf+", BBO(16,BOF,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bf", BBO(16,BOF,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bbf", BBO(16,BOF,0,0), BBOY_MASK, POWER, { BI, BD } }, +{ "bfl-", BBO(16,BOF,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "bfl+", BBO(16,BOF,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "bfl", BBO(16,BOF,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bbfl", BBO(16,BOF,0,1), BBOY_MASK, POWER, { BI, BD } }, +{ "bfa-", BBO(16,BOF,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bfa+", BBO(16,BOF,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bfa", BBO(16,BOF,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bbfa", BBO(16,BOF,1,0), BBOY_MASK, POWER, { BI, BDA } }, +{ "bfla-", BBO(16,BOF,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bfla+", BBO(16,BOF,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bfla", BBO(16,BOF,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bbfla", BBO(16,BOF,1,1), BBOY_MASK, POWER, { BI, BDA } }, +{ "bdzt-", BBO(16,BODZT,0,0), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdzt+", BBO(16,BODZT,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdzt", BBO(16,BODZT,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bdztl-", BBO(16,BODZT,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdztl+", BBO(16,BODZT,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdztl", BBO(16,BODZT,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bdzta-", BBO(16,BODZT,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdzta+", BBO(16,BODZT,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdzta", BBO(16,BODZT,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdztla-", BBO(16,BODZT,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdztla+", BBO(16,BODZT,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdztla", BBO(16,BODZT,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdzf-", BBO(16,BODZF,0,0), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdzf+", BBO(16,BODZF,0,0), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdzf", BBO(16,BODZF,0,0), BBOY_MASK, PPC, { BI, BD } }, +{ "bdzfl-", BBO(16,BODZF,0,1), BBOY_MASK, PPC, { BI, BDM } }, +{ "bdzfl+", BBO(16,BODZF,0,1), BBOY_MASK, PPC, { BI, BDP } }, +{ "bdzfl", BBO(16,BODZF,0,1), BBOY_MASK, PPC, { BI, BD } }, +{ "bdzfa-", BBO(16,BODZF,1,0), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdzfa+", BBO(16,BODZF,1,0), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdzfa", BBO(16,BODZF,1,0), BBOY_MASK, PPC, { BI, BDA } }, +{ "bdzfla-", BBO(16,BODZF,1,1), BBOY_MASK, PPC, { BI, BDMA } }, +{ "bdzfla+", BBO(16,BODZF,1,1), BBOY_MASK, PPC, { BI, BDPA } }, +{ "bdzfla", BBO(16,BODZF,1,1), BBOY_MASK, PPC, { BI, BDA } }, +{ "bc-", B(16,0,0), B_MASK, PPC, { BOE, BI, BDM } }, +{ "bc+", B(16,0,0), B_MASK, PPC, { BOE, BI, BDP } }, +{ "bc", B(16,0,0), B_MASK, PPC|POWER, { BO, BI, BD } }, +{ "bcl-", B(16,0,1), B_MASK, 
PPC, { BOE, BI, BDM } }, +{ "bcl+", B(16,0,1), B_MASK, PPC, { BOE, BI, BDP } }, +{ "bcl", B(16,0,1), B_MASK, PPC|POWER, { BO, BI, BD } }, +{ "bca-", B(16,1,0), B_MASK, PPC, { BOE, BI, BDMA } }, +{ "bca+", B(16,1,0), B_MASK, PPC, { BOE, BI, BDPA } }, +{ "bca", B(16,1,0), B_MASK, PPC|POWER, { BO, BI, BDA } }, +{ "bcla-", B(16,1,1), B_MASK, PPC, { BOE, BI, BDMA } }, +{ "bcla+", B(16,1,1), B_MASK, PPC, { BOE, BI, BDPA } }, +{ "bcla", B(16,1,1), B_MASK, PPC|POWER, { BO, BI, BDA } }, + +{ "sc", SC(17,1,0), 0xffffffff, PPC, { 0 } }, +{ "svc", SC(17,0,0), SC_MASK, POWER, { LEV, FL1, FL2 } }, +{ "svcl", SC(17,0,1), SC_MASK, POWER, { LEV, FL1, FL2 } }, +{ "svca", SC(17,1,0), SC_MASK, POWER, { SV } }, +{ "svcla", SC(17,1,1), SC_MASK, POWER, { SV } }, + +{ "b", B(18,0,0), B_MASK, PPC|POWER, { LI } }, +{ "bl", B(18,0,1), B_MASK, PPC|POWER, { LI } }, +{ "ba", B(18,1,0), B_MASK, PPC|POWER, { LIA } }, +{ "bla", B(18,1,1), B_MASK, PPC|POWER, { LIA } }, + +{ "mcrf", XL(19,0), XLBB_MASK|(3<<21)|(3<<16), PPC|POWER, { BF, BFA } }, + +{ "blr", XLO(19,BOU,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "br", XLO(19,BOU,16,0), XLBOBIBB_MASK, POWER, { 0 } }, +{ "blrl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "brl", XLO(19,BOU,16,1), XLBOBIBB_MASK, POWER, { 0 } }, +{ "bdnzlr", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdnzlr-", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdnzlr+", XLO(19,BODNZP,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdnzlrl", XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdnzlrl-",XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdnzlrl+",XLO(19,BODNZP,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlr", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlr-", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlr+", XLO(19,BODZP,16,0), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlrl", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlrl-", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bdzlrl+", XLO(19,BODZP,16,1), XLBOBIBB_MASK, PPC, { 0 } }, +{ "bltlr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltlr-", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltlr+", XLOCB(19,BOTP,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bltlrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltlrl-", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltlrl+", XLOCB(19,BOTP,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bgtlr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtlr-", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtlr+", XLOCB(19,BOTP,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bgtlrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtlrl-", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtlrl+", XLOCB(19,BOTP,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "beqlr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqlr-", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqlr+", XLOCB(19,BOTP,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "beqlrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqlrl-", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqlrl+", XLOCB(19,BOTP,CBEQ,16,1), 
XLBOCBBB_MASK, PPC, { CR } }, +{ "beqrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bsolr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsolr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsolr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsor", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bsolrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsolrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsolrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsorl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bunlr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunlr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunlr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunlrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunlrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunlrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgelr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgelr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgelr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bger", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bgelrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgelrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgelrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgerl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnllr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnllr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnllr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnllrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnllrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnllrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "blelr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "blelr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "blelr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bler", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "blelrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blelrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blelrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blerl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnglr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnglr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnglr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnglrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnglrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnglrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnelr", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnelr-", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnelr+", XLOCB(19,BOFP,CBEQ,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bner", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnelrl", 
XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnelrl-", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnelrl+", XLOCB(19,BOFP,CBEQ,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnerl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnslr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnslr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnslr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnslrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnslrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnslrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, POWER, { CR } }, +{ "bnulr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnulr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnulr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnulrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnulrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnulrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "btlr", XLO(19,BOT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "btlr-", XLO(19,BOT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "btlr+", XLO(19,BOTP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bbtr", XLO(19,BOT,16,0), XLBOBB_MASK, POWER, { BI } }, +{ "btlrl", XLO(19,BOT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "btlrl-", XLO(19,BOT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "btlrl+", XLO(19,BOTP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bbtrl", XLO(19,BOT,16,1), XLBOBB_MASK, POWER, { BI } }, +{ "bflr", XLO(19,BOF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bflr-", XLO(19,BOF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bflr+", XLO(19,BOFP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bbfr", XLO(19,BOF,16,0), XLBOBB_MASK, POWER, { BI } }, +{ "bflrl", XLO(19,BOF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bflrl-", XLO(19,BOF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bflrl+", XLO(19,BOFP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bbfrl", XLO(19,BOF,16,1), XLBOBB_MASK, POWER, { BI } }, +{ "bdnztlr", XLO(19,BODNZT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnztlr-",XLO(19,BODNZT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnztlr+",XLO(19,BODNZTP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnztlrl",XLO(19,BODNZT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdnztlrl-",XLO(19,BODNZT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdnztlrl+",XLO(19,BODNZTP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflr", XLO(19,BODNZF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflr-",XLO(19,BODNZF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflr+",XLO(19,BODNZFP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflrl",XLO(19,BODNZF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflrl-",XLO(19,BODNZF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdnzflrl+",XLO(19,BODNZFP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlr", XLO(19,BODZT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlr-", XLO(19,BODZT,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlr+", XLO(19,BODZTP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlrl", XLO(19,BODZT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlrl-",XLO(19,BODZT,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdztlrl+",XLO(19,BODZTP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdzflr", XLO(19,BODZF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdzflr-", XLO(19,BODZF,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdzflr+", XLO(19,BODZFP,16,0), XLBOBB_MASK, PPC, { BI } }, +{ "bdzflrl", XLO(19,BODZF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ 
"bdzflrl-",XLO(19,BODZF,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bdzflrl+",XLO(19,BODZFP,16,1), XLBOBB_MASK, PPC, { BI } }, +{ "bclr", XLLK(19,16,0), XLYBB_MASK, PPC, { BO, BI } }, +{ "bclrl", XLLK(19,16,1), XLYBB_MASK, PPC, { BO, BI } }, +{ "bclr+", XLYLK(19,16,1,0), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bclrl+", XLYLK(19,16,1,1), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bclr-", XLYLK(19,16,0,0), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bclrl-", XLYLK(19,16,0,1), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bcr", XLLK(19,16,0), XLBB_MASK, POWER, { BO, BI } }, +{ "bcrl", XLLK(19,16,1), XLBB_MASK, POWER, { BO, BI } }, + +{ "crnot", XL(19,33), XL_MASK, PPC, { BT, BA, BBA } }, +{ "crnor", XL(19,33), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "rfi", XL(19,50), 0xffffffff, PPC|POWER, { 0 } }, +{ "rfci", XL(19,51), 0xffffffff, PPC, { 0 } }, + +{ "rfsvc", XL(19,82), 0xffffffff, POWER, { 0 } }, + +{ "crandc", XL(19,129), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "isync", XL(19,150), 0xffffffff, PPC, { 0 } }, +{ "ics", XL(19,150), 0xffffffff, POWER, { 0 } }, + +{ "crclr", XL(19,193), XL_MASK, PPC, { BT, BAT, BBA } }, +{ "crxor", XL(19,193), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "crnand", XL(19,225), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "crand", XL(19,257), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "crset", XL(19,289), XL_MASK, PPC, { BT, BAT, BBA } }, +{ "creqv", XL(19,289), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "crorc", XL(19,417), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "crmove", XL(19,449), XL_MASK, PPC, { BT, BA, BBA } }, +{ "cror", XL(19,449), XL_MASK, PPC|POWER, { BT, BA, BB } }, + +{ "bctr", XLO(19,BOU,528,0), XLBOBIBB_MASK, PPC|POWER, { 0 } }, +{ "bctrl", XLO(19,BOU,528,1), XLBOBIBB_MASK, PPC|POWER, { 0 } }, +{ "bltctr", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltctr-", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltctr+", XLOCB(19,BOTP,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltctrl", XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltctrl-",XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bltctrl+",XLOCB(19,BOTP,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctr", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctr-", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctr+", XLOCB(19,BOTP,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctrl", XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctrl-",XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgtctrl+",XLOCB(19,BOTP,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctr", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctr-", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctr+", XLOCB(19,BOTP,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctrl", XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctrl-",XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "beqctrl+",XLOCB(19,BOTP,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bsoctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } 
}, +{ "bunctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bunctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bgectrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnlctrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "blectrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bngctrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectr", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectr-", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectr+", XLOCB(19,BOFP,CBEQ,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectrl", XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectrl-",XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnectrl+",XLOCB(19,BOFP,CBEQ,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnsctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "bnuctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, PPC, { CR } }, +{ "btctr", XLO(19,BOT,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "btctr-", XLO(19,BOT,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "btctr+", XLO(19,BOTP,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "btctrl", XLO(19,BOT,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "btctrl-", XLO(19,BOT,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "btctrl+", 
XLO(19,BOTP,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "bfctr", XLO(19,BOF,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "bfctr-", XLO(19,BOF,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "bfctr+", XLO(19,BOFP,528,0), XLBOBB_MASK, PPC, { BI } }, +{ "bfctrl", XLO(19,BOF,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "bfctrl-", XLO(19,BOF,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "bfctrl+", XLO(19,BOFP,528,1), XLBOBB_MASK, PPC, { BI } }, +{ "bcctr", XLLK(19,528,0), XLYBB_MASK, PPC, { BO, BI } }, +{ "bcctr-", XLYLK(19,528,0,0), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bcctr+", XLYLK(19,528,1,0), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bcctrl", XLLK(19,528,1), XLYBB_MASK, PPC, { BO, BI } }, +{ "bcctrl-", XLYLK(19,528,0,1), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bcctrl+", XLYLK(19,528,1,1), XLYBB_MASK, PPC, { BOE, BI } }, +{ "bcc", XLLK(19,528,0), XLBB_MASK, POWER, { BO, BI } }, +{ "bccl", XLLK(19,528,1), XLBB_MASK, POWER, { BO, BI } }, + +{ "rlwimi", M(20,0), M_MASK, PPC, { RA,RS,SH,MBE,ME } }, +{ "rlimi", M(20,0), M_MASK, POWER, { RA,RS,SH,MBE,ME } }, + +{ "rlwimi.", M(20,1), M_MASK, PPC, { RA,RS,SH,MBE,ME } }, +{ "rlimi.", M(20,1), M_MASK, POWER, { RA,RS,SH,MBE,ME } }, + +{ "rotlwi", MME(21,31,0), MMBME_MASK, PPC, { RA, RS, SH } }, +{ "clrlwi", MME(21,31,0), MSHME_MASK, PPC, { RA, RS, MB } }, +{ "rlwinm", M(21,0), M_MASK, PPC, { RA,RS,SH,MBE,ME } }, +{ "rlinm", M(21,0), M_MASK, POWER, { RA,RS,SH,MBE,ME } }, +{ "rotlwi.", MME(21,31,1), MMBME_MASK, PPC, { RA,RS,SH } }, +{ "clrlwi.", MME(21,31,1), MSHME_MASK, PPC, { RA, RS, MB } }, +{ "rlwinm.", M(21,1), M_MASK, PPC, { RA,RS,SH,MBE,ME } }, +{ "rlinm.", M(21,1), M_MASK, POWER, { RA,RS,SH,MBE,ME } }, + +{ "rlmi", M(22,0), M_MASK, POWER|M601, { RA,RS,RB,MBE,ME } }, +{ "rlmi.", M(22,1), M_MASK, POWER|M601, { RA,RS,RB,MBE,ME } }, + +{ "rotlw", MME(23,31,0), MMBME_MASK, PPC, { RA, RS, RB } }, +{ "rlwnm", M(23,0), M_MASK, PPC, { RA,RS,RB,MBE,ME } }, +{ "rlnm", M(23,0), M_MASK, POWER, { RA,RS,RB,MBE,ME } }, +{ "rotlw.", MME(23,31,1), MMBME_MASK, PPC, { RA, RS, RB } }, +{ "rlwnm.", M(23,1), M_MASK, PPC, { RA,RS,RB,MBE,ME } }, +{ "rlnm.", M(23,1), M_MASK, POWER, { RA,RS,RB,MBE,ME } }, + +{ "nop", OP(24), 0xffffffff, PPC, { 0 } }, +{ "ori", OP(24), OP_MASK, PPC, { RA, RS, UI } }, +{ "oril", OP(24), OP_MASK, POWER, { RA, RS, UI } }, + +{ "oris", OP(25), OP_MASK, PPC, { RA, RS, UI } }, +{ "oriu", OP(25), OP_MASK, POWER, { RA, RS, UI } }, + +{ "xori", OP(26), OP_MASK, PPC, { RA, RS, UI } }, +{ "xoril", OP(26), OP_MASK, POWER, { RA, RS, UI } }, + +{ "xoris", OP(27), OP_MASK, PPC, { RA, RS, UI } }, +{ "xoriu", OP(27), OP_MASK, POWER, { RA, RS, UI } }, + +{ "andi.", OP(28), OP_MASK, PPC, { RA, RS, UI } }, +{ "andil.", OP(28), OP_MASK, POWER, { RA, RS, UI } }, + +{ "andis.", OP(29), OP_MASK, PPC, { RA, RS, UI } }, +{ "andiu.", OP(29), OP_MASK, POWER, { RA, RS, UI } }, + +{ "rotldi", MD(30,0,0), MDMB_MASK, PPC|B64, { RA, RS, SH6 } }, +{ "clrldi", MD(30,0,0), MDSH_MASK, PPC|B64, { RA, RS, MB6 } }, +{ "rldicl", MD(30,0,0), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, +{ "rotldi.", MD(30,0,1), MDMB_MASK, PPC|B64, { RA, RS, SH6 } }, +{ "clrldi.", MD(30,0,1), MDSH_MASK, PPC|B64, { RA, RS, MB6 } }, +{ "rldicl.", MD(30,0,1), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, + +{ "rldicr", MD(30,1,0), MD_MASK, PPC|B64, { RA, RS, SH6, ME6 } }, +{ "rldicr.", MD(30,1,1), MD_MASK, PPC|B64, { RA, RS, SH6, ME6 } }, + +{ "rldic", MD(30,2,0), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, +{ "rldic.", MD(30,2,1), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, + +{ "rldimi", MD(30,3,0), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, +{ 
"rldimi.", MD(30,3,1), MD_MASK, PPC|B64, { RA, RS, SH6, MB6 } }, + +{ "rotld", MDS(30,8,0), MDSMB_MASK, PPC|B64, { RA, RS, RB } }, +{ "rldcl", MDS(30,8,0), MDS_MASK, PPC|B64, { RA, RS, RB, MB6 } }, +{ "rotld.", MDS(30,8,1), MDSMB_MASK, PPC|B64, { RA, RS, RB } }, +{ "rldcl.", MDS(30,8,1), MDS_MASK, PPC|B64, { RA, RS, RB, MB6 } }, + +{ "rldcr", MDS(30,9,0), MDS_MASK, PPC|B64, { RA, RS, RB, ME6 } }, +{ "rldcr.", MDS(30,9,1), MDS_MASK, PPC|B64, { RA, RS, RB, ME6 } }, + +{ "cmpw", XCMPL(31,0,0), XCMPL_MASK, PPC, { OBF, RA, RB } }, +{ "cmpd", XCMPL(31,0,1), XCMPL_MASK, PPC|B64, { OBF, RA, RB } }, +{ "cmp", X(31,0), XCMP_MASK, PPC, { BF, L, RA, RB } }, +{ "cmp", X(31,0), XCMPL_MASK, POWER, { BF, RA, RB } }, + +{ "twlgt", XTO(31,4,TOLGT), XTO_MASK, PPC, { RA, RB } }, +{ "tlgt", XTO(31,4,TOLGT), XTO_MASK, POWER, { RA, RB } }, +{ "twllt", XTO(31,4,TOLLT), XTO_MASK, PPC, { RA, RB } }, +{ "tllt", XTO(31,4,TOLLT), XTO_MASK, POWER, { RA, RB } }, +{ "tweq", XTO(31,4,TOEQ), XTO_MASK, PPC, { RA, RB } }, +{ "teq", XTO(31,4,TOEQ), XTO_MASK, POWER, { RA, RB } }, +{ "twlge", XTO(31,4,TOLGE), XTO_MASK, PPC, { RA, RB } }, +{ "tlge", XTO(31,4,TOLGE), XTO_MASK, POWER, { RA, RB } }, +{ "twlnl", XTO(31,4,TOLNL), XTO_MASK, PPC, { RA, RB } }, +{ "tlnl", XTO(31,4,TOLNL), XTO_MASK, POWER, { RA, RB } }, +{ "twlle", XTO(31,4,TOLLE), XTO_MASK, PPC, { RA, RB } }, +{ "tlle", XTO(31,4,TOLLE), XTO_MASK, POWER, { RA, RB } }, +{ "twlng", XTO(31,4,TOLNG), XTO_MASK, PPC, { RA, RB } }, +{ "tlng", XTO(31,4,TOLNG), XTO_MASK, POWER, { RA, RB } }, +{ "twgt", XTO(31,4,TOGT), XTO_MASK, PPC, { RA, RB } }, +{ "tgt", XTO(31,4,TOGT), XTO_MASK, POWER, { RA, RB } }, +{ "twge", XTO(31,4,TOGE), XTO_MASK, PPC, { RA, RB } }, +{ "tge", XTO(31,4,TOGE), XTO_MASK, POWER, { RA, RB } }, +{ "twnl", XTO(31,4,TONL), XTO_MASK, PPC, { RA, RB } }, +{ "tnl", XTO(31,4,TONL), XTO_MASK, POWER, { RA, RB } }, +{ "twlt", XTO(31,4,TOLT), XTO_MASK, PPC, { RA, RB } }, +{ "tlt", XTO(31,4,TOLT), XTO_MASK, POWER, { RA, RB } }, +{ "twle", XTO(31,4,TOLE), XTO_MASK, PPC, { RA, RB } }, +{ "tle", XTO(31,4,TOLE), XTO_MASK, POWER, { RA, RB } }, +{ "twng", XTO(31,4,TONG), XTO_MASK, PPC, { RA, RB } }, +{ "tng", XTO(31,4,TONG), XTO_MASK, POWER, { RA, RB } }, +{ "twne", XTO(31,4,TONE), XTO_MASK, PPC, { RA, RB } }, +{ "tne", XTO(31,4,TONE), XTO_MASK, POWER, { RA, RB } }, +{ "trap", XTO(31,4,TOU), 0xffffffff, PPC, { 0 } }, +{ "tw", X(31,4), X_MASK, PPC, { TO, RA, RB } }, +{ "t", X(31,4), X_MASK, POWER, { TO, RA, RB } }, + +{ "subfc", XO(31,8,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sf", XO(31,8,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "subc", XO(31,8,0,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfc.", XO(31,8,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sf.", XO(31,8,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "subc.", XO(31,8,0,1), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfco", XO(31,8,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfo", XO(31,8,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "subco", XO(31,8,1,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfco.", XO(31,8,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfo.", XO(31,8,1,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "subco.", XO(31,8,1,1), XO_MASK, PPC, { RT, RB, RA } }, + +{ "mulhdu", XO(31,9,0,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "mulhdu.", XO(31,9,0,1), XO_MASK, PPC|B64, { RT, RA, RB } }, + +{ "addc", XO(31,10,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "a", XO(31,10,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "addc.", XO(31,10,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "a.", XO(31,10,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "addco", 
XO(31,10,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "ao", XO(31,10,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "addco.", XO(31,10,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "ao.", XO(31,10,1,1), XO_MASK, POWER, { RT, RA, RB } }, + +{ "mulhwu", XO(31,11,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulhwu.", XO(31,11,0,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "mfcr", X(31,19), XRARB_MASK, POWER|PPC, { RT } }, + +{ "lwarx", X(31,20), X_MASK, PPC, { RT, RA, RB } }, + +{ "ldx", X(31,21), X_MASK, PPC|B64, { RT, RA, RB } }, + +{ "lwzx", X(31,23), X_MASK, PPC, { RT, RA, RB } }, +{ "lx", X(31,23), X_MASK, POWER, { RT, RA, RB } }, + +{ "slw", XRC(31,24,0), X_MASK, PPC, { RA, RS, RB } }, +{ "sl", XRC(31,24,0), X_MASK, POWER, { RA, RS, RB } }, +{ "slw.", XRC(31,24,1), X_MASK, PPC, { RA, RS, RB } }, +{ "sl.", XRC(31,24,1), X_MASK, POWER, { RA, RS, RB } }, + +{ "cntlzw", XRC(31,26,0), XRB_MASK, PPC, { RA, RS } }, +{ "cntlz", XRC(31,26,0), XRB_MASK, POWER, { RA, RS } }, +{ "cntlzw.", XRC(31,26,1), XRB_MASK, PPC, { RA, RS } }, +{ "cntlz.", XRC(31,26,1), XRB_MASK, POWER, { RA, RS } }, + +{ "sld", XRC(31,27,0), X_MASK, PPC|B64, { RA, RS, RB } }, +{ "sld.", XRC(31,27,1), X_MASK, PPC|B64, { RA, RS, RB } }, + +{ "and", XRC(31,28,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "and.", XRC(31,28,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "maskg", XRC(31,29,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "maskg.", XRC(31,29,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "cmplw", XCMPL(31,32,0), XCMPL_MASK, PPC, { OBF, RA, RB } }, +{ "cmpld", XCMPL(31,32,1), XCMPL_MASK, PPC|B64, { OBF, RA, RB } }, +{ "cmpl", X(31,32), XCMP_MASK, PPC, { BF, L, RA, RB } }, +{ "cmpl", X(31,32), XCMPL_MASK, POWER, { BF, RA, RB } }, + +{ "subf", XO(31,40,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sub", XO(31,40,0,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subf.", XO(31,40,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sub.", XO(31,40,0,1), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfo", XO(31,40,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "subo", XO(31,40,1,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "subo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RB, RA } }, + +{ "ldux", X(31,53), X_MASK, PPC|B64, { RT, RAL, RB } }, + +{ "dcbst", X(31,54), XRT_MASK, PPC, { RA, RB } }, + +{ "lwzux", X(31,55), X_MASK, PPC, { RT, RAL, RB } }, +{ "lux", X(31,55), X_MASK, POWER, { RT, RA, RB } }, + +{ "cntlzd", XRC(31,58,0), XRB_MASK, PPC|B64, { RA, RS } }, +{ "cntlzd.", XRC(31,58,1), XRB_MASK, PPC|B64, { RA, RS } }, + +{ "andc", XRC(31,60,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "andc.", XRC(31,60,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "tdlgt", XTO(31,68,TOLGT), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdllt", XTO(31,68,TOLLT), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdeq", XTO(31,68,TOEQ), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdlge", XTO(31,68,TOLGE), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdlnl", XTO(31,68,TOLNL), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdlle", XTO(31,68,TOLLE), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdlng", XTO(31,68,TOLNG), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdgt", XTO(31,68,TOGT), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdge", XTO(31,68,TOGE), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdnl", XTO(31,68,TONL), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdlt", XTO(31,68,TOLT), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdle", XTO(31,68,TOLE), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdng", XTO(31,68,TONG), XTO_MASK, PPC|B64, { RA, RB } }, +{ "tdne", XTO(31,68,TONE), XTO_MASK, PPC|B64, { RA, RB } }, +{ "td", X(31,68), X_MASK, PPC|B64, { TO, 
RA, RB } }, + +{ "mulhd", XO(31,73,0,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "mulhd.", XO(31,73,0,1), XO_MASK, PPC|B64, { RT, RA, RB } }, + +{ "mulhw", XO(31,75,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulhw.", XO(31,75,0,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "mfmsr", X(31,83), XRARB_MASK, PPC|POWER, { RT } }, + +{ "ldarx", X(31,84), X_MASK, PPC|B64, { RT, RA, RB } }, + +{ "dcbf", X(31,86), XRT_MASK, PPC, { RA, RB } }, + +{ "lbzx", X(31,87), X_MASK, PPC|POWER, { RT, RA, RB } }, + +{ "neg", XO(31,104,0,0), XORB_MASK, PPC|POWER, { RT, RA } }, +{ "neg.", XO(31,104,0,1), XORB_MASK, PPC|POWER, { RT, RA } }, +{ "nego", XO(31,104,1,0), XORB_MASK, PPC|POWER, { RT, RA } }, +{ "nego.", XO(31,104,1,1), XORB_MASK, PPC|POWER, { RT, RA } }, + +{ "mul", XO(31,107,0,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "mul.", XO(31,107,0,1), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "mulo", XO(31,107,1,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "mulo.", XO(31,107,1,1), XO_MASK, POWER|M601, { RT, RA, RB } }, + +{ "clf", X(31,118), XRB_MASK, POWER, { RT, RA } }, + +{ "lbzux", X(31,119), X_MASK, PPC|POWER, { RT, RAL, RB } }, + +{ "not", XRC(31,124,0), X_MASK, PPC|POWER, { RA, RS, RBS } }, +{ "nor", XRC(31,124,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "not.", XRC(31,124,1), X_MASK, PPC|POWER, { RA, RS, RBS } }, +{ "nor.", XRC(31,124,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "subfe", XO(31,136,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfe", XO(31,136,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "subfe.", XO(31,136,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfe.", XO(31,136,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "subfeo", XO(31,136,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfeo", XO(31,136,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "subfeo.", XO(31,136,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sfeo.", XO(31,136,1,1), XO_MASK, POWER, { RT, RA, RB } }, + +{ "adde", XO(31,138,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "ae", XO(31,138,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "adde.", XO(31,138,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "ae.", XO(31,138,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "addeo", XO(31,138,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "aeo", XO(31,138,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "addeo.", XO(31,138,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "aeo.", XO(31,138,1,1), XO_MASK, POWER, { RT, RA, RB } }, + +{ "mtcr", XFXM(31,144,0xff), XFXFXM_MASK|FXM_MASK, PPC|POWER, { RS }}, +{ "mtcrf", X(31,144), XFXFXM_MASK, PPC|POWER, { FXM, RS } }, + +{ "mtmsr", X(31,146), XRARB_MASK, PPC|POWER, { RS } }, +{ "mtmsrd", X(31,178), XRARB_MASK, PPC|POWER, { RS } }, + +{ "stdx", X(31,149), X_MASK, PPC|B64, { RS, RA, RB } }, + +{ "stwcx.", XRC(31,150,1), X_MASK, PPC, { RS, RA, RB } }, + +{ "stwx", X(31,151), X_MASK, PPC, { RS, RA, RB } }, +{ "stx", X(31,151), X_MASK, POWER, { RS, RA, RB } }, + +{ "slq", XRC(31,152,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "slq.", XRC(31,152,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "sle", XRC(31,153,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sle.", XRC(31,153,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "stdux", X(31,181), X_MASK, PPC|B64, { RS, RAS, RB } }, + +{ "stwux", X(31,183), X_MASK, PPC, { RS, RAS, RB } }, +{ "stux", X(31,183), X_MASK, POWER, { RS, RA, RB } }, + +{ "sliq", XRC(31,184,0), X_MASK, POWER|M601, { RA, RS, SH } }, +{ "sliq.", XRC(31,184,1), X_MASK, POWER|M601, { RA, RS, SH } }, + +{ "subfze", XO(31,200,0,0), XORB_MASK, PPC, { RT, RA } }, +{ "sfze", XO(31,200,0,0), XORB_MASK, POWER, { RT, RA } }, +{ "subfze.", XO(31,200,0,1), 
XORB_MASK, PPC, { RT, RA } }, +{ "sfze.", XO(31,200,0,1), XORB_MASK, POWER, { RT, RA } }, +{ "subfzeo", XO(31,200,1,0), XORB_MASK, PPC, { RT, RA } }, +{ "sfzeo", XO(31,200,1,0), XORB_MASK, POWER, { RT, RA } }, +{ "subfzeo.",XO(31,200,1,1), XORB_MASK, PPC, { RT, RA } }, +{ "sfzeo.", XO(31,200,1,1), XORB_MASK, POWER, { RT, RA } }, + +{ "addze", XO(31,202,0,0), XORB_MASK, PPC, { RT, RA } }, +{ "aze", XO(31,202,0,0), XORB_MASK, POWER, { RT, RA } }, +{ "addze.", XO(31,202,0,1), XORB_MASK, PPC, { RT, RA } }, +{ "aze.", XO(31,202,0,1), XORB_MASK, POWER, { RT, RA } }, +{ "addzeo", XO(31,202,1,0), XORB_MASK, PPC, { RT, RA } }, +{ "azeo", XO(31,202,1,0), XORB_MASK, POWER, { RT, RA } }, +{ "addzeo.", XO(31,202,1,1), XORB_MASK, PPC, { RT, RA } }, +{ "azeo.", XO(31,202,1,1), XORB_MASK, POWER, { RT, RA } }, + +{ "mtsr", X(31,210), XRB_MASK|(1<<20), PPC|POWER|B32, { SR, RS } }, + +{ "stdcx.", XRC(31,214,1), X_MASK, PPC|B64, { RS, RA, RB } }, + +{ "stbx", X(31,215), X_MASK, PPC|POWER, { RS, RA, RB } }, + +{ "sllq", XRC(31,216,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sllq.", XRC(31,216,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "sleq", XRC(31,217,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sleq.", XRC(31,217,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "subfme", XO(31,232,0,0), XORB_MASK, PPC, { RT, RA } }, +{ "sfme", XO(31,232,0,0), XORB_MASK, POWER, { RT, RA } }, +{ "subfme.", XO(31,232,0,1), XORB_MASK, PPC, { RT, RA } }, +{ "sfme.", XO(31,232,0,1), XORB_MASK, POWER, { RT, RA } }, +{ "subfmeo", XO(31,232,1,0), XORB_MASK, PPC, { RT, RA } }, +{ "sfmeo", XO(31,232,1,0), XORB_MASK, POWER, { RT, RA } }, +{ "subfmeo.",XO(31,232,1,1), XORB_MASK, PPC, { RT, RA } }, +{ "sfmeo.", XO(31,232,1,1), XORB_MASK, POWER, { RT, RA } }, + +{ "mulld", XO(31,233,0,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "mulld.", XO(31,233,0,1), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "mulldo", XO(31,233,1,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "mulldo.", XO(31,233,1,1), XO_MASK, PPC|B64, { RT, RA, RB } }, + +{ "addme", XO(31,234,0,0), XORB_MASK, PPC, { RT, RA } }, +{ "ame", XO(31,234,0,0), XORB_MASK, POWER, { RT, RA } }, +{ "addme.", XO(31,234,0,1), XORB_MASK, PPC, { RT, RA } }, +{ "ame.", XO(31,234,0,1), XORB_MASK, POWER, { RT, RA } }, +{ "addmeo", XO(31,234,1,0), XORB_MASK, PPC, { RT, RA } }, +{ "ameo", XO(31,234,1,0), XORB_MASK, POWER, { RT, RA } }, +{ "addmeo.", XO(31,234,1,1), XORB_MASK, PPC, { RT, RA } }, +{ "ameo.", XO(31,234,1,1), XORB_MASK, POWER, { RT, RA } }, + +{ "mullw", XO(31,235,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "muls", XO(31,235,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "mullw.", XO(31,235,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "muls.", XO(31,235,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "mullwo", XO(31,235,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulso", XO(31,235,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "mullwo.", XO(31,235,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulso.", XO(31,235,1,1), XO_MASK, POWER, { RT, RA, RB } }, + +{ "mtsrin", X(31,242), XRA_MASK, PPC|B32, { RS, RB } }, +{ "mtsri", X(31,242), XRA_MASK, POWER|B32, { RS, RB } }, + +{ "dcbtst", X(31,246), XRT_MASK, PPC, { RA, RB } }, + +{ "stbux", X(31,247), X_MASK, PPC|POWER, { RS, RAS, RB } }, + +{ "slliq", XRC(31,248,0), X_MASK, POWER|M601, { RA, RS, SH } }, +{ "slliq.", XRC(31,248,1), X_MASK, POWER|M601, { RA, RS, SH } }, + +{ "doz", XO(31,264,0,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "doz.", XO(31,264,0,1), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "dozo", XO(31,264,1,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "dozo.", 
XO(31,264,1,1), XO_MASK, POWER|M601, { RT, RA, RB } }, + +{ "add", XO(31,266,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "cax", XO(31,266,0,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "add.", XO(31,266,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "cax.", XO(31,266,0,1), XO_MASK, POWER, { RT, RA, RB } }, +{ "addo", XO(31,266,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "caxo", XO(31,266,1,0), XO_MASK, POWER, { RT, RA, RB } }, +{ "addo.", XO(31,266,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "caxo.", XO(31,266,1,1), XO_MASK, POWER, { RT, RA, RB } }, + +{ "lscbx", XRC(31,277,0), X_MASK, POWER|M601, { RT, RA, RB } }, +{ "lscbx.", XRC(31,277,1), X_MASK, POWER|M601, { RT, RA, RB } }, + +{ "dcbt", X(31,278), XRT_MASK, PPC, { RA, RB } }, + +{ "lhzx", X(31,279), X_MASK, PPC|POWER, { RT, RA, RB } }, + +{ "icbt", X(31,262), XRT_MASK, PPC, { RA, RB } }, + +{ "eqv", XRC(31,284,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "eqv.", XRC(31,284,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "tlbie", X(31,306), XRTRA_MASK, PPC, { RB } }, +{ "tlbi", X(31,306), XRTRA_MASK, POWER, { RB } }, + +{ "eciwx", X(31,310), X_MASK, PPC, { RT, RA, RB } }, + +{ "lhzux", X(31,311), X_MASK, PPC|POWER, { RT, RAL, RB } }, + +{ "xor", XRC(31,316,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "xor.", XRC(31,316,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "mfdcr", X(31,323), X_MASK, PPC, { RT, SPR } }, + +{ "div", XO(31,331,0,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "div.", XO(31,331,0,1), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "divo", XO(31,331,1,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "divo.", XO(31,331,1,1), XO_MASK, POWER|M601, { RT, RA, RB } }, + +{ "mfmq", XSPR(31,339,0), XSPR_MASK, POWER|M601, { RT } }, +{ "mfxer", XSPR(31,339,1), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfrtcu", XSPR(31,339,4), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfrtcl", XSPR(31,339,5), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfdec", XSPR(31,339,6), XSPR_MASK, POWER|M601, { RT } }, +{ "mflr", XSPR(31,339,8), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfctr", XSPR(31,339,9), XSPR_MASK, PPC|POWER, { RT } }, +{ "mftid", XSPR(31,339,17), XSPR_MASK, POWER, { RT } }, +{ "mfdsisr", XSPR(31,339,18), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfdar", XSPR(31,339,19), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfdec", XSPR(31,339,22), XSPR_MASK, PPC, { RT } }, +{ "mfsdr0", XSPR(31,339,24), XSPR_MASK, POWER, { RT } }, +{ "mfsdr1", XSPR(31,339,25), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfsrr0", XSPR(31,339,26), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfsrr1", XSPR(31,339,27), XSPR_MASK, PPC|POWER, { RT } }, +{ "mfsprg", XSPR(31,339,272), XSPRG_MASK, PPC, { RT, SPRG } }, +{ "mfasr", XSPR(31,339,280), XSPR_MASK, PPC|B64, { RT } }, +{ "mfear", XSPR(31,339,282), XSPR_MASK, PPC, { RT } }, +{ "mfpvr", XSPR(31,339,287), XSPR_MASK, PPC, { RT } }, +{ "mfibatu", XSPR(31,339,528), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfibatl", XSPR(31,339,529), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfdbatu", XSPR(31,339,536), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfdbatl", XSPR(31,339,537), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfspr", X(31,339), X_MASK, PPC|POWER, { RT, SPR } }, + +{ "lwax", X(31,341), X_MASK, PPC|B64, { RT, RA, RB } }, + +{ "lhax", X(31,343), X_MASK, PPC|POWER, { RT, RA, RB } }, + +{ "dccci", X(31,454), XRT_MASK, PPC, { RA, RB } }, + +{ "abs", XO(31,360,0,0), XORB_MASK, POWER|M601, { RT, RA } }, +{ "abs.", XO(31,360,0,1), XORB_MASK, POWER|M601, { RT, RA } }, +{ "abso", XO(31,360,1,0), XORB_MASK, POWER|M601, { RT, RA } }, +{ "abso.", XO(31,360,1,1), XORB_MASK, POWER|M601, { RT, RA } }, + +{ "divs", 
XO(31,363,0,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "divs.", XO(31,363,0,1), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "divso", XO(31,363,1,0), XO_MASK, POWER|M601, { RT, RA, RB } }, +{ "divso.", XO(31,363,1,1), XO_MASK, POWER|M601, { RT, RA, RB } }, + +{ "tlbia", X(31,370), 0xffffffff, PPC, { 0 } }, + +{ "mftbu", XSPR(31,371,269), XSPR_MASK, PPC, { RT } }, +{ "mftb", X(31,371), X_MASK, PPC, { RT, TBR } }, + +{ "lwaux", X(31,373), X_MASK, PPC|B64, { RT, RAL, RB } }, + +{ "lhaux", X(31,375), X_MASK, PPC|POWER, { RT, RAL, RB } }, + +{ "sthx", X(31,407), X_MASK, PPC|POWER, { RS, RA, RB } }, + +{ "lfqx", X(31,791), X_MASK, POWER2, { FRT, RA, RB } }, + +{ "lfqux", X(31,823), X_MASK, POWER2, { FRT, RA, RB } }, + +{ "stfqx", X(31,919), X_MASK, POWER2, { FRS, RA, RB } }, + +{ "stfqux", X(31,951), X_MASK, POWER2, { FRS, RA, RB } }, + +{ "orc", XRC(31,412,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "orc.", XRC(31,412,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "sradi", XS(31,413,0), XS_MASK, PPC|B64, { RA, RS, SH6 } }, +{ "sradi.", XS(31,413,1), XS_MASK, PPC|B64, { RA, RS, SH6 } }, + +{ "slbie", X(31,434), XRTRA_MASK, PPC|B64, { RB } }, + +{ "ecowx", X(31,438), X_MASK, PPC, { RT, RA, RB } }, + +{ "sthux", X(31,439), X_MASK, PPC|POWER, { RS, RAS, RB } }, + +{ "mr", XRC(31,444,0), X_MASK, PPC|POWER, { RA, RS, RBS } }, +{ "or", XRC(31,444,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "mr.", XRC(31,444,1), X_MASK, PPC|POWER, { RA, RS, RBS } }, +{ "or.", XRC(31,444,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "mtdcr", X(31,451), X_MASK, PPC, { SPR, RS } }, + +{ "divdu", XO(31,457,0,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divdu.", XO(31,457,0,1), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divduo", XO(31,457,1,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divduo.", XO(31,457,1,1), XO_MASK, PPC|B64, { RT, RA, RB } }, + +{ "divwu", XO(31,459,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwu.", XO(31,459,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwuo", XO(31,459,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwuo.", XO(31,459,1,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "mtmq", XSPR(31,467,0), XSPR_MASK, POWER|M601, { RS } }, +{ "mtxer", XSPR(31,467,1), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtlr", XSPR(31,467,8), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtctr", XSPR(31,467,9), XSPR_MASK, PPC|POWER, { RS } }, +{ "mttid", XSPR(31,467,17), XSPR_MASK, POWER, { RS } }, +{ "mtdsisr", XSPR(31,467,18), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtdar", XSPR(31,467,19), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtrtcu", XSPR(31,467,20), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtrtcl", XSPR(31,467,21), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtdec", XSPR(31,467,22), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtsdr0", XSPR(31,467,24), XSPR_MASK, POWER, { RS } }, +{ "mtsdr1", XSPR(31,467,25), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtsrr0", XSPR(31,467,26), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtsrr1", XSPR(31,467,27), XSPR_MASK, PPC|POWER, { RS } }, +{ "mtsprg", XSPR(31,467,272), XSPRG_MASK, PPC, { SPRG, RS } }, +{ "mtasr", XSPR(31,467,280), XSPR_MASK, PPC|B64, { RS } }, +{ "mtear", XSPR(31,467,282), XSPR_MASK, PPC, { RS } }, +{ "mttbl", XSPR(31,467,284), XSPR_MASK, PPC, { RS } }, +{ "mttbu", XSPR(31,467,285), XSPR_MASK, PPC, { RS } }, +{ "mtibatu", XSPR(31,467,528), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtibatl", XSPR(31,467,529), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtdbatu", XSPR(31,467,536), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtdbatl", XSPR(31,467,537), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtspr", X(31,467), X_MASK, PPC|POWER, { SPR, RS 
} }, + +{ "dcbi", X(31,470), XRT_MASK, PPC, { RA, RB } }, + +{ "nand", XRC(31,476,0), X_MASK, PPC|POWER, { RA, RS, RB } }, +{ "nand.", XRC(31,476,1), X_MASK, PPC|POWER, { RA, RS, RB } }, + +{ "nabs", XO(31,488,0,0), XORB_MASK, POWER|M601, { RT, RA } }, +{ "nabs.", XO(31,488,0,1), XORB_MASK, POWER|M601, { RT, RA } }, +{ "nabso", XO(31,488,1,0), XORB_MASK, POWER|M601, { RT, RA } }, +{ "nabso.", XO(31,488,1,1), XORB_MASK, POWER|M601, { RT, RA } }, + +{ "divd", XO(31,489,0,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divd.", XO(31,489,0,1), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divdo", XO(31,489,1,0), XO_MASK, PPC|B64, { RT, RA, RB } }, +{ "divdo.", XO(31,489,1,1), XO_MASK, PPC|B64, { RT, RA, RB } }, + +{ "divw", XO(31,491,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divw.", XO(31,491,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwo", XO(31,491,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwo.", XO(31,491,1,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "slbia", X(31,498), 0xffffffff, PPC|B64, { 0 } }, + +{ "cli", X(31,502), XRB_MASK, POWER, { RT, RA } }, + +{ "mcrxr", X(31,512), XRARB_MASK|(3<<21), PPC|POWER, { BF } }, + +{ "clcs", X(31,531), XRB_MASK, POWER|M601, { RT, RA } }, + +{ "lswx", X(31,533), X_MASK, PPC, { RT, RA, RB } }, +{ "lsx", X(31,533), X_MASK, POWER, { RT, RA, RB } }, + +{ "lwbrx", X(31,534), X_MASK, PPC, { RT, RA, RB } }, +{ "lbrx", X(31,534), X_MASK, POWER, { RT, RA, RB } }, + +{ "lfsx", X(31,535), X_MASK, PPC|POWER, { FRT, RA, RB } }, + +{ "srw", XRC(31,536,0), X_MASK, PPC, { RA, RS, RB } }, +{ "sr", XRC(31,536,0), X_MASK, POWER, { RA, RS, RB } }, +{ "srw.", XRC(31,536,1), X_MASK, PPC, { RA, RS, RB } }, +{ "sr.", XRC(31,536,1), X_MASK, POWER, { RA, RS, RB } }, + +{ "rrib", XRC(31,537,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "rrib.", XRC(31,537,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "srd", XRC(31,539,0), X_MASK, PPC|B64, { RA, RS, RB } }, +{ "srd.", XRC(31,539,1), X_MASK, PPC|B64, { RA, RS, RB } }, + +{ "maskir", XRC(31,541,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "maskir.", XRC(31,541,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "tlbsync", X(31,566), 0xffffffff, PPC, { 0 } }, + +{ "lfsux", X(31,567), X_MASK, PPC|POWER, { FRT, RAS, RB } }, + +{ "mfsr", X(31,595), XRB_MASK|(1<<20), PPC|POWER|B32, { RT, SR } }, + +{ "lswi", X(31,597), X_MASK, PPC, { RT, RA, NB } }, +{ "lsi", X(31,597), X_MASK, POWER, { RT, RA, NB } }, + +{ "sync", X(31,598), 0xffffffff, PPC, { 0 } }, +{ "dcs", X(31,598), 0xffffffff, POWER, { 0 } }, + +{ "lfdx", X(31,599), X_MASK, PPC|POWER, { FRT, RA, RB } }, + +{ "mfsri", X(31,627), X_MASK, POWER, { RT, RA, RB } }, + +{ "dclst", X(31,630), XRB_MASK, POWER, { RS, RA } }, + +{ "lfdux", X(31,631), X_MASK, PPC|POWER, { FRT, RAS, RB } }, + +{ "mfsrin", X(31,659), XRA_MASK, PPC|B32, { RT, RB } }, + +{ "stswx", X(31,661), X_MASK, PPC, { RS, RA, RB } }, +{ "stsx", X(31,661), X_MASK, POWER, { RS, RA, RB } }, + +{ "stwbrx", X(31,662), X_MASK, PPC, { RS, RA, RB } }, +{ "stbrx", X(31,662), X_MASK, POWER, { RS, RA, RB } }, + +{ "stfsx", X(31,663), X_MASK, PPC|POWER, { FRS, RA, RB } }, + +{ "srq", XRC(31,664,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "srq.", XRC(31,664,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "sre", XRC(31,665,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sre.", XRC(31,665,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "stfsux", X(31,695), X_MASK, PPC|POWER, { FRS, RAS, RB } }, + +{ "sriq", XRC(31,696,0), X_MASK, POWER|M601, { RA, RS, SH } }, +{ "sriq.", XRC(31,696,1), X_MASK, POWER|M601, { RA, RS, SH } }, + +{ "stswi", X(31,725), X_MASK, 
PPC, { RS, RA, NB } }, +{ "stsi", X(31,725), X_MASK, POWER, { RS, RA, NB } }, + +{ "stfdx", X(31,727), X_MASK, PPC|POWER, { FRS, RA, RB } }, + +{ "srlq", XRC(31,728,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "srlq.", XRC(31,728,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "sreq", XRC(31,729,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sreq.", XRC(31,729,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "stfdux", X(31,759), X_MASK, PPC|POWER, { FRS, RAS, RB } }, + +{ "srliq", XRC(31,760,0), X_MASK, POWER|M601, { RA, RS, SH } }, +{ "srliq.", XRC(31,760,1), X_MASK, POWER|M601, { RA, RS, SH } }, + +{ "lhbrx", X(31,790), X_MASK, PPC|POWER, { RT, RA, RB } }, + +{ "sraw", XRC(31,792,0), X_MASK, PPC, { RA, RS, RB } }, +{ "sra", XRC(31,792,0), X_MASK, POWER, { RA, RS, RB } }, +{ "sraw.", XRC(31,792,1), X_MASK, PPC, { RA, RS, RB } }, +{ "sra.", XRC(31,792,1), X_MASK, POWER, { RA, RS, RB } }, + +{ "srad", XRC(31,794,0), X_MASK, PPC|B64, { RA, RS, RB } }, +{ "srad.", XRC(31,794,1), X_MASK, PPC|B64, { RA, RS, RB } }, + +{ "rac", X(31,818), X_MASK, POWER, { RT, RA, RB } }, + +{ "srawi", XRC(31,824,0), X_MASK, PPC, { RA, RS, SH } }, +{ "srai", XRC(31,824,0), X_MASK, POWER, { RA, RS, SH } }, +{ "srawi.", XRC(31,824,1), X_MASK, PPC, { RA, RS, SH } }, +{ "srai.", XRC(31,824,1), X_MASK, POWER, { RA, RS, SH } }, + +{ "eieio", X(31,854), 0xffffffff, PPC, { 0 } }, + +{ "sthbrx", X(31,918), X_MASK, PPC|POWER, { RS, RA, RB } }, + +{ "sraq", XRC(31,920,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "sraq.", XRC(31,920,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "srea", XRC(31,921,0), X_MASK, POWER|M601, { RA, RS, RB } }, +{ "srea.", XRC(31,921,1), X_MASK, POWER|M601, { RA, RS, RB } }, + +{ "extsh", XRC(31,922,0), XRB_MASK, PPC, { RA, RS } }, +{ "exts", XRC(31,922,0), XRB_MASK, POWER, { RA, RS } }, +{ "extsh.", XRC(31,922,1), XRB_MASK, PPC, { RA, RS } }, +{ "exts.", XRC(31,922,1), XRB_MASK, POWER, { RA, RS } }, + +{ "sraiq", XRC(31,952,0), X_MASK, POWER|M601, { RA, RS, SH } }, +{ "sraiq.", XRC(31,952,1), X_MASK, POWER|M601, { RA, RS, SH } }, + +{ "extsb", XRC(31,954,0), XRB_MASK, PPC, { RA, RS} }, +{ "extsb.", XRC(31,954,1), XRB_MASK, PPC, { RA, RS} }, + +{ "iccci", X(31,966), XRT_MASK, PPC, { RA, RB } }, + +{ "icbi", X(31,982), XRT_MASK, PPC, { RA, RB } }, + +{ "stfiwx", X(31,983), X_MASK, PPC, { FRS, RA, RB } }, + +{ "extsw", XRC(31,986,0), XRB_MASK, PPC, { RA, RS } }, +{ "extsw.", XRC(31,986,1), XRB_MASK, PPC, { RA, RS } }, + +{ "dcbz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, +{ "dclz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, + +{ "lwz", OP(32), OP_MASK, PPC, { RT, D, RA } }, +{ "l", OP(32), OP_MASK, POWER, { RT, D, RA } }, + +{ "lwzu", OP(33), OP_MASK, PPC, { RT, D, RAL } }, +{ "lu", OP(33), OP_MASK, POWER, { RT, D, RA } }, + +{ "lbz", OP(34), OP_MASK, PPC|POWER, { RT, D, RA } }, + +{ "lbzu", OP(35), OP_MASK, PPC|POWER, { RT, D, RAL } }, + +{ "stw", OP(36), OP_MASK, PPC, { RS, D, RA } }, +{ "st", OP(36), OP_MASK, POWER, { RS, D, RA } }, + +{ "stwu", OP(37), OP_MASK, PPC, { RS, D, RAS } }, +{ "stu", OP(37), OP_MASK, POWER, { RS, D, RA } }, + +{ "stb", OP(38), OP_MASK, PPC|POWER, { RS, D, RA } }, + +{ "stbu", OP(39), OP_MASK, PPC|POWER, { RS, D, RAS } }, + +{ "lhz", OP(40), OP_MASK, PPC|POWER, { RT, D, RA } }, + +{ "lhzu", OP(41), OP_MASK, PPC|POWER, { RT, D, RAL } }, + +{ "lha", OP(42), OP_MASK, PPC|POWER, { RT, D, RA } }, + +{ "lhau", OP(43), OP_MASK, PPC|POWER, { RT, D, RAL } }, + +{ "sth", OP(44), OP_MASK, PPC|POWER, { RS, D, RA } }, + +{ "sthu", OP(45), OP_MASK, PPC|POWER, { RS, D, RAS } }, + +{ "lmw", 
OP(46), OP_MASK, PPC, { RT, D, RAM } }, +{ "lm", OP(46), OP_MASK, POWER, { RT, D, RA } }, + +{ "stmw", OP(47), OP_MASK, PPC, { RS, D, RA } }, +{ "stm", OP(47), OP_MASK, POWER, { RS, D, RA } }, + +{ "lfs", OP(48), OP_MASK, PPC|POWER, { FRT, D, RA } }, + +{ "lfsu", OP(49), OP_MASK, PPC|POWER, { FRT, D, RAS } }, + +{ "lfd", OP(50), OP_MASK, PPC|POWER, { FRT, D, RA } }, + +{ "lfdu", OP(51), OP_MASK, PPC|POWER, { FRT, D, RAS } }, + +{ "stfs", OP(52), OP_MASK, PPC|POWER, { FRS, D, RA } }, + +{ "stfsu", OP(53), OP_MASK, PPC|POWER, { FRS, D, RAS } }, + +{ "stfd", OP(54), OP_MASK, PPC|POWER, { FRS, D, RA } }, + +{ "stfdu", OP(55), OP_MASK, PPC|POWER, { FRS, D, RAS } }, + +{ "lfq", OP(56), OP_MASK, POWER2, { FRT, D, RA } }, + +{ "lfqu", OP(57), OP_MASK, POWER2, { FRT, D, RA } }, + +{ "ld", DSO(58,0), DS_MASK, PPC|B64, { RT, DS, RA } }, + +{ "ldu", DSO(58,1), DS_MASK, PPC|B64, { RT, DS, RAL } }, + +{ "lwa", DSO(58,2), DS_MASK, PPC|B64, { RT, DS, RA } }, + +{ "fdivs", A(59,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fdivs.", A(59,18,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fsubs", A(59,20,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fsubs.", A(59,20,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fadds", A(59,21,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fadds.", A(59,21,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fsqrts", A(59,22,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "fsqrts.", A(59,22,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fres", A(59,24,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "fres.", A(59,24,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fmuls", A(59,25,0), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{ "fmuls.", A(59,25,1), AFRB_MASK, PPC, { FRT, FRA, FRC } }, + +{ "fmsubs", A(59,28,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fmsubs.", A(59,28,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fmadds", A(59,29,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fmadds.", A(59,29,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fnmsubs", A(59,30,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnmsubs.",A(59,30,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fnmadds", A(59,31,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnmadds.",A(59,31,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "stfq", OP(60), OP_MASK, POWER2, { FRS, D, RA } }, + +{ "stfqu", OP(61), OP_MASK, POWER2, { FRS, D, RA } }, + +{ "std", DSO(62,0), DS_MASK, PPC|B64, { RS, DS, RA } }, + +{ "stdu", DSO(62,1), DS_MASK, PPC|B64, { RS, DS, RAS } }, + +{ "fcmpu", X(63,0), X_MASK|(3<<21), PPC|POWER, { BF, FRA, FRB } }, + +{ "frsp", XRC(63,12,0), XRA_MASK, PPC|POWER, { FRT, FRB } }, +{ "frsp.", XRC(63,12,1), XRA_MASK, PPC|POWER, { FRT, FRB } }, + +{ "fctiw", XRC(63,14,0), XRA_MASK, PPC, { FRT, FRB } }, +{ "fcir", XRC(63,14,0), XRA_MASK, POWER2, { FRT, FRB } }, +{ "fctiw.", XRC(63,14,1), XRA_MASK, PPC, { FRT, FRB } }, +{ "fcir.", XRC(63,14,1), XRA_MASK, POWER2, { FRT, FRB } }, + +{ "fctiwz", XRC(63,15,0), XRA_MASK, PPC, { FRT, FRB } }, +{ "fcirz", XRC(63,15,0), XRA_MASK, POWER2, { FRT, FRB } }, +{ "fctiwz.", XRC(63,15,1), XRA_MASK, PPC, { FRT, FRB } }, +{ "fcirz.", XRC(63,15,1), XRA_MASK, POWER2, { FRT, FRB } }, + +{ "fdiv", A(63,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fd", A(63,18,0), AFRC_MASK, POWER, { FRT, FRA, FRB } }, +{ "fdiv.", A(63,18,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fd.", A(63,18,1), AFRC_MASK, POWER, { FRT, FRA, FRB } }, + +{ "fsub", A(63,20,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fs", A(63,20,0), AFRC_MASK, POWER, { FRT, FRA, FRB } }, +{ "fsub.", A(63,20,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fs.", 
A(63,20,1), AFRC_MASK, POWER, { FRT, FRA, FRB } }, + +{ "fadd", A(63,21,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fa", A(63,21,0), AFRC_MASK, POWER, { FRT, FRA, FRB } }, +{ "fadd.", A(63,21,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fa.", A(63,21,1), AFRC_MASK, POWER, { FRT, FRA, FRB } }, + +{ "fsqrt", A(63,22,0), AFRAFRC_MASK, PPC|POWER2, { FRT, FRB } }, +{ "fsqrt.", A(63,22,1), AFRAFRC_MASK, PPC|POWER2, { FRT, FRB } }, + +{ "fsel", A(63,23,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fsel.", A(63,23,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fmul", A(63,25,0), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{ "fm", A(63,25,0), AFRB_MASK, POWER, { FRT, FRA, FRC } }, +{ "fmul.", A(63,25,1), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{ "fm.", A(63,25,1), AFRB_MASK, POWER, { FRT, FRA, FRC } }, + +{ "frsqrte", A(63,26,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "frsqrte.",A(63,26,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fmsub", A(63,28,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fms", A(63,28,0), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, +{ "fmsub.", A(63,28,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fms.", A(63,28,1), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, + +{ "fmadd", A(63,29,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fma", A(63,29,0), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, +{ "fmadd.", A(63,29,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fma.", A(63,29,1), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, + +{ "fnmsub", A(63,30,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnms", A(63,30,0), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, +{ "fnmsub.", A(63,30,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnms.", A(63,30,1), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, + +{ "fnmadd", A(63,31,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnma", A(63,31,0), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, +{ "fnmadd.", A(63,31,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnma.", A(63,31,1), A_MASK, POWER, { FRT,FRA,FRC,FRB } }, + +{ "fcmpo", X(63,30), X_MASK|(3<<21), PPC|POWER, { BF, FRA, FRB } }, + +{ "mtfsb1", XRC(63,38,0), XRARB_MASK, PPC|POWER, { BT } }, +{ "mtfsb1.", XRC(63,38,1), XRARB_MASK, PPC|POWER, { BT } }, + +{ "fneg", XRC(63,40,0), XRA_MASK, PPC|POWER, { FRT, FRB } }, +{ "fneg.", XRC(63,40,1), XRA_MASK, PPC|POWER, { FRT, FRB } }, + +{ "mcrfs", X(63,64), XRB_MASK|(3<<21)|(3<<16), PPC|POWER, { BF, BFA } }, + +{ "mtfsb0", XRC(63,70,0), XRARB_MASK, PPC|POWER, { BT } }, +{ "mtfsb0.", XRC(63,70,1), XRARB_MASK, PPC|POWER, { BT } }, + +{ "fmr", XRC(63,72,0), XRA_MASK, PPC|POWER, { FRT, FRB } }, +{ "fmr.", XRC(63,72,1), XRA_MASK, PPC|POWER, { FRT, FRB } }, + +{ "mtfsfi", XRC(63,134,0), XRA_MASK|(3<<21)|(1<<11), PPC|POWER, { BF, U } }, +{ "mtfsfi.", XRC(63,134,1), XRA_MASK|(3<<21)|(1<<11), PPC|POWER, { BF, U } }, + +{ "fnabs", XRC(63,136,0), XRA_MASK, PPC|POWER, { FRT, FRB } }, +{ "fnabs.", XRC(63,136,1), XRA_MASK, PPC|POWER, { FRT, FRB } }, + +{ "fabs", XRC(63,264,0), XRA_MASK, PPC|POWER, { FRT, FRB } }, +{ "fabs.", XRC(63,264,1), XRA_MASK, PPC|POWER, { FRT, FRB } }, + +{ "mffs", XRC(63,583,0), XRARB_MASK, PPC|POWER, { FRT } }, +{ "mffs.", XRC(63,583,1), XRARB_MASK, PPC|POWER, { FRT } }, + +{ "mtfsf", XFL(63,711,0), XFL_MASK, PPC|POWER, { FLM, FRB } }, +{ "mtfsf.", XFL(63,711,1), XFL_MASK, PPC|POWER, { FLM, FRB } }, + +{ "fctid", XRC(63,814,0), XRA_MASK, PPC|B64, { FRT, FRB } }, +{ "fctid.", XRC(63,814,1), XRA_MASK, PPC|B64, { FRT, FRB } }, + +{ "fctidz", XRC(63,815,0), XRA_MASK, PPC|B64, { FRT, FRB } }, +{ "fctidz.", XRC(63,815,1), XRA_MASK, PPC|B64, { FRT, FRB } }, + +{ "fcfid", XRC(63,846,0), XRA_MASK, PPC|B64, { FRT, FRB } }, +{ "fcfid.", 
XRC(63,846,1), XRA_MASK, PPC|B64, { FRT, FRB } }, + +}; + +const int powerpc_num_opcodes = + sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]); + +/* The macro table. This is only used by the assembler. */ + +const struct powerpc_macro powerpc_macros[] = { +{ "extldi", 4, PPC|B64, "rldicr %0,%1,%3,(%2)-1" }, +{ "extldi.", 4, PPC|B64, "rldicr. %0,%1,%3,(%2)-1" }, +{ "extrdi", 4, PPC|B64, "rldicl %0,%1,(%2)+(%3),64-(%2)" }, +{ "extrdi.", 4, PPC|B64, "rldicl. %0,%1,(%2)+(%3),64-(%2)" }, +{ "insrdi", 4, PPC|B64, "rldimi %0,%1,64-((%2)+(%3)),%3" }, +{ "insrdi.", 4, PPC|B64, "rldimi. %0,%1,64-((%2)+(%3)),%3" }, +{ "rotrdi", 3, PPC|B64, "rldicl %0,%1,64-(%2),0" }, +{ "rotrdi.", 3, PPC|B64, "rldicl. %0,%1,64-(%2),0" }, +{ "sldi", 3, PPC|B64, "rldicr %0,%1,%2,63-(%2)" }, +{ "sldi.", 3, PPC|B64, "rldicr. %0,%1,%2,63-(%2)" }, +{ "srdi", 3, PPC|B64, "rldicl %0,%1,64-(%2),%2" }, +{ "srdi.", 3, PPC|B64, "rldicl. %0,%1,64-(%2),%2" }, +{ "clrrdi", 3, PPC|B64, "rldicr %0,%1,0,63-(%2)" }, +{ "clrrdi.", 3, PPC|B64, "rldicr. %0,%1,0,63-(%2)" }, +{ "clrlsldi",4, PPC|B64, "rldic %0,%1,%3,(%2)-(%3)" }, +{ "clrlsldi.",4, PPC|B64, "rldic. %0,%1,%3,(%2)-(%3)" }, + +{ "extlwi", 4, PPC, "rlwinm %0,%1,%3,0,(%2)-1" }, +{ "extlwi.", 4, PPC, "rlwinm. %0,%1,%3,0,(%2)-1" }, +{ "extrwi", 4, PPC, "rlwinm %0,%1,(%2)+(%3),32-(%2),31" }, +{ "extrwi.", 4, PPC, "rlwinm. %0,%1,(%2)+(%3),32-(%2),31" }, +{ "inslwi", 4, PPC, "rlwimi %0,%1,32-(%3),%3,(%2)+(%3)-1" }, +{ "inslwi.", 4, PPC, "rlwimi. %0,%1,32-(%3),%3,(%2)+(%3)-1" }, +{ "insrwi", 4, PPC, "rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1" }, +{ "insrwi.", 4, PPC, "rlwimi. %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, +{ "rotrwi", 3, PPC, "rlwinm %0,%1,32-(%2),0,31" }, +{ "rotrwi.", 3, PPC, "rlwinm. %0,%1,32-(%2),0,31" }, +{ "slwi", 3, PPC, "rlwinm %0,%1,%2,0,31-(%2)" }, +{ "sli", 3, POWER, "rlinm %0,%1,%2,0,31-(%2)" }, +{ "slwi.", 3, PPC, "rlwinm. %0,%1,%2,0,31-(%2)" }, +{ "sli.", 3, POWER, "rlinm. %0,%1,%2,0,31-(%2)" }, +{ "srwi", 3, PPC, "rlwinm %0,%1,32-(%2),%2,31" }, +{ "sri", 3, POWER, "rlinm %0,%1,32-(%2),%2,31" }, +{ "srwi.", 3, PPC, "rlwinm. %0,%1,32-(%2),%2,31" }, +{ "sri.", 3, POWER, "rlinm. %0,%1,32-(%2),%2,31" }, +{ "clrrwi", 3, PPC, "rlwinm %0,%1,0,0,31-(%2)" }, +{ "clrrwi.", 3, PPC, "rlwinm. %0,%1,0,0,31-(%2)" }, +{ "clrlslwi",4, PPC, "rlwinm %0,%1,%3,(%2)-(%3),31-(%3)" }, +{ "clrlslwi.",4, PPC, "rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)" }, + +}; + +const int powerpc_num_macros = + sizeof (powerpc_macros) / sizeof (powerpc_macros[0]); diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/ppc.h linuxppc64_2_4/arch/ppc64/xmon/ppc.h --- ../kernel.org/linux/arch/ppc64/xmon/ppc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/ppc.h Fri May 4 17:13:59 2001 @@ -0,0 +1,240 @@ +/* ppc.h -- Header file for PowerPC opcode table + Copyright 1994 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + +This file is part of GDB, GAS, and the GNU binutils. + +GDB, GAS, and the GNU binutils are free software; you can redistribute +them and/or modify them under the terms of the GNU General Public +License as published by the Free Software Foundation; either version +1, or (at your option) any later version. + +GDB, GAS, and the GNU binutils are distributed in the hope that they +will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this file; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef PPC_H +#define PPC_H + +/* The opcode table is an array of struct powerpc_opcode. */ + +struct powerpc_opcode +{ + /* The opcode name. */ + const char *name; + + /* The opcode itself. Those bits which will be filled in with + operands are zeroes. */ + unsigned long opcode; + + /* The opcode mask. This is used by the disassembler. This is a + mask containing ones indicating those bits which must match the + opcode field, and zeroes indicating those bits which need not + match (and are presumably filled in by operands). */ + unsigned long mask; + + /* One bit flags for the opcode. These are used to indicate which + specific processors support the instructions. The defined values + are listed below. */ + unsigned long flags; + + /* An array of operand codes. Each code is an index into the + operand table. They appear in the order which the operands must + appear in assembly code, and are terminated by a zero. */ + unsigned char operands[8]; +}; + +/* The table itself is sorted by major opcode number, and is otherwise + in the order in which the disassembler should consider + instructions. */ +extern const struct powerpc_opcode powerpc_opcodes[]; +extern const int powerpc_num_opcodes; + +/* Values defined for the flags field of a struct powerpc_opcode. */ + +/* Opcode is defined for the PowerPC architecture. */ +#define PPC_OPCODE_PPC (01) + +/* Opcode is defined for the POWER (RS/6000) architecture. */ +#define PPC_OPCODE_POWER (02) + +/* Opcode is defined for the POWER2 (Rios 2) architecture. */ +#define PPC_OPCODE_POWER2 (04) + +/* Opcode is only defined on 32 bit architectures. */ +#define PPC_OPCODE_32 (010) + +/* Opcode is only defined on 64 bit architectures. */ +#define PPC_OPCODE_64 (020) + +/* Opcode is supported by the Motorola PowerPC 601 processor. The 601 + is assumed to support all PowerPC (PPC_OPCODE_PPC) instructions, + but it also supports many additional POWER instructions. */ +#define PPC_OPCODE_601 (040) + +/* A macro to extract the major opcode from an instruction. */ +#define PPC_OP(i) (((i) >> 26) & 0x3f) + +/* The operands table is an array of struct powerpc_operand. */ + +struct powerpc_operand +{ + /* The number of bits in the operand. */ + int bits; + + /* How far the operand is left shifted in the instruction. */ + int shift; + + /* Insertion function. This is used by the assembler. To insert an + operand value into an instruction, check this field. + + If it is NULL, execute + i |= (op & ((1 << o->bits) - 1)) << o->shift; + (i is the instruction which we are filling in, o is a pointer to + this structure, and op is the opcode value; this assumes twos + complement arithmetic). + + If this field is not NULL, then simply call it with the + instruction and the operand value. It will return the new value + of the instruction. If the ERRMSG argument is not NULL, then if + the operand value is illegal, *ERRMSG will be set to a warning + string (the operand will be inserted in any case). If the + operand value is legal, *ERRMSG will be unchanged (most operands + can accept any value). */ + unsigned long (*insert) PARAMS ((unsigned long instruction, long op, + const char **errmsg)); + + /* Extraction function. This is used by the disassembler. To + extract this operand type from an instruction, check this field. 
+ + If it is NULL, compute + op = ((i) >> o->shift) & ((1 << o->bits) - 1); + if ((o->flags & PPC_OPERAND_SIGNED) != 0 + && (op & (1 << (o->bits - 1))) != 0) + op -= 1 << o->bits; + (i is the instruction, o is a pointer to this structure, and op + is the result; this assumes twos complement arithmetic). + + If this field is not NULL, then simply call it with the + instruction value. It will return the value of the operand. If + the INVALID argument is not NULL, *INVALID will be set to + non-zero if this operand type can not actually be extracted from + this operand (i.e., the instruction does not match). If the + operand is valid, *INVALID will not be changed. */ + long (*extract) PARAMS ((unsigned long instruction, int *invalid)); + + /* One bit syntax flags. */ + unsigned long flags; +}; + +/* Elements in the table are retrieved by indexing with values from + the operands field of the powerpc_opcodes table. */ + +extern const struct powerpc_operand powerpc_operands[]; + +/* Values defined for the flags field of a struct powerpc_operand. */ + +/* This operand takes signed values. */ +#define PPC_OPERAND_SIGNED (01) + +/* This operand takes signed values, but also accepts a full positive + range of values when running in 32 bit mode. That is, if bits is + 16, it takes any value from -0x8000 to 0xffff. In 64 bit mode, + this flag is ignored. */ +#define PPC_OPERAND_SIGNOPT (02) + +/* This operand does not actually exist in the assembler input. This + is used to support extended mnemonics such as mr, for which two + operands fields are identical. The assembler should call the + insert function with any op value. The disassembler should call + the extract function, ignore the return value, and check the value + placed in the valid argument. */ +#define PPC_OPERAND_FAKE (04) + +/* The next operand should be wrapped in parentheses rather than + separated from this one by a comma. This is used for the load and + store instructions which want their operands to look like + reg,displacement(reg) + */ +#define PPC_OPERAND_PARENS (010) + +/* This operand may use the symbolic names for the CR fields, which + are + lt 0 gt 1 eq 2 so 3 un 3 + cr0 0 cr1 1 cr2 2 cr3 3 + cr4 4 cr5 5 cr6 6 cr7 7 + These may be combined arithmetically, as in cr2*4+gt. These are + only supported on the PowerPC, not the POWER. */ +#define PPC_OPERAND_CR (020) + +/* This operand names a register. The disassembler uses this to print + register names with a leading 'r'. */ +#define PPC_OPERAND_GPR (040) + +/* This operand names a floating point register. The disassembler + prints these with a leading 'f'. */ +#define PPC_OPERAND_FPR (0100) + +/* This operand is a relative branch displacement. The disassembler + prints these symbolically if possible. */ +#define PPC_OPERAND_RELATIVE (0200) + +/* This operand is an absolute branch address. The disassembler + prints these symbolically if possible. */ +#define PPC_OPERAND_ABSOLUTE (0400) + +/* This operand is optional, and is zero if omitted. This is used for + the optional BF and L fields in the comparison instructions. The + assembler must count the number of operands remaining on the line, + and the number of operands remaining for the opcode, and decide + whether this operand is present or not. The disassembler should + print this operand out only if it is not zero. */ +#define PPC_OPERAND_OPTIONAL (01000) + +/* This flag is only used with PPC_OPERAND_OPTIONAL. 
If this operand
+   is omitted, then for the next operand use this operand value plus
+   1, ignoring the next operand field for the opcode.  This wretched
+   hack is needed because the Power rotate instructions can take
+   either 4 or 5 operands.  The disassembler should print this operand
+   out regardless of the PPC_OPERAND_OPTIONAL field.  */
+#define PPC_OPERAND_NEXT (02000)
+
+/* This operand should be regarded as a negative number for the
+   purposes of overflow checking (i.e., the normal most negative
+   number is disallowed and one more than the normal most positive
+   number is allowed).  This flag will only be set for a signed
+   operand.  */
+#define PPC_OPERAND_NEGATIVE (04000)
+
+/* The POWER and PowerPC assemblers use a few macros.  We keep them
+   with the operands table for simplicity.  The macro table is an
+   array of struct powerpc_macro.  */
+
+struct powerpc_macro
+{
+  /* The macro name.  */
+  const char *name;
+
+  /* The number of operands the macro takes.  */
+  unsigned int operands;
+
+  /* One bit flags for the opcode.  These are used to indicate which
+     specific processors support the instructions.  The values are the
+     same as those for the struct powerpc_opcode flags field.  */
+  unsigned long flags;
+
+  /* A format string to turn the macro into a normal instruction.
+     Each %N in the string is replaced with operand number N (zero
+     based).  */
+  const char *format;
+};
+
+extern const struct powerpc_macro powerpc_macros[];
+extern const int powerpc_num_macros;
+
+#endif /* PPC_H */
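Editor's aside, not part of the patch: the struct powerpc_operand comments above describe the default operand insertion and extraction paths in prose. The stand-alone C sketch below writes those two defaults out under the stated formulas; struct ppc_operand_sketch and the SKETCH_ names are invented for illustration, and the insert/extract function-pointer hooks are omitted.

    #include <stdio.h>

    #define SKETCH_OPERAND_SIGNED 01

    /* A stand-in for struct powerpc_operand with only the data fields. */
    struct ppc_operand_sketch {
        int bits;            /* width of the field */
        int shift;           /* left shift within the instruction */
        unsigned long flags; /* e.g. SKETCH_OPERAND_SIGNED */
    };

    /* Default insertion: mask the value to o->bits and shift it into place. */
    static unsigned long
    insert_default(unsigned long insn, long op, const struct ppc_operand_sketch *o)
    {
        return insn | ((op & ((1UL << o->bits) - 1)) << o->shift);
    }

    /* Default extraction: pull the field back out, sign-extending if flagged. */
    static long
    extract_default(unsigned long insn, const struct ppc_operand_sketch *o)
    {
        long op = (insn >> o->shift) & ((1UL << o->bits) - 1);

        if ((o->flags & SKETCH_OPERAND_SIGNED) != 0
            && (op & (1UL << (o->bits - 1))) != 0)
            op -= 1UL << o->bits;
        return op;
    }

    int main(void)
    {
        /* A 16-bit signed displacement in the low half-word, as the D field is. */
        struct ppc_operand_sketch d = { 16, 0, SKETCH_OPERAND_SIGNED };
        unsigned long insn = insert_default(0, -8, &d);

        printf("field=0x%04lx value=%ld\n", insn & 0xffff, extract_default(insn, &d));
        return 0;
    }

Built on its own, the demo stores -8 in the signed field and recovers it, printing field=0xfff8 value=-8.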
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/privinst.h linuxppc64_2_4/arch/ppc64/xmon/privinst.h
--- ../kernel.org/linux/arch/ppc64/xmon/privinst.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/xmon/privinst.h Fri Sep 14 20:00:19 2001
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include
+
+#define GETREG(reg) \
+ static inline unsigned long get_ ## reg (void) \
+ { unsigned long ret; asm volatile ("mf" #reg " %0" : "=r" (ret) :); return ret; }
+
+#define SETREG(reg) \
+ static inline void set_ ## reg (unsigned long val) \
+ { asm volatile ("mt" #reg " %0" : : "r" (val)); }
+
+GETREG(msr)
+SETREG(msrd)
+GETREG(cr)
+
+#define GSETSPR(n, name) \
+ static inline long get_ ## name (void) \
+ { long ret; asm volatile ("mfspr %0," #n : "=r" (ret) : ); return ret; } \
+ static inline void set_ ## name (long val) \
+ { asm volatile ("mtspr " #n ",%0" : : "r" (val)); }
+
+GSETSPR(0, mq)
+GSETSPR(1, xer)
+GSETSPR(4, rtcu)
+GSETSPR(5, rtcl)
+GSETSPR(8, lr)
+GSETSPR(9, ctr)
+GSETSPR(18, dsisr)
+GSETSPR(19, dar)
+GSETSPR(22, dec)
+GSETSPR(25, sdr1)
+GSETSPR(26, srr0)
+GSETSPR(27, srr1)
+GSETSPR(272, sprg0)
+GSETSPR(273, sprg1)
+GSETSPR(274, sprg2)
+GSETSPR(275, sprg3)
+GSETSPR(282, ear)
+GSETSPR(287, pvr)
+GSETSPR(528, bat0u)
+GSETSPR(529, bat0l)
+GSETSPR(530, bat1u)
+GSETSPR(531, bat1l)
+GSETSPR(532, bat2u)
+GSETSPR(533, bat2l)
+GSETSPR(534, bat3u)
+GSETSPR(535, bat3l)
+GSETSPR(1008, hid0)
+GSETSPR(1009, hid1)
+GSETSPR(1010, iabr)
+GSETSPR(1013, dabr)
+GSETSPR(1023, pir)
+
+static inline int get_sr(int n)
+{
+ int ret = 0; /* initialized: the mfsrin below is compiled out */
+
+#if 0
+// DRENG does not assemble
+ asm (" mfsrin %0,%1" : "=r" (ret) : "r" (n << 28));
+#endif
+ return ret;
+}
+
+static inline void set_sr(int n, int val)
+{
+#if 0
+// DRENG does not assemble
+ asm ("mtsrin %0,%1" : : "r" (val), "r" (n << 28));
+#endif
+}
+
+static inline void store_inst(void *p)
+{
+ asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p));
+}
+
+static inline void cflush(void *p)
+{
+ asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
+}
+
+static inline void cinval(void *p)
+{
+ asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p));
+}
+
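Editor's aside, not part of the patch: the GETREG/SETREG/GSETSPR macros above stamp out one accessor pair per register using preprocessor stringizing (#) and token pasting (##). A minimal user-space sketch of the same technique, with the privileged mfspr/mtspr instructions replaced by a plain array so it runs anywhere; SKETCH_GSETSPR and fake_spr are invented names.

    #include <stdio.h>

    /* Stand-in "special purpose registers" so the sketch runs in user space. */
    static unsigned long fake_spr[1024];

    /* Same shape as GSETSPR in privinst.h: ## pastes the accessor name,
       and the SPR number n is baked into each function body. */
    #define SKETCH_GSETSPR(n, name) \
    static unsigned long get_ ## name (void) { return fake_spr[n]; } \
    static void set_ ## name (unsigned long val) { fake_spr[n] = val; }

    SKETCH_GSETSPR(26, srr0)   /* defines get_srr0() and set_srr0() */
    SKETCH_GSETSPR(27, srr1)   /* defines get_srr1() and set_srr1() */

    int main(void)
    {
        set_srr0(0x42f0UL);
        set_srr1(0x8000UL);
        printf("srr0 = 0x%lx, srr1 = 0x%lx\n", get_srr0(), get_srr1());
        return 0;
    }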
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/setjmp.c linuxppc64_2_4/arch/ppc64/xmon/setjmp.c
--- ../kernel.org/linux/arch/ppc64/xmon/setjmp.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/xmon/setjmp.c Mon May 7 12:48:56 2001
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * NB this file must be compiled with -O2.
+ */
+
+int
+xmon_setjmp(long *buf)	/* NOTE: buf must hold at least 184 bytes (23 longs) */
+{
+	/* XXX should save fp regs as well */
+	asm volatile (
+	"mflr 0; std 0,0(%0)\n\
+	 std 1,8(%0)\n\
+	 std 2,16(%0)\n\
+	 mfcr 0; std 0,24(%0)\n\
+	 std 13,32(%0)\n\
+	 std 14,40(%0)\n\
+	 std 15,48(%0)\n\
+	 std 16,56(%0)\n\
+	 std 17,64(%0)\n\
+	 std 18,72(%0)\n\
+	 std 19,80(%0)\n\
+	 std 20,88(%0)\n\
+	 std 21,96(%0)\n\
+	 std 22,104(%0)\n\
+	 std 23,112(%0)\n\
+	 std 24,120(%0)\n\
+	 std 25,128(%0)\n\
+	 std 26,136(%0)\n\
+	 std 27,144(%0)\n\
+	 std 28,152(%0)\n\
+	 std 29,160(%0)\n\
+	 std 30,168(%0)\n\
+	 std 31,176(%0)\n\
+	" : : "r" (buf));
+	return 0;
+}
+
+void
+xmon_longjmp(long *buf, int val)
+{
+	if (val == 0)
+		val = 1;
+	asm volatile (
+	"ld 13,32(%0)\n\
+	 ld 14,40(%0)\n\
+	 ld 15,48(%0)\n\
+	 ld 16,56(%0)\n\
+	 ld 17,64(%0)\n\
+	 ld 18,72(%0)\n\
+	 ld 19,80(%0)\n\
+	 ld 20,88(%0)\n\
+	 ld 21,96(%0)\n\
+	 ld 22,104(%0)\n\
+	 ld 23,112(%0)\n\
+	 ld 24,120(%0)\n\
+	 ld 25,128(%0)\n\
+	 ld 26,136(%0)\n\
+	 ld 27,144(%0)\n\
+	 ld 28,152(%0)\n\
+	 ld 29,160(%0)\n\
+	 ld 30,168(%0)\n\
+	 ld 31,176(%0)\n\
+	 ld 0,24(%0)\n\
+	 mtcrf 0x38,0\n\
+	 ld 0,0(%0)\n\
+	 ld 1,8(%0)\n\
+	 ld 2,16(%0)\n\
+	 mtlr 0\n\
+	 mr 3,%1\n\
+	" : : "r" (buf), "r" (val));
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/start.c linuxppc64_2_4/arch/ppc64/xmon/start.c
--- ../kernel.org/linux/arch/ppc64/xmon/start.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/xmon/start.c Tue Sep 25 13:53:20 2001
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Transition to udbg isn't quite done yet...but very close.
 */
+#define USE_UDBG
+#ifdef USE_UDBG
+#include
+#endif
+
+#ifndef USE_UDBG
+static volatile unsigned char *sccc, *sccd;
+#endif
+unsigned long TXRDY, RXRDY;
+extern void xmon_printf(const char *fmt, ...);
+static int xmon_expect(const char *str, unsigned int timeout);
+
+#ifndef USE_UDBG
+static int console = 0;
+#endif
+static int via_modem = 0;
+/* static int xmon_use_sccb = 0;  --Unused */
+
+#define TB_SPEED	25000000
+
+extern void *comport1;
+static inline unsigned int readtb(void)
+{
+	unsigned int ret;
+
+	asm volatile("mftb %0" : "=r" (ret) :);
+	return ret;
+}
+
+#ifndef USE_UDBG
+void buf_access(void)
+{
+	sccd[3] &= ~0x80;	/* reset DLAB */
+}
+#endif
+
+extern int adb_init(void);
+
+static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, struct kbd_struct *kbd, struct tty_struct *tty)
+{
+	xmon(pt_regs);
+}
+static struct sysrq_key_op sysrq_xmon_op =
+{
+	handler:	sysrq_handle_xmon,
+	help_msg:	"xmon",
+	action_msg:	"Entering xmon\n",
+};
+
+void
+xmon_map_scc(void)
+{
+	/* This maybe isn't the best place to register sysrq 'x' */
+	__sysrq_put_key_op('x', &sysrq_xmon_op);
+#ifndef USE_UDBG
+	/* should already be mapped by the kernel boot */
+	sccd = (volatile unsigned char *) (((unsigned long)comport1));
+	sccc = (volatile unsigned char *) (((unsigned long)comport1)+5);
+	TXRDY = 0x20;
+	RXRDY = 1;
+#endif
+}
+
+static int scc_initialized = 0;
+
+void xmon_init_scc(void);
+extern void pmu_poll(void);
+
+int
+xmon_write(void *handle, void *ptr, int nb)
+{
+#ifdef USE_UDBG
+	return udbg_write(ptr, nb);
+#else
+	char *p = ptr;
+	int i, c, ct;
+
+	if (!scc_initialized)
+		xmon_init_scc();
+	ct = 0;
+	for (i = 0; i < nb; ++i) {
+		while ((*sccc & TXRDY) == 0) {
+		}
+		c = p[i];
+		if (c == '\n' && !ct) {
+			c = '\r';
+			ct = 1;
+			--i;
+		} else {
+			if (console)
+				printk("%c", c);
+			ct = 0;
+		}
+		buf_access();
+		*sccd = c;
+	}
+	return i;
+#endif
+}
+
+int xmon_wants_key;
+int xmon_adb_keycode;
+
+int
+xmon_read(void *handle, void *ptr, int nb)
+{
+#ifdef USE_UDBG
+	return udbg_read(ptr, nb);
+#else
+	char *p = ptr;
+	int i, c;
+
+	if (!scc_initialized)
+		xmon_init_scc();
+	for (i = 0; i < nb; ++i) {
+		do {
+			while ((*sccc & RXRDY) == 0)
+				;
+			buf_access();
+			c = *sccd;
+		} while (c == 0x11 || c == 0x13);
+		*p++ = c;
+	}
+	return i;
+#endif
+}
+
+int
+xmon_read_poll(void)
+{
+#ifdef USE_UDBG
+	return udbg_getc_poll();
+#else
+	if ((*sccc & RXRDY) == 0) {
+		return -1;
+	}
+	buf_access();
+	return *sccd;
+#endif
+}
+
+void
+xmon_init_scc()
+{
+#ifndef USE_UDBG
+	sccd[3] = 0x83; eieio();	/* LCR = 8N1 + DLAB */
+	sccd[0] = 12;	eieio();	/* DLL = 9600 baud */
+	sccd[1] = 0;	eieio();
+	sccd[2] = 0;	eieio();	/* FCR = 0 */
+	sccd[3] = 3;	eieio();	/* LCR = 8N1 */
+	sccd[1] = 0;	eieio();	/* IER = 0 */
+#endif
+
+	scc_initialized = 1;
+	if (via_modem) {
+		for (;;) {
+			xmon_write(0, "ATE1V1\r", 7);
+			if (xmon_expect("OK", 5)) {
+				xmon_write(0, "ATA\r", 4);
+				if (xmon_expect("CONNECT", 40))
+					break;
+			}
+			xmon_write(0, "+++", 3);
+			xmon_expect("OK", 3);
+		}
+	}
+}
+
+void *xmon_stdin;
+void *xmon_stdout;
+void *xmon_stderr;
+
+void
+xmon_init(void)
+{
+}
+
+int
+xmon_putc(int c, void *f)
+{
+	char ch = c;
+
+	if (c == '\n')
+		xmon_putc('\r', f);
+	return xmon_write(f, &ch, 1) == 1? c: -1;
+}
+
+int
+xmon_putchar(int c)
+{
+	return xmon_putc(c, xmon_stdout);
+}
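Editor's aside, not part of the patch: xmon_init_scc() above programs a 16550-style UART by setting DLAB and writing 12 to the divisor latch. Assuming the standard 1.8432 MHz UART clock (1843200 / 16 = 115200), the divisor for a given speed is 115200 / baud, which is where the 12 for 9600 baud comes from; whether comport1's part actually uses that clock is an assumption here. A small sketch of the arithmetic:

    #include <stdio.h>

    /* On a 16550-style UART the divisor latch value is
       (input clock / 16) / baud; with the usual 1.8432 MHz part that is
       115200 / baud, so a DLL of 12 selects 9600 baud. */
    int main(void)
    {
        static const unsigned bauds[] = { 1200, 2400, 9600, 19200, 38400, 115200 };
        unsigned i;

        for (i = 0; i < sizeof(bauds) / sizeof(bauds[0]); ++i)
            printf("baud %6u -> divisor %u\n", bauds[i], 115200 / bauds[i]);
        return 0;
    }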
+int
+xmon_fputs(char *str, void *f)
+{
+	int n = strlen(str);
+
+	return xmon_write(f, str, n) == n? 0: -1;
+}
+
+int
+xmon_readchar(void)
+{
+	char ch;
+
+	for (;;) {
+		switch (xmon_read(xmon_stdin, &ch, 1)) {
+		case 1:
+			return ch;
+		case -1:
+			xmon_printf("read(stdin) returned -1\r\n", 0, 0);
+			return -1;
+		}
+	}
+}
+
+static char line[256];
+static char *lineptr;
+static int lineleft;
+
+int xmon_expect(const char *str, unsigned int timeout)
+{
+	int c;
+	unsigned int t0;
+
+	timeout *= TB_SPEED;
+	t0 = readtb();
+	do {
+		lineptr = line;
+		for (;;) {
+			c = xmon_read_poll();
+			if (c == -1) {
+				if (readtb() - t0 > timeout)
+					return 0;
+				continue;
+			}
+			if (c == '\n')
+				break;
+			if (c != '\r' && lineptr < &line[sizeof(line) - 1])
+				*lineptr++ = c;
+		}
+		*lineptr = 0;
+	} while (strstr(line, str) == NULL);
+	return 1;
+}
+
+int
+xmon_getchar(void)
+{
+	int c;
+
+	if (lineleft == 0) {
+		lineptr = line;
+		for (;;) {
+			c = xmon_readchar();
+			if (c == -1 || c == 4)
+				break;
+			if (c == '\r' || c == '\n') {
+				*lineptr++ = '\n';
+				xmon_putchar('\n');
+				break;
+			}
+			switch (c) {
+			case 0177:
+			case '\b':
+				if (lineptr > line) {
+					xmon_putchar('\b');
+					xmon_putchar(' ');
+					xmon_putchar('\b');
+					--lineptr;
+				}
+				break;
+			case 'U' & 0x1F:
+				while (lineptr > line) {
+					xmon_putchar('\b');
+					xmon_putchar(' ');
+					xmon_putchar('\b');
+					--lineptr;
+				}
+				break;
+			default:
+				if (lineptr >= &line[sizeof(line) - 1])
+					xmon_putchar('\a');
+				else {
+					xmon_putchar(c);
+					*lineptr++ = c;
+				}
+			}
+		}
+		lineleft = lineptr - line;
+		lineptr = line;
+	}
+	if (lineleft == 0)
+		return -1;
+	--lineleft;
+	return *lineptr++;
+}
+
+char *
+xmon_fgets(char *str, int nb, void *f)
+{
+	char *p;
+	int c;
+
+	for (p = str; p < str + nb - 1; ) {
+		c = xmon_getchar();
+		if (c == -1) {
+			if (p == str)
+				return 0;
+			break;
+		}
+		*p++ = c;
+		if (c == '\n')
+			break;
+	}
+	*p = 0;
+	return str;
+}
diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/subr_prf.c linuxppc64_2_4/arch/ppc64/xmon/subr_prf.c
--- ../kernel.org/linux/arch/ppc64/xmon/subr_prf.c Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/arch/ppc64/xmon/subr_prf.c Mon May 7 12:48:56 2001
@@ -0,0 +1,55 @@
+/*
+ * Written by Cort Dougan to replace the version originally used
+ * by Paul Mackerras, which came from NetBSD and thus had copyright
+ * conflicts with Linux.
+ *
+ * This file makes liberal use of the standard linux utility
+ * routines to reduce the size of the binary.  We assume we can
+ * trust some parts of Linux inside the debugger.
+ *   -- Cort (cort@cs.nmt.edu)
+ *
+ * Copyright (C) 1999 Cort Dougan.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include
+#include
+#include
+#include "nonstdio.h"
+
+extern int xmon_write(void *, void *, int);
+
+void
+xmon_vfprintf(void *f, const char *fmt, va_list ap)
+{
+	static char xmon_buf[2048];
+	int n;
+
+	n = vsprintf(xmon_buf, fmt, ap);
+	xmon_write(f, xmon_buf, n);
+}
+
+void
+xmon_printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	xmon_vfprintf(stdout, fmt, ap);
+	va_end(ap);
+}
+
+void
+xmon_fprintf(void *f, const char *fmt, ...)
+{ + va_list ap; + + va_start(ap, fmt); + xmon_vfprintf(f, fmt, ap); + va_end(ap); +} + diff -uNr --exclude=CVS ../kernel.org/linux/arch/ppc64/xmon/xmon.c linuxppc64_2_4/arch/ppc64/xmon/xmon.c --- ../kernel.org/linux/arch/ppc64/xmon/xmon.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/xmon/xmon.c Wed Oct 31 23:31:15 2001 @@ -0,0 +1,2957 @@ +/* + * Routines providing a simple monitor for use on the PowerMac. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nonstdio.h" +#include "privinst.h" +#include + +#include + +#include + +#define scanhex xmon_scanhex +#define skipbl xmon_skipbl + +#ifdef CONFIG_SMP +static unsigned long cpus_in_xmon = 0; +static unsigned long got_xmon = 0; +static volatile int take_xmon = -1; +#endif /* CONFIG_SMP */ + +static unsigned long adrs; +static int size = 1; +static unsigned long ndump = 64; +static unsigned long nidump = 16; +static unsigned long ncsum = 4096; +static int termch; + +static u_int bus_error_jmp[100]; +#define setjmp xmon_setjmp +#define longjmp xmon_longjmp + +#define memlist_entry list_entry +#define memlist_next(x) ((x)->next) +#define memlist_prev(x) ((x)->prev) + + +/* Max number of stack frames we are willing to produce on a backtrace. */ +#define MAXFRAMECOUNT 50 + +/* Breakpoint stuff */ +struct bpt { + unsigned long address; + unsigned instr; + unsigned long count; + unsigned char enabled; + char funcname[64]; /* function name for humans */ +}; + +#define NBPTS 16 +static struct bpt bpts[NBPTS]; +static struct bpt dabr; +static struct bpt iabr; +static unsigned bpinstr = 0x7fe00008; /* trap */ + +/* Prototypes */ +extern void (*debugger_fault_handler)(struct pt_regs *); +static int cmds(struct pt_regs *); +static int mread(unsigned long, void *, int); +static int mwrite(unsigned long, void *, int); +static void handle_fault(struct pt_regs *); +static void byterev(unsigned char *, int); +static void memex(void); +static int bsesc(void); +static void dump(void); +static void prdump(unsigned long, long); +#ifdef __MWERKS__ +static void prndump(unsigned, int); +static int nvreadb(unsigned); +#endif +static int ppc_inst_dump(unsigned long, long); +void print_address(unsigned long); +static int getsp(void); +static void dump_hash_table(void); +static void backtrace(struct pt_regs *); +static void excprint(struct pt_regs *); +static void prregs(struct pt_regs *); +static void memops(int); +static void memlocate(void); +static void memzcan(void); +static void memdiffs(unsigned char *, unsigned char *, unsigned, unsigned); +int skipbl(void); +int scanhex(unsigned long *valp); +static void scannl(void); +static int hexdigit(int); +void getstring(char *, int); +static void flush_input(void); +static int inchar(void); +static void take_input(char *); +/* static void openforth(void); */ +static unsigned long read_spr(int); +static void write_spr(int, unsigned long); +static void super_regs(void); +static void print_sysmap(void); +static void remove_bpts(void); +static void insert_bpts(void); +static struct bpt *at_breakpoint(unsigned long pc); +static void bpt_cmds(void); +static void cacheflush(void); +#ifdef CONFIG_SMP +static void 
cpu_cmd(void); +#endif /* CONFIG_SMP */ +static void csum(void); +static void mem_translate(void); +static void mem_check(void); +static void mem_find_real(void); +static void mem_find_vsid(void); +static void mem_check_full_group(void); +static void mem_check_pagetable_vsids (void); + +static void mem_map_check_slab(void); +static void mem_map_lock_pages(void); +static void mem_map_check_hash(void); +static void mem_check_dup_rpn (void); +static void show_task(struct task_struct * p); +static void xmon_show_state(void); +static void debug_trace(void); + +extern int print_insn_big_powerpc(FILE *, unsigned long, unsigned long); +extern void printf(const char *fmt, ...); +extern void xmon_vfprintf(void *f, const char *fmt, va_list ap); +extern int xmon_putc(int c, void *f); +extern int putchar(int ch); +extern int xmon_read_poll(void); +extern int setjmp(u_int *); +extern void longjmp(u_int *, int); +extern unsigned long _ASR; +extern struct Naca *naca; + +pte_t *find_linux_pte(pgd_t *pgdir, unsigned long va); /* from htab.c */ + +#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3]) + +static char *help_string = "\ +Commands:\n\ + b show breakpoints\n\ + bd set data breakpoint\n\ + bi set instruction breakpoint\n\ + bc clear breakpoint\n\ + d dump bytes\n\ + di dump instructions\n\ + df dump float values\n\ + dd dump double values\n\ + e print exception information\n\ + f flush cache\n\ + h dump hash table\n\ + m examine/change memory\n\ + mm move a block of memory\n\ + ms set a block of memory\n\ + md compare two blocks of memory\n\ + ml locate a block of memory\n\ + mz zero a block of memory\n\ + mx translation information for an effective address\n\ + mi show information about memory allocation\n\ + M print System.map\n\ + p show the task list\n\ + r print registers\n\ + s single step\n\ + S print special registers\n\ + t print backtrace\n\ + T Enable/Disable PPCDBG flags\n\ + x exit monitor\n\ + z reboot\n\ +"; + +static int xmon_trace[NR_CPUS]; +#define SSTEP 1 /* stepping because of 's' command */ +#define BRSTEP 2 /* stepping over breakpoint */ + +/* + * Stuff for reading and writing memory safely + */ +extern inline void sync(void) +{ + asm volatile("sync; isync"); +} + +extern inline void __delay(unsigned int loops) +{ + if (loops != 0) + __asm__ __volatile__("mtctr %0; 1: bdnz 1b" : : + "r" (loops) : "ctr"); +} + +/* (Ref: 64-bit PowerPC ELF ABI Spplement; Ian Lance Taylor, Zembu Labs). + A PPC stack frame looks like this: + + High Address + Back Chain + FP reg save area + GP reg save area + Local var space + Parameter save area (SP+48) + TOC save area (SP+40) + link editor doubleword (SP+32) + compiler doubleword (SP+24) + LR save (SP+16) + CR save (SP+8) + Back Chain (SP+0) + + Note that the LR (ret addr) may not be saved in the current frame if + no functions have been called from the current function. + */ + +/* + A traceback table typically follows each function. + The find_tb_table() func will fill in this struct. Note that the struct + is not an exact match with the encoded table defined by the ABI. It is + defined here more for programming convenience. 
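+
+   As a sketch, the packed flags doubleword is unpacked on the fly by
+   find_tb_table() further down in this file, roughly:
+
+	version    = (flags >> 56) & 0xff;	(must be 0 for a valid table)
+	fp_saved   = (flags >> 24) & 0x3f;
+	gpr_saved  = (flags >> 16) & 0x3f;
+	fixedparms = (flags >>  8) & 0xff;
+	floatparms = (flags >>  1) & 0x7f;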
+ */
+struct tbtable {
+	unsigned long	flags;			/* flags: */
+#define TBTAB_FLAGSGLOBALLINK	(1L<<47)
+#define TBTAB_FLAGSISEPROL	(1L<<46)
+#define TBTAB_FLAGSHASTBOFF	(1L<<45)
+#define TBTAB_FLAGSINTPROC	(1L<<44)
+#define TBTAB_FLAGSHASCTL	(1L<<43)
+#define TBTAB_FLAGSTOCLESS	(1L<<42)
+#define TBTAB_FLAGSFPPRESENT	(1L<<41)
+#define TBTAB_FLAGSNAMEPRESENT	(1L<<38)
+#define TBTAB_FLAGSUSESALLOCA	(1L<<37)
+#define TBTAB_FLAGSSAVESCR	(1L<<33)
+#define TBTAB_FLAGSSAVESLR	(1L<<32)
+#define TBTAB_FLAGSSTORESBC	(1L<<31)
+#define TBTAB_FLAGSFIXUP	(1L<<30)
+#define TBTAB_FLAGSPARMSONSTK	(1L<<0)
+	unsigned char	fp_saved;	/* num fp regs saved f(32-n)..f31 */
+	unsigned char	gpr_saved;	/* num gpr's saved */
+	unsigned char	fixedparms;	/* num fixed point parms */
+	unsigned char	floatparms;	/* num float parms */
+	unsigned char	parminfo[32];	/* types of args.  null terminated */
+#define TBTAB_PARMFIXED 1
+#define TBTAB_PARMSFLOAT 2
+#define TBTAB_PARMDFLOAT 3
+	unsigned int	tb_offset;	/* offset from start of func */
+	unsigned long	funcstart;	/* addr of start of function */
+	char		name[64];	/* name of function (null terminated) */
+};
+static int find_tb_table(unsigned long codeaddr, struct tbtable *tab);
+
+void
+xmon(struct pt_regs *excp)
+{
+	struct pt_regs regs;
+	int cmd;
+	unsigned long msr;
+
+	if (excp == NULL) {
+		/* Ok, grab regs as they are now.
+		   This won't do a particularly good job because the
+		   prologue has already been executed.
+		   ToDo: We could reach back into the caller's save
+		   area to do a better job of representing the
+		   caller's state.
+		 */
+		asm volatile ("std	0,0(%0)\n\
+			std	1,8(%0)\n\
+			std	2,16(%0)\n\
+			std	3,24(%0)\n\
+			std	4,32(%0)\n\
+			std	5,40(%0)\n\
+			std	6,48(%0)\n\
+			std	7,56(%0)\n\
+			std	8,64(%0)\n\
+			std	9,72(%0)\n\
+			std	10,80(%0)\n\
+			std	11,88(%0)\n\
+			std	12,96(%0)\n\
+			std	13,104(%0)\n\
+			std	14,112(%0)\n\
+			std	15,120(%0)\n\
+			std	16,128(%0)\n\
+			std	17,136(%0)\n\
+			std	18,144(%0)\n\
+			std	19,152(%0)\n\
+			std	20,160(%0)\n\
+			std	21,168(%0)\n\
+			std	22,176(%0)\n\
+			std	23,184(%0)\n\
+			std	24,192(%0)\n\
+			std	25,200(%0)\n\
+			std	26,208(%0)\n\
+			std	27,216(%0)\n\
+			std	28,224(%0)\n\
+			std	29,232(%0)\n\
+			std	30,240(%0)\n\
+			std	31,248(%0)" : : "b" (&regs));
+		printf("xmon called\n");
+		/* Fetch the link reg for this stack frame.
+		   NOTE: the prev printf fills in the lr. */
+		regs.nip = regs.link = ((unsigned long *)(regs.gpr[1]))[2];
+		regs.msr = get_msr();
+		regs.ctr = get_ctr();
+		regs.xer = get_xer();
+		regs.ccr = get_cr();
+		regs.trap = 0;
+		excp = &regs;
+	}
+
+	msr = get_msr();
+	set_msrd(msr & ~MSR_EE);	/* disable interrupts */
+	excprint(excp);
+#ifdef CONFIG_SMP
+	if (test_and_set_bit(smp_processor_id(), &cpus_in_xmon))
+		for (;;)
+			;
+	while (test_and_set_bit(0, &got_xmon)) {
+		if (take_xmon == smp_processor_id()) {
+			take_xmon = -1;
+			break;
+		}
+	}
+	/*
+	 * XXX: breakpoints are removed while any cpu is in xmon
+	 */
+#endif /* CONFIG_SMP */
+	remove_bpts();
+	cmd = cmds(excp);
+	if (cmd == 's') {
+		xmon_trace[smp_processor_id()] = SSTEP;
+		excp->msr |= 0x400;
+	} else if (at_breakpoint(excp->nip)) {
+		xmon_trace[smp_processor_id()] = BRSTEP;
+		excp->msr |= 0x400;
+	} else {
+		xmon_trace[smp_processor_id()] = 0;
+		insert_bpts();
+	}
+#ifdef CONFIG_SMP
+	clear_bit(0, &got_xmon);
+	clear_bit(smp_processor_id(), &cpus_in_xmon);
+#endif /* CONFIG_SMP */
+	set_msrd(msr);		/* restore interrupt enable */
+}
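+
+/*
+ * A minimal usage sketch (hypothetical call site, not part of this
+ * patch): code that has xmon built in can report a condition and keep
+ * running with
+ *
+ *	xmon_backtrace("unexpected state in %s", __FUNCTION__);
+ *
+ * which prints the message and then a backtrace of the current stack
+ * via xmon_backtrace() below, without entering the monitor's command
+ * loop.
+ */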
+/* Code can call this to get a backtrace and continue. */
+void
+xmon_backtrace(const char *fmt, ...)
+{
+	va_list ap;
+	struct pt_regs regs;
+
+
+	/* Ok, grab regs as they are now.
+	   This won't do a particularly good job because the
+	   prologue has already been executed.
+	   ToDo: We could reach back into the caller's save
+	   area to do a better job of representing the
+	   caller's state.
+	 */
+	asm volatile ("std	0,0(%0)\n\
+		std	1,8(%0)\n\
+		std	2,16(%0)\n\
+		std	3,24(%0)\n\
+		std	4,32(%0)\n\
+		std	5,40(%0)\n\
+		std	6,48(%0)\n\
+		std	7,56(%0)\n\
+		std	8,64(%0)\n\
+		std	9,72(%0)\n\
+		std	10,80(%0)\n\
+		std	11,88(%0)\n\
+		std	12,96(%0)\n\
+		std	13,104(%0)\n\
+		std	14,112(%0)\n\
+		std	15,120(%0)\n\
+		std	16,128(%0)\n\
+		std	17,136(%0)\n\
+		std	18,144(%0)\n\
+		std	19,152(%0)\n\
+		std	20,160(%0)\n\
+		std	21,168(%0)\n\
+		std	22,176(%0)\n\
+		std	23,184(%0)\n\
+		std	24,192(%0)\n\
+		std	25,200(%0)\n\
+		std	26,208(%0)\n\
+		std	27,216(%0)\n\
+		std	28,224(%0)\n\
+		std	29,232(%0)\n\
+		std	30,240(%0)\n\
+		std	31,248(%0)" : : "b" (&regs));
+	/* Fetch the link reg for this stack frame.
+	   NOTE: the prev printf fills in the lr. */
+	regs.nip = regs.link = ((unsigned long *)(regs.gpr[1]))[2];
+	regs.msr = get_msr();
+	regs.ctr = get_ctr();
+	regs.xer = get_xer();
+	regs.ccr = get_cr();
+	regs.trap = 0;
+
+	va_start(ap, fmt);
+	xmon_vfprintf(stdout, fmt, ap);
+	xmon_putc('\n', stdout);
+	va_end(ap);
+	take_input("\n");
+	backtrace(&regs);
+}
+
+/* Call this to poll for ^C during busy operations.
+ * Returns true if the user has hit ^C.
+ */
+int
+xmon_interrupted(void)
+{
+	int ret = xmon_read_poll();
+	if (ret == 3) {
+		printf("\n^C interrupted.\n");
+		return 1;
+	}
+	return 0;
+}
+
+
+void
+xmon_irq(int irq, void *d, struct pt_regs *regs)
+{
+	unsigned long flags;
+	__save_flags(flags);
+	__cli();
+	printf("Keyboard interrupt\n");
+	xmon(regs);
+	__restore_flags(flags);
+}
+
+int
+xmon_bpt(struct pt_regs *regs)
+{
+	struct bpt *bp;
+
+	bp = at_breakpoint(regs->nip);
+	if (!bp)
+		return 0;
+	if (bp->count) {
+		--bp->count;
+		remove_bpts();
+		excprint(regs);
+		xmon_trace[smp_processor_id()] = BRSTEP;
+		regs->msr |= 0x400;
+	} else {
+		printf("Stopped at breakpoint %x (%lx %s)\n", (bp - bpts)+1, bp->address, bp->funcname);
+		xmon(regs);
+	}
+	return 1;
+}
+
+int
+xmon_sstep(struct pt_regs *regs)
+{
+	if (!xmon_trace[smp_processor_id()])
+		return 0;
+	if (xmon_trace[smp_processor_id()] == BRSTEP) {
+		xmon_trace[smp_processor_id()] = 0;
+		insert_bpts();
+	} else {
+		xmon(regs);
+	}
+	return 1;
+}
+
+int
+xmon_dabr_match(struct pt_regs *regs)
+{
+	if (dabr.enabled && dabr.count) {
+		--dabr.count;
+		remove_bpts();
+		excprint(regs);
+		xmon_trace[smp_processor_id()] = BRSTEP;
+		regs->msr |= 0x400;
+	} else {
+		dabr.instr = regs->nip;
+		xmon(regs);
+	}
+	return 1;
+}
+
+int
+xmon_iabr_match(struct pt_regs *regs)
+{
+	if (iabr.enabled && iabr.count) {
+		--iabr.count;
+		remove_bpts();
+		excprint(regs);
+		xmon_trace[smp_processor_id()] = BRSTEP;
+		regs->msr |= 0x400;
+	} else {
+		xmon(regs);
+	}
+	return 1;
+}
+
+static struct bpt *
+at_breakpoint(unsigned long pc)
+{
+	int i;
+	struct bpt *bp;
+
+	if (dabr.enabled && pc == dabr.instr)
+		return &dabr;
+	if (iabr.enabled && pc == iabr.address)
+		return &iabr;
+	bp = bpts;
+	for (i = 0; i < NBPTS; ++i, ++bp)
+		if (bp->enabled && pc == bp->address)
+			return bp;
+	return 0;
+}
+
+static void
+insert_bpts()
+{
+	int i;
+	struct bpt *bp;
+
+	if (_machine != _MACH_pSeries)
+		return;
+	bp = bpts;
+	for (i = 0; i < NBPTS; ++i, ++bp) {
+		if (!bp->enabled)
+			continue;
+		if (mread(bp->address, &bp->instr, 4) != 4
+		    || mwrite(bp->address, &bpinstr, 4) != 4) {
+			printf("Couldn't insert 
breakpoint at %x, disabling\n", + bp->address); + bp->enabled = 0; + } else { + store_inst((void *)bp->address); + } + } + + if (!__is_processor(PV_POWER4)) { + if (dabr.enabled) + set_dabr(dabr.address); + if (iabr.enabled) + set_iabr(iabr.address); + } +} + +static void +remove_bpts() +{ + int i; + struct bpt *bp; + unsigned instr; + + if (_machine != _MACH_pSeries) + return; + if (!__is_processor(PV_POWER4)) { + set_dabr(0); + set_iabr(0); + } + + bp = bpts; + for (i = 0; i < NBPTS; ++i, ++bp) { + if (!bp->enabled) + continue; + if (mread(bp->address, &instr, 4) == 4 + && instr == bpinstr + && mwrite(bp->address, &bp->instr, 4) != 4) + printf("Couldn't remove breakpoint at %x\n", + bp->address); + else + store_inst((void *)bp->address); + } +} + +static char *last_cmd; + +/* Command interpreting routine */ +static int +cmds(struct pt_regs *excp) +{ + int cmd; + + last_cmd = NULL; + for(;;) { +#ifdef CONFIG_SMP + printf("%d:", smp_processor_id()); +#endif /* CONFIG_SMP */ + printf("mon> "); + fflush(stdout); + flush_input(); + termch = 0; + cmd = skipbl(); + if( cmd == '\n' ) { + if (last_cmd == NULL) + continue; + take_input(last_cmd); + last_cmd = NULL; + cmd = inchar(); + } + switch (cmd) { + case 'z': + machine_restart(NULL); + break; + case 'm': + cmd = inchar(); + switch (cmd) { + case 'm': + case 's': + case 'd': + memops(cmd); + break; + case 'l': + memlocate(); + break; + case 'z': + memzcan(); + break; + case 'x': + mem_translate(); + break; + case 'c': + mem_check(); + break; + case 'g': + mem_check_full_group(); + break; + case 'j': + mem_map_check_slab(); + break; + case 'h': + mem_map_check_hash(); + break; + case 'f': + mem_find_real(); + break; + case 'e': + mem_find_vsid(); + break; + case 'r': + mem_check_dup_rpn(); + break; + case 'i': + show_mem(); + break; + case 'o': + mem_check_pagetable_vsids (); + break; + case 'q': + mem_map_lock_pages() ; + break; + + + default: + termch = cmd; + memex(); + } + break; + case 'd': + dump(); + break; + case 'r': + if (excp != NULL) + prregs(excp); /* print regs */ + break; + case 'e': + if (excp == NULL) + printf("No exception information\n"); + else + excprint(excp); + break; + case 'M': + print_sysmap(); + break; + case 'S': + super_regs(); + break; + case 't': + backtrace(excp); + break; + case 'f': + cacheflush(); + break; + case 'h': + dump_hash_table(); + break; + case 's': + case 'x': + case EOF: + return cmd; + case '?': + printf(help_string); + break; + case 'p': + xmon_show_state(); + break; + case 'b': + bpt_cmds(); + break; + case 'C': + csum(); + break; +#ifdef CONFIG_SMP + case 'c': + cpu_cmd(); + break; +#endif /* CONFIG_SMP */ + case 'T': + debug_trace(); + break; + default: + printf("Unrecognized command: "); + do { + if( ' ' < cmd && cmd <= '~' ) + putchar(cmd); + else + printf("\\x%x", cmd); + cmd = inchar(); + } while (cmd != '\n'); + printf(" (type ? 
for help)\n"); + break; + } + } +} + +#ifdef CONFIG_SMP +static void cpu_cmd(void) +{ + unsigned long cpu; + int timeout; + int cmd; + + cmd = inchar(); + if (cmd == 'i') { + printf("stopping all cpus\n"); + /* interrupt other cpu(s) */ + cpu = MSG_ALL_BUT_SELF; + smp_send_xmon_break(cpu); + return; + } + termch = cmd; + if (!scanhex(&cpu)) { + /* print cpus waiting or in xmon */ + printf("cpus stopped:"); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (test_bit(cpu, &cpus_in_xmon)) { + printf(" %d", cpu); + if (cpu == smp_processor_id()) + printf("*", cpu); + } + } + printf("\n"); + return; + } + /* try to switch to cpu specified */ + take_xmon = cpu; + timeout = 10000000; + while (take_xmon >= 0) { + if (--timeout == 0) { + /* yes there's a race here */ + take_xmon = -1; + printf("cpu %u didn't take control\n", cpu); + return; + } + } + /* now have to wait to be given control back */ + while (test_and_set_bit(0, &got_xmon)) { + if (take_xmon == smp_processor_id()) { + take_xmon = -1; + break; + } + } +} +#endif /* CONFIG_SMP */ + +static unsigned short fcstab[256] = { + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; + +#define FCS(fcs, c) (((fcs) >> 8) ^ fcstab[((fcs) ^ (c)) & 0xff]) + +static void +csum(void) +{ + unsigned int i; + unsigned short fcs; + unsigned char v; + + if (!scanhex(&adrs)) + return; + if (!scanhex(&ncsum)) + return; + fcs = 0xffff; + for (i = 0; i < ncsum; ++i) { + if (mread(adrs+i, &v, 1) == 0) { + printf("csum stopped at %x\n", adrs+i); + 
break; + } + fcs = FCS(fcs, v); + } + printf("%x\n", fcs); +} + +static char *breakpoint_help_string = + "Breakpoint command usage:\n" + "b show breakpoints\n" + "b [cnt] set breakpoint at given instr addr\n" + "bc clear all breakpoints\n" + "bc clear breakpoint number n or at addr\n" + "bi [cnt] set hardware instr breakpoint (broken?)\n" + "bd [cnt] set hardware data breakpoint (broken?)\n" + ""; + +static void +bpt_cmds(void) +{ + int cmd; + unsigned long a; + int mode, i; + struct bpt *bp; + struct tbtable tab; + + cmd = inchar(); + switch (cmd) { + case 'd': /* bd - hardware data breakpoint */ + if (__is_processor(PV_POWER4)) { + printf("Not implemented on POWER4\n"); + break; + } + mode = 7; + cmd = inchar(); + if (cmd == 'r') + mode = 5; + else if (cmd == 'w') + mode = 6; + else + termch = cmd; + dabr.address = 0; + dabr.count = 0; + dabr.enabled = scanhex(&dabr.address); + scanhex(&dabr.count); + if (dabr.enabled) + dabr.address = (dabr.address & ~7) | mode; + break; + case 'i': /* bi - hardware instr breakpoint */ + if (__is_processor(PV_POWER4)) { + printf("Not implemented on POWER4\n"); + break; + } + iabr.address = 0; + iabr.count = 0; + iabr.enabled = scanhex(&iabr.address); + if (iabr.enabled) + iabr.address |= 3; + scanhex(&iabr.count); + break; + case 'c': + if (!scanhex(&a)) { + /* clear all breakpoints */ + for (i = 0; i < NBPTS; ++i) + bpts[i].enabled = 0; + iabr.enabled = 0; + dabr.enabled = 0; + printf("All breakpoints cleared\n"); + } else { + if (a <= NBPTS && a >= 1) { + /* assume a breakpoint number */ + --a; /* bp nums are 1 based */ + bp = &bpts[a]; + } else { + /* assume a breakpoint address */ + bp = at_breakpoint(a); + } + if (bp == 0) { + printf("No breakpoint at %x\n", a); + } else { + printf("Cleared breakpoint %x (%lx %s)\n", (bp - bpts)+1, bp->address, bp->funcname); + bp->enabled = 0; + } + } + break; + case '?': + printf(breakpoint_help_string); + break; + default: + termch = cmd; + cmd = skipbl(); + if (cmd == '?') { + printf(breakpoint_help_string); + break; + } + termch = cmd; + if (!scanhex(&a)) { + /* print all breakpoints */ + int bpnum; + + printf(" type address count\n"); + if (dabr.enabled) { + printf(" data %.16lx %8x [", dabr.address & ~7, + dabr.count); + if (dabr.address & 1) + printf("r"); + if (dabr.address & 2) + printf("w"); + printf("]\n"); + } + if (iabr.enabled) + printf(" inst %.16lx %8x\n", iabr.address & ~3, + iabr.count); + for (bp = bpts, bpnum = 1; bp < &bpts[NBPTS]; ++bp, ++bpnum) + if (bp->enabled) + printf("%2x trap %.16lx %8x %s\n", bpnum, bp->address, bp->count, bp->funcname); + break; + } + bp = at_breakpoint(a); + if (bp == 0) { + for (bp = bpts; bp < &bpts[NBPTS]; ++bp) + if (!bp->enabled) + break; + if (bp >= &bpts[NBPTS]) { + printf("Sorry, no free breakpoints. Please clear one first.\n"); + break; + } + } + bp->enabled = 1; + bp->address = a; + bp->count = 0; + scanhex(&bp->count); + /* Find the function name just once. */ + bp->funcname[0] = '\0'; + if (find_tb_table(bp->address, &tab) && tab.name[0]) { + /* Got a nice name for it. */ + int delta = bp->address - tab.funcstart; + sprintf(bp->funcname, "%s+0x%x", tab.name, delta); + } + printf("Set breakpoint %2x trap %.16lx %8x %s\n", (bp-bpts)+1, bp->address, bp->count, bp->funcname); + break; + } +} + +/* Very cheap human name for vector lookup. 
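+   The case values are the architected PowerPC exception vector
+   offsets (0x100 system reset, 0x300 data access, 0xc00 system call,
+   and so on); anything unrecognized maps to an empty string.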
+ */
+static
+const char *getvecname(unsigned long vec)
+{
+	char *ret;
+	switch (vec) {
+	case 0x100: ret = "(System Reset)"; break;
+	case 0x200: ret = "(Machine Check)"; break;
+	case 0x300: ret = "(Data Access)"; break;
+	case 0x400: ret = "(Instruction Access)"; break;
+	case 0x500: ret = "(Hardware Interrupt)"; break;
+	case 0x600: ret = "(Alignment)"; break;
+	case 0x700: ret = "(Program Check)"; break;
+	case 0x800: ret = "(FPU Unavailable)"; break;
+	case 0x900: ret = "(Decrementer)"; break;
+	case 0xc00: ret = "(System Call)"; break;
+	case 0xd00: ret = "(Single Step)"; break;
+	case 0xf00: ret = "(Performance Monitor)"; break;
+	default: ret = "";
+	}
+	return ret;
+}
+
+static void
+backtrace(struct pt_regs *excp)
+{
+	unsigned long sp;
+	unsigned long lr;
+	unsigned long stack[3];
+	struct pt_regs regs;
+	struct tbtable tab;
+	int framecount;
+	char *funcname;
+	/* declare these as raw ptrs so we don't get func descriptors */
+	extern void *ret_from_except, *ret_from_syscall_1;
+
+	if (excp != NULL) {
+		lr = excp->link;
+		sp = excp->gpr[1];
+	} else {
+		/* Use care not to call any function before this point
+		   so the saved lr has a chance of being good. */
+		asm volatile ("mflr %0" : "=r" (lr) :);
+		sp = getsp();
+	}
+	scanhex(&sp);
+	scannl();
+	for (framecount = 0;
+	     sp != 0 && framecount < MAXFRAMECOUNT;
+	     sp = stack[0], framecount++) {
+		if (mread(sp, stack, sizeof(stack)) != sizeof(stack))
+			break;
+#if 0
+		if (lr != 0) {
+			stack[2] = lr;	/* fake out the first saved lr.  It may not be saved yet. */
+			lr = 0;
+		}
+#endif
+		printf("%.16lx %.16lx", sp, stack[2]);
+		/* TAI -- for now only the ones cast to unsigned long will match.
+		 * Need to test the rest...
+		 */
+		if ((stack[2] == (unsigned long)ret_from_except &&
+		     (funcname = "ret_from_except"))
+		    || (stack[2] == (unsigned long)ret_from_syscall_1 &&
+			(funcname = "ret_from_syscall_1"))
+#if 0
+		    || stack[2] == (unsigned) &ret_from_syscall_2
+		    || stack[2] == (unsigned) &do_bottom_half_ret
+		    || stack[2] == (unsigned) &do_signal_ret
+#endif
+		    ) {
+			printf(" %s\n", funcname);
+			if (mread(sp+112, &regs, sizeof(regs)) != sizeof(regs))
+				break;
+			printf("exception: %lx %s regs %lx\n", regs.trap, getvecname(regs.trap), sp+112);
+			printf(" %.16lx", regs.nip);
+			if ((regs.nip & 0xffffffff00000000UL) &&
+			    find_tb_table(regs.nip, &tab)) {
+				int delta = regs.nip-tab.funcstart;
+				if (delta < 0)
+					printf(" ");
+				else
+					printf(" %s+0x%x", tab.name, delta);
+			}
+			printf("\n");
+			if (regs.gpr[1] < sp) {
+				printf("\n", regs.gpr[1]);
+				break;
+			}
+
+			sp = regs.gpr[1];
+			if (mread(sp, stack, sizeof(stack)) != sizeof(stack))
+				break;
+		} else {
+			if (stack[2] && find_tb_table(stack[2], &tab)) {
+				int delta = stack[2]-tab.funcstart;
+				if (delta < 0)
+					printf(" ");
+				else
+					printf(" %s+0x%x", tab.name, delta);
+			}
+			printf("\n");
+		}
+		if (stack[0] && stack[0] <= sp) {
+			if ((stack[0] & 0xffffffff00000000UL) == 0)
+				printf("\n", stack[0]);
+			else
+				printf("\n", stack[0]);
+			break;
+		}
+	}
+	if (framecount >= MAXFRAMECOUNT)
+		printf("\n");
+}
+
+int
+getsp()
+{
+	int x;
+
+	asm("mr %0,1" : "=r" (x) :);
+	return x;
+}
+
+spinlock_t exception_print_lock = SPIN_LOCK_UNLOCKED;
+
+void
+excprint(struct pt_regs *fp)
+{
+	struct task_struct *c;
+	struct tbtable tab;
+	unsigned long flags;
+
+	spin_lock_irqsave(&exception_print_lock, flags);
+
+#ifdef CONFIG_SMP
+	printf("cpu %d: ", smp_processor_id());
+#endif /* CONFIG_SMP */
+
+	printf("Vector: %lx %s at [%lx]\n", fp->trap, getvecname(fp->trap), fp);
+	printf(" pc: %lx", fp->nip);
+	if (find_tb_table(fp->nip, 
&tab) && tab.name[0]) { + /* Got a nice name for it */ + int delta = fp->nip - tab.funcstart; + printf(" (%s+0x%x)", tab.name, delta); + } + printf("\n"); + printf(" lr: %lx", fp->link); + if (find_tb_table(fp->link, &tab) && tab.name[0]) { + /* Got a nice name for it */ + int delta = fp->link - tab.funcstart; + printf(" (%s+0x%x)", tab.name, delta); + } + printf("\n"); + printf(" sp: %lx\n", fp->gpr[1]); + printf(" msr: %lx\n", fp->msr); + + if (fp->trap == 0x300 || fp->trap == 0x600) { + printf(" dar: %lx\n", fp->dar); + printf(" dsisr: %lx\n", fp->dsisr); + } + + /* XXX: need to copy current or we die. Why? */ + c = current; + printf(" current = 0x%lx\n", c); + printf(" paca = 0x%lx\n", get_paca()); + if (c) { + printf(" current = %lx, pid = %ld, comm = %s\n", + c, c->pid, c->comm); + } + + spin_unlock_irqrestore(&exception_print_lock, flags); +} + +void +prregs(struct pt_regs *fp) +{ + int n; + unsigned long base; + + if (scanhex((void *)&base)) + fp = (struct pt_regs *) base; + for (n = 0; n < 16; ++n) + printf("R%.2ld = %.16lx R%.2ld = %.16lx\n", n, fp->gpr[n], + n+16, fp->gpr[n+16]); + printf("pc = %.16lx msr = %.16lx\nlr = %.16lx cr = %.16lx\n", + fp->nip, fp->msr, fp->link, fp->ccr); + printf("ctr = %.16lx xer = %.16lx trap = %8lx\n", + fp->ctr, fp->xer, fp->trap); +} + +void +cacheflush(void) +{ + int cmd; + unsigned long nflush; + + cmd = inchar(); + if (cmd != 'i') + termch = cmd; + scanhex((void *)&adrs); + if (termch != '\n') + termch = 0; + nflush = 1; + scanhex(&nflush); + nflush = (nflush + 31) / 32; + if (cmd != 'i') { + for (; nflush > 0; --nflush, adrs += 0x20) + cflush((void *) adrs); + } else { + for (; nflush > 0; --nflush, adrs += 0x20) + cinval((void *) adrs); + } +} + +unsigned long +read_spr(int n) +{ + unsigned int instrs[2]; + unsigned long (*code)(void); + unsigned long opd[3]; + + instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; + opd[0] = instrs; + opd[1] = 0; + opd[2] = 0; + store_inst(instrs); + store_inst(instrs+1); + code = (unsigned long (*)(void)) opd; + + return code(); +} + +void +write_spr(int n, unsigned long val) +{ + unsigned int instrs[2]; + unsigned long (*code)(unsigned long); + unsigned long opd[3]; + + instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; + opd[0] = instrs; + opd[1] = 0; + opd[2] = 0; + store_inst(instrs); + store_inst(instrs+1); + code = (unsigned long (*)(unsigned long)) opd; + + code(val); +} + +static unsigned long regno; +extern char exc_prolog; +extern char dec_exc; + +void +print_sysmap(void) +{ + extern char *sysmap; + if ( sysmap ) + printf("System.map: \n%s", sysmap); +} + +void +super_regs() +{ + int i, cmd; + unsigned long val; + struct Paca* ptrPaca = NULL; + struct ItLpPaca* ptrLpPaca = NULL; + struct ItLpRegSave* ptrLpRegSave = NULL; + + cmd = skipbl(); + if (cmd == '\n') { + unsigned long sp, toc; + asm("mr %0,1" : "=r" (sp) :); + asm("mr %0,2" : "=r" (toc) :); + + printf("msr = %.16lx sprg0= %.16lx\n", get_msr(), get_sprg0()); + printf("pvr = %.16lx sprg1= %.16lx\n", get_pvr(), get_sprg1()); + printf("dec = %.16lx sprg2= %.16lx\n", get_dec(), get_sprg2()); + printf("sp = %.16lx sprg3= %.16lx\n", sp, get_sprg3()); + printf("toc = %.16lx dar = %.16lx\n", toc, get_dar()); + printf("srr0 = %.16lx srr1 = %.16lx\n", get_srr0(), get_srr1()); + printf("asr = %.16lx\n", mfasr()); + for (i = 0; i < 8; ++i) + printf("sr%.2ld = %.16lx sr%.2ld = %.16lx\n", i, get_sr(i), i+8, get_sr(i+8)); + + // Dump out relevant Paca data areas. 
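+		// The Paca is the per-processor data area, found through
+		// SPRG3 (see the get_sprg3() call below); the LpPaca and
+		// LpRegSave blocks hang off it.  (Assumption, not stated in
+		// this file: on iSeries these are the register save areas
+		// shared with the hypervisor, which is why their saved
+		// SRRn/SPRGn/MSR/NIA values are worth dumping here.)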
+ printf("Paca: \n"); + ptrPaca = (struct Paca*)get_sprg3(); + + printf(" Local Processor Control Area (LpPaca): \n"); + ptrLpPaca = ptrPaca->xLpPacaPtr; + printf(" Saved Srr0=%.16lx Saved Srr1=%.16lx \n", ptrLpPaca->xSavedSrr0, ptrLpPaca->xSavedSrr1); + printf(" Saved Gpr3=%.16lx Saved Gpr4=%.16lx \n", ptrLpPaca->xSavedGpr3, ptrLpPaca->xSavedGpr4); + printf(" Saved Gpr5=%.16lx \n", ptrLpPaca->xSavedGpr5); + + printf(" Local Processor Register Save Area (LpRegSave): \n"); + ptrLpRegSave = ptrPaca->xLpRegSavePtr; + printf(" Saved Sprg0=%.16lx Saved Sprg1=%.16lx \n", ptrLpRegSave->xSPRG0, ptrLpRegSave->xSPRG0); + printf(" Saved Sprg2=%.16lx Saved Sprg3=%.16lx \n", ptrLpRegSave->xSPRG2, ptrLpRegSave->xSPRG3); + printf(" Saved Msr =%.16lx Saved Nia =%.16lx \n", ptrLpRegSave->xMSR, ptrLpRegSave->xNIA); + + return; + } + + scanhex(®no); + switch (cmd) { + case 'w': + val = read_spr(regno); + scanhex(&val); + write_spr(regno, val); + /* fall through */ + case 'r': + printf("spr %lx = %lx\n", regno, read_spr(regno)); + break; + case 's': + val = get_sr(regno); + scanhex(&val); + set_sr(regno, val); + break; + case 'm': + val = get_msr(); + scanhex(&val); + set_msrd(val); + break; + } + scannl(); +} + +#if 0 +static void +openforth() +{ + int c; + char *p; + char cmd[1024]; + int args[5]; + extern int (*prom_entry)(int *); + + p = cmd; + c = skipbl(); + while (c != '\n') { + *p++ = c; + c = inchar(); + } + *p = 0; + args[0] = (int) "interpret"; + args[1] = 1; + args[2] = 1; + args[3] = (int) cmd; + (*prom_entry)(args); + printf("\n"); + if (args[4] != 0) + printf("error %x\n", args[4]); +} +#endif + +#ifndef CONFIG_PPC64BRIDGE +static void +dump_hash_table_seg(unsigned seg, unsigned start, unsigned end) +{ + extern void *Hash; + extern unsigned long Hash_size; + unsigned *htab = Hash; + unsigned hsize = Hash_size; + unsigned v, hmask, va, last_va; + int found, last_found, i; + unsigned *hg, w1, last_w2, last_va0; + + last_found = 0; + hmask = hsize / 64 - 1; + va = start; + start = (start >> 12) & 0xffff; + end = (end >> 12) & 0xffff; + for (v = start; v < end; ++v) { + found = 0; + hg = htab + (((v ^ seg) & hmask) * 16); + w1 = 0x80000000 | (seg << 7) | (v >> 10); + for (i = 0; i < 8; ++i, hg += 2) { + if (*hg == w1) { + found = 1; + break; + } + } + if (!found) { + w1 ^= 0x40; + hg = htab + ((~(v ^ seg) & hmask) * 16); + for (i = 0; i < 8; ++i, hg += 2) { + if (*hg == w1) { + found = 1; + break; + } + } + } + if (!(last_found && found && (hg[1] & ~0x180) == last_w2 + 4096)) { + if (last_found) { + if (last_va != last_va0) + printf(" ... %x", last_va); + printf("\n"); + } + if (found) { + printf("%x to %x", va, hg[1]); + last_va0 = va; + } + last_found = found; + } + if (found) { + last_w2 = hg[1] & ~0x180; + last_va = va; + } + va += 4096; + } + if (last_found) + printf(" ... 
%x\n", last_va); +} + +#else /* CONFIG_PPC64BRIDGE */ +static void +dump_hash_table_seg(unsigned seg, unsigned start, unsigned end) +{ + extern void *Hash; + extern unsigned long Hash_size; + unsigned *htab = Hash; + unsigned hsize = Hash_size; + unsigned v, hmask, va, last_va; + int found, last_found, i; + unsigned *hg, w1, last_w2, last_va0; + + last_found = 0; + hmask = hsize / 128 - 1; + va = start; + start = (start >> 12) & 0xffff; + end = (end >> 12) & 0xffff; + for (v = start; v < end; ++v) { + found = 0; + hg = htab + (((v ^ seg) & hmask) * 32); + w1 = 1 | (seg << 12) | ((v & 0xf800) >> 4); + for (i = 0; i < 8; ++i, hg += 4) { + if (hg[1] == w1) { + found = 1; + break; + } + } + if (!found) { + w1 ^= 2; + hg = htab + ((~(v ^ seg) & hmask) * 32); + for (i = 0; i < 8; ++i, hg += 4) { + if (hg[1] == w1) { + found = 1; + break; + } + } + } + if (!(last_found && found && (hg[3] & ~0x180) == last_w2 + 4096)) { + if (last_found) { + if (last_va != last_va0) + printf(" ... %x", last_va); + printf("\n"); + } + if (found) { + printf("%x to %x", va, hg[3]); + last_va0 = va; + } + last_found = found; + } + if (found) { + last_w2 = hg[3] & ~0x180; + last_va = va; + } + va += 4096; + } + if (last_found) + printf(" ... %x\n", last_va); +} +#endif /* CONFIG_PPC64BRIDGE */ + +static unsigned long hash_ctx; +static unsigned long hash_start; +static unsigned long hash_end; + +static void +dump_hash_table() +{ + int seg; + unsigned seg_start, seg_end; + + hash_ctx = 0; + hash_start = 0; + hash_end = 0xfffff000; + scanhex(&hash_ctx); + scanhex(&hash_start); + scanhex(&hash_end); + printf("Mappings for context %x\n", hash_ctx); + seg_start = hash_start; + for (seg = hash_start >> 28; seg <= hash_end >> 28; ++seg) { + seg_end = (seg << 28) | 0x0ffff000; + if (seg_end > hash_end) + seg_end = hash_end; + dump_hash_table_seg((hash_ctx << 4) + seg, seg_start, seg_end); + seg_start = seg_end + 0x1000; + } +} + +int +mread(unsigned long adrs, void *buf, int size) +{ + volatile int n; + char *p, *q; + + n = 0; + if( setjmp(bus_error_jmp) == 0 ){ + debugger_fault_handler = handle_fault; + sync(); + p = (char *) adrs; + q = (char *) buf; + switch (size) { + case 2: *(short *)q = *(short *)p; break; + case 4: *(int *)q = *(int *)p; break; + default: + for( ; n < size; ++n ) { + *q++ = *p++; + sync(); + } + } + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } + debugger_fault_handler = 0; + return n; +} + +int +mwrite(unsigned long adrs, void *buf, int size) +{ + volatile int n; + char *p, *q; + + n = 0; + if( setjmp(bus_error_jmp) == 0 ){ + debugger_fault_handler = handle_fault; + sync(); + p = (char *) adrs; + q = (char *) buf; + switch (size) { + case 2: *(short *)p = *(short *)q; break; + case 4: *(int *)p = *(int *)q; break; + default: + for( ; n < size; ++n ) { + *p++ = *q++; + sync(); + } + } + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } else { + printf("*** Error writing address %x\n", adrs + n); + } + debugger_fault_handler = 0; + return n; +} + +static int fault_type; +static char *fault_chars[] = { "--", "**", "##" }; + +static void +handle_fault(struct pt_regs *regs) +{ + fault_type = regs->trap == 0x200? 0: regs->trap == 0x300? 
1: 2; + longjmp(bus_error_jmp, 1); +} + +#define SWAP(a, b, t) ((t) = (a), (a) = (b), (b) = (t)) + +void +byterev(unsigned char *val, int size) +{ + int t; + + switch (size) { + case 2: + SWAP(val[0], val[1], t); + break; + case 4: + SWAP(val[0], val[3], t); + SWAP(val[1], val[2], t); + break; + case 8: /* is there really any use for this? */ + SWAP(val[0], val[7], t); + SWAP(val[1], val[6], t); + SWAP(val[2], val[5], t); + SWAP(val[3], val[4], t); + break; + } +} + +static int brev; +static int mnoread; + +static char *memex_help_string = + "Memory examine command usage:\n" + "m [addr] [flags] examine/change memory\n" + " addr is optional. will start where left off.\n" + " flags may include chars from this set:\n" + " b modify by bytes (default)\n" + " w modify by words (2 byte)\n" + " l modify by longs (4 byte)\n" + " d modify by doubleword (8 byte)\n" + " r toggle reverse byte order mode\n" + " n do not read memory (for i/o spaces)\n" + " . ok to read (default)\n" + "NOTE: flags are saved as defaults\n" + ""; + +static char *memex_subcmd_help_string = + "Memory examine subcommands:\n" + " hexval write this val to current location\n" + " 'string' write chars from string to this location\n" + " ' increment address\n" + " ^ decrement address\n" + " / increment addr by 0x10. //=0x100, ///=0x1000, etc\n" + " \\ decrement addr by 0x10. \\\\=0x100, \\\\\\=0x1000, etc\n" + " ` clear no-read flag\n" + " ; stay at this addr\n" + " v change to byte mode\n" + " w change to word (2 byte) mode\n" + " l change to long (4 byte) mode\n" + " u change to doubleword (8 byte) mode\n" + " m addr change current addr\n" + " n toggle no-read flag\n" + " r toggle byte reverse flag\n" + " < count back up count bytes\n" + " > count skip forward count bytes\n" + " x exit this mode\n" + ""; + +void +memex() +{ + int cmd, inc, i, nslash; + unsigned long n; + unsigned char val[16]; + + scanhex((void *)&adrs); + cmd = skipbl(); + if (cmd == '?') { + printf(memex_help_string); + return; + } else { + termch = cmd; + } + last_cmd = "m\n"; + while ((cmd = skipbl()) != '\n') { + switch( cmd ){ + case 'b': size = 1; break; + case 'w': size = 2; break; + case 'l': size = 4; break; + case 'd': size = 8; break; + case 'r': brev = !brev; break; + case 'n': mnoread = 1; break; + case '.': mnoread = 0; break; + } + } + if( size <= 0 ) + size = 1; + else if( size > 8 ) + size = 8; + for(;;){ + if (!mnoread) + n = mread(adrs, val, size); + printf("%.16x%c", adrs, brev? 
'r': ' '); + if (!mnoread) { + if (brev) + byterev(val, size); + putchar(' '); + for (i = 0; i < n; ++i) + printf("%.2x", val[i]); + for (; i < size; ++i) + printf("%s", fault_chars[fault_type]); + } + putchar(' '); + inc = size; + nslash = 0; + for(;;){ + if( scanhex(&n) ){ + for (i = 0; i < size; ++i) + val[i] = n >> (i * 8); + if (!brev) + byterev(val, size); + mwrite(adrs, val, size); + inc = size; + } + cmd = skipbl(); + if (cmd == '\n') + break; + inc = 0; + switch (cmd) { + case '\'': + for(;;){ + n = inchar(); + if( n == '\\' ) + n = bsesc(); + else if( n == '\'' ) + break; + for (i = 0; i < size; ++i) + val[i] = n >> (i * 8); + if (!brev) + byterev(val, size); + mwrite(adrs, val, size); + adrs += size; + } + adrs -= size; + inc = size; + break; + case ',': + adrs += size; + break; + case '.': + mnoread = 0; + break; + case ';': + break; + case 'x': + case EOF: + scannl(); + return; + case 'b': + case 'v': + size = 1; + break; + case 'w': + size = 2; + break; + case 'l': + size = 4; + break; + case 'u': + size = 8; + break; + case '^': + adrs -= size; + break; + break; + case '/': + if (nslash > 0) + adrs -= 1 << nslash; + else + nslash = 0; + nslash += 4; + adrs += 1 << nslash; + break; + case '\\': + if (nslash < 0) + adrs += 1 << -nslash; + else + nslash = 0; + nslash -= 4; + adrs -= 1 << -nslash; + break; + case 'm': + scanhex((void *)&adrs); + break; + case 'n': + mnoread = 1; + break; + case 'r': + brev = !brev; + break; + case '<': + n = size; + scanhex(&n); + adrs -= n; + break; + case '>': + n = size; + scanhex(&n); + adrs += n; + break; + case '?': + printf(memex_subcmd_help_string); + break; + } + } + adrs += inc; + } +} + +int +bsesc() +{ + int c; + + c = inchar(); + switch( c ){ + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 'b': c = '\b'; break; + case 't': c = '\t'; break; + } + return c; +} + +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) +void +dump() +{ + int c; + + c = inchar(); + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + termch = c; + scanhex((void *)&adrs); + if( termch != '\n') + termch = 0; + if( c == 'i' ){ + scanhex(&nidump); + if( nidump == 0 ) + nidump = 16; + adrs += ppc_inst_dump(adrs, nidump); + last_cmd = "di\n"; + } else { + scanhex(&ndump); + if( ndump == 0 ) + ndump = 64; + prdump(adrs, ndump); + adrs += ndump; + last_cmd = "d\n"; + } +} + +void +prdump(unsigned long adrs, long ndump) +{ + long n, m, c, r, nr; + unsigned char temp[16]; + + for( n = ndump; n > 0; ){ + printf("%.16lx", adrs); + putchar(' '); + r = n < 16? n: 16; + nr = mread(adrs, temp, r); + adrs += nr; + for( m = 0; m < r; ++m ){ + if ((m & 7) == 0 && m > 0) + putchar(' '); + if( m < nr ) + printf("%.2x", temp[m]); + else + printf("%s", fault_chars[fault_type]); + } + for(; m < 16; ++m ) + printf(" "); + printf(" |"); + for( m = 0; m < r; ++m ){ + if( m < nr ){ + c = temp[m]; + putchar(' ' <= c && c <= '~'? 
c: '.'); + } else + putchar(' '); + } + n -= r; + for(; m < 16; ++m ) + putchar(' '); + printf("|\n"); + if( nr < r ) + break; + } +} + +int +ppc_inst_dump(unsigned long adr, long count) +{ + int nr, dotted; + unsigned long first_adr; + unsigned long inst, last_inst; + unsigned char val[4]; + + dotted = 0; + for (first_adr = adr; count > 0; --count, adr += 4){ + nr = mread(adr, val, 4); + if( nr == 0 ){ + const char *x = fault_chars[fault_type]; + printf("%.16lx %s%s%s%s\n", adr, x, x, x, x); + break; + } + inst = GETWORD(val); + if (adr > first_adr && inst == last_inst) { + if (!dotted) { + printf(" ...\n"); + dotted = 1; + } + continue; + } + dotted = 0; + last_inst = inst; + printf("%.16lx ", adr); + printf("%.8x\t", inst); + print_insn_big_powerpc(stdout, inst, adr); /* always returns 4 */ + printf("\n"); + } + return adr - first_adr; +} + +void +print_address(unsigned long addr) +{ + printf("0x%lx", addr); +} + +/* + * Memory operations - move, set, print differences + */ +static unsigned long mdest; /* destination address */ +static unsigned long msrc; /* source address */ +static unsigned long mval; /* byte value to set memory to */ +static unsigned long mcount; /* # bytes to affect */ +static unsigned long mdiffs; /* max # differences to print */ + +void +memops(int cmd) +{ + scanhex((void *)&mdest); + if( termch != '\n' ) + termch = 0; + scanhex((void *)(cmd == 's'? &mval: &msrc)); + if( termch != '\n' ) + termch = 0; + scanhex((void *)&mcount); + switch( cmd ){ + case 'm': + memmove((void *)mdest, (void *)msrc, mcount); + break; + case 's': + memset((void *)mdest, mval, mcount); + break; + case 'd': + if( termch != '\n' ) + termch = 0; + scanhex((void *)&mdiffs); + memdiffs((unsigned char *)mdest, (unsigned char *)msrc, mcount, mdiffs); + break; + } +} + +void +memdiffs(unsigned char *p1, unsigned char *p2, unsigned nb, unsigned maxpr) +{ + unsigned n, prt; + + prt = 0; + for( n = nb; n > 0; --n ) + if( *p1++ != *p2++ ) + if( ++prt <= maxpr ) + printf("%.16x %.2x # %.16x %.2x\n", p1 - 1, + p1[-1], p2 - 1, p2[-1]); + if( prt > maxpr ) + printf("Total of %d differences\n", prt); +} + +static unsigned mend; +static unsigned mask; + +void +memlocate() +{ + unsigned a, n; + unsigned char val[4]; + + last_cmd = "ml"; + scanhex((void *)&mdest); + if (termch != '\n') { + termch = 0; + scanhex((void *)&mend); + if (termch != '\n') { + termch = 0; + scanhex((void *)&mval); + mask = ~0; + if (termch != '\n') termch = 0; + scanhex((void *)&mask); + } + } + n = 0; + for (a = mdest; a < mend; a += 4) { + if (mread(a, val, 4) == 4 + && ((GETWORD(val) ^ mval) & mask) == 0) { + printf("%.16x: %.16x\n", a, GETWORD(val)); + if (++n >= 10) + break; + } + } +} + +static unsigned long mskip = 0x1000; +static unsigned long mlim = 0xffffffff; + +void +memzcan() +{ + unsigned char v; + unsigned a; + int ok, ook; + + scanhex(&mdest); + if (termch != '\n') termch = 0; + scanhex(&mskip); + if (termch != '\n') termch = 0; + scanhex(&mlim); + ook = 0; + for (a = mdest; a < mlim; a += mskip) { + ok = mread(a, &v, 1); + if (ok && !ook) { + printf("%.8x .. 
", a); + fflush(stdout); + } else if (!ok && ook) + printf("%.8x\n", a - mskip); + ook = ok; + if (a + mskip < a) + break; + } + if (ook) + printf("%.8x\n", a - mskip); +} + +/* Input scanning routines */ +int +skipbl() +{ + int c; + + if( termch != 0 ){ + c = termch; + termch = 0; + } else + c = inchar(); + while( c == ' ' || c == '\t' ) + c = inchar(); + return c; +} + +int +scanhex(vp) +unsigned long *vp; +{ + int c, d; + unsigned long v; + + c = skipbl(); + d = hexdigit(c); + if( d == EOF ){ + termch = c; + return 0; + } + v = 0; + do { + v = (v << 4) + d; + c = inchar(); + d = hexdigit(c); + } while( d != EOF ); + termch = c; + *vp = v; + return 1; +} + +void +scannl() +{ + int c; + + c = termch; + termch = 0; + while( c != '\n' ) + c = inchar(); +} + +int +hexdigit(int c) +{ + if( '0' <= c && c <= '9' ) + return c - '0'; + if( 'A' <= c && c <= 'F' ) + return c - ('A' - 10); + if( 'a' <= c && c <= 'f' ) + return c - ('a' - 10); + return EOF; +} + +void +getstring(char *s, int size) +{ + int c; + + c = skipbl(); + do { + if( size > 1 ){ + *s++ = c; + --size; + } + c = inchar(); + } while( c != ' ' && c != '\t' && c != '\n' ); + termch = c; + *s = 0; +} + +static char line[256]; +static char *lineptr; + +void +flush_input() +{ + lineptr = NULL; +} + +int +inchar() +{ + if (lineptr == NULL || *lineptr == 0) { + if (fgets(line, sizeof(line), stdin) == NULL) { + lineptr = NULL; + return EOF; + } + lineptr = line; + } + return *lineptr++; +} + +void +take_input(str) +char *str; +{ + lineptr = str; +} + + +/* Starting at codeaddr scan forward for a tbtable and fill in the + given table. Return non-zero if successful at doing something. + */ +static int +find_tb_table(unsigned long codeaddr, struct tbtable *tab) +{ + unsigned long codeaddr_max; + unsigned long tbtab_start; + int nr; + int instr; + int num_parms; + + if (tab == NULL) + return 0; + memset(tab, 0, sizeof(tab)); + + /* Scan instructions starting at codeaddr for 128k max */ + for (codeaddr_max = codeaddr + 128*1024*4; + codeaddr < codeaddr_max; + codeaddr += 4) { + nr = mread(codeaddr, &instr, 4); + if (nr != 4) + return 0; /* Bad read. Give up promptly. */ + if (instr == 0) { + /* table should follow. */ + int version; + unsigned long flags; + tbtab_start = codeaddr; /* save it to compute func start addr */ + codeaddr += 4; + nr = mread(codeaddr, &flags, 8); + if (nr != 8) + return 0; /* Bad read or no tb table. */ + tab->flags = flags; + version = (flags >> 56) & 0xff; + if (version != 0) + continue; /* No tb table here. */ + /* Now, like the version, some of the flags are values + that are more conveniently extracted... */ + tab->fp_saved = (flags >> 24) & 0x3f; + tab->gpr_saved = (flags >> 16) & 0x3f; + tab->fixedparms = (flags >> 8) & 0xff; + tab->floatparms = (flags >> 1) & 0x7f; + codeaddr += 8; + num_parms = tab->fixedparms + tab->floatparms; + if (num_parms) { + unsigned int parminfo; + int parm; + if (num_parms > 32) + return 1; /* incomplete */ + nr = mread(codeaddr, &parminfo, 4); + if (nr != 4) + return 1; /* incomplete */ + /* decode parminfo...32 bits. + A zero means fixed. A one means float and the + following bit determines single (0) or double (1). 
+ */ + for (parm = 0; parm < num_parms; parm++) { + if (parminfo & 0x80000000) { + parminfo <<= 1; + if (parminfo & 0x80000000) + tab->parminfo[parm] = TBTAB_PARMDFLOAT; + else + tab->parminfo[parm] = TBTAB_PARMSFLOAT; + } else { + tab->parminfo[parm] = TBTAB_PARMFIXED; + } + parminfo <<= 1; + } + codeaddr += 4; + } + if (flags & TBTAB_FLAGSHASTBOFF) { + nr = mread(codeaddr, &tab->tb_offset, 4); + if (nr != 4) + return 1; /* incomplete */ + if (tab->tb_offset > 0) { + tab->funcstart = tbtab_start - tab->tb_offset; + } + codeaddr += 4; + } + /* hand_mask appears to be always be omitted. */ + if (flags & TBTAB_FLAGSHASCTL) { + /* Assume this will never happen for C or asm */ + return 1; /* incomplete */ + } + if (flags & TBTAB_FLAGSNAMEPRESENT) { + short namlen; + nr = mread(codeaddr, &namlen, 2); + if (nr != 2) + return 1; /* incomplete */ + if (namlen >= sizeof(tab->name)) + namlen = sizeof(tab->name)-1; + codeaddr += 2; + nr = mread(codeaddr, tab->name, namlen); + tab->name[namlen] = '\0'; + codeaddr += namlen; + } + return 1; + } + } + return 0; /* hit max...sorry. */ +} + +void +mem_translate() { + int c; + unsigned long ea, va, vsid, vpn, page, hpteg_slot_primary, hpteg_slot_secondary, primary_hash, i, *steg, esid, stabl; + HPTE * hpte; + struct mm_struct * mm; + pte_t *ptep = NULL; + void * pgdir; + + c = inchar(); + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + termch = c; + scanhex((void *)&ea); + + if ((ea >= KRANGE_START) && (ea <= (KRANGE_START + (1UL<<60)))) { + ptep = 0; + vsid = get_kernel_vsid(ea); + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + } else { + // if in vmalloc range, use the vmalloc page directory + if ( ( ea >= VMALLOC_START ) && ( ea <= VMALLOC_END ) ) { + mm = &init_mm; + vsid = get_kernel_vsid( ea ); + } + // if in ioremap range, use the ioremap page directory + else if ( ( ea >= IMALLOC_START ) && ( ea <= IMALLOC_END ) ) { + mm = &ioremap_mm; + vsid = get_kernel_vsid( ea ); + } + // if in user range, use the current task's page directory + else if ( ( ea >= USER_START ) && ( ea <= USER_END ) ) { + mm = current->mm; + vsid = get_vsid(mm->context, ea ); + } + pgdir = mm->pgd; + va = ( vsid << 28 ) | ( ea & 0x0fffffff ); + ptep = find_linux_pte( pgdir, ea ); + } + + vpn = ((vsid << 28) | (((ea) & 0xFFFF000))) >> 12; + page = vpn & 0xffff; + esid = (ea >> 28) & 0xFFFFFFFFF; + + // Search the primary group for an available slot + primary_hash = ( vsid & 0x7fffffffff ) ^ page; + hpteg_slot_primary = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; + hpteg_slot_secondary = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; + + printf("ea : %.16lx\n", ea); + printf("esid : %.16lx\n", esid); + printf("vsid : %.16lx\n", vsid); + + printf("\nSoftware Page Table\n-------------------\n"); + printf("ptep : %.16lx\n", ((unsigned long *)ptep)); + if(ptep) { + printf("*ptep : %.16lx\n", *((unsigned long *)ptep)); + } + + hpte = htab_data.htab + hpteg_slot_primary; + printf("\nHardware Page Table\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("slot primary : %.16lx\n", hpteg_slot_primary); + printf("slot secondary : %.16lx\n", hpteg_slot_secondary); + printf("\nPrimary Group\n"); + for (i=0; i<8; ++i) { + if ( hpte->dw0.dw0.v != 0 ) { + printf("%d: (hpte)%.16lx %.16lx\n", i, hpte->dw0.dword0, hpte->dw1.dword1); + printf(" vsid: %.13lx api: %.2lx hash: %.1lx\n", + (hpte->dw0.dw0.avpn)>>5, + (hpte->dw0.dw0.avpn) & 0x1f, + (hpte->dw0.dw0.h)); + printf(" rpn: %.13lx \n", (hpte->dw1.dw1.rpn)); + printf(" 
pp: %.1lx \n", + ((hpte->dw1.dw1.pp0)<<2)|(hpte->dw1.dw1.pp)); + printf(" wimgn: %.2lx reference: %.1lx change: %.1lx\n", + ((hpte->dw1.dw1.w)<<4)| + ((hpte->dw1.dw1.i)<<3)| + ((hpte->dw1.dw1.m)<<2)| + ((hpte->dw1.dw1.g)<<1)| + ((hpte->dw1.dw1.n)<<0), + hpte->dw1.dw1.r, hpte->dw1.dw1.c); + } + hpte++; + } + + printf("\nSecondary Group\n"); + // Search the secondary group + hpte = htab_data.htab + hpteg_slot_secondary; + for (i=0; i<8; ++i) { + if(hpte->dw0.dw0.v) { + printf("%d: (hpte)%.16lx %.16lx\n", i, hpte->dw0.dword0, hpte->dw1.dword1); + printf(" vsid: %.13lx api: %.2lx hash: %.1lx\n", + (hpte->dw0.dw0.avpn)>>5, + (hpte->dw0.dw0.avpn) & 0x1f, + (hpte->dw0.dw0.h)); + printf(" rpn: %.13lx \n", (hpte->dw1.dw1.rpn)); + printf(" pp: %.1lx \n", + ((hpte->dw1.dw1.pp0)<<2)|(hpte->dw1.dw1.pp)); + printf(" wimgn: %.2lx reference: %.1lx change: %.1lx\n", + ((hpte->dw1.dw1.w)<<4)| + ((hpte->dw1.dw1.i)<<3)| + ((hpte->dw1.dw1.m)<<2)| + ((hpte->dw1.dw1.g)<<1)| + ((hpte->dw1.dw1.n)<<0), + hpte->dw1.dw1.r, hpte->dw1.dw1.c); + } + hpte++; + } + + printf("\nHardware Segment Table\n-----------------------\n"); + stabl = (unsigned long)(KERNELBASE+(_ASR&0xFFFFFFFFFFFFFFFE)); + steg = (unsigned long *)((stabl) | ((esid & 0x1f) << 7)); + + printf("stab base : %.16lx\n", stabl); + printf("slot : %.16lx\n", steg); + + for (i=0; i<8; ++i) { + printf("%d: (ste) %.16lx %.16lx\n", i, + *((unsigned long *)(steg+i*2)),*((unsigned long *)(steg+i*2+1)) ); + } +} + +void mem_check() { + unsigned long htab_size_bytes; + unsigned long htab_end; + unsigned long last_rpn; + HPTE *hpte1, *hpte2; + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + // last_rpn = (naca->physicalMemorySize-1) >> PAGE_SHIFT; + last_rpn = 0xfffff; + + printf("\nHardware Page Table Check\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + +#if 1 + for(hpte1 = htab_data.htab; hpte1 < (HPTE *)htab_end; hpte1++) { + if ( hpte1->dw0.dw0.v != 0 ) { + if ( hpte1->dw1.dw1.rpn <= last_rpn ) { + for(hpte2 = hpte1+1; hpte2 < (HPTE *)htab_end; hpte2++) { + if ( hpte2->dw0.dw0.v != 0 ) { + if(hpte1->dw1.dw1.rpn == hpte2->dw1.dw1.rpn) { + printf(" Duplicate rpn: %.13lx \n", (hpte1->dw1.dw1.rpn)); + printf(" hpte1: %16.16lx *hpte1: %16.16lx %16.16lx\n", + hpte1, hpte1->dw0.dword0, hpte1->dw1.dword1); + printf(" hpte2: %16.16lx *hpte2: %16.16lx %16.16lx\n", + hpte2, hpte2->dw0.dword0, hpte2->dw1.dword1); + } + } + } + } else { + printf(" Bogus rpn: %.13lx \n", (hpte1->dw1.dw1.rpn)); + printf(" hpte: %16.16lx *hpte: %16.16lx %16.16lx\n", + hpte1, hpte1->dw0.dword0, hpte1->dw1.dword1); + } + } + } +#endif + printf("\nDone -------------------\n"); +} + +void mem_find_real() { + unsigned long htab_size_bytes; + unsigned long htab_end; + unsigned long last_rpn; + HPTE *hpte1; + unsigned long pa, rpn; + int c; + + c = inchar(); + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + termch = c; + scanhex((void *)&pa); + rpn = pa >> 12; + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + // last_rpn = (naca->physicalMemorySize-1) >> PAGE_SHIFT; + last_rpn = 0xfffff; + + printf("\nMem Find RPN\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + + for(hpte1 = htab_data.htab; hpte1 < (HPTE *)htab_end; hpte1++) { + if ( hpte1->dw0.dw0.v != 0 ) { + if ( 
hpte1->dw1.dw1.rpn == rpn ) { + printf(" Found rpn: %.13lx \n", (hpte1->dw1.dw1.rpn)); + printf(" hpte: %16.16lx *hpte1: %16.16lx %16.16lx\n", + hpte1, hpte1->dw0.dword0, hpte1->dw1.dword1); + } + } + } + printf("\nDone -------------------\n"); +} + +void mem_find_vsid() { + unsigned long htab_size_bytes; + unsigned long htab_end; + HPTE *hpte1; + unsigned long vsid; + int c; + + c = inchar(); + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + termch = c; + scanhex((void *)&vsid); + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + + printf("\nMem Find VSID\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + + for(hpte1 = htab_data.htab; hpte1 < (HPTE *)htab_end; hpte1++) { + if ( hpte1->dw0.dw0.v != 0 ) { + if ( ((hpte1->dw0.dw0.avpn)>>5) == vsid ) { + printf(" Found vsid: %.16lx \n", ((hpte1->dw0.dw0.avpn) >> 5)); + printf(" hpte: %16.16lx *hpte1: %16.16lx %16.16lx\n", + hpte1, hpte1->dw0.dword0, hpte1->dw1.dword1); + } + } + } + printf("\nDone -------------------\n"); +} + +void mem_map_check_slab() { + int i, slab_count; + + i = max_mapnr; + slab_count = 0; + + while (i-- > 0) { + if (PageSlab(mem_map+i)){ + printf(" slab entry - mem_map entry =%p \n", mem_map+i); + slab_count ++; + } + + } + + printf(" count of pages for slab = %d \n", slab_count); + + +} + +void mem_map_lock_pages() { + int i, lock_count; + + i = max_mapnr; + lock_count = 0; + + while (i-- > 0) { + if (PageLocked(mem_map+i)){ + printf(" locked entry - mem_map entry =%p \n", mem_map+i); + lock_count ++; + } + + } + + printf(" count of locked pages = %d \n", lock_count); + + +} + + + +void mem_map_check_hash() { + int i = max_mapnr; + + while (i-- > 0) { + /* skip the reserved */ + if (!PageReserved(mem_map+i)) { + if (((mem_map+i)->next_hash) != NULL) { + if ( REGION_ID((mem_map+i)->next_hash) != KERNEL_REGION_ID ) { + printf(" mem_map check hash - non c0 entry - " + "address/value = %p %lx\n", mem_map+i,(mem_map+i)->next_hash); + } + if ((unsigned long)((mem_map+i)->next_hash) == KERNELBASE){ + printf(" mem_map check hash - 0x%lx entry = %p \n", + KERNELBASE, mem_map+i); + } + } + } else { + if (page_count(mem_map+i) < 0) { + printf(" reserved page with negative count- entry = %lx \n", mem_map+i); + } + } + } + printf(" mem_map check hash completed \n"); +} + +void mem_check_dup_rpn () { + unsigned long htab_size_bytes; + unsigned long htab_end; + unsigned long last_rpn; + HPTE *hpte1, *hpte2; + int dup_count; + struct task_struct *p; + unsigned long kernel_vsid_c0,kernel_vsid_c1,kernel_vsid_c2,kernel_vsid_c3; + unsigned long kernel_vsid_c4,kernel_vsid_c5,kernel_vsid_d,kernel_vsid_e; + unsigned long kernel_vsid_f; + unsigned long vsid0,vsid1,vsidB,vsid2; + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + // last_rpn = (naca->physicalMemorySize-1) >> PAGE_SHIFT; + last_rpn = 0xfffff; + + printf("\nHardware Page Table Check\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + + + for(hpte1 = htab_data.htab; hpte1 < (HPTE *)htab_end; hpte1++) { + if ( hpte1->dw0.dw0.v != 0 ) { + if ( hpte1->dw1.dw1.rpn <= last_rpn ) { + dup_count = 0; + for(hpte2 = hpte1+1; hpte2 < (HPTE *)htab_end; hpte2++) { + if ( hpte2->dw0.dw0.v != 0 ) { + if(hpte1->dw1.dw1.rpn == hpte2->dw1.dw1.rpn) { + dup_count++; + } + } + } 
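+				// Heuristic to cut noise: a real page can
+				// legitimately be mapped by a few HPTEs at
+				// once, so only RPNs that show up in more
+				// than five HPTEs are dumped below.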
+ if(dup_count > 5) { + printf(" Duplicate rpn: %.13lx \n", (hpte1->dw1.dw1.rpn)); + printf(" mem map array entry %p count = %d \n", + (mem_map+(hpte1->dw1.dw1.rpn)), (mem_map+(hpte1->dw1.dw1.rpn))->count); + for(hpte2 = hpte1+1; hpte2 < (HPTE *)htab_end; hpte2++) { + if ( hpte2->dw0.dw0.v != 0 ) { + if(hpte1->dw1.dw1.rpn == hpte2->dw1.dw1.rpn) { + printf(" hpte2: %16.16lx *hpte2: %16.16lx %16.16lx\n", + hpte2, hpte2->dw0.dword0, hpte2->dw1.dword1); + } + } + } + } + } else { + printf(" Bogus rpn: %.13lx \n", (hpte1->dw1.dw1.rpn)); + printf(" hpte: %16.16lx *hpte: %16.16lx %16.16lx\n", + hpte1, hpte1->dw0.dword0, hpte1->dw1.dword1); + } + } + if (xmon_interrupted()) + return; + } + + + + // print the kernel vsids + kernel_vsid_c0 = get_kernel_vsid(0xC000000000000000); + kernel_vsid_c1 = get_kernel_vsid(0xC000000010000000); + kernel_vsid_c2 = get_kernel_vsid(0xC000000020000000); + kernel_vsid_c3 = get_kernel_vsid(0xC000000030000000); + kernel_vsid_c4 = get_kernel_vsid(0xC000000040000000); + kernel_vsid_c5 = get_kernel_vsid(0xC000000050000000); + kernel_vsid_d = get_kernel_vsid(0xD000000000000000); + kernel_vsid_e = get_kernel_vsid(0xE000000000000000); + kernel_vsid_f = get_kernel_vsid(0xF000000000000000); + + printf(" kernel vsid - seg c0 = %lx\n", kernel_vsid_c0 ); + printf(" kernel vsid - seg c1 = %lx\n", kernel_vsid_c1 ); + printf(" kernel vsid - seg c2 = %lx\n", kernel_vsid_c2 ); + printf(" kernel vsid - seg c3 = %lx\n", kernel_vsid_c3 ); + printf(" kernel vsid - seg c4 = %lx\n", kernel_vsid_c4 ); + printf(" kernel vsid - seg c5 = %lx\n", kernel_vsid_c5 ); + printf(" kernel vsid - seg d = %lx\n", kernel_vsid_d ); + printf(" kernel vsid - seg e = %lx\n", kernel_vsid_e ); + printf(" kernel vsid - seg f = %lx\n", kernel_vsid_f ); + + + // print a list of valid vsids for the tasks + read_lock(&tasklist_lock); + for_each_task(p) + if(p->mm) { + struct mm_struct *mm = p->mm; + printf(" task = %p mm = %lx pgd %lx\n", + p, mm, mm->pgd); + vsid0 = get_vsid( mm->context, 0 ); + vsid1 = get_vsid( mm->context, 0x10000000 ); + vsid2 = get_vsid( mm->context, 0x20000000 ); + vsidB = get_vsid( mm->context, 0xB0000000 ); + printf(" context = %lx vsid seg 0 = %lx\n", mm->context, vsid0 ); + printf(" vsid seg 1 = %lx\n", vsid1 ); + printf(" vsid seg 2 = %lx\n", vsid2 ); + printf(" vsid seg 2 = %lx\n", vsidB ); + + printf("\n"); + }; + read_unlock(&tasklist_lock); + + + printf("\nDone -------------------\n"); + +} + + + +void mem_check_pagetable_vsids () { + unsigned long htab_size_bytes; + unsigned long htab_end; + unsigned long last_rpn; + struct task_struct *p; + unsigned long valid_table_count,invalid_table_count,bogus_rpn_count; + int found; + unsigned long user_address_table_count,kernel_page_table_count; + unsigned long pt_vsid; + HPTE *hpte1; + + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + // last_rpn = (naca->physicalMemorySize-1) >> PAGE_SHIFT; + last_rpn = 0xfffff; + + printf("\nHardware Page Table Check\n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + + valid_table_count = 0; + invalid_table_count = 0; + bogus_rpn_count = 0; + user_address_table_count = 0; + kernel_page_table_count = 0; + for(hpte1 = htab_data.htab; hpte1 < (HPTE *)htab_end; hpte1++) { + if ( hpte1->dw0.dw0.v != 0 ) { + valid_table_count++; + if ( hpte1->dw1.dw1.rpn <= last_rpn ) { + pt_vsid = (hpte1->dw0.dw0.avpn) >> 5; + if ((pt_vsid == 
get_kernel_vsid(0xC000000000000000)) | + (pt_vsid == get_kernel_vsid(0xC000000010000000)) | + (pt_vsid == get_kernel_vsid(0xC000000020000000)) | + (pt_vsid == get_kernel_vsid(0xC000000030000000)) | + (pt_vsid == get_kernel_vsid(0xC000000040000000)) | + (pt_vsid == get_kernel_vsid(0xC000000050000000)) | + (pt_vsid == get_kernel_vsid(0xD000000000000000)) | + (pt_vsid == get_kernel_vsid(0xE000000000000000)) | + (pt_vsid == get_kernel_vsid(0xF000000000000000)) ) + + { + kernel_page_table_count ++; + } + else + { + read_lock(&tasklist_lock); + found = 0; + for_each_task(p) { + + if(p->mm && (found == 0)) { + struct mm_struct *mm = p->mm; + + if ((pt_vsid == get_vsid( mm->context, 0 )) | + (pt_vsid == get_vsid( mm->context, 0x10000000 )) | + (pt_vsid == get_vsid( mm->context, 0x20000000 )) | + (pt_vsid == get_vsid( mm->context, 0x30000000 )) | + (pt_vsid == get_vsid( mm->context, 0x40000000 )) | + (pt_vsid == get_vsid( mm->context, 0x50000000 )) | + (pt_vsid == get_vsid( mm->context, 0x60000000 )) | + (pt_vsid == get_vsid( mm->context, 0x70000000 )) | + (pt_vsid == get_vsid( mm->context, 0x80000000 )) | + (pt_vsid == get_vsid( mm->context, 0x90000000 )) | + (pt_vsid == get_vsid( mm->context, 0xA0000000 )) | + (pt_vsid == get_vsid( mm->context, 0xB0000000 ))) + { + user_address_table_count ++; + found = 1; + } + } + } + read_unlock(&tasklist_lock); + if (found == 0) + { + printf(" vsid not found vsid = %lx, hpte = %p \n", + pt_vsid,hpte1); + printf(" rpn in entry = %lx \n", hpte1->dw1.dw1.rpn); + printf(" mem map address = %lx \n", mem_map + (hpte1->dw1.dw1.rpn)); + + } + else // found + { + } + + } // good rpn + + } + else + { + bogus_rpn_count ++; + + } + } + else + { + invalid_table_count++; + } + } + + + printf(" page table valid counts - valid entries = %lx invalid entries = %lx \n", + valid_table_count, invalid_table_count); + + printf(" bogus rpn entries ( probably io) = %lx \n", bogus_rpn_count); + + + + printf(" page table counts - kernel entries = %lx user entries = %lx \n", + kernel_page_table_count, user_address_table_count); + + printf("\nDone -------------------\n"); + +} + + +void mem_check_full_group() { + unsigned long htab_size_bytes; + unsigned count; + unsigned count_array[] = {0,0,0,0,0,0,0,0,0}; + unsigned i; + unsigned long htab_end; + HPTE *hpte1, *hpte2, *hpte3; + u64 rpn = 0; + + htab_size_bytes = htab_data.htab_num_ptegs * 128; // 128B / PTEG + htab_end = (unsigned long)htab_data.htab + htab_size_bytes; + + printf("\nHardware Page Find full groups \n-------------------\n"); + printf("htab base : %.16lx\n", htab_data.htab); + printf("htab size : %.16lx\n", htab_size_bytes); + + for (hpte1 = htab_data.htab; (unsigned long)hpte1 < htab_end; hpte1= hpte1 + 8) + { + count = 0; + hpte2 = hpte1; + for (i=0; i<8; ++i) + { + if ( hpte2->dw0.dw0.v != 0 ) + { + count++; + } + hpte2++; + } + if (count == 8 ) + { + printf(" full group starting with entry %lx \n", hpte1); + hpte3 = hpte1; + for (i=0; i<8; ++i) + { + if ( hpte3->dw0.dw0.v != 0 ) + { + printf(" entry number %d \n",i); + printf(" vsid: %.13lx api: %.2lx hash: %.1lx\n", + (hpte3->dw0.dw0.avpn)>>5, + (hpte3->dw0.dw0.avpn) & 0x1f, + (hpte3->dw0.dw0.h)); + printf(" rpn: %.13lx \n", (hpte3->dw1.dw1.rpn)); + // Dump out the memmap array entry address, corresponding virtual address, and reference count. 
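+ /*
+ * For reference: the vsid/api pair printed above is decoded straight
+ * from the abbreviated virtual page number, using the same split as
+ * the other scans in this file:
+ *
+ * unsigned long vsid = (hpte->dw0.dw0.avpn) >> 5; -- all but the low 5 bits
+ * unsigned long api = (hpte->dw0.dw0.avpn) & 0x1f; -- abbreviated page index
+ */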
+ rpn = hpte3->dw1.dw1.rpn; + printf(" mem_map+rpn=%p, virtual@=%p, count=%lx \n", mem_map+rpn, (mem_map+rpn)->virtual, (mem_map+rpn)->count); + } + hpte3++; + } + if (xmon_interrupted()) + return; + } + + count_array[count]++; + } + for (i=1; i<9; ++i) + { + printf(" group count for size %i = %lx \n", i, count_array[i]); + } + + printf("\nDone -------------------\n"); +} + + + +static void show_task(struct task_struct * p) +{ + /* unsigned long free = 0; --Unused */ + int state; + static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; + + printf("--------------------------------------------------------------------------\n"); + printf("%-11.11s pid: %5.5lx ppid: %5.5lx state: ", + p->comm, p->pid, p->p_pptr->pid); + state = p->state ? ffz(~p->state) + 1 : 0; + if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) + printf(stat_nam[state]); + else + printf(" "); + if (p == current) + printf(" pc: current task "); + else + printf(" pc: 0x%16.16lx ", thread_saved_pc(&p->thread)); + + if (p->p_cptr) + printf("%5d ", p->p_cptr->pid); + else + printf(" "); + if (!p->mm) + printf(" (L-TLB) "); + else + printf(" (NOTLB) "); + if (p->p_ysptr) + printf("%7d", p->p_ysptr->pid); + else + printf(" "); + if (p->p_osptr) + printf(" %5d\n", p->p_osptr->pid); + else + printf("\n"); + + { + struct sigqueue *q; + char s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1]; + + render_sigset_t(&p->pending.signal, s); + render_sigset_t(&p->blocked, b); + printf(" sig: %d %s %s :", signal_pending(p), s, b); + for (q = p->pending.head; q ; q = q->next) + printf(" %d", q->info.si_signo); + printf(" X\n"); + } + + printf(" pers : %lx current : %lx", + p->personality, p); + printf("\n"); + + printf(" thread : 0x%16.16lx ksp : 0x%16.16lx\n", + &(p->thread), (p->thread.ksp)); + printf(" pgdir : 0x%16.16lx\n", (p->thread.pgdir)); + printf(" regs : 0x%16.16lx sysc : 0x%16.16lx\n", + (p->thread.regs), (p->thread.last_syscall)); + if(p->thread.regs) { + printf(" nip : 0x%16.16lx msr : 0x%16.16lx\n", + ((p->thread.regs)->nip), ((p->thread.regs)->msr)); + printf(" ctr : 0x%16.16lx link : 0x%16.16lx\n", + ((p->thread.regs)->ctr), ((p->thread.regs)->link)); + printf(" xer : 0x%16.16lx ccr : 0x%16.16lx\n", + ((p->thread.regs)->xer), ((p->thread.regs)->ccr)); + printf(" trap : 0x%16.16lx\n", + ((p->thread.regs)->trap)); + printf(" dar : 0x%16.16lx dsis : 0x%16.16lx\n", + ((p->thread.regs)->dar), ((p->thread.regs)->dsisr)); + printf(" rslt : 0x%16.16lx org3 : 0x%16.16lx\n", + ((p->thread.regs)->result), (p->thread.regs->orig_gpr3)); + } + + if(p->mm) { + struct mm_struct *mm = p->mm; + printf(" mm : 0x%16.16lx pgd : 0x%16.16lx\n", + mm, mm->pgd); + printf(" context: 0x%16.16lx mmap : 0x%16.16lx\n", + mm->context, mm->mmap); + + printf("\n"); + } + +} + +static void xmon_show_state(void) +{ + struct task_struct *p; + +#if (BITS_PER_LONG == 32) + printf("\n" + " free sibling\n"); + printf("task name st PC stack pid father child younger older\n"); +#else + printf("\n" + " free sibling\n"); + printf(" task PC stack pid father child younger older\n"); +#endif + read_lock(&tasklist_lock); + for_each_task(p) + show_task(p); + read_unlock(&tasklist_lock); +} + +static void debug_trace(void) { + unsigned long val, cmd, on; + + cmd = skipbl(); + if (cmd == '\n') { + /* show current state */ + unsigned long i; + printf("naca->debug_switch = 0x%lx\n", naca->debug_switch); + for (i = 0; i < PPCDBG_NUM_FLAGS ;i++) { + on = PPCDBG_BITVAL(i) & naca->debug_switch; + printf("%02x %s %12s ", i, on ? "on " : "off", trace_names[i] ? 
trace_names[i] : ""); + if (((i+1) % 3) == 0) + printf("\n"); + } + printf("\n"); + return; + } + while (cmd != '\n') { + on = 1; /* default if no sign given */ + while (cmd == '+' || cmd == '-') { + on = (cmd == '+'); + cmd = inchar(); + if (cmd == ' ' || cmd == '\n') { /* Turn on or off based on + or - */ + naca->debug_switch = on ? PPCDBG_ALL:PPCDBG_NONE; + printf("Setting all values to %s...\n", on ? "on" : "off"); + if (cmd == '\n') return; + else cmd = skipbl(); + } + else + termch = cmd; + } + termch = cmd; /* not +/- ... let scanhex see it */ + scanhex((void *)&val); + if (val >= 64) { + printf("Value %x out of range:\n", val); + return; + } + if (on) { + naca->debug_switch |= PPCDBG_BITVAL(val); + printf("enable debug %x %s\n", val, trace_names[val] ? trace_names[val] : ""); + } else { + naca->debug_switch &= ~PPCDBG_BITVAL(val); + printf("disable debug %x %s\n", val, trace_names[val] ? trace_names[val] : ""); + } + cmd = skipbl(); + } +} diff -uNr --exclude=CVS ../kernel.org/linux/drivers/Makefile linuxppc64_2_4/drivers/Makefile --- ../kernel.org/linux/drivers/Makefile Sun Oct 21 12:12:41 2001 +++ linuxppc64_2_4/drivers/Makefile Fri Oct 26 02:25:06 2001 @@ -8,7 +8,7 @@ mod-subdirs := dio mtd sbus video macintosh usb input telephony sgi ide \ message/i2o message/fusion scsi md ieee1394 pnp isdn atm \ - fc4 net/hamradio i2c acpi bluetooth + fc4 net/hamradio i2c acpi bluetooth iseries subdir-y := parport char block net sound misc media cdrom subdir-m := $(subdir-y) @@ -25,6 +25,7 @@ subdir-$(CONFIG_VT) += video subdir-$(CONFIG_MAC) += macintosh subdir-$(CONFIG_ALL_PPC) += macintosh +subdir-$(CONFIG_PPC_ISERIES) += iseries subdir-$(CONFIG_USB) += usb subdir-$(CONFIG_INPUT) += input subdir-$(CONFIG_PHONE) += telephony diff -uNr --exclude=CVS ../kernel.org/linux/drivers/block/genhd.c linuxppc64_2_4/drivers/block/genhd.c --- ../kernel.org/linux/drivers/block/genhd.c Wed Oct 17 16:46:29 2001 +++ linuxppc64_2_4/drivers/block/genhd.c Fri Oct 26 02:25:10 2001 @@ -198,6 +198,9 @@ #ifdef CONFIG_VT console_map_init(); #endif +#ifdef CONFIG_VIODASD + viodasd_init(); +#endif return 0; } diff -uNr --exclude=CVS ../kernel.org/linux/drivers/block/ll_rw_blk.c linuxppc64_2_4/drivers/block/ll_rw_blk.c --- ../kernel.org/linux/drivers/block/ll_rw_blk.c Sat Oct 13 12:30:30 2001 +++ linuxppc64_2_4/drivers/block/ll_rw_blk.c Fri Oct 26 02:25:10 2001 @@ -1162,6 +1162,9 @@ #ifdef CONFIG_BLK_DEV_XD xd_init(); #endif +#ifdef CONFIG_VIOCD + viocd_init(); +#endif #ifdef CONFIG_BLK_DEV_MFM mfm_init(); #endif diff -uNr --exclude=CVS ../kernel.org/linux/drivers/cdrom/Makefile linuxppc64_2_4/drivers/cdrom/Makefile --- ../kernel.org/linux/drivers/cdrom/Makefile Fri Dec 29 16:07:21 2000 +++ linuxppc64_2_4/drivers/cdrom/Makefile Thu Oct 11 11:10:49 2001 @@ -27,6 +27,7 @@ obj-$(CONFIG_BLK_DEV_IDECD) += cdrom.o obj-$(CONFIG_BLK_DEV_SR) += cdrom.o obj-$(CONFIG_PARIDE_PCD) += cdrom.o +obj-$(CONFIG_VIOCD) += cdrom.o obj-$(CONFIG_AZTCD) += aztcd.o obj-$(CONFIG_CDU31A) += cdu31a.o cdrom.o diff -uNr --exclude=CVS ../kernel.org/linux/drivers/char/Config.in linuxppc64_2_4/drivers/char/Config.in --- ../kernel.org/linux/drivers/char/Config.in Mon Oct 15 15:31:51 2001 +++ linuxppc64_2_4/drivers/char/Config.in Fri Oct 26 02:25:10 2001 @@ -101,6 +101,7 @@ fi dep_tristate 'Support for user-space parallel port device drivers' CONFIG_PPDEV $CONFIG_PARPORT fi +dep_bool 'pSeries Hypervisor Virtual Console support' CONFIG_HVC_CONSOLE $CONFIG_PPC64 source drivers/i2c/Config.in @@ -181,6 +182,9 @@ dep_tristate 'Intel i8x0 Random Number 
Generator support' CONFIG_INTEL_RNG $CONFIG_PCI tristate '/dev/nvram support' CONFIG_NVRAM +if [ "$CONFIG_PPC_ISERIES" != "y" ]; then + tristate 'Enhanced Real Time Clock Support' CONFIG_RTC +fi tristate 'Enhanced Real Time Clock Support' CONFIG_RTC if [ "$CONFIG_IA64" = "y" ]; then bool 'EFI Real Time Clock Services' CONFIG_EFI_RTC diff -uNr --exclude=CVS ../kernel.org/linux/drivers/char/Makefile linuxppc64_2_4/drivers/char/Makefile --- ../kernel.org/linux/drivers/char/Makefile Mon Oct 15 15:36:48 2001 +++ linuxppc64_2_4/drivers/char/Makefile Fri Oct 26 02:25:11 2001 @@ -162,6 +162,7 @@ obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o +obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o subdir-$(CONFIG_RIO) += rio diff -uNr --exclude=CVS ../kernel.org/linux/drivers/char/hvc_console.c linuxppc64_2_4/drivers/char/hvc_console.c --- ../kernel.org/linux/drivers/char/hvc_console.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/char/hvc_console.c Thu Nov 29 23:33:06 2001 @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2001 Anton Blanchard , IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int hvc_count(int *); +extern int hvc_get_chars(int index, char *buf, int count); +extern int hvc_put_chars(int index, const char *buf, int count); + +#define HVC_MAJOR 229 +#define HVC_MINOR 0 + +#define MAX_NR_HVC_CONSOLES 4 + +#define TIMEOUT ((HZ + 99) / 100) + +struct tty_driver hvc_driver; +static int hvc_refcount; +static struct tty_struct *hvc_table[MAX_NR_HVC_CONSOLES]; +static struct termios *hvc_termios[MAX_NR_HVC_CONSOLES]; +static struct termios *hvc_termios_locked[MAX_NR_HVC_CONSOLES]; +static int hvc_offset; + +#define N_OUTBUF 16 + +#define __ALIGNED__ __attribute__((__aligned__(8))) + +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char outbuf[N_OUTBUF] __ALIGNED__; + int n_outbuf; +}; + +struct hvc_struct hvc_struct[MAX_NR_HVC_CONSOLES]; + +static int hvc_open(struct tty_struct *tty, struct file * filp) +{ + int line = MINOR(tty->device) - tty->driver.minor_start; + struct hvc_struct *hp; + + if (line < 0 || line >= MAX_NR_HVC_CONSOLES) + return -ENODEV; + hp = &hvc_struct[line]; + + tty->driver_data = hp; + spin_lock(&hp->lock); + hp->tty = tty; + hp->count++; + spin_unlock(&hp->lock); + + return 0; +} + +static void hvc_close(struct tty_struct *tty, struct file * filp) +{ + struct hvc_struct *hp = tty->driver_data; + + if (tty_hung_up_p(filp)) + return; + spin_lock(&hp->lock); + if (--hp->count == 0) + hp->tty = NULL; + else if (hp->count < 0) + 
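+ /* count went negative: more closes than opens on this vterm */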
printk(KERN_ERR "hvc_close %lu: oops, count is %d\n", + hp - hvc_struct, hp->count); + spin_unlock(&hp->lock); +} + +/* called with hp->lock held */ +static void hvc_push(struct hvc_struct *hp) +{ + int n; + + n = hvc_put_chars(hp->index + hvc_offset, hp->outbuf, hp->n_outbuf); + if (n <= 0) { + if (n == 0) + return; + /* throw away output on error; this happens when + there is no session connected to the vterm. */ + hp->n_outbuf = 0; + } else + hp->n_outbuf -= n; + if (hp->n_outbuf > 0) + memmove(hp->outbuf, hp->outbuf + n, hp->n_outbuf); + else + hp->do_wakeup = 1; +} + +static int hvc_write(struct tty_struct *tty, int from_user, + const unsigned char *buf, int count) +{ + struct hvc_struct *hp = tty->driver_data; + char *p; + int todo, written = 0; + + spin_lock(&hp->lock); + while (count > 0 && (todo = N_OUTBUF - hp->n_outbuf) > 0) { + if (todo > count) + todo = count; + p = hp->outbuf + hp->n_outbuf; + if (from_user) { + todo -= copy_from_user(p, buf, todo); + if (todo == 0) { + if (written == 0) + written = -EFAULT; + break; + } + } else + memcpy(p, buf, todo); + count -= todo; + buf += todo; + hp->n_outbuf += todo; + written += todo; + hvc_push(hp); + } + spin_unlock(&hp->lock); + + return written; +} + +static int hvc_write_room(struct tty_struct *tty) +{ + struct hvc_struct *hp = tty->driver_data; + + return N_OUTBUF - hp->n_outbuf; +} + +static int hvc_chars_in_buffer(struct tty_struct *tty) +{ + struct hvc_struct *hp = tty->driver_data; + + return hp->n_outbuf; +} + +static void hvc_poll(int index) +{ + struct hvc_struct *hp = &hvc_struct[index]; + struct tty_struct *tty; + int i, n; + char buf[16] __ALIGNED__; + + spin_lock(&hp->lock); + + if (hp->n_outbuf > 0) + hvc_push(hp); + + tty = hp->tty; + if (tty) { + for (;;) { + if (TTY_FLIPBUF_SIZE - tty->flip.count < sizeof(buf)) + break; + n = hvc_get_chars(index + hvc_offset, buf, sizeof(buf)); + if (n <= 0) + break; + for (i = 0; i < n; ++i) + tty_insert_flip_char(tty, buf[i], 0); + } + if (tty->flip.count) + tty_schedule_flip(tty); + + if (hp->do_wakeup) { + hp->do_wakeup = 0; + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) + && tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + wake_up_interruptible(&tty->write_wait); + } + } + + spin_unlock(&hp->lock); +} + +int khvcd(void *unused) +{ + int i; + + daemonize(); + reparent_to_init(); + strcpy(current->comm, "khvcd"); + sigfillset(¤t->blocked); + + for (;;) { + for (i = 0; i < MAX_NR_HVC_CONSOLES; ++i) + hvc_poll(i); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(TIMEOUT); + } +} + +int __init hvc_init(void) +{ + int i; + + memset(&hvc_driver, 0, sizeof(struct tty_driver)); + + hvc_driver.magic = TTY_DRIVER_MAGIC; + hvc_driver.driver_name = "hvc"; + hvc_driver.name = "hvc/%d"; + hvc_driver.major = HVC_MAJOR; + hvc_driver.minor_start = HVC_MINOR; + hvc_driver.num = hvc_count(&hvc_offset); + if (hvc_driver.num > MAX_NR_HVC_CONSOLES) + hvc_driver.num = MAX_NR_HVC_CONSOLES; + hvc_driver.type = TTY_DRIVER_TYPE_SYSTEM; + hvc_driver.init_termios = tty_std_termios; + hvc_driver.flags = TTY_DRIVER_REAL_RAW; + hvc_driver.refcount = &hvc_refcount; + hvc_driver.table = hvc_table; + hvc_driver.termios = hvc_termios; + hvc_driver.termios_locked = hvc_termios_locked; + + hvc_driver.open = hvc_open; + hvc_driver.close = hvc_close; + hvc_driver.write = hvc_write; + hvc_driver.write_room = hvc_write_room; + hvc_driver.chars_in_buffer = hvc_chars_in_buffer; + + for (i = 0; i < hvc_driver.num; i++) { + hvc_struct[i].lock = SPIN_LOCK_UNLOCKED; + hvc_struct[i].index = i; + 
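+ /*
+ * A note on the write path set up here: output is staged in the fixed
+ * 16-byte outbuf and drained by hvc_push() above, and hvc_put_chars()
+ * may accept only part of a buffer. A minimal user-space sketch of
+ * that partial-write handling, with put_chars() standing in for the
+ * hypervisor call:
+ *
+ * static char buf[16];
+ * static int n;
+ *
+ * void push(int (*put_chars)(const char *, int))
+ * {
+ * int sent = put_chars(buf, n);
+ * if (sent < 0) { n = 0; return; } -- error: drop the output
+ * if (sent == 0) return; -- no taker yet: retry on next poll
+ * n -= sent;
+ * if (n > 0)
+ * memmove(buf, buf + sent, n); -- keep the unsent tail
+ * }
+ */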
tty_register_devfs(&hvc_driver, 0, hvc_driver.minor_start + i); + } + + if (tty_register_driver(&hvc_driver)) + panic("Couldn't register hvc console driver\n"); + + if (hvc_driver.num > 0) + kernel_thread(khvcd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + + return 0; +} + +static void __exit hvc_exit(void) +{ +} + +void hvc_console_print(struct console *co, const char *b, unsigned count) +{ + char c[16] __ALIGNED__; + unsigned i, n; + int r, donecr = 0; + + i = n = 0; + while (count > 0 || i > 0) { + if (count > 0 && i < sizeof(c)) { + if (b[n] == '\n' && !donecr) { + c[i++] = '\r'; + donecr = 1; + } else { + c[i++] = b[n++]; + donecr = 0; + --count; + } + } else { + r = hvc_put_chars(co->index + hvc_offset, c, i); + if (r < 0) { + /* throw away chars on error */ + i = 0; + } else if (r > 0) { + i -= r; + if (i > 0) + memmove(c, c+r, i); + } + } + } +} + +static kdev_t hvc_console_device(struct console *c) +{ + return MKDEV(HVC_MAJOR, HVC_MINOR + c->index); +} + +int hvc_wait_for_keypress(struct console *co) +{ + char c[16] __ALIGNED__; + + while (hvc_get_chars(co->index, &c[0], 1) < 1) + ; + return 0; +} + +static int __init hvc_console_setup(struct console *co, char *options) +{ + if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES + || co->index >= hvc_count(&hvc_offset)) + return -1; + return 0; +} + +struct console hvc_con_driver = { + name: "hvc", + write: hvc_console_print, + device: hvc_console_device, + wait_key: hvc_wait_for_keypress, + setup: hvc_console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; + +int __init hvc_console_init(void) +{ + register_console(&hvc_con_driver); + return 0; +} + +module_init(hvc_init); +module_exit(hvc_exit); diff -uNr --exclude=CVS ../kernel.org/linux/drivers/char/tty_io.c linuxppc64_2_4/drivers/char/tty_io.c --- ../kernel.org/linux/drivers/char/tty_io.c Sat Sep 22 13:51:43 2001 +++ linuxppc64_2_4/drivers/char/tty_io.c Fri Oct 26 02:25:11 2001 @@ -2183,6 +2183,11 @@ * set up the console device so that later boot sequences can * inform about problems etc.. */ + +#ifdef CONFIG_VIOCONS + viocons_init(); +#endif + #ifdef CONFIG_VT con_init(); #endif @@ -2239,6 +2244,9 @@ #ifdef CONFIG_SERIAL_TX3912_CONSOLE tx3912_console_init(); #endif +#ifdef CONFIG_HVC_CONSOLE + hvc_console_init(); +#endif } static struct tty_driver dev_tty_driver, dev_syscons_driver; @@ -2289,6 +2297,10 @@ /* console calls tty_register_driver() before kmalloc() works. * Thus, we can't devfs_register() then. Do so now, instead. */ +#ifdef CONFIG_VIOCONS + viocons_init2(); +#endif + #ifdef CONFIG_VT con_init_devfs(); #endif diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/Makefile linuxppc64_2_4/drivers/iseries/Makefile --- ../kernel.org/linux/drivers/iseries/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/Makefile Thu Oct 11 11:10:49 2001 @@ -0,0 +1,43 @@ +# +# Makefile for the iSeries-specific device drivers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now inherited from the +# parent makes.. +# + +# The target object and module list name. + +# O_TARGET := macintosh.o + +O_TARGET := iseries.o + +# Objects that export symbols. + +# export-objs := adb.o rtc.o mac_hid.o via-pmu.o + +export-objs := veth.o viocons.o viotape.o viodasd.o viocd.o viopath.o + +# Object file lists. 
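+# (kbuild idiom, for reference: each obj-$(CONFIG_FOO) assignment below
+# expands into obj-y, obj-m, obj-n or obj- depending on how CONFIG_FOO
+# was answered, so a driver is linked in, built as a module, or skipped
+# without any further logic in this Makefile.)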
+ +obj-y := +obj-m := +obj-n := +obj- := + +# Each configuration option enables a list of files. + +obj-$(CONFIG_VETH) += veth.o +obj-$(CONFIG_VIOCONS) += viocons.o +obj-$(CONFIG_VIOPATH) += viopath.o +obj-$(CONFIG_VIOTAPE) += viotape.o +obj-$(CONFIG_VIODASD) += viodasd.o +obj-$(CONFIG_VIOCD) += viocd.o + +# The global Rules.make. + +include $(TOPDIR)/Rules.make + diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/veth.c linuxppc64_2_4/drivers/iseries/veth.c --- ../kernel.org/linux/drivers/iseries/veth.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/veth.c Wed Nov 14 13:42:56 2001 @@ -0,0 +1,1760 @@ +/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */ + +/**************************************************************************/ +/* */ +/* IBM eServer iSeries Virtual Ethernet Device Driver */ +/* Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with iSeries LPAR Linux. It utilizes low-level message passing*/ +/* provided by the hypervisor to enable an ethernet-like network device */ +/* that can be used to enable inter-partition communications on the same */ +/* physical iSeries. */ +/* */ +/* The iSeries LPAR hypervisor has currently defined the ability for a */ +/* partition to communicate on up to 16 different virtual ethernets, all */ +/* dynamically configurable, at least for an OS/400 partition. The */ +/* dynamic nature is not supported for Linux yet. */ +/* */ +/* Each virtual ethernet a given Linux partition participates in will */ +/* cause a network device with the form ethXX to be created, */ +/* */ +/* The virtual ethernet a given ethXX virtual ethernet device talks on */ +/* can be determined either by dumping /proc/iSeries/veth/vethX, where */ +/* X is the virtual ethernet number, and the netdevice name will be */ +/* printed out. The virtual ethernet a given ethX device communicates on */ +/* is also printed to the printk() buffer at module load time. */ +/* */ +/* This driver (and others like it on other partitions) is responsible for*/ +/* routing packets to and from other partitions. The MAC addresses used */ +/* by the virtual ethernets contain meaning, and should not be modified. */ +/* Doing so could disable the ability of your Linux partition to */ +/* communicate with the other OS/400 partitions on your physical iSeries. */ +/* Similarly, setting the MAC address to something other than the */ +/* "virtual burned-in" address is not allowed, for the same reason. */ +/* */ +/* Notes: */ +/* */ +/* 1. 
Although there is the capability to talk on multiple shared */ +/* ethernets to communicate to the same partition, each shared */ +/* ethernet to a given partition X will use a finite, shared amount */ +/* of hypervisor messages to do the communication. So having 2 shared */ +/* ethernets to the same remote partition DOES NOT double the */ +/* available bandwidth. Each of the 2 shared ethernets will share the */ +/* same bandwidth available to another. */ +/* */ +/* 2. It is allowed to have a virtual ethernet that does not communicate */ +/* with any other partition. It won't do anything, but it's allowed. */ +/* */ +/* 3. There is no "loopback" mode for a virtual ethernet device. If you */ +/* send a packet to your own mac address, it will just be dropped, you */ +/* won't get it on the receive side. Such a thing could be done, */ +/* but my default driver DOES NOT do so. */ +/* */ +/* 4. Multicast addressing is implemented via broadcasting the multicast */ +/* frames to other partitions. It is the responsibility of the */ +/* receiving partition to filter the addresses desired. */ +/* */ +/* 5. This module utilizes several different bottom half handlers for */ +/* non-high-use path function (setup, error handling, etc.). Multiple */ +/* bottom halves were used because only one would not keep up to the */ +/* much faster iSeries device drivers this Linux driver is talking to. */ +/* All hi-priority work (receiving frames, handling frame acks) is done*/ +/* in the interrupt handler for maximum performance. */ +/* */ +/* Tunable parameters: */ +/* */ +/* VethBuffersToAllocate: This compile time option defaults to 120. It can*/ +/* be safely changed to something greater or less than the default. It */ +/* controls how much memory Linux will allocate per remote partition it is*/ +/* communicating with. The user can play with this to see how it affects */ +/* performance, packets dropped, etc. Without trying to understand the */ +/* complete driver, it can be thought of as the maximum number of packets */ +/* outstanding to a remote partition at a time. */ +/* */ +/**************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _VETH_H +#include "veth.h" +#endif +#ifndef _HVLPCONFIG_H +#include +#endif +#ifndef _VETH_PROC_H +#include +#endif +#ifndef _HVTYPES_H +#include +#endif +#ifndef _ISERIES_PROC_H +#include +#endif +#include +#include + + +#define veth_printk(fmt, args...) \ +printk(KERN_INFO "%s: " fmt, __FILE__, ## args) + +#define veth_error_printk(fmt, args...) 
\ +printk(KERN_ERR "(%s:%3.3d) ERROR: " fmt, __FILE__, __LINE__ , ## args) + +#ifdef MODULE + #define VIRT_TO_ABSOLUTE(a) virt_to_absolute_outline(a) +#else + #define VIRT_TO_ABSOLUTE(a) virt_to_absolute(a) +#endif + +static const char __initdata *version = +"v0.9 02/15/2001 Kyle Lucke, klucke@us.ibm.com\n"; + +static int probed __initdata = 0; +#define VethBuffersToAllocate 120 + +static struct VethFabricMgr *mFabricMgr = NULL; +static struct proc_dir_entry * veth_proc_root = NULL; + +DECLARE_MUTEX_LOCKED(VethProcSemaphore); + +static int veth_open(struct net_device *dev); +static int veth_close(struct net_device *dev); +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int veth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static void veth_handleEvent(struct HvLpEvent *, struct pt_regs *); +static void veth_handleAck(struct HvLpEvent *); +static void veth_handleInt(struct HvLpEvent *); +static void veth_openConnections(void); +static void veth_openConnection(u8, int lockMe); +static void veth_closeConnection(u8, int lockMe); +static void veth_intFinishOpeningConnections(void *, int number); +static void veth_finishOpeningConnections(void *); +static void veth_finishOpeningConnectionsLocked(struct VethLpConnection *); +static int veth_multicast_wanted(struct VethPort *port, u64 dest); +static void veth_set_multicast_list(struct net_device *dev); + +static void veth_sendCap(struct VethLpConnection *); +static void veth_sendMonitor(struct VethLpConnection *); +static void veth_takeCap(struct VethLpConnection *, struct VethLpEvent *); +static void veth_takeCapAck(struct VethLpConnection *, struct VethLpEvent *); +static void veth_takeMonitorAck(struct VethLpConnection *, struct VethLpEvent *); +static void veth_msgsInit(struct VethLpConnection *connection); +static void veth_recycleMsg(struct VethLpConnection *, u16); +static void veth_capBh(struct VethLpConnection *); +static void veth_capAckBh(struct VethLpConnection *); +static void veth_monitorAckBh(struct VethLpConnection *); +static void veth_takeFrames(struct VethLpConnection *, struct VethLpEvent *); +static void veth_pTransmit(struct sk_buff *skb, HvLpIndex remoteLp, struct net_device *dev); +static struct net_device_stats *veth_get_stats(struct net_device *dev); +static void veth_intFinishMsgsInit(void *, int); +static void veth_finishMsgsInit(struct VethLpConnection *connection); +static void veth_intFinishCapBh(void *, int); +static void veth_finishCapBh(struct VethLpConnection *connection); +static void veth_finishCapBhLocked(struct VethLpConnection *connection); +static void veth_finishSendCap(struct VethLpConnection *connection); +static void veth_timedAck(unsigned long connectionPtr); +#ifdef MODULE +static void veth_waitForEnd(void); +#endif +static void veth_failMe(struct VethLpConnection *connection); + +extern struct pci_dev * iSeries_veth_dev; + +int __init veth_probe(void) +{ + struct net_device *dev= NULL; + struct VethPort *port = NULL; + int vlansFound = 0; + int displayVersion = 0; + + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + int vlanIndex = 0; + + if (probed) + return -ENODEV; + probed = 1; + + while (vlanMap != 0) + { + int bitOn = vlanMap & 0x8000; + + if (bitOn) + { + vlansFound++; + + dev = init_etherdev(NULL, sizeof(struct VethPort)); + + if (dev == NULL) { + veth_error_printk("Unable to allocate net_device structure!\n"); + break; + } + + if (!dev->priv) + dev->priv = kmalloc(sizeof(struct VethPort), GFP_KERNEL); + if (!dev->priv) { + 
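+ /* neither init_etherdev() nor the fallback kmalloc() produced a private area */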
veth_error_printk("Unable to allocate memory\n"); + return -ENOMEM; + } + + veth_printk("Found an ethernet device %s (veth=%d) (addr=%p)\n", dev->name, vlanIndex, dev); + port = mFabricMgr->mPorts[vlanIndex] = (struct VethPort *)dev->priv; + memset(port, 0, sizeof(struct VethPort)); + rwlock_init(&(port->mMcastGate)); + mFabricMgr->mPorts[vlanIndex]->mDev = dev; + + dev->dev_addr[0] = 0x02; + dev->dev_addr[1] = 0x01; + dev->dev_addr[2] = 0xFF; + dev->dev_addr[3] = vlanIndex; + dev->dev_addr[4] = 0xFF; + dev->dev_addr[5] = HvLpConfig_getLpIndex_outline(); + dev->mtu = 9000; + + memcpy(&(port->mMyAddress), dev->dev_addr, 6); + + dev->open = &veth_open; + dev->hard_start_xmit = &veth_start_xmit; + dev->stop = &veth_close; + dev->get_stats = veth_get_stats; + dev->set_multicast_list = &veth_set_multicast_list; + dev->do_ioctl = &veth_ioctl; + + /* display version info if adapter is found */ + if (!displayVersion) + { + /* set display flag to TRUE so that */ + /* we only display this string ONCE */ + displayVersion = 1; + veth_printk("%s", version); + } + + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + if (vlansFound > 0) + return 0; + else + return -ENODEV; +} + +#ifdef MODULE +MODULE_AUTHOR("Kyle Lucke "); +MODULE_DESCRIPTION("iSeries Virtual ethernet driver"); +MODULE_LICENSE("GPL"); + +DECLARE_MUTEX_LOCKED(VethModuleBhDone); +int VethModuleReopen = 1; + +void veth_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + int i=0; + HvLpIndex thisLp = HvLpConfig_getLpIndex_outline(); + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + int vlanIndex = 0; + + for (i=0; i < HvMaxArchitectedLps; ++i) + { + if (i != thisLp) + { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(thisLp, i)) + { + char name[10] = ""; + sprintf(name, "lpar%d", i); + remove_proc_entry(name, veth_proc_root); + } + } + } + + while (vlanMap != 0) + { + int bitOn = vlanMap & 0x8000; + + if (bitOn) + { + char name[10] = ""; + sprintf(name, "veth%d", vlanIndex); + remove_proc_entry(name, veth_proc_root); + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + remove_proc_entry("veth", iSeries_proc); + + up(&VethProcSemaphore); +} + +void veth_waitForEnd(void) +{ + up(&VethModuleBhDone); +} + +void __exit veth_module_cleanup(void) +{ + int i; + struct VethFabricMgr *myFm = mFabricMgr; + struct tq_struct myBottomHalf; + struct net_device *thisOne = NULL; + + VethModuleReopen = 0; + + for (i = 0; i < HvMaxArchitectedLps; ++i) + { + veth_closeConnection(i, 1); + } + + myBottomHalf.routine = (void *)(void *)veth_waitForEnd; + + queue_task(&myBottomHalf, &tq_immediate); + mark_bh(IMMEDIATE_BH); + + down(&VethModuleBhDone); + + HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan); + + mb(); + mFabricMgr = NULL; + mb(); + + down(&VethProcSemaphore); + + iSeries_proc_callback(&veth_proc_delete); + + down(&VethProcSemaphore); + + for (i = 0; i < HvMaxArchitectedLps; ++i) + { + if (myFm->mConnection[i].mNumberAllocated + myFm->mConnection[i].mNumberRcvMsgs > 0) + { + mf_deallocateLpEvents(myFm->mConnection[i].mRemoteLp, + HvLpEvent_Type_VirtualLan, + myFm->mConnection[i].mNumberAllocated + myFm->mConnection[i].mNumberRcvMsgs, + NULL, + NULL); + } + + if (myFm->mConnection[i].mMsgs != NULL) + { + kfree(myFm->mConnection[i].mMsgs); + } + } + + for (i = 0; i < HvMaxArchitectedVirtualLans; ++i) + { + if (myFm->mPorts[i] != NULL) + { + thisOne = myFm->mPorts[i]->mDev; + myFm->mPorts[i] = NULL; + + mb(); + + if (thisOne != NULL) + { + veth_printk("Unregistering %s (veth=%d)\n", thisOne->name, i); + unregister_netdev(thisOne); + } + } + } 
+ + kfree(myFm); +} + +module_exit(veth_module_cleanup); +#endif + + +void veth_proc_init(struct proc_dir_entry *iSeries_proc) +{ + long i=0; + HvLpIndex thisLp = HvLpConfig_getLpIndex_outline(); + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + long vlanIndex = 0; + + + veth_proc_root = proc_mkdir("veth", iSeries_proc); + if (!veth_proc_root) return; + + for (i=0; i < HvMaxArchitectedLps; ++i) + { + if (i != thisLp) + { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(thisLp, i)) + { + struct proc_dir_entry *ent; + char name[10] = ""; + sprintf(name, "lpar%d", (int)i); + ent = create_proc_entry(name, S_IFREG|S_IRUSR, veth_proc_root); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)i; + ent->read_proc = proc_veth_dump_connection; + ent->write_proc = NULL; + } + } + } + + while (vlanMap != 0) + { + int bitOn = vlanMap & 0x8000; + + if (bitOn) + { + struct proc_dir_entry *ent; + char name[10] = ""; + sprintf(name, "veth%d", (int)vlanIndex); + ent = create_proc_entry(name, S_IFREG|S_IRUSR, veth_proc_root); + if (!ent) return; + ent->nlink = 1; + ent->data = (void *)vlanIndex; + ent->read_proc = proc_veth_dump_port; + ent->write_proc = NULL; + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + up(&VethProcSemaphore); +} + +int __init veth_module_init(void) +{ + int status; + int i; + + mFabricMgr = kmalloc(sizeof(struct VethFabricMgr), GFP_KERNEL); + memset(mFabricMgr, 0, sizeof(struct VethFabricMgr)); + veth_printk("Initializing veth module, fabric mgr (address=%p)\n", mFabricMgr); + + mFabricMgr->mEyecatcher = 0x56455448464D4752ULL; + mFabricMgr->mThisLp = HvLpConfig_getLpIndex_outline(); + + for (i=0; i < HvMaxArchitectedLps; ++i) + { + mFabricMgr->mConnection[i].mEyecatcher = 0x564554484C50434EULL; + veth_failMe(mFabricMgr->mConnection+i); + spin_lock_init(&mFabricMgr->mConnection[i].mAckGate); + spin_lock_init(&mFabricMgr->mConnection[i].mStatusGate); + } + + status = veth_probe(); + + if (status == 0) + { + veth_openConnections(); + } + + iSeries_proc_callback(&veth_proc_init); + + return status; +} + +module_init(veth_module_init); + +static void veth_failMe(struct VethLpConnection *connection) +{ + connection->mConnectionStatus.mSentCap = 0; + connection->mConnectionStatus.mCapAcked = 0; + connection->mConnectionStatus.mGotCap = 0; + connection->mConnectionStatus.mGotCapAcked = 0; + connection->mConnectionStatus.mSentMonitor = 0; + connection->mConnectionStatus.mFailed = 1; +} + +static int veth_open(struct net_device *dev) +{ + struct VethPort *port = (struct VethPort *)dev->priv; + + memset(&port->mStats, 0, sizeof(port->mStats)); + MOD_INC_USE_COUNT; + + netif_start_queue(dev); + + return 0; +} + +static int veth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + MOD_DEC_USE_COUNT; + + return 0; +} + +static struct net_device_stats *veth_get_stats(struct net_device *dev) +{ + struct VethPort *port = (struct VethPort *)dev->priv; + + return(&port->mStats); +} + + +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned char *frame = skb->data; + HvLpIndex remoteLp = frame[5]; + int i = 0; + int clone = 0; + + if (mFabricMgr == NULL) + { + veth_error_printk("NULL fabric manager with active ports!\n"); + netif_stop_queue(dev); + return 1; + } + + mb(); + + if ((*frame & 0x01) != 0x01) /* broadcast or multicast */ + { + if ((remoteLp != mFabricMgr->mThisLp) && + (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, remoteLp))) + veth_pTransmit(skb, remoteLp, dev); + } + else + { + for (i=0; i < HvMaxArchitectedLps; ++i) + { 
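+ /* broadcast/multicast fan-out: the first eligible partition consumes
+ the original skb (the ack path frees it after transmit); every
+ later one gets an skb_clone(), which shares the frame data and
+ duplicates only the header */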
+ if (i != mFabricMgr->mThisLp) + { + if (clone) + skb = skb_clone(skb, GFP_ATOMIC); + else + clone = 1; + + if (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, i)) + { + /* the ack handles deleting the skb */ + veth_pTransmit(skb, i, dev); + } + } + } + } + + return 0; +} + +static void veth_pTransmit(struct sk_buff *skb, HvLpIndex remoteLp, struct net_device *dev) +{ + struct VethLpConnection *connection = mFabricMgr->mConnection + remoteLp; + HvLpEvent_Rc returnCode; + + if (connection->mConnectionStatus.mFailed != 1) + { + struct VethMsg *msg = NULL; + VETHSTACKPOP(&(connection->mMsgStack), msg); + + if (msg != NULL) + { + if ((skb->len > 14) && + (skb->len <= 9018)) + { + dma_addr_t dma_addr = pci_map_single(iSeries_veth_dev, + skb->data, + skb->len, + PCI_DMA_TODEVICE); + + + + if (dma_addr != -1) + { + msg->mSkb = skb; + msg->mEvent.mSendData.mAddress[0] = dma_addr; + msg->mEvent.mSendData.mLength[0] = skb->len; + msg->mEvent.mSendData.mEofMask = 0xFFFFFFFFUL; + + test_and_set_bit(0, &(msg->mInUse)); + + returnCode = HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeFrames, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + msg->mIndex, + msg->mEvent.mFpData.mData1, + msg->mEvent.mFpData.mData2, + msg->mEvent.mFpData.mData3, + msg->mEvent.mFpData.mData4, + msg->mEvent.mFpData.mData5); + } + else + { + returnCode = -1; /* Bad return code */ + } + + if (returnCode != HvLpEvent_Rc_Good) + { + struct VethPort *port = (struct VethPort *)dev->priv; + + if (msg->mEvent.mSendData.mAddress[0]) + { + pci_unmap_single(iSeries_veth_dev, dma_addr, skb->len, PCI_DMA_TODEVICE); + } + + dev_kfree_skb_irq(skb); + + msg->mSkb = NULL; + memset(&(msg->mEvent.mSendData), 0, sizeof(struct VethFramesData)); + VETHSTACKPUSH(&(connection->mMsgStack), msg); + port->mStats.tx_dropped++; + } + else + { + struct VethPort *port = (struct VethPort *)dev->priv; + port->mStats.tx_packets++; + port->mStats.tx_bytes += skb->len; + } + } + } + else + { + struct VethPort *port = (struct VethPort *)dev->priv; + port->mStats.tx_dropped++; + } + } + else + { + struct VethPort *port = (struct VethPort *)dev->priv; + port->mStats.tx_dropped++; + } +} + +static int veth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + + return -EOPNOTSUPP; +} + +static void veth_set_multicast_list(struct net_device *dev) +{ + char *addrs; + struct VethPort *port = (struct VethPort *)dev->priv; + u64 newAddress = 0; + unsigned long flags; + + write_lock_irqsave(&port->mMcastGate, flags); + + if (dev->flags & IFF_PROMISC) { /* set promiscuous mode */ + port->mPromiscuous = 1; + } else { + struct dev_mc_list *dmi = dev->mc_list; + + if (dev->flags & IFF_ALLMULTI) { + port->mAllMcast = 1; + } else { + int i; + /* Update table */ + port->mNumAddrs = 0; + + for (i = 0; ((i < dev->mc_count) && (i < 12)); i++) { /* for each address in the list */ + addrs = dmi->dmi_addr; + dmi = dmi->next; + if ((*addrs & 0x01) == 1) { /* multicast address? 
*/ + memcpy(&newAddress, addrs, 6); + newAddress &= 0xFFFFFFFFFFFF0000; + + port->mMcasts[port->mNumAddrs] = newAddress; + mb(); + port->mNumAddrs = port->mNumAddrs + 1; + } + } + } + } + + write_unlock_irqrestore(&port->mMcastGate, flags); +} + + +static void veth_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +{ + if (event->xFlags.xFunction == HvLpEvent_Function_Ack) + { + veth_handleAck(event); + } + else if (event->xFlags.xFunction == HvLpEvent_Function_Int) + { + veth_handleInt(event); + } +} + +static void veth_handleAck(struct HvLpEvent *event) +{ + struct VethLpConnection *connection = &(mFabricMgr->mConnection[event->xTargetLp]); + struct VethLpEvent *vethEvent = (struct VethLpEvent *)event; + + switch(event->xSubtype) + { + case VethEventTypeCap: + { + veth_takeCapAck(connection, vethEvent); + break; + } + case VethEventTypeMonitor: + { + veth_takeMonitorAck(connection, vethEvent); + break; + } + default: + { + veth_error_printk("Unknown ack type %d from lpar %d\n", event->xSubtype, connection->mRemoteLp); + } + }; +} + +static void veth_handleInt(struct HvLpEvent *event) +{ + int i=0; + struct VethLpConnection *connection = &(mFabricMgr->mConnection[event->xSourceLp]); + struct VethLpEvent *vethEvent = (struct VethLpEvent *)event; + + switch(event->xSubtype) + { + case VethEventTypeCap: + { + veth_takeCap(connection, vethEvent); + break; + } + case VethEventTypeMonitor: + { + /* do nothing... this'll hang out here til we're dead, and the hypervisor will return it for us. */ + break; + } + case VethEventTypeFramesAck: + { + for (i=0; i < VethMaxFramesMsgsAcked; ++i) + { + u16 msg = vethEvent->mDerivedData.mFramesAckData.mToken[i]; + veth_recycleMsg(connection, msg); + } + break; + } + case VethEventTypeFrames: + { + veth_takeFrames(connection, vethEvent); + break; + } + default: + { + veth_error_printk("Unknown interrupt type %d from lpar %d\n", event->xSubtype, connection->mRemoteLp); + } + }; +} + +static void veth_openConnections() +{ + int i=0; + + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan, &veth_handleEvent); + + /* Now I need to run through the active lps and open connections to the ones I'm supposed to + open to. 
*/ + + for (i=HvMaxArchitectedLps-1; i >=0; --i) + { + if (i != mFabricMgr->mThisLp) + { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, i)) + { + veth_openConnection(i, 1); + } + else + { + veth_closeConnection(i, 1); + } + } + } +} + +static void veth_intFinishOpeningConnections(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *)parm; + connection->mAllocBhTq.data = parm; + connection->mNumberAllocated = number; + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_finishOpeningConnections(void *parm) +{ + unsigned long flags; + struct VethLpConnection *connection = (struct VethLpConnection *)parm; + spin_lock_irqsave(&connection->mStatusGate, flags); + veth_finishOpeningConnectionsLocked(connection); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_finishOpeningConnectionsLocked(struct VethLpConnection *connection) +{ + if (connection->mNumberAllocated >= 2) + { + connection->mConnectionStatus.mCapMonAlloced = 1; + veth_sendCap(connection); + } + else + { + veth_error_printk("Couldn't allocate base msgs for lpar %d, only got %d\n", connection->mRemoteLp, connection->mNumberAllocated); + veth_failMe(connection); + } +} + +static void veth_openConnection(u8 remoteLp, int lockMe) +{ + unsigned long flags; + unsigned long flags2; + HvLpInstanceId source; + HvLpInstanceId target; + u64 i = 0; + struct VethLpConnection *connection = &(mFabricMgr->mConnection[remoteLp]); + + memset(&connection->mCapBhTq, 0, sizeof(connection->mCapBhTq)); + connection->mCapBhTq.routine = (void *)(void *)veth_capBh; + + memset(&connection->mCapAckBhTq, 0, sizeof(connection->mCapAckBhTq)); + connection->mCapAckBhTq.routine = (void *)(void *)veth_capAckBh; + + memset(&connection->mMonitorAckBhTq, 0, sizeof(connection->mMonitorAckBhTq)); + connection->mMonitorAckBhTq.routine = (void *)(void *)veth_monitorAckBh; + + memset(&connection->mAllocBhTq, 0, sizeof(connection->mAllocBhTq)); + connection->mAllocBhTq.routine = (void *)(void *)veth_finishOpeningConnections; + + if (lockMe) + spin_lock_irqsave(&connection->mStatusGate, flags); + + connection->mRemoteLp = remoteLp; + + spin_lock_irqsave(&connection->mAckGate, flags2); + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + connection->mNumAcks = 0; + + HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualLan); + + /* clean up non-acked msgs */ + for (i=0; i < connection->mNumMsgs; ++i) + { + veth_recycleMsg(connection, i); + } + + connection->mConnectionStatus.mOpen = 1; + + source = connection->mSourceInst = HvCallEvent_getSourceLpInstanceId(remoteLp, HvLpEvent_Type_VirtualLan); + target = connection->mTargetInst = HvCallEvent_getTargetLpInstanceId(remoteLp, HvLpEvent_Type_VirtualLan); + + if (connection->mConnectionStatus.mCapMonAlloced != 1) + { + connection->mAllocBhTq.routine = (void *)(void *)veth_finishOpeningConnections; + mf_allocateLpEvents(remoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), + 2, + &veth_intFinishOpeningConnections, + connection); + } + else + { + veth_finishOpeningConnectionsLocked(connection); + } + + spin_unlock_irqrestore(&connection->mAckGate, flags2); + + if (lockMe) + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_closeConnection(u8 remoteLp, int lockMe) +{ + struct VethLpConnection *connection = &(mFabricMgr->mConnection[remoteLp]); + unsigned long flags; + unsigned long flags2; + if (lockMe) + 
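+ /* lockMe == 0 means the caller (e.g. the monitor-ack bottom half)
+ already holds mStatusGate */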
spin_lock_irqsave(&connection->mStatusGate, flags); + + del_timer(&connection->mAckTimer); + + if (connection->mConnectionStatus.mOpen == 1) + { + HvCallEvent_closeLpEventPath(remoteLp, HvLpEvent_Type_VirtualLan); + connection->mConnectionStatus.mOpen = 0; + veth_failMe(connection); + + /* reset ack data */ + spin_lock_irqsave(&connection->mAckGate, flags2); + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + connection->mNumAcks = 0; + + spin_unlock_irqrestore(&connection->mAckGate, flags2); + } + + if (lockMe) + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_msgsInit(struct VethLpConnection *connection) +{ + connection->mAllocBhTq.routine = (void *)(void *)veth_finishMsgsInit; + mf_allocateLpEvents(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), + connection->mMyCap.mUnionData.mFields.mNumberBuffers, + &veth_intFinishMsgsInit, + connection); +} + +static void veth_intFinishMsgsInit(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *)parm; + connection->mAllocBhTq.data = parm; + connection->mNumberRcvMsgs = number; + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_intFinishCapBh(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *)parm; + connection->mAllocBhTq.data = parm; + if (number > 0) + connection->mNumberLpAcksAlloced += number; + + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_finishMsgsInit(struct VethLpConnection *connection) +{ + int i=0; + unsigned int numberGotten = 0; + u64 amountOfHeapToGet = connection->mMyCap.mUnionData.mFields.mNumberBuffers * sizeof(struct VethMsg); + char *msgs = NULL; + unsigned long flags; + spin_lock_irqsave(&connection->mStatusGate, flags); + + if (connection->mNumberRcvMsgs >= connection->mMyCap.mUnionData.mFields.mNumberBuffers) + { + msgs = kmalloc(amountOfHeapToGet, GFP_ATOMIC); + + connection->mMsgs = (struct VethMsg *)msgs; + + if (msgs != NULL) + { + memset(msgs, 0, amountOfHeapToGet); + + for (i=0; i < connection->mMyCap.mUnionData.mFields.mNumberBuffers; ++i) + { + connection->mMsgs[i].mIndex = i; + ++numberGotten; + VETHSTACKPUSH(&(connection->mMsgStack), (connection->mMsgs+i)); + } + if (numberGotten > 0) + { + connection->mNumMsgs = numberGotten; + } + } + else + { + kfree(msgs); + connection->mMsgs = NULL; + } + } + + connection->mMyCap.mUnionData.mFields.mNumberBuffers = connection->mNumMsgs; + + if (connection->mNumMsgs < 10) + connection->mMyCap.mUnionData.mFields.mThreshold = 1; + else if (connection->mNumMsgs < 20) + connection->mMyCap.mUnionData.mFields.mThreshold = 4; + else if (connection->mNumMsgs < 40) + connection->mMyCap.mUnionData.mFields.mThreshold = 10; + else + connection->mMyCap.mUnionData.mFields.mThreshold = 20; + + connection->mMyCap.mUnionData.mFields.mTimer = VethAckTimeoutUsec; + + veth_finishSendCap(connection); + + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_sendCap(struct VethLpConnection *connection) +{ + if (connection->mMsgs == NULL) + { + connection->mMyCap.mUnionData.mFields.mNumberBuffers = VethBuffersToAllocate; + veth_msgsInit(connection); + } + else + { + veth_finishSendCap(connection); + } +} + +static void veth_finishSendCap(struct VethLpConnection *connection) +{ + HvLpEvent_Rc returnCode = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + 
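+ /* the capabilities data itself rides in the five immediate data
+ words of this fast event */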
VethEventTypeCap, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + 0, + connection->mMyCap.mUnionData.mNoFields.mReserved1, + connection->mMyCap.mUnionData.mNoFields.mReserved2, + connection->mMyCap.mUnionData.mNoFields.mReserved3, + connection->mMyCap.mUnionData.mNoFields.mReserved4, + connection->mMyCap.mUnionData.mNoFields.mReserved5); + + if ((returnCode == HvLpEvent_Rc_PartitionDead) || + (returnCode == HvLpEvent_Rc_PathClosed)) + { + connection->mConnectionStatus.mSentCap = 0; + } + else if (returnCode != HvLpEvent_Rc_Good) + { + veth_error_printk("Couldn't send cap to lpar %d, rc %x\n", connection->mRemoteLp, (int)returnCode); + veth_failMe(connection); + } + else + { + connection->mConnectionStatus.mSentCap = 1; + } +} + +static void veth_takeCap(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0,&(connection->mCapBhPending))) + { + connection->mCapBhTq.data = connection; + memcpy(&connection->mCapEvent, event, sizeof(connection->mCapEvent)); + queue_task(&connection->mCapBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } + else + { + veth_error_printk("Received a capabilities from lpar %d while already processing one\n", connection->mRemoteLp); + event->mBaseEvent.xRc = HvLpEvent_Rc_BufferNotAvailable; + HvCallEvent_ackLpEvent((struct HvLpEvent *)event); + } +} + +static void veth_takeCapAck(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0,&(connection->mCapAckBhPending))) + { + connection->mCapAckBhTq.data = connection; + memcpy(&connection->mCapAckEvent, event, sizeof(connection->mCapAckEvent)); + queue_task(&connection->mCapAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } + else + { + veth_error_printk("Received a capabilities ack from lpar %d while already processing one\n", connection->mRemoteLp); + } +} + +static void veth_takeMonitorAck(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0,&(connection->mMonitorAckBhPending))) + { + connection->mMonitorAckBhTq.data = connection; + memcpy(&connection->mMonitorAckEvent, event, sizeof(connection->mMonitorAckEvent)); + queue_task(&connection->mMonitorAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } + else + { + veth_error_printk("Received a monitor ack from lpar %d while already processing one\n", connection->mRemoteLp); + } +} + +static void veth_recycleMsg(struct VethLpConnection *connection, u16 msg) +{ + if (msg < connection->mNumMsgs) + { + struct VethMsg *myMsg = connection->mMsgs + msg; + if (test_and_clear_bit(0, &(myMsg->mInUse))) + { + pci_unmap_single(iSeries_veth_dev, + myMsg->mEvent.mSendData.mAddress[0], + myMsg->mEvent.mSendData.mLength[0], + PCI_DMA_TODEVICE); + dev_kfree_skb_irq(myMsg->mSkb); + + myMsg->mSkb = NULL; + memset(&(myMsg->mEvent.mSendData), 0, sizeof(struct VethFramesData)); + VETHSTACKPUSH(&connection->mMsgStack, myMsg); + } + else + { + if (connection->mConnectionStatus.mOpen) + { + veth_error_printk("Received a frames ack for msg %d from lpar %d while not outstanding\n", msg, connection->mRemoteLp); + } + } + } +} + +static void veth_capBh(struct VethLpConnection *connection) +{ + struct VethLpEvent *event = &connection->mCapEvent; + unsigned long flags; + struct VethCapData *remoteCap = &(connection->mRemoteCap); + u64 numAcks = 0; + spin_lock_irqsave(&connection->mStatusGate, flags); + connection->mConnectionStatus.mGotCap = 1; + + memcpy(remoteCap, &(event->mDerivedData.mCapabilitiesData), 
sizeof(connection->mRemoteCap)); + + if ((remoteCap->mUnionData.mFields.mNumberBuffers <= VethMaxFramesMsgs) && + (remoteCap->mUnionData.mFields.mNumberBuffers != 0) && + (remoteCap->mUnionData.mFields.mThreshold <= VethMaxFramesMsgsAcked) && + (remoteCap->mUnionData.mFields.mThreshold != 0)) + { + numAcks = (remoteCap->mUnionData.mFields.mNumberBuffers / remoteCap->mUnionData.mFields.mThreshold) + 1; + + if (connection->mNumberLpAcksAlloced < numAcks) + { + numAcks = numAcks - connection->mNumberLpAcksAlloced; + connection->mAllocBhTq.routine = (void *)(void *)veth_finishCapBh; + mf_allocateLpEvents(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), + numAcks, + &veth_intFinishCapBh, + connection); + } + else + veth_finishCapBhLocked(connection); + } + else + { + veth_error_printk("Received incompatible capabilities from lpar %d\n", connection->mRemoteLp); + event->mBaseEvent.xRc = HvLpEvent_Rc_InvalidSubtypeData; + HvCallEvent_ackLpEvent((struct HvLpEvent *)event); + } + + clear_bit(0,&(connection->mCapBhPending)); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_capAckBh(struct VethLpConnection *connection) +{ + struct VethLpEvent *event = &connection->mCapAckEvent; + unsigned long flags; + + spin_lock_irqsave(&connection->mStatusGate, flags); + + if (event->mBaseEvent.xRc == HvLpEvent_Rc_Good) + { + connection->mConnectionStatus.mCapAcked = 1; + + if ((connection->mConnectionStatus.mGotCap == 1) && + (connection->mConnectionStatus.mGotCapAcked == 1)) + { + if (connection->mConnectionStatus.mSentMonitor != 1) + veth_sendMonitor(connection); + } + } + else + { + veth_error_printk("Bad rc(%d) from lpar %d on capabilities\n", event->mBaseEvent.xRc, connection->mRemoteLp); + veth_failMe(connection); + } + + clear_bit(0,&(connection->mCapAckBhPending)); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_monitorAckBh(struct VethLpConnection *connection) +{ + unsigned long flags; + + spin_lock_irqsave(&connection->mStatusGate, flags); + + veth_failMe(connection); + + veth_printk("Monitor ack returned for lpar %d\n", connection->mRemoteLp); + + if (connection->mConnectionStatus.mOpen) + { + veth_closeConnection(connection->mRemoteLp, 0); + + udelay(100); + + queue_task(&connection->mMonitorAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } + else + { +#ifdef MODULE + if (VethModuleReopen) +#endif + veth_openConnection(connection->mRemoteLp, 0); +#ifdef MODULE + else + { + int i=0; + + for (i=0; i < connection->mNumMsgs; ++i) + { + veth_recycleMsg(connection, i); + } + } +#endif + clear_bit(0,&(connection->mMonitorAckBhPending)); + } + + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +#define number_of_pages(v, l) ((((unsigned long)(v) & ((1 << 12) - 1)) + (l) + 4096 - 1) / 4096) +#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) + +static void veth_takeFrames(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + int i; + struct VethPort *port = NULL; + struct BufList + { + union + { + struct + { + u32 token2; + u32 garbage; + } token1; + u64 address; + } addr; + u64 size; + }; + + struct BufList myBufList[4]; + struct BufList remoteList; + + for (i=0; i < VethMaxFramesPerMsg; ++i) + { + u16 length = event->mDerivedData.mSendData.mLength[i]; + u32 address = event->mDerivedData.mSendData.mAddress[i]; + if ((address != 0) && + (length <= 9018) && + (length > 14)) + { + struct sk_buff *skb = alloc_skb(event->mDerivedData.mSendData.mLength[i], GFP_ATOMIC); + 
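+ /*
+ * What follows builds the scatter list for the inbound copy:
+ * skb->data is only virtually contiguous, and a frame of up to 9018
+ * bytes can cross up to four 4 KB pages, so each piece is described
+ * by its absolute address and length -- the first piece runs from
+ * skb->data to the end of its page, later pieces continue from the
+ * following page boundaries, and HvCallEvent_dmaBufList() then pulls
+ * the whole frame over from the sending partition in one call.
+ */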
remoteList.addr.token1.token2 = address;
+ remoteList.size = length;
+ if (skb != NULL)
+ {
+ HvLpDma_Rc returnCode = HvLpDma_Rc_Good;
+ int numPages = number_of_pages((skb->data), length);
+
+ /* Only the first chunk is shortened by the page offset; every other
+ * full page contributes 4096 bytes, and the last chunk gets whatever
+ * remains of the frame.
+ */
+ myBufList[0].addr.address = (0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long)skb->data)));
+ myBufList[0].size = (numPages > 1) ? (4096 - page_offset(skb->data)) : length;
+
+ if (numPages > 1)
+ {
+ myBufList[1].addr.address = (0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long) skb->data + myBufList[0].size)));
+ myBufList[1].size = (numPages > 2) ? 4096 : length - myBufList[0].size;
+
+ if (numPages > 2)
+ {
+ myBufList[2].addr.address = (0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long) skb->data + myBufList[0].size + myBufList[1].size)));
+ myBufList[2].size = (numPages > 3) ? 4096 : length - myBufList[1].size - myBufList[0].size;
+
+ if (numPages > 3)
+ {
+ myBufList[3].addr.address = 0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long) skb->data + myBufList[0].size + myBufList[1].size + myBufList[2].size));
+ myBufList[3].size = length - myBufList[2].size - myBufList[1].size - myBufList[0].size;
+ }
+ }
+ }
+
+ returnCode = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
+ event->mBaseEvent.xSourceLp,
+ HvLpDma_Direction_RemoteToLocal,
+ connection->mSourceInst,
+ connection->mTargetInst,
+ HvLpDma_AddressType_RealAddress,
+ HvLpDma_AddressType_TceIndex,
+ 0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long)&myBufList)),
+ 0x8000000000000000LL | (VIRT_TO_ABSOLUTE((unsigned long)&remoteList)),
+ length);
+
+ if (returnCode == HvLpDma_Rc_Good)
+ {
+ HvLpVirtualLanIndex vlan = skb->data[9];
+ u64 dest = *((u64 *)skb->data) & 0xFFFFFFFFFFFF0000;
+
+ if (((vlan < HvMaxArchitectedVirtualLans) &&
+ ((port = mFabricMgr->mPorts[vlan]) != NULL)) &&
+ ((dest == port->mMyAddress) || /* it's for me */
+ (dest == 0xFFFFFFFFFFFF0000) || /* it's a broadcast */
+ (veth_multicast_wanted(port, dest)) || /* it's one of my multicasts */
+ (port->mPromiscuous == 1))) /* I'm promiscuous */
+ {
+ skb_put(skb, length);
+ skb->dev = port->mDev;
+ skb->protocol = eth_type_trans(skb, port->mDev);
+ skb->ip_summed = CHECKSUM_NONE;
+ netif_rx(skb); /* send it up */
+ port->mStats.rx_packets++;
+ port->mStats.rx_bytes += length;
+ }
+ else
+ {
+ dev_kfree_skb_irq(skb);
+ }
+ }
+ else
+ {
+ printk("bad lp event rc %x length %d remote address %x raw address %x\n", (int)returnCode, length, remoteList.addr.token1.token2, address);
+ dev_kfree_skb_irq(skb);
+ }
+ }
+ }
+ else
+ break;
+ }
+ /* Ack it */
+
+ {
+ unsigned long flags;
+ spin_lock_irqsave(&connection->mAckGate, flags);
+
+ if (connection->mNumAcks < VethMaxFramesMsgsAcked)
+ {
+ connection->mEventData.mAckData.mToken[connection->mNumAcks] = event->mBaseEvent.xCorrelationToken;
+ ++connection->mNumAcks;
+
+ if (connection->mNumAcks == connection->mRemoteCap.mUnionData.mFields.mThreshold)
+ {
+ HvLpEvent_Rc rc = HvCallEvent_signalLpEventFast(connection->mRemoteLp,
+ HvLpEvent_Type_VirtualLan,
+ VethEventTypeFramesAck,
+ HvLpEvent_AckInd_NoAck,
+ HvLpEvent_AckType_ImmediateAck,
+ connection->mSourceInst,
+ connection->mTargetInst,
+ 0,
+ connection->mEventData.mFpData.mData1,
+ connection->mEventData.mFpData.mData2,
+ connection->mEventData.mFpData.mData3,
+ connection->mEventData.mFpData.mData4,
+ connection->mEventData.mFpData.mData5);
+
+ if (rc != HvLpEvent_Rc_Good)
+ {
+ veth_error_printk("Bad lp event return code(%x) acking frames from lpar %d\n", (int)rc, connection->mRemoteLp);
frames from lpar %d\n", (int)rc, connection->mRemoteLp); + } + + connection->mNumAcks = 0; + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + } + + } + + spin_unlock_irqrestore(&connection->mAckGate, flags); + } +} +#undef number_of_pages +#undef page_offset + +static void veth_timedAck(unsigned long connectionPtr) +{ + unsigned long flags; + HvLpEvent_Rc rc; + struct VethLpConnection *connection = (struct VethLpConnection *) connectionPtr; + /* Ack all the events */ + spin_lock_irqsave(&connection->mAckGate, flags); + + if (connection->mNumAcks > 0) + { + rc = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeFramesAck, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + 0, + connection->mEventData.mFpData.mData1, + connection->mEventData.mFpData.mData2, + connection->mEventData.mFpData.mData3, + connection->mEventData.mFpData.mData4, + connection->mEventData.mFpData.mData5); + + if (rc != HvLpEvent_Rc_Good) + { + veth_error_printk("Bad lp event return code(%x) acking frames from lpar %d!\n", (int)rc, connection->mRemoteLp); + } + + connection->mNumAcks = 0; + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + } + + spin_unlock_irqrestore(&connection->mAckGate, flags); + + /* Reschedule the timer */ + connection->mAckTimer.expires = jiffies + connection->mTimeout; + add_timer(&connection->mAckTimer); +} + +static int veth_multicast_wanted(struct VethPort *port, u64 thatAddr) +{ + int returnParm = 0; + int i; + unsigned long flags; + + if ((*((char *)&thatAddr) & 0x01) != 1) + return 0; + + read_lock_irqsave(&port->mMcastGate, flags); + if (port->mAllMcast) + return 1; + + for (i=0; i < port->mNumAddrs; ++i) + { + u64 thisAddr = port->mMcasts[i]; + + if (thisAddr == thatAddr) + { + returnParm = 1; + break; + } + } + read_unlock_irqrestore(&port->mMcastGate, flags); + + return returnParm; +} + +static void veth_sendMonitor(struct VethLpConnection *connection) +{ + HvLpEvent_Rc returnCode = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeMonitor, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + connection->mSourceInst, + connection->mTargetInst, + 0, 0, 0, 0, 0, 0); + + if (returnCode == HvLpEvent_Rc_Good) + { + connection->mConnectionStatus.mSentMonitor = 1; + connection->mConnectionStatus.mFailed = 0; + + /* Start the ACK timer */ + init_timer(&connection->mAckTimer); + connection->mAckTimer.function = veth_timedAck; + connection->mAckTimer.data = (unsigned long) connection; + connection->mAckTimer.expires = jiffies + connection->mTimeout; + add_timer(&connection->mAckTimer); + + } + else + { + veth_error_printk("Monitor send to lpar %d failed with rc %x\n", connection->mRemoteLp, (int)returnCode); + veth_failMe(connection); + } +} + +static void veth_finishCapBh(struct VethLpConnection *connection) +{ + unsigned long flags; + spin_lock_irqsave(&connection->mStatusGate, flags); + veth_finishCapBhLocked(connection); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_finishCapBhLocked(struct VethLpConnection *connection) +{ + struct VethLpEvent *event = &connection->mCapEvent; + struct VethCapData *remoteCap = &(connection->mRemoteCap); + int numAcks = (remoteCap->mUnionData.mFields.mNumberBuffers / remoteCap->mUnionData.mFields.mThreshold) + 1; + + /* Convert timer to jiffies */ + if (connection->mMyCap.mUnionData.mFields.mTimer) + 
connection->mTimeout = remoteCap->mUnionData.mFields.mTimer * HZ / 1000000;
+ else
+ connection->mTimeout = VethAckTimeoutUsec * HZ / 1000000;
+
+ if (connection->mNumberLpAcksAlloced >= numAcks)
+ {
+ HvLpEvent_Rc returnCode = HvCallEvent_ackLpEvent((struct HvLpEvent *)event);
+
+ if (returnCode == HvLpEvent_Rc_Good)
+ {
+ connection->mConnectionStatus.mGotCapAcked = 1;
+
+ if (connection->mConnectionStatus.mSentCap != 1)
+ {
+ connection->mTargetInst = HvCallEvent_getTargetLpInstanceId(connection->mRemoteLp, HvLpEvent_Type_VirtualLan);
+
+ veth_sendCap(connection);
+ }
+ else if (connection->mConnectionStatus.mCapAcked == 1)
+ {
+ if (connection->mConnectionStatus.mSentMonitor != 1)
+ veth_sendMonitor(connection);
+ }
+ }
+ else
+ {
+ veth_error_printk("Failed to ack remote cap for lpar %d with rc %x\n", connection->mRemoteLp, (int)returnCode);
+ veth_failMe(connection);
+ }
+ }
+ else
+ {
+ veth_error_printk("Couldn't allocate all the frames ack events for lpar %d\n", connection->mRemoteLp);
+ event->mBaseEvent.xRc = HvLpEvent_Rc_BufferNotAvailable;
+ HvCallEvent_ackLpEvent((struct HvLpEvent *)event);
+ }
+}
+
+int proc_veth_dump_connection
+(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ char *out = page;
+ long whichConnection = (long) data;
+ int len = 0;
+ struct VethLpConnection *connection = NULL;
+
+ if ((whichConnection < 0) || (whichConnection >= HvMaxArchitectedLps) || (mFabricMgr == NULL))
+ {
+ veth_error_printk("Got bad data from /proc file system\n");
+ len = sprintf(page, "ERROR\n");
+ }
+ else
+ {
+ int thereWasStuffBefore = 0;
+ connection = &(mFabricMgr->mConnection[whichConnection]);
+
+ out += sprintf(out, "Remote Lp:\t%d\n", connection->mRemoteLp);
+ out += sprintf(out, "Source Inst:\t%04X\n", connection->mSourceInst);
+ out += sprintf(out, "Target Inst:\t%04X\n", connection->mTargetInst);
+ out += sprintf(out, "Num Msgs:\t%d\n", connection->mNumMsgs);
+ out += sprintf(out, "Num Lp Acks:\t%d\n", connection->mNumberLpAcksAlloced);
+ out += sprintf(out, "Num Acks:\t%d\n", connection->mNumAcks);
+
+ if (connection->mConnectionStatus.mOpen)
+ {
+ out += sprintf(out, "<Open");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mCapMonAlloced)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "CapMonAlloced");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mBaseMsgsAlloced)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "BaseMsgsAlloced");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mSentCap)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "SentCap");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mCapAcked)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "CapAcked");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mGotCap)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "GotCap");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mGotCapAcked)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "GotCapAcked");
+ thereWasStuffBefore = 1;
+ }
+
+ if (connection->mConnectionStatus.mSentMonitor)
+ {
+ if (thereWasStuffBefore)
+ out += sprintf(out,"/");
+ else
+ out += sprintf(out,"<");
+ out += sprintf(out, "SentMonitor");
+ thereWasStuffBefore = 1;
+ }
sprintf(out, "SentMonitor"); + thereWasStuffBefore = 1; + } + + if (connection->mConnectionStatus.mPopulatedRings) + { + if (thereWasStuffBefore) + out += sprintf(out,"/"); + else + out += sprintf(out,"<"); + out += sprintf(out, "PopulatedRings"); + thereWasStuffBefore = 1; + } + + if (connection->mConnectionStatus.mFailed) + { + if (thereWasStuffBefore) + out += sprintf(out,"/"); + else + out += sprintf(out,"<"); + out += sprintf(out, "Failed"); + thereWasStuffBefore = 1; + } + + if (thereWasStuffBefore) + out += sprintf(out, ">"); + + out += sprintf(out, "\n"); + + out += sprintf(out, "Capabilities (System:):\n"); + out += sprintf(out, "\tLocal:<"); + out += sprintf(out, "%d/%d/%d/%d>\n", + connection->mMyCap.mUnionData.mFields.mVersion, + connection->mMyCap.mUnionData.mFields.mNumberBuffers, + connection->mMyCap.mUnionData.mFields.mThreshold, + connection->mMyCap.mUnionData.mFields.mTimer); + out += sprintf(out, "\tRemote:<"); + out += sprintf(out, "%d/%d/%d/%d>\n", + connection->mRemoteCap.mUnionData.mFields.mVersion, + connection->mRemoteCap.mUnionData.mFields.mNumberBuffers, + connection->mRemoteCap.mUnionData.mFields.mThreshold, + connection->mRemoteCap.mUnionData.mFields.mTimer); + len = out - page; + } + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +} + +int proc_veth_dump_port +(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + char *out = page; + long whichPort = (long) data; + int len = 0; + struct VethPort *port = NULL; + + if ((whichPort < 0) || (whichPort > HvMaxArchitectedVirtualLans) || (mFabricMgr == NULL)) + len = sprintf(page, "Virtual ethernet is not configured.\n"); + else + { + int i=0; + u32 *myAddr; + u16 *myEndAddr; + port = mFabricMgr->mPorts[whichPort]; + + if (port != NULL) + { + myAddr = (u32 *)&(port->mMyAddress); + myEndAddr = (u16 *)(myAddr + 1); + out += sprintf(out, "Net device:\t%p\n", port->mDev); + out += sprintf(out, "Net device name:\t%s\n", port->mDev->name); + out += sprintf(out, "Address:\t%08X%04X\n", myAddr[0], myEndAddr[0]); + out += sprintf(out, "Promiscuous:\t%d\n", port->mPromiscuous); + out += sprintf(out, "All multicast:\t%d\n", port->mAllMcast); + out += sprintf(out, "Number multicast:\t%d\n", port->mNumAddrs); + + for (i=0; i < port->mNumAddrs; ++i) + { + u32 *multi = (u32 *)&(port->mMcasts[i]); + u16 *multiEnd = (u16 *)(multi + 1); + out += sprintf(out, " %08X%04X\n", multi[0], multiEnd[0]); + } + } + else + { + out += sprintf(page, "veth%d is not configured.\n", (int)whichPort); + } + + len = out - page; + } + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +} diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/veth.h linuxppc64_2_4/drivers/iseries/veth.h --- ../kernel.org/linux/drivers/iseries/veth.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/veth.h Thu Sep 27 14:00:26 2001 @@ -0,0 +1,255 @@ +/* File veth.h created by Kyle A. Lucke on Mon Aug 7 2000. 
*/
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _VETH_H
+#define _VETH_H
+
+#ifndef _HVTYPES_H
+#include <asm/iSeries/HvTypes.h>
+#endif
+#ifndef _HVLPEVENT_H
+#include <asm/iSeries/HvLpEvent.h>
+#endif
+#include <linux/netdevice.h>
+
+#define VethEventNumTypes (4)
+#define VethEventTypeCap (0)
+#define VethEventTypeFrames (1)
+#define VethEventTypeMonitor (2)
+#define VethEventTypeFramesAck (3)
+
+#define VethMaxFramesMsgsAcked (20)
+#define VethMaxFramesMsgs (0xFFFF)
+#define VethMaxFramesPerMsg (6)
+#define VethAckTimeoutUsec (1000000)
+
+#define VETHSTACKTYPE(T) struct VethStack##T
+#define VETHSTACK(T) \
+VETHSTACKTYPE(T) \
+{ \
+struct T *head; \
+spinlock_t lock; \
+}
+#define VETHSTACKCTOR(s) do { (s)->head = NULL; spin_lock_init(&(s)->lock); } while(0)
+#define VETHSTACKPUSH(s, p) \
+do { \
+unsigned long flags; \
+spin_lock_irqsave(&(s)->lock,flags); \
+(p)->next = (s)->head; \
+(s)->head = (p); \
+spin_unlock_irqrestore(&(s)->lock, flags); \
+} while(0)
+
+#define VETHSTACKPOP(s,p) \
+do { \
+unsigned long flags; \
+spin_lock_irqsave(&(s)->lock,flags); \
+(p) = (s)->head; \
+if ((s)->head != NULL) \
+{ \
+(s)->head = (s)->head->next; \
+} \
+spin_unlock_irqrestore(&(s)->lock, flags); \
+} while(0)
+
+#define VETHQUEUE(T) \
+struct VethQueue##T \
+{ \
+T *head; \
+T *tail; \
+spinlock_t lock; \
+}
+#define VETHQUEUECTOR(q) do { (q)->head = NULL; (q)->tail = NULL; spin_lock_init(&(q)->lock); } while(0)
+#define VETHQUEUEENQ(q, p) \
+do { \
+unsigned long flags; \
+spin_lock_irqsave(&(q)->lock,flags); \
+(p)->next = NULL; \
+if ((q)->head != NULL) \
+{ \
+(q)->head->next = (p); \
+(q)->head = (p); \
+} \
+else \
+{ \
+(q)->tail = (q)->head = (p); \
+} \
+spin_unlock_irqrestore(&(q)->lock, flags); \
+} while(0)
+
+#define VETHQUEUEDEQ(q,p) \
+do { \
+unsigned long flags; \
+spin_lock_irqsave(&(q)->lock,flags); \
+(p) = (q)->tail; \
+if ((p) != NULL) \
+{ \
+(q)->tail = (p)->next; \
+(p)->next = NULL; \
+} \
+if ((q)->tail == NULL) \
+(q)->head = NULL; \
+spin_unlock_irqrestore(&(q)->lock, flags); \
+} while(0)
+
+struct VethFramesData
+{
+ u32 mAddress[6];
+ u16 mLength[6];
+ u32 mEofMask;
+};
+
+struct VethFramesAckData
+{
+ u16 mToken[VethMaxFramesMsgsAcked];
+};
+
+struct VethCapData
+{
+ union
+ {
+ struct Fields
+ {
+ u8 mVersion;
+ u8 mReserved1;
+ u16 mNumberBuffers;
+ u16 mThreshold;
+ u16 mReserved2;
+ u32 mTimer;
+ u32 mReserved3;
+ u64 mReserved4;
+ u64 mReserved5;
+ u64 mReserved6;
+ } mFields;
+ struct NoFields
+ {
+ u64 mReserved1;
+ u64 mReserved2;
+ u64 mReserved3;
+ u64 mReserved4;
+ u64 mReserved5;
+ } mNoFields;
+ } mUnionData;
+};
+
+struct VethFastPathData
+{
+ u64 mData1;
+ u64 mData2;
+ u64 mData3;
+ u64 mData4;
+ u64 mData5;
+};
+
+struct VethLpEvent
+{
+ struct HvLpEvent mBaseEvent;
+ union {
+ struct VethFramesData mSendData;
+ struct VethCapData mCapabilitiesData;
+ struct VethFramesAckData mFramesAckData;
+ struct VethFastPathData mFastPathData;
+ } mDerivedData;
+
+};
+
+struct VethMsg
+{
+ struct VethMsg *next;
+ union {
+ struct VethFramesData mSendData;
+ struct VethFastPathData mFpData;
+ } mEvent;
+ int mIndex;
+ unsigned long mInUse;
+ struct sk_buff *mSkb;
+};
+
+
+struct VethControlBlock
+{
+ struct net_device *mDev;
+ struct VethControlBlock *mNext;
+ HvLpVirtualLanIndex mVlanId;
+};
+
+struct VethLpConnection
+{
+ u64 mEyecatcher;
+ HvLpIndex mRemoteLp;
+ HvLpInstanceId mSourceInst;
+ HvLpInstanceId mTargetInst;
+ u32 mNumMsgs;
+ struct VethMsg *mMsgs;
+ int mNumberRcvMsgs;
+ int mNumberLpAcksAlloced;
+ union
+ {
+ struct VethFramesAckData mAckData;
+ struct VethFastPathData mFpData;
+ }
mEventData;
+ spinlock_t mAckGate;
+ u32 mNumAcks;
+ spinlock_t mStatusGate;
+ struct
+ {
+ u64 mOpen : 1;
+ u64 mCapMonAlloced : 1;
+ u64 mBaseMsgsAlloced : 1;
+ u64 mSentCap : 1;
+ u64 mCapAcked : 1;
+ u64 mGotCap : 1;
+ u64 mGotCapAcked : 1;
+ u64 mSentMonitor : 1;
+ u64 mPopulatedRings : 1;
+ u64 mReserved : 54;
+ u64 mFailed : 1;
+ } mConnectionStatus;
+ struct VethCapData mMyCap;
+ struct VethCapData mRemoteCap;
+ unsigned long mCapAckBhPending;
+ struct tq_struct mCapAckBhTq;
+ struct VethLpEvent mCapAckEvent;
+ unsigned long mCapBhPending;
+ struct tq_struct mCapBhTq;
+ struct VethLpEvent mCapEvent;
+ unsigned long mMonitorAckBhPending;
+ struct tq_struct mMonitorAckBhTq;
+ struct VethLpEvent mMonitorAckEvent;
+ unsigned long mAllocBhPending;
+ struct tq_struct mAllocBhTq;
+ int mNumberAllocated;
+ struct timer_list mAckTimer;
+ u32 mTimeout;
+ VETHSTACK(VethMsg) mMsgStack;
+};
+#define HVMAXARCHITECTEDVIRTUALLANS 16
+struct VethPort
+{
+ struct net_device *mDev;
+ struct net_device_stats mStats;
+ int mLock;
+ u64 mMyAddress;
+ int mPromiscuous;
+ int mAllMcast;
+ rwlock_t mMcastGate;
+ int mNumAddrs;
+ u64 mMcasts[12];
+};
+
+struct VethFabricMgr
+{
+ u64 mEyecatcher;
+ HvLpIndex mThisLp;
+ struct VethLpConnection mConnection[HVMAXARCHITECTEDLPS];
+ spinlock_t mPortListGate;
+ u64 mNumPorts;
+ struct VethPort *mPorts[HVMAXARCHITECTEDVIRTUALLANS];
+};
+
+int proc_veth_dump_connection(char *page, char **start, off_t off, int count, int *eof, void *data);
+int proc_veth_dump_port(char *page, char **start, off_t off, int count, int *eof, void *data);
+
+#endif /* _VETH_H */ diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/vio.h linuxppc64_2_4/drivers/iseries/vio.h --- ../kernel.org/linux/drivers/iseries/vio.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/vio.h Thu Sep 27 14:00:26 2001 @@ -0,0 +1,115 @@ +/* -*- linux-c -*-
+ * drivers/char/vio.h
+ *
+ * iSeries Virtual I/O Message Path header
+ *
+ * Authors: Dave Boutcher
+ * Ryan Arnold
+ * Colin Devilbiss
+ *
+ * (C) Copyright 2000 IBM Corporation
+ *
+ * This header file is used by the iSeries virtual I/O device
+ * drivers. It defines the interfaces to the common functions
+ * (implemented in drivers/char/viopath.c) as well as defining
+ * common functions and structures. Currently (at the time I
+ * wrote this comment) the iSeries virtual I/O device drivers
+ * that use this are
+ * drivers/block/viodasd.c
+ * drivers/char/viocons.c
+ * drivers/char/viotape.c
+ * drivers/cdrom/viocd.c
+ *
+ * The iSeries virtual ethernet support (veth.c) uses a whole
+ * different set of functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef _VIO_H
+#define _VIO_H
+
+#include <asm/iSeries/HvTypes.h>
+#include <asm/iSeries/HvLpEvent.h>
+
+/* iSeries virtual I/O events use the subtype field in
+ * HvLpEvent to figure out what kind of vio event is coming
+ * in.
We use a table to route these, and this defines + * the maximum number of distinct subtypes + */ +#define VIO_MAX_SUBTYPES 7 + +/* Each subtype can register a handler to process their events. + * The handler must have this interface. + */ +typedef void (vio_event_handler_t) (struct HvLpEvent * event); + +int viopath_open(HvLpIndex remoteLp, int subtype, int numReq); +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq); +int vio_setHandler(int subtype, vio_event_handler_t * beh); +int vio_clearHandler(int subtype); +int viopath_isactive(HvLpIndex lp); +HvLpInstanceId viopath_sourceinst(HvLpIndex lp); +HvLpInstanceId viopath_targetinst(HvLpIndex lp); +void vio_set_hostlp(void); +void *vio_get_event_buffer(int subtype); +void vio_free_event_buffer(int subtype, void *buffer); + +extern HvLpIndex viopath_hostLp; + +#define VIO_MESSAGE "iSeries virtual I/O: " +#define KERN_DEBUG_VIO KERN_DEBUG VIO_MESSAGE +#define KERN_INFO_VIO KERN_INFO VIO_MESSAGE +#define KERN_WARNING_VIO KERN_WARNING VIO_MESSAGE + +#define VIOCHAR_MAX_DATA 200 + +#define VIOMAJOR_SUBTYPE_MASK 0xff00 +#define VIOMINOR_SUBTYPE_MASK 0x00ff +#define VIOMAJOR_SUBTYPE_SHIFT 8 + +#define VIOVERSION 0x0101 + +enum viosubtypes { + viomajorsubtype_monitor = 0x0100, + viomajorsubtype_blockio = 0x0200, + viomajorsubtype_chario = 0x0300, + viomajorsubtype_config = 0x0400, + viomajorsubtype_cdio = 0x0500, + viomajorsubtype_tape = 0x0600 +}; + + +enum vioconfigsubtype { + vioconfigget = 0x0001, +}; + +enum viorc { + viorc_good = 0x0000, + viorc_noConnection = 0x0001, + viorc_noReceiver = 0x0002, + viorc_noBufferAvailable = 0x0003, + viorc_invalidMessageType = 0x0004, + viorc_invalidRange = 0x0201, + viorc_invalidToken = 0x0202, + viorc_DMAError = 0x0203, + viorc_useError = 0x0204, + viorc_releaseError = 0x0205, + viorc_invalidDisk = 0x0206, + viorc_openRejected = 0x0301 +}; + + +#endif /* _VIO_H */ diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/viocd.c linuxppc64_2_4/drivers/iseries/viocd.c --- ../kernel.org/linux/drivers/iseries/viocd.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viocd.c Wed Nov 14 13:42:56 2001 @@ -0,0 +1,783 @@ +/* -*- linux-c -*- + * drivers/cdrom/viocd.c + * + *************************************************************************** + * iSeries Virtual CD Rom + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *************************************************************************** + * This routine provides access to CD ROM drives owned and managed by an + * OS/400 partition running on the same box as this Linux partition. + * + * All operations are performed by sending messages back and forth to + * the OS/400 partition. 
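+ *
+ * Like the other virtual I/O drivers listed in vio.h, this driver
+ * registers a subtype handler and opens a message path to the hosting
+ * partition. A rough sketch of that pattern (see viocd_init() and
+ * viocd_exit() below for the real sequence):
+ *
+ *	vio_setHandler(viomajorsubtype_cdio, vioHandleCDEvent);
+ *	viopath_open(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2);
+ *	...
+ *	viopath_close(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2);
+ *	vio_clearHandler(viomajorsubtype_cdio);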
+ * + * + * This device driver can either use it's own major number, or it can + * pretend to be an AZTECH drive. This is controlled with a + * CONFIG option. You can either call this an elegant solution to the + * fact that a lot of software doesn't recognize a new CD major number... + * or you can call this a really ugly hack. Your choice. + * + */ + +#include +#include + +/* Decide on the proper naming convention to use for our device */ +#ifdef CONFIG_DEVFS_FS +#define VIOCD_DEVICE "cdroms/cdrom%d" +#define VIOCD_DEVICE_OFFSET 0 +#else +#ifdef CONFIG_VIOCD_AZTECH +#define VIOCD_DEVICE "aztcd" +#define VIOCD_DEVICE_OFFSET 0 +#else +#define VIOCD_DEVICE "iseries/vcd%c" +#define VIOCD_DEVICE_OFFSET 'a' +#endif +#endif + +/*************************************************************************** + * Decide if we are using our own major or pretending to be an AZTECH drive + ***************************************************************************/ +#ifdef CONFIG_VIOCD_AZTECH +#define MAJOR_NR AZTECH_CDROM_MAJOR +#define do_viocd_request do_aztcd_request +#else +#define MAJOR_NR VIOCD_MAJOR +#endif + +#define VIOCD_VERS "1.04" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "vio.h" +#include + +extern struct pci_dev * iSeries_vio_dev; + +#define signalLpEvent HvCallEvent_signalLpEventFast + +struct viocdlpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u16 mDisk; + u16 mFlags; + u32 mToken; + u64 mOffset; // On open, the max number of disks + u64 mLen; // On open, the size of the disk + u32 mBlockSize; // Only set on open + u32 mMediaSize; // Only set on open +}; + +enum viocdsubtype { + viocdopen = 0x0001, + viocdclose = 0x0002, + viocdread = 0x0003, + viocdwrite = 0x0004, + viocdlockdoor = 0x0005, + viocdgetinfo = 0x0006, + viocdcheck = 0x0007 +}; + +/* Should probably make this a module parameter....sigh + */ +#define VIOCD_MAX_CD 8 +int viocd_blocksizes[VIOCD_MAX_CD]; +static u64 viocd_size_in_bytes[VIOCD_MAX_CD]; + +/* This is the structure we use to exchange info between driver and interrupt + * handler + */ +struct viocd_waitevent { + struct semaphore *sem; + int rc; + int changed; +}; + +/* this is a lookup table for the true capabilities of a device */ +struct capability_entry { + char *type; + int capability; +}; + +static struct capability_entry capability_table[] = { + { "6330", CDC_LOCK | CDC_DVD_RAM }, + { "6321", CDC_LOCK }, + { "632B", 0 }, + { NULL , CDC_LOCK }, +}; + +/* These are our internal structures for keeping track of devices + */ +static int viocd_numdev; + +struct cdrom_info { + char rsrcname[10]; + char type[4]; + char model[3]; +}; +static struct cdrom_info *viocd_unitinfo = NULL; + +struct disk_info{ + u32 useCount; + u32 blocksize; + u32 mediasize; +}; +static struct disk_info viocd_diskinfo[VIOCD_MAX_CD]; + +static struct cdrom_device_info viocd_info[VIOCD_MAX_CD]; + +static spinlock_t viocd_lock = SPIN_LOCK_UNLOCKED; + +#define MAX_CD_REQ 1 +static LIST_HEAD(reqlist); + +/* End a request + */ +static int viocd_end_request(struct request *req, int uptodate) +{ + if (end_that_request_first(req, uptodate, DEVICE_NAME)) + return 0; + end_that_request_last(req); + return 1; +} + + +/* Get info on CD devices from OS/400 + */ +static void get_viocd_info(void) +{ + dma_addr_t dmaaddr; + HvLpEvent_Rc hvrc; + int i; + DECLARE_MUTEX_LOCKED(Semaphore); + struct viocd_waitevent we; + + // If we don't have a host, bail out + if (viopath_hostLp == HvLpIndexInvalid) + return; 
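+
+	/* Handshake with the hosting partition: map a buffer for the unit
+	 * info, signal a viocdgetinfo event to OS/400, then sleep on the
+	 * semaphore until vioHandleCDEvent() raises it when the response
+	 * arrives.
+	 */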
+ + if (viocd_unitinfo == NULL) + viocd_unitinfo = + kmalloc(sizeof(struct cdrom_info) * VIOCD_MAX_CD, + GFP_KERNEL); + + memset(viocd_unitinfo, 0x00, + sizeof(struct cdrom_info) * VIOCD_MAX_CD); + + dmaaddr = pci_map_single(iSeries_vio_dev, viocd_unitinfo, + sizeof(struct cdrom_info) * VIOCD_MAX_CD, + PCI_DMA_FROMDEVICE); + if (dmaaddr == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO "error allocating tce\n"); + return; + } + + we.sem = &Semaphore; + + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdgetinfo, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + dmaaddr, + 0, + sizeof(struct cdrom_info) * VIOCD_MAX_CD, + 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk(KERN_WARNING_VIO "cdrom error sending event. rc %d\n", (int) hvrc); + return; + } + + down(&Semaphore); + + if (we.rc) { + printk(KERN_WARNING_VIO "bad rc %d on getinfo\n", we.rc); + return; + } + + + for (i = 0; (i < VIOCD_MAX_CD) && (viocd_unitinfo[i].rsrcname[0]); i++) { + viocd_numdev++; + } +} + +/* Open a device + */ +static int viocd_open(struct cdrom_device_info *cdi, int purpose) +{ + DECLARE_MUTEX_LOCKED(Semaphore); + int device_no = MINOR(cdi->dev); + HvLpEvent_Rc hvrc; + struct viocd_waitevent we; + struct disk_info *diskinfo = &viocd_diskinfo[device_no]; + + // If we don't have a host, bail out + if (viopath_hostLp == HvLpIndexInvalid || device_no >= viocd_numdev) + return -ENODEV; + + we.sem = &Semaphore; + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdopen, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48), + 0, 0, 0); + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", + (int) hvrc); + return -EIO; + } + + down(&Semaphore); + + if (we.rc) + return -EIO; + + if (diskinfo->useCount == 0) { + if(diskinfo->blocksize > 0) { + viocd_blocksizes[device_no] = diskinfo->blocksize; + viocd_size_in_bytes[device_no] = diskinfo->blocksize * diskinfo->mediasize; + } else { + viocd_size_in_bytes[device_no] = 0xFFFFFFFFFFFFFFFF; + } + } + MOD_INC_USE_COUNT; + return 0; +} + +/* Release a device + */ +static void viocd_release(struct cdrom_device_info *cdi) +{ + int device_no = MINOR(cdi->dev); + HvLpEvent_Rc hvrc; + + /* If we don't have a host, bail out */ + if (viopath_hostLp == HvLpIndexInvalid + || device_no >= viocd_numdev) + return; + + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdclose, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + 0, + VIOVERSION << 16, + ((u64) device_no << 48), + 0, 0, 0); + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc); + return; + } + + MOD_DEC_USE_COUNT; +} + +/* Send a read or write request to OS/400 + */ +static int send_request(struct request *req) +{ + HvLpEvent_Rc hvrc; + dma_addr_t dmaaddr; + int device_no = DEVICE_NR(req->rq_dev); + u64 start = req->sector * 512, + len = req->current_nr_sectors * 512; + char reading = req->cmd == READ; + u16 command = reading ? 
viocdread : viocdwrite; + + + if(start + len > viocd_size_in_bytes[device_no]) { + printk(KERN_WARNING_VIO "viocd%d; access position %lx, past size %lx\n", + device_no, start + len, viocd_size_in_bytes[device_no]); + return -1; + } + + dmaaddr = pci_map_single(iSeries_vio_dev, req->buffer, len, + reading ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + if (dmaaddr == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO "error allocating tce for address %p len %ld\n", + req->buffer, len); + return -1; + } + + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | command, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) req->buffer, + VIOVERSION << 16, + ((u64) device_no << 48) | dmaaddr, + start, len, 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk(KERN_WARNING_VIO "hv error on op %d\n", (int) hvrc); + return -1; + } + + return 0; +} + + +/* Do a request + */ +static int rwreq; +static void do_viocd_request(request_queue_t * q) +{ + for (;;) { + struct request *req; + char err_str[80] = ""; + int device_no; + + INIT_REQUEST; + if (rwreq >= MAX_CD_REQ) { + return; + } + + device_no = CURRENT_DEV; + + /* remove the current request from the queue */ + req = CURRENT; + blkdev_dequeue_request(req); + + /* check for any kind of error */ + if (device_no > viocd_numdev) + sprintf(err_str, "Invalid device number %d", device_no); + else if (send_request(req) < 0) + strcpy(err_str, "unable to send message to OS/400!"); + + /* if we had any sort of error, log it and cancel the request */ + if (*err_str) { + printk(KERN_WARNING_VIO "%s\n", err_str); + viocd_end_request(req, 0); + } else { + spin_lock(&viocd_lock); + list_add_tail(&req->queue, &reqlist); + ++rwreq; + spin_unlock(&viocd_lock); + } + } +} + +/* Check if the CD changed + */ +static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr) +{ + struct viocd_waitevent we; + HvLpEvent_Rc hvrc; + int device_no = MINOR(cdi->dev); + + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + we.sem = &Semaphore; + + /* Send the open event to OS/400 */ + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdcheck, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48), + 0, 0, 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code. If bad, assume no change */ + if (we.rc != 0) { + printk(KERN_WARNING_VIO "bad rc on check_change. 
Assuming no change\n"); + return 0; + } + + return we.changed; +} + +static int viocd_lock_door(struct cdrom_device_info *cdi, int locking) +{ + HvLpEvent_Rc hvrc; + u64 device_no = MINOR(cdi->dev); + /* NOTE: flags is 1 or 0 so it won't overwrite the device_no */ + u64 flags = !!locking; + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + struct viocd_waitevent we = { sem:&Semaphore }; + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + we.sem = &Semaphore; + + /* Send the lockdoor event to OS/400 */ + hvrc = signalLpEvent(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdlockdoor, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + (device_no << 48) | (flags << 32), + 0, 0, 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code. If bad, assume no change */ + if (we.rc != 0) { + return -EIO; + } + + return 0; +} + +/* This routine handles incoming CD LP events + */ +static void vioHandleCDEvent(struct HvLpEvent *event) +{ + struct viocdlpevent *bevent = (struct viocdlpevent *) event; + struct viocd_waitevent *pwe; + + if (event == NULL) { + /* Notification that a partition went away! */ + return; + } + /* First, we should NEVER get an int here...only acks */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(KERN_WARNING_VIO "Yikes! got an int in viocd event handler!\n"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } + + switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { + case viocdopen: + viocd_diskinfo[bevent->mDisk].blocksize = bevent->mBlockSize; + viocd_diskinfo[bevent->mDisk].mediasize = bevent->mMediaSize; + /* FALLTHROUGH !! */ + case viocdgetinfo: + case viocdlockdoor: + pwe = (struct viocd_waitevent *) (unsigned long) event->xCorrelationToken; + pwe->rc = event->xRc; + up(pwe->sem); + break; + + case viocdclose: + break; + + case viocdwrite: + case viocdread:{ + unsigned long flags; + int reading = ((event->xSubtype & VIOMINOR_SUBTYPE_MASK) == viocdread); + struct request *req = blkdev_entry_to_request(reqlist.next); + /* Since this is running in interrupt mode, we need to make sure we're not + * stepping on any global I/O operations + */ + spin_lock_irqsave(&io_request_lock, flags); + + pci_unmap_single(iSeries_vio_dev, + bevent->mToken, + bevent->mLen, + reading ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + + /* find the event to which this is a response */ + while ((&req->queue != &reqlist) && + ((u64) (unsigned long) req->buffer != bevent->event.xCorrelationToken)) + req = blkdev_entry_to_request(req->queue.next); + + /* if the event was not there, then what are we responding to?? */ + if (&req->queue == &reqlist) { + printk(KERN_WARNING_VIO "Yikes! we didn't ever enqueue this guy!\n"); + spin_unlock_irqrestore(&io_request_lock, + flags); + break; + } + + /* we don't need to keep it around anymore... 
*/ + spin_lock(&viocd_lock); + list_del(&req->queue); + --rwreq; + spin_unlock(&viocd_lock); + { + char stat = event->xRc == HvLpEvent_Rc_Good; + int nsect = bevent->mLen >> 9; + + if (!stat) + printk(KERN_WARNING_VIO + "request %p failed with rc %d:0x%08x\n", + req->buffer, event->xRc, bevent->mSubTypeRc); + while ((nsect > 0) && (req->bh)) { + nsect -= req->current_nr_sectors; + viocd_end_request(req, stat); + } + /* we weren't done yet */ + if (req->bh) { + if (send_request(req) < 0) { + printk(KERN_WARNING_VIO + "couldn't re-submit req %p\n", req->buffer); + viocd_end_request(req, 0); + } else { + spin_lock(&viocd_lock); + list_add_tail(&req->queue, &reqlist); + ++rwreq; + spin_unlock(&viocd_lock); + } + } + } + + /* restart handling of incoming requests */ + do_viocd_request(NULL); + spin_unlock_irqrestore(&io_request_lock, flags); + break; + } + case viocdcheck: + pwe = (struct viocd_waitevent *) (unsigned long) event->xCorrelationToken; + pwe->rc = event->xRc; + pwe->changed = bevent->mFlags; + up(pwe->sem); + break; + + default: + printk(KERN_WARNING_VIO "invalid subtype!"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +/* Our file operations table + */ +static struct cdrom_device_ops viocd_dops = { + open:viocd_open, + release:viocd_release, + media_changed:viocd_media_changed, + lock_door:viocd_lock_door, + capability:CDC_CLOSE_TRAY | CDC_OPEN_TRAY | CDC_LOCK | CDC_SELECT_SPEED | CDC_SELECT_DISC | CDC_MULTI_SESSION | CDC_MCN | CDC_MEDIA_CHANGED | CDC_PLAY_AUDIO | CDC_RESET | CDC_IOCTLS | CDC_DRIVE_STATUS | CDC_GENERIC_PACKET | CDC_CD_R | CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM +}; + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + + for (i = 0; i < viocd_numdev; i++) { + len += + sprintf(buf + len, + "viocd device %d is iSeries resource %10.10s type %4.4s, model %3.3s\n", + i, viocd_unitinfo[i].rsrcname, + viocd_unitinfo[i].type, + viocd_unitinfo[i].model); + } + *eof = 1; + return len; +} + + +/* setup our proc file system entries + */ +void viocd_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = create_proc_entry("viocd", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; +} + +/* clean up our proc file system entries + */ +void viocd_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viocd", iSeries_proc); +} + +static int find_capability(const char *type) +{ + struct capability_entry *entry; + for(entry = capability_table; entry->type; ++entry) + if(!strncmp(entry->type, type, 4)) + break; + return entry->capability; +} + +/* Initialize the whole device driver. 
Handle module and non-module + * versions + */ +__init int viocd_init(void) +{ + int i, rc; + + if (viopath_hostLp == HvLpIndexInvalid) + vio_set_hostlp(); + + /* If we don't have a host, bail out */ + if (viopath_hostLp == HvLpIndexInvalid) + return -ENODEV; + + rc = viopath_open(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + if (rc) { + printk(KERN_WARNING_VIO "error opening path to host partition %d\n", + viopath_hostLp); + return rc; + } + + /* Initialize our request handler + */ + rwreq = 0; + vio_setHandler(viomajorsubtype_cdio, vioHandleCDEvent); + + memset(&viocd_diskinfo, 0x00, sizeof(viocd_diskinfo)); + + get_viocd_info(); + + if (viocd_numdev == 0) { + vio_clearHandler(viomajorsubtype_cdio); + viopath_close(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + return 0; + } + + printk(KERN_INFO_VIO + "%s: iSeries Virtual CD vers %s, major %d, max disks %d, hosting partition %d\n", + DEVICE_NAME, VIOCD_VERS, MAJOR_NR, VIOCD_MAX_CD, viopath_hostLp); + + if (devfs_register_blkdev(MAJOR_NR, "viocd", &cdrom_fops) != 0) { + printk(KERN_WARNING_VIO "Unable to get major %d for viocd CD-ROM\n", MAJOR_NR); + return -EIO; + } + + blksize_size[MAJOR_NR] = viocd_blocksizes; + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + read_ahead[MAJOR_NR] = 4; + + memset(&viocd_info, 0x00, sizeof(viocd_info)); + for (i = 0; i < viocd_numdev; i++) { + viocd_info[i].dev = MKDEV(MAJOR_NR, i); + viocd_info[i].ops = &viocd_dops; + viocd_info[i].speed = 4; + viocd_info[i].capacity = 1; + viocd_info[i].mask = ~find_capability(viocd_unitinfo[i].type); + sprintf(viocd_info[i].name, VIOCD_DEVICE, VIOCD_DEVICE_OFFSET + i); + if (register_cdrom(&viocd_info[i]) != 0) { + printk(KERN_WARNING_VIO "Cannot register viocd CD-ROM %s!\n", viocd_info[i].name); + } else { + printk(KERN_INFO_VIO + "cd %s is iSeries resource %10.10s type %4.4s, model %3.3s\n", + viocd_info[i].name, + viocd_unitinfo[i].rsrcname, + viocd_unitinfo[i].type, + viocd_unitinfo[i].model); + } + } + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viocd_proc_init); + + return 0; +} + +#ifdef MODULE +void viocd_exit(void) +{ + int i; + for (i = 0; i < viocd_numdev; i++) { + if (unregister_cdrom(&viocd_info[i]) != 0) { + printk(KERN_WARNING_VIO "Cannot unregister viocd CD-ROM %s!\n", viocd_info[i].name); + } + } + if ((devfs_unregister_blkdev(MAJOR_NR, "viocd") == -EINVAL)) { + printk(KERN_WARNING_VIO "can't unregister viocd\n"); + return; + } + blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + if (viocd_unitinfo) + kfree(viocd_unitinfo); + + iSeries_proc_callback(&viocd_proc_delete); + + viopath_close(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + vio_clearHandler(viomajorsubtype_cdio); +} +#endif + +#ifdef MODULE +module_init(viocd_init); +module_exit(viocd_exit); +MODULE_LICENSE("GPL"); +#endif diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/viocons.c linuxppc64_2_4/drivers/iseries/viocons.c --- ../kernel.org/linux/drivers/iseries/viocons.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viocons.c Thu Sep 27 14:00:26 2001 @@ -0,0 +1,1403 @@ +/* -*- linux-c -*- + * drivers/char/viocons.c + * + * iSeries Virtual Terminal + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vio.h" + +#include +#include "asm/iSeries/HvCallEvent.h" +#include "asm/iSeries/HvLpConfig.h" +#include "asm/iSeries/HvCall.h" +#include + +/* Check that the tty_driver_data actually points to our stuff + */ +#define VIOTTY_PARANOIA_CHECK 1 +#define VIOTTY_MAGIC (0x0DCB) + +static int debug; + +static DECLARE_WAIT_QUEUE_HEAD(viocons_wait_queue); + +static int viotty_major = 229; + +#define VTTY_PORTS 10 +#define VIOTTY_SERIAL_START 65 + +static u64 sndMsgSeq[VTTY_PORTS]; +static u64 sndMsgAck[VTTY_PORTS]; + +static spinlock_t consolelock = SPIN_LOCK_UNLOCKED; + +/* THe structure of the events that flow between us and OS/400. You can't + * mess with this unless the OS/400 side changes too + */ +struct viocharlpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u8 virtualDevice; + u8 immediateDataLen; + u8 immediateData[VIOCHAR_MAX_DATA]; +}; + +#define viochar_window (10) +#define viochar_highwatermark (3) + +enum viocharsubtype { + viocharopen = 0x0001, + viocharclose = 0x0002, + viochardata = 0x0003, + viocharack = 0x0004, + viocharconfig = 0x0005 +}; + +enum viochar_rc { + viochar_rc_ebusy = 1 +}; + +/* When we get writes faster than we can send it to the partition, + * buffer the data here. There is one set of buffers for each virtual + * port. + * Note that bufferUsed is a bit map of used buffers. + * It had better have enough bits to hold NUM_BUF + * the bitops assume it is a multiple of unsigned long + */ +#define NUM_BUF (8) +#define OVERFLOW_SIZE VIOCHAR_MAX_DATA + +static struct overflowBuffers { + unsigned long bufferUsed; + u8 *buffer[NUM_BUF]; + int bufferBytes[NUM_BUF]; + int curbuf; + int bufferOverflow; + int overflowMessage; +} overflow[VTTY_PORTS]; + +static void initDataEvent(struct viocharlpevent *viochar, HvLpIndex lp); + +static struct tty_driver viotty_driver; +static struct tty_driver viottyS_driver; +static int viotty_refcount; + +static struct tty_struct *viotty_table[VTTY_PORTS]; +static struct tty_struct *viottyS_table[VTTY_PORTS]; +static struct termios *viotty_termios[VTTY_PORTS]; +static struct termios *viottyS_termios[VTTY_PORTS]; +static struct termios *viotty_termios_locked[VTTY_PORTS]; +static struct termios *viottyS_termios_locked[VTTY_PORTS]; + +void hvlog(char *fmt, ...) +{ + int i; + static char buf[256]; + va_list args; + va_start(args, fmt); + i = vsprintf(buf, fmt, args); + va_end(args); + HvCall_writeLogBuffer(buf, i); + HvCall_writeLogBuffer("\r", 1); + +} + +/* Our port information. We store a pointer to one entry in the + * tty_driver_data + */ +static struct port_info_tag { + int magic; + struct tty_struct *tty; + HvLpIndex lp; + u8 vcons; + u8 port; +} port_info[VTTY_PORTS]; + +/* Make sure we're pointing to a valid port_info structure. 
Shamelessly + * plagerized from serial.c + */ +static inline int viotty_paranoia_check(struct port_info_tag *pi, + kdev_t device, const char *routine) +{ +#ifdef VIOTTY_PARANOIA_CHECK + static const char *badmagic = + "%s Warning: bad magic number for port_info struct (%s) in %s\n"; + static const char *badinfo = + "%s Warning: null port_info for (%s) in %s\n"; + + if (!pi) { + printk(badinfo, KERN_WARNING_VIO, kdevname(device), + routine); + return 1; + } + if (pi->magic != VIOTTY_MAGIC) { + printk(badmagic, KERN_WARNING_VIO, kdevname(device), + routine); + return 1; + } +#endif + return 0; +} + +/* + * Handle reads from the proc file system. Right now we just dump the + * state of the first TTY + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + struct tty_struct *tty = viotty_table[0]; + struct termios *termios; + if (tty == NULL) { + len += sprintf(buf + len, "no tty\n"); + *eof = 1; + return len; + } + + len += + sprintf(buf + len, + "tty info: COOK_OUT %ld COOK_IN %ld, NO_WRITE_SPLIT %ld\n", + tty->flags & TTY_HW_COOK_OUT, + tty->flags & TTY_HW_COOK_IN, + tty->flags & TTY_NO_WRITE_SPLIT); + + termios = tty->termios; + if (termios == NULL) { + len += sprintf(buf + len, "no termios\n"); + *eof = 1; + return len; + } + len += sprintf(buf + len, "INTR_CHAR %2.2x\n", INTR_CHAR(tty)); + len += sprintf(buf + len, "QUIT_CHAR %2.2x\n", QUIT_CHAR(tty)); + len += + sprintf(buf + len, "ERASE_CHAR %2.2x\n", ERASE_CHAR(tty)); + len += sprintf(buf + len, "KILL_CHAR %2.2x\n", KILL_CHAR(tty)); + len += sprintf(buf + len, "EOF_CHAR %2.2x\n", EOF_CHAR(tty)); + len += sprintf(buf + len, "TIME_CHAR %2.2x\n", TIME_CHAR(tty)); + len += sprintf(buf + len, "MIN_CHAR %2.2x\n", MIN_CHAR(tty)); + len += sprintf(buf + len, "SWTC_CHAR %2.2x\n", SWTC_CHAR(tty)); + len += + sprintf(buf + len, "START_CHAR %2.2x\n", START_CHAR(tty)); + len += sprintf(buf + len, "STOP_CHAR %2.2x\n", STOP_CHAR(tty)); + len += sprintf(buf + len, "SUSP_CHAR %2.2x\n", SUSP_CHAR(tty)); + len += sprintf(buf + len, "EOL_CHAR %2.2x\n", EOL_CHAR(tty)); + len += + sprintf(buf + len, "REPRINT_CHAR %2.2x\n", REPRINT_CHAR(tty)); + len += + sprintf(buf + len, "DISCARD_CHAR %2.2x\n", DISCARD_CHAR(tty)); + len += + sprintf(buf + len, "WERASE_CHAR %2.2x\n", WERASE_CHAR(tty)); + len += + sprintf(buf + len, "LNEXT_CHAR %2.2x\n", LNEXT_CHAR(tty)); + len += sprintf(buf + len, "EOL2_CHAR %2.2x\n", EOL2_CHAR(tty)); + + len += sprintf(buf + len, "I_IGNBRK %4.4x\n", I_IGNBRK(tty)); + len += sprintf(buf + len, "I_BRKINT %4.4x\n", I_BRKINT(tty)); + len += sprintf(buf + len, "I_IGNPAR %4.4x\n", I_IGNPAR(tty)); + len += sprintf(buf + len, "I_PARMRK %4.4x\n", I_PARMRK(tty)); + len += sprintf(buf + len, "I_INPCK %4.4x\n", I_INPCK(tty)); + len += sprintf(buf + len, "I_ISTRIP %4.4x\n", I_ISTRIP(tty)); + len += sprintf(buf + len, "I_INLCR %4.4x\n", I_INLCR(tty)); + len += sprintf(buf + len, "I_IGNCR %4.4x\n", I_IGNCR(tty)); + len += sprintf(buf + len, "I_ICRNL %4.4x\n", I_ICRNL(tty)); + len += sprintf(buf + len, "I_IUCLC %4.4x\n", I_IUCLC(tty)); + len += sprintf(buf + len, "I_IXON %4.4x\n", I_IXON(tty)); + len += sprintf(buf + len, "I_IXANY %4.4x\n", I_IXANY(tty)); + len += sprintf(buf + len, "I_IXOFF %4.4x\n", I_IXOFF(tty)); + len += sprintf(buf + len, "I_IMAXBEL %4.4x\n", I_IMAXBEL(tty)); + + len += sprintf(buf + len, "O_OPOST %4.4x\n", O_OPOST(tty)); + len += sprintf(buf + len, "O_OLCUC %4.4x\n", O_OLCUC(tty)); + len += sprintf(buf + len, "O_ONLCR %4.4x\n", O_ONLCR(tty)); + len += sprintf(buf + 
len, "O_OCRNL %4.4x\n", O_OCRNL(tty)); + len += sprintf(buf + len, "O_ONOCR %4.4x\n", O_ONOCR(tty)); + len += sprintf(buf + len, "O_ONLRET %4.4x\n", O_ONLRET(tty)); + len += sprintf(buf + len, "O_OFILL %4.4x\n", O_OFILL(tty)); + len += sprintf(buf + len, "O_OFDEL %4.4x\n", O_OFDEL(tty)); + len += sprintf(buf + len, "O_NLDLY %4.4x\n", O_NLDLY(tty)); + len += sprintf(buf + len, "O_CRDLY %4.4x\n", O_CRDLY(tty)); + len += sprintf(buf + len, "O_TABDLY %4.4x\n", O_TABDLY(tty)); + len += sprintf(buf + len, "O_BSDLY %4.4x\n", O_BSDLY(tty)); + len += sprintf(buf + len, "O_VTDLY %4.4x\n", O_VTDLY(tty)); + len += sprintf(buf + len, "O_FFDLY %4.4x\n", O_FFDLY(tty)); + + len += sprintf(buf + len, "C_BAUD %4.4x\n", C_BAUD(tty)); + len += sprintf(buf + len, "C_CSIZE %4.4x\n", C_CSIZE(tty)); + len += sprintf(buf + len, "C_CSTOPB %4.4x\n", C_CSTOPB(tty)); + len += sprintf(buf + len, "C_CREAD %4.4x\n", C_CREAD(tty)); + len += sprintf(buf + len, "C_PARENB %4.4x\n", C_PARENB(tty)); + len += sprintf(buf + len, "C_PARODD %4.4x\n", C_PARODD(tty)); + len += sprintf(buf + len, "C_HUPCL %4.4x\n", C_HUPCL(tty)); + len += sprintf(buf + len, "C_CLOCAL %4.4x\n", C_CLOCAL(tty)); + len += sprintf(buf + len, "C_CRTSCTS %4.4x\n", C_CRTSCTS(tty)); + + len += sprintf(buf + len, "L_ISIG %4.4x\n", L_ISIG(tty)); + len += sprintf(buf + len, "L_ICANON %4.4x\n", L_ICANON(tty)); + len += sprintf(buf + len, "L_XCASE %4.4x\n", L_XCASE(tty)); + len += sprintf(buf + len, "L_ECHO %4.4x\n", L_ECHO(tty)); + len += sprintf(buf + len, "L_ECHOE %4.4x\n", L_ECHOE(tty)); + len += sprintf(buf + len, "L_ECHOK %4.4x\n", L_ECHOK(tty)); + len += sprintf(buf + len, "L_ECHONL %4.4x\n", L_ECHONL(tty)); + len += sprintf(buf + len, "L_NOFLSH %4.4x\n", L_NOFLSH(tty)); + len += sprintf(buf + len, "L_TOSTOP %4.4x\n", L_TOSTOP(tty)); + len += sprintf(buf + len, "L_ECHOCTL %4.4x\n", L_ECHOCTL(tty)); + len += sprintf(buf + len, "L_ECHOPRT %4.4x\n", L_ECHOPRT(tty)); + len += sprintf(buf + len, "L_ECHOKE %4.4x\n", L_ECHOKE(tty)); + len += sprintf(buf + len, "L_FLUSHO %4.4x\n", L_FLUSHO(tty)); + len += sprintf(buf + len, "L_PENDIN %4.4x\n", L_PENDIN(tty)); + len += sprintf(buf + len, "L_IEXTEN %4.4x\n", L_IEXTEN(tty)); + + *eof = 1; + return len; +} + +/* + * Handle writes to our proc file system. Right now just turns on and off + * our debug flag + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + if (count) { + if (buffer[0] == '1') { + printk("viocons: debugging on\n"); + debug = 1; + } else { + printk("viocons: debugging off\n"); + debug = 0; + } + } + return count; +} + +/* + * setup our proc file system entries + */ +void viocons_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viocons", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* + * clean up our proc file system entries + */ +void viocons_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viocons", iSeries_proc); +} + +/* + * Add data to our pending-send buffers. + * + * NOTE: Don't use printk in here because it gets nastily recursive. 
hvlog can be + * used to log to the hypervisor buffer + */ +static int bufferAdd(u8 port, const char *buf, size_t len, int userFlag) +{ + size_t bleft = len; + size_t curlen; + char *cbuf = (char *) buf; + int nextbuf; + struct overflowBuffers *pov = &overflow[port]; + while (bleft > 0) { + /* If there is no space left in the current buffer, we have + * filled everything up, so return. If we filled the previous + * buffer we would already have moved to the next one. + */ + if (pov->bufferBytes[pov->curbuf] == OVERFLOW_SIZE) { + hvlog("buffer %d full. no more space\n", + pov->curbuf); + pov->bufferOverflow++; + pov->overflowMessage = 1; + return len - bleft; + } + + /* Turn on the "used" bit for this buffer. If it's already on, that's + * fine. + */ + set_bit(pov->curbuf, &pov->bufferUsed); + + /* + * See if this buffer has been allocated. If not, allocate it + */ + if (pov->buffer[pov->curbuf] == NULL) + pov->buffer[pov->curbuf] = + kmalloc(OVERFLOW_SIZE, GFP_ATOMIC); + + /* + * Figure out how much we can copy into this buffer + */ + if (bleft < + (OVERFLOW_SIZE - pov->bufferBytes[pov->curbuf])) + curlen = bleft; + else + curlen = + OVERFLOW_SIZE - pov->bufferBytes[pov->curbuf]; + + /* + * Copy the data into the buffer + */ + if (userFlag) + copy_from_user(pov->buffer[pov->curbuf] + + pov->bufferBytes[pov->curbuf], cbuf, + curlen); + else + memcpy(pov->buffer[pov->curbuf] + + pov->bufferBytes[pov->curbuf], cbuf, + curlen); + + pov->bufferBytes[pov->curbuf] += curlen; + cbuf += curlen; + bleft -= curlen; + + /* + * Now see if we've filled this buffer + */ + if (pov->bufferBytes[pov->curbuf] == OVERFLOW_SIZE) { + nextbuf = (pov->curbuf + 1) % NUM_BUF; + + /* + * Move to the next buffer if it hasn't been used yet + */ + if (test_bit(nextbuf, &pov->bufferUsed) == 0) { + pov->curbuf = nextbuf; + } + } + } + return len; +} + +/* Send pending data + * + * NOTE: Don't use printk in here because it gets nastily recursive. hvlog can be + * used to log to the hypervisor buffer + */ +void sendBuffers(u8 port, HvLpIndex lp) +{ + HvLpEvent_Rc hvrc; + int nextbuf; + struct viocharlpevent *viochar; + unsigned long flags; + struct overflowBuffers *pov = &overflow[port]; + + spin_lock_irqsave(&consolelock, flags); + + viochar = (struct viocharlpevent *) + vio_get_event_buffer(viomajorsubtype_chario); + + /* Make sure we got a buffer + */ + if (viochar == NULL) { + hvlog("Yikes...can't get viochar buffer"); + spin_unlock_irqrestore(&consolelock, flags); + return; + } + + if (pov->bufferUsed == 0) { + hvlog("in sendbuffers, but no buffers used\n"); + vio_free_event_buffer(viomajorsubtype_chario, viochar); + spin_unlock_irqrestore(&consolelock, flags); + return; + } + + /* + * curbuf points to the buffer we're filling. We want to start sending AFTER + * this one. 
+
+/* Send pending data
+ *
+ * NOTE: Don't use printk in here because it gets nastily recursive.
+ * hvlog can be used to log to the hypervisor buffer
+ */
+void sendBuffers(u8 port, HvLpIndex lp)
+{
+	HvLpEvent_Rc hvrc;
+	int nextbuf;
+	struct viocharlpevent *viochar;
+	unsigned long flags;
+	struct overflowBuffers *pov = &overflow[port];
+
+	spin_lock_irqsave(&consolelock, flags);
+
+	viochar = (struct viocharlpevent *)
+	    vio_get_event_buffer(viomajorsubtype_chario);
+
+	/* Make sure we got a buffer
+	 */
+	if (viochar == NULL) {
+		hvlog("Yikes...can't get viochar buffer");
+		spin_unlock_irqrestore(&consolelock, flags);
+		return;
+	}
+
+	if (pov->bufferUsed == 0) {
+		hvlog("in sendbuffers, but no buffers used\n");
+		vio_free_event_buffer(viomajorsubtype_chario, viochar);
+		spin_unlock_irqrestore(&consolelock, flags);
+		return;
+	}
+
+	/*
+	 * curbuf points to the buffer we're filling. We want to start
+	 * sending AFTER this one.
+	 */
+	nextbuf = (pov->curbuf + 1) % NUM_BUF;
+
+	/*
+	 * Loop until we find a buffer with the bufferUsed bit on
+	 */
+	while (test_bit(nextbuf, &pov->bufferUsed) == 0)
+		nextbuf = (nextbuf + 1) % NUM_BUF;
+
+	initDataEvent(viochar, lp);
+
+	/*
+	 * While we have buffers with data, and our send window is open,
+	 * send them
+	 */
+	while ((test_bit(nextbuf, &pov->bufferUsed)) &&
+	       ((sndMsgSeq[port] - sndMsgAck[port]) < viochar_window)) {
+		viochar->immediateDataLen = pov->bufferBytes[nextbuf];
+		viochar->event.xCorrelationToken = sndMsgSeq[port]++;
+		viochar->event.xSizeMinus1 =
+		    offsetof(struct viocharlpevent,
+			     immediateData) + viochar->immediateDataLen;
+
+		memcpy(viochar->immediateData, pov->buffer[nextbuf],
+		       viochar->immediateDataLen);
+
+		hvrc = HvCallEvent_signalLpEvent(&viochar->event);
+		if (hvrc) {
+			/*
+			 * MUST unlock the spinlock before doing a printk
+			 */
+			vio_free_event_buffer(viomajorsubtype_chario,
+					      viochar);
+			spin_unlock_irqrestore(&consolelock, flags);
+
+			printk(KERN_WARNING_VIO
+			       "console error sending event! return code %d\n",
+			       (int) hvrc);
+			return;
+		}
+
+		/*
+		 * clear the bufferUsed bit, zero the number of bytes in
+		 * this buffer, and move to the next buffer
+		 */
+		clear_bit(nextbuf, &pov->bufferUsed);
+		pov->bufferBytes[nextbuf] = 0;
+		nextbuf = (nextbuf + 1) % NUM_BUF;
+	}
+
+	/*
+	 * If we have emptied all the buffers, start at 0 again.
+	 * this will re-use any allocated buffers
+	 */
+	if (pov->bufferUsed == 0) {
+		pov->curbuf = 0;
+
+		if (pov->overflowMessage)
+			pov->overflowMessage = 0;
+
+		if (port_info[port].tty) {
+			if ((port_info[port].tty->
+			     flags & (1 << TTY_DO_WRITE_WAKEUP))
+			    && (port_info[port].tty->ldisc.write_wakeup))
+				(port_info[port].tty->ldisc.
+				 write_wakeup) (port_info[port].tty);
+			wake_up_interruptible(&port_info[port].tty->
+					      write_wait);
+		}
+	}
+
+	vio_free_event_buffer(viomajorsubtype_chario, viochar);
+	spin_unlock_irqrestore(&consolelock, flags);
+}
+
+/* Our internal writer. Gets called both from the console device and
+ * the tty device. The tty pointer will be NULL if called from the console.
+ *
+ * NOTE: Don't use printk in here because it gets nastily recursive.
+ * hvlog can be used to log to the hypervisor buffer
+ */
+static int internal_write(struct tty_struct *tty, const char *buf,
+			  size_t len, int userFlag)
+{
+	HvLpEvent_Rc hvrc;
+	size_t bleft = len;
+	size_t curlen;
+	const char *curbuf = buf;
+	struct viocharlpevent *viochar;
+	unsigned long flags;
+	struct port_info_tag *pi = NULL;
+	HvLpIndex lp;
+	u8 port;
+
+	if (tty) {
+		pi = (struct port_info_tag *) tty->driver_data;
+
+		if (!pi
+		    || viotty_paranoia_check(pi, tty->device,
+					     "viotty_internal_write"))
+			return -ENODEV;
+
+		lp = pi->lp;
+		port = pi->port;
+	} else {
+		/* If this is the console device, use the lp from the
+		 * first port entry
+		 */
+		port = 0;
+		lp = port_info[0].lp;
+	}
+
+	/* Always put console output in the hypervisor console log
+	 */
+	if (port == 0)
+		HvCall_writeLogBuffer(buf, len);
+
+	/* If the path to this LP is closed, don't bother doing anything
+	 * more; just dump the data on the floor
+	 */
+	if (!viopath_isactive(lp))
+		return len;
+
+	/*
+	 * If there is already data queued for this port, send it
+	 */
+	if (overflow[port].bufferUsed)
+		sendBuffers(port, lp);
+
+	spin_lock_irqsave(&consolelock, flags);
+
+	viochar = (struct viocharlpevent *)
+	    vio_get_event_buffer(viomajorsubtype_chario);
+	/* Make sure we got a buffer
+	 */
+	if (viochar == NULL) {
+		hvlog("Yikes...can't get viochar buffer");
+		spin_unlock_irqrestore(&consolelock, flags);
+		return -1;
+	}
+
+	initDataEvent(viochar, lp);
+
+	/* Got the lock, don't cause console output */
+	while ((bleft > 0) &&
+	       (overflow[port].bufferUsed == 0) &&
+	       ((sndMsgSeq[port] - sndMsgAck[port]) < viochar_window)) {
+		if (bleft > VIOCHAR_MAX_DATA)
+			curlen = VIOCHAR_MAX_DATA;
+		else
+			curlen = bleft;
+
+		viochar->immediateDataLen = curlen;
+		viochar->event.xCorrelationToken = sndMsgSeq[port]++;
+
+		if (userFlag)
+			copy_from_user(viochar->immediateData, curbuf,
+				       curlen);
+		else
+			memcpy(viochar->immediateData, curbuf, curlen);
+
+		viochar->event.xSizeMinus1 =
+		    offsetof(struct viocharlpevent, immediateData) + curlen;
+
+		hvrc = HvCallEvent_signalLpEvent(&viochar->event);
+		if (hvrc) {
+			/*
+			 * MUST unlock the spinlock before doing a printk
+			 */
+			vio_free_event_buffer(viomajorsubtype_chario,
+					      viochar);
+			spin_unlock_irqrestore(&consolelock, flags);
+
+			hvlog("viocons: error sending event! %d\n",
+			      (int) hvrc);
+			return len - bleft;
+		}
+
+		curbuf += curlen;
+		bleft -= curlen;
+	}
+
+	/*
+	 * If we didn't send it all, buffer it
+	 */
+	if (bleft > 0) {
+		bleft -= bufferAdd(port, curbuf, bleft, userFlag);
+	}
+	vio_free_event_buffer(viomajorsubtype_chario, viochar);
+	spin_unlock_irqrestore(&consolelock, flags);
+
+	return len - bleft;
+}
+
+/* Initialize the common fields in a charLpEvent
+ */
+static void initDataEvent(struct viocharlpevent *viochar, HvLpIndex lp)
+{
+	memset(viochar, 0x00, sizeof(struct viocharlpevent));
+
+	viochar->event.xFlags.xValid = 1;
+	viochar->event.xFlags.xFunction = HvLpEvent_Function_Int;
+	viochar->event.xFlags.xAckInd = HvLpEvent_AckInd_NoAck;
+	viochar->event.xFlags.xAckType = HvLpEvent_AckType_DeferredAck;
+	viochar->event.xType = HvLpEvent_Type_VirtualIo;
+	viochar->event.xSubtype = viomajorsubtype_chario | viochardata;
+	viochar->event.xSourceLp = HvLpConfig_getLpIndex();
+	viochar->event.xTargetLp = lp;
+	viochar->event.xSizeMinus1 = sizeof(struct viocharlpevent);
+	viochar->event.xSourceInstanceId = viopath_sourceinst(lp);
+	viochar->event.xTargetInstanceId = viopath_targetinst(lp);
+}
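Both internal_write() above and sendBuffers() earlier throttle on the same credit check: an event may be signalled only while (sndMsgSeq - sndMsgAck) < viochar_window, i.e. while fewer than viochar_window messages are unacknowledged; vioHandleAck(), later in this file, supplies the ack side. A stripped-down sketch of that sequence/ack window (the names here are illustrative, not the driver's):

/* One send window per port: seq counts messages sent, ack records the
 * newest correlation token the far side has acknowledged. */
struct send_window {
	unsigned long seq;
	unsigned long ack;
	unsigned long limit;    /* viochar_window in the driver */
};

/* May we signal another event right now? */
static int window_open(const struct send_window *w)
{
	return (w->seq - w->ack) < w->limit;
}

/* Caller stamps the outgoing event with the returned token. */
static unsigned long window_send(struct send_window *w)
{
	return w->seq++;
}

/* The ack handler simply stores the acknowledged token
 * (compare vioHandleAck() below). */
static void window_ack(struct send_window *w, unsigned long token)
{
	w->ack = token;
}

Because the counters are unsigned, the difference seq - ack stays correct even when the counters wrap.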
+
+/* console device write
+ */
+static void viocons_write(struct console *co, const char *s,
+			  unsigned count)
+{
+	/* This parser will ensure that all single instances of either
+	 * \n or \r are matched into carriage return/line feed
+	 * combinations. It also allows for instances where there
+	 * already exist \n\r combinations as well as the reverse,
+	 * \r\n combinations.
+	 */
+	int index;
+	char charptr[1];
+	int foundcr;
+	int slicebegin;
+	int sliceend;
+
+	foundcr = 0;
+	slicebegin = 0;
+	sliceend = 0;
+
+	for (index = 0; index < count; index++) {
+		if (!foundcr && s[index] == 0x0a) {
+			if ((sliceend - slicebegin > 0)
+			    && sliceend < count) {
+				internal_write(NULL, &s[slicebegin],
+					       sliceend - slicebegin, 0);
+				slicebegin = sliceend;
+			}
+			charptr[0] = '\r';
+			internal_write(NULL, charptr, 1, 0);
+		}
+		if (foundcr && s[index] != 0x0a) {
+			if ((index - 2) >= 0) {
+				if (s[index - 2] != 0x0a) {
+					internal_write(NULL, &s[slicebegin],
+						       sliceend - slicebegin,
+						       0);
+					slicebegin = sliceend;
+					charptr[0] = '\n';
+					internal_write(NULL, charptr, 1, 0);
+				}
+			}
+		}
+		sliceend++;
+
+		if (s[index] == 0x0d)
+			foundcr = 1;
+		else
+			foundcr = 0;
+	}
+
+	internal_write(NULL, &s[slicebegin], sliceend - slicebegin, 0);
+
+	if (count > 1) {
+		if (foundcr == 1 && s[count - 1] != 0x0a) {
+			charptr[0] = '\n';
+			internal_write(NULL, charptr, 1, 0);
+		} else if (s[count - 1] == 0x0a && s[count - 2] != 0x0d) {
+			charptr[0] = '\r';
+			internal_write(NULL, charptr, 1, 0);
+		}
+	}
+}
+
+/* Work out the device associated with this console
+ */
+static kdev_t viocons_device(struct console *c)
+{
+	return MKDEV(TTY_MAJOR, c->index + viotty_driver.minor_start);
+}
+
+/* console device read method
+ */
+static int viocons_read(struct console *co, const char *s, unsigned count)
+{
+	printk(KERN_DEBUG_VIO "viocons_read\n");
+	// Implement me
+	interruptible_sleep_on(&viocons_wait_queue);
+	return 0;
+}
+
+/* console device wait until a key is pressed
+ */
+static int viocons_wait_key(struct console *co)
+{
+	printk(KERN_DEBUG_VIO "In viocons_wait_key\n");
+	// Implement me
+	interruptible_sleep_on(&viocons_wait_queue);
+	return 0;
+}
+
+/* Do console device setup
+ */
+static int __init viocons_setup(struct console *co, char *options)
+{
+	return 0;
+}
+
+/* console device I/O methods
+ */
+static struct console viocons = {
+	name:"ttyS",
+	write:viocons_write,
+	read:viocons_read,
+	device:viocons_device,
+	wait_key:viocons_wait_key,
+	setup:viocons_setup,
+	flags:CON_PRINTBUFFER,
+};
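viocons_write() above slices the output so that lone \n or \r characters reach the hypervisor as \r\n pairs. The same basic policy is easier to see one character at a time. The sketch below is a simplification, not the driver's algorithm -- it does not reproduce the special-casing of pre-existing \n\r sequences -- and 'emit' is a caller-supplied stand-in for internal_write():

/* Normalize a buffer so every line ending reaches the wire as "\r\n".
 * 'prev' must persist across calls and start out as '\0', so a \r at
 * the end of one buffer is still paired correctly in the next. */
static void crlf_normalize(const char *s, unsigned count,
			   void (*emit)(const char *, unsigned), char *prev)
{
	unsigned i;

	for (i = 0; i < count; i++) {
		if (s[i] == '\n' && *prev != '\r')
			emit("\r", 1);  /* lone \n: prepend \r */
		else if (*prev == '\r' && s[i] != '\n')
			emit("\n", 1);  /* lone \r: append \n  */
		emit(&s[i], 1);
		*prev = s[i];
	}
}

The driver version batches runs of ordinary characters into slices instead of emitting them one at a time, which is why it carries the slicebegin/sliceend bookkeeping.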
+
+/* TTY Open method
+ */
+static int viotty_open(struct tty_struct *tty, struct file *filp)
+{
+	int port;
+	unsigned long flags;
+	MOD_INC_USE_COUNT;
+	port = MINOR(tty->device) - tty->driver.minor_start;
+
+	if (port >= VIOTTY_SERIAL_START)
+		port -= VIOTTY_SERIAL_START;
+
+	if ((port < 0) || (port >= VTTY_PORTS)) {
+		MOD_DEC_USE_COUNT;
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&consolelock, flags);
+
+	/*
+	 * If some other TTY is already connected here, reject the open
+	 */
+	if ((port_info[port].tty) && (port_info[port].tty != tty)) {
+		spin_unlock_irqrestore(&consolelock, flags);
+		MOD_DEC_USE_COUNT;
+		printk(KERN_WARNING_VIO
+		       "console attempt to open device twice from different ttys\n");
+		return -EBUSY;
+	}
+	tty->driver_data = &port_info[port];
+	port_info[port].tty = tty;
+	spin_unlock_irqrestore(&consolelock, flags);
+
+	return 0;
+}
+
+/* TTY Close method
+ */
+static void viotty_close(struct tty_struct *tty, struct file *filp)
+{
+	unsigned long flags;
+	struct port_info_tag *pi =
+	    (struct port_info_tag *) tty->driver_data;
+
+	if (!pi || viotty_paranoia_check(pi, tty->device, "viotty_close"))
+		return;
+
+	spin_lock_irqsave(&consolelock, flags);
+	if (tty->count == 1) {
+		pi->tty = NULL;
+	}
+	spin_unlock_irqrestore(&consolelock, flags);
+
+	MOD_DEC_USE_COUNT;
+}
+
+/* TTY Write method
+ */
+static int viotty_write(struct tty_struct *tty, int from_user,
+			const unsigned char *buf, int count)
+{
+	return internal_write(tty, buf, count, from_user);
+}
+
+/* TTY put_char method
+ */
+static void viotty_put_char(struct tty_struct *tty, unsigned char ch)
+{
+	internal_write(tty, &ch, 1, 0);
+}
+
+/* TTY flush_chars method
+ */
+static void viotty_flush_chars(struct tty_struct *tty)
+{
+}
+
+/* TTY write_room method
+ */
+static int viotty_write_room(struct tty_struct *tty)
+{
+	int i;
+	int room = 0;
+	struct port_info_tag *pi =
+	    (struct port_info_tag *) tty->driver_data;
+
+	if (!pi
+	    || viotty_paranoia_check(pi, tty->device, "viotty_write_room"))
+		return 0;
+
+	// If no buffers are used, return the max size
+	if (overflow[pi->port].bufferUsed == 0)
+		return VIOCHAR_MAX_DATA * NUM_BUF;
+
+	for (i = 0; ((i < NUM_BUF) && (room < VIOCHAR_MAX_DATA)); i++) {
+		room +=
+		    (OVERFLOW_SIZE - overflow[pi->port].bufferBytes[i]);
+	}
+
+	if (room > VIOCHAR_MAX_DATA)
+		return VIOCHAR_MAX_DATA;
+	else
+		return room;
+}
+
+/* TTY chars_in_buffer method
+ */
+static int viotty_chars_in_buffer(struct tty_struct *tty)
+{
+	return 0;
+}
+
+static void viotty_flush_buffer(struct tty_struct *tty)
+{
+}
+
+static int viotty_ioctl(struct tty_struct *tty, struct file *file,
+			unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	/* the ioctls below read/set the flags usually shown in the leds */
+	/* don't use them - they will go away without warning */
+	case KDGETLED:
+	case KDGKBLED:
+		return put_user(0, (char *) arg);
+
+	case KDSKBLED:
+		return 0;
+	}
+
+	return n_tty_ioctl(tty, file, cmd, arg);
+}
+
+static void viotty_throttle(struct tty_struct *tty)
+{
+}
+
+static void viotty_unthrottle(struct tty_struct *tty)
+{
+}
+
+static void viotty_set_termios(struct tty_struct *tty,
+			       struct termios *old_termios)
+{
+}
+
+static void viotty_stop(struct tty_struct *tty)
+{
+}
+
+static void viotty_start(struct tty_struct *tty)
+{
+}
+
+static void viotty_hangup(struct tty_struct *tty)
+{
+}
+
+static void viotty_break(struct tty_struct *tty, int break_state)
+{
+}
+
+static void viotty_send_xchar(struct tty_struct *tty, char ch)
+{
+}
+
+static void viotty_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+}
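The event handlers that follow wake senders that are sleeping on a response. The convention, also used by viocons_sendOpen() and viocons_init2() later in this file, is to pass the address of a locked semaphore as the event's correlation token; the ack handler casts the token back and calls up() on it. A reduced, kernel-context sketch of the round trip (send_event() is fictitious shorthand for HvCallEvent_signalLpEventFast() and its many arguments):

/* Synchronous request over an asynchronous event channel. */
static int do_request_and_wait(void)
{
	DECLARE_MUTEX_LOCKED(sem);      /* starts locked */
	int rc;

	/* Hand the semaphore's address to the far side as the token. */
	rc = send_event((u64) (unsigned long) &sem);
	if (rc)
		return rc;

	down(&sem);                     /* sleep until the ack arrives */
	return 0;
}

/* Ack handler: recover the pointer and wake the sleeper. */
static void on_ack(u64 correlation_token)
{
	up((struct semaphore *) (unsigned long) correlation_token);
}

The token travels to OS/400 and back untouched, so no lookup table is needed to pair the ack with the waiter.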
+
+/* Handle an open charLpEvent. Could be either interrupt or ack
+ */
+static void vioHandleOpenEvent(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	u8 eventRc;
+	u16 eventSubtypeRc;
+	struct viocharlpevent *cevent = (struct viocharlpevent *) event;
+	u8 port = cevent->virtualDevice;
+
+	if (event->xFlags.xFunction == HvLpEvent_Function_Ack) {
+		if (port >= VTTY_PORTS)
+			return;
+
+		spin_lock_irqsave(&consolelock, flags);
+		/* Got the lock, don't cause console output */
+
+		if (event->xRc == HvLpEvent_Rc_Good) {
+			sndMsgSeq[port] = sndMsgAck[port] = 0;
+		}
+
+		port_info[port].lp = event->xTargetLp;
+
+		spin_unlock_irqrestore(&consolelock, flags);
+
+		if (event->xCorrelationToken != 0) {
+			unsigned long semptr = event->xCorrelationToken;
+			up((struct semaphore *) semptr);
+		} else
+			printk(KERN_WARNING_VIO
+			       "console: weird...got open ack without semaphore\n");
+	} else {
+		/* This had better require an ack, otherwise complain
+		 */
+		if (event->xFlags.xAckInd != HvLpEvent_AckInd_DoAck) {
+			printk(KERN_WARNING_VIO
+			       "console: viocharopen without ack bit!\n");
+			return;
+		}
+
+		spin_lock_irqsave(&consolelock, flags);
+		/* Got the lock, don't cause console output */
+
+		/* Make sure this is a good virtual tty */
+		if (port >= VTTY_PORTS) {
+			eventRc = HvLpEvent_Rc_SubtypeError;
+			eventSubtypeRc = viorc_openRejected;
+		}
+
+		/* If this tty is already connected to a different
+		   partition, fail */
+		else if ((port_info[port].lp != HvLpIndexInvalid) &&
+			 (port_info[port].lp != event->xSourceLp)) {
+			eventRc = HvLpEvent_Rc_SubtypeError;
+			eventSubtypeRc = viorc_openRejected;
+		} else {
+			port_info[port].lp = event->xSourceLp;
+			eventRc = HvLpEvent_Rc_Good;
+			eventSubtypeRc = viorc_good;
+			sndMsgSeq[port] = sndMsgAck[port] = 0;
+		}
+
+		spin_unlock_irqrestore(&consolelock, flags);
+
+		/* Return the acknowledgement */
+		HvCallEvent_ackLpEvent(event);
+	}
+}
+
+/* Handle a close charLpEvent. Could be either interrupt or ack
+ */
+static void vioHandleCloseEvent(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *) event;
+	u8 port = cevent->virtualDevice;
+
+	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
+		if (port >= VTTY_PORTS)
+			return;
+
+		/* For closes, just mark the console partition invalid */
+		spin_lock_irqsave(&consolelock, flags);
+		/* Got the lock, don't cause console output */
+
+		if (port_info[port].lp == event->xSourceLp)
+			port_info[port].lp = HvLpIndexInvalid;
+
+		spin_unlock_irqrestore(&consolelock, flags);
+		printk(KERN_INFO_VIO
+		       "console close from %d\n", event->xSourceLp);
+	} else {
+		printk(KERN_WARNING_VIO
+		       "console got unexpected close acknowledgement\n");
+	}
+}
+
+/* Handle a config charLpEvent. Could be either interrupt or ack
+ */
+static void vioHandleConfig(struct HvLpEvent *event)
+{
+	struct viocharlpevent *cevent = (struct viocharlpevent *) event;
+	int len;
+
+	len = cevent->immediateDataLen;
+	HvCall_writeLogBuffer(cevent->immediateData,
+			      cevent->immediateDataLen);
+
+	if (cevent->immediateData[0] == 0x01) {
+		printk(KERN_INFO_VIO
+		       "console window resized to %d: %d: %d: %d\n",
+		       cevent->immediateData[1],
+		       cevent->immediateData[2],
+		       cevent->immediateData[3], cevent->immediateData[4]);
+	} else {
+		printk(KERN_WARNING_VIO "console unknown config event\n");
+	}
+	return;
+}
+
+/* Handle a data charLpEvent.
+ */ +static void vioHandleData(struct HvLpEvent *event) +{ + struct tty_struct *tty; + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + struct port_info_tag *pi; + int len; + u8 port = cevent->virtualDevice; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING_VIO + "console data on invalid virtual device %d\n", + port); + return; + } + + tty = port_info[port].tty; + + if (tty == NULL) { + printk(KERN_WARNING_VIO + "no tty for virtual device %d\n", port); + return; + } + + if (tty->magic != TTY_MAGIC) { + printk(KERN_WARNING_VIO "tty bad magic\n"); + return; + } + + /* + * Just to be paranoid, make sure the tty points back to this port + */ + pi = (struct port_info_tag *) tty->driver_data; + + if (!pi || viotty_paranoia_check(pi, tty->device, "vioHandleData")) + return; + + len = cevent->immediateDataLen; + + if (len == 0) + return; + + /* + * Log port 0 data to the hypervisor log + */ + if (port == 0) + HvCall_writeLogBuffer(cevent->immediateData, + cevent->immediateDataLen); + + /* Don't copy more bytes than there is room for in the buffer */ + if (tty->flip.count + len > TTY_FLIPBUF_SIZE) { + len = TTY_FLIPBUF_SIZE - tty->flip.count; + printk(KERN_WARNING_VIO + "console input buffer overflow!\n"); + } + + memcpy(tty->flip.char_buf_ptr, cevent->immediateData, len); + memset(tty->flip.flag_buf_ptr, TTY_NORMAL, len); + + /* Update the kernel buffer end */ + tty->flip.count += len; + tty->flip.char_buf_ptr += len; + + tty->flip.flag_buf_ptr += len; + + tty_flip_buffer_push(tty); +} + +/* Handle an ack charLpEvent. + */ +static void vioHandleAck(struct HvLpEvent *event) +{ + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + unsigned long flags; + u8 port = cevent->virtualDevice; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING_VIO + "viocons: data on invalid virtual device\n"); + return; + } + + spin_lock_irqsave(&consolelock, flags); + sndMsgAck[port] = event->xCorrelationToken; + spin_unlock_irqrestore(&consolelock, flags); + + if (overflow[port].bufferUsed) + sendBuffers(port, port_info[port].lp); +} + +/* Handle charLpEvents and route to the appropriate routine + */ +static void vioHandleCharEvent(struct HvLpEvent *event) +{ + int charminor; + + if (event == NULL) { + return; + } + charminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK; + switch (charminor) { + case viocharopen: + vioHandleOpenEvent(event); + break; + case viocharclose: + vioHandleCloseEvent(event); + break; + case viochardata: + vioHandleData(event); + break; + case viocharack: + vioHandleAck(event); + break; + case viocharconfig: + vioHandleConfig(event); + break; + default: + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +/* Send an open event + */ +static int viocons_sendOpen(HvLpIndex remoteLp, u8 port, void *sem) +{ + return HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_chario + | viocharopen, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (remoteLp), + viopath_targetinst + (remoteLp), + (u64) (unsigned long) + sem, VIOVERSION << 16, + ((u64) port << 48), 0, 0, 0); + +} + +int __init viocons_init2(void) +{ + DECLARE_MUTEX_LOCKED(Semaphore); + int rc; + + /* + * Now open to the primary LP + */ + printk(KERN_INFO_VIO "console open path to primary\n"); + rc = viopath_open(HvLpConfig_getPrimaryLpIndex(), viomajorsubtype_chario, viochar_window + 2); /* +2 
for fudge */
+	if (rc) {
+		printk(KERN_WARNING_VIO
+		       "console error opening to primary %d\n", rc);
+	}
+
+	if (viopath_hostLp == HvLpIndexInvalid) {
+		vio_set_hostlp();
+	}
+
+	/*
+	 * And if the primary is not the same as the hosting LP, open to
+	 * the hosting lp
+	 */
+	if ((viopath_hostLp != HvLpIndexInvalid) &&
+	    (viopath_hostLp != HvLpConfig_getPrimaryLpIndex())) {
+		printk(KERN_INFO_VIO
+		       "console open path to hosting (%d)\n",
+		       viopath_hostLp);
+		rc = viopath_open(viopath_hostLp, viomajorsubtype_chario, viochar_window + 2);	/* +2 for fudge */
+		if (rc) {
+			printk(KERN_WARNING_VIO
+			       "console error opening to partition %d: %d\n",
+			       viopath_hostLp, rc);
+		}
+	}
+
+	if (vio_setHandler(viomajorsubtype_chario, vioHandleCharEvent) < 0) {
+		printk(KERN_WARNING_VIO
+		       "Error setting handler for console events!\n");
+	}
+
+	printk(KERN_INFO_VIO "console major number is %d\n", TTY_MAJOR);
+
+	/* First, try to open the console to the hosting lp.
+	 * Wait on a semaphore for the response.
+	 */
+	if ((viopath_isactive(viopath_hostLp)) &&
+	    (viocons_sendOpen(viopath_hostLp, 0, &Semaphore) == 0)) {
+		printk(KERN_INFO_VIO
+		       "opening console to hosting partition %d\n",
+		       viopath_hostLp);
+		down(&Semaphore);
+	}
+
+	/*
+	 * If we don't have an active console, try the primary
+	 */
+	if ((!viopath_isactive(port_info[0].lp)) &&
+	    (viopath_isactive(HvLpConfig_getPrimaryLpIndex())) &&
+	    (viocons_sendOpen
+	     (HvLpConfig_getPrimaryLpIndex(), 0, &Semaphore) == 0)) {
+		printk(KERN_INFO_VIO
+		       "opening console to primary partition\n");
+		down(&Semaphore);
+	}
+
+	/* Initialize the tty_driver structure */
+	memset(&viotty_driver, 0, sizeof(struct tty_driver));
+	viotty_driver.magic = TTY_DRIVER_MAGIC;
+	viotty_driver.driver_name = "vioconsole";
+#if defined(CONFIG_DEVFS_FS)
+	viotty_driver.name = "tty%d";
+#else
+	viotty_driver.name = "tty";
+#endif
+	viotty_driver.major = TTY_MAJOR;
+	viotty_driver.minor_start = 1;
+	viotty_driver.name_base = 1;
+	viotty_driver.num = VTTY_PORTS;
+	viotty_driver.type = TTY_DRIVER_TYPE_CONSOLE;
+	viotty_driver.subtype = 1;
+	viotty_driver.init_termios = tty_std_termios;
+	viotty_driver.flags =
+	    TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
+	viotty_driver.refcount = &viotty_refcount;
+	viotty_driver.table = viotty_table;
+	viotty_driver.termios = viotty_termios;
+	viotty_driver.termios_locked = viotty_termios_locked;
+
+	viotty_driver.open = viotty_open;
+	viotty_driver.close = viotty_close;
+	viotty_driver.write = viotty_write;
+	viotty_driver.put_char = viotty_put_char;
+	viotty_driver.flush_chars = viotty_flush_chars;
+	viotty_driver.write_room = viotty_write_room;
+	viotty_driver.chars_in_buffer = viotty_chars_in_buffer;
+	viotty_driver.flush_buffer = viotty_flush_buffer;
+	viotty_driver.ioctl = viotty_ioctl;
+	viotty_driver.throttle = viotty_throttle;
+	viotty_driver.unthrottle = viotty_unthrottle;
+	viotty_driver.set_termios = viotty_set_termios;
+	viotty_driver.stop = viotty_stop;
+	viotty_driver.start = viotty_start;
+	viotty_driver.hangup = viotty_hangup;
+	viotty_driver.break_ctl = viotty_break;
+	viotty_driver.send_xchar = viotty_send_xchar;
+	viotty_driver.wait_until_sent = viotty_wait_until_sent;
+
+	viottyS_driver = viotty_driver;
+#if defined(CONFIG_DEVFS_FS)
+	viottyS_driver.name = "ttyS%d";
+#else
+	viottyS_driver.name = "ttyS";
+#endif
+	viottyS_driver.major = TTY_MAJOR;
+	viottyS_driver.minor_start = VIOTTY_SERIAL_START;
+	viottyS_driver.type = TTY_DRIVER_TYPE_SERIAL;
+	viottyS_driver.table = viottyS_table;
+	viottyS_driver.termios = viottyS_termios;
+	viottyS_driver.termios_locked = viottyS_termios_locked;
+
+	if (tty_register_driver(&viotty_driver)) {
+		printk(KERN_WARNING_VIO
+		       "Couldn't register console driver\n");
+	}
+
+	if (tty_register_driver(&viottyS_driver)) {
+		printk(KERN_WARNING_VIO
+		       "Couldn't register console S driver\n");
+	}
+
+	/* Now create the vcs and vcsa devfs entries so mingetty works */
+#if defined(CONFIG_DEVFS_FS)
+	{
+		struct tty_driver temp_driver = viotty_driver;
+		int i;
+
+		temp_driver.name = "vcs%d";
+		for (i = 0; i < VTTY_PORTS; i++)
+			tty_register_devfs(&temp_driver,
+					   0, i + temp_driver.minor_start);
+
+		temp_driver.name = "vcsa%d";
+		for (i = 0; i < VTTY_PORTS; i++)
+			tty_register_devfs(&temp_driver,
+					   0, i + temp_driver.minor_start);
+
+		// For compatibility with some earlier code only!
+		// This will go away!!!
+		temp_driver.name = "viocons/%d";
+		temp_driver.name_base = 0;
+		for (i = 0; i < VTTY_PORTS; i++)
+			tty_register_devfs(&temp_driver,
+					   0, i + temp_driver.minor_start);
+	}
+#endif
+
+	/*
+	 * Create the proc entry
+	 */
+	iSeries_proc_callback(&viocons_proc_init);
+
+	return 0;
+}
+
+void __init viocons_init(void)
+{
+	int i;
+	printk(KERN_INFO_VIO "registering console\n");
+
+	memset(&port_info, 0x00, sizeof(port_info));
+	for (i = 0; i < VTTY_PORTS; i++) {
+		sndMsgSeq[i] = sndMsgAck[i] = 0;
+		port_info[i].port = i;
+		port_info[i].lp = HvLpIndexInvalid;
+		port_info[i].magic = VIOTTY_MAGIC;
+	}
+
+	register_console(&viocons);
+	memset(overflow, 0x00, sizeof(overflow));
+	debug = 0;
+
+	HvCall_setLogBufferFormatAndCodepage(HvCall_LogBuffer_ASCII, 437);
+}
diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/viodasd.c linuxppc64_2_4/drivers/iseries/viodasd.c
--- ../kernel.org/linux/drivers/iseries/viodasd.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/drivers/iseries/viodasd.c	Wed Nov 14 13:42:56 2001
@@ -0,0 +1,1416 @@
+/* -*- linux-c -*-
+ * viodasd.c
+ * Authors: Dave Boutcher
+ *          Ryan Arnold
+ *          Colin Devilbiss
+ *
+ * (C) Copyright 2000 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ ***************************************************************************
+ * This routine provides access to disk space (termed "DASD" in historical
+ * IBM terms) owned and managed by an OS/400 partition running on the
+ * same box as this Linux partition.
+ *
+ * All disk operations are performed by sending messages back and forth to
+ * the OS/400 partition.
+ *
+ * This device driver can either use its own major number, or it can
+ * pretend to be an IDE drive (Major #3). Currently it doesn't
+ * emulate all the other IDE majors. This is controlled with a
+ * CONFIG option. You can either call this an elegant solution to the
+ * fact that a lot of software doesn't recognize a new disk major number...
+ * or you can call this a really ugly hack. Your choice.
+ */
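The MAJOR_NR/PARTITION_SHIFT scheme defined just below packs the disk index into the high bits of the minor number and the partition index into the low PARTITION_SHIFT bits, so both can be recovered by shift and mask. A worked sketch using the non-IDE value PARTITION_SHIFT = 3:

#define PARTITION_SHIFT 3                        /* 8 minors per disk */

static int minor_to_disk(int minor)
{
	return minor >> PARTITION_SHIFT;         /* what DEVICE_NR() does */
}

static int minor_to_part(int minor)
{
	return minor & ((1 << PARTITION_SHIFT) - 1);
}

/* Example: minor 13 = 0b01101 -> disk 1, partition 5. Partition 0 is
 * the whole-disk device, so with minors 0-255 this layout yields 32
 * disks of 7 partitions each. */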
+
+#include
+#include
+
+/* Decide if we are using our own major or pretending to be an IDE drive
+ *
+ * If we are using our own majors, we use 3 bits of the minor number for
+ * the partition, so each disk gets 8 minors (the whole disk plus 7
+ * partitions)....with minor numbers 0-255 that gives a maximum of 32
+ * disks. If we are emulating IDE, we get 16 partitions per disk, with a
+ * maximum of 16 disks
+ */
+#ifdef CONFIG_VIODASD_IDE
+#define MAJOR_NR IDE0_MAJOR
+#define PARTITION_SHIFT 6
+#define do_viodasd_request do_hd_request
+static int numdsk = 16;
+static int viodasd_max_disk = 16;
+#define VIOD_DEVICE_NAME "hd"
+#define VIOD_GENHD_NAME "hd"
+#else
+#define MAJOR_NR VIODASD_MAJOR
+#define PARTITION_SHIFT 3
+static int numdsk = 32;
+static int viodasd_max_disk = 32;
+#define VIOD_DEVICE_NAME "viod"
+#ifdef CONFIG_DEVFS_FS
+#define VIOD_GENHD_NAME "viod"
+#else
+#define VIOD_GENHD_NAME "iSeries/vd"
+#endif /* CONFIG_DEVFS */
+#endif /* CONFIG_VIODASD_IDE */
+
+#define VIODASD_VERS "1.02"
+#define LOCAL_END_REQUEST
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "vio.h"
+#include
+
+MODULE_DESCRIPTION("iSeries Virtual DASD");
+MODULE_AUTHOR("Dave Boutcher");
+MODULE_LICENSE("GPL");
+
+#define VIOMAXREQ 16
+#define VIOMAXBLOCKDMA 12
+
+extern struct pci_dev *iSeries_vio_dev;
+
+struct vioblocklpevent {
+	struct HvLpEvent event;
+	u32 mReserved1;
+	u16 mVersion;
+	u16 mSubTypeRc;
+	u16 mDisk;
+	u16 mFlags;
+	union {
+		struct {	// Used during open
+			u64 mDiskLen;
+			u16 mMaxDisks;
+			u16 mCylinders;
+			u16 mTracks;
+			u16 mSectors;
+			u16 mBytesPerSector;
+		} openData;
+		struct {	// Used during rw
+			u64 mOffset;
+			struct {
+				u32 mToken;
+				u32 reserved;
+				u64 mLen;
+			} dmaInfo[VIOMAXBLOCKDMA];
+		} rwData;
+
+		struct {
+			u64 changed;
+		} check;
+	} u;
+};
+
+#define vioblockflags_ro 0x0001
+
+enum vioblocksubtype {
+	vioblockopen = 0x0001,
+	vioblockclose = 0x0002,
+	vioblockread = 0x0003,
+	vioblockwrite = 0x0004,
+	vioblockflush = 0x0005,
+	vioblockcheck = 0x0007
+};
+
+/* In a perfect world we will perform better if we get page-aligned I/O
+ * requests, in multiples of pages. At least peg our block size to the
+ * actual page size.
+ */
+static int blksize = HVPAGESIZE;	/* in bytes */
+
+static DECLARE_WAIT_QUEUE_HEAD(viodasd_wait);
+struct viodasd_waitevent {
+	struct semaphore *sem;
+	int rc;
+	int changed;		/* Used only for check_change */
+};
+
+/* All our disk-related global structures
+ */
+static struct hd_struct *viodasd_partitions;
+static int *viodasd_sizes;
+static int *viodasd_sectsizes;
+static int *viodasd_maxsectors;
+extern struct gendisk viodasd_gendsk;
+
+/* Figure out the biggest I/O request (in sectors) we can accept
+ */
+#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
+
+/* Keep some statistics on what's happening for the PROC file system
+ */
+static struct {
+	long tot;
+	long nobh;
+	long ntce[VIOMAXBLOCKDMA];
+} viod_stats[64][2];
+
+/* Number of disk I/O requests we've sent to OS/400
+ */
+static int numReqOut;
+
+/* This is our internal structure for keeping track of disk devices
+ */
+struct viodasd_device {
+	int useCount;
+	u16 cylinders;
+	u16 tracks;
+	u16 sectors;
+	u16 bytesPerSector;
+	u64 size;
+	int readOnly;
+} *viodasd_devices;
+
+/* When we get a disk I/O request we take it off the general request queue
+ * and put it here.
+ */ +static LIST_HEAD(reqlist); + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + int j; + +#if defined(MODULE) + len += + sprintf(buf + len, + "viod Module opened %d times. Major number %d\n", + MOD_IN_USE, MAJOR_NR); +#endif + len += sprintf(buf + len, "viod %d devices\n", numdsk); + + for (i = 0; i < 16; i++) { + if (viod_stats[i][0].tot || viod_stats[i][1].tot) { + len += + sprintf(buf + len, + "DISK %2.2d: rd %-10.10ld wr %-10.10ld (no buffer list rd %-10.10ld wr %-10.10ld\n", + i, viod_stats[i][0].tot, + viod_stats[i][1].tot, + viod_stats[i][0].nobh, + viod_stats[i][1].nobh); + + len += sprintf(buf + len, "rd DMA: "); + + for (j = 0; j < VIOMAXBLOCKDMA; j++) + len += sprintf(buf + len, " [%2.2d] %ld", + j, + viod_stats[i][0].ntce[j]); + + len += sprintf(buf + len, "\nwr DMA: "); + + for (j = 0; j < VIOMAXBLOCKDMA; j++) + len += sprintf(buf + len, " [%2.2d] %ld", + j, + viod_stats[i][1].ntce[j]); + len += sprintf(buf + len, "\n"); + } + } + + *eof = 1; + return len; +} + +/* Handle writes to our proc file system + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + return count; +} + +/* setup our proc file system entries + */ +void viodasd_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viodasd", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* clean up our proc file system entries + */ +void viodasd_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viodasd", iSeries_proc); +} + +/* End a request + */ +static void viodasd_end_request(struct request *req, int uptodate) +{ + + if (end_that_request_first(req, uptodate, VIOD_DEVICE_NAME)) + return; + + end_that_request_last(req); +} + +/* This rebuilds the partition information for a single disk device + */ +static int viodasd_revalidate(kdev_t dev) +{ + int i; + int device_no = DEVICE_NR(dev); + int part0 = (device_no << PARTITION_SHIFT); + int npart = (1 << PARTITION_SHIFT); + int minor; + kdev_t devp; + struct super_block *sb; + + if (viodasd_devices[device_no].size == 0) + return 0; + + for (i = npart - 1; i >= 0; i--) { + minor = part0 + i; + + if (viodasd_partitions[minor].nr_sects != 0) { + devp = MKDEV(MAJOR_NR, minor); + fsync_dev(devp); + + sb = get_super(devp); + if (sb) + invalidate_inodes(sb); + + invalidate_buffers(devp); + } + + viodasd_partitions[minor].start_sect = 0; + viodasd_partitions[minor].nr_sects = 0; + } + + grok_partitions(&viodasd_gendsk, device_no, npart, + viodasd_devices[device_no].size >> 9); + + return 0; +} + +/* This is the actual open code. 
It gets called from the external
+ * open entry point, as well as from the init code when we're figuring
+ * out what disks we have
+ */
+static int internal_open(int device_no)
+{
+	int i;
+	struct viodasd_waitevent we;
+	HvLpEvent_Rc hvrc;
+	/* This semaphore is raised in the interrupt handler */
+	DECLARE_MUTEX_LOCKED(Semaphore);
+
+	/* Check that we are dealing with a valid hosting partition */
+	if (viopath_hostLp == HvLpIndexInvalid) {
+		printk(KERN_WARNING_VIO "Invalid hosting partition\n");
+		return -EIO;
+	}
+
+	we.sem = &Semaphore;
+
+	/* Send the open event to OS/400 */
+	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_blockio |
+					     vioblockopen,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_ImmediateAck,
+					     viopath_sourceinst(viopath_hostLp),
+					     viopath_targetinst(viopath_hostLp),
+					     (u64) (unsigned long) &we,
+					     VIOVERSION << 16,
+					     ((u64) device_no << 48),
+					     0, 0, 0);
+
+	if (hvrc != 0) {
+		printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n",
+		       (int) hvrc);
+		return -EIO;
+	}
+
+	/* Wait for the interrupt handler to get the response */
+	down(&Semaphore);
+
+	/* Check the return code */
+	if (we.rc != 0) {
+		printk(KERN_WARNING_VIO "bad rc opening disk: %d\n",
+		       (int) we.rc);
+		return we.rc;
+	}
+
+	/* If this is the first open of this device, update the device
+	 * information. If this is NOT the first open, assume that it
+	 * isn't changing */
+	if (viodasd_devices[device_no].useCount == 0) {
+		if (viodasd_devices[device_no].size > 0) {
+			/* divide by 512 */
+			u64 tmpint = viodasd_devices[device_no].size >> 9;
+			viodasd_partitions[device_no << PARTITION_SHIFT].
+			    nr_sects = tmpint;
+			/* Now the value divided by 1024 */
+			tmpint = tmpint >> 1;
+			viodasd_sizes[device_no << PARTITION_SHIFT] =
+			    tmpint;
+
+			for (i = (device_no << PARTITION_SHIFT);
+			     i < ((device_no + 1) << PARTITION_SHIFT); i++)
+				viodasd_sectsizes[i] =
+				    viodasd_devices[device_no].
+				    bytesPerSector;
+		}
+	} else {
+		/* If the size of the device changed, weird things are
+		 * happening! */
+		if (viodasd_sizes[device_no << PARTITION_SHIFT] !=
+		    viodasd_devices[device_no].size >> 10) {
+			printk(KERN_WARNING_VIO
+			       "disk size change (%dK to %dK) for device %d\n",
+			       viodasd_sizes[device_no << PARTITION_SHIFT],
+			       (int) viodasd_devices[device_no].size >> 10,
+			       device_no);
+		}
+	}
+
+	/* Bump the use count */
+	viodasd_devices[device_no].useCount++;
+
+	return 0;
+}
+
+/* This is the actual release code. It gets called from the external
+ * release entry point, as well as from the init code when we're figuring
+ * out what disks we have
+ */
+static int internal_release(int device_no)
+{
+	/* Send the event to OS/400. We DON'T expect a response */
+	HvLpEvent_Rc hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+							  HvLpEvent_Type_VirtualIo,
+							  viomajorsubtype_blockio
+							  | vioblockclose,
+							  HvLpEvent_AckInd_NoAck,
+							  HvLpEvent_AckType_ImmediateAck,
+							  viopath_sourceinst(viopath_hostLp),
+							  viopath_targetinst(viopath_hostLp),
+							  0,
+							  VIOVERSION << 16,
+							  ((u64) device_no << 48),
+							  0, 0, 0);
+
+	viodasd_devices[device_no].useCount--;
+
+	if (hvrc != 0) {
+		printk(KERN_WARNING_VIO
+		       "bad rc sending event to OS/400 %d\n", (int) hvrc);
+		return -EIO;
+	}
+	return 0;
+}
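internal_open() above turns the byte size reported by OS/400 into the units the 2.4 block layer wants: shifting right by 9 converts bytes to 512-byte sectors for the partition table, and one further shift gives the 1 KB block count for blk_size[]. A small worked example (the 8 GB figure is arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned long long bytes = 8ULL << 30;     /* sample: an 8 GB disk */
	unsigned long long sectors = bytes >> 9;   /* 512-byte sectors     */
	unsigned long long kblocks = sectors >> 1; /* 1 KB blocks          */

	/* Prints: 16777216 sectors, 8388608 1K blocks */
	printf("%llu sectors, %llu 1K blocks\n", sectors, kblocks);
	return 0;
}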
+
+/* External open entry point.
+ */
+static int viodasd_open(struct inode *ino, struct file *fil)
+{
+	int device_no;
+
+	/* Do a bunch of sanity checks */
+	if (!ino) {
+		printk(KERN_WARNING_VIO "no inode provided in open\n");
+		return -ENODEV;
+	}
+
+	if (MAJOR(ino->i_rdev) != MAJOR_NR) {
+		printk(KERN_WARNING_VIO
+		       "Weird error...wrong major number on open\n");
+		return -ENODEV;
+	}
+
+	device_no = DEVICE_NR(ino->i_rdev);
+	if (device_no > numdsk) {
+		printk(KERN_WARNING_VIO
+		       "Invalid minor device number %d in open\n",
+		       device_no);
+		return -ENODEV;
+	}
+
+	/* Call the actual open code */
+	if (internal_open(device_no) == 0) {
+		if (fil && fil->f_mode) {
+			if (fil->f_mode & 2) {
+				if (viodasd_devices[device_no].readOnly) {
+					internal_release(device_no);
+					return -EROFS;
+				}
+			}
+		}
+		MOD_INC_USE_COUNT;
+		return 0;
+	} else {
+		return -EIO;
+	}
+}
+
+/* External release entry point.
+ */
+static int viodasd_release(struct inode *ino, struct file *fil)
+{
+	int device_no;
+
+	/* Do a bunch of sanity checks */
+	if (!ino) {
+		printk(KERN_WARNING_VIO "no inode provided in release\n");
+		return -ENODEV;
+	}
+
+	if (MAJOR(ino->i_rdev) != MAJOR_NR) {
+		printk(KERN_WARNING_VIO
+		       "Weird error...wrong major number on release\n");
+		return -ENODEV;
+	}
+
+	device_no = DEVICE_NR(ino->i_rdev);
+	if (device_no > numdsk) {
+		return -ENODEV;
+	}
+
+	/* Just to be paranoid, sync the device */
+	fsync_dev(ino->i_rdev);
+
+	/* Call the actual release code */
+	internal_release(device_no);
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/* External ioctl entry point.
+ */
+static int viodasd_ioctl(struct inode *ino, struct file *fil,
+			 unsigned int cmd, unsigned long arg)
+{
+	int device_no;
+	int err;
+	HvLpEvent_Rc hvrc;
+	DECLARE_MUTEX_LOCKED(Semaphore);
+
+	/* Sanity checks */
+	if (!ino) {
+		printk(KERN_WARNING_VIO "no inode provided in ioctl\n");
+		return -ENODEV;
+	}
+
+	if (MAJOR(ino->i_rdev) != MAJOR_NR) {
+		printk(KERN_WARNING_VIO
+		       "Weird error...wrong major number on ioctl\n");
+		return -ENODEV;
+	}
+
+	device_no = DEVICE_NR(ino->i_rdev);
+	if (device_no > numdsk) {
+		printk(KERN_WARNING_VIO
+		       "Invalid minor device number %d in ioctl\n",
+		       device_no);
+		return -ENODEV;
+	}
+
+	switch (cmd) {
+	case BLKGETSIZE:
+		/* return the device size in sectors */
+		if (!arg)
+			return -EINVAL;
+		err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long));
+		if (err)
+			return err;
+
+		put_user(viodasd_partitions[MINOR(ino->i_rdev)].nr_sects,
+			 (long *) arg);
+		return 0;
+
+	case FDFLUSH:
+	case BLKFLSBUF:
+		if (!suser())
+			return -EACCES;
+		fsync_dev(ino->i_rdev);
+		invalidate_buffers(ino->i_rdev);
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_blockio
+						     | vioblockflush,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst(viopath_hostLp),
+						     viopath_targetinst(viopath_hostLp),
+						     (u64) (unsigned long)
+						     &Semaphore,
+						     VIOVERSION << 16,
+						     ((u64) device_no << 48),
+						     0, 0, 0);
+
+		if (hvrc != 0) {
+			printk(KERN_WARNING_VIO
+			       "bad rc on sync signalLpEvent %d\n",
+			       (int) hvrc);
+			return -EIO;
+		}
+
+		down(&Semaphore);
+
+		return 0;
+
+	case BLKRAGET:
+		if (!arg)
+			return -EINVAL;
+		err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long));
+		if (err)
+			return err;
+		put_user(read_ahead[MAJOR_NR], (long *) arg);
+		return 0;
+
+	case BLKRASET:
+		if (!suser())
+			return -EACCES;
+		if (arg > 0x00ff)
+			return -EINVAL;
+		read_ahead[MAJOR_NR] = arg;
+		return 0;
+
+	case BLKRRPART:
+		viodasd_revalidate(ino->i_rdev);
+		return 0;
+
+	case HDIO_GETGEO:
+		{
+			unsigned char sectors;
+			unsigned char heads;
+			unsigned short cylinders;
+
+			struct hd_geometry *geo =
+			    (struct hd_geometry *) arg;
+			if (geo == NULL)
+				return -EINVAL;
+
+			err = verify_area(VERIFY_WRITE, geo, sizeof(*geo));
+			if (err)
+				return err;
+
+			sectors = viodasd_devices[device_no].sectors;
+			if (sectors == 0)
+				sectors = 32;
+
+			heads = viodasd_devices[device_no].tracks;
+			if (heads == 0)
+				heads = 64;
+
+			cylinders = viodasd_devices[device_no].cylinders;
+			if (cylinders == 0)
+				cylinders =
+				    viodasd_partitions[MINOR(ino->i_rdev)].
+				    nr_sects / (sectors * heads);
+
+			put_user(sectors, &geo->sectors);
+			put_user(heads, &geo->heads);
+			put_user(cylinders, &geo->cylinders);
+
+			put_user(viodasd_partitions[MINOR(ino->i_rdev)].
+				 start_sect, (long *) &geo->start);
+
+			return 0;
+		}
+
+#define PRTIOC(x) case x: printk(KERN_WARNING_VIO "got unsupported FD ioctl " #x "\n"); \
+	return -EINVAL;
+
+		PRTIOC(FDCLRPRM);
+		PRTIOC(FDSETPRM);
+		PRTIOC(FDDEFPRM);
+		PRTIOC(FDGETPRM);
+		PRTIOC(FDMSGON);
+		PRTIOC(FDMSGOFF);
+		PRTIOC(FDFMTBEG);
+		PRTIOC(FDFMTTRK);
+		PRTIOC(FDFMTEND);
+		PRTIOC(FDSETEMSGTRESH);
+		PRTIOC(FDSETMAXERRS);
+		PRTIOC(FDGETMAXERRS);
+		PRTIOC(FDGETDRVTYP);
+		PRTIOC(FDSETDRVPRM);
+		PRTIOC(FDGETDRVPRM);
+		PRTIOC(FDGETDRVSTAT);
+		PRTIOC(FDPOLLDRVSTAT);
+		PRTIOC(FDRESET);
+		PRTIOC(FDGETFDCSTAT);
+		PRTIOC(FDWERRORCLR);
+		PRTIOC(FDWERRORGET);
+		PRTIOC(FDRAWCMD);
+		PRTIOC(FDEJECT);
+		PRTIOC(FDTWADDLE);
+	}
+
+	return -EINVAL;
+}
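send_request() below first converts the request into an absolute byte range: the partition's start_sect is added to the request's starting sector, and both offset and length are scaled by the hardware sector size. A sketch of just that arithmetic, with sample numbers assuming the usual 512-byte sectors:

/* Byte offset and length for a request, as send_request() computes them. */
static void request_to_bytes(unsigned long sector, unsigned long nr_sectors,
			     unsigned long start_sect,
			     unsigned long sect_size,
			     unsigned long long *start,
			     unsigned long long *len)
{
	*start = (unsigned long long) (sector + start_sect) * sect_size;
	*len = (unsigned long long) nr_sectors * sect_size;
}

/* Example: a partition starting at sector 2048, request for 8 sectors
 * at sector 100 -> start = (100 + 2048) * 512 = 1099776 bytes,
 * len = 8 * 512 = 4096 bytes. */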
+
+/* Send an actual I/O request to OS/400
+ */
+static int send_request(struct request *req)
+{
+	u64 sect_size;
+	u64 start;
+	u64 len;
+	int direction;
+	int nsg;
+	u16 viocmd;
+	HvLpEvent_Rc hvrc;
+	struct vioblocklpevent *bevent;
+	struct scatterlist sg[VIOMAXBLOCKDMA];
+	struct buffer_head *bh;
+	int sgindex;
+	int device_no = DEVICE_NR(req->rq_dev);
+	int statindex;
+
+	/* Note that this SHOULD always be 512...but let's be
+	 * architecturally correct */
+	sect_size = hardsect_size[MAJOR_NR][device_no];
+
+	/* Figure out the starting sector and length */
+	start = (req->sector +
+		 viodasd_partitions[MINOR(req->rq_dev)].start_sect) *
+	    sect_size;
+	len = req->nr_sectors * sect_size;
+
+	/* More paranoia checks */
+	if ((req->sector + req->nr_sectors) >
+	    (viodasd_partitions[MINOR(req->rq_dev)].start_sect +
+	     viodasd_partitions[MINOR(req->rq_dev)].nr_sects)) {
+		printk(KERN_WARNING_VIO "Invalid request offset & length\n");
+		printk(KERN_WARNING_VIO
+		       "req->sector: %ld, req->nr_sectors: %ld\n",
+		       req->sector, req->nr_sectors);
+		printk(KERN_WARNING_VIO "RQ_DEV: %d, minor: %d\n",
+		       req->rq_dev, MINOR(req->rq_dev));
+		return -1;
+	}
+
+	if (req->cmd == READ) {
+		direction = PCI_DMA_FROMDEVICE;
+		viocmd = viomajorsubtype_blockio | vioblockread;
+		statindex = 0;
+	} else {
+		direction = PCI_DMA_TODEVICE;
+		viocmd = viomajorsubtype_blockio | vioblockwrite;
+		statindex = 1;
+	}
+
+	/* Update totals */
+	viod_stats[device_no][statindex].tot++;
+
+	/* Now build the scatter-gather list */
+	memset(&sg, 0x00, sizeof(sg));
+	sgindex = 0;
+
+	/* See if this is a swap I/O (without a bh pointer) or a regular
+	 * I/O */
+	if (req->bh) {
+		/* OK...this loop takes buffers from the request and adds
+		 * them to the SG until we're done, or until we hit a
+		 * maximum. If we hit a maximum we'll just finish this
+		 * request later */
+		bh = req->bh;
+		while ((bh) && (sgindex < VIOMAXBLOCKDMA)) {
+			sg[sgindex].address = bh->b_data;
+			sg[sgindex].length = bh->b_size;
+
+			sgindex++;
+			bh = bh->b_reqnext;
+		}
+		nsg = pci_map_sg(iSeries_vio_dev, sg, sgindex, direction);
+		if ((nsg == 0) || (sg[0].dma_length == 0)
+		    || (sg[0].dma_address == 0xFFFFFFFF)) {
+			printk(KERN_WARNING_VIO "error getting sg tces\n");
+			return -1;
+		}
+	} else {
+		/* Update stats */
+		viod_stats[device_no][statindex].nobh++;
+
+		sg[0].dma_address = pci_map_single(iSeries_vio_dev,
+						   req->buffer, len,
+						   direction);
+		if (sg[0].dma_address == 0xFFFFFFFF) {
+			printk(KERN_WARNING_VIO
+			       "error allocating tce for address %p len %ld\n",
+			       req->buffer, (long) len);
+			return -1;
+		}
+		sg[0].dma_length = len;
+		nsg = 1;
+	}
+
+	/* Update stats */
+	viod_stats[device_no][statindex].ntce[sgindex]++;
+
+	/* This optimization handles a single DMA block */
+	if (sgindex == 1) {
+		/* Send the I/O event to OS/400 */
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_blockio
+						     | viocmd,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst(viopath_hostLp),
+						     viopath_targetinst(viopath_hostLp),
+						     (u64) (unsigned long)
+						     req->buffer,
+						     VIOVERSION << 16,
+						     ((u64) device_no << 48),
+						     start,
+						     ((u64) sg[0].dma_address) << 32,
+						     sg[0].dma_length);
+	} else {
+		bevent = (struct vioblocklpevent *)
+		    vio_get_event_buffer(viomajorsubtype_blockio);
+		if (bevent == NULL) {
+			printk(KERN_WARNING_VIO
+			       "error allocating disk event buffer\n");
+			return -1;
+		}
+
+		/* Now build up the actual request. Note that we store
+		 * the pointer to the request buffer in the correlation
+		 * token so we can match this response up later */
+		memset(bevent, 0x00, sizeof(struct vioblocklpevent));
+		bevent->event.xFlags.xValid = 1;
+		bevent->event.xFlags.xFunction = HvLpEvent_Function_Int;
+		bevent->event.xFlags.xAckInd = HvLpEvent_AckInd_DoAck;
+		bevent->event.xFlags.xAckType =
+		    HvLpEvent_AckType_ImmediateAck;
+		bevent->event.xType = HvLpEvent_Type_VirtualIo;
+		bevent->event.xSubtype = viocmd;
+		bevent->event.xSourceLp = HvLpConfig_getLpIndex();
+		bevent->event.xTargetLp = viopath_hostLp;
+		bevent->event.xSizeMinus1 =
+		    sizeof(struct vioblocklpevent) - 1;
+		bevent->event.xSourceInstanceId =
+		    viopath_sourceinst(viopath_hostLp);
+		bevent->event.xTargetInstanceId =
+		    viopath_targetinst(viopath_hostLp);
+		bevent->event.xCorrelationToken =
+		    (u64) (unsigned long) req->buffer;
+		bevent->mVersion = VIOVERSION;
+		bevent->mDisk = device_no;
+		bevent->u.rwData.mOffset = start;
+
+		/* Copy just the dma information from the sg list into
+		 * the request */
+		for (sgindex = 0; sgindex < nsg; sgindex++) {
+			bevent->u.rwData.dmaInfo[sgindex].mToken =
+			    sg[sgindex].dma_address;
+			bevent->u.rwData.dmaInfo[sgindex].mLen =
+			    sg[sgindex].dma_length;
+		}
+
+		/* Send the request */
+		hvrc = HvCallEvent_signalLpEvent(&bevent->event);
+		vio_free_event_buffer(viomajorsubtype_blockio, bevent);
+	}
+
+	if (hvrc != HvLpEvent_Rc_Good) {
+		printk(KERN_WARNING_VIO
+		       "error sending disk event to OS/400 (rc %d)\n",
+		       (int) hvrc);
+		return -1;
+	} else {
+		/* If the request was successful, bump the number of
+		 * outstanding requests */
+		numReqOut++;
+	}
+	return 0;
+}
+
+/* This is the external request processing routine
+ */
+static void
do_viodasd_request(request_queue_t * q) +{ + int device_no; + struct request *req; + for (;;) { + + INIT_REQUEST; + + device_no = CURRENT_DEV; + if (device_no > numdsk) { + printk(KERN_WARNING_VIO "Invalid device # %d\n", CURRENT_DEV); + viodasd_end_request(CURRENT, 0); + continue; + } + + if (viodasd_gendsk.sizes == NULL) { + printk(KERN_WARNING_VIO + "Ouch! viodasd_gendsk.sizes is NULL\n"); + viodasd_end_request(CURRENT, 0); + continue; + } + + /* If the queue is plugged, don't dequeue anything right now */ + if ((q) && (q->plugged)) { + return; + } + + /* If we already have the maximum number of requests outstanding to OS/400 + just bail out. We'll come back later */ + if (numReqOut >= VIOMAXREQ) + return; + + /* get the current request, then dequeue it from the queue */ + req = CURRENT; + blkdev_dequeue_request(req); + + /* Try sending the request */ + if (send_request(req) == 0) { + list_add_tail(&req->queue, &reqlist); + } else { + viodasd_end_request(req, 0); + } + } +} + +/* Check for changed disks + */ +static int viodasd_check_change(kdev_t dev) +{ + struct viodasd_waitevent we; + HvLpEvent_Rc hvrc; + int device_no = DEVICE_NR(dev); + + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + we.sem = &Semaphore; + + /* Send the open event to OS/400 */ + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio | + vioblockcheck, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48), 0, 0, + 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code. If bad, assume no change */ + if (we.rc != 0) { + printk(KERN_WARNING_VIO "bad rc on check_change. Assuming no change\n"); + return 0; + } + + return we.changed; +} + +/* Our file operations table + */ +static struct block_device_operations viodasd_fops = { + open:viodasd_open, + release:viodasd_release, + ioctl:viodasd_ioctl, + check_media_change:viodasd_check_change, + revalidate:viodasd_revalidate +}; + +/* Our gendisk table + */ +struct gendisk viodasd_gendsk = { + 0, /* major - fill in later */ + "viodasd", + PARTITION_SHIFT, + 1 << PARTITION_SHIFT, + NULL, /* partition array - fill in later */ + NULL, /* block sizes - fill in later */ + 0, /* # units */ + NULL, /* "real device" pointer */ + NULL, /* next */ + &viodasd_fops /* operations */ +}; + +/* This routine handles incoming block LP events + */ +static void vioHandleBlockEvent(struct HvLpEvent *event) +{ + struct scatterlist sg[VIOMAXBLOCKDMA]; + struct vioblocklpevent *bevent = (struct vioblocklpevent *) event; + int nsect; + struct request *req; + int i; + struct viodasd_waitevent *pwe; + unsigned long flags; + int maxsg; + + if (event == NULL) { + /* Notification that a partition went away! */ + return; + } + // First, we should NEVER get an int here...only acks + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(KERN_WARNING_VIO + "Yikes! 
got an int in viodasd event handler!\n"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } + + switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { + + /* Handle a response to an open request. We get all the disk information + * in the response, so update it. The correlation token contains a pointer to + * a waitevent structure that has a semaphore in it. update the return code + * in the waitevent structure and post the semaphore to wake up the guy who + * sent the request */ + case vioblockopen: + pwe = + (struct viodasd_waitevent *) (unsigned long) event-> + xCorrelationToken; + pwe->rc = event->xRc; + if (event->xRc == HvLpEvent_Rc_Good) { + viodasd_devices[bevent->mDisk].size = + bevent->u.openData.mDiskLen; + viodasd_devices[bevent->mDisk].cylinders = + bevent->u.openData.mCylinders; + viodasd_devices[bevent->mDisk].tracks = + bevent->u.openData.mTracks; + viodasd_devices[bevent->mDisk].sectors = + bevent->u.openData.mSectors; + viodasd_devices[bevent->mDisk].bytesPerSector = + bevent->u.openData.mBytesPerSector; + viodasd_devices[bevent->mDisk].readOnly = + bevent->mFlags & vioblockflags_ro; + + if (viodasd_max_disk != + bevent->u.openData.mMaxDisks) { + viodasd_max_disk = + bevent->u.openData.mMaxDisks; + } + } + up(pwe->sem); + break; + + case vioblockclose: + break; + + /* For read and write requests, decrement the number of outstanding requests, + * Free the DMA buffers we allocated, and find the matching request by + * using the buffer pointer we stored in the correlation token. + */ + case vioblockread: + case vioblockwrite: + + /* Free the DMA buffers */ + i = 0; + nsect = 0; + memset(sg, 0x00, sizeof(sg)); + + maxsg = (((bevent->event.xSizeMinus1 + 1) - + offsetof(struct vioblocklpevent, + u.rwData.dmaInfo)) / + sizeof(bevent->u.rwData.dmaInfo[0])); + + + while ((i < maxsg) && + (bevent->u.rwData.dmaInfo[i].mLen > 0) && + (i < VIOMAXBLOCKDMA)) { + sg[i].dma_address = + bevent->u.rwData.dmaInfo[i].mToken; + sg[i].dma_length = + bevent->u.rwData.dmaInfo[i].mLen; + nsect += bevent->u.rwData.dmaInfo[i].mLen; + i++; + } + + pci_unmap_sg(iSeries_vio_dev, + sg, + i, + (bevent->event.xSubtype == + (viomajorsubtype_blockio | vioblockread)) ? + PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + + + /* Since this is running in interrupt mode, we need to make sure we're not + * stepping on any global I/O operations + */ + spin_lock_irqsave(&io_request_lock, flags); + + /* Decrement the number of outstanding requests */ + numReqOut--; + + /* Now find the matching request in OUR list (remember we moved the request + * from the global list to our list when we got it) + */ + req = blkdev_entry_to_request(reqlist.next); + while ((&req->queue != &reqlist) && + ((u64) (unsigned long) req->buffer != + bevent->event.xCorrelationToken)) + req = blkdev_entry_to_request(req->queue.next); + + if (&req->queue == &reqlist) { + printk(KERN_WARNING_VIO + "Yikes! 
Could not find matching buffer %p in reqlist\n",
+			       req->buffer);
+			break;
+		}
+
+		/* Remove the request from our list */
+		list_del(&req->queue);
+
+		/* Calculate the number of sectors from the length in
+		 * bytes */
+		nsect = nsect >> 9;
+		if (!req->bh) {
+			if (event->xRc != HvLpEvent_Rc_Good) {
+				printk(KERN_WARNING_VIO
+				       "read/write error %d:%d\n",
+				       event->xRc, bevent->mSubTypeRc);
+				viodasd_end_request(req, 0);
+			} else {
+				if (nsect != req->current_nr_sectors) {
+					printk(KERN_WARNING_VIO
+					       "Yikes...non bh i/o # sect doesn't match!!!\n");
+				}
+				viodasd_end_request(req, 1);
+			}
+		} else {
+			while ((nsect > 0) && (req->bh)) {
+				nsect -= req->current_nr_sectors;
+				viodasd_end_request(req, 1);
+			}
+			if (nsect) {
+				printk(KERN_WARNING_VIO
+				       "Yikes...sectors left over on a request!!!\n");
+			}
+
+			/* If the original request could not handle all
+			 * the buffers, re-send the request */
+			if (req->bh) {
+				if (send_request(req) == 0) {
+					list_add_tail(&req->queue,
+						      &reqlist);
+				} else {
+					viodasd_end_request(req, 0);
+				}
+			}
+		}
+
+		/* Finally, send more requests */
+		do_viodasd_request(NULL);
+
+		spin_unlock_irqrestore(&io_request_lock, flags);
+		break;
+
+	case vioblockflush:
+		up((void *) (unsigned long) event->xCorrelationToken);
+		break;
+
+	case vioblockcheck:
+		pwe = (struct viodasd_waitevent *) (unsigned long)
+		    event->xCorrelationToken;
+		pwe->rc = event->xRc;
+		pwe->changed = bevent->u.check.changed;
+		up(pwe->sem);
+		break;
+
+	default:
+		printk(KERN_WARNING_VIO "invalid subtype!");
+		if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) {
+			event->xRc = HvLpEvent_Rc_InvalidSubtype;
+			HvCallEvent_ackLpEvent(event);
+		}
+	}
+}
+
+/* This routine tries to clean up anything we allocated/registered
+ */
+static void cleanup2(void)
+{
+	int i;
+
+#define CLEANIT(x) if (x) {kfree(x); x=NULL;}
+
+	for (i = 0; i < numdsk; i++)
+		fsync_dev(MKDEV(MAJOR_NR, i));
+
+	read_ahead[MAJOR_NR] = 0;
+
+	CLEANIT(viodasd_devices);
+	CLEANIT(blk_size[MAJOR_NR]);
+	CLEANIT(blksize_size[MAJOR_NR]);
+	CLEANIT(hardsect_size[MAJOR_NR]);
+	CLEANIT(max_sectors[MAJOR_NR]);
+	CLEANIT(viodasd_gendsk.part);
+	blk_size[MAJOR_NR] = NULL;
+	blksize_size[MAJOR_NR] = NULL;
+
+	devfs_unregister_blkdev(MAJOR_NR, VIOD_DEVICE_NAME);
+}
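vioHandleBlockEvent() above pairs each completion with its request by replaying the correlation token: send_request() stores the request's buffer pointer in the token, and the completion path walks the private reqlist until it finds that pointer again. A reduced sketch of the lookup using the same 2.4 list.h idioms (struct pending_req is a stand-in for the handful of struct request fields involved, not a driver type):

#include <linux/list.h>
#include <linux/types.h>

struct pending_req {
	struct list_head queue;
	void *buffer;            /* doubles as the correlation token */
};

/* Find the pending request whose buffer pointer equals the token the
 * completion event carried; returns NULL if nothing matches. */
static struct pending_req *find_pending(struct list_head *reqlist, u64 token)
{
	struct list_head *p;

	for (p = reqlist->next; p != reqlist; p = p->next) {
		struct pending_req *req =
		    list_entry(p, struct pending_req, queue);
		if ((u64) (unsigned long) req->buffer == token)
			return req;
	}
	return NULL;
}

A linear walk is acceptable here because the driver caps the number of outstanding requests at VIOMAXREQ.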
+
+/* Initialize the whole device driver. Handle module and non-module
+ * versions
+ */
+__init int viodasd_init(void)
+{
+	int i, j;
+	int rc;
+	int *viodasd_blksizes;
+	int numpart = numdsk << PARTITION_SHIFT;
+
+	/* Try to open to our host lp
+	 */
+	if (viopath_hostLp == HvLpIndexInvalid) {
+		vio_set_hostlp();
+	}
+
+	if (viopath_hostLp == HvLpIndexInvalid) {
+		printk(KERN_WARNING_VIO "%s: invalid hosting partition\n",
+		       VIOD_DEVICE_NAME);
+		return -1;
+	}
+
+	/*
+	 * Do the devfs_register. This works even if devfs is not
+	 * configured
+	 */
+	if (devfs_register_blkdev
+	    (MAJOR_NR, VIOD_DEVICE_NAME, &viodasd_fops)) {
+		printk(KERN_WARNING_VIO "%s: unable to get major number %d\n",
+		       VIOD_DEVICE_NAME, MAJOR_NR);
+		return -1;
+	}
+
+	printk(KERN_INFO_VIO
+	       "%s: Disk vers %s, major %d, max disks %d, hosting partition %d\n",
+	       VIOD_DEVICE_NAME, VIODASD_VERS, MAJOR_NR, numdsk,
+	       viopath_hostLp);
+
+	if (ROOT_DEV == NODEV) {
+		ROOT_DEV = MKDEV(MAJOR_NR, 1);
+
+		printk(KERN_INFO_VIO
+		       "Claiming root file system as first partition of first virtual disk\n");
+	}
+
+	/* Do the blk device initialization */
+	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
+
+	read_ahead[MAJOR_NR] = 8;	/* 8 sector (4kB) read ahead */
+
+	/* Start filling in gendsk structure */
+	viodasd_gendsk.major = MAJOR_NR;
+	viodasd_gendsk.major_name = VIOD_GENHD_NAME;
+	viodasd_gendsk.nr_real = numdsk;
+	add_gendisk(&viodasd_gendsk);
+
+	/* Actually open the path to the hosting partition */
+	rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
+			  VIOMAXREQ + 2);
+	if (rc) {
+		printk(KERN_WARNING_VIO
+		       "error opening path to host partition %d\n",
+		       viopath_hostLp);
+		blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+		return -1;
+	} else {
+		printk(KERN_INFO_VIO
+		       "%s: opened path to hosting partition %d\n",
+		       VIOD_DEVICE_NAME, viopath_hostLp);
+	}
+
+	/*
+	 * Initialize our request handler
+	 */
+	vio_setHandler(viomajorsubtype_blockio, vioHandleBlockEvent);
+
+	/*
+	 * Now fill in all the device driver info
+	 */
+	viodasd_devices =
+	    kmalloc(numdsk * sizeof(struct viodasd_device), GFP_KERNEL);
+	if (!viodasd_devices) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	memset(viodasd_devices, 0x00,
+	       numdsk * sizeof(struct viodasd_device));
+
+	viodasd_sizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
+	if (!viodasd_sizes) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	memset(viodasd_sizes, 0x00, numpart * sizeof(int));
+	blk_size[MAJOR_NR] = viodasd_gendsk.sizes = viodasd_sizes;
+
+	viodasd_partitions =
+	    kmalloc(numpart * sizeof(struct hd_struct), GFP_KERNEL);
+	if (!viodasd_partitions) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	memset(viodasd_partitions, 0x00,
+	       numpart * sizeof(struct hd_struct));
+	viodasd_gendsk.part = viodasd_partitions;
+
+	viodasd_blksizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
+	if (!viodasd_blksizes) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	for (i = 0; i < numpart; i++)
+		viodasd_blksizes[i] = blksize;
+	blksize_size[MAJOR_NR] = viodasd_blksizes;
+
+	viodasd_sectsizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
+	if (!viodasd_sectsizes) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	for (i = 0; i < numpart; i++)
+		viodasd_sectsizes[i] = 0;
+	hardsect_size[MAJOR_NR] = viodasd_sectsizes;
+
+	viodasd_maxsectors = kmalloc(numpart * sizeof(int), GFP_KERNEL);
+	if (!viodasd_maxsectors) {
+		cleanup2();
+		return -ENOMEM;
+	}
+	for (i = 0; i < numpart; i++)
+		viodasd_maxsectors[i] = VIODASD_MAXSECTORS;
+	max_sectors[MAJOR_NR] = viodasd_maxsectors;
+
+	viodasd_max_disk = numdsk;
+	for (i = 0; i <= viodasd_max_disk; i++) {
+		// Note that internal_open has two side effects:
+		// a) it updates the size of the disk
+		// b) it updates viodasd_max_disk
+		if (internal_open(i) == 0) {
+			if (i == 0)
+				printk(KERN_INFO_VIO
+				       "%s: Currently %d disks connected\n",
+				       VIOD_DEVICE_NAME,
+				       (int) viodasd_max_disk + 1);
+
+			register_disk(&viodasd_gendsk,
+				      MKDEV(MAJOR_NR,
+					    i << PARTITION_SHIFT),
+				      1 << PARTITION_SHIFT, &viodasd_fops,
+				      viodasd_partitions[i << PARTITION_SHIFT].nr_sects);
+				      nr_sects);
+
+			printk(KERN_INFO_VIO
+			       "%s: Disk %2.2d size %dM, sectors %d, heads %d, cylinders %d, sectsize %d\n",
+			       VIOD_DEVICE_NAME, i,
+			       (int) (viodasd_devices[i].size /
+				      (1024 * 1024)),
+			       (int) viodasd_devices[i].sectors,
+			       (int) viodasd_devices[i].tracks,
+			       (int) viodasd_devices[i].cylinders,
+			       (int) viodasd_sectsizes[i <<
+						       PARTITION_SHIFT]);
+
+			for (j = (i << PARTITION_SHIFT) + 1;
+			     j < ((i + 1) << PARTITION_SHIFT); j++) {
+				if (viodasd_gendsk.part[j].nr_sects)
+					printk(KERN_INFO_VIO
+					       "%s: Disk %2.2d partition %2.2d start sector %ld, # sector %ld\n",
+					       VIOD_DEVICE_NAME, i,
+					       j - (i << PARTITION_SHIFT),
+					       viodasd_gendsk.part[j].
+					       start_sect,
+					       viodasd_gendsk.part[j].
+					       nr_sects);
+			}
+
+			internal_release(i);
+		}
+	}
+
+	/*
+	 * Create the proc entry
+	 */
+	iSeries_proc_callback(&viodasd_proc_init);
+
+	return 0;
+}
+
+#ifdef MODULE
+void viodasd_exit(void)
+{
+	int i;
+	for (i = 0; i < numdsk << PARTITION_SHIFT; i++)
+		fsync_dev(MKDEV(MAJOR_NR, i));
+
+	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+
+	iSeries_proc_callback(&viodasd_proc_delete);
+
+	cleanup2();
+}
+#endif
+
+#ifdef MODULE
+module_init(viodasd_init);
+module_exit(viodasd_exit);
+#endif
diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/viopath.c linuxppc64_2_4/drivers/iseries/viopath.c
--- ../kernel.org/linux/drivers/iseries/viopath.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/drivers/iseries/viopath.c	Wed Oct 10 11:56:17 2001
@@ -0,0 +1,624 @@
+/* -*- linux-c -*-
+ * arch/ppc64/viopath.c
+ *
+ * iSeries Virtual I/O Message Path code
+ *
+ * Authors: Dave Boutcher
+ *          Ryan Arnold
+ *          Colin Devilbiss
+ *
+ * (C) Copyright 2000 IBM Corporation
+ *
+ * This code is used by the iSeries virtual disk, cd,
+ * tape, and console to communicate with OS/400 in another
+ * partition.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "vio.h"
+
+EXPORT_SYMBOL(viopath_hostLp);
+EXPORT_SYMBOL(vio_set_hostlp);
+EXPORT_SYMBOL(viopath_open);
+EXPORT_SYMBOL(viopath_close);
+EXPORT_SYMBOL(viopath_isactive);
+EXPORT_SYMBOL(viopath_sourceinst);
+EXPORT_SYMBOL(viopath_targetinst);
+EXPORT_SYMBOL(vio_setHandler);
+EXPORT_SYMBOL(vio_clearHandler);
+EXPORT_SYMBOL(vio_get_event_buffer);
+EXPORT_SYMBOL(vio_free_event_buffer);
+
+extern struct pci_dev * iSeries_vio_dev;
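+
+/* Typical client usage of the interface exported above, as the virtual
+ * disk/cd/tape/console drivers use it (sketch only; the blockio subtype
+ * and my_event_handler are just examples):
+ *
+ *	vio_setHandler(viomajorsubtype_blockio, my_event_handler);
+ *	rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, numreq);
+ *	... build LP events using viopath_sourceinst(viopath_hostLp) and
+ *	    viopath_targetinst(viopath_hostLp), signal them, and handle
+ *	    the responses in my_event_handler() ...
+ *	viopath_close(viopath_hostLp, viomajorsubtype_blockio, numreq);
+ *	vio_clearHandler(viomajorsubtype_blockio);
+ */
+
+/* Status of the path to each other partition in the system.
+ * This is overkill, since we will only ever establish connections
+ * to our hosting partition and the primary partition on the system.
+ * But this allows for other support in the future.
+ */
+static struct viopathStatus {
+	int isOpen:1;		/* Did we open the path?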
*/ + int isActive:1; /* Do we have a mon msg outstanding */ + int users[VIO_MAX_SUBTYPES]; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + int numberAllocated; +} viopathStatus[HVMAXARCHITECTEDLPS]; + +static spinlock_t statuslock = SPIN_LOCK_UNLOCKED; + +/* + * For each kind of event we allocate a buffer that is + * guaranteed not to cross a page boundary + */ +static void *event_buffer[VIO_MAX_SUBTYPES]; +static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; + +static void handleMonitorEvent(struct HvLpEvent *event); + +/* We use this structure to handle asynchronous responses. The caller + * blocks on the semaphore and the handler posts the semaphore. + */ +struct doneAllocParms_t { + struct semaphore *sem; + int number; +}; + +/* Put a sequence number in each mon msg. The value is not + * important. Start at something other than 0 just for + * readability. wrapping this is ok. + */ +static u8 viomonseq = 22; + +/* Our hosting logical partition. We get this at startup + * time, and different modules access this variable directly. + */ +HvLpIndex viopath_hostLp = 0xff; /* HvLpIndexInvalid */ + +/* For each kind of incoming event we set a pointer to a + * routine to call. + */ +static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES]; + +/* A page to build an lp event in + */ +static unsigned long VIOReqPage; + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + HvLpEvent_Rc hvrc; + DECLARE_MUTEX_LOCKED(Semaphore); + dma_addr_t dmaa = + pci_map_single(iSeries_vio_dev, buf, PAGE_SIZE, PCI_DMA_FROMDEVICE); + int len = PAGE_SIZE; + + if (len > blen) + len = blen; + + memset(buf, 0x00, len); + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_config | + vioconfigget, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) + &Semaphore, VIOVERSION << 16, + ((u64) dmaa) << 32, len, 0, + 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viopath hv error on op %d\n", (int) hvrc); + } + + down(&Semaphore); + + pci_unmap_single(iSeries_vio_dev, dmaa, PAGE_SIZE, PCI_DMA_FROMDEVICE); + + *eof = 1; + return strlen(buf); +} + +/* Handle writes to our proc file system + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + /* Doesn't do anything today!!! + */ + return count; +} + +/* setup our proc file system entries + */ +static void vio_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = create_proc_entry("config", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* See if a given LP is active. Allow for invalid lps to be passed in + * and just return invalid + */ +int viopath_isactive(HvLpIndex lp) +{ + if (lp == HvLpIndexInvalid) + return 0; + if (lp < HVMAXARCHITECTEDLPS) + return viopathStatus[lp].isActive; + else + return 0; +} + +/* We cache the source and target instance ids for each + * partition. + */ +HvLpInstanceId viopath_sourceinst(HvLpIndex lp) +{ + return viopathStatus[lp].mSourceInst; +} + +HvLpInstanceId viopath_targetinst(HvLpIndex lp) +{ + return viopathStatus[lp].mTargetInst; +} + +/* Send a monitor message. This is a message with the acknowledge + * bit on that the other side will NOT explicitly acknowledge. 
When
+ * the other side goes down, the hypervisor will acknowledge any
+ * outstanding messages....so we will know when the other side dies.
+ */
+static void sendMonMsg(HvLpIndex remoteLp)
+{
+	HvLpEvent_Rc hvrc;
+
+	viopathStatus[remoteLp].mSourceInst =
+	    HvCallEvent_getSourceLpInstanceId(remoteLp,
+					      HvLpEvent_Type_VirtualIo);
+	viopathStatus[remoteLp].mTargetInst =
+	    HvCallEvent_getTargetLpInstanceId(remoteLp,
+					      HvLpEvent_Type_VirtualIo);
+
+	/* Deliberately ignore the return code here.  If we call this
+	 * more than once, we don't care.
+	 */
+	vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent);
+
+	hvrc = HvCallEvent_signalLpEventFast(remoteLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_monitor,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_DeferredAck,
+					     viopathStatus[remoteLp].
+					     mSourceInst,
+					     viopathStatus[remoteLp].
+					     mTargetInst, viomonseq++,
+					     0, 0, 0, 0, 0);
+
+	if (hvrc == HvLpEvent_Rc_Good) {
+		viopathStatus[remoteLp].isActive = 1;
+	} else {
+		printk(KERN_WARNING_VIO
+		       "could not connect to partition %d\n", remoteLp);
+		viopathStatus[remoteLp].isActive = 0;
+	}
+}
+
+static void handleMonitorEvent(struct HvLpEvent *event)
+{
+	HvLpIndex remoteLp;
+	int i;
+
+	/* First see if this is just a normal monitor message from the
+	 * other partition
+	 */
+	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
+		remoteLp = event->xSourceLp;
+		if (!viopathStatus[remoteLp].isActive)
+			sendMonMsg(remoteLp);
+		return;
+	}
+
+	/* This path is for an acknowledgement; the other partition
+	 * died
+	 */
+	remoteLp = event->xTargetLp;
+	if ((event->xSourceInstanceId !=
+	     viopathStatus[remoteLp].mSourceInst)
+	    || (event->xTargetInstanceId !=
+		viopathStatus[remoteLp].mTargetInst)) {
+		printk(KERN_WARNING_VIO
+		       "ignoring ack....mismatched instances\n");
+		return;
+	}
+
+	printk(KERN_WARNING_VIO "partition %d ended\n", remoteLp);
+
+	viopathStatus[remoteLp].isActive = 0;
+
+	/* For each active handler, pass them a NULL
+	 * message to indicate that the other partition
+	 * died
+	 */
+	for (i = 0; i < VIO_MAX_SUBTYPES; i++) {
+		if (vio_handler[i] != NULL)
+			(*vio_handler[i]) (NULL);
+	}
+}
+
+int vio_setHandler(int subtype, vio_event_handler_t * beh)
+{
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+		return -EINVAL;
+
+	if (vio_handler[subtype] != NULL)
+		return -EBUSY;
+
+	vio_handler[subtype] = beh;
+	return 0;
+}
+
+int vio_clearHandler(int subtype)
+{
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+		return -EINVAL;
+
+	if (vio_handler[subtype] == NULL)
+		return -EAGAIN;
+
+	vio_handler[subtype] = NULL;
+	return 0;
+}
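+
+/* The handler table is indexed by the major subtype shifted down, so
+ * registration and dispatch agree on the slot.  The whole scheme in
+ * miniature (illustrative only; vio_handleEvent() below is the real
+ * dispatch path):
+ *
+ *	int slot = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+ *	vio_handler[slot] = my_handler;		// vio_setHandler()
+ *	(*vio_handler[slot]) (event);		// dispatch on receive
+ *	vio_handler[slot] = NULL;		// vio_clearHandler()
+ */
+
+static void handleConfig(struct HvLpEvent *event)
+{
+	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
+		printk(KERN_WARNING_VIO
+		       "unexpected config request from partition %d",
+		       event->xSourceLp);
+
+		if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
+		    (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
+			event->xRc = HvLpEvent_Rc_InvalidSubtype;
+			HvCallEvent_ackLpEvent(event);
+		}
+		return;
+	}
+
+	up((struct semaphore *) event->xCorrelationToken);
+}
+
+/* Initialization of the hosting partition
+ */
+void vio_set_hostlp(void)
+{
+	/* If this has already been set then we DON'T want to either change
+	 * it or re-register the proc file system
+	 */
+	if (viopath_hostLp != HvLpIndexInvalid)
+		return;
+
+	/* Figure out our hosting partition.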
This isn't allowed to change
+	 * while we're active
+	 */
+	viopath_hostLp =
+	    HvCallCfg_getHostingLpIndex(HvLpConfig_getLpIndex());
+
+	/* If we have a valid hosting LP, create a proc file system entry
+	 * for config information
+	 */
+	if (viopath_hostLp != HvLpIndexInvalid) {
+		iSeries_proc_callback(&vio_proc_init);
+		vio_setHandler(viomajorsubtype_config, handleConfig);
+	}
+}
+
+static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs)
+{
+	HvLpIndex remoteLp;
+	int subtype =
+	    (event->
+	     xSubtype & VIOMAJOR_SUBTYPE_MASK) >> VIOMAJOR_SUBTYPE_SHIFT;
+
+	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
+		remoteLp = event->xSourceLp;
+		if (event->xSourceInstanceId !=
+		    viopathStatus[remoteLp].mTargetInst) {
+			printk(KERN_WARNING_VIO
+			       "message from invalid partition. "
+			       "int msg rcvd, source inst (%d) doesn't match (%d)\n",
+			       viopathStatus[remoteLp].mTargetInst,
+			       event->xSourceInstanceId);
+			return;
+		}
+
+		if (event->xTargetInstanceId !=
+		    viopathStatus[remoteLp].mSourceInst) {
+			printk(KERN_WARNING_VIO
+			       "message from invalid partition. "
+			       "int msg rcvd, target inst (%d) doesn't match (%d)\n",
+			       viopathStatus[remoteLp].mSourceInst,
+			       event->xTargetInstanceId);
+			return;
+		}
+	} else {
+		remoteLp = event->xTargetLp;
+		if (event->xSourceInstanceId !=
+		    viopathStatus[remoteLp].mSourceInst) {
+			printk(KERN_WARNING_VIO
+			       "message from invalid partition. "
+			       "ack msg rcvd, source inst (%d) doesn't match (%d)\n",
+			       viopathStatus[remoteLp].mSourceInst,
+			       event->xSourceInstanceId);
+			return;
+		}
+
+		if (event->xTargetInstanceId !=
+		    viopathStatus[remoteLp].mTargetInst) {
+			printk(KERN_WARNING_VIO
+			       "message from invalid partition. "
+			       "viopath: ack msg rcvd, target inst (%d) doesn't match (%d)\n",
+			       viopathStatus[remoteLp].mTargetInst,
+			       event->xTargetInstanceId);
+			return;
+		}
+	}
+
+	if (vio_handler[subtype] == NULL) {
+		printk(KERN_WARNING_VIO
+		       "unexpected virtual io event subtype %d from partition %d\n",
+		       event->xSubtype, remoteLp);
+		/* No handler.  Ack if necessary
+		 */
+		if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
+		    (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
+			event->xRc = HvLpEvent_Rc_InvalidSubtype;
+			HvCallEvent_ackLpEvent(event);
+		}
+		return;
+	}
+
+	/* This innocuous little line is where all the real work happens
+	 */
+	(*vio_handler[subtype]) (event);
+}
+
+static void viopath_donealloc(void *parm, int number)
+{
+	struct doneAllocParms_t *doneAllocParmsp =
+	    (struct doneAllocParms_t *) parm;
+	doneAllocParmsp->number = number;
+	up(doneAllocParmsp->sem);
+}
+
+static int allocateEvents(HvLpIndex remoteLp, int numEvents)
+{
+	struct doneAllocParms_t doneAllocParms;
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	doneAllocParms.sem = &Semaphore;
+
+	mf_allocateLpEvents(remoteLp, HvLpEvent_Type_VirtualIo, 250,	/* It would be nice to put a real number here! */
+			    numEvents,
+			    &viopath_donealloc, &doneAllocParms);
+
+	down(&Semaphore);
+
+	return doneAllocParms.number;
+}
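+
+/* allocateEvents() is the blocking pattern used throughout these drivers:
+ * kick off an asynchronous operation, then sleep on a locally declared
+ * semaphore until the completion callback posts it.  Reduced to a
+ * skeleton (start_async_op is a stand-in for whatever starts the
+ * operation):
+ *
+ *	DECLARE_MUTEX_LOCKED(sem);	// count 0, so down() blocks
+ *	start_async_op(..., done_callback, &sem);
+ *	down(&sem);			// done_callback() does up(&sem)
+ */
+
+int viopath_open(HvLpIndex remoteLp, int subtype, int numReq)
+{
+	int i;
+	unsigned long flags;
+
+	if ((remoteLp >= HvMaxArchitectedLps)
+	    || (remoteLp == HvLpIndexInvalid))
+		return -EINVAL;
+
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&statuslock, flags);
+
+	/* OK...we can fit 4 maximum-sized events (256 bytes) in
+	 * each page (4096).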
Get a new page every 4
+	 */
+	if (event_buffer[0] == NULL) {
+		for (i = 0; i < VIO_MAX_SUBTYPES; i++) {
+			if ((i % 4) == 0) {
+				/* GFP_ATOMIC: statuslock is held with
+				 * interrupts disabled, so we must not sleep
+				 */
+				event_buffer[i] =
+				    (void *) get_free_page(GFP_ATOMIC);
+				if (event_buffer[i] == NULL) {
+					spin_unlock_irqrestore(&statuslock,
+							       flags);
+					return -ENOMEM;
+				}
+			} else {
+				event_buffer[i] =
+				    event_buffer[i - 1] + 256;
+			}
+			atomic_set(&event_buffer_available[i], 1);
+		}
+	}
+
+	viopathStatus[remoteLp].users[subtype]++;
+
+	if (!viopathStatus[remoteLp].isOpen) {
+		HvCallEvent_openLpEventPath(remoteLp,
+					    HvLpEvent_Type_VirtualIo);
+
+		viopathStatus[remoteLp].numberAllocated +=
+		    allocateEvents(remoteLp, 1);
+
+		if (viopathStatus[remoteLp].numberAllocated == 0) {
+			HvCallEvent_closeLpEventPath(remoteLp,
+						     HvLpEvent_Type_VirtualIo);
+
+			spin_unlock_irqrestore(&statuslock, flags);
+			return -ENOMEM;
+		}
+
+		viopathStatus[remoteLp].mSourceInst =
+		    HvCallEvent_getSourceLpInstanceId(remoteLp,
+						      HvLpEvent_Type_VirtualIo);
+		viopathStatus[remoteLp].mTargetInst =
+		    HvCallEvent_getTargetLpInstanceId(remoteLp,
+						      HvLpEvent_Type_VirtualIo);
+
+		HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo,
+					  &vio_handleEvent);
+
+		viopathStatus[remoteLp].isOpen = 1;
+
+		sendMonMsg(remoteLp);
+
+		printk(KERN_INFO_VIO
+		       "Opening connection to partition %d, setting sinst %d, tinst %d\n",
+		       remoteLp,
+		       viopathStatus[remoteLp].mSourceInst,
+		       viopathStatus[remoteLp].mTargetInst);
+	}
+
+	viopathStatus[remoteLp].numberAllocated +=
+	    allocateEvents(remoteLp, numReq);
+	spin_unlock_irqrestore(&statuslock, flags);
+
+	return 0;
+}
+
+int viopath_close(HvLpIndex remoteLp, int subtype, int numReq)
+{
+	unsigned long flags;
+	int i;
+	int numOpen;
+	struct doneAllocParms_t doneAllocParms;
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	doneAllocParms.sem = &Semaphore;
+
+	if ((remoteLp >= HvMaxArchitectedLps)
+	    || (remoteLp == HvLpIndexInvalid))
+		return -EINVAL;
+
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&statuslock, flags);
+
+	viopathStatus[remoteLp].users[subtype]--;
+
+	mf_deallocateLpEvents(remoteLp, HvLpEvent_Type_VirtualIo,
+			      numReq,
+			      &viopath_donealloc,
+			      &doneAllocParms);
+	down(&Semaphore);
+
+	for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++) {
+		numOpen += viopathStatus[remoteLp].users[i];
+	}
+
+	if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) {
+		printk(KERN_INFO_VIO
+		       "Closing connection to partition %d\n", remoteLp);
+
+		HvCallEvent_closeLpEventPath(remoteLp,
+					     HvLpEvent_Type_VirtualIo);
+		viopathStatus[remoteLp].isOpen = 0;
+		viopathStatus[remoteLp].isActive = 0;
+
+		/* Mark every buffer unavailable, then free each page
+		 * (one page backs each run of four 256-byte buffers),
+		 * and forget the first slot so a later open reallocates
+		 */
+		for (i = 0; i < VIO_MAX_SUBTYPES; i++)
+			atomic_set(&event_buffer_available[i], 0);
+
+		for (i = 0; i < VIO_MAX_SUBTYPES; i += 4)
+			free_page((unsigned long) event_buffer[i]);
+
+		event_buffer[0] = NULL;
+	}
+	spin_unlock_irqrestore(&statuslock, flags);
+	return 0;
+}
+
+void *vio_get_event_buffer(int subtype)
+{
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+		return NULL;
+
+	if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0)
+		return event_buffer[subtype];
+	else
+		return NULL;
+}
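+
+/* Users borrow the single per-subtype buffer around each send; typical
+ * use (sketch only, the event type name is illustrative):
+ *
+ *	struct SomeLpEvent *ev =
+ *	    vio_get_event_buffer(viomajorsubtype_blockio);
+ *	if (ev == NULL)
+ *		... back off: the buffer for this subtype is in use ...
+ *	... build and signal the event ...
+ *	vio_free_event_buffer(viomajorsubtype_blockio, ev);
+ */
+
+void vio_free_event_buffer(int subtype, void *buffer)
+{
+	subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+	if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) {
+		printk(KERN_WARNING_VIO
+		       "unexpected subtype %d freeing event buffer\n",
+		       subtype);
+		return;
+	}
+
+	if (atomic_read(&event_buffer_available[subtype]) != 0) {
+		printk(KERN_WARNING_VIO
+		       "freeing unallocated event buffer, 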
subtype %d\n",
+		       subtype);
+		return;
+	}
+
+	if (buffer != event_buffer[subtype]) {
+		printk(KERN_WARNING_VIO
+		       "freeing invalid event buffer, subtype %d\n",
+		       subtype);
+	}
+
+	atomic_set(&event_buffer_available[subtype], 1);
+}
diff -uNr --exclude=CVS ../kernel.org/linux/drivers/iseries/viotape.c linuxppc64_2_4/drivers/iseries/viotape.c
--- ../kernel.org/linux/drivers/iseries/viotape.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/drivers/iseries/viotape.c	Wed Nov 14 13:42:56 2001
@@ -0,0 +1,1296 @@
+/* -*- linux-c -*-
+ * drivers/char/viotape.c
+ *
+ * iSeries Virtual Tape
+ ***************************************************************************
+ *
+ * Authors: Dave Boutcher
+ *          Ryan Arnold
+ *          Colin Devilbiss
+ *
+ * (C) Copyright 2000 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ ***************************************************************************
+ * This routine provides access to tape drives owned and managed by an OS/400
+ * partition running on the same box as this Linux partition.
+ *
+ * All tape operations are performed by sending messages back and forth to
+ * the OS/400 partition.  The format of the messages is defined in
+ * iSeries/vio.h
+ *
+ */
+
+
+#undef VIOT_DEBUG
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "vio.h"
+#include
+#include "asm/iSeries/HvCallEvent.h"
+#include "asm/iSeries/HvLpConfig.h"
+#include
+
+extern struct pci_dev * iSeries_vio_dev;
+
+static int viotape_major = 230;
+static int viotape_numdev = 0;
+
+#define VIOTAPE_MAXREQ 1
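+
+/* VIOTAPE_MAXREQ bounds the number of in-flight requests: reqSem (below)
+ * is initialized to this count in viotap_init(), and every request path
+ * brackets the send with the counting-semaphore throttle:
+ *
+ *	sema_init(&reqSem, VIOTAPE_MAXREQ);
+ *	down(&reqSem);	// blocks once VIOTAPE_MAXREQ requests are out
+ *	... send the request ...
+ *	up(&reqSem);	// released when the request completes
+ */
+
+/* version number for viotape driver */
+static unsigned int version_major = 1;
+static unsigned int version_minor = 0;
+
+static u64 sndMsgSeq;
+static u64 sndMsgAck;
+static u64 rcvMsgSeq;
+static u64 rcvMsgAck;
+
+/***************************************************************************
+ * The minor number follows the conventions of the SCSI tape drives.  The
+ * rewind and mode are encoded in the minor #.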
We use this struct to break + * them out + ***************************************************************************/ +struct viot_devinfo_struct { + int major; + int minor; + int devno; + int mode; + int rewind; +}; + +#define VIOTAPOP_RESET 0 +#define VIOTAPOP_FSF 1 +#define VIOTAPOP_BSF 2 +#define VIOTAPOP_FSR 3 +#define VIOTAPOP_BSR 4 +#define VIOTAPOP_WEOF 5 +#define VIOTAPOP_REW 6 +#define VIOTAPOP_NOP 7 +#define VIOTAPOP_EOM 8 +#define VIOTAPOP_ERASE 9 +#define VIOTAPOP_SETBLK 10 +#define VIOTAPOP_SETDENSITY 11 +#define VIOTAPOP_SETPOS 12 +#define VIOTAPOP_GETPOS 13 +#define VIOTAPOP_SETPART 14 + +struct viotapelpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u16 mTape; + u16 mFlags; + u32 mToken; + u64 mLen; + union { + struct { + u32 mTapeOp; + u32 mCount; + } tapeOp; + struct { + u32 mType; + u32 mResid; + u32 mDsreg; + u32 mGstat; + u32 mErreg; + u32 mFileNo; + u32 mBlkNo; + } getStatus; + struct { + u32 mBlkNo; + } getPos; + } u; +}; +enum viotapesubtype { + viotapeopen = 0x0001, + viotapeclose = 0x0002, + viotaperead = 0x0003, + viotapewrite = 0x0004, + viotapegetinfo = 0x0005, + viotapeop = 0x0006, + viotapegetpos = 0x0007, + viotapesetpos = 0x0008, + viotapegetstatus = 0x0009 +}; + +enum viotapeRc { + viotape_InvalidRange = 0x0601, + viotape_InvalidToken = 0x0602, + viotape_DMAError = 0x0603, + viotape_UseError = 0x0604, + viotape_ReleaseError = 0x0605, + viotape_InvalidTape = 0x0606, + viotape_InvalidOp = 0x0607, + viotape_TapeErr = 0x0608, + + viotape_AllocTimedOut = 0x0640, + viotape_BOTEnc = 0x0641, + viotape_BlankTape = 0x0642, + viotape_BufferEmpty = 0x0643, + viotape_CleanCartFound = 0x0644, + viotape_CmdNotAllowed = 0x0645, + viotape_CmdNotSupported = 0x0646, + viotape_DataCheck = 0x0647, + viotape_DecompressErr = 0x0648, + viotape_DeviceTimeout = 0x0649, + viotape_DeviceUnavail = 0x064a, + viotape_DeviceBusy = 0x064b, + viotape_EndOfMedia = 0x064c, + viotape_EndOfTape = 0x064d, + viotape_EquipCheck = 0x064e, + viotape_InsufficientRs = 0x064f, + viotape_InvalidLogBlk = 0x0650, + viotape_LengthError = 0x0651, + viotape_LibDoorOpen = 0x0652, + viotape_LoadFailure = 0x0653, + viotape_NotCapable = 0x0654, + viotape_NotOperational = 0x0655, + viotape_NotReady = 0x0656, + viotape_OpCancelled = 0x0657, + viotape_PhyLinkErr = 0x0658, + viotape_RdyNotBOT = 0x0659, + viotape_TapeMark = 0x065a, + viotape_WriteProt = 0x065b +}; + +/* Maximum # tapes we support + */ +#define VIOTAPE_MAX_TAPE 8 +#define MAX_PARTITIONS 4 + +/* defines for current tape state */ +#define VIOT_IDLE 0 +#define VIOT_READING 1 +#define VIOT_WRITING 2 + +/* Our info on the tapes + */ +struct tape_descr { + char rsrcname[10]; + char type[4]; + char model[3]; +}; + +static struct tape_descr *viotape_unitinfo = NULL; + +static char *lasterr[VIOTAPE_MAX_TAPE]; + +static struct mtget viomtget[VIOTAPE_MAX_TAPE]; + +/* maintain the current state of each tape (and partition) + so that we know when to write EOF marks. 
+*/
+static struct {
+	unsigned char cur_part;
+	devfs_handle_t dev_handle;
+	struct {
+		unsigned char rwi;
+	} part_stat[MAX_PARTITIONS];
+} state[VIOTAPE_MAX_TAPE];
+
+/* We single-thread
+ */
+static struct semaphore reqSem;
+
+/* When we send a request, we use this struct to get the response back
+ * from the interrupt handler
+ */
+struct opStruct {
+	void *buffer;
+	dma_addr_t dmaaddr;
+	size_t count;
+	int rc;
+	struct semaphore *sem;
+	struct opStruct *free;
+};
+
+static spinlock_t opStructListLock;
+static struct opStruct *opStructList;
+
+/* forward declaration to resolve interdependence */
+static int chg_state(int index, unsigned char new_state,
+		     struct file *file);
+
+/* Decode the kdev_t into its parts
+ */
+void getDevInfo(kdev_t dev, struct viot_devinfo_struct *devi)
+{
+	devi->major = MAJOR(dev);
+	devi->minor = MINOR(dev);
+	devi->devno = devi->minor & 0x1F;
+	devi->mode = (devi->minor & 0x60) >> 5;
+	/* if bit is set in the minor, do _not_ rewind automatically */
+	devi->rewind = !(devi->minor & 0x80);
+}
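+
+/* Example decodings under this layout (minor = rewind-bit<<7 | mode<<5 |
+ * devno, following the SCSI tape convention noted above):
+ *
+ *	minor 0x00 -> devno 0, mode 0, rewind on close
+ *	minor 0x81 -> devno 1, mode 0, no rewind on close (bit 7 set)
+ *	minor 0x22 -> devno 2, mode 1, rewind on close
+ */
+
+/* Allocate an op structure from our pool
+ */
+static struct opStruct *getOpStruct(void)
+{
+	struct opStruct *newOpStruct;
+	spin_lock(&opStructListLock);
+
+	if (opStructList == NULL) {
+		/* GFP_ATOMIC: we hold opStructListLock here, so we
+		 * must not sleep in the allocator
+		 */
+		newOpStruct = kmalloc(sizeof(struct opStruct), GFP_ATOMIC);
+	} else {
+		newOpStruct = opStructList;
+		opStructList = opStructList->free;
+	}
+
+	if (newOpStruct)
+		memset(newOpStruct, 0x00, sizeof(struct opStruct));
+
+	spin_unlock(&opStructListLock);
+
+	return newOpStruct;
+}
+
+/* Return an op structure to our pool.  getOpStruct()/freeOpStruct() keep
+ * a LIFO free list threaded through the op structures themselves, so
+ * steady-state operation does no allocation:
+ *
+ *	op = getOpStruct();	// pops the head, or kmallocs when empty
+ *	... use op ...
+ *	freeOpStruct(op);	// pushes op back on the list head
+ *
+ * Returned structures are zeroed on allocation, so op->sem starts NULL.
+ */
+static void freeOpStruct(struct opStruct *opStruct)
+{
+	spin_lock(&opStructListLock);
+	opStruct->free = opStructList;
+	opStructList = opStruct;
+	spin_unlock(&opStructListLock);
+}
+
+/* Map our tape return codes to errno values
+ */
+int tapeRcToErrno(int tapeRc, char *operation, int tapeno)
+{
+	int terrno;
+	char *tmsg;
+
+	switch (tapeRc) {
+	case 0:
+		return 0;
+	case viotape_InvalidRange:
+		terrno = EIO;
+		tmsg = "Internal error";
+		break;
+	case viotape_InvalidToken:
+		terrno = EIO;
+		tmsg = "Internal error";
+		break;
+	case viotape_DMAError:
+		terrno = EIO;
+		tmsg = "DMA error";
+		break;
+	case viotape_UseError:
+		terrno = EIO;
+		tmsg = "Internal error";
+		break;
+	case viotape_ReleaseError:
+		terrno = EIO;
+		tmsg = "Internal error";
+		break;
+	case viotape_InvalidTape:
+		terrno = EIO;
+		tmsg = "Invalid tape device";
+		break;
+	case viotape_InvalidOp:
+		terrno = EIO;
+		tmsg = "Invalid operation";
+		break;
+	case viotape_TapeErr:
+		terrno = EIO;
+		tmsg = "Tape error";
+		break;
+
+	case viotape_AllocTimedOut:
+		terrno = EBUSY;
+		tmsg = "Allocate timed out";
+		break;
+	case viotape_BOTEnc:
+		terrno = EIO;
+		tmsg = "Beginning of tape encountered";
+		break;
+	case viotape_BlankTape:
+		terrno = EIO;
+		tmsg = "Blank tape";
+		break;
+	case viotape_BufferEmpty:
+		terrno = EIO;
+		tmsg = "Buffer empty";
+		break;
+	case viotape_CleanCartFound:
+		terrno = ENOMEDIUM;
+		tmsg = "Cleaning cartridge found";
+		break;
+	case viotape_CmdNotAllowed:
+		terrno = EIO;
+		tmsg = "Command not allowed";
+		break;
+	case viotape_CmdNotSupported:
+		terrno = EIO;
+		tmsg = "Command not supported";
+		break;
+	case viotape_DataCheck:
+		terrno = EIO;
+		tmsg = "Data check";
+		break;
+	case viotape_DecompressErr:
+		terrno = EIO;
+		tmsg = "Decompression error";
+		break;
+	case viotape_DeviceTimeout:
+		terrno = EBUSY;
+		tmsg = "Device timeout";
+		break;
+	case viotape_DeviceUnavail:
+		terrno = EIO;
+		tmsg = "Device unavailable";
+		break;
+	case viotape_DeviceBusy:
+		terrno = EBUSY;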
tmsg = "Device busy"; + break; + case viotape_EndOfMedia: + terrno = ENOSPC; + tmsg = "End of media"; + break; + case viotape_EndOfTape: + terrno = ENOSPC; + tmsg = "End of tape"; + break; + case viotape_EquipCheck: + terrno = EIO; + tmsg = "Equipment check"; + break; + case viotape_InsufficientRs: + terrno = EOVERFLOW; + tmsg = "Insufficient tape resources"; + break; + case viotape_InvalidLogBlk: + terrno = EIO; + tmsg = "Invalid logical block location"; + break; + case viotape_LengthError: + terrno = EOVERFLOW; + tmsg = "Length error"; + break; + case viotape_LibDoorOpen: + terrno = EBUSY; + tmsg = "Door open"; + break; + case viotape_LoadFailure: + terrno = ENOMEDIUM; + tmsg = "Load failure"; + break; + case viotape_NotCapable: + terrno = EIO; + tmsg = "Not capable"; + break; + case viotape_NotOperational: + terrno = EIO; + tmsg = "Not operational"; + break; + case viotape_NotReady: + terrno = EIO; + tmsg = "Not ready"; + break; + case viotape_OpCancelled: + terrno = EIO; + tmsg = "Operation cancelled"; + break; + case viotape_PhyLinkErr: + terrno = EIO; + tmsg = "Physical link error"; + break; + case viotape_RdyNotBOT: + terrno = EIO; + tmsg = "Ready but not beginning of tape"; + break; + case viotape_TapeMark: + terrno = EIO; + tmsg = "Tape mark"; + break; + case viotape_WriteProt: + terrno = EROFS; + tmsg = "Write protection error"; + break; + default: + terrno = EIO; + tmsg = "I/O error"; + } + + printk(KERN_WARNING_VIO "tape error on Device %d (%10.10s): %s\n", + tapeno, viotape_unitinfo[tapeno].rsrcname, tmsg); + + lasterr[tapeno] = tmsg; + + return -terrno; +} + +/* Handle reads from the proc file system. + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + + len += sprintf(buf + len, "viotape driver version %d.%d\n", + version_major, version_minor); + + for (i = 0; i < viotape_numdev; i++) { + + len += + sprintf(buf + len, + "viotape device %d is iSeries resource %10.10s type %4.4s, model %3.3s\n", + i, viotape_unitinfo[i].rsrcname, + viotape_unitinfo[i].type, + viotape_unitinfo[i].model); + if (lasterr[i]) + len += + sprintf(buf + len, " last error: %s\n", + lasterr[i]); + } + + *eof = 1; + return len; +} + +/* setup our proc file system entries + */ +void viotape_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viotape", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; +} + +/* clean up our proc file system entries + */ +void viotape_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viotape", iSeries_proc); +} + + +/* Get info on all tapes from OS/400 + */ +static void get_viotape_info(void) +{ + dma_addr_t dmaaddr; + HvLpEvent_Rc hvrc; + int i; + struct opStruct *op = getOpStruct(); + DECLARE_MUTEX_LOCKED(Semaphore); + if (op == NULL) + return; + + if (viotape_unitinfo == NULL) { + viotape_unitinfo = + kmalloc(sizeof(struct tape_descr) * VIOTAPE_MAX_TAPE, + GFP_KERNEL); + } + memset(viotape_unitinfo, 0x00, + sizeof(struct tape_descr) * VIOTAPE_MAX_TAPE); + memset(lasterr, 0x00, sizeof(lasterr)); + + op->sem = &Semaphore; + + dmaaddr = pci_map_single(iSeries_vio_dev, viotape_unitinfo, + sizeof(struct tape_descr) * + VIOTAPE_MAX_TAPE, PCI_DMA_FROMDEVICE); + if (dmaaddr == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO "viotape error allocating tce\n"); + return; + } + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + 
viomajorsubtype_tape | + viotapegetinfo, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, dmaaddr, + sizeof(struct tape_descr) * + VIOTAPE_MAX_TAPE, 0, 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viotape hv error on op %d\n", (int) hvrc); + } + + down(&Semaphore); + + freeOpStruct(op); + + + for (i = 0; + ((i < VIOTAPE_MAX_TAPE) && (viotape_unitinfo[i].rsrcname[0])); + i++) { + printk("found a tape %10.10s\n", + viotape_unitinfo[i].rsrcname); + viotape_numdev++; + } +} + + +/* Write + */ +static ssize_t viotap_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + HvLpEvent_Rc hvrc; + kdev_t dev = file->f_dentry->d_inode->i_rdev; + unsigned short flags = file->f_flags; + struct opStruct *op = getOpStruct(); + int noblock = ((flags & O_NONBLOCK) != 0); + int err; + struct viot_devinfo_struct devi; + DECLARE_MUTEX_LOCKED(Semaphore); + + if (op == NULL) + return -ENOMEM; + + getDevInfo(dev, &devi); + + /* We need to make sure we can send a request. We use + * a semaphore to keep track of # requests in use. If + * we are non-blocking, make sure we don't block on the + * semaphore + */ + if (noblock) { + if (down_trylock(&reqSem)) { + freeOpStruct(op); + return -EWOULDBLOCK; + } + } else { + down(&reqSem); + } + + /* Allocate a DMA buffer */ + op->buffer = pci_alloc_consistent(iSeries_vio_dev, count, &op->dmaaddr); + + if ((op->dmaaddr == 0xFFFFFFFF) || (op->buffer == NULL)) { + printk(KERN_WARNING_VIO + "tape error allocating dma buffer for len %ld\n", + count); + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + op->count = count; + + /* Copy the data into the buffer */ + err = copy_from_user(op->buffer, (const void *) buf, count); + if (err) { + printk(KERN_WARNING_VIO + "tape: error on copy from user\n"); + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + if (noblock) { + op->sem = NULL; + } else { + op->sem = &Semaphore; + } + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_tape | + viotapewrite, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, + ((u64) devi. 
+ devno << 48) | op->dmaaddr, + count, 0, 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viotape hv error on op %d\n", (int) hvrc); + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + freeOpStruct(op); + up(&reqSem); + return -EIO; + } + + if (noblock) + return count; + + down(&Semaphore); + + err = op->rc; + + /* Free the buffer */ + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + + count = op->count; + + freeOpStruct(op); + up(&reqSem); + if (err) + return tapeRcToErrno(err, "write", devi.devno); + else { + chg_state(devi.devno, VIOT_WRITING, file); + return count; + } +} + +/* read + */ +static ssize_t viotap_read(struct file *file, char *buf, size_t count, + loff_t * ptr) +{ + HvLpEvent_Rc hvrc; + kdev_t dev = file->f_dentry->d_inode->i_rdev; + unsigned short flags = file->f_flags; + struct opStruct *op = getOpStruct(); + int noblock = ((flags & O_NONBLOCK) != 0); + int err; + struct viot_devinfo_struct devi; + DECLARE_MUTEX_LOCKED(Semaphore); + + if (op == NULL) + return -ENOMEM; + + getDevInfo(dev, &devi); + + /* We need to make sure we can send a request. We use + * a semaphore to keep track of # requests in use. If + * we are non-blocking, make sure we don't block on the + * semaphore + */ + if (noblock) { + if (down_trylock(&reqSem)) { + freeOpStruct(op); + return -EWOULDBLOCK; + } + } else { + down(&reqSem); + } + + chg_state(devi.devno, VIOT_READING, file); + + /* Allocate a DMA buffer */ + op->buffer = pci_alloc_consistent(iSeries_vio_dev, count, &op->dmaaddr); + + if ((op->dmaaddr == 0xFFFFFFFF) || (op->buffer == NULL)) { + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + op->count = count; + + op->sem = &Semaphore; + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_tape | + viotaperead, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, + ((u64) devi. 
+					     devno << 48) | op->dmaaddr,
+					     count, 0, 0);
+	if (hvrc != HvLpEvent_Rc_Good) {
+		printk(KERN_WARNING_VIO
+		       "tape hv error on op %d\n", (int) hvrc);
+		pci_free_consistent(iSeries_vio_dev, count, op->buffer,
+				    op->dmaaddr);
+		freeOpStruct(op);
+		up(&reqSem);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc == 0) {
+		/* If we got data back */
+		if (op->count) {
+			/* Copy the data into the buffer */
+			err = copy_to_user(buf, op->buffer, count);
+			if (err) {
+				printk("error on copy_to_user\n");
+				pci_free_consistent(iSeries_vio_dev, count,
+						    op->buffer,
+						    op->dmaaddr);
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EFAULT;
+			}
+		}
+	}
+
+	err = op->rc;
+
+	/* Free the buffer */
+	pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr);
+	count = op->count;
+
+	freeOpStruct(op);
+	up(&reqSem);
+	if (err)
+		return tapeRcToErrno(err, "read", devi.devno);
+	else
+		return count;
+}
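+
+/* From user space the non-blocking read/write path looks like this
+ * (sketch; the device name assumes the devfs names registered in
+ * viotap_init() below):
+ *
+ *	int fd = open("/dev/viotape0", O_WRONLY | O_NONBLOCK);
+ *	ssize_t n = write(fd, buf, len);
+ *	if (n < 0 && errno == EWOULDBLOCK)
+ *		... the single request slot was busy; retry later ...
+ */
+
+/* ioctl
+ */
+static int viotap_ioctl(struct inode *inode, struct file *file,
+			unsigned int cmd, unsigned long arg)
+{
+	HvLpEvent_Rc hvrc;
+	int err;
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	struct opStruct *op = getOpStruct();
+	struct viot_devinfo_struct devi;
+	if (op == NULL)
+		return -ENOMEM;
+
+	getDevInfo(dev, &devi);
+
+	down(&reqSem);
+
+	switch (cmd) {
+	case MTIOCTOP:{
+			struct mtop mtc;
+			u32 myOp;
+
+			/* inode is null if and only if we (the kernel) made the request */
+			if (inode == NULL)
+				memcpy(&mtc, (void *) arg,
+				       sizeof(struct mtop));
+			else if (copy_from_user
+				 ((char *) &mtc, (char *) arg,
+				  sizeof(struct mtop))) {
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EFAULT;
+			}
+
+			switch (mtc.mt_op) {
+			case MTRESET:
+				myOp = VIOTAPOP_RESET;
+				break;
+			case MTFSF:
+				myOp = VIOTAPOP_FSF;
+				break;
+			case MTBSF:
+				myOp = VIOTAPOP_BSF;
+				break;
+			case MTFSR:
+				myOp = VIOTAPOP_FSR;
+				break;
+			case MTBSR:
+				myOp = VIOTAPOP_BSR;
+				break;
+			case MTWEOF:
+				myOp = VIOTAPOP_WEOF;
+				break;
+			case MTREW:
+				myOp = VIOTAPOP_REW;
+				break;
+			case MTNOP:
+				myOp = VIOTAPOP_NOP;
+				break;
+			case MTEOM:
+				myOp = VIOTAPOP_EOM;
+				break;
+			case MTERASE:
+				myOp = VIOTAPOP_ERASE;
+				break;
+			case MTSETBLK:
+				myOp = VIOTAPOP_SETBLK;
+				break;
+			case MTSETDENSITY:
+				myOp = VIOTAPOP_SETDENSITY;
+				break;
+			case MTTELL:
+				myOp = VIOTAPOP_GETPOS;
+				break;
+			case MTSEEK:
+				myOp = VIOTAPOP_SETPOS;
+				break;
+			case MTSETPART:
+				myOp = VIOTAPOP_SETPART;
+				break;
+			default:
+				/* Unknown op: release the op struct and
+				 * the request slot before failing
+				 */
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EIO;
+			}
+
+/* if we moved the head, we are no longer reading or writing */
+			switch (mtc.mt_op) {
+			case MTFSF:
+			case MTBSF:
+			case MTFSR:
+			case MTBSR:
+			case MTTELL:
+			case MTSEEK:
+			case MTREW:
+				chg_state(devi.devno, VIOT_IDLE, file);
+			}
+
+			op->sem = &Semaphore;
+			hvrc =
+			    HvCallEvent_signalLpEventFast(viopath_hostLp,
+							  HvLpEvent_Type_VirtualIo,
+							  viomajorsubtype_tape
+							  | viotapeop,
+							  HvLpEvent_AckInd_DoAck,
+							  HvLpEvent_AckType_ImmediateAck,
+							  viopath_sourceinst
+							  (viopath_hostLp),
+							  viopath_targetinst
+							  (viopath_hostLp),
+							  (u64) (unsigned
+								 long) op,
+							  VIOVERSION << 16,
+							  ((u64) devi.
+							   devno << 48), 0,
+							  (((u64) myOp) <<
+							   32) | mtc.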
mt_count, 0);
+			if (hvrc != HvLpEvent_Rc_Good) {
+				printk("viotape hv error on op %d\n",
+				       (int) hvrc);
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EIO;
+			}
+			down(&Semaphore);
+			if (op->rc) {
+				freeOpStruct(op);
+				up(&reqSem);
+				return tapeRcToErrno(op->rc,
+						     "tape operation",
+						     devi.devno);
+			} else {
+				freeOpStruct(op);
+				up(&reqSem);
+				return 0;
+			}
+			break;
+		}
+
+	case MTIOCGET:
+		op->sem = &Semaphore;
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_tape |
+						     viotapegetstatus,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst
+						     (viopath_hostLp),
+						     viopath_targetinst
+						     (viopath_hostLp),
+						     (u64) (unsigned long)
+						     op, VIOVERSION << 16,
+						     ((u64) devi.
+						      devno << 48), 0, 0,
+						     0);
+		if (hvrc != HvLpEvent_Rc_Good) {
+			printk("viotape hv error on op %d\n", (int) hvrc);
+			freeOpStruct(op);
+			up(&reqSem);
+			return -EIO;
+		}
+		down(&Semaphore);
+		up(&reqSem);
+		if (op->rc) {
+			freeOpStruct(op);
+			return tapeRcToErrno(op->rc, "get status",
+					     devi.devno);
+		} else {
+			freeOpStruct(op);
+			/* index by device number, not the raw kdev_t */
+			err =
+			    copy_to_user((void *) arg,
+					 &viomtget[devi.devno],
+					 sizeof(viomtget[0]));
+			if (err)
+				return -EFAULT;
+			return 0;
+		}
+		break;
+	case MTIOCPOS:
+		printk("Got an MTIOCPOS\n");
+		/* fall through: not implemented */
+	default:
+		freeOpStruct(op);
+		up(&reqSem);
+		return -ENOSYS;
+	}
+	return 0;
+}
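+
+/* The MTIOCTOP/MTIOCGET cases above implement the standard mt tape
+ * interface, so ordinary tape tools work unchanged.  From user space
+ * (sketch):
+ *
+ *	#include <sys/mtio.h>
+ *
+ *	struct mtop op = { MTREW, 1 };
+ *	ioctl(fd, MTIOCTOP, &op);	// rewind the tape
+ *
+ *	struct mtget status;
+ *	ioctl(fd, MTIOCGET, &status);	// fetch drive status
+ */
+
+/* Open
+ */
+static int viotap_open(struct inode *inode, struct file *file)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	HvLpEvent_Rc hvrc;
+	struct opStruct *op = getOpStruct();
+	struct viot_devinfo_struct devi;
+	if (op == NULL)
+		return -ENOMEM;
+
+	getDevInfo(dev, &devi);
+
+// Note: We currently only support one mode!
+	if ((devi.devno >= viotape_numdev) || (devi.mode)) {
+		freeOpStruct(op);
+		return -ENODEV;
+	}
+
+	op->sem = &Semaphore;
+
+	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_tape |
+					     viotapeopen,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_ImmediateAck,
+					     viopath_sourceinst
+					     (viopath_hostLp),
+					     viopath_targetinst
+					     (viopath_hostLp),
+					     (u64) (unsigned long) op,
+					     VIOVERSION << 16,
+					     ((u64) devi.devno << 48), 0,
+					     0, 0);
+
+
+	if (hvrc != 0) {
+		printk("viotape bad rc on signalLpEvent %d\n", (int) hvrc);
+		freeOpStruct(op);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc) {
+		freeOpStruct(op);
+		return tapeRcToErrno(op->rc, "open", devi.devno);
+	} else {
+		freeOpStruct(op);
+		MOD_INC_USE_COUNT;
+		return 0;
+	}
+}
+
+
+/* Release
+ */
+static int viotap_release(struct inode *inode, struct file *file)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	HvLpEvent_Rc hvrc;
+	struct viot_devinfo_struct devi;
+	struct opStruct *op = getOpStruct();
+
+	if (op == NULL)
+		return -ENOMEM;
+	op->sem = &Semaphore;
+
+	getDevInfo(dev, &devi);
+
+	if (devi.devno >= viotape_numdev) {
+		freeOpStruct(op);
+		return -ENODEV;
+	}
+
+	chg_state(devi.devno, VIOT_IDLE, file);
+
+	if (devi.rewind) {
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_tape |
+						     viotapeop,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst
+						     (viopath_hostLp),
+						     viopath_targetinst
+						     (viopath_hostLp),
+						     (u64) (unsigned long)
+						     op, VIOVERSION << 16,
+						     ((u64) devi.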
+						      devno << 48), 0,
+						     ((u64) VIOTAPOP_REW)
+						     << 32, 0);
+		down(&Semaphore);
+
+		if (op->rc) {
+			tapeRcToErrno(op->rc, "rewind", devi.devno);
+		}
+	}
+
+	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_tape |
+					     viotapeclose,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_ImmediateAck,
+					     viopath_sourceinst
+					     (viopath_hostLp),
+					     viopath_targetinst
+					     (viopath_hostLp),
+					     (u64) (unsigned long) op,
+					     VIOVERSION << 16,
+					     ((u64) devi.devno << 48), 0,
+					     0, 0);
+
+
+	if (hvrc != 0) {
+		printk("viotape: bad rc on signalLpEvent %d\n",
+		       (int) hvrc);
+		freeOpStruct(op);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc) {
+		printk("viotape: close failed\n");
+	}
+	freeOpStruct(op);
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+struct file_operations viotap_fops = {
+	owner:THIS_MODULE,
+	read:viotap_read,
+	write:viotap_write,
+	ioctl:viotap_ioctl,
+	open:viotap_open,
+	release:viotap_release,
+};
+
+/* Handle interrupt events for tape
+ */
+static void vioHandleTapeEvent(struct HvLpEvent *event)
+{
+	int tapeminor;
+	struct opStruct *op;
+	struct viotapelpevent *tevent = (struct viotapelpevent *) event;
+
+	if (event == NULL) {
+		/* Notification that a partition went away! */
+		if (!viopath_isactive(viopath_hostLp)) {
+			/* TODO! Clean up */
+		}
+		return;
+	}
+
+	tapeminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK;
+	switch (tapeminor) {
+	case viotapegetinfo:
+	case viotapeopen:
+	case viotapeclose:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		op->rc = tevent->mSubTypeRc;
+		up(op->sem);
+		break;
+	case viotaperead:
+	case viotapewrite:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		op->rc = tevent->mSubTypeRc;
+		op->count = tevent->mLen;
+
+		if (op->sem) {
+			up(op->sem);
+		} else {
+			freeOpStruct(op);
+			up(&reqSem);
+		}
+		break;
+	case viotapeop:
+	case viotapegetpos:
+	case viotapesetpos:
+	case viotapegetstatus:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		if (op) {
+			op->count = tevent->u.tapeOp.mCount;
+			op->rc = tevent->mSubTypeRc;
+
+			if (op->sem) {
+				up(op->sem);
+			}
+		}
+		break;
+	default:
+		printk("viotape: weird ack\n");
+	}
+}
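+
+/* The handler above recovers its request context from the event's
+ * correlation token, which round-trips a kernel pointer through the
+ * hypervisor as a u64.  The pattern, in both directions:
+ *
+ *	send:    event.xCorrelationToken = (u64) (unsigned long) op;
+ *	receive: op = (struct opStruct *) (unsigned long)
+ *			event->xCorrelationToken;
+ */
+
+/* Do initialization
+ */
+int __init viotap_init(void)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	int rc;
+	char tapename[32];
+	int i;
+
+	printk("viotape driver version %d.%d\n", version_major,
+	       version_minor);
+
+	sndMsgSeq = sndMsgAck = 0;
+	rcvMsgSeq = rcvMsgAck = 0;
+	opStructList = NULL;
+	spin_lock_init(&opStructListLock);
+
+	sema_init(&reqSem, VIOTAPE_MAXREQ);
+
+	if (viopath_hostLp == HvLpIndexInvalid)
+		vio_set_hostlp();
+
+	/*
+	 * Open to our hosting lp
+	 */
+	if (viopath_hostLp == HvLpIndexInvalid)
+		return -1;
+
+	printk("viotape: init - open path to hosting (%d)\n",
+	       viopath_hostLp);
+
+	rc = viopath_open(viopath_hostLp, viomajorsubtype_tape,
+			  VIOTAPE_MAXREQ + 2);
+	if (rc) {
+		printk("viotape: error on viopath_open to hostlp %d\n",
+		       rc);
+	}
+
+	vio_setHandler(viomajorsubtype_tape, vioHandleTapeEvent);
+
+	printk("viotape major is %d\n", viotape_major);
+
+	get_viotape_info();
+
+	if (devfs_register_chrdev(viotape_major, "viotape", &viotap_fops)) {
+		printk("Error registering viotape device\n");
+		return -1;
+	}
+
+	for (i = 0; i < viotape_numdev; i++) {
+		int j;
+		state[i].cur_part = 0;
+		for (j = 0; j < MAX_PARTITIONS; ++j)
+			state[i].part_stat[j].rwi = VIOT_IDLE;
+		sprintf(tapename, "viotape%d", i);
+		state[i].dev_handle =
+		    devfs_register(NULL, tapename, DEVFS_FL_DEFAULT,
+				   viotape_major, i,
+				   S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP |
+				   S_IWGRP,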
&viotap_fops, NULL); + printk + ("viotape device %s is iSeries resource %10.10s type %4.4s, model %3.3s\n", + tapename, viotape_unitinfo[i].rsrcname, + viotape_unitinfo[i].type, viotape_unitinfo[i].model); + } + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viotape_proc_init); + + return 0; +} + +/* Give a new state to the tape object + */ +static int chg_state(int index, unsigned char new_state, struct file *file) +{ + unsigned char *cur_state = + &state[index].part_stat[state[index].cur_part].rwi; + int rc = 0; + + /* if the same state, don't bother */ + if (*cur_state == new_state) + return 0; + + /* write an EOF if changing from writing to some other state */ + if (*cur_state == VIOT_WRITING) { + struct mtop write_eof = { MTWEOF, 1 }; + rc = viotap_ioctl(NULL, file, MTIOCTOP, + (unsigned long) &write_eof); + } + *cur_state = new_state; + return rc; +} + +/* Cleanup + */ +static void __exit viotap_exit(void) +{ + int i, ret; + for (i = 0; i < viotape_numdev; ++i) + devfs_unregister(state[i].dev_handle); + ret = devfs_unregister_chrdev(viotape_major, "viotape"); + if (ret < 0) + printk("Error unregistering device: %d\n", ret); + iSeries_proc_callback(&viotape_proc_delete); + if (viotape_unitinfo != NULL) { + kfree(viotape_unitinfo); + viotape_unitinfo = NULL; + } + viopath_close(viopath_hostLp, viomajorsubtype_tape, VIOTAPE_MAXREQ + 2); + vio_clearHandler(viomajorsubtype_tape); +} + +MODULE_LICENSE("GPL"); +module_init(viotap_init); +module_exit(viotap_exit); diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/Config.in linuxppc64_2_4/drivers/net/Config.in --- ../kernel.org/linux/drivers/net/Config.in Fri Oct 19 10:32:28 2001 +++ linuxppc64_2_4/drivers/net/Config.in Fri Oct 26 02:25:14 2001 @@ -237,10 +237,6 @@ endmenu -if [ "$CONFIG_PPC_ISERIES" = "y" ]; then - dep_tristate 'iSeries Virtual Ethernet driver support' CONFIG_VETH $CONFIG_PPC_ISERIES -fi - bool 'FDDI driver support' CONFIG_FDDI if [ "$CONFIG_FDDI" = "y" ]; then if [ "$CONFIG_PCI" = "y" -o "$CONFIG_EISA" = "y" ]; then diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/Makefile linuxppc64_2_4/drivers/net/Makefile --- ../kernel.org/linux/drivers/net/Makefile Fri Oct 19 10:32:28 2001 +++ linuxppc64_2_4/drivers/net/Makefile Thu Oct 25 20:56:06 2001 @@ -73,7 +73,6 @@ obj-$(CONFIG_DM9102) += dmfe.o obj-$(CONFIG_YELLOWFIN) += yellowfin.o obj-$(CONFIG_ACENIC) += acenic.o -obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_NATSEMI) += natsemi.o obj-$(CONFIG_NS83820) += ns83820.o obj-$(CONFIG_STNIC) += stnic.o 8390.o diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/Space.c linuxppc64_2_4/drivers/net/Space.c --- ../kernel.org/linux/drivers/net/Space.c Thu Sep 13 17:21:32 2001 +++ linuxppc64_2_4/drivers/net/Space.c Tue Sep 4 14:09:18 2001 @@ -537,9 +537,10 @@ -#ifdef CONFIG_TR +#if 0 /* ifdef CONFIG_TR */ /* Token-ring device probe */ extern int ibmtr_probe(struct net_device *); +extern int olympic_probe(struct net_device *); extern int smctr_probe(struct net_device *); static int @@ -548,6 +549,9 @@ if (1 #ifdef CONFIG_IBMTR && ibmtr_probe(dev) +#endif +#ifdef CONFIG_IBMOL + && olympic_probe(dev) #endif #ifdef CONFIG_SMCTR && smctr_probe(dev) diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/acenic.c linuxppc64_2_4/drivers/net/acenic.c --- ../kernel.org/linux/drivers/net/acenic.c Fri Oct 12 17:35:53 2001 +++ linuxppc64_2_4/drivers/net/acenic.c Tue Oct 30 18:31:52 2001 @@ -1051,7 +1051,8 @@ struct ace_private *ap; struct ace_regs *regs; struct ace_info *info = NULL; - unsigned long tmp_ptr, myjif; + u64 
tmp_ptr; + unsigned long myjif; u32 tig_ver, mac1, mac2, tmp, pci_state; int board_idx, ecode = 0; short i; diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/pcnet32.c linuxppc64_2_4/drivers/net/pcnet32.c --- ../kernel.org/linux/drivers/net/pcnet32.c Fri Oct 19 10:32:28 2001 +++ linuxppc64_2_4/drivers/net/pcnet32.c Mon Dec 3 15:00:35 2001 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -207,7 +208,7 @@ #define RX_RING_MOD_MASK (RX_RING_SIZE - 1) #define RX_RING_LEN_BITS ((PCNET32_LOG_RX_BUFFERS) << 4) -#define PKT_BUF_SZ 1544 +#define PKT_BUF_SZ 2048 /* Offsets from base I/O address. */ #define PCNET32_WIO_RDP 0x10 @@ -300,7 +301,7 @@ static int pcnet32_probe_vlbus(int cards_found); static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); -static int pcnet32_probe1(unsigned long, unsigned char, int, int, struct pci_dev *); +static int pcnet32_probe1(unsigned long, unsigned int, int, int, struct pci_dev *); static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); @@ -323,7 +324,7 @@ const char *name; u16 vendor_id, device_id, svid, sdid, flags; int io_size; - int (*probe1) (unsigned long, unsigned char, int, int, struct pci_dev *); + int (*probe1) (unsigned long, unsigned int, int, int, struct pci_dev *); }; @@ -446,7 +447,9 @@ static int __init pcnet32_probe_vlbus(int cards_found) { unsigned long ioaddr = 0; // FIXME dev ? dev->base_addr: 0; +#ifndef __powerpc__ unsigned int irq_line = 0; // FIXME dev ? dev->irq : 0; +#endif int *port; printk(KERN_INFO "pcnet32_probe_vlbus: cards_found=%d\n", cards_found); @@ -514,7 +517,7 @@ * pdev will be NULL when called from pcnet32_probe_vlbus. */ static int __devinit -pcnet32_probe1(unsigned long ioaddr, unsigned char irq_line, int shared, int card_idx, struct pci_dev *pdev) +pcnet32_probe1(unsigned long ioaddr, unsigned int irq_line, int shared, int card_idx, struct pci_dev *pdev) { struct pcnet32_private *lp; struct resource *res; @@ -528,17 +531,20 @@ char *chipname; struct net_device *dev; struct pcnet32_access *a = NULL; + u8 promaddr[6]; /* reset the chip */ pcnet32_dwio_reset(ioaddr); + udelay (100); pcnet32_wio_reset(ioaddr); - /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ - if (pcnet32_wio_read_csr (ioaddr, 0) == 4 && pcnet32_wio_check (ioaddr)) { - a = &pcnet32_wio; + /* Important to do the check for dwio mode first. */ + if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { + a = &pcnet32_dwio; } else { - if (pcnet32_dwio_read_csr (ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { - a = &pcnet32_dwio; + if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && + pcnet32_wio_check(ioaddr)) { + a = &pcnet32_wio; } else return -ENODEV; } @@ -613,11 +619,18 @@ * one for latency - although on PCI this isnt a big loss. Older chips * have FIFO's smaller than a packet, so you can't do this. */ - + /* + * UPDATE + * Got to make sure that BCR18:MEMCMD, BCR18:BREADE, BCR18:BWRITE are + * set on a PCI + */ if(fset) { - a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); - a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); + a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0xA60)); + a->write_csr(ioaddr, 3, 0x2eb7); + a->write_csr(ioaddr, 4, 0x32ea); + a->write_csr(ioaddr, 80, 0x3f00); + #ifdef DO_DXSUFLO dxsuflo = 1; #endif @@ -637,6 +650,7 @@ * they disagree with the CSRs. 
Either way, we use the CSR values, and * double check that they are valid. */ +#ifndef CONFIG_PPC for (i = 0; i < 3; i++) { unsigned int val; val = a->read_csr(ioaddr, i+12) & 0x0ffff; @@ -644,20 +658,21 @@ dev->dev_addr[2*i] = val & 0x0ff; dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff; } +#endif + for (i = 0; i < 6; i++) { + promaddr[i] = inb(ioaddr + i); +#ifdef CONFIG_PPC + dev->dev_addr[i] = promaddr[i]; +#endif + } + if( memcmp( promaddr, dev->dev_addr, 6) ) { - u8 promaddr[6]; - for (i = 0; i < 6; i++) { - promaddr[i] = inb(ioaddr + i); - } - if( memcmp( promaddr, dev->dev_addr, 6) ) - { - printk(" warning PROM address does not match CSR address\n"); + printk(" warning PROM address does not match CSR address"); #if defined(__i386__) - printk(KERN_WARNING "%s: Probably a Compaq, using the PROM address of", dev->name); - memcpy(dev->dev_addr, promaddr, 6); + printk(KERN_WARNING "%s: Probably a Compaq, using the PROM address of", dev->name); + memcpy(dev->dev_addr, promaddr, 6); #endif - } - } + } /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ if( !is_valid_ether_addr(dev->dev_addr) ) for (i = 0; i < 6; i++) @@ -891,7 +906,7 @@ lp->init_block.filter[1] = 0x00000000; if (pcnet32_init_ring(dev)) return -ENOMEM; - + /* Re-initialize the PCNET32, and start it when done. */ lp->a.write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) &0xffff); lp->a.write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16); @@ -944,7 +959,7 @@ for (i = 0; i < TX_RING_SIZE; i++) { if (lp->tx_skbuff[i]) { pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); - dev_kfree_skb(lp->tx_skbuff[i]); + dev_kfree_skb_any(lp->tx_skbuff[i]); lp->tx_skbuff[i] = NULL; lp->tx_dma_addr[i] = 0; } @@ -973,7 +988,10 @@ } skb_reserve (rx_skbuff, 2); } - lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE); + + if (lp->rx_dma_addr[i] == NULL) + lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); + lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); lp->rx_ring[i].status = le16_to_cpu(0x8000); @@ -1020,8 +1038,9 @@ pcnet32_tx_timeout (struct net_device *dev) { struct pcnet32_private *lp = dev->priv; - unsigned int ioaddr = dev->base_addr; + unsigned long ioaddr = dev->base_addr, flags; + spin_lock_irqsave(&lp->lock, flags); /* Transmitter timeout, serious problems. 
*/ printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n", dev->name, lp->a.read_csr (ioaddr, 0)); @@ -1046,6 +1065,8 @@ dev->trans_start = jiffies; netif_start_queue(dev); + + spin_unlock_irqrestore(&lp->lock, flags); } @@ -1053,7 +1074,7 @@ pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcnet32_private *lp = dev->priv; - unsigned int ioaddr = dev->base_addr; + unsigned long ioaddr = dev->base_addr; u16 status; int entry; unsigned long flags; @@ -1307,7 +1328,8 @@ skb_put (skb, pkt_len); lp->rx_skbuff[entry] = newskb; newskb->dev = dev; - lp->rx_dma_addr[entry] = pci_map_single(lp->pci_dev, newskb->tail, newskb->len, PCI_DMA_FROMDEVICE); + pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry], PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); + lp->rx_dma_addr[entry] = pci_map_single(lp->pci_dev, newskb->tail, PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]); rx_in_place = 1; } else @@ -1359,7 +1381,7 @@ static int pcnet32_close(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + unsigned long ioaddr = dev->base_addr, flags; struct pcnet32_private *lp = dev->priv; int i; @@ -1380,13 +1402,23 @@ */ lp->a.write_bcr (ioaddr, 20, 4); + /* + * FIXME: What happens if the bcr write is posted, the buffers are + * freed and there is still incoming DMA traffic + */ + +#warning "PCI posting bug" + free_irq(dev->irq, dev); - + + /* Lock after free_irq to avoid deadlock with interrupt handler. */ + spin_lock_irqsave(&lp->lock, flags); + /* free all allocated skbuffs */ for (i = 0; i < RX_RING_SIZE; i++) { lp->rx_ring[i].status = 0; if (lp->rx_skbuff[i]) { - pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], lp->rx_skbuff[i]->len, PCI_DMA_FROMDEVICE); + pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); dev_kfree_skb(lp->rx_skbuff[i]); } lp->rx_skbuff[i] = NULL; @@ -1402,6 +1434,8 @@ lp->tx_dma_addr[i] = 0; } + spin_unlock_irqrestore(&lp->lock, flags); + MOD_DEC_USE_COUNT; return 0; @@ -1479,9 +1513,10 @@ */ static void pcnet32_set_multicast_list(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + unsigned long ioaddr = dev->base_addr, flags; struct pcnet32_private *lp = dev->priv; + spin_lock_irqsave(&lp->lock, flags); if (dev->flags&IFF_PROMISC) { /* Log any net taps. */ printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name); @@ -1494,6 +1529,7 @@ lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ pcnet32_restart(dev, 0x0042); /* Resume normal operation */ + spin_unlock_irqrestore(&lp->lock, flags); } #ifdef HAVE_PRIVATE_IOCTL @@ -1501,26 +1537,32 @@ { unsigned long ioaddr = dev->base_addr; struct pcnet32_private *lp = dev->priv; - u16 *data = (u16 *)&rq->ifr_data; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; int phyaddr = lp->a.read_bcr (ioaddr, 33); if (lp->mii) { switch(cmd) { - case SIOCDEVPRIVATE: /* Get the address of the PHY in use. */ - data[0] = (phyaddr >> 5) & 0x1f; + case SIOCGMIIPHY: /* Get the address of the PHY in use. */ + case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ + data->phy_id = (phyaddr >> 5) & 0x1f; /* Fall Through */ - case SIOCDEVPRIVATE+1: /* Read the specified MII register. */ - lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f)); - data[3] = lp->a.read_bcr (ioaddr, 34); + + case SIOCGMIIREG: /* Read the specified MII register. 
*/ + case SIOCDEVPRIVATE+1: /* binary compat, remove in 2.5 */ + lp->a.write_bcr (ioaddr, 33, ((data->phy_id & 0x1f) << 5) | (data->reg_num & 0x1f)); + data->val_out = lp->a.read_bcr (ioaddr, 34); lp->a.write_bcr (ioaddr, 33, phyaddr); return 0; - case SIOCDEVPRIVATE+2: /* Write the specified MII register */ + + case SIOCSMIIREG: /* Write the specified MII register */ + case SIOCDEVPRIVATE+2: /* binary compat, remove in 2.5 */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f)); - lp->a.write_bcr (ioaddr, 34, data[2]); + lp->a.write_bcr (ioaddr, 33, ((data->phy_id & 0x1f) << 5) | (data->reg_num & 0x1f)); + lp->a.write_bcr (ioaddr, 34, data->val_in); lp->a.write_bcr (ioaddr, 33, phyaddr); return 0; + default: return -EOPNOTSUPP; } diff -uNr --exclude=CVS ../kernel.org/linux/drivers/net/tokenring/olympic.c linuxppc64_2_4/drivers/net/tokenring/olympic.c --- ../kernel.org/linux/drivers/net/tokenring/olympic.c Thu Sep 13 18:04:43 2001 +++ linuxppc64_2_4/drivers/net/tokenring/olympic.c Thu Oct 25 07:44:03 2001 @@ -737,7 +737,7 @@ } else { if (buffer_cnt == 1) { - skb = dev_alloc_skb(olympic_priv->pkt_buf_sz) ; + skb = dev_alloc_skb(max_t(int, olympic_priv->pkt_buf_sz,length)) ; } else { skb = dev_alloc_skb(length) ; } @@ -1722,4 +1722,4 @@ module_init(olympic_pci_init) ; module_exit(olympic_pci_cleanup) ; -MODULE_LICENSE("GPL"); \ No newline at end of file +MODULE_LICENSE("GPL"); diff -uNr --exclude=CVS ../kernel.org/linux/drivers/pci/pci.c linuxppc64_2_4/drivers/pci/pci.c --- ../kernel.org/linux/drivers/pci/pci.c Fri Oct 12 17:35:53 2001 +++ linuxppc64_2_4/drivers/pci/pci.c Fri Oct 26 02:25:19 2001 @@ -969,10 +969,10 @@ res = child->resource[0]; pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo); pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo); - base = (io_base_lo & PCI_IO_RANGE_MASK) << 8; - limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8; + base = (unsigned long)(io_base_lo & PCI_IO_RANGE_MASK) << 8; + limit = (unsigned long)(io_limit_lo & PCI_IO_RANGE_MASK) << 8; - if ((base & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) { + if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) { u16 io_base_hi, io_limit_hi; pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi); pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi); @@ -997,8 +997,8 @@ res = child->resource[1]; pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo); - base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; - limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; + base = (unsigned long)(mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; + limit = (unsigned long)(mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; if (base && base <= limit) { res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; @@ -1013,16 +1013,16 @@ res = child->resource[2]; pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); - base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; - limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + base = (unsigned long)(mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit = (unsigned long)(mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { u32 mem_base_hi, mem_limit_hi; pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi); pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi); 
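The unsigned long casts added to drivers/pci/pci.c in this hunk make the integer promotions explicit: if the range-mask macro expands to a plain int constant, a 16-bit window register ANDed with it promotes to int, shifting left by 16 can land in the sign bit, and widening that negative int into the 64-bit unsigned long used for resource addresses sign-extends it. A minimal host-side demonstration, with 0xfff0 standing in for PCI_MEMORY_RANGE_MASK:

#include <stdio.h>

int main(void)
{
        unsigned short mem_base_lo = 0xa000;    /* window register with bit 15 set */
        unsigned long bad, good;

        bad  = (mem_base_lo & 0xfff0) << 16;                    /* int shift, then sign-extend */
        good = (unsigned long) (mem_base_lo & 0xfff0) << 16;    /* widen first, as the patch does */

        printf("bad  = %#lx\n", bad);   /* 0xffffffffa0000000 on an LP64 target */
        printf("good = %#lx\n", good);  /* 0xa0000000 */
        return 0;
}

The ((long) mem_base_hi) << 32 lines corrected just below get the same treatment, keeping the 64-bit address arithmetic unsigned throughout.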
#if BITS_PER_LONG == 64 - base |= ((long) mem_base_hi) << 32; - limit |= ((long) mem_limit_hi) << 32; + base |= ((unsigned long) mem_base_hi) << 32; + limit |= ((unsigned long) mem_limit_hi) << 32; #else if (mem_base_hi || mem_limit_hi) { printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name); diff -uNr --exclude=CVS ../kernel.org/linux/drivers/scsi/sr_ioctl.c linuxppc64_2_4/drivers/scsi/sr_ioctl.c --- ../kernel.org/linux/drivers/scsi/sr_ioctl.c Mon Oct 15 15:27:42 2001 +++ linuxppc64_2_4/drivers/scsi/sr_ioctl.c Fri Oct 26 02:25:21 2001 @@ -334,7 +334,12 @@ { u_char sr_cmd[10]; int result, target = MINOR(cdi->dev); - unsigned char buffer[32]; + unsigned char *buffer = scsi_malloc(512); + + if (buffer == NULL) { + printk("SCSI DMA pool exhausted."); + return -ENOMEM; + } memset(sr_cmd, 0, sizeof(sr_cmd)); @@ -407,6 +412,7 @@ return -EINVAL; } + scsi_free(buffer, 512); #if 0 if (result) printk("DEBUG: sr_audio: result for ioctl %x: %x\n", cmd, result); diff -uNr --exclude=CVS ../kernel.org/linux/drivers/video/offb.c linuxppc64_2_4/drivers/video/offb.c --- ../kernel.org/linux/drivers/video/offb.c Tue Oct 2 11:10:31 2001 +++ linuxppc64_2_4/drivers/video/offb.c Wed Nov 14 21:23:38 2001 @@ -52,7 +52,8 @@ cmap_r128, /* ATI Rage128 */ cmap_M3A, /* ATI Rage Mobility M3 Head A */ cmap_M3B, /* ATI Rage Mobility M3 Head B */ - cmap_radeon /* ATI Radeon */ + cmap_radeon, /* ATI Radeon */ + cmap_gxt2000 /* IBM GXT2000 */ }; struct fb_info_offb { @@ -64,6 +65,7 @@ volatile unsigned char *cmap_adr; volatile unsigned char *cmap_data; int cmap_type; + int blanked; union { #ifdef FBCON_HAS_CFB16 u16 cfb16[16]; @@ -210,9 +212,11 @@ static int offb_get_cmap(struct fb_cmap *cmap, int kspc, int con, struct fb_info *info) { - if (con == currcon) /* current console? */ + struct fb_info_offb *info2 = (struct fb_info_offb *)info; + + if (con == currcon && !info2->blanked) /* current console? */ return fb_get_cmap(cmap, kspc, offb_getcolreg, info); - else if (fb_display[con].cmap.len) /* non default colormap? */ + if (fb_display[con].cmap.len) /* non default colormap? */ fb_copy_cmap(&fb_display[con].cmap, cmap, kspc ? 0 : 2); else { @@ -240,7 +244,7 @@ if ((err = fb_alloc_cmap(&fb_display[con].cmap, size, 0))) return err; } - if (con == currcon) /* current console? */ + if (con == currcon && !info2->blanked) /* current console? */ return fb_set_cmap(cmap, kspc, offb_setcolreg, info); else fb_copy_cmap(cmap, &fb_display[con].cmap, kspc ? 0 : 1); @@ -445,10 +449,19 @@ info->cmap_adr = ioremap(regbase, 0x1FFF); info->cmap_type = cmap_radeon; } else if (!strncmp(name, "ATY,", 4)) { + /* Hrm... this is bad... any recent ATI not covered + * by the previous cases will get there, while this + * cose is only good for mach64's. Gotta figure out + * a proper fix... --BenH. + */ unsigned long base = address & 0xff000000UL; info->cmap_adr = ioremap(base + 0x7ff000, 0x1000) + 0xcc0; info->cmap_data = info->cmap_adr + 1; info->cmap_type = cmap_m64; + } else if (dp && device_is_compatible(dp, "pci1014,b7")) { + unsigned long regbase = dp->addrs[0].address; + info->cmap_adr = ioremap(regbase + 0x6000, 0x1000); + info->cmap_type = cmap_gxt2000; } fix->visual = info->cmap_adr ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_STATIC_PSEUDOCOLOR; @@ -620,8 +633,10 @@ static int offbcon_switch(int con, struct fb_info *info) { + struct fb_info_offb *info2 = (struct fb_info_offb *)info; + /* Do we have to save the colormap? 
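The sr_ioctl.c change in this hunk trades a 32-byte stack buffer for one from the SCSI DMA pool. In 2.4 that pool is carved in 512-byte units, and every successful scsi_malloc() must be paired with a scsi_free() of the same length on each exit path. A pattern sketch under those assumptions (the extern declarations are recalled from the 2.4 SCSI layer and should be checked against drivers/scsi):

#include <errno.h>

extern void *scsi_malloc(unsigned int len);     /* assumed 2.4 signatures */
extern int scsi_free(void *ptr, unsigned int len);

static int cdrom_cmd_sketch(void)
{
        unsigned char *buffer = scsi_malloc(512);   /* length must be a multiple of 512 */

        if (buffer == NULL)
                return -ENOMEM;                     /* DMA pool exhausted */

        /* ... build the command, issue it, copy the results out ... */

        scsi_free(buffer, 512);                     /* same length, on every return path */
        return 0;
}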
*/ - if (fb_display[currcon].cmap.len) + if (fb_display[currcon].cmap.len && !info2->blanked) fb_get_cmap(&fb_display[currcon].cmap, 1, offb_getcolreg, info); currcon = con; @@ -652,6 +667,15 @@ if (!info2->cmap_adr) return; + if (!info2->blanked) { + if (!blank) + return; + if (fb_display[currcon].cmap.len) + fb_get_cmap(&fb_display[currcon].cmap, 1, offb_getcolreg, info); + } + + info2->blanked = blank; + if (blank) for (i = 0; i < 256; i++) { switch(info2->cmap_type) { @@ -664,26 +688,29 @@ } break; case cmap_M3A: - /* Clear PALETTE_ACCESS_CNTL in DAC_CNTL */ - out_le32((unsigned *)(info2->cmap_adr + 0x58), - in_le32((unsigned *)(info2->cmap_adr + 0x58)) & ~0x20); + /* Clear PALETTE_ACCESS_CNTL in DAC_CNTL */ + out_le32((unsigned *)(info2->cmap_adr + 0x58), + in_le32((unsigned *)(info2->cmap_adr + 0x58)) & ~0x20); case cmap_r128: - /* Set palette index & data */ - out_8(info2->cmap_adr + 0xb0, i); - out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); - break; + /* Set palette index & data */ + out_8(info2->cmap_adr + 0xb0, i); + out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); + break; case cmap_M3B: - /* Set PALETTE_ACCESS_CNTL in DAC_CNTL */ - out_le32((unsigned *)(info2->cmap_adr + 0x58), - in_le32((unsigned *)(info2->cmap_adr + 0x58)) | 0x20); - /* Set palette index & data */ - out_8(info2->cmap_adr + 0xb0, i); - out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); - break; + /* Set PALETTE_ACCESS_CNTL in DAC_CNTL */ + out_le32((unsigned *)(info2->cmap_adr + 0x58), + in_le32((unsigned *)(info2->cmap_adr + 0x58)) | 0x20); + /* Set palette index & data */ + out_8(info2->cmap_adr + 0xb0, i); + out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); + break; case cmap_radeon: - out_8(info2->cmap_adr + 0xb0, i); - out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); - break; + out_8(info2->cmap_adr + 0xb0, i); + out_le32((unsigned *)(info2->cmap_adr + 0xb4), 0); + break; + case cmap_gxt2000: + out_le32((unsigned *)info2->cmap_adr + i, 0); + break; } } else @@ -768,6 +795,10 @@ out_8(info2->cmap_adr + 0xb0, regno); out_le32((unsigned *)(info2->cmap_adr + 0xb4), (red << 16 | green << 8 | blue)); + break; + case cmap_gxt2000: + out_le32((unsigned *)info2->cmap_adr + regno, + (red << 16 | green << 8 | blue)); break; } diff -uNr --exclude=CVS ../kernel.org/linux/fs/Config.in linuxppc64_2_4/fs/Config.in --- ../kernel.org/linux/fs/Config.in Thu Oct 4 17:13:18 2001 +++ linuxppc64_2_4/fs/Config.in Wed Nov 14 10:19:35 2001 @@ -43,6 +43,8 @@ dep_mbool ' Microsoft Joliet CDROM extensions' CONFIG_JOLIET $CONFIG_ISO9660_FS tristate 'Minix fs support' CONFIG_MINIX_FS +tristate 'JFS filesystem support' CONFIG_JFS_FS +dep_mbool ' JFS debugging' CONFIG_JFS_DEBUG $CONFIG_JFS_FS tristate 'FreeVxFS file system support (VERITAS VxFS(TM) compatible)' CONFIG_VXFS_FS tristate 'NTFS file system support (read only)' CONFIG_NTFS_FS diff -uNr --exclude=CVS ../kernel.org/linux/fs/Makefile linuxppc64_2_4/fs/Makefile --- ../kernel.org/linux/fs/Makefile Thu Oct 4 17:13:18 2001 +++ linuxppc64_2_4/fs/Makefile Fri Oct 26 02:35:30 2001 @@ -63,6 +63,7 @@ subdir-$(CONFIG_REISERFS_FS) += reiserfs subdir-$(CONFIG_DEVPTS_FS) += devpts subdir-$(CONFIG_SUN_OPENPROMFS) += openpromfs +subdir-$(CONFIG_JFS_FS) += jfs obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/Makefile linuxppc64_2_4/fs/jfs/Makefile --- ../kernel.org/linux/fs/jfs/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/Makefile Wed Nov 14 10:19:35 2001 @@ -0,0 +1,20 @@ +# +# Makefile for the Linux JFS filesystem routines. 
+# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile. + +O_TARGET := jfs.o +obj-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ + jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ + jfs_unicode.o jfs_dtree.o jfs_inode.o \ + jfs_extent.o symlink.o jfs_metapage.o \ + jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o +obj-m := $(O_TARGET) + +EXTRA_CFLAGS += -D_JFS_4K + +include $(TOPDIR)/Rules.make diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/dir.c linuxppc64_2_4/fs/jfs/dir.c --- ../kernel.org/linux/fs/jfs/dir.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/dir.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,112 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int jfs_create(struct inode *, struct dentry *, int); +extern int jfs_mkdir(struct inode *, struct dentry *, int); +extern int jfs_unlink(struct inode *, struct dentry *); +extern int jfs_rmdir(struct inode *, struct dentry *); +extern int jfs_link(struct dentry *, struct inode *, struct dentry *); +extern int jfs_symlink(struct inode *, struct dentry *, const char *); +extern int jfs_rename(struct inode *, struct dentry *, struct inode *, + struct dentry *); +extern int jfs_mknod(struct inode *, struct dentry *, int, int); +extern int jfs_fsync_file(struct file *, struct dentry *, int); + +static ssize_t jfs_dir_read(struct file *filp, + char *buf, size_t count, loff_t * ppos) +{ + return -EISDIR; +} + +struct file_operations jfs_dir_operations = { + fsync: jfs_fsync_file, + read: jfs_dir_read, + readdir: jfs_readdir, +}; + +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +{ + btstack_t btstack; + ino_t inum; + struct inode *ip; + component_t key; + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + int rc; + + jFYI(1, ("jfs_lookup: name = %s\n", name)); + + + if ((name[0] == '.') && (len == 1)) + inum = dip->i_ino; + else if (strcmp(name, "..") == 0) + inum = PARENT(dip); + else { + if ((rc = + get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + return ERR_PTR(-rc); + IREAD_LOCK(dip); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + IREAD_UNLOCK(dip); + free_UCSname(&key); + if (rc == ENOENT) { + d_add(dentry, NULL); + return ERR_PTR(0); + } else if (rc) { + jERROR(1, + ("jfs_lookup: dtSearch returned %d\n", rc)); + return ERR_PTR(-rc); + } + } + + ip = iget(dip->i_sb, inum); + if (ip == NULL) { + jERROR(1, + ("jfs_lookup: iget failed on inum %d\n", + (uint) inum)); + return ERR_PTR(-EACCES); + } + + 
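Two VFS conventions carry jfs_lookup() above: a missing name is answered with d_add(dentry, NULL), which caches a negative dentry so repeated lookups of the same absent name never reach dtSearch() again, and hard failures travel back as error-encoded pointers. For reference, the 2.4-era helpers behind that second convention look roughly like this (recalled from include/linux/fs.h of the period):

static inline void *ERR_PTR(long error)
{
        return (void *) error;
}

static inline long PTR_ERR(const void *ptr)
{
        return (long) ptr;
}

static inline long IS_ERR(const void *ptr)
{
        /* errnos occupy the last page of the address space */
        return (unsigned long) ptr > (unsigned long) -1000L;
}

ERR_PTR(0), the success return used here, is simply NULL, which the VFS takes to mean the dentry was handled.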
d_add(dentry, ip); + + return ERR_PTR(0); +} + +struct inode_operations jfs_dir_inode_operations = { + create: jfs_create, + lookup: jfs_lookup, + link: jfs_link, + unlink: jfs_unlink, + symlink: jfs_symlink, + mkdir: jfs_mkdir, + rmdir: jfs_rmdir, + mknod: jfs_mknod, + rename: jfs_rename, +}; diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/file.c linuxppc64_2_4/fs/jfs/file.c --- ../kernel.org/linux/fs/jfs/file.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/file.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,99 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +extern int jfs_commit_inode(struct inode *, int); + +int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int rc = 0; + + rc = fsync_inode_data_buffers(inode); + + if (!(inode->i_state & I_DIRTY)) + return rc; + if (datasync || !(inode->i_state & I_DIRTY_DATASYNC)) + return rc; + + IWRITE_LOCK(inode); + rc |= jfs_commit_inode(inode, 1); + IWRITE_UNLOCK(inode); + + return rc ? -EIO : 0; +} + +struct file_operations jfs_file_operations = { + write: generic_file_write, + read: generic_file_read, + mmap: generic_file_mmap, + fsync: jfs_fsync, +}; + +/* + * Guts of jfs_truncate. Called with locks already held. Can be called + * with directory for truncating directory index table. + */ +void jfs_truncate_nolock(struct inode *ip, loff_t length) +{ + loff_t newsize; + int tid = 0; + + ASSERT(length >= 0); + + if (test_cflag(COMMIT_Nolink, ip)) { + xtTruncate(0, ip, length, COMMIT_WMAP); + return; + } + + do { + txBegin(ip->i_sb, &tid, 0); + + newsize = xtTruncate(tid, ip, length, + COMMIT_TRUNCATE | COMMIT_PWMAP); + if (newsize < 0) { + txEnd(tid); + break; + } + + ip->i_mtime = ip->i_ctime = CURRENT_TIME; + mark_inode_dirty(ip); + + txCommit(tid, 1, &ip, 0); + txEnd(tid); + } while (newsize > length); /* Truncate isn't always atomic */ +} + +static void jfs_truncate(struct inode *ip) +{ + jFYI(1, ("jfs_truncate: size = 0x%lx\n", (ulong) ip->i_size)); + + IWRITE_LOCK(ip); + jfs_truncate_nolock(ip, ip->i_size); + IWRITE_UNLOCK(ip); +} + +struct inode_operations jfs_file_inode_operations = { + truncate: jfs_truncate, +}; diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/inode.c linuxppc64_2_4/fs/jfs/inode.c --- ../kernel.org/linux/fs/jfs/inode.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/inode.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,323 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern struct inode_operations jfs_dir_inode_operations; +extern struct inode_operations jfs_file_inode_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct file_operations jfs_dir_operations; +extern struct file_operations jfs_file_operations; +struct address_space_operations jfs_aops; +extern int freeZeroLink(struct inode *); + +kmem_cache_t *jfs_inode_cachep; + + +void jfs_put_inode(struct inode *inode) +{ + jFYI(1, ("In jfs_put_inode, inode = 0x%p\n", inode)); +} + +void jfs_read_inode(struct inode *inode) +{ + struct jfs_inode_info *jfs_ip; + + jfs_ip = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); + inode->u.generic_ip = jfs_ip; + if (!jfs_ip) { + printk(__FUNCTION__ ": failed."); + goto bad_inode; + } + memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); + + jFYI(1, ("In jfs_read_inode, inode = 0x%p\n", inode)); + + if (diRead(inode)) + goto bad_inode_free; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &jfs_file_inode_operations; + inode->i_fop = &jfs_file_operations; + inode->i_mapping->a_ops = &jfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &jfs_dir_inode_operations; + inode->i_fop = &jfs_dir_operations; + inode->i_mapping->a_ops = &jfs_aops; + inode->i_mapping->gfp_mask = GFP_NOFS | __GFP_HIGHMEM; + } else if (S_ISLNK(inode->i_mode)) { + if (inode->i_size > IDATASIZE) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &jfs_aops; + } else + inode->i_op = &jfs_symlink_inode_operations; + } else { + init_special_inode(inode, inode->i_mode, + kdev_t_to_nr(inode->i_rdev)); + } + + return; + +bad_inode_free: + kmem_cache_free(jfs_inode_cachep, JFS_IP(inode)); +bad_inode: + make_bad_inode(inode); +} + +/* This define is from fs/open.c */ +#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) + +/* + * Workhorse of both fsync & write_inode + */ +int jfs_commit_inode(struct inode *inode, int wait) +{ + int rc = 0; + int tid; + static int noisy = 5; + + jFYI(1, ("In jfs_commit_inode, inode = 0x%p\n", inode)); + + /* + * Don't commit if inode has been committed since last being + * marked dirty, or if it has been deleted. + */ + if (test_cflag(COMMIT_Nolink, inode) || + !test_cflag(COMMIT_Dirty, inode)) + return 0; + + if (isReadOnly(inode)) { + /* kernel allows writes to devices on read-only + * partitions and may think inode is dirty + */ + if(!special_file(inode->i_mode) && noisy) { + jERROR(1,("jfs_commit_inode(0x%p) called on " + "read-only volume\n", inode)); + jERROR(1,("Is remount racy?\n")); + noisy--; + } + return 0; + } + + txBegin(inode->i_sb, &tid, COMMIT_INODE); + rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); + txEnd(tid); + return -rc; +} + +void jfs_write_inode(struct inode *inode, int wait) +{ + /* + * If COMMIT_DIRTY is not set, the inode isn't really dirty. 
+ * It has been committed since the last change, but was still + * on the dirty inode list + */ + if (test_cflag(COMMIT_Nolink, inode) || + !test_cflag(COMMIT_Dirty, inode)) + return; + + IWRITE_LOCK(inode); + + if (jfs_commit_inode(inode, wait)) { + jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n")); + } + + IWRITE_UNLOCK(inode); +} + +void jfs_delete_inode(struct inode *inode) +{ + jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode)); + + IWRITE_LOCK(inode); + if (test_cflag(COMMIT_Freewmap, inode)) + freeZeroLink(inode); + + diFree(inode); + IWRITE_UNLOCK(inode); + + clear_inode(inode); +} + +void jfs_dirty_inode(struct inode *inode) +{ + static int noisy = 5; + + if (isReadOnly(inode)) { + if(!special_file(inode->i_mode) && noisy) { + /* kernel allows writes to devices on read-only + * partitions and may try to mark inode dirty + */ + jERROR(1,("jfs_dirty_inode called on " + "read-only volume\n")); + jERROR(1,("Is remount racy?\n")); + noisy--; + } + return; + } + + set_cflag(COMMIT_Dirty, inode); +} + +static int jfs_get_block(struct inode *ip, long lblock, + struct buffer_head *bh_result, int create) +{ + s64 lblock64 = lblock; + int no_size_check = 0; + int rc = 0; + int take_locks; + xad_t xad; + s64 xaddr; + int xflag; + s32 xlen; + + /* + * If this is a special inode (imap, dmap) or directory, + * the lock should already be taken + */ + take_locks = ((JFS_IP(ip)->fileset != AGGREGATE_I) && + !S_ISDIR(ip->i_mode)); + /* + * Take appropriate lock on inode + */ + if (take_locks) { + if (create) + IWRITE_LOCK(ip); + else + IREAD_LOCK(ip); + } + + /* + * A directory's "data" is the inode index table, but i_size is the + * size of the d-tree, so don't check the offset against i_size + */ + if (S_ISDIR(ip->i_mode)) + no_size_check = 1; + + if ((no_size_check || + ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) && + (xtLookup(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check) + == 0) && xlen) { + if (xflag & XAD_NOTRECORDED) { + if (!create) + /* + * Allocated but not recorded, read treats + * this as a hole + */ + goto unlock; +#ifdef _JFS_4K + XADoffset(&xad, lblock64); + XADlength(&xad, xlen); + XADaddress(&xad, xaddr); +#else /* _JFS_4K */ + /* + * As long as block size = 4K, this isn't a problem. + * We should mark the whole page not ABNR, but how + * will we know to mark the other blocks BH_New? 
+ */ + BUG(); +#endif /* _JFS_4K */ + rc = extRecord(ip, &xad); + if (rc) + goto unlock; + bh_result->b_state |= (1UL << BH_New); + } + + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = xaddr; + bh_result->b_state |= (1UL << BH_Mapped); + goto unlock; + } + if (!create) + goto unlock; + + /* + * Allocate a new block + */ +#ifdef _JFS_4K + if ((rc = + extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) + goto unlock; + rc = extAlloc(ip, 1, lblock64, &xad, FALSE); + if (rc) + goto unlock; + + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = addressXAD(&xad); + bh_result->b_state |= ((1UL << BH_Mapped) | (1UL << BH_New)); + +#else /* _JFS_4K */ + /* + * We need to do whatever it takes to keep all but the last buffers + * in 4K pages - see jfs_write.c + */ + BUG(); +#endif /* _JFS_4K */ + + unlock: + /* + * Release lock on inode + */ + if (take_locks) { + if (create) + IWRITE_UNLOCK(ip); + else + IREAD_UNLOCK(ip); + } + return -rc; +} + +static int jfs_writepage(struct page *page) +{ + return block_write_full_page(page, jfs_get_block); +} + +static int jfs_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page, jfs_get_block); +} + +static int jfs_prepare_write(struct file *file, + struct page *page, unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, jfs_get_block); +} + +static int jfs_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping, block, jfs_get_block); +} + +struct address_space_operations jfs_aops = { + readpage: jfs_readpage, + writepage: jfs_writepage, + sync_page: block_sync_page, + prepare_write: jfs_prepare_write, + commit_write: generic_commit_write, + bmap: jfs_bmap, +}; diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_debug.c linuxppc64_2_4/fs/jfs/jfs_debug.c --- ../kernel.org/linux/fs/jfs/jfs_debug.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_debug.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,132 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#define __NO_VERSION__ +#include +#include +#include + +#ifdef CONFIG_JFS_DEBUG + +void dump_mem(char *label, void *data, int length) +{ + int i, j; + int *intptr = data; + char *charptr = data; + char buf[10], line[80]; + + printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length, + data); + for (i = 0; i < length; i += 16) { + line[0] = 0; + for (j = 0; (j < 4) && (i + j * 4 < length); j++) { + sprintf(buf, " %08x", intptr[i / 4 + j]); + strcat(line, buf); + } + buf[0] = ' '; + buf[2] = 0; + for (j = 0; (j < 16) && (i + j < length); j++) { + buf[1] = + isprint(charptr[i + j]) ? 
charptr[i + j] : '.'; + strcat(line, buf); + } + printk("%s\n", line); + } +} + +#if CONFIG_PROC_FS +struct proc_dir_entry *proc_fs_jfs; +read_proc_t jfs_txanchor_read; +read_proc_t jfs_logmgr_read; +static read_proc_t jfsFYI_read; +static write_proc_t jfsFYI_write; + +void jfs_proc_init(void) +{ + struct proc_dir_entry *pde; + + proc_fs_jfs = proc_mkdir("jfs", proc_root_fs); + if (proc_fs_jfs == NULL) + return; + + proc_fs_jfs->owner = THIS_MODULE; + create_proc_read_entry("TxAnchor", 0, proc_fs_jfs, + jfs_txanchor_read, 0); + create_proc_read_entry("logmgr", 0, proc_fs_jfs, + jfs_logmgr_read, 0); + pde = + create_proc_read_entry("jfsFYI", 0, proc_fs_jfs, jfsFYI_read, + 0); + if (pde) + pde->write_proc = jfsFYI_write; + +} + +void jfs_proc_clean(void) +{ + if (proc_fs_jfs == NULL) + return; + + remove_proc_entry("logmgr", proc_fs_jfs); + remove_proc_entry("TxAnchor", proc_fs_jfs); + remove_proc_entry("jfsFYI", proc_fs_jfs); + remove_proc_entry("jfs", proc_root_fs); +} +static int jfsFYI_read(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d\n", jfsFYI); + + len -= off; + *start = page + off; + + if (len > count) + len = count; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +static int jfsFYI_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char c; + int rc; + + rc = get_user(c, buffer); + if (rc) + return rc; + if (c == '0' || c == 'n' || c == 'N') + jfsFYI = 0; + else if (c == '1' || c == 'y' || c == 'Y') + jfsFYI = 1; + + return count; +} +#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_JFS_DEBUG */ diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_dmap.c linuxppc64_2_4/fs/jfs/jfs_dmap.c --- ../kernel.org/linux/fs/jfs/jfs_dmap.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_dmap.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,4190 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
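The dump_mem() helper above, compiled only under CONFIG_JFS_DEBUG, prints sixteen bytes per line as four 32-bit words in hex followed by a character-per-byte ASCII rendering. A hypothetical call site, using the metapage type that appears later in this patch:

        /* inspect the head of a freshly read metadata page; "dmap page"
         * is just the label that prefixes the dump in the system log */
        dump_mem("dmap page", mp->data, 32);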
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * MODULE_NAME: jfs_dmap.c + * + * COMPONENT_NAME: sysjfs + * + * FUNCTION: block allocation map manager + * +*/ + +/* + * Change History : + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Debug code for double-checking block map + */ +/* #define _JFS_DEBUG_DMAP 1 */ + +#ifdef _JFS_DEBUG_DMAP +#define DBINITMAP(size,ipbmap,results) \ + DBinitmap(size,ipbmap,results) +#define DBALLOC(dbmap,mapsize,blkno,nblocks) \ + DBAlloc(dbmap,mapsize,blkno,nblocks) +#define DBFREE(dbmap,mapsize,blkno,nblocks) \ + DBFree(dbmap,mapsize,blkno,nblocks) +#define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \ + DBAllocCK(dbmap,mapsize,blkno,nblocks) +#define DBFREECK(dbmap,mapsize,blkno,nblocks) \ + DBFreeCK(dbmap,mapsize,blkno,nblocks) + +static void DBinitmap(s64, struct inode *, u32 **); +static void DBAlloc(uint *, s64, s64, s64); +static void DBFree(uint *, s64, s64, s64); +static void DBAllocCK(uint *, s64, s64, s64); +static void DBFreeCK(uint *, s64, s64, s64); +#else +#define DBINITMAP(size,ipbmap,results) +#define DBALLOC(dbmap, mapsize, blkno, nblocks) +#define DBFREE(dbmap, mapsize, blkno, nblocks) +#define DBALLOCCK(dbmap, mapsize, blkno, nblocks) +#define DBFREECK(dbmap, mapsize, blkno, nblocks) +#endif /* _JFS_DEBUG_DMAP */ + +/* + * SERIALIZATION of the Block Allocation Map. + * + * the working state of the block allocation map is accessed in + * two directions: + * + * 1) allocation and free requests that start at the dmap + * level and move up through the dmap control pages (i.e. + * the vast majority of requests). + * + * 2) allocation requests that start at dmap control page + * level and work down towards the dmaps. + * + * the serialization scheme used here is as follows. + * + * requests which start at the bottom are serialized against each + * other through buffers and each requests holds onto its buffers + * as it works it way up from a single dmap to the required level + * of dmap control page. + * requests that start at the top are serialized against each other + * and request that start from the bottom by the multiple read/single + * write inode lock of the bmap inode. requests starting at the top + * take this lock in write mode while request starting at the bottom + * take the lock in read mode. a single top-down request may proceed + * exclusively while multiple bottoms-up requests may proceed + * simultaneously (under the protection of busy buffers). + * + * in addition to information found in dmaps and dmap control pages, + * the working state of the block allocation map also includes read/ + * write information maintained in the bmap descriptor (i.e. total + * free block count, allocation group level free block counts). + * a single exclusive lock (BMAP_LOCK) is used to guard this information + * in the face of multiple-bottoms up requests. + * (lock ordering: IREAD_LOCK, BMAP_LOCK); + * + * accesses to the persistent state of the block allocation map (limited + * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. 
+ */ + +#define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock) +#define BMAP_LOCK(bmp) down(&bmp->db_bmaplock) +#define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock) + +/* + * forward references + */ +static void dbAllocBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks); +static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); +static void dbBackSplit(dmtree_t * tp, int leafno); +static void dbJoin(dmtree_t * tp, int leafno, int newval); +static void dbAdjTree(dmtree_t * tp, int leafno, int newval); +static int dbAdjCtl(bmap_t * bmp, s64 blkno, int newval, int alloc, + int level); +static int dbAllocAny(bmap_t * bmp, s64 nblocks, int l2nb, s64 * results); +static int dbAllocNext(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks); +static int dbAllocNear(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks, + int l2nb, s64 * results); +static int dbAllocDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks); +static int dbAllocDmapLev(bmap_t * bmp, dmap_t * dp, int nblocks, int l2nb, + s64 * results); +static int dbAllocAG(bmap_t * bmp, int agno, s64 nblocks, int l2nb, + s64 * results); +static int dbAllocCtl(bmap_t * bmp, s64 nblocks, int l2nb, s64 blkno, + s64 * results); +int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); +static int dbFindBits(u32 word, int l2nb); +static int dbFindCtl(bmap_t * bmp, int l2nb, int level, s64 * blkno); +static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); +static void dbFreeBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks); +static int dbFreeDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks); +static int dbMaxBud(u8 * cp); +s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +int blkstol2(s64 nb); +void fsDirty(void); + +int cntlz(u32 value); +int cnttz(u32 word); + +static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, + int nblocks); +static int dbInitDmap(dmap_t * dp, s64 blkno, int nblocks); +static int dbInitDmapTree(dmap_t * dp); +static int dbInitTree(dmaptree_t * dtp); +static int dbInitDmapCtl(dmapctl_t * dcp, int level, int i); +static int dbGetL2AGSize(s64 nblocks); + +/* + * buddy table + * + * table used for determining buddy sizes within characters of + * dmap bitmap words. the characters themselves serve as indexes + * into the table, with the table elements yielding the maximum + * binary buddy of free bits within the character. + */ +signed char budtab[256] = { + 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 +}; + + +/* + * NAME: dbMount() + * + * FUNCTION: initializate the block allocation map. + * + * memory is allocated for the in-core bmap descriptor and + * the in-core descriptor is initialized from disk. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. 
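budtab[] above encodes, for each of the 256 possible bytes of a dmap bitmap word (where a 0 bit is a free block), the log2 size of the largest naturally aligned run of free bits, with -1 meaning no bit is free. A self-contained brute-force cross-check of representative entries:

#include <stdio.h>

/* recompute one budtab entry the slow way: try buddy sizes 8, 4, 2, 1
 * at their natural alignments, with bits numbered from the MSB */
static int maxbud(unsigned int c)
{
        int l2;

        for (l2 = 3; l2 >= 0; l2--) {
                int size = 1 << l2, off;

                for (off = 0; off < 8; off += size)
                        if (((c >> (8 - off - size)) & ((1u << size) - 1)) == 0)
                                return l2;
        }
        return -1;
}

int main(void)
{
        /* matches budtab: [0x00]=3, [0x0f]=2, [0x55]=0, [0xff]=-1 */
        printf("%d %d %d %d\n",
               maxbud(0x00), maxbud(0x0f), maxbud(0x55), maxbud(0xff));
        return 0;
}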
+ * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient memory + * EIO - i/o error + */ +int dbMount(struct inode *ipbmap) +{ + bmap_t *bmp; + dbmap_t *dbmp_le; + metapage_t *mp; + int i; + + /* + * allocate/initialize the in-memory bmap descriptor + */ + /* allocate memory for the in-memory bmap descriptor */ + bmp = kmalloc(sizeof(bmap_t), GFP_KERNEL); + if (bmp == NULL) + return (ENOMEM); + + /* read the on-disk bmap descriptor. */ + mp = read_metapage(ipbmap, + BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + kfree(bmp); + return (EIO); + } + + /* copy the on-disk bmap descriptor to its in-memory version. */ + dbmp_le = (dbmap_t *) mp->data; + bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); + bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); + bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); + bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); + bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); + bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); + bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); + bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); + bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); + bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); + for (i = 0; i < MAXAG; i++) + bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); + bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); + bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; + + /* release the buffer. */ + release_metapage(mp); + + /* bind the bmap inode and the bmap descriptor to each other. */ + bmp->db_ipbmap = ipbmap; + JFS_SBI(ipbmap->i_sb)->bmap = bmp; + + DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap); + + /* + * allocate/initialize the bmap lock + */ + BMAP_LOCK_INIT(bmp); + + return (0); +} + + +/* + * NAME: dbUnmount() + * + * FUNCTION: terminate the block allocation map in preparation for + * file system unmount. + * + * the in-core bmap descriptor is written to disk and + * the memory for this descriptor is freed. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int dbUnmount(struct inode *ipbmap, int mounterror) +{ + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + + if (!(mounterror || isReadOnly(ipbmap))) + dbSync(ipbmap); + + /* + * Invalidate the page cache buffers + */ + truncate_inode_pages(ipbmap->i_mapping, 0); + + /* free the memory for the in-memory bmap. */ + kfree(bmp); + + return (0); +} + +/* + * dbSync() + */ +int dbSync(struct inode *ipbmap) +{ + dbmap_t *dbmp_le; + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + metapage_t *mp; + int i; + + /* + * write bmap global control page + */ + /* get the buffer for the on-disk bmap descriptor. 
*/ + mp = read_metapage(ipbmap, + BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + jERROR(1,("dbSync: read_metapage failed!\n")); + return (EIO); + } + /* copy the in-memory version of the bmap to the on-disk version */ + dbmp_le = (dbmap_t *) mp->data; + dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); + dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); + dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); + dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); + dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); + dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); + dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); + dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); + dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); + dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); + dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); + for (i = 0; i < MAXAG; i++) + dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); + dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); + dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; + + /* write the buffer */ + write_metapage(mp); + + /* + * write out dirty pages of bmap + */ + fsync_inode_data_buffers(ipbmap); + + ipbmap->i_state |= I_DIRTY; + diWriteSpecial(ipbmap); + + return (0); +} + + +/* + * NAME: dbFree() + * + * FUNCTION: free the specified block range from the working block + * allocation map. + * + * the blocks will be free from the working map one dmap + * at a time. + * + * PARAMETERS: + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int dbFree(struct inode *ip, s64 blkno, s64 nblocks) +{ + metapage_t *mp; + dmap_t *dp; + int nb, rc; + s64 lblkno, rem; + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap; + + IREAD_LOCK(ipbmap); + + /* block to be freed better be within the mapsize. */ + assert(blkno + nblocks <= bmp->db_mapsize); + + /* + * free the blocks a dmap at a time. + */ + mp = NULL; + for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { + /* release previous dmap if any */ + if (mp) { + write_metapage(mp); + } + + /* get the buffer for the current dmap. */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* determine the number of blocks to be freed from + * this dmap. + */ + nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); + + DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb); + + /* free the blocks. */ + if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { + release_metapage(mp); + IREAD_UNLOCK(ipbmap); + return (rc); + } + + DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb); + } + + /* write the last buffer. */ + write_metapage(mp); + + IREAD_UNLOCK(ipbmap); + + return (0); +} + + +/* + * NAME: dbUpdatePMap() + * + * FUNCTION: update the allocation state (free or allocate) of the + * specified block range in the persistent block allocation map. + * + * the blocks will be updated in the persistent map one + * dmap at a time. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. + * free - TRUE if block range is to be freed from the persistent + * map; FALSE if it is to be allocated. + * blkno - starting block number of the range. + * nblocks - number of contiguous blocks in the range. 
+ * tblk - transaction block; + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int +dbUpdatePMap(struct inode *ipbmap, + int free, s64 blkno, s64 nblocks, tblock_t * tblk) +{ + int nblks, dbitno, wbitno, rbits; + int word, nbits, nwords; + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + s64 lblkno, rem, lastlblkno; + u32 mask; + dmap_t *dp; + metapage_t *mp; + log_t *log; + int lsn, difft, diffp; + + /* the blocks better be within the mapsize. */ + assert(blkno + nblocks <= bmp->db_mapsize); + + /* compute delta of transaction lsn from log syncpt */ + lsn = tblk->lsn; + log = (log_t *) JFS_SBI(tblk->sb)->log; + logdiff(difft, lsn, log); + + /* + * update the block state a dmap at a time. + */ + mp = NULL; + lastlblkno = 0; + for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { + /* get the buffer for the current dmap. */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + if (lblkno != lastlblkno) { + if (mp) { + write_metapage(mp); + } + + mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, + 0); + if (mp == NULL) + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* determine the bit number and word within the dmap of + * the starting block. also determine how many blocks + * are to be updated within this dmap. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + nblks = min(rem, (s64)BPERDMAP - dbitno); + + /* update the bits of the dmap words. the first and last + * words may only have a subset of their bits updated. if + * this is the case, we'll work against that word (i.e. + * partial first and/or last) only in a single pass. a + * single pass will also be used to update all words that + * are to have all their bits updated. + */ + for (rbits = nblks; rbits > 0; + rbits -= nbits, dbitno += nbits) { + /* determine the bit number within the word and + * the number of bits within the word. + */ + wbitno = dbitno & (DBWORD - 1); + nbits = min(rbits, DBWORD - wbitno); + + /* check if only part of the word is to be updated. */ + if (nbits < DBWORD) { + /* update (free or allocate) the bits + * in this word. + */ + mask = + (ONES << (DBWORD - nbits) >> wbitno); + if (free) + dp->pmap[word] &= + cpu_to_le32(~mask); + else + dp->pmap[word] |= + cpu_to_le32(mask); + + word += 1; + } else { + /* one or more words are to have all + * their bits updated. determine how + * many words and how many bits. + */ + nwords = rbits >> L2DBWORD; + nbits = nwords << L2DBWORD; + + /* update (free or allocate) the bits + * in these words. + */ + if (free) + memset(&dp->pmap[word], 0, + nwords * 4); + else + memset(&dp->pmap[word], (int) ONES, + nwords * 4); + + word += nwords; + } + } + + /* + * update dmap lsn + */ + if (lblkno == lastlblkno) + continue; + + lastlblkno = lblkno; + + if (mp->lsn != 0) { + /* inherit older/smaller lsn */ + logdiff(diffp, mp->lsn, log); + if (difft < diffp) { + mp->lsn = lsn; + + /* move bp after tblock in logsync list */ + LOGSYNC_LOCK(log); + list_del(&mp->synclist); + list_add(&mp->synclist, &tblk->synclist); + LOGSYNC_UNLOCK(log); + } + + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } else { + mp->log = log; + mp->lsn = lsn; + + /* insert bp after tblock in logsync list */ + LOGSYNC_LOCK(log); + + log->count++; + list_add(&mp->synclist, &tblk->synclist); + + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } + } + + /* write the last buffer. 
*/ + if (mp) { + write_metapage(mp); + } + + return (0); +} + + +/* + * NAME: dbNextAG() + * + * FUNCTION: find the preferred allocation group for new allocations. + * + * we try to keep the trailing (rightmost) allocation groups + * free for large allocations. we try to do this by targeting + * new inode allocations towards the leftmost or 'active' + * allocation groups while keeping the rightmost or 'inactive' + * allocation groups free. once the active allocation groups + * have dropped to a certain percentage of free space, we add + * the leftmost inactive allocation group to the active set. + * + * within the active allocation groups, we maintain a preferred + * allocation group which consists of a group with at least + * average free space over the active set. it is the preferred + * group that we target new inode allocation towards. the + * tie-in between inode allocation and block allocation occurs + * as we allocate the first (data) block of an inode and specify + * the inode (block) as the allocation hint for this block. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. + * + * RETURN VALUES: + * the preferred allocation group number. + * + * note: only called by dbAlloc(); + */ +int dbNextAG(struct inode *ipbmap) +{ + s64 avgfree, inactfree, actfree, rem; + int actags, inactags, l2agsize; + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + + BMAP_LOCK(bmp); + + /* determine the number of active allocation groups (i.e. + * the number of allocation groups up to and including + * the rightmost allocation group with blocks allocated + * in it. + */ + actags = bmp->db_maxag + 1; + assert(actags <= bmp->db_numag); + + /* get the number of inactive allocation groups (i.e. the + * number of allocation group following the rightmost group + * with allocation in it. + */ + inactags = bmp->db_numag - actags; + + /* determine how many blocks are in the inactive allocation + * groups. in doing this, we must account for the fact that + * the rightmost group might be a partial group (i.e. file + * system size is not a multiple of the group size). + */ + l2agsize = bmp->db_agl2size; + rem = bmp->db_mapsize & (bmp->db_agsize - 1); + inactfree = (inactags + && rem) ? ((inactags - 1) << l2agsize) + + rem : inactags << l2agsize; + + /* now determine how many free blocks are in the active + * allocation groups plus the average number of free blocks + * within the active ags. + */ + actfree = bmp->db_nfree - inactfree; + avgfree = (u32) actfree / (u32) actags; + + /* check if not all of the allocation groups are active. + */ + if (actags < bmp->db_numag) { + /* not all of the allocation groups are active. determine + * if we should extend the active set by 1 (i.e. add the + * group following the current active set). we do so if + * the number of free blocks within the active set is less + * than the allocation group set and average free within + * the active set is less than 60%. we activate a new group + * by setting the allocation group preference to the new + * group. + */ + if (actfree < bmp->db_agsize && + ((avgfree * 100) >> l2agsize) < 60) + bmp->db_agpref = actags; + } else { + /* all allocation groups are in the active set. check if + * the preferred allocation group has average free space. + * if not, re-establish the preferred group as the leftmost + * group with average free space. 
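dbNextAG() above only widens the active set when it is short on space in absolute terms (less than one allocation group of free blocks) and below 60 percent free on average. A toy calculation following the same arithmetic, with invented geometry and free counts:

#include <stdio.h>

int main(void)
{
        /* four AGs of 2^20 blocks; groups 0-1 active; all numbers invented */
        int l2agsize = 20, actags = 2, inactags = 2;
        long long agsize = 1LL << 20, rem = 0;      /* map size is a multiple of agsize */
        long long inactfree = (inactags && rem)
                ? ((inactags - 1LL) << l2agsize) + rem
                : (long long) inactags << l2agsize; /* inactive AGs fully free */
        long long nfree = inactfree + (1LL << 19);  /* active set: half an AG free */
        long long actfree = nfree - inactfree;
        long long avgfree = actfree / actags;       /* 2^18 per active AG */

        if (actfree < agsize && ((avgfree * 100) >> l2agsize) < 60)
                printf("activate AG %d\n", actags); /* 25% average, so AG 2 joins */
        return 0;
}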
+ */ + if (bmp->db_agfree[bmp->db_agpref] < avgfree) { + for (bmp->db_agpref = 0; bmp->db_agpref < actags; + bmp->db_agpref++) { + if (bmp->db_agfree[bmp->db_agpref] <= + avgfree) + break; + } + assert(bmp->db_agpref < bmp->db_numag); + } + } + + BMAP_UNLOCK(bmp); + + /* return the preferred group. + */ + return (bmp->db_agpref); +} + + +/* + * NAME: dbAlloc() + * + * FUNCTION: attempt to allocate a specified number of contiguous free + * blocks from the working allocation block map. + * + * the block allocation policy uses hints and a multi-step + * approach. + * + * for allocation requests smaller than the number of blocks + * per dmap, we first try to allocate the new blocks + * immediately following the hint. if these blocks are not + * available, we try to allocate blocks near the hint. if + * no blocks near the hint are available, we next try to + * allocate within the same dmap as contains the hint. + * + * if no blocks are available in the dmap or the allocation + * request is larger than the dmap size, we try to allocate + * within the same allocation group as contains the hint. if + * this does not succeed, we finally try to allocate anywhere + * within the aggregate. + * + * we also try to allocate anywhere within the aggregate for + * for allocation requests larger than the allocation group + * size or requests that specify no hint value. + * + * PARAMETERS: + * ip - pointer to in-core inode; + * hint - allocation hint. + * nblocks - number of contiguous blocks in the range. + * results - on successful return, set to the starting block number + * of the newly allocated contiguous range. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) +{ + int rc, agno; + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + bmap_t *bmp; + metapage_t *mp; + s64 lblkno, blkno; + dmap_t *dp; + int l2nb; + s64 mapSize; + + /* assert that nblocks is valid */ + assert(nblocks > 0); + +#ifdef _STILL_TO_PORT + /* DASD limit check F226941 */ + if (OVER_LIMIT(ip, nblocks)) + return ENOSPC; +#endif /* _STILL_TO_PORT */ + + /* get the log2 number of blocks to be allocated. + * if the number of blocks is not a log2 multiple, + * it will be rounded up to the next log2 multiple. + */ + l2nb = BLKSTOL2(nblocks); + + bmp = JFS_SBI(ip->i_sb)->bmap; + +//retry: /* serialize w.r.t.extendfs() */ + mapSize = bmp->db_mapsize; + + /* the hint should be within the map */ + assert(hint < mapSize); + + /* if no hint was specified or the number of blocks to be + * allocated is greater than the allocation group size, try + * to allocate anywhere. + */ + if (hint == 0 || l2nb > bmp->db_agl2size) { + IWRITE_LOCK(ipbmap); + + rc = dbAllocAny(bmp, nblocks, l2nb, results); + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, + nblocks); + } + + IWRITE_UNLOCK(ipbmap); + return (rc); + } + + /* we would like to allocate close to the hint. adjust the + * hint to the block following the hint since the allocators + * will start looking for free space starting at this point. + * if the hint was the last block of the file system, try to + * allocate in the same allocation group as the hint. + */ + blkno = hint + 1; + if (blkno >= bmp->db_mapsize) { + blkno--; + goto tryag; + } + + /* check if blkno crosses over into a new allocation group. + * if so, check if we should allow allocations within this + * allocation group. 
we try to keep the trailing (rightmost) + * allocation groups of the file system free for large + * allocations and may want to prevent this allocation from + * spilling over into this space. + */ + if ((blkno & (bmp->db_agsize - 1)) == 0) { + /* check if the AG is beyond the rightmost AG with + * allocations in it. if so, call dbNextAG() to + * determine if the allocation should be allowed + * to proceed within this AG or should be targeted + * to another AG. + */ + agno = blkno >> bmp->db_agl2size; + if (agno > bmp->db_maxag) { + agno = dbNextAG(ipbmap); + blkno = (s64) agno << bmp->db_agl2size; + goto tryag; + } + } + + /* check if the allocation request size can be satisfied from a + * single dmap. if so, try to allocate from the dmap containing + * the hint using a tiered strategy. + */ + if (nblocks <= BPERDMAP) { + IREAD_LOCK(ipbmap); + + /* get the buffer for the dmap containing the hint. + */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* first, try to satisfy the allocation request with the + * blocks beginning at the hint. + */ + if ((rc = + dbAllocNext(bmp, dp, blkno, + (int) nblocks)) != ENOSPC) { + if (rc == 0) { + *results = blkno; + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + write_metapage(mp); + } else { + assert(rc == EIO); + release_metapage(mp); + } + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + /* next, try to satisfy the allocation request with blocks + * near the hint. + */ + if ((rc = + dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, + results)) + != ENOSPC) { + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + /* try to satisfy the allocation request with blocks within + * the same allocation group as the hint. + */ + if ((rc = + dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) + != ENOSPC) { + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + release_metapage(mp); + IREAD_UNLOCK(ipbmap); + } + + tryag: + IWRITE_LOCK(ipbmap); + + /* determine the allocation group number of the hint and try to + * allocate within this allocation group. if that fails, try to + * allocate anywhere in the map. + */ + agno = blkno >> bmp->db_agl2size; + if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == ENOSPC) + rc = dbAllocAny(bmp, nblocks, l2nb, results); + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks); + } + + IWRITE_UNLOCK(ipbmap); + + return (rc); +} + + +/* + * NAME: dbAllocExact() + * + * FUNCTION: try to allocate the requested extent; + * + * PARAMETERS: + * ip - pointer to in-core inode; + * blkno - extent address; + * nblocks - extent length; + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) +{ + int rc; + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap; + dmap_t *dp; + s64 lblkno; + metapage_t *mp; + + IREAD_LOCK(ipbmap); + + /* + * validate extent request: + * + * note: defragfs policy: + * max 64 blocks will be moved. + * allocation request size must be satisfied from a single dmap. 
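Before any of the tiered searches above run, dbAlloc() converts the request to l2nb with BLKSTOL2(), rounding a count that is not a power of two up to the next one so it can be matched against buddy sizes. The macro itself lives in the JFS headers rather than in this patch; a behaviourally equivalent sketch:

/* round nb up to a power of two and return its log2;
 * e.g. nb = 5 gives 3, so the allocator hunts for an 8-block buddy */
static int blkstol2_sketch(long long nb)
{
        int l2nb = 0;

        while ((1LL << l2nb) < nb)
                l2nb++;
        return l2nb;
}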
+ */ + if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { + IREAD_UNLOCK(ipbmap); + return EINVAL; + } + + if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { + /* the free space is no longer available */ + IREAD_UNLOCK(ipbmap); + return ENOSPC; + } + + /* read in the dmap covering the extent */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* try to allocate the requested extent */ + rc = dbAllocNext(bmp, dp, blkno, nblocks); + + IREAD_UNLOCK(ipbmap); + + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + return (rc); +} + + +/* + * NAME: dbReAlloc() + * + * FUNCTION: attempt to extend a current allocation by a specified + * number of blocks. + * + * this routine attempts to satisfy the allocation request + * by first trying to extend the existing allocation in + * place by allocating the additional blocks as the blocks + * immediately following the current allocation. if these + * blocks are not available, this routine will attempt to + * allocate a new set of contiguous blocks large enough + * to cover the existing allocation plus the additional + * number of blocks required. + * + * PARAMETERS: + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current + * allocation. + * addnblocks - number of blocks to add to the allocation. + * results - on successful return, set to the starting block number + * of the existing allocation if the existing allocation + * was extended in place or to a newly allocated contiguous + * range if the existing allocation could not be extended + * in place. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int +dbReAlloc(struct inode *ip, + s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) +{ + int rc; + + /* try to extend the allocation in place. + */ + if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { + *results = blkno; + return (0); + } else { + if (rc != ENOSPC) + return (rc); + } + + /* could not extend the allocation in place, so allocate a + * new set of blocks for the entire request (i.e. try to get + * a range of contiguous blocks large enough to cover the + * existing allocation plus the additional blocks.) + */ + return (dbAlloc + (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); +} + + +/* + * NAME: dbExtend() + * + * FUNCTION: attempt to extend a current allocation by a specified + * number of blocks. + * + * this routine attempts to satisfy the allocation request + * by first trying to extend the existing allocation in + * place by allocating the additional blocks as the blocks + * immediately following the current allocation. + * + * PARAMETERS: + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current + * allocation. + * addnblocks - number of blocks to add to the allocation. 
+ * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + s64 lblkno, lastblkno, extblkno; + uint rel_block; + metapage_t *mp; + dmap_t *dp; + int rc; + struct inode *ipbmap = sbi->ipbmap; + bmap_t *bmp; + + /* + * We don't want a non-aligned extent to cross a page boundary + */ + if (((rel_block = blkno & (sbi->nbperpage - 1))) && + (rel_block + nblocks + addnblocks > sbi->nbperpage)) + return (ENOSPC); + + /* get the last block of the current allocation */ + lastblkno = blkno + nblocks - 1; + + /* determine the block number of the block following + * the existing allocation. + */ + extblkno = lastblkno + 1; + + IREAD_LOCK(ipbmap); + + /* better be within the file system */ + bmp = sbi->bmap; + assert(lastblkno >= 0 && lastblkno < bmp->db_mapsize); + + /* we'll attempt to extend the current allocation in place by + * allocating the additional blocks as the blocks immediately + * following the current allocation. we only try to extend the + * current allocation in place if the number of additional blocks + * can fit into a dmap, the last block of the current allocation + * is not the last block of the file system, and the start of the + * inplace extension is not on an allocation group boundry. + */ + if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize || + (extblkno & (bmp->db_agsize - 1)) == 0) { + IREAD_UNLOCK(ipbmap); + return (ENOSPC); + } + + /* get the buffer for the dmap containing the first block + * of the extension. + */ + lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + + DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks); + dp = (dmap_t *) mp->data; + + /* try to allocate the blocks immediately following the + * current allocation. + */ + rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks); + + IREAD_UNLOCK(ipbmap); + + /* were we successful ? */ + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno, + addnblocks); + write_metapage(mp); + } else { + /* we were not successful */ + release_metapage(mp); + assert(rc == ENOSPC || rc == EIO); + } + + return (rc); +} + + +/* + * NAME: dbAllocNext() + * + * FUNCTION: attempt to allocate the blocks of the specified block + * range within a dmap. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - starting block number of the range. + * nblocks - number of contiguous free blocks of the range. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + * + * serialization: IREAD_LOCK(ipbmap) held on entry/exit; + */ +static int dbAllocNext(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + int dbitno, word, rembits, nb, nwords, wbitno, nw; + int l2size; + s8 *leaf; + u32 mask; + + /* pick up a pointer to the leaves of the dmap tree. + */ + leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); + + /* determine the bit number and word within the dmap of the + * starting block. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + + /* check if the specified block range is contained within + * this dmap. + */ + if (dbitno + nblocks > BPERDMAP) + return (ENOSPC); + + /* check if the starting leaf indicates that anything + * is free. 
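+	 *
+	 * for example (numbers purely illustrative, assuming 32-bit
+	 * dmap words, i.e. L2DBWORD == 5): a hint landing at dbitno
+	 * 100 within the dmap maps to word 100 >> 5 = 3, so leaf[3]
+	 * is the first leaf consulted; leaf[3] == NOFREE means the
+	 * allocation cannot proceed from this hint.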
+ */ + if (leaf[word] == NOFREE) + return (ENOSPC); + + /* check the dmaps words corresponding to block range to see + * if the block range is free. not all bits of the first and + * last words may be contained within the block range. if this + * is the case, we'll work against those words (i.e. partial first + * and/or last) on an individual basis (a single pass) and examine + * the actual bits to determine if they are free. a single pass + * will be used for all dmap words fully contained within the + * specified range. within this pass, the leaves of the dmap + * tree will be examined to determine if the blocks are free. a + * single leaf may describe the free space of multiple dmap + * words, so we may visit only a subset of the actual leaves + * corresponding to the dmap words of the block range. + */ + for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { + /* determine the bit number within the word and + * the number of bits within the word. + */ + wbitno = dbitno & (DBWORD - 1); + nb = min(rembits, DBWORD - wbitno); + + /* check if only part of the word is to be examined. + */ + if (nb < DBWORD) { + /* check if the bits are free. + */ + mask = (ONES << (DBWORD - nb) >> wbitno); + if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask) + return (ENOSPC); + + word += 1; + } else { + /* one or more dmap words are fully contained + * within the block range. determine how many + * words and how many bits. + */ + nwords = rembits >> L2DBWORD; + nb = nwords << L2DBWORD; + + /* now examine the appropriate leaves to determine + * if the blocks are free. + */ + while (nwords > 0) { + /* does the leaf describe any free space ? + */ + if (leaf[word] < BUDMIN) + return (ENOSPC); + + /* determine the l2 number of bits provided + * by this leaf. + */ + l2size = + min((int)leaf[word], NLSTOL2BSZ(nwords)); + + /* determine how many words were handled. + */ + nw = BUDSIZE(l2size, BUDMIN); + + nwords -= nw; + word += nw; + } + } + } + + /* allocate the blocks. + */ + return (dbAllocDmap(bmp, dp, blkno, nblocks)); +} + + +/* + * NAME: dbAllocNear() + * + * FUNCTION: attempt to allocate a number of contiguous free blocks near + * a specified block (hint) within a dmap. + * + * starting with the dmap leaf that covers the hint, we'll + * check the next four contiguous leaves for sufficient free + * space. if sufficient free space is found, we'll allocate + * the desired free space. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - block number to allocate near. + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number + * of the newly allocated range. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + * + * serialization: IREAD_LOCK(ipbmap) held on entry/exit; + */ +static int +dbAllocNear(bmap_t * bmp, + dmap_t * dp, s64 blkno, int nblocks, int l2nb, s64 * results) +{ + int word, lword, rc; + s8 *leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); + + /* determine the word within the dmap that holds the hint + * (i.e. blkno). also, determine the last word in the dmap + * that we'll include in our examination. + */ + word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; + lword = min(word + 4, LPERDMAP); + + /* examine the leaves for sufficient free space. + */ + for (; word < lword; word++) { + /* does the leaf describe sufficient free space ? 
+ */ + if (leaf[word] < l2nb) + continue; + + /* determine the block number within the file system + * of the first block described by this dmap word. + */ + blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); + + /* if not all bits of the dmap word are free, get the + * starting bit number within the dmap word of the required + * string of free bits and adjust the block number with the + * value. + */ + if (leaf[word] < BUDMIN) + blkno += + dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); + + /* allocate the blocks. + */ + if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) + *results = blkno; + + return (rc); + } + + return (ENOSPC); +} + + +/* + * NAME: dbAllocAG() + * + * FUNCTION: attempt to allocate the specified number of contiguous + * free blocks within the specified allocation group. + * + * unless the allocation group size is equal to the number + * of blocks per dmap, the dmap control pages will be used to + * find the required free space, if available. we start the + * search at the highest dmap control page level which + * distinctly describes the allocation group's free space + * (i.e. the highest level at which the allocation group's + * free space is not mixed in with that of any other group). + * in addition, we start the search within this level at a + * height of the dmapctl dmtree at which the nodes distinctly + * describe the allocation group's free space. at this height, + * the allocation group's free space may be represented by 1 + * or two sub-trees, depending on the allocation group size. + * we search the top nodes of these subtrees left to right for + * sufficient free space. if sufficient free space is found, + * the subtree is searched to find the leftmost leaf that + * has free space. once we have made it to the leaf, we + * move the search to the next lower level dmap control page + * corresponding to this leaf. we continue down the dmap control + * pages until we find the dmap that contains or starts the + * sufficient free space and we allocate at this dmap. + * + * if the allocation group size is equal to the dmap size, + * we'll start at the dmap corresponding to the allocation + * group and attempt the allocation at this level. + * + * the dmap control page search is also not performed if the + * allocation group is completely free and we go to the first + * dmap of the allocation group to do the allocation. this is + * done because the allocation group may be part (not the first + * part) of a larger binary buddy system, causing the dmap + * control pages to indicate no free space (NOFREE) within + * the allocation group. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * agno - allocation group number. + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number + * of the newly allocated range. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + * + * note: IWRITE_LOCK(ipmap) held on entry/exit; + */ +static int +dbAllocAG(bmap_t * bmp, int agno, s64 nblocks, int l2nb, s64 * results) +{ + metapage_t *mp; + dmapctl_t *dcp; + int rc, ti, i, k, m, n, agperlev; + s64 blkno, lblkno; + int budmin; + + /* allocation request should not be for more than the + * allocation group size. + */ + assert(l2nb <= bmp->db_agl2size); + + /* determine the starting block number of the allocation + * group. 
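+	 *
+	 * e.g., with an allocation group size of 2**16 blocks
+	 * (db_agl2size == 16, chosen here only for illustration),
+	 * allocation group 3 starts at block 3 << 16 = 196608.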
+ */ + blkno = (s64) agno << bmp->db_agl2size; + + /* check if the allocation group size is the minimum allocation + * group size or if the allocation group is completely free. if + * the allocation group size is the minimum size of BPERDMAP (i.e. + * 1 dmap), there is no need to search the dmap control page (below) + * that fully describes the allocation group since the allocation + * group is already fully described by a dmap. in this case, we + * just call dbAllocCtl() to search the dmap tree and allocate the + * required space if available. + * + * if the allocation group is completely free, dbAllocCtl() is + * also called to allocate the required space. this is done for + * two reasons. first, it makes no sense searching the dmap control + * pages for free space when we know that free space exists. second, + * the dmap control pages may indicate that the allocation group + * has no free space if the allocation group is part (not the first + * part) of a larger binary buddy system. + */ + if (bmp->db_agsize == BPERDMAP + || bmp->db_agfree[agno] == bmp->db_agsize) { + rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); + /* assert(!(rc == ENOSPC && bmp->db_agfree[agno] == bmp->db_agsize)); */ + if ((rc == ENOSPC) && + (bmp->db_agfree[agno] == bmp->db_agsize)) { + jERROR(1, + ("dbAllocAG: removed assert, but still need to debug here\nblkno = 0x%Lx, nblocks = 0x%Lx\n", + (unsigned long long) blkno, + (unsigned long long) nblocks)); + } + return (rc); + } + + /* the buffer for the dmap control page that fully describes the + * allocation group. + */ + lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel); + mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) + return (EIO); + dcp = (dmapctl_t *) mp->data; + budmin = dcp->budmin; + + /* search the subtree(s) of the dmap control page that describes + * the allocation group, looking for sufficient free space. to begin, + * determine how many allocation groups are represented in a dmap + * control page at the control page level (i.e. L0, L1, L2) that + * fully describes an allocation group. next, determine the starting + * tree index of this allocation group within the control page. + */ + agperlev = + (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); + + /* dmap control page trees fan-out by 4 and a single allocation + * group may be described by 1 or 2 subtrees within the ag level + * dmap control page, depending upon the ag size. examine the ag's + * subtrees for sufficient free space, starting with the leftmost + * subtree. + */ + for (i = 0; i < bmp->db_agwidth; i++, ti++) { + /* is there sufficient free space ? + */ + if (l2nb > dcp->stree[ti]) + continue; + + /* sufficient free space found in a subtree. now search down + * the subtree to find the leftmost leaf that describes this + * free space. + */ + for (k = bmp->db_agheigth; k > 0; k--) { + for (n = 0, m = (ti << 2) + 1; n < 4; n++) { + if (l2nb <= dcp->stree[m + n]) { + ti = m + n; + break; + } + } + assert(n < 4); + } + + /* determine the block number within the file system + * that corresponds to this leaf. + */ + if (bmp->db_aglevel == 2) + blkno = 0; + else if (bmp->db_aglevel == 1) + blkno &= ~(MAXL1SIZE - 1); + else /* bmp->db_aglevel == 0 */ + blkno &= ~(MAXL0SIZE - 1); + + blkno += + ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin; + + /* release the buffer in preparation for going down + * the next level of dmap control pages. 
 + */
+	release_metapage(mp);
+
+	/* check if we need to continue to search down the lower
+	 * level dmap control pages.  we need to if the number of
+	 * blocks required is less than maximum number of blocks
+	 * described at the next lower level.
+	 */
+	if (l2nb < budmin) {
+
+		/* search the lower level dmap control pages to get
+		 * the starting block number of the dmap that
+		 * contains or starts off the free space.
+		 */
+		if ((rc =
+		     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
+			       &blkno))) {
+			assert(rc != ENOSPC);
+			return (rc);
+		}
+	}
+
+	/* allocate the blocks.
+	 */
+	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
+	assert(rc != ENOSPC);
+	return (rc);
+	}
+
+	/* no space in the allocation group.  release the buffer and
+	 * return ENOSPC.
+	 */
+	release_metapage(mp);
+
+	return (ENOSPC);
+}
+
+
+/*
+ * NAME:	dbAllocAny()
+ *
+ * FUNCTION:	attempt to allocate the specified number of contiguous
+ *		free blocks anywhere in the file system.
+ *
+ *		dbAllocAny() attempts to find sufficient free space by
+ *		searching down the dmap control pages, starting with the
+ *		highest level (i.e. L0, L1, L2) control page.  if free space
+ *		large enough to satisfy the desired free space is found, the
+ *		desired free space is allocated.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	nblocks	-  actual number of contiguous free blocks desired.
+ *	l2nb	-  log2 number of contiguous free blocks desired.
+ *	results	-  on successful return, set to the starting block number
+ *		   of the newly allocated range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbAllocAny(bmap_t * bmp, s64 nblocks, int l2nb, s64 * results)
+{
+	int rc;
+	s64 blkno = 0;
+
+	/* starting with the top level dmap control page, search
+	 * down the dmap control levels for sufficient free space.
+	 * if free space is found, dbFindCtl() returns the starting
+	 * block number of the dmap that contains or starts off the
+	 * range of free space.
+	 */
+	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
+		return (rc);
+
+	/* allocate the blocks.
+	 */
+	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
+	assert(rc != ENOSPC);
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbFindCtl()
+ *
+ * FUNCTION:	starting at a specified dmap control page level and block
+ *		number, search down the dmap control levels for a range of
+ *		contiguous free blocks large enough to satisfy an allocation
+ *		request for the specified number of free blocks.
+ *
+ *		if sufficient contiguous free blocks are found, this routine
+ *		returns the starting block number within a dmap page that
+ *		contains or starts a range of contiguous free blocks that
+ *		is sufficient in size.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	level	-  starting dmap control page level.
+ *	l2nb	-  log2 number of contiguous free blocks desired.
+ *	*blkno	-  on entry, starting block number for conducting the search.
+ *		   on successful return, the first block within a dmap page
+ *		   that contains or starts a range of contiguous free blocks.
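+ *
+ *	as a rough illustration of the descent below (geometry values
+ *	are only the usual defaults): when the level reached has
+ *	budmin 13, i.e. one dmap of 8192 blocks per leaf, a leafidx
+ *	of 2 advances the search block number by 2 << 13 = 16384
+ *	blocks before dropping to the next lower level.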
 + *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbFindCtl(bmap_t * bmp, int l2nb, int level, s64 * blkno)
+{
+	int rc, leafidx, lev;
+	s64 b, lblkno;
+	dmapctl_t *dcp;
+	int budmin;
+	metapage_t *mp;
+
+	/* starting at the specified dmap control page level and block
+	 * number, search down the dmap control levels for the starting
+	 * block number of a dmap page that contains or starts off
+	 * sufficient free blocks.
+	 */
+	for (lev = level, b = *blkno; lev >= 0; lev--) {
+		/* get the buffer of the dmap control page for the block
+		 * number and level (i.e. L0, L1, L2).
+		 */
+		lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
+		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+		if (mp == NULL)
+			return (EIO);
+		dcp = (dmapctl_t *) mp->data;
+		budmin = dcp->budmin;
+
+		/* search the tree within the dmap control page for
+		 * sufficient free space.  if sufficient free space is found,
+		 * dbFindLeaf() returns the index of the leaf at which
+		 * free space was found.
+		 */
+		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
+
+		/* release the buffer.
+		 */
+		release_metapage(mp);
+
+		/* space found ?
+		 */
+		if (rc) {
+			assert(lev == level);
+			return (ENOSPC);
+		}
+
+		/* adjust the block number to reflect the location within
+		 * the dmap control page (i.e. the leaf) at which free
+		 * space was found.
+		 */
+		b += (((s64) leafidx) << budmin);
+
+		/* we stop the search at this dmap control page level if
+		 * the number of blocks required is greater than or equal
+		 * to the maximum number of blocks described at the next
+		 * (lower) level.
+		 */
+		if (l2nb >= budmin)
+			break;
+	}
+
+	*blkno = b;
+	return (0);
+}
+
+
+/*
+ * NAME:	dbAllocCtl()
+ *
+ * FUNCTION:	attempt to allocate a specified number of contiguous
+ *		blocks starting within a specific dmap.
+ *
+ *		this routine is called by higher level routines that search
+ *		the dmap control pages above the actual dmaps for contiguous
+ *		free space.  the results of successful searches by these
+ *		routines are the starting block numbers within dmaps, with
+ *		the dmaps themselves containing the desired contiguous free
+ *		space or starting a contiguous free space of desired size
+ *		that is made up of the blocks of one or more dmaps.  these
+ *		calls should not fail due to insufficient resources.
+ *
+ *		this routine is called in some cases where it is not known
+ *		whether it will fail due to insufficient resources.  more
+ *		specifically, this occurs when allocating from an allocation
+ *		group whose size is equal to the number of blocks per dmap.
+ *		in this case, the dmap control pages are not examined prior
+ *		to calling this routine (to save pathlength) and the call
+ *		might fail.
+ *
+ *		for a request size that fits within a dmap, this routine relies
+ *		upon the dmap's dmtree to find the requested contiguous free
+ *		space.  for request sizes that are larger than a dmap, the
+ *		requested free space will start at the first block of the
+ *		first dmap (i.e. blkno).
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	nblocks	-  actual number of contiguous free blocks to allocate.
+ *	l2nb	-  log2 number of contiguous free blocks to allocate.
+ *	blkno	-  starting block number of the dmap to start the allocation
+ *		   from.
+ *	results	-  on successful return, set to the starting block number
+ *		   of the newly allocated range.
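+ *
+ *	sketch of the multi-dmap case handled below (sizes are
+ *	illustrative): a request covering three dmaps must begin on
+ *	a dmap boundary and is then carved off one dmap at a time,
+ *	nb = min(n, BPERDMAP) blocks per pass, with a backout path
+ *	taken if any dmap read or allocation fails.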
 + *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int
+dbAllocCtl(bmap_t * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
+{
+	int rc, nb;
+	s64 b, lblkno, n;
+	metapage_t *mp;
+	dmap_t *dp;
+
+	/* check if the allocation request is confined to a single dmap.
+	 */
+	if (l2nb <= L2BPERDMAP) {
+		/* get the buffer for the dmap.
+		 */
+		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
+		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+		if (mp == NULL)
+			return (EIO);
+		dp = (dmap_t *) mp->data;
+
+		/* try to allocate the blocks.
+		 */
+		rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
+		if (rc == 0)
+			mark_metapage_dirty(mp);
+
+		release_metapage(mp);
+
+		return (rc);
+	}
+
+	/* allocation request involving multiple dmaps. it must start on
+	 * a dmap boundary.
+	 */
+	assert((blkno & (BPERDMAP - 1)) == 0);
+
+	/* allocate the blocks dmap by dmap.
+	 */
+	for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
+		/* get the buffer for the dmap.
+		 */
+		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
+		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+		if (mp == NULL) {
+			rc = EIO;
+			goto backout;
+		}
+		dp = (dmap_t *) mp->data;
+
+		/* the dmap better be all free.
+		 */
+		assert(dp->tree.stree[ROOT] == L2BPERDMAP);
+
+		/* determine how many blocks to allocate from this dmap.
+		 */
+		nb = min(n, (s64)BPERDMAP);
+
+		/* allocate the blocks from the dmap.
+		 */
+		if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
+			release_metapage(mp);
+			goto backout;
+		}
+
+		/* write the buffer.
+		 */
+		write_metapage(mp);
+	}
+
+	/* set the results (starting block number) and return.
+	 */
+	*results = blkno;
+	return (0);
+
+	/* something failed in handling an allocation request involving
+	 * multiple dmaps.  we'll try to clean up by backing out any
+	 * allocation that has already happened for this request.  if
+	 * we fail in backing out the allocation, we'll mark the file
+	 * system to indicate that blocks have been leaked.
+	 */
+      backout:
+
+	/* try to backout the allocations dmap by dmap.
+	 */
+	for (n = nblocks - n, b = blkno; n > 0;
+	     n -= BPERDMAP, b += BPERDMAP) {
+		/* get the buffer for this dmap.
+		 */
+		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
+		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+		if (mp == NULL) {
+			/* could not back out.  mark the file system
+			 * to indicate that we have leaked blocks.
+			 */
+			fsDirty();	/* !!! */
+			jERROR(1,
+			       ("dbAllocCtl: I/O Error: Block Leakage.\n"));
+			continue;
+		}
+		dp = (dmap_t *) mp->data;
+
+		/* free the blocks in this dmap.
+		 */
+		if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
+			/* could not back out.  mark the file system
+			 * to indicate that we have leaked blocks.
+			 */
+			release_metapage(mp);
+			fsDirty();	/* !!! */
+			jERROR(1, ("dbAllocCtl: Block Leakage.\n"));
+			continue;
+		}
+
+		/* write the buffer.
+		 */
+		write_metapage(mp);
+	}
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbAllocDmapLev()
+ *
+ * FUNCTION:	attempt to allocate a specified number of contiguous blocks
+ *		from a specified dmap.
+ *
+ *		this routine checks if the contiguous blocks are available.
+ *		if so, nblocks of blocks are allocated; otherwise, ENOSPC is
+ *		returned.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to attempt to allocate blocks from.
+ *	l2nb	-  log2 number of contiguous blocks desired.
+ *	nblocks	-  actual number of contiguous blocks desired.
+ *	results	-  on successful return, set to the starting block number
+ *		   of the newly allocated range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
+ *	IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
+ */
+static int
+dbAllocDmapLev(bmap_t * bmp,
+	       dmap_t * dp, int nblocks, int l2nb, s64 * results)
+{
+	s64 blkno;
+	int leafidx, rc;
+
+	/* can't be more than a dmap's worth of blocks */
+	assert(l2nb <= L2BPERDMAP);
+
+	/* search the tree within the dmap page for sufficient
+	 * free space.  if sufficient free space is found, dbFindLeaf()
+	 * returns the index of the leaf at which free space was found.
+	 */
+	if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+		return (ENOSPC);
+
+	/* determine the block number within the file system corresponding
+	 * to the leaf at which free space was found.
+	 */
+	blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
+
+	/* if not all bits of the dmap word are free, get the starting
+	 * bit number within the dmap word of the required string of free
+	 * bits and adjust the block number with this value.
+	 */
+	if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
+		blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
+
+	/* allocate the blocks */
+	if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
+		*results = blkno;
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbAllocDmap()
+ *
+ * FUNCTION:	adjust the disk allocation map to reflect the allocation
+ *		of a specified block range within a dmap.
+ *
+ *		this routine allocates the specified blocks from the dmap
+ *		through a call to dbAllocBits(). if the allocation of the
+ *		block range causes the maximum string of free blocks within
+ *		the dmap to change (i.e. the value of the root of the dmap's
+ *		dmtree), this routine will cause this change to be reflected
+ *		up through the appropriate levels of the dmap control pages
+ *		by a call to dbAdjCtl() for the L0 dmap control page that
+ *		covers this dmap.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to allocate the block range from.
+ *	blkno	-  starting block number of the block to be allocated.
+ *	nblocks	-  number of blocks to be allocated.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	EIO	- i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbAllocDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
+{
+	s8 oldroot;
+	int rc;
+
+	/* save the current value of the root (i.e. maximum free string)
+	 * of the dmap tree.
+	 */
+	oldroot = dp->tree.stree[ROOT];
+
+	/* allocate the specified (blocks) bits */
+	dbAllocBits(bmp, dp, blkno, nblocks);
+
+	/* if the root has not changed, done. */
+	if (dp->tree.stree[ROOT] == oldroot)
+		return (0);
+
+	/* root changed.   bubble the change up to the dmap control pages.
+	 * if the adjustment of the upper level control pages fails,
+	 * backout the bit allocation (thus making everything consistent).
+	 */
+	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
+		dbFreeBits(bmp, dp, blkno, nblocks);
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbFreeDmap()
+ *
+ * FUNCTION:	adjust the disk allocation map to reflect the deallocation
+ *		of a specified block range within a dmap.
+ *
+ *		this routine frees the specified blocks from the dmap through
+ *		a call to dbFreeBits(). if the deallocation of the block range
+ *		causes the maximum string of free blocks within the dmap to
+ *		change (i.e.
the value of the root of the dmap's dmtree), this + * routine will cause this change to be reflected up through the + * appropriate levels of the dmap control pages by a call to + * dbAdjCtl() for the L0 dmap control page that covers this dmap. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free the block range from. + * blkno - starting block number of the block to be freed. + * nblocks - number of blocks to be freed. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static int dbFreeDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + s8 oldroot; + int rc, word; + + /* save the current value of the root (i.e. maximum free string) + * of the dmap tree. + */ + oldroot = dp->tree.stree[ROOT]; + + /* free the specified (blocks) bits */ + dbFreeBits(bmp, dp, blkno, nblocks); + + /* if the root has not changed, done. */ + if (dp->tree.stree[ROOT] == oldroot) + return (0); + + /* root changed. bubble the change up to the dmap control pages. + * if the adjustment of the upper level control pages fails, + * backout the deallocation. + */ + if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { + word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; + + /* as part of backing out the deallocation, we will have + * to back split the dmap tree if the deallocation caused + * the freed blocks to become part of a larger binary buddy + * system. + */ + if (dp->tree.stree[word] == NOFREE) + dbBackSplit((dmtree_t *) & dp->tree, word); + + dbAllocBits(bmp, dp, blkno, nblocks); + } + + return (rc); +} + + +/* + * NAME: dbAllocBits() + * + * FUNCTION: allocate a specified block range from a dmap. + * + * this routine updates the dmap to reflect the working + * state allocation of the specified block range. it directly + * updates the bits of the working map and causes the adjustment + * of the binary buddy system described by the dmap's dmtree + * leaves to reflect the bits allocated. it also causes the + * dmap's dmtree, as a whole, to reflect the allocated range. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate bits from. + * blkno - starting block number of the bits to be allocated. + * nblocks - number of bits to be allocated. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbAllocBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; + dmtree_t *tp = (dmtree_t *) & dp->tree; + int size; + s8 *leaf; + + /* pick up a pointer to the leaves of the dmap tree */ + leaf = dp->tree.stree + LEAFIND; + + /* determine the bit number and word within the dmap of the + * starting block. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + + /* block range better be within the dmap */ + assert(dbitno + nblocks <= BPERDMAP); + + /* allocate the bits of the dmap's words corresponding to the block + * range. not all bits of the first and last words may be contained + * within the block range. if this is the case, we'll work against + * those words (i.e. partial first and/or last) on an individual basis + * (a single pass), allocating the bits of interest by hand and + * updating the leaf corresponding to the dmap word. a single pass + * will be used for all dmap words fully contained within the + * specified range. 
within this pass, the bits of all fully contained
+	 * dmap words will be marked as allocated in a single shot and the
+	 * leaves will be updated.  a single leaf may describe the free
+	 * space of multiple dmap words, so we may update only a subset of
+	 * the actual leaves corresponding to the dmap words of the block
+	 * range.
+	 */
+	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
+		/* determine the bit number within the word and
+		 * the number of bits within the word.
+		 */
+		wbitno = dbitno & (DBWORD - 1);
+		nb = min(rembits, DBWORD - wbitno);
+
+		/* check if only part of a word is to be allocated.
+		 */
+		if (nb < DBWORD) {
+			/* allocate (set to 1) the appropriate bits within
+			 * this dmap word.
+			 */
+			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
+						      >> wbitno);
+
+			/* update the leaf for this dmap word. in addition
+			 * to setting the leaf value to the binary buddy max
+			 * of the updated dmap word, dbSplit() will split
+			 * the binary system of the leaves if need be.
+			 */
+			dbSplit(tp, word, BUDMIN,
+				dbMaxBud((u8 *) & dp->wmap[word]));
+
+			word += 1;
+		} else {
+			/* one or more dmap words are fully contained
+			 * within the block range.  determine how many
+			 * words and allocate (set to 1) the bits of these
+			 * words.
+			 */
+			nwords = rembits >> L2DBWORD;
+			memset(&dp->wmap[word], (int) ONES, nwords * 4);
+
+			/* determine how many bits.
+			 */
+			nb = nwords << L2DBWORD;
+
+			/* now update the appropriate leaves to reflect
+			 * the allocated words.
+			 */
+			for (; nwords > 0; nwords -= nw) {
+				assert(leaf[word] >= BUDMIN);
+
+				/* determine what the leaf value should be
+				 * updated to as the minimum of the l2 number
+				 * of bits being allocated and the l2 number
+				 * of bits currently described by this leaf.
+				 */
+				size = min((int)leaf[word], NLSTOL2BSZ(nwords));
+
+				/* update the leaf to reflect the allocation.
+				 * in addition to setting the leaf value to
+				 * NOFREE, dbSplit() will split the binary
+				 * system of the leaves to reflect the current
+				 * allocation (size).
+				 */
+				dbSplit(tp, word, size, NOFREE);
+
+				/* get the number of dmap words handled */
+				nw = BUDSIZE(size, BUDMIN);
+				word += nw;
+			}
+		}
+	}
+
+	/* update the free count for this dmap */
+	dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
+
+	BMAP_LOCK(bmp);
+
+	/* update the maximum allocation group number if this
+	 * allocation group is the new max.
+	 */
+	agno = blkno >> bmp->db_agl2size;
+	if (agno > bmp->db_maxag)
+		bmp->db_maxag = agno;
+
+	/* update the free count for the allocation group and map */
+	bmp->db_agfree[agno] -= nblocks;
+	bmp->db_nfree -= nblocks;
+
+	BMAP_UNLOCK(bmp);
+}
+
+
+/*
+ * NAME:	dbFreeBits()
+ *
+ * FUNCTION:	free a specified block range from a dmap.
+ *
+ *		this routine updates the dmap to reflect the working
+ *		state deallocation of the specified block range. it directly
+ *		updates the bits of the working map and causes the adjustment
+ *		of the binary buddy system described by the dmap's dmtree
+ *		leaves to reflect the bits freed.  it also causes the dmap's
+ *		dmtree, as a whole, to reflect the deallocated range.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to free bits from.
+ *	blkno	-  starting block number of the bits to be freed.
+ *	nblocks	-  number of bits to be freed.
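+ *
+ *	for instance (values illustrative, 32-bit dmap words): a
+ *	partial-word free of nb = 4 bits at wbitno = 8 clears the
+ *	bits selected by mask ONES << (DBWORD - nb) >> wbitno, i.e.
+ *	0xf0000000 >> 8 == 0x00f00000, after which dbJoin() rebuilds
+ *	the leaf from the word's new maximum free string.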
+ * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbFreeBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; + dmtree_t *tp = (dmtree_t *) & dp->tree; + int size; + + /* determine the bit number and word within the dmap of the + * starting block. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + + /* block range better be within the dmap. + */ + assert(dbitno + nblocks <= BPERDMAP); + + /* free the bits of the dmaps words corresponding to the block range. + * not all bits of the first and last words may be contained within + * the block range. if this is the case, we'll work against those + * words (i.e. partial first and/or last) on an individual basis + * (a single pass), freeing the bits of interest by hand and updating + * the leaf corresponding to the dmap word. a single pass will be used + * for all dmap words fully contained within the specified range. + * within this pass, the bits of all fully contained dmap words will + * be marked as free in a single shot and the leaves will be updated. a + * single leaf may describe the free space of multiple dmap words, + * so we may update only a subset of the actual leaves corresponding + * to the dmap words of the block range. + * + * dbJoin() is used to update leaf values and will join the binary + * buddy system of the leaves if the new leaf values indicate this + * should be done. + */ + for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { + /* determine the bit number within the word and + * the number of bits within the word. + */ + wbitno = dbitno & (DBWORD - 1); + nb = min(rembits, DBWORD - wbitno); + + /* check if only part of a word is to be freed. + */ + if (nb < DBWORD) { + /* free (zero) the appropriate bits within this + * dmap word. + */ + dp->wmap[word] &= + cpu_to_le32(~(ONES << (DBWORD - nb) + >> wbitno)); + + /* update the leaf for this dmap word. + */ + dbJoin(tp, word, + dbMaxBud((u8 *) & dp->wmap[word])); + + word += 1; + } else { + /* one or more dmap words are fully contained + * within the block range. determine how many + * words and free (zero) the bits of these words. + */ + nwords = rembits >> L2DBWORD; + memset(&dp->wmap[word], 0, nwords * 4); + + /* determine how many bits. + */ + nb = nwords << L2DBWORD; + + /* now update the appropriate leaves to reflect + * the freed words. + */ + for (; nwords > 0; nwords -= nw) { + /* determine what the leaf value should be + * updated to as the minimum of the l2 number + * of bits being freed and the l2 (max) number + * of bits that can be described by this leaf. + */ + size = + min(LITOL2BSZ + (word, L2LPERDMAP, BUDMIN), + NLSTOL2BSZ(nwords)); + + /* update the leaf. + */ + dbJoin(tp, word, size); + + /* get the number of dmap words handled. + */ + nw = BUDSIZE(size, BUDMIN); + word += nw; + } + } + } + + /* update the free count for this dmap. + */ + dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); + + BMAP_LOCK(bmp); + + /* update the free count for the allocation group and + * map. + */ + agno = blkno >> bmp->db_agl2size; + bmp->db_nfree += nblocks; + bmp->db_agfree[agno] += nblocks; + + /* check if this allocation group is not completely free and + * if it is currently the maximum (rightmost) allocation group. + * if so, establish the new maximum allocation group number by + * searching left for the first allocation group with allocation. 
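+	 *
+	 * e.g. (hypothetical): if db_maxag was 7 and freeing this
+	 * range leaves groups 5 through 7 completely free, the loop
+	 * below walks left and stops at group 4, the first group
+	 * that still holds allocations.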
 + */
+	if ((bmp->db_agfree[agno] == bmp->db_agsize
+	     && agno == bmp->db_maxag) || (agno == bmp->db_numag - 1
+					   && bmp->db_agfree[agno] ==
+					   (bmp->db_mapsize &
+					    (BPERDMAP - 1)))) {
+		while (bmp->db_maxag > 0) {
+			bmp->db_maxag -= 1;
+			if (bmp->db_agfree[bmp->db_maxag] !=
+			    bmp->db_agsize)
+				break;
+		}
+
+		/* re-establish the allocation group preference if the
+		 * current preference is right of the maximum allocation
+		 * group.
+		 */
+		if (bmp->db_agpref > bmp->db_maxag)
+			bmp->db_agpref = bmp->db_maxag;
+	}
+
+	BMAP_UNLOCK(bmp);
+}
+
+
+/*
+ * NAME:	dbAdjCtl()
+ *
+ * FUNCTION:	adjust a dmap control page at a specified level to reflect
+ *		the change in a lower level dmap or dmap control page's
+ *		maximum string of free blocks (i.e. a change in the root
+ *		of the lower level object's dmtree) due to the allocation
+ *		or deallocation of a range of blocks within a single dmap.
+ *
+ *		on entry, this routine is provided with the new value of
+ *		the lower level dmap or dmap control page root and the
+ *		starting block number of the block range whose allocation
+ *		or deallocation resulted in the root change.  this range
+ *		is represented by a single leaf of the current dmapctl
+ *		and the leaf will be updated with this value, possibly
+ *		causing a binary buddy system within the leaves to be
+ *		split or joined.  the update may also cause the dmapctl's
+ *		dmtree to be updated.
+ *
+ *		if the adjustment of the dmap control page, itself, causes its
+ *		root to change, this change will be bubbled up to the next dmap
+ *		control level by a recursive call to this routine, specifying
+ *		the new root value and the next dmap control page level to
+ *		be adjusted.
+ *
+ * PARAMETERS:
+ *	bmp	-  pointer to bmap descriptor
+ *	blkno	-  the first block of a block range within a dmap.  it is
+ *		   the allocation or deallocation of this block range that
+ *		   requires the dmap control page to be adjusted.
+ *	newval	-  the new value of the lower level dmap or dmap control
+ *		   page root.
+ *	alloc	-  TRUE if adjustment is due to an allocation.
+ *	level	-  current level of dmap control page (i.e. L0, L1, L2) to
+ *		   be adjusted.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	EIO	- i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int
+dbAdjCtl(bmap_t * bmp, s64 blkno, int newval, int alloc, int level)
+{
+	metapage_t *mp;
+	s8 oldroot;
+	int oldval;
+	s64 lblkno;
+	dmapctl_t *dcp;
+	int rc, leafno, ti;
+
+	/* get the buffer for the dmap control page for the specified
+	 * block number and control page level.
+	 */
+	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
+	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+	if (mp == NULL)
+		return (EIO);
+	dcp = (dmapctl_t *) mp->data;
+
+	/* determine the leaf number corresponding to the block and
+	 * the index within the dmap control tree.
+	 */
+	leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
+	ti = leafno + le32_to_cpu(dcp->leafidx);
+
+	/* save the current leaf value and the current root level (i.e.
+	 * maximum l2 free string described by this dmapctl).
+	 */
+	oldval = dcp->stree[ti];
+	oldroot = dcp->stree[ROOT];
+
+	/* check if this is a control page update for an allocation.
+	 * if so, update the leaf to reflect the new leaf value using
+	 * dbSplit(); otherwise (deallocation), use dbJoin() to update
+	 * the leaf with the new value.  in addition to updating the
+	 * leaf, dbSplit() will also split the binary buddy system of
+	 * the leaves, if required, and bubble new values within the
+	 * dmapctl tree, if required.
similarly, dbJoin() will join + * the binary buddy system of leaves and bubble new values up + * the dmapctl tree as required by the new leaf value. + */ + if (alloc) { + /* check if we are in the middle of a binary buddy + * system. this happens when we are performing the + * first allocation out of an allocation group that + * is part (not the first part) of a larger binary + * buddy system. if we are in the middle, back split + * the system prior to calling dbSplit() which assumes + * that it is at the front of a binary buddy system. + */ + if (oldval == NOFREE) { + dbBackSplit((dmtree_t *) dcp, leafno); + oldval = dcp->stree[ti]; + } + dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); + } else { + dbJoin((dmtree_t *) dcp, leafno, newval); + } + + /* check if the root of the current dmap control page changed due + * to the update and if the current dmap control page is not at + * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. + * root changed and this is not the top level), call this routine + * again (recursion) for the next higher level of the mapping to + * reflect the change in root for the current dmap control page. + */ + if (dcp->stree[ROOT] != oldroot) { + /* are we below the top level of the map. if so, + * bubble the root up to the next higher level. + */ + if (level < bmp->db_maxlevel) { + /* bubble up the new root of this dmap control page to + * the next level. + */ + if ((rc = + dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, + level + 1))) { + /* something went wrong in bubbling up the new + * root value, so backout the changes to the + * current dmap control page. + */ + if (alloc) { + dbJoin((dmtree_t *) dcp, leafno, + oldval); + } else { + /* the dbJoin() above might have + * caused a larger binary buddy system + * to form and we may now be in the + * middle of it. if this is the case, + * back split the buddies. + */ + if (dcp->stree[ti] == NOFREE) + dbBackSplit((dmtree_t *) + dcp, leafno); + dbSplit((dmtree_t *) dcp, leafno, + dcp->budmin, oldval); + } + + /* release the buffer and return the error. + */ + release_metapage(mp); + return (rc); + } + } else { + /* we're at the top level of the map. update + * the bmap control page to reflect the size + * of the maximum free buddy system. + */ + assert(level == bmp->db_maxlevel); + assert(bmp->db_maxfreebud == oldroot); + bmp->db_maxfreebud = dcp->stree[ROOT]; + } + } + + /* write the buffer. + */ + write_metapage(mp); + + return (0); +} + + +/* + * NAME: dbSplit() + * + * FUNCTION: update the leaf of a dmtree with a new value, splitting + * the leaf from the binary buddy system of the dmtree's + * leaves, as required. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * splitsz - the size the binary buddy system starting at the leaf + * must be split to, specified as the log2 number of blocks. + * newval - the new value for the leaf. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) +{ + int budsz; + int cursz; + s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); + + /* check if the leaf needs to be split. + */ + if (leaf[leafno] > tp->dmt_budmin) { + /* the split occurs by cutting the buddy system in half + * at the specified leaf until we reach the specified + * size. pick up the starting split size (current size + * - 1 in l2) and the corresponding buddy size. 
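+		 *
+		 * e.g., with budmin == 5: splitting a leaf whose value
+		 * is 7 down to splitsz == 5 first gives buddy
+		 * (leafno ^ 2) the value 6, then buddy (leafno ^ 1)
+		 * the value 5, after which the leaf itself is set to
+		 * newval.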
+ */ + cursz = leaf[leafno] - 1; + budsz = BUDSIZE(cursz, tp->dmt_budmin); + + /* split until we reach the specified size. + */ + while (cursz >= splitsz) { + /* update the buddy's leaf with its new value. + */ + dbAdjTree(tp, leafno ^ budsz, cursz); + + /* on to the next size and buddy. + */ + cursz -= 1; + budsz >>= 1; + } + } + + /* adjust the dmap tree to reflect the specified leaf's new + * value. + */ + dbAdjTree(tp, leafno, newval); +} + + +/* + * NAME: dbBackSplit() + * + * FUNCTION: back split the binary buddy system of dmtree leaves + * that hold a specified leaf until the specified leaf + * starts its own binary buddy system. + * + * the allocators typically perform allocations at the start + * of binary buddy systems and dbSplit() is used to accomplish + * any required splits. in some cases, however, allocation + * may occur in the middle of a binary system and requires a + * back split, with the split proceeding out from the middle of + * the system (less efficient) rather than the start of the + * system (more efficient). the cases in which a back split + * is required are rare and are limited to the first allocation + * within an allocation group which is a part (not first part) + * of a larger binary buddy system and a few exception cases + * in which a previous join operation must be backed out. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbBackSplit(dmtree_t * tp, int leafno) +{ + int budsz, bud, w, bsz, size; + int cursz; + s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); + + /* leaf should be part (not first part) of a binary + * buddy system. + */ + assert(leaf[leafno] == NOFREE); + + /* the back split is accomplished by iteratively finding the leaf + * that starts the buddy system that contains the specified leaf and + * splitting that system in two. this iteration continues until + * the specified leaf becomes the start of a buddy system. + * + * determine maximum possible l2 size for the specified leaf. + */ + size = + LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), + tp->dmt_budmin); + + /* determine the number of leaves covered by this size. this + * is the buddy size that we will start with as we search for + * the buddy system that contains the specified leaf. + */ + budsz = BUDSIZE(size, tp->dmt_budmin); + + /* back split. + */ + while (leaf[leafno] == NOFREE) { + /* find the leftmost buddy leaf. + */ + for (w = leafno, bsz = budsz;; bsz <<= 1, + w = (w < bud) ? w : bud) { + assert(bsz < le32_to_cpu(tp->dmt_nleafs)); + + /* determine the buddy. + */ + bud = w ^ bsz; + + /* check if this buddy is the start of the system. + */ + if (leaf[bud] != NOFREE) { + /* split the leaf at the start of the + * system in two. + */ + cursz = leaf[bud] - 1; + dbSplit(tp, bud, cursz, cursz); + break; + } + } + } + + assert(leaf[leafno] == size); +} + + +/* + * NAME: dbJoin() + * + * FUNCTION: update the leaf of a dmtree with a new value, joining + * the leaf with other leaves of the dmtree into a multi-leaf + * binary buddy system, as required. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. 
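+ *
+ *	joining proceeds pairwise, e.g. (illustrative, budmin == 5):
+ *	if leaf 4 takes newval 5 and its buddy leaf 5 also holds 5,
+ *	leaf 5 is marked NOFREE and leaf 4 tries again at value 6
+ *	against buddy 4 ^ 2 = 6, and so on until a buddy differs.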
+ * + * RETURN VALUES: none + */ +static void dbJoin(dmtree_t * tp, int leafno, int newval) +{ + int budsz, buddy; + s8 *leaf; + + /* can the new leaf value require a join with other leaves ? + */ + if (newval >= tp->dmt_budmin) { + /* pickup a pointer to the leaves of the tree. + */ + leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); + + /* try to join the specified leaf into a large binary + * buddy system. the join proceeds by attempting to join + * the specified leafno with its buddy (leaf) at new value. + * if the join occurs, we attempt to join the left leaf + * of the joined buddies with its buddy at new value + 1. + * we continue to join until we find a buddy that cannot be + * joined (does not have a value equal to the size of the + * last join) or until all leaves have been joined into a + * single system. + * + * get the buddy size (number of words covered) of + * the new value. + */ + budsz = BUDSIZE(newval, tp->dmt_budmin); + + /* try to join. + */ + while (budsz < le32_to_cpu(tp->dmt_nleafs)) { + /* get the buddy leaf. + */ + buddy = leafno ^ budsz; + + /* if the leaf's new value is greater than its + * buddy's value, we join no more. + */ + if (newval > leaf[buddy]) + break; + + assert(newval == leaf[buddy]); + + /* check which (leafno or buddy) is the left buddy. + * the left buddy gets to claim the blocks resulting + * from the join while the right gets to claim none. + * the left buddy is also eligable to participate in + * a join at the next higher level while the right + * is not. + * + */ + if (leafno < buddy) { + /* leafno is the left buddy. + */ + dbAdjTree(tp, buddy, NOFREE); + } else { + /* buddy is the left buddy and becomes + * leafno. + */ + dbAdjTree(tp, leafno, NOFREE); + leafno = buddy; + } + + /* on to try the next join. + */ + newval += 1; + budsz <<= 1; + } + } + + /* update the leaf value. + */ + dbAdjTree(tp, leafno, newval); +} + + +/* + * NAME: dbAdjTree() + * + * FUNCTION: update a leaf of a dmtree with a new value, adjusting + * the dmtree, as required, to reflect the new leaf value. + * the combination of any buddies must already be done before + * this is called. + * + * PARAMETERS: + * tp - pointer to the tree to be adjusted. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. + * + * RETURN VALUES: none + */ +static void dbAdjTree(dmtree_t * tp, int leafno, int newval) +{ + int lp, pp, k; + int max; + + /* pick up the index of the leaf for this leafno. + */ + lp = leafno + le32_to_cpu(tp->dmt_leafidx); + + /* is the current value the same as the old value ? if so, + * there is nothing to do. + */ + if (tp->dmt_stree[lp] == newval) + return; + + /* set the new value. + */ + tp->dmt_stree[lp] = newval; + + /* bubble the new value up the tree as required. + */ + for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { + /* get the index of the first leaf of the 4 leaf + * group containing the specified leaf (leafno). + */ + lp = ((lp - 1) & ~0x03) + 1; + + /* get the index of the parent of this 4 leaf group. + */ + pp = (lp - 1) >> 2; + + /* determine the maximum of the 4 leaves. + */ + max = TREEMAX(&tp->dmt_stree[lp]); + + /* if the maximum of the 4 is the same as the + * parent's value, we're done. + */ + if (tp->dmt_stree[pp] == max) + break; + + /* parent gets new value. + */ + tp->dmt_stree[pp] = max; + + /* parent becomes leaf for next go-round. 
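+		 *
+		 * e.g., for stree index lp == 7: its 4-leaf group
+		 * starts at ((7 - 1) & ~0x03) + 1 == 5 and its parent
+		 * is (5 - 1) >> 2 == 1, whose children are indices 5
+		 * through 8.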
+ */ + lp = pp; + } +} + + +/* + * NAME: dbFindLeaf() + * + * FUNCTION: search a dmtree_t for sufficient free blocks, returning + * the index of a leaf describing the free blocks if + * sufficient free blocks are found. + * + * the search starts at the top of the dmtree_t tree and + * proceeds down the tree to the leftmost leaf with sufficient + * free space. + * + * PARAMETERS: + * tp - pointer to the tree to be searched. + * l2nb - log2 number of free blocks to search for. + * leafidx - return pointer to be set to the index of the leaf + * describing at least l2nb free blocks if sufficient + * free blocks are found. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient free blocks. + */ +static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) +{ + int ti, n = 0, k, x = 0; + + /* first check the root of the tree to see if there is + * sufficient free space. + */ + if (l2nb > tp->dmt_stree[ROOT]) + return (ENOSPC); + + /* sufficient free space available. now search down the tree + * starting at the next level for the leftmost leaf that + * describes sufficient free space. + */ + for (k = le32_to_cpu(tp->dmt_height), ti = 1; + k > 0; k--, ti = ((ti + n) << 2) + 1) { + /* search the four nodes at this level, starting from + * the left. + */ + for (x = ti, n = 0; n < 4; n++) { + /* sufficient free space found. move to the next + * level (or quit if this is the last level). + */ + if (l2nb <= tp->dmt_stree[x + n]) + break; + } + + /* better have found something since the higher + * levels of the tree said it was here. + */ + assert(n < 4); + } + + /* set the return to the leftmost leaf describing sufficient + * free space. + */ + *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); + + return (0); +} + + +/* + * NAME: dbFindBits() + * + * FUNCTION: find a specified number of binary buddy free bits within a + * dmap bitmap word value. + * + * this routine searches the bitmap value for (1 << l2nb) free + * bits at (1 << l2nb) alignments within the value. + * + * PARAMETERS: + * word - dmap bitmap word value. + * l2nb - number of free bits specified as a log2 number. + * + * RETURN VALUES: + * starting bit number of free bits. + */ +static int dbFindBits(u32 word, int l2nb) +{ + int bitno, nb; + u32 mask; + + /* get the number of bits. + */ + nb = 1 << l2nb; + assert(nb <= DBWORD); + + /* complement the word so we can use a mask (i.e. 0s represent + * free bits) and compute the mask. + */ + word = ~word; + mask = ONES << (DBWORD - nb); + + /* scan the word for nb free bits at nb alignments. + */ + for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { + if ((mask & word) == mask) + break; + } + + ASSERT(bitno < 32); + + /* return the bit number. + */ + return (bitno); +} + + +/* + * NAME: dbMaxBud(u8 *cp) + * + * FUNCTION: determine the largest binary buddy string of free + * bits within 32-bits of the map. + * + * PARAMETERS: + * cp - pointer to the 32-bit value. + * + * RETURN VALUES: + * largest binary buddy of free bits within a dmap word. + */ +static int dbMaxBud(u8 * cp) +{ + signed char tmp1, tmp2; + + /* check if the wmap word is all free. if so, the + * free buddy size is BUDMIN. + */ + if (*((uint *) cp) == 0) + return (BUDMIN); + + /* check if the wmap word is half free. if so, the + * free buddy size is BUDMIN-1. + */ + if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) + return (BUDMIN - 1); + + /* not all free or half free. determine the free buddy + * size thru table lookup using quarters of the wmap word. 
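+	 *
+	 * budtab[b] gives the l2 size of the largest aligned run of
+	 * zero (free) bits in byte b, e.g. budtab[0x00] == 3,
+	 * budtab[0x0f] == 2, budtab[0xff] == NOFREE; the free buddy
+	 * size of the word is the maximum over its four bytes.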
 + */
+	tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
+	tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
+	return (max(tmp1, tmp2));
+}
+
+
+/*
+ * NAME:	cnttz(uint word)
+ *
+ * FUNCTION:	determine the number of trailing zeros within a 32-bit
+ *		value.
+ *
+ * PARAMETERS:
+ *	word	-  32-bit value to be examined.
+ *
+ * RETURN VALUES:
+ *	count of trailing zeros
+ */
+int cnttz(u32 word)
+{
+	int n;
+
+	for (n = 0; n < 32; n++, word >>= 1) {
+		if (word & 0x01)
+			break;
+	}
+
+	return (n);
+}
+
+
+/*
+ * NAME:	cntlz(u32 value)
+ *
+ * FUNCTION:	determine the number of leading zeros within a 32-bit
+ *		value.
+ *
+ * PARAMETERS:
+ *	value	-  32-bit value to be examined.
+ *
+ * RETURN VALUES:
+ *	count of leading zeros
+ */
+int cntlz(u32 value)
+{
+	int n;
+
+	for (n = 0; n < 32; n++, value <<= 1) {
+		if (value & HIGHORDER)
+			break;
+	}
+	return (n);
+}
+
+
+/*
+ * NAME:	blkstol2(s64 nb)
+ *
+ * FUNCTION:	convert a block count to its log2 value. if the block
+ *		count is not an l2 multiple, it is rounded up to the next
+ *		larger l2 multiple.
+ *
+ * PARAMETERS:
+ *	nb	-  number of blocks
+ *
+ * RETURN VALUES:
+ *	log2 number of blocks
+ */
+int blkstol2(s64 nb)
+{
+	int l2nb;
+	s64 mask;		/* meant to be signed */
+
+	mask = (s64) 1 << (64 - 1);
+
+	/* count the leading bits.
+	 */
+	for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
+		/* leading bit found.
+		 */
+		if (nb & mask) {
+			/* determine the l2 value.
+			 */
+			l2nb = (64 - 1) - l2nb;
+
+			/* check if we need to round up.
+			 */
+			if (~mask & nb)
+				l2nb++;
+
+			return (l2nb);
+		}
+	}
+	assert(0);
+	return 0;		/* fix compiler warning */
+}
+
+
+/*
+ * NAME:	fsDirty()
+ *
+ * FUNCTION:	mark the file system dirty after an unrecoverable error
+ *		(e.g. a block leak).  currently unimplemented: it logs a
+ *		message and asserts.
+ *
+ * PARAMETERS:
+ *	none
+ *
+ * RETURN VALUES:
+ *	none
+ */
+void fsDirty()
+{
+	printk("fsDirty(): bye-bye\n");
+	assert(0);
+}
+
+
+/*
+ * NAME:	dbAllocBottomUp()
+ *
+ * FUNCTION:	allocate the specified block range from the working block
+ *		allocation map.
+ *
+ *		the blocks will be allocated from the working map one dmap
+ *		at a time.
+ *
+ * PARAMETERS:
+ *	ip	-  pointer to in-core inode;
+ *	blkno	-  starting block number to be allocated.
+ *	nblocks	-  number of blocks to be allocated.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	EIO	- i/o error
+ */
+int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
+{
+	metapage_t *mp;
+	dmap_t *dp;
+	int nb, rc;
+	s64 lblkno, rem;
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap;
+
+	IREAD_LOCK(ipbmap);
+
+	/* block to be allocated better be within the mapsize. */
+	ASSERT(nblocks <= bmp->db_mapsize - blkno);
+
+	/*
+	 * allocate the blocks a dmap at a time.
+	 */
+	mp = NULL;
+	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
+		/* release previous dmap if any */
+		if (mp) {
+			write_metapage(mp);
+		}
+
+		/* get the buffer for the current dmap. */
+		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
+		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
+		if (mp == NULL) {
+			IREAD_UNLOCK(ipbmap);
+			return (EIO);
+		}
+		dp = (dmap_t *) mp->data;
+
+		/* determine the number of blocks to be allocated from
+		 * this dmap.
+		 */
+		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
+
+		DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
+
+		/* allocate the blocks. */
+		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
+			release_metapage(mp);
+			IREAD_UNLOCK(ipbmap);
+			return (rc);
+		}
+
+		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
+	}
+
+	/* write the last buffer.
*/ + write_metapage(mp); + + IREAD_UNLOCK(ipbmap); + + return (0); +} + + +static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + int rc; + int dbitno, word, rembits, nb, nwords, wbitno, agno; + s8 oldroot, *leaf; + dmaptree_t *tp = (dmaptree_t *) & dp->tree; + + /* save the current value of the root (i.e. maximum free string) + * of the dmap tree. + */ + oldroot = tp->stree[ROOT]; + + /* pick up a pointer to the leaves of the dmap tree */ + leaf = tp->stree + LEAFIND; + + /* determine the bit number and word within the dmap of the + * starting block. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + + /* block range better be within the dmap */ + assert(dbitno + nblocks <= BPERDMAP); + + /* allocate the bits of the dmap's words corresponding to the block + * range. not all bits of the first and last words may be contained + * within the block range. if this is the case, we'll work against + * those words (i.e. partial first and/or last) on an individual basis + * (a single pass), allocating the bits of interest by hand and + * updating the leaf corresponding to the dmap word. a single pass + * will be used for all dmap words fully contained within the + * specified range. within this pass, the bits of all fully contained + * dmap words will be marked as free in a single shot and the leaves + * will be updated. a single leaf may describe the free space of + * multiple dmap words, so we may update only a subset of the actual + * leaves corresponding to the dmap words of the block range. + */ + for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { + /* determine the bit number within the word and + * the number of bits within the word. + */ + wbitno = dbitno & (DBWORD - 1); + nb = min(rembits, DBWORD - wbitno); + + /* check if only part of a word is to be allocated. + */ + if (nb < DBWORD) { + /* allocate (set to 1) the appropriate bits within + * this dmap word. + */ + dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) + >> wbitno); + + word += 1; + } else { + /* one or more dmap words are fully contained + * within the block range. determine how many + * words and allocate (set to 1) the bits of these + * words. + */ + nwords = rembits >> L2DBWORD; + memset(&dp->wmap[word], (int) ONES, nwords * 4); + + /* determine how many bits */ + nb = nwords << L2DBWORD; + } + } + + /* update the free count for this dmap */ + dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); + + /* reconstruct summary tree */ + dbInitDmapTree(dp); + + BMAP_LOCK(bmp); + + /* if this allocation group is completely free, + * update the highest active allocation group number + * if this allocation group is the new max. + */ + agno = blkno >> bmp->db_agl2size; + if (agno > bmp->db_maxag) + bmp->db_maxag = agno; + + /* update the free count for the allocation group and map */ + bmp->db_agfree[agno] -= nblocks; + bmp->db_nfree -= nblocks; + + BMAP_UNLOCK(bmp); + + /* if the root has not changed, done. */ + if (tp->stree[ROOT] == oldroot) + return (0); + + /* root changed. bubble the change up to the dmap control pages. + * if the adjustment of the upper level control pages fails, + * backout the bit allocation (thus making everything consistent). 
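+	 * (reading of the call below: dbAdjCtl() is invoked with
+	 *  alloc == 1 and level == 0, i.e. propagate an allocation
+	 *  starting at the dmap's parent dmapctl page; on failure,
+	 *  dbFreeBits() undoes this dmap's allocation.)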
+ */ + if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) + dbFreeBits(bmp, dp, blkno, nblocks); + + return (rc); +} + + +/* + * NAME: dbExtendFS() + * + * FUNCTION: extend bmap from blkno for nblocks; + * dbExtendFS() updates bmap ready for dbAllocBottomUp(); + * + * L2 + * | + * L1---------------------------------L1 + * | | + * L0---------L0---------L0 L0---------L0---------L0 + * | | | | | | + * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; + * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm + * + * <---old---><----------------------------extend-----------------------> + */ +int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) +{ + struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); + int nbperpage = sbi->nbperpage; + int i, i0 = TRUE, j, j0 = TRUE, k, n; + s64 newsize; + s64 p; + metapage_t *mp, *l2mp, *l1mp, *l0mp; + dmapctl_t *l2dcp, *l1dcp, *l0dcp; + dmap_t *dp; + s8 *l0leaf, *l1leaf, *l2leaf; + bmap_t *bmp = sbi->bmap; + int agno, l2agsize, oldl2agsize; + s64 ag_rem; + + newsize = blkno + nblocks; + + jEVENT(0, ("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld\n", + (long long) blkno, (long long) nblocks, + (long long) newsize)); + + /* + * initialize bmap control page. + * + * all the data in bmap control page should exclude + * the mkfs hidden dmap page. + */ + + /* update mapsize */ + bmp->db_mapsize = newsize; + bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); + + /* compute new AG size */ + l2agsize = dbGetL2AGSize(newsize); + oldl2agsize = bmp->db_agl2size; + + bmp->db_agl2size = l2agsize; + bmp->db_agsize = 1 << l2agsize; + + /* compute new number of AG */ + agno = bmp->db_numag; + bmp->db_numag = newsize >> l2agsize; + bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; + + /* + * reconfigure db_agfree[] + * from old AG configuration to new AG configuration; + * + * coalesce contiguous k (newAGSize/oldAGSize) AGs; + * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; + * note: new AG size = old AG size * (2**x). 
+ */ + if (l2agsize == oldl2agsize) + goto extend; + k = 1 << (l2agsize - oldl2agsize); + ag_rem = bmp->db_agfree[0]; /* save agfree[0] */ + for (i = 0, n = 0; i < agno; n++) { + bmp->db_agfree[n] = 0; /* init collection point */ + + /* coalesce cotiguous k AGs; */ + for (j = 0; j < k && i < agno; j++, i++) { + /* merge AGi to AGn */ + bmp->db_agfree[n] += bmp->db_agfree[i]; + } + } + bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */ + + for (; n < MAXAG; n++) + bmp->db_agfree[n] = 0; + + /* + * update highest active ag number + */ + + bmp->db_maxag = bmp->db_maxag / k; + + /* + * extend bmap + * + * update bit maps and corresponding level control pages; + * global control page db_nfree, db_agfree[agno], db_maxfreebud; + */ + extend: + /* get L2 page */ + p = BMAPBLKNO + nbperpage; /* L2 page */ + l2mp = read_metapage(ipbmap, p, PSIZE, 0); + assert(l2mp); + l2dcp = (dmapctl_t *) l2mp->data; + + /* compute start L1 */ + k = blkno >> L2MAXL1SIZE; + l2leaf = l2dcp->stree + CTLLEAFIND + k; + p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */ + + /* + * extend each L1 in L2 + */ + for (; k < LPERCTL; k++, p += nbperpage) { + /* get L1 page */ + if (j0) { + /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */ + l1mp = read_metapage(ipbmap, p, PSIZE, 0); + if (l1mp == NULL) + goto errout; + l1dcp = (dmapctl_t *) l1mp->data; + + /* compute start L0 */ + j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; + l1leaf = l1dcp->stree + CTLLEAFIND + j; + p = BLKTOL0(blkno, sbi->l2nbperpage); + j0 = FALSE; + } else { + /* assign/init L1 page */ + l1mp = get_metapage(ipbmap, p, PSIZE, 0); + if (l1mp == NULL) + goto errout; + + l1dcp = (dmapctl_t *) l1mp->data; + + /* compute start L0 */ + j = 0; + l1leaf = l1dcp->stree + CTLLEAFIND; + p += nbperpage; /* 1st L0 of L1.k */ + } + + /* + * extend each L0 in L1 + */ + for (; j < LPERCTL; j++) { + /* get L0 page */ + if (i0) { + /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */ + + l0mp = read_metapage(ipbmap, p, PSIZE, 0); + if (l0mp == NULL) + goto errout; + l0dcp = (dmapctl_t *) l0mp->data; + + /* compute start dmap */ + i = (blkno & (MAXL0SIZE - 1)) >> + L2BPERDMAP; + l0leaf = l0dcp->stree + CTLLEAFIND + i; + p = BLKTODMAP(blkno, + sbi->l2nbperpage); + i0 = FALSE; + } else { + /* assign/init L0 page */ + l0mp = get_metapage(ipbmap, p, PSIZE, 0); + if (l0mp == NULL) + goto errout; + + l0dcp = (dmapctl_t *) l0mp->data; + + /* compute start dmap */ + i = 0; + l0leaf = l0dcp->stree + CTLLEAFIND; + p += nbperpage; /* 1st dmap of L0.j */ + } + + /* + * extend each dmap in L0 + */ + for (; i < LPERCTL; i++) { + /* + * reconstruct the dmap page, and + * initialize corresponding parent L0 leaf + */ + if ((n = blkno & (BPERDMAP - 1))) { + /* read in dmap page: */ + mp = read_metapage(ipbmap, p, + PSIZE, 0); + if (mp == NULL) + goto errout; + n = min(nblocks, (s64)BPERDMAP - n); + } else { + /* assign/init dmap page */ + mp = read_metapage(ipbmap, p, + PSIZE, 0); + if (mp == NULL) + goto errout; + + n = min(nblocks, (s64)BPERDMAP); + } + + dp = (dmap_t *) mp->data; + *l0leaf = dbInitDmap(dp, blkno, n); + + bmp->db_nfree += n; + agno = le64_to_cpu(dp->start) >> l2agsize; + bmp->db_agfree[agno] += n; + + write_metapage(mp); + + l0leaf++; + p += nbperpage; + + blkno += n; + nblocks -= n; + if (nblocks == 0) + break; + } /* for each dmap in a L0 */ + + /* + * build current L0 page from its leaves, and + * initialize corresponding parent L1 leaf + */ + *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); + write_metapage(l0mp); + + if (nblocks) + l1leaf++; /* continue for next L0 */ + else { + 
+				/* more than 1 L0 ? */
+				if (j > 0)
+					break;	/* build L1 page */
+				else {
+					/* summarize in global bmap page */
+					bmp->db_maxfreebud = *l1leaf;
+					release_metapage(l1mp);
+					release_metapage(l2mp);
+					goto finalize;
+				}
+			}
+		}		/* for each L0 in a L1 */
+
+		/*
+		 * build current L1 page from its leaves, and
+		 * initialize corresponding parent L2 leaf
+		 */
+		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
+		write_metapage(l1mp);
+
+		if (nblocks)
+			l2leaf++;	/* continue for next L1 */
+		else {
+			/* more than 1 L1 ? */
+			if (k > 0)
+				break;	/* build L2 page */
+			else {
+				/* summarize in global bmap page */
+				bmp->db_maxfreebud = *l2leaf;
+				release_metapage(l2mp);
+				goto finalize;
+			}
+		}
+	}			/* for each L1 in a L2 */
+
+	assert(0);
+
+	/*
+	 *	finalize bmap control page
+	 */
+      finalize:
+
+	return 0;
+
+      errout:
+	return EIO;
+}
+
+
+/*
+ *	dbFinalizeBmap()
+ */
+void dbFinalizeBmap(struct inode *ipbmap)
+{
+	bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
+	int actags, inactags, l2nl;
+	s64 ag_rem, actfree, inactfree, avgfree;
+	int i, n;
+
+	/*
+	 *	finalize bmap control page
+	 */
+//finalize:
+	/*
+	 * compute db_agpref: preferred ag to allocate from
+	 * (the leftmost ag with average free space in it);
+	 */
+//agpref:
+	/* get the number of active ags and inactive ags */
+	actags = bmp->db_maxag + 1;
+	inactags = bmp->db_numag - actags;
+	ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1);	/* ??? */
+
+	/* determine how many blocks are in the inactive allocation
+	 * groups. in doing this, we must account for the fact that
+	 * the rightmost group might be a partial group (i.e. file
+	 * system size is not a multiple of the group size).
+	 */
+	inactfree = (inactags && ag_rem) ?
+	    ((inactags - 1) << bmp->db_agl2size) + ag_rem
+	    : inactags << bmp->db_agl2size;
+
+	/* determine how many free blocks are in the active
+	 * allocation groups plus the average number of free blocks
+	 * within the active ags.
+	 */
+	actfree = bmp->db_nfree - inactfree;
+	avgfree = (u32) actfree / (u32) actags;
+
+	/* if the preferred allocation group does not have average free
+	 * space, re-establish the preferred group as the leftmost
+	 * group with average free space.
+	 */
+	if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
+		for (bmp->db_agpref = 0; bmp->db_agpref < actags;
+		     bmp->db_agpref++) {
+			if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
+				break;
+		}
+		assert(bmp->db_agpref < bmp->db_numag);
+	}
+
+	/*
+	 * compute db_aglevel, db_agheigth, db_agwidth, db_agstart:
+	 * an ag is covered in aglevel dmapctl summary tree,
+	 * at agheight level height (from leaf) with agwidth number of nodes
+	 * each, which starts at agstart index node of the summary tree node
+	 * array;
+	 */
+	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
+	l2nl =
+	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
+	bmp->db_agheigth = l2nl >> 1;
+	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
+	for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
+	     i--) {
+		bmp->db_agstart += n;
+		n <<= 2;
+	}
+
+/*
+printk("bmap: agpref:%d aglevel:%d agheigth:%d agwidth:%d\n",
+	bmp->db_agpref, bmp->db_aglevel, bmp->db_agheigth, bmp->db_agwidth);
+*/
+}
+
+
+/*
+ * NAME:	dbInitDmap()/ujfs_idmap_page()
+ *
+ * FUNCTION:	initialize working/persistent bitmap of the dmap page
+ *		for the specified number of blocks:
+ *
+ *		at entry, the bitmaps have been initialized as free (ZEROS);
+ *		The number of blocks will only account for the actually
+ *		existing blocks.
Blocks which don't actually exist in + * the aggregate will be marked as allocated (ONES); + * + * PARAMETERS: + * dp - pointer to page of map + * nblocks - number of blocks this page + * + * RETURNS: NONE + */ +static int dbInitDmap(dmap_t * dp, s64 Blkno, int nblocks) +{ + int blkno, w, b, r, nw, nb, i; +/* +printk("sbh_dmap: in dbInitDmap blkno:%Ld nblocks:%ld\n", Blkno, nblocks); +*/ + + /* starting block number within the dmap */ + blkno = Blkno & (BPERDMAP - 1); + + if (blkno == 0) { + dp->nblocks = dp->nfree = cpu_to_le32(nblocks); + dp->start = cpu_to_le64(Blkno); + + if (nblocks == BPERDMAP) { + memset(&dp->wmap[0], 0, LPERDMAP * 4); + memset(&dp->pmap[0], 0, LPERDMAP * 4); + goto initTree; + } + } else { + dp->nblocks = + cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); + dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); + } + + /* word number containing start block number */ + w = blkno >> L2DBWORD; + + /* + * free the bits corresponding to the block range (ZEROS): + * note: not all bits of the first and last words may be contained + * within the block range. + */ + for (r = nblocks; r > 0; r -= nb, blkno += nb) { + /* number of bits preceding range to be freed in the word */ + b = blkno & (DBWORD - 1); + /* number of bits to free in the word */ + nb = min(r, DBWORD - b); + + /* is partial word to be freed ? */ + if (nb < DBWORD) { + /* free (set to 0) from the bitmap word */ + dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) + >> b)); + dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) + >> b)); + + /* skip the word freed */ + w++; + } else { + /* free (set to 0) contiguous bitmap words */ + nw = r >> L2DBWORD; + memset(&dp->wmap[w], 0, nw * 4); + memset(&dp->pmap[w], 0, nw * 4); + + /* skip the words freed */ + nb = nw << L2DBWORD; + w += nw; + } + } + + /* + * mark bits following the range to be freed (non-existing + * blocks) as allocated (ONES) + */ +/* +printk("sbh_dmap: in dbInitDmap, preparing to mark unbacked, blkno:%ld nblocks:%ld\n", + blkno, nblocks); +*/ + + if (blkno == BPERDMAP) + goto initTree; + + /* the first word beyond the end of existing blocks */ + w = blkno >> L2DBWORD; + + /* does nblocks fall on a 32-bit boundary ? */ + b = blkno & (DBWORD - 1); +/* +printk("sbh_dmap: in dbInitDmap, b:%ld w:%ld mask: %lx\n", b, w, (ONES>>b)); +*/ + if (b) { + /* mark a partial word allocated */ + dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); + w++; + } + + /* set the rest of the words in the page to allocated (ONES) */ + for (i = w; i < LPERDMAP; i++) + dp->pmap[i] = dp->wmap[i] = ONES; + + /* + * init tree + */ + initTree: + return (dbInitDmapTree(dp)); +} + + +/* + * NAME: dbInitDmapTree()/ujfs_complete_dmap() + * + * FUNCTION: initialize summary tree of the specified dmap: + * + * at entry, bitmap of the dmap has been initialized; + * + * PARAMETERS: + * dp - dmap to complete + * blkno - starting block number for this dmap + * treemax - will be filled in with max free for this dmap + * + * RETURNS: max free string at the root of the tree + */ +static int dbInitDmapTree(dmap_t * dp) +{ + dmaptree_t *tp; + s8 *cp; + int i; + + /* init fixed info of tree */ + tp = &dp->tree; + tp->nleafs = cpu_to_le32(LPERDMAP); + tp->l2nleafs = cpu_to_le32(L2LPERDMAP); + tp->leafidx = cpu_to_le32(LEAFIND); + tp->height = cpu_to_le32(4); + tp->budmin = BUDMIN; + + /* init each leaf from corresponding wmap word: + * note: leaf is set to NOFREE(-1) if all blocks of corresponding + * bitmap word are allocated. 
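+ *	(e.g., an all-zero (fully free) wmap word gives a leaf value of
+ *	 BUDMIN, while an all-ones (fully allocated) word gives NOFREE;
+ *	 see dbMaxBud() above.)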
+ */ + cp = tp->stree + le32_to_cpu(tp->leafidx); + for (i = 0; i < LPERDMAP; i++) + *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); + + /* build the dmap's binary buddy summary tree */ + return (dbInitTree(tp)); +} + + +/* + * NAME: dbInitTree()/ujfs_adjtree() + * + * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. + * + * at entry, the leaves of the tree has been initialized + * from corresponding bitmap word or root of summary tree + * of the child control page; + * configure binary buddy system at the leaf level, then + * bubble up the values of the leaf nodes up the tree. + * + * PARAMETERS: + * cp - Pointer to the root of the tree + * l2leaves- Number of leaf nodes as a power of 2 + * l2min - Number of blocks that can be covered by a leaf + * as a power of 2 + * + * RETURNS: max free string at the root of the tree + */ +static int dbInitTree(dmaptree_t * dtp) +{ + int l2max, l2free, bsize, nextb, i; + int child, parent, nparent; + s8 *tp, *cp, *cp1; + + tp = dtp->stree; + + /* Determine the maximum free string possible for the leaves */ + l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; + + /* + * configure the leaf levevl into binary buddy system + * + * Try to combine buddies starting with a buddy size of 1 + * (i.e. two leaves). At a buddy size of 1 two buddy leaves + * can be combined if both buddies have a maximum free of l2min; + * the combination will result in the left-most buddy leaf having + * a maximum free of l2min+1. + * After processing all buddies for a given size, process buddies + * at the next higher buddy size (i.e. current size * 2) and + * the next maximum free (current free + 1). + * This continues until the maximum possible buddy combination + * yields maximum free. + */ + for (l2free = dtp->budmin, bsize = 1; l2free < l2max; + l2free++, bsize = nextb) { + /* get next buddy size == current buddy pair size */ + nextb = bsize << 1; + + /* scan each adjacent buddy pair at current buddy size */ + for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); + i < le32_to_cpu(dtp->nleafs); + i += nextb, cp += nextb) { + /* coalesce if both adjacent buddies are max free */ + if (*cp == l2free && *(cp + bsize) == l2free) { + *cp = l2free + 1; /* left take right */ + *(cp + bsize) = -1; /* right give left */ + } + } + } + + /* + * bubble summary information of leaves up the tree. + * + * Starting at the leaf node level, the four nodes described by + * the higher level parent node are compared for a maximum free and + * this maximum becomes the value of the parent node. + * when all lower level nodes are processed in this fashion then + * move up to the next level (parent becomes a lower level node) and + * continue the process for that level. + */ + for (child = le32_to_cpu(dtp->leafidx), + nparent = le32_to_cpu(dtp->nleafs) >> 2; + nparent > 0; nparent >>= 2, child = parent) { + /* get index of 1st node of parent level */ + parent = (child - 1) >> 2; + + /* set the value of the parent node as the maximum + * of the four nodes of the current level. 
+ */ + for (i = 0, cp = tp + child, cp1 = tp + parent; + i < nparent; i++, cp += 4, cp1++) + *cp1 = TREEMAX(cp); + } + + return (*tp); +} + + +/* + * dbInitDmapCtl() + * + * function: initialize dmapctl page + */ +static int dbInitDmapCtl(dmapctl_t * dcp, int level, int i) +{ /* start leaf index not covered by range */ + s8 *cp; + + dcp->nleafs = cpu_to_le32(LPERCTL); + dcp->l2nleafs = cpu_to_le32(L2LPERCTL); + dcp->leafidx = cpu_to_le32(CTLLEAFIND); + dcp->height = cpu_to_le32(5); + dcp->budmin = L2BPERDMAP + L2LPERCTL * level; + + /* + * initialize the leaves of current level that were not covered + * by the specified input block range (i.e. the leaves have no + * low level dmapctl or dmap). + */ + cp = &dcp->stree[CTLLEAFIND + i]; + for (; i < LPERCTL; i++) + *cp++ = NOFREE; + + /* build the dmap's binary buddy summary tree */ + return (dbInitTree((dmaptree_t *) dcp)); +} + + +/* + * NAME: dbGetL2AGSize()/ujfs_getagl2size() + * + * FUNCTION: Determine log2(allocation group size) from aggregate size + * + * PARAMETERS: + * nblocks - Number of blocks in aggregate + * + * RETURNS: log2(allocation group size) in aggregate blocks + */ +static int dbGetL2AGSize(s64 nblocks) +{ + s64 sz; + s64 m; + int l2sz; + + if (nblocks < BPERDMAP * MAXAG) + return (L2BPERDMAP); + + /* round up aggregate size to power of 2 */ + m = ((u64) 1 << (64 - 1)); + for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { + if (m & nblocks) + break; + } + + sz = (s64) 1 << l2sz; + if (sz < nblocks) + l2sz += 1; + + /* agsize = roundupSize/max_number_of_ag */ + return (l2sz - L2MAXAG); +} + + +/* + * NAME: dbMapFileSizeToMapSize() + * + * FUNCTION: compute number of blocks the block allocation map file + * can cover from the map file size; + * + * RETURNS: Number of blocks which can be covered by this block map file; + */ + +/* + * maximum number of map pages at each level including control pages + */ +#define MAXL0PAGES (1 + LPERCTL) +#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) +#define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) + +/* + * convert number of map pages to the zero origin top dmapctl level + */ +#define BMAPPGTOLEV(npages) \ + (((npages) <= 3 + MAXL0PAGES) ? 0 \ + : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) + +s64 dbMapFileSizeToMapSize(struct inode * ipbmap) +{ + struct super_block *sb = ipbmap->i_sb; + s64 nblocks; + s64 npages, ndmaps; + int level, i; + int complete, factor; + + nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; + npages = nblocks >> JFS_SBI(sb)->l2nbperpage; + level = BMAPPGTOLEV(npages); + + /* At each level, accumulate the number of dmap pages covered by + * the number of full child levels below it; + * repeat for the last incomplete child level. + */ + ndmaps = 0; + npages--; /* skip the first global control page */ + /* skip higher level control pages above top level covered by map */ + npages -= (2 - level); + npages--; /* skip top level's control page */ + for (i = level; i >= 0; i--) { + factor = + (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); + complete = (u32) npages / factor; + ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL + : ((i == 1) ? 
LPERCTL : 1)); + + /* pages in last/incomplete child */ + npages = (u32) npages % factor; + /* skip incomplete child's level control page */ + npages--; + } + + /* convert the number of dmaps into the number of blocks + * which can be covered by the dmaps; + */ + nblocks = ndmaps << L2BPERDMAP; + + return (nblocks); +} + + +#ifdef _JFS_DEBUG_DMAP +/* + * DBinitmap() + */ +static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results) +{ + int npages; + u32 *dbmap, *d; + int n; + s64 lblkno, cur_block; + dmap_t *dp; + metapage_t *mp; + + npages = size / 32768; + npages += (size % 32768) ? 1 : 0; + + dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap); + if (dbmap == NULL) + assert(0); + + for (n = 0, d = dbmap; n < npages; n++, d += 1024) + bzero(d, 4096); + + /* Need to initialize from disk map pages + */ + for (d = dbmap, cur_block = 0; cur_block < size; + cur_block += BPERDMAP, d += LPERDMAP) { + lblkno = BLKTODMAP(cur_block, + JFS_SBI(ipbmap->i_sb)->bmap-> + db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + assert(0); + } + dp = (dmap_t *) mp->data; + + for (n = 0; n < LPERDMAP; n++) + d[n] = le32_to_cpu(dp->wmap[n]); + + release_metapage(mp); + } + + *results = dbmap; +} + + +/* + * DBAlloc() + */ +void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == 0); + *dbmap |= mask; + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBFree() + */ +static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == mask); + *dbmap &= ~mask; + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBAllocCK() + */ +static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == mask); + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBFreeCK() + */ +static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == 0); + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * dbPrtMap() + */ +static void dbPrtMap(bmap_t * bmp) +{ + printk(" mapsize: %d%d\n", bmp->db_mapsize); + printk(" nfree: %d%d\n", bmp->db_nfree); + printk(" numag: %d\n", bmp->db_numag); + printk(" agsize: %d%d\n", 
+	       bmp->db_agsize);
+	printk("   agl2size: %d\n", bmp->db_agl2size);
+	printk("   agwidth: %d\n", bmp->db_agwidth);
+	printk("   agstart: %d\n", bmp->db_agstart);
+	printk("   agheigth: %d\n", bmp->db_agheigth);
+	printk("   aglevel: %d\n", bmp->db_aglevel);
+	printk("   maxlevel: %d\n", bmp->db_maxlevel);
+	printk("   maxag: %d\n", bmp->db_maxag);
+	printk("   agpref: %d\n", bmp->db_agpref);
+	printk("   l2nbppg: %d\n", bmp->db_l2nbperpage);
+}
+
+
+/*
+ * dbPrtCtl()
+ */
+static void dbPrtCtl(dmapctl_t * dcp)
+{
+	int i, j, n;
+
+	printk("   height: %08x\n", le32_to_cpu(dcp->height));
+	printk("   leafidx: %08x\n", le32_to_cpu(dcp->leafidx));
+	printk("   budmin: %08x\n", dcp->budmin);
+	printk("   nleafs: %08x\n", le32_to_cpu(dcp->nleafs));
+	printk("   l2nleafs: %08x\n", le32_to_cpu(dcp->l2nleafs));
+
+	printk("\n Tree:\n");
+	for (i = 0; i < CTLLEAFIND; i += 8) {
+		n = min(8, CTLLEAFIND - i);
+
+		for (j = 0; j < n; j++)
+			printk("  [%03x]: %02x", i + j,
+			       (char) dcp->stree[i + j]);
+		printk("\n");
+	}
+
+	printk("\n Tree Leaves:\n");
+	for (i = 0; i < LPERCTL; i += 8) {
+		n = min(8, LPERCTL - i);
+
+		for (j = 0; j < n; j++)
+			printk("  [%03x]: %02x",
+			       i + j,
+			       (char) dcp->stree[i + j + CTLLEAFIND]);
+		printk("\n");
+	}
+}
+#endif				/* _JFS_DEBUG_DMAP */
diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_dtree.c linuxppc64_2_4/fs/jfs/jfs_dtree.c
--- ../kernel.org/linux/fs/jfs/jfs_dtree.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_dtree.c	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,4527 @@
+/*
+ *
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+*/
+
+/*
+ *	jfs_dtree.c: directory B+-tree manager
+ *
+ * B+-tree with variable length key directory:
+ *
+ * each directory page is structured as an array of 32-byte
+ * directory entry slots initialized as a freelist
+ * to avoid search/compaction of free space at insertion.
+ * when an entry is inserted, a number of slots are allocated
+ * from the freelist as required to store variable length data
+ * of the entry; when the entry is deleted, slots of the entry
+ * are returned to the freelist.
+ *
+ * leaf entry stores full name as key and file serial number
+ * (aka inode number) as data.
+ * internal/router entry stores suffix compressed name
+ * as key and simple extent descriptor as data.
+ *
+ * each directory page maintains a sorted entry index table
+ * which stores the start slot index of sorted entries
+ * to allow binary search on the table.
+ *
+ * directory starts as a root/leaf page in on-disk inode
+ * inline data area.
+ * when it becomes full, it starts a leaf of an external extent
+ * of length of 1 block.
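+ * (illustrative growth sequence, assuming 4 KByte pages: the 288-byte
+ *  in-inode root fills and spills into a 1-block leaf, which is then
+ *  grown in place until it reaches a full page, after which full
+ *  pages are split as described next.)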
+ * each time the first leaf becomes full,
+ * it is extended rather than split (its size is doubled),
+ * until its length becomes 4 KBytes; from then on, the extent is split
+ * into a new 4 KByte extent when it becomes full,
+ * to reduce external fragmentation of small directories.
+ *
+ * blah, blah, blah, for linear scan of directory in pieces by
+ * readdir().
+ *
+ *
+ *	case-insensitive directory file system
+ *
+ * names are stored in a case-sensitive way in the leaf entry,
+ * but are stored, searched and compared in case-insensitive (uppercase)
+ * order (i.e., both search key and entry key are folded for
+ * search/compare):
+ * (note that case-sensitive order is BROKEN in storage, e.g.,
+ *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
+ *
+ * entries which fold to the same key make up an equivalence class
+ * whose members are stored as a contiguous cluster (may cross a page
+ * boundary) but whose order is arbitrary, acting as duplicates, e.g.,
+ * abc, Abc, aBc, abC)
+ *
+ * once a match is found at a leaf, a scan forward/backward is required,
+ * either, in a case-insensitive search, for duplicates,
+ * or, in a case-sensitive search, for the exact match
+ *
+ * router entry must be created/stored in a case-insensitive way
+ * in the internal entry:
+ * (the rightmost key of the left page and the leftmost key of the right
+ * page are folded, and the suffix compression of the folded keys is
+ * propagated as the router key in the parent)
+ * (e.g., if a split occurs between two such keys, the folded key rather
+ * than the raw case-sensitive key should be made the router key for
+ * the split)
+ *
+ * case-insensitive search:
+ *
+ *	fold search key;
+ *
+ *	case-insensitive search of B-tree:
+ *	for internal entry, router key is already folded;
+ *	for leaf entry, fold the entry key before comparison.
+ *
+ *	if (leaf entry case-insensitive match found)
+ *		if (next entry satisfies case-insensitive match)
+ *			return EDUPLICATE;
+ *		if (prev entry satisfies case-insensitive match)
+ *			return EDUPLICATE;
+ *		return match;
+ *	else
+ *		return no match;
+ *
+ * serialization:
+ *	target directory inode lock is being held on entry/exit
+ *	of all main directory service routines.
+ * + * log based recovery: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* dtree split parameter */ +typedef struct { + metapage_t *mp; + s16 index; + s16 nslot; + component_t *key; + ddata_t *data; + pxdlist_t *pxdlist; +} dtsplit_t; + +#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) + +/* get page buffer for specified block address */ +#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ +{\ + BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ + if (!(RC))\ + {\ + if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ + ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ + {\ + jERROR(1,("DT_GETPAGE: dtree page corrupt\n"));\ + BT_PUTPAGE(MP);\ + MP = NULL;\ + RC = EIO;\ + }\ + }\ +} + +/* for consistency */ +#define DT_PUTPAGE(MP) BT_PUTPAGE(MP) + +#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ + BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) + +#ifdef _JFS_STATISTICS +/* + * statistics + */ +static uint dtstat_Search = 0; +static uint dtstat_Insert = 0; +static uint dtstat_split = 0; +static uint dtstat_rootsplit = 0; +static uint dtstat_Delete = 0; +static uint dtstat_free = 0; +#endif /* _JFS_STATISTICS */ + +/* + * forward references + */ +static int dtSplitUp(int tid, struct inode *ip, + dtsplit_t * split, btstack_t * btstack); + +static int dtSplitPage(int tid, struct inode *ip, dtsplit_t * split, + metapage_t ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); + +static int dtExtendPage(int tid, struct inode *ip, + dtsplit_t * split, btstack_t * btstack); + +static int dtSplitRoot(int tid, struct inode *ip, + dtsplit_t * split, metapage_t ** rmpp); + +static int dtDeleteUp(int tid, struct inode *ip, metapage_t * fmp, + dtpage_t * fp, btstack_t * btstack); + +#ifdef _STILL_TO_PORT +static int dtSearchNode(struct inode *ip, + s64 lmxaddr, pxd_t * kpxd, btstack_t * btstack); +#endif /* _STILL_TO_PORT */ + +static int dtRelink(int tid, struct inode *ip, dtpage_t * p); + +static int dtReadFirst(struct inode *ip, btstack_t * btstack); + +static int dtReadNext(struct inode *ip, + loff_t * offset, btstack_t * btstack); + +static int dtCompare(component_t * key, dtpage_t * p, int si); + +static int ciCompare(component_t * key, dtpage_t * p, int si, int flag); + +static void dtGetKey(dtpage_t * p, int i, component_t * key, int flag); + +static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, + int ri, component_t * key, int flag); + +static void dtInsertEntry(dtpage_t * p, int index, component_t * key, + ddata_t * data, dtlock_t ** dtlock); + +static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, + dtlock_t ** sdtlock, dtlock_t ** ddtlock, + int do_index); + +static void dtDeleteEntry(dtpage_t * p, int fi, dtlock_t ** dtlock); + +static void dtTruncateEntry(dtpage_t * p, int ti, dtlock_t ** dtlock); + +static void dtLinelockFreelist(dtpage_t * p, int m, dtlock_t ** dtlock); + +#define ciToUpper(c) UniStrupr((c)->name) + +/* + * find_index() + * + * Returns dtree page containing directory table entry for specified + * index and pointer to its entry. + * + * mp must be released by caller. 
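+ *
+ * (typical calling pattern, sketched from the callers below:
+ *
+ *	metapage_t *mp = 0;
+ *	slot = find_index(ip, index, &mp);
+ *	...
+ *	if (mp)
+ *		release_metapage(mp);
+ *
+ *  mp stays 0 when the inline table inside the inode is used.)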
+ */ +static dir_table_slot_t *find_index(struct inode *ip, u32 index, + metapage_t ** mp) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + s64 blkno; + s64 offset; + int page_offset; + dir_table_slot_t *slot; + static int maxWarnings = 10; + + if (index < 2) { + if (maxWarnings) { + jERROR(1, ("find_entry called with index = %d\n", + index)); + maxWarnings--; + } + return 0; + } + + if (index >= jfs_ip->next_index) { + jFYI(1, ("find_entry called with index >= next_index\n")); + return 0; + } + + if (jfs_ip->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)) { + /* + * Inline directory table + */ + *mp = 0; + slot = &jfs_ip->i_dirtable[index - 2]; + } else { + offset = (index - 2) * sizeof(dir_table_slot_t); + page_offset = offset & (PSIZE - 1); + blkno = ((offset + 1) >> L2PSIZE) << + JFS_SBI(ip->i_sb)->l2nbperpage; + + if (*mp && ((*mp)->index != blkno)) { + release_metapage(*mp); + *mp = 0; + } + if (*mp == 0) + *mp = read_metapage(ip, blkno, PSIZE, 0); + if (*mp == 0) { + jERROR(1, + ("free_index: error reading directory table\n")); + return 0; + } + + slot = + (dir_table_slot_t *) ((char *) (*mp)->data + + page_offset); + } + return slot; +} + +static inline void lock_index(int tid, struct inode *ip, metapage_t * mp, + u32 index) +{ + tlock_t *tlck; + linelock_t *llck; + lv_t *lv; + + tlck = txLock(tid, ip, mp, tlckDATA); + llck = (linelock_t *) tlck->lock; + + if (llck->index >= llck->maxcnt) + llck = txLinelock(llck); + lv = &llck->lv[llck->index]; + + /* + * Linelock slot size is twice the size of directory table + * slot size. 512 entries per page. + */ + lv->offset = ((index - 2) & 511) >> 1; + lv->length = 1; + llck->index++; +} + +/* + * add_index() + * + * Adds an entry to the directory index table. This is used to provide + * each directory entry with a persistent index in which to resume + * directory traversals + */ +static u32 add_index(int tid, struct inode *ip, s64 bn, int slot) +{ + struct super_block *sb = ip->i_sb; + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + u64 blkno; + dir_table_slot_t *dirtab_slot; + u32 index; + linelock_t *llck; + lv_t *lv; + metapage_t *mp; + s64 offset; + uint page_offset; + int rc; + tlock_t *tlck; + s64 xaddr; + + ASSERT(DO_INDEX(ip)); + + if (jfs_ip->next_index < 2) { + jERROR(1, ("next_index = %d. Please fix this!\n", + jfs_ip->next_index)); + jfs_ip->next_index = 2; + } + + index = jfs_ip->next_index++; + + if (index <= MAX_INLINE_DIRTABLE_ENTRY) { + /* + * i_size reflects size of index table, or 8 bytes per entry. 
+ */ + ip->i_size = (loff_t) (index - 1) << 3; + + /* + * dir table fits inline within inode + */ + dirtab_slot = &jfs_ip->i_dirtable[index-2]; + dirtab_slot->flag = DIR_INDEX_VALID; + dirtab_slot->slot = slot; + DTSaddress(dirtab_slot, bn); + + set_cflag(COMMIT_Dirtable, ip); + + return index; + } + if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { + /* + * It's time to move the inline table to an external + * page and begin to build the xtree + */ + + /* + * Save the table, we're going to overwrite it with the + * xtree root + */ + dir_table_slot_t temp_table[12]; + memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); + + /* + * Initialize empty x-tree + */ + xtInitRoot(tid, ip); + + /* + * Clear this flag in case it had been set + */ + clear_cflag(COMMIT_Stale, ip); + + /* + * Allocate the first block & add it to the xtree + */ + xaddr = 0; + if ((rc = + xtInsert(tid, ip, 0, 0, sbi->nbperpage, + &xaddr, 0))) { + jFYI(1, ("add_index: xtInsert failed!\n")); + return -1; + } + ip->i_size = PSIZE; + ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); + + if ((mp = get_metapage(ip, 0, ip->i_blksize, 0)) == 0) { + jERROR(1, ("add_index: get_metapage failed!\n")); + xtTruncate(tid, ip, 0, COMMIT_PWMAP); + return -1; + } + tlck = txLock(tid, ip, mp, tlckDATA); + llck = (linelock_t *) & tlck->lock; + ASSERT(llck->index == 0); + lv = &llck->lv[0]; + + lv->offset = 0; + lv->length = 6; /* tlckDATA slot size is 16 bytes */ + llck->index++; + + memcpy(mp->data, temp_table, sizeof(temp_table)); + + mark_metapage_dirty(mp); + release_metapage(mp); + + /* + * Logging is now directed by xtree tlocks + */ + clear_cflag(COMMIT_Dirtable, ip); + } + + offset = (index - 2) * sizeof(dir_table_slot_t); + page_offset = offset & (PSIZE - 1); + blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; + if (page_offset == 0) { + /* + * This will be the beginning of a new page + */ + xaddr = 0; + if ((rc = + xtInsert(tid, ip, 0, blkno, sbi->nbperpage, + &xaddr, 0))) { + jFYI(1, ("add_index: xtInsert failed!\n")); + jfs_ip->next_index--; + return -1; + } + ip->i_size += PSIZE; + ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); + + if ((mp = get_metapage(ip, blkno, PSIZE, 0))) + memset(mp->data, 0, PSIZE); /* Just looks better */ + else + xtTruncate(tid, ip, offset, COMMIT_PWMAP); + } else + mp = read_metapage(ip, blkno, PSIZE, 0); + + if (mp == 0) { + jERROR(1, ("add_index: get/read_metapage failed!\n")); + return -1; + } + + lock_index(tid, ip, mp, index); + + dirtab_slot = + (dir_table_slot_t *) ((char *) mp->data + page_offset); + dirtab_slot->flag = DIR_INDEX_VALID; + dirtab_slot->slot = slot; + DTSaddress(dirtab_slot, bn); + + mark_metapage_dirty(mp); + release_metapage(mp); + + return index; +} + +/* + * free_index() + * + * Marks an entry to the directory index table as free. 
+ */ +static void free_index(int tid, struct inode *ip, u32 index, u32 next) +{ + dir_table_slot_t *dirtab_slot; + metapage_t *mp = 0; + + dirtab_slot = find_index(ip, index, &mp); + + if (dirtab_slot == 0) + return; + + dirtab_slot->flag = DIR_INDEX_FREE; + dirtab_slot->slot = dirtab_slot->addr1 = 0; + dirtab_slot->addr2 = cpu_to_le32(next); + + if (mp) { + lock_index(tid, ip, mp, index); + mark_metapage_dirty(mp); + release_metapage(mp); + } else + set_cflag(COMMIT_Dirtable, ip); +} + +/* + * modify_index() + * + * Changes an entry in the directory index table + */ +static void modify_index(int tid, struct inode *ip, u32 index, s64 bn, + int slot, metapage_t ** mp) +{ + dir_table_slot_t *dirtab_slot; + + dirtab_slot = find_index(ip, index, mp); + + if (dirtab_slot == 0) + return; + + DTSaddress(dirtab_slot, bn); + dirtab_slot->slot = slot; + + if (*mp) { + lock_index(tid, ip, *mp, index); + mark_metapage_dirty(*mp); + } else + set_cflag(COMMIT_Dirtable, ip); +} + +/* + * get_index() + * + * reads a directory table slot + */ +static int get_index(struct inode *ip, u32 index, + dir_table_slot_t * dirtab_slot) +{ + metapage_t *mp = 0; + dir_table_slot_t *slot; + + slot = find_index(ip, index, &mp); + if (slot == 0) { + return -EIO; + } + + memcpy(dirtab_slot, slot, sizeof(dir_table_slot_t)); + + if (mp) + release_metapage(mp); + + return 0; +} + +/* + * dtSearch() + * + * function: + * Search for the entry with specified key + * + * parameter: + * + * return: 0 - search result on stack, leaf page pinned; + * errno - I/O error + */ +int dtSearch(struct inode *ip, + component_t * key, ino_t * data, btstack_t * btstack, int flag) +{ + int rc = 0; + int cmp = 1; /* init for empty page */ + s64 bn; + metapage_t *mp; + dtpage_t *p; + s8 *stbl; + int base, index, lim; + btframe_t *btsp; + pxd_t *pxd; + int psize = 288; /* initial in-line directory */ + ino_t inumber; + component_t ciKey; + struct super_block *sb = ip->i_sb; + + ciKey.name = + (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), + GFP_NOFS); + if (ciKey.name == 0) { + rc = ENOMEM; + goto dtSearch_Exit2; + } + + + /* uppercase search key for c-i directory */ + UniStrcpy(ciKey.name, key->name); + ciKey.namlen = key->namlen; + + /* only uppercase if case-insensitive support is on */ + if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { + ciToUpper(&ciKey); + } + BT_CLR(btstack); /* reset stack */ + + /* init level count for max pages to split */ + btstack->nsplit = 1; + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + goto dtSearch_Exit1; + + /* get sorted entry table of the page */ + stbl = DT_GETSTBL(p); + + /* + * binary search with search key K on the current page. 
+ */ + for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { + index = base + (lim >> 1); + + if (p->header.flag & BT_LEAF) { + /* uppercase leaf name to compare */ + cmp = + ciCompare(&ciKey, p, stbl[index], + JFS_SBI(sb)->mntflag); + } else { + /* router key is in uppercase */ + + cmp = dtCompare(&ciKey, p, stbl[index]); + + + } + if (cmp == 0) { + /* + * search hit + */ + /* search hit - leaf page: + * return the entry found + */ + if (p->header.flag & BT_LEAF) { + inumber = le32_to_cpu( + ((ldtentry_t *) & p->slot[stbl[index]])->inumber); + + /* + * search for JFS_LOOKUP + */ + if (flag == JFS_LOOKUP) { + *data = inumber; + rc = 0; + goto out; + } + + /* + * search for JFS_CREATE + */ + if (flag == JFS_CREATE) { + *data = inumber; + rc = EEXIST; + goto out; + } + + /* + * search for JFS_REMOVE or JFS_RENAME + */ + if ((flag == JFS_REMOVE || + flag == JFS_RENAME) && + *data != inumber) { + rc = ESTALE; + goto out; + } + + /* + * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME + */ + /* save search result */ + *data = inumber; + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + rc = 0; + goto dtSearch_Exit1; + } + + /* search hit - internal page: + * descend/search its child page + */ + goto getChild; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or (maxindex + 1) index. + */ + /* + * search miss - leaf page + * + * return location of entry (base) where new entry with + * search key K is to be inserted. + */ + if (p->header.flag & BT_LEAF) { + /* + * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME + */ + if (flag == JFS_LOOKUP || flag == JFS_REMOVE || + flag == JFS_RENAME) { + rc = ENOENT; + goto out; + } + + /* + * search for JFS_CREATE|JFS_FINDDIR: + * + * save search result + */ + *data = 0; + btsp = btstack->top; + btsp->bn = bn; + btsp->index = base; + btsp->mp = mp; + + rc = 0; + goto dtSearch_Exit1; + } + + /* + * search miss - internal page + * + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? base - 1 : base; + + /* + * go down to child page + */ + getChild: + /* update max. number of pages to split */ + if (btstack->nsplit >= 8) { + /* Something's corrupted, mark filesytem dirty so + * chkdsk will fix it. + */ + jERROR(1, ("stack overrun in dtSearch!\n")); + updateSuper(sb, FM_DIRTY); + rc = EIO; + goto out; + } + btstack->nsplit++; + + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, index); + + /* get the child page block number */ + pxd = (pxd_t *) & p->slot[stbl[index]]; + bn = addressPXD(pxd); + psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; + + /* unpin the parent page */ + DT_PUTPAGE(mp); + } + + out: + DT_PUTPAGE(mp); + + dtSearch_Exit1: + + kfree(ciKey.name); + + dtSearch_Exit2: + + return rc; +} + + +/* + * dtInsert() + * + * function: insert an entry to directory tree + * + * parameter: + * + * return: 0 - success; + * errno - failure; + */ +int dtInsert(int tid, struct inode *ip, + component_t * name, ino_t * fsn, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; /* meta-page buffer */ + dtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index; + dtsplit_t split; /* split information */ + ddata_t data; + dtlock_t *dtlck; + int n; + tlock_t *tlck; + lv_t *lv; + + /* + * retrieve search result + * + * dtSearch() returns (leaf page pinned, index at which to insert). + * n.b. 
dtSearch() may return index of (maxindex + 1) of + * the full page. + */ + DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); + + /* + * insert entry for new key + */ + if (DO_INDEX(ip)) { + if (JFS_IP(ip)->next_index == -1) { + DT_PUTPAGE(mp); + return EMLINK; + } + n = NDTLEAF(name->namlen); + data.leaf.tid = tid; + data.leaf.ip = ip; + } else { + n = NDTLEAF_LEGACY(name->namlen); + data.leaf.ip = 0; /* signifies legacy directory format */ + } + data.leaf.ino = cpu_to_le32(*fsn); + + /* + * leaf page does not have enough room for new entry: + * + * extend/split the leaf page; + * + * dtSplitUp() will insert the entry and unpin the leaf page. + */ + if (n > p->header.freecnt) { + split.mp = mp; + split.index = index; + split.nslot = n; + split.key = name; + split.data = &data; + rc = dtSplitUp(tid, ip, &split, btstack); + return rc; + } + + /* + * leaf page does have enough room for new entry: + * + * insert the new data entry into the leaf page; + */ + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page + */ + tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + dtInsertEntry(p, index, name, &data, &dtlck); + + /* linelock stbl of non-root leaf page */ + if (!(p->header.flag & BT_ROOT)) { + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + n = index >> L2DTSLOTSIZE; + lv->offset = p->header.stblindex + n; + lv->length = + ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; + dtlck->index++; + } + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + + return 0; +} + + +/* + * dtSplitUp() + * + * function: propagate insertion bottom up; + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * leaf page unpinned; + */ +static int dtSplitUp(int tid, + struct inode *ip, dtsplit_t * split, btstack_t * btstack) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + int rc = 0; + metapage_t *smp; + dtpage_t *sp; /* split page */ + metapage_t *rmp; + dtpage_t *rp; /* new right page split from sp */ + pxd_t rpxd; /* new right page extent descriptor */ + metapage_t *lmp; + dtpage_t *lp; /* left child page */ + int skip; /* index of entry of insertion */ + btframe_t *parent; /* parent page entry on traverse stack */ + s64 xaddr, nxaddr; + int xlen, xsize; + pxdlist_t pxdlist; + pxd_t *pxd; + component_t key = { 0, 0 }; + ddata_t *data = split->data; + int n; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + + /* get split page */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + + key.name = + (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), + GFP_NOFS); + if (key.name == 0) { + DT_PUTPAGE(smp); + rc = ENOMEM; + goto dtSplitUp_Exit; + } + + /* + * split leaf page + * + * The split routines insert the new entry, and + * acquire txLock as appropriate. 
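+	 *
+	 * (three cases follow: split of the root leaf via dtSplitRoot(),
+	 *  in-place extension of the first/only leaf via dtExtendPage()
+	 *  while it is still smaller than a full page, and an ordinary
+	 *  page split via dtSplitPage().)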
+ */ + /* + * split root leaf page: + */ + if (sp->header.flag & BT_ROOT) { + /* + * allocate a single extent child page + */ + xlen = 1; + n = sbi->bsize >> L2DTSLOTSIZE; + n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ + n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ + if (n <= split->nslot) + xlen++; + if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) + goto freeKeyName; + + pxdlist.maxnpxd = 1; + pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + split->pxdlist = &pxdlist; + rc = dtSplitRoot(tid, ip, split, &rmp); + + DT_PUTPAGE(rmp); + DT_PUTPAGE(smp); + + goto freeKeyName; + } + + /* + * extend first leaf page + * + * extend the 1st extent if less than buffer page size + * (dtExtendPage() reurns leaf page unpinned) + */ + pxd = &sp->header.self; + xlen = lengthPXD(pxd); + xsize = xlen << sbi->l2bsize; + if (xsize < PSIZE) { + xaddr = addressPXD(pxd); + n = xsize >> L2DTSLOTSIZE; + n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ + if ((n + sp->header.freecnt) <= split->nslot) + n = xlen + (xlen << 1); + else + n = xlen; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, + (s64) n, &nxaddr))) + goto extendOut; + + pxdlist.maxnpxd = 1; + pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + PXDaddress(pxd, nxaddr) + PXDlength(pxd, xlen + n); + split->pxdlist = &pxdlist; + if ((rc = dtExtendPage(tid, ip, split, btstack))) { + nxaddr = addressPXD(pxd); + if (xaddr != nxaddr) { + /* free relocated extent */ + xlen = lengthPXD(pxd); + dbFree(ip, nxaddr, (s64) xlen); + } else { + /* free extended delta */ + xlen = lengthPXD(pxd) - n; + xaddr = addressPXD(pxd) + xlen; + dbFree(ip, xaddr, (s64) n); + } + } + + extendOut: + DT_PUTPAGE(smp); + goto freeKeyName; + } + + /* + * split leaf page into and a new right page . + * + * return pinned and its extent descriptor + */ + /* + * allocate new directory page extent and + * new index page(s) to cover page split(s) + * + * allocation hint: ? + */ + n = btstack->nsplit; + pxdlist.maxnpxd = pxdlist.npxd = 0; + xlen = sbi->nbperpage; + for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { + if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + pxdlist.maxnpxd++; + continue; + } + + DT_PUTPAGE(smp); + + /* undo allocation */ + goto splitOut; + } + + split->pxdlist = &pxdlist; + if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { + DT_PUTPAGE(smp); + + /* undo allocation */ + goto splitOut; + } + + /* + * propagate up the router entry for the leaf page just split + * + * insert a router entry for the new page into the parent page, + * propagate the insert/split up the tree by walking back the stack + * of (bn of parent page, index of child page entry in parent page) + * that were traversed during the search for the page that split. + * + * the propagation of insert/split up the tree stops if the root + * splits or the page inserted into doesn't have to split to hold + * the new entry. + * + * the parent entry for the split page remains the same, and + * a new entry is inserted at its right with the first key and + * block number of the new right page. + * + * There are a maximum of 4 pages pinned at any time: + * two children, left parent and right parent (when the parent splits). + * keep the child pages pinned while working on the parent. + * make sure that all pins are released at exit. 
+ */ + while ((parent = BT_POP(btstack)) != NULL) { + /* parent page specified by stack frame */ + + /* keep current child pages (, ) pinned */ + lmp = smp; + lp = sp; + + /* + * insert router entry in parent for new right child page + */ + /* get the parent page */ + DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); + if (rc) { + DT_PUTPAGE(lmp); + DT_PUTPAGE(rmp); + goto splitOut; + } + + /* + * The new key entry goes ONE AFTER the index of parent entry, + * because the split was to the right. + */ + skip = parent->index + 1; + + /* + * compute the key for the router entry + * + * key suffix compression: + * for internal pages that have leaf pages as children, + * retain only what's needed to distinguish between + * the new entry and the entry on the page to its left. + * If the keys compare equal, retain the entire key. + * + * note that compression is performed only at computing + * router key at the lowest internal level. + * further compression of the key between pairs of higher + * level internal pages loses too much information and + * the search may fail. + * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} + * results in two adjacent parent entries (a)(xx). + * if split occurs between these two entries, and + * if compression is applied, the router key of parent entry + * of right page (x) will divert search for x into right + * subtree and miss x in the left subtree.) + * + * the entire key must be retained for the next-to-leftmost + * internal key at any level of the tree, or search may fail + * (e.g., ?) + */ + switch (rp->header.flag & BT_TYPE) { + case BT_LEAF: + /* + * compute the length of prefix for suffix compression + * between last entry of left page and first entry + * of right page + */ + if ((sp->header.flag & BT_ROOT && skip > 1) || + sp->header.prev != 0 || skip > 1) { + /* compute uppercase router prefix key */ + ciGetLeafPrefixKey(lp, + lp->header.nextindex - 1, + rp, 0, &key, sbi->mntflag); + } else { + /* next to leftmost entry of + lowest internal level */ + + /* compute uppercase router key */ + dtGetKey(rp, 0, &key, sbi->mntflag); + key.name[key.namlen] = 0; + + if ((sbi->mntflag & JFS_OS2) == JFS_OS2) + ciToUpper(&key); + } + + n = NDTINTERNAL(key.namlen); + break; + + case BT_INTERNAL: + dtGetKey(rp, 0, &key, sbi->mntflag); + n = NDTINTERNAL(key.namlen); + break; + + default: + jERROR(2, ("dtSplitUp(): UFO!\n")); + break; + } + + /* unpin left child page */ + DT_PUTPAGE(lmp); + + /* + * compute the data for the router entry + */ + data->xd = rpxd; /* child page xd */ + + /* + * parent page is full - split the parent page + */ + if (n > sp->header.freecnt) { + /* init for parent page split */ + split->mp = smp; + split->index = skip; /* index at insert */ + split->nslot = n; + split->key = &key; + /* split->data = data; */ + + /* unpin right child page */ + DT_PUTPAGE(rmp); + + /* The split routines insert the new entry, + * acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? 
+ dtSplitRoot(tid, ip, split, &rmp) : + dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); + if (rc) { + DT_PUTPAGE(smp); + goto splitOut; + } + + /* smp and rmp are pinned */ + } + /* + * parent page is not full - insert router entry in parent page + */ + else { + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the parent page + */ + tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + /* linelock stbl of non-root parent page */ + if (!(sp->header.flag & BT_ROOT)) { + lv++; + n = skip >> L2DTSLOTSIZE; + lv->offset = sp->header.stblindex + n; + lv->length = + ((sp->header.nextindex - + 1) >> L2DTSLOTSIZE) - n + 1; + dtlck->index++; + } + + dtInsertEntry(sp, skip, &key, data, &dtlck); + + /* exit propagate up */ + break; + } + } + + /* unpin current split and its right page */ + DT_PUTPAGE(smp); + DT_PUTPAGE(rmp); + + /* + * free remaining extents allocated for split + */ + splitOut: + n = pxdlist.npxd; + pxd = &pxdlist.pxd[n]; + for (; n < pxdlist.maxnpxd; n++, pxd++) + dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); + + freeKeyName: + kfree(key.name); + + dtSplitUp_Exit: + + return rc; +} + + +/* + * dtSplitPage() + * + * function: Split a non-root page of a btree. + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * return split and new page pinned; + */ +static int dtSplitPage(int tid, struct inode *ip, dtsplit_t * split, + metapage_t ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) +{ + struct super_block *sb = ip->i_sb; + int rc = 0; + metapage_t *smp; + dtpage_t *sp; + metapage_t *rmp; + dtpage_t *rp; /* new right page allocated */ + s64 rbn; /* new right page block number */ + metapage_t *mp; + dtpage_t *p; + s64 nextbn; + pxdlist_t *pxdlist; + pxd_t *pxd; + int skip, nextindex, half, left, nxt, off, si; + ldtentry_t *ldtentry; + idtentry_t *idtentry; + u8 *stbl; + dtslot_t *f; + int fsi, stblsize; + int n; + dtlock_t *sdtlck, *rdtlck; + tlock_t *tlck; + dtlock_t *dtlck; + lv_t *slv, *rlv, *lv; + + /* get split page */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + +#ifdef _JFS_STATISTICS + ++dtstat_split; +#endif /* _JFS_STATISTICS */ + + /* + * allocate the new right page for the split + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, + ("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp)); + + BT_MARK_DIRTY(rmp, ip); + /* + * acquire a transaction lock on the new right page + */ + tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); + rdtlck = (dtlock_t *) & tlck->lock; + + rp = (dtpage_t *) rmp->data; + *rpp = rp; + rp->header.self = *pxd; + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the split page + * + * action: + */ + tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); + sdtlck = (dtlock_t *) & tlck->lock; + + /* linelock header of split page */ + ASSERT(sdtlck->index == 0); + slv = (lv_t *) & sdtlck->lv[0]; + slv->offset = 0; + slv->length = 1; + sdtlck->index++; + + /* + * initialize/update sibling pointers between sp and rp + */ + nextbn = le64_to_cpu(sp->header.next); + rp->header.next = cpu_to_le64(nextbn); + rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); + sp->header.next = cpu_to_le64(rbn); + + /* + * initialize new right page + */ + rp->header.flag = sp->header.flag; + + /* 
compute sorted entry table at start of extent data area */
+ rp->header.nextindex = 0;
+ rp->header.stblindex = 1;
+
+ n = PSIZE >> L2DTSLOTSIZE;
+ rp->header.maxslot = n;
+ stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */
+
+ /* init freelist */
+ fsi = rp->header.stblindex + stblsize;
+ rp->header.freelist = fsi;
+ rp->header.freecnt = rp->header.maxslot - fsi;
+
+ /*
+ * sequential append at tail: append without split
+ *
+ * If splitting the last page on a level because of appending
+ * an entry to it (skip is maxentry), it's likely that the access is
+ * sequential. Adding an empty page on the side of the level is less
+ * work and can push the fill factor much higher than normal.
+ * If we're wrong it's no big deal, we'll just do the split the right
+ * way next time.
+ * (It may look like it's equally easy to do a similar hack for
+ * reverse sorted data, that is, split the tree left,
+ * but it's not. Be my guest.)
+ */
+ if (nextbn == 0 && split->index == sp->header.nextindex) {
+ /* linelock header + stbl (first slot) of new page */
+ rlv = (lv_t *) & rdtlck->lv[rdtlck->index];
+ rlv->offset = 0;
+ rlv->length = 2;
+ rdtlck->index++;
+
+ /*
+ * initialize freelist of new right page
+ */
+ f = &rp->slot[fsi];
+ for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
+ f->next = fsi;
+ f->next = -1;
+
+ /* insert entry at the first entry of the new right page */
+ dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
+
+ goto out;
+ }
+
+ /*
+ * non-sequential insert (at possibly middle page)
+ */
+
+ /*
+ * update prev pointer of previous right sibling page;
+ */
+ if (nextbn != 0) {
+ DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ BT_MARK_DIRTY(mp, ip);
+ /*
+ * acquire a transaction lock on the next page
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
+ jEVENT(0,
+ ("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p\n",
+ tlck, ip, mp));
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /* linelock header of previous right sibling page */
+ lv = (lv_t *) & dtlck->lv[dtlck->index];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ p->header.prev = cpu_to_le64(rbn);
+
+ DT_PUTPAGE(mp);
+ }
+
+ /*
+ * split the data between the split and right pages.
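+ *
+ * the loop below walks the sorted entry table, adding up the slot
+ * cost of each entry (with the new entry counted at index <skip>),
+ * until the total reaches half of the page's slots; entries before
+ * that point stay in sp, the rest are moved to rp by dtMoveEntry().
+ *
+ * e.g., with PSIZE of 4096 and 32-byte slots, half = 64 slots; if
+ * entries average two slots each, roughly the first 32 entries are
+ * kept in sp and the remainder move to rp. (illustrative numbers
+ * only - the exact cost of each entry is computed below via
+ * NDTLEAF()/NDTINTERNAL() from its name length.)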
+ */
+ skip = split->index;
+ half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */
+ left = 0;
+
+ /*
+ * compute fill factor for split pages
+ *
+ * <nxt> traces the next entry to move to rp
+ * <off> traces the next entry to stay in sp
+ */
+ stbl = (u8 *) & sp->slot[sp->header.stblindex];
+ nextindex = sp->header.nextindex;
+ for (nxt = off = 0; nxt < nextindex; ++off) {
+ if (off == skip)
+ /* check for fill factor with new entry size */
+ n = split->nslot;
+ else {
+ si = stbl[nxt];
+ switch (sp->header.flag & BT_TYPE) {
+ case BT_LEAF:
+ ldtentry = (ldtentry_t *) & sp->slot[si];
+ if (DO_INDEX(ip))
+ n = NDTLEAF(ldtentry->namlen);
+ else
+ n = NDTLEAF_LEGACY(ldtentry->
+ namlen);
+ break;
+
+ case BT_INTERNAL:
+ idtentry = (idtentry_t *) & sp->slot[si];
+ n = NDTINTERNAL(idtentry->namlen);
+ break;
+
+ default:
+ break;
+ }
+
+ ++nxt; /* advance to next entry to move in sp */
+ }
+
+ left += n;
+ if (left >= half)
+ break;
+ }
+
+ /* <nxt> points to the 1st entry to move */
+
+ /*
+ * move entries to right page
+ *
+ * dtMoveEntry() initializes rp and reserves entry for insertion
+ *
+ * split page moved out entries are linelocked;
+ * new/right page moved in entries are linelocked;
+ */
+ /* linelock header + stbl of new right page */
+ rlv = (lv_t *) & rdtlck->lv[rdtlck->index];
+ rlv->offset = 0;
+ rlv->length = 5;
+ rdtlck->index++;
+
+ dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
+
+ sp->header.nextindex = nxt;
+
+ /*
+ * finalize freelist of new right page
+ */
+ fsi = rp->header.freelist;
+ f = &rp->slot[fsi];
+ for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
+ f->next = fsi;
+ f->next = -1;
+
+ /*
+ * Update directory index table for entries now in right page
+ */
+ if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
+ mp = 0;
+ stbl = DT_GETSTBL(rp);
+ for (n = 0; n < rp->header.nextindex; n++) {
+ ldtentry = (ldtentry_t *) & rp->slot[stbl[n]];
+ modify_index(tid, ip, le32_to_cpu(ldtentry->index),
+ rbn, n, &mp);
+ }
+ if (mp)
+ release_metapage(mp);
+ }
+
+ /*
+ * the skipped index was on the left page,
+ */
+ if (skip <= off) {
+ /* insert the new entry in the split page */
+ dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
+
+ /* linelock stbl of split page */
+ if (sdtlck->index >= sdtlck->maxcnt)
+ sdtlck = (dtlock_t *) txLinelock(sdtlck);
+ slv = (lv_t *) & sdtlck->lv[sdtlck->index];
+ n = skip >> L2DTSLOTSIZE;
+ slv->offset = sp->header.stblindex + n;
+ slv->length =
+ ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
+ sdtlck->index++;
+ }
+ /*
+ * the skipped index was on the right page,
+ */
+ else {
+ /* adjust the skip index to reflect the new position */
+ skip -= nxt;
+
+ /* insert the new entry in the right page */
+ dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
+ }
+
+ out:
+ *rmpp = rmp;
+ *rpxdp = *pxd;
+
+ ip->i_blocks += LBLK2PBLK(sb, lengthPXD(pxd));
+
+ jEVENT(0, ("dtSplitPage: ip:0x%p sp:0x%p rp:0x%p\n", ip, sp, rp));
+ return 0;
+}
+
+
+/*
+ * dtExtendPage()
+ *
+ * function: extend 1st/only directory leaf page
+ *
+ * parameter:
+ *
+ * return: 0 - success;
+ * errno - failure;
+ * return extended page pinned;
+ */
+static int dtExtendPage(int tid,
+ struct inode *ip, dtsplit_t * split, btstack_t * btstack)
+{
+ struct super_block *sb = ip->i_sb;
+ int rc;
+ metapage_t *smp, *pmp, *mp;
+ dtpage_t *sp, *pp;
+ pxdlist_t *pxdlist;
+ pxd_t *pxd, *tpxd;
+ int xlen, xsize;
+ int newstblindex, newstblsize;
+ int oldstblindex, oldstblsize;
+ int fsi, last;
+ dtslot_t *f;
+ btframe_t *parent;
+ int n;
+ dtlock_t *dtlck;
+ s64 xaddr, txaddr;
+ 
tlock_t *tlck; + pxdlock_t *pxdlock; + lv_t *lv; + uint type; + ldtentry_t *ldtentry; + u8 *stbl; + + /* get page to extend */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + + /* get parent/root page */ + parent = BT_POP(btstack); + DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); + if (rc) + return (rc); + + /* + * extend the extent + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + + xaddr = addressPXD(pxd); + tpxd = &sp->header.self; + txaddr = addressPXD(tpxd); + /* in-place extension */ + if (xaddr == txaddr) { + type = tlckEXTEND; + } + /* relocation */ + else { + type = tlckNEW; + + /* save moved extent descriptor for later free */ + tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + pxdlock->pxd = sp->header.self; + pxdlock->index = 1; + + /* + * Update directory index table to reflect new page address + */ + if (DO_INDEX(ip)) { + mp = 0; + stbl = DT_GETSTBL(sp); + for (n = 0; n < sp->header.nextindex; n++) { + ldtentry = + (ldtentry_t *) & sp->slot[stbl[n]]; + modify_index(tid, ip, + le32_to_cpu(ldtentry->index), + xaddr, n, &mp); + } + if (mp) + release_metapage(mp); + } + } + + /* + * extend the page + */ + sp->header.self = *pxd; + + jEVENT(0, + ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the extended/leaf page + */ + tlck = txLock(tid, ip, smp, tlckDTREE | type); + dtlck = (dtlock_t *) & tlck->lock; + lv = (lv_t *) & dtlck->lv[0]; + + /* update buffer extent descriptor of extended page */ + xlen = lengthPXD(pxd); + xsize = xlen << JFS_SBI(sb)->l2bsize; +#ifdef _STILL_TO_PORT + bmSetXD(smp, xaddr, xsize); +#endif /* _STILL_TO_PORT */ + + /* + * copy old stbl to new stbl at start of extended area + */ + oldstblindex = sp->header.stblindex; + oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; + newstblindex = sp->header.maxslot; + n = xsize >> L2DTSLOTSIZE; + newstblsize = (n + 31) >> L2DTSLOTSIZE; + memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], + sp->header.nextindex); + + /* + * in-line extension: linelock old area of extended page + */ + if (type == tlckEXTEND) { + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + lv++; + + /* linelock new stbl of extended page */ + lv->offset = newstblindex; + lv->length = newstblsize; + } + /* + * relocation: linelock whole relocated area + */ + else { + lv->offset = 0; + lv->length = sp->header.maxslot + newstblsize; + } + + dtlck->index++; + + sp->header.maxslot = n; + sp->header.stblindex = newstblindex; + /* sp->header.nextindex remains the same */ + + /* + * add old stbl region at head of freelist + */ + fsi = oldstblindex; + f = &sp->slot[fsi]; + last = sp->header.freelist; + for (n = 0; n < oldstblsize; n++, fsi++, f++) { + f->next = last; + last = fsi; + } + sp->header.freelist = last; + sp->header.freecnt += oldstblsize; + + /* + * append free region of newly extended area at tail of freelist + */ + /* init free region of newly extended area */ + fsi = n = newstblindex + newstblsize; + f = &sp->slot[fsi]; + for (fsi++; fsi < sp->header.maxslot; f++, fsi++) + f->next = fsi; + f->next = -1; + + /* append new free region at tail of old freelist */ + fsi = sp->header.freelist; + if (fsi == -1) + sp->header.freelist = n; + else { + do { + f = &sp->slot[fsi]; + fsi = f->next; + } while (fsi != -1); + + f->next = n; + } + + sp->header.freecnt += sp->header.maxslot - n; + + /* + * insert the new entry + 
*/ + dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); + + BT_MARK_DIRTY(pmp, ip); + /* + * linelock any freeslots residing in old extent + */ + if (type == tlckEXTEND) { + n = sp->header.maxslot >> 2; + if (sp->header.freelist < n) + dtLinelockFreelist(sp, n, &dtlck); + } + + /* + * update parent entry on the parent/root page + */ + /* + * acquire a transaction lock on the parent/root page + */ + tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + /* linelock parent entry - 1st slot */ + lv->offset = 1; + lv->length = 1; + dtlck->index++; + + /* update the parent pxd for page extension */ + tpxd = (pxd_t *) & pp->slot[1]; + *tpxd = *pxd; + + /* Since the directory might have an EA and/or ACL associated with it + * we need to make sure we take that into account when setting the + * i_nblocks + */ + ip->i_blocks = LBLK2PBLK(ip->i_sb, xlen + + ((JFS_IP(ip)->ea.flag & DXD_EXTENT) ? + lengthDXD(&JFS_IP(ip)->ea) : 0) + + ((JFS_IP(ip)->acl.flag & DXD_EXTENT) ? + lengthDXD(&JFS_IP(ip)->acl) : 0)); + + jEVENT(0, + ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); + + + DT_PUTPAGE(pmp); + return 0; +} + + +/* + * dtSplitRoot() + * + * function: + * split the full root page into + * original/root/split page and new right page + * i.e., root remains fixed in tree anchor (inode) and + * the root is copied to a single new right child page + * since root page << non-root page, and + * the split root page contains a single entry for the + * new right child page. + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * return new page pinned; + */ +static int dtSplitRoot(int tid, + struct inode *ip, dtsplit_t * split, metapage_t ** rmpp) +{ + struct super_block *sb = ip->i_sb; + metapage_t *smp; + dtroot_t *sp; + metapage_t *rmp; + dtpage_t *rp; + s64 rbn; + int xlen; + int xsize; + dtslot_t *f; + s8 *stbl; + int fsi, stblsize, n; + idtentry_t *s; + pxd_t *ppxd; + pxdlist_t *pxdlist; + pxd_t *pxd; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + + /* get split root page */ + smp = split->mp; + sp = &JFS_IP(ip)->i_dtroot; + + /* + * allocate/initialize a single (right) child page + * + * N.B. at first split, a one (or two) block to fit new entry + * is allocated; at subsequent split, a full page is allocated; + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + xlen = lengthPXD(pxd); + xsize = xlen << JFS_SBI(sb)->l2bsize; + rmp = get_metapage(ip, rbn, xsize, 1); + rp = rmp->data; + + BT_MARK_DIRTY(rmp, ip); + /* + * acquire a transaction lock on the new right page + */ + tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); + dtlck = (dtlock_t *) & tlck->lock; + + rp->header.flag = + (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL;
+ rp->header.self = *pxd;
+
+ /* initialize sibling pointers */
+ rp->header.next = 0;
+ rp->header.prev = 0;
+
+ /*
+ * move in-line root page into new right page extent
+ */
+ /* linelock header + copied entries + new stbl (1st slot) in new page */
+ ASSERT(dtlck->index == 0);
+ lv = (lv_t *) & dtlck->lv[0];
+ lv->offset = 0;
+ lv->length = 10; /* 1 + 8 + 1 */
+ dtlck->index++;
+
+ n = xsize >> L2DTSLOTSIZE;
+ rp->header.maxslot = n;
+ stblsize = (n + 31) >> L2DTSLOTSIZE;
+
+ /* copy old stbl to new stbl at start of extended area */
+ rp->header.stblindex = DTROOTMAXSLOT;
+ stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
+ memcpy(stbl, sp->header.stbl, sp->header.nextindex);
+ rp->header.nextindex = sp->header.nextindex;
+
+ /* copy old data area to start of new data area */
+ memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
+
+ /*
+ * append free region of newly extended area at tail of freelist
+ */
+ /* init free region of newly extended area */
+ fsi = n = DTROOTMAXSLOT + stblsize;
+ f = &rp->slot[fsi];
+ for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
+ f->next = fsi;
+ f->next = -1;
+
+ /* append new free region at tail of old freelist */
+ fsi = sp->header.freelist;
+ if (fsi == -1)
+ rp->header.freelist = n;
+ else {
+ rp->header.freelist = fsi;
+
+ do {
+ f = &rp->slot[fsi];
+ fsi = f->next;
+ } while (fsi != -1);
+
+ f->next = n;
+ }
+
+ rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
+
+ /*
+ * insert the new entry into the new right/child page
+ * (skip index in the new right page will not change)
+ */
+ dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
+
+ /*
+ * reset parent/root page
+ *
+ * set the 1st entry offset to 0, which forces the left-most key
+ * at any level of the tree to be less than any search key.
+ *
+ * The btree comparison code guarantees that the left-most key on any
+ * level of the tree is never used, so it doesn't need to be filled in.
+ */
+ BT_MARK_DIRTY(smp, ip);
+ /*
+ * acquire a transaction lock on the root page (in-memory inode)
+ */
+ tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /* linelock root */
+ ASSERT(dtlck->index == 0);
+ lv = (lv_t *) & dtlck->lv[0];
+ lv->offset = 0;
+ lv->length = DTROOTMAXSLOT;
+ dtlck->index++;
+
+ /* update page header of root */
+ if (sp->header.flag & BT_LEAF) {
+ sp->header.flag &= ~BT_LEAF;
+ sp->header.flag |= BT_INTERNAL;
+ }
+
+ /* init the first entry */
+ s = (idtentry_t *) & sp->slot[DTENTRYSTART];
+ ppxd = (pxd_t *) s;
+ *ppxd = *pxd;
+ s->next = -1;
+ s->namlen = 0;
+
+ stbl = sp->header.stbl;
+ stbl[0] = DTENTRYSTART;
+ sp->header.nextindex = 1;
+
+ /* init freelist */
+ fsi = DTENTRYSTART + 1;
+ f = &sp->slot[fsi];
+
+ /* init free region of remaining area */
+ for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
+ f->next = fsi;
+ f->next = -1;
+
+ sp->header.freelist = DTENTRYSTART + 1;
+ sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
+
+ *rmpp = rmp;
+
+ ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
+ return 0;
+}
+
+
+/*
+ * dtDelete()
+ *
+ * function: delete the entry(s) referenced by a key.
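+ *
+ * a minimal usage sketch (hypothetical caller: dir_ip, dname and
+ * the txAbort() recovery are illustrative, not mandated by this
+ * interface):
+ *
+ *     ino_t ino = ip->i_ino;  /* inumber expected in the entry */
+ *     if ((rc = dtDelete(tid, dir_ip, &dname, &ino, JFS_REMOVE)))
+ *             txAbort(tid, 1);        /* e.g., ENOENT */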
+ *
+ * parameter:
+ *
+ * return:
+ */
+int dtDelete(int tid,
+ struct inode *ip, component_t * key, ino_t * ino, int flag)
+{
+ int rc = 0;
+ s64 bn;
+ metapage_t *mp, *imp;
+ dtpage_t *p;
+ int index;
+ btstack_t btstack;
+ dtlock_t *dtlck;
+ tlock_t *tlck;
+ lv_t *lv;
+ int i;
+ ldtentry_t *ldtentry;
+ u8 *stbl;
+ u32 table_index, next_index;
+ metapage_t *nmp;
+ dtpage_t *np;
+
+ /*
+ * search for the entry to delete:
+ *
+ * dtSearch() returns (leaf page pinned, index at which to delete).
+ */
+ if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
+ return rc;
+
+ /* retrieve search result */
+ DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
+
+ /*
+ * We need to put the index of the next entry into the
+ * directory index table in order to resume a readdir from this
+ * entry.
+ */
+ if (DO_INDEX(ip)) {
+ stbl = DT_GETSTBL(p);
+ ldtentry = (ldtentry_t *) & p->slot[stbl[index]];
+ table_index = le32_to_cpu(ldtentry->index);
+ if (index == (p->header.nextindex - 1)) {
+ /*
+ * Last entry in this leaf page
+ */
+ if ((p->header.flag & BT_ROOT)
+ || (p->header.next == 0))
+ next_index = -1;
+ else {
+ /* Read next leaf page */
+ DT_GETPAGE(ip, le64_to_cpu(p->header.next),
+ nmp, PSIZE, np, rc);
+ if (rc)
+ next_index = -1;
+ else {
+ stbl = DT_GETSTBL(np);
+ ldtentry =
+ (ldtentry_t *) & np->
+ slot[stbl[0]];
+ next_index =
+ le32_to_cpu(ldtentry->index);
+ DT_PUTPAGE(nmp);
+ }
+ }
+ } else {
+ ldtentry =
+ (ldtentry_t *) & p->slot[stbl[index + 1]];
+ next_index = le32_to_cpu(ldtentry->index);
+ }
+ free_index(tid, ip, table_index, next_index);
+ }
+ /*
+ * the leaf page becomes empty, delete the page
+ */
+ if (p->header.nextindex == 1) {
+ /* delete empty page */
+ rc = dtDeleteUp(tid, ip, mp, p, &btstack);
+ }
+ /*
+ * the leaf page has other entries remaining:
+ *
+ * delete the entry from the leaf page.
+ */
+ else {
+ BT_MARK_DIRTY(mp, ip);
+ /*
+ * acquire a transaction lock on the leaf page
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /*
+ * Do not assume that dtlck->index will be zero. During a
+ * rename within a directory, this transaction may have
+ * modified this page already when adding the new entry.
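+ * (e.g., renaming 'a' to 'b' within one directory performs the
+ * dtInsert of 'b' and this dtDelete of 'a' under the same tid; if
+ * both names live on the same leaf page, txLock() above returns the
+ * tlock already holding the insert's linelocks, so dtlck->index > 0.)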
+ */ + + /* linelock header */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + /* linelock stbl of non-root leaf page */ + if (!(p->header.flag & BT_ROOT)) { + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + i = index >> L2DTSLOTSIZE; + lv->offset = p->header.stblindex + i; + lv->length = + ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - + i + 1; + dtlck->index++; + } + + /* free the leaf entry */ + dtDeleteEntry(p, index, &dtlck); + + /* + * Update directory index table for entries moved in stbl + */ + if (DO_INDEX(ip) && index < p->header.nextindex) { + imp = 0; + stbl = DT_GETSTBL(p); + for (i = index; i < p->header.nextindex; i++) { + ldtentry = + (ldtentry_t *) & p->slot[stbl[i]]; + modify_index(tid, ip, + le32_to_cpu(ldtentry->index), + bn, i, &imp); + } + if (imp) + release_metapage(imp); + } + + DT_PUTPAGE(mp); + } + + return rc; +} + + +/* + * dtDeleteUp() + * + * function: + * free empty pages as propagating deletion up the tree + * + * parameter: + * + * return: + */ +static int dtDeleteUp(int tid, struct inode *ip, + metapage_t * fmp, dtpage_t * fp, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; + dtpage_t *p; + int index, nextindex; + int xlen; + btframe_t *parent; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + pxdlock_t *pxdlock; + int i; + + /* + * keep the root leaf page which has become empty + */ + if (BT_IS_ROOT(fmp)) { + /* + * reset the root + * + * dtInitRoot() acquires txlock on the root + */ + dtInitRoot(tid, ip, PARENT(ip)); + + DT_PUTPAGE(fmp); + + return 0; + } + + /* + * free the non-root leaf page + */ + /* + * acquire a transaction lock on the page + * + * write FREEXTENT|NOREDOPAGE log record + * N.B. linelock is overlaid as freed extent descriptor, and + * the buffer page is freed; + */ + tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + pxdlock->pxd = fp->header.self; + pxdlock->index = 1; + + /* update sibling pointers */ + if ((rc = dtRelink(tid, ip, fp))) + return rc; + + xlen = lengthPXD(&fp->header.self); + ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen); + + /* free/invalidate its buffer page */ + discard_metapage(fmp); + + /* + * propagate page deletion up the directory tree + * + * If the delete from the parent page makes it empty, + * continue all the way up the tree. + * stop if the root page is reached (which is never deleted) or + * if the entry deletion does not empty the page. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* pin the parent page */ + DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * free the extent of the child page deleted + */ + index = parent->index; + + /* + * delete the entry for the child page from parent + */ + nextindex = p->header.nextindex; + + /* + * the parent has the single entry being deleted: + * + * free the parent page which has become empty. 
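+ *
+ * e.g., if leaf L was the only child referenced by its parent P,
+ * L has already been freed above, P's single router entry makes
+ * nextindex == 1 here, so P itself is freed (after dtRelink()
+ * unlinks it from its siblings) and the loop pops P's parent;
+ * the chain stops at the first ancestor with other entries left.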
+ */
+ if (nextindex == 1) {
+ /*
+ * keep the root internal page which has become empty
+ */
+ if (p->header.flag & BT_ROOT) {
+ /*
+ * reset the root
+ *
+ * dtInitRoot() acquires txlock on the root
+ */
+ dtInitRoot(tid, ip, PARENT(ip));
+
+ DT_PUTPAGE(mp);
+
+ return 0;
+ }
+ /*
+ * free the parent page
+ */
+ else {
+ /*
+ * acquire a transaction lock on the page
+ *
+ * write FREEXTENT|NOREDOPAGE log record
+ */
+ tlck =
+ txMaplock(tid, ip,
+ tlckDTREE | tlckFREE);
+ pxdlock = (pxdlock_t *) & tlck->lock;
+ pxdlock->flag = mlckFREEPXD;
+ pxdlock->pxd = p->header.self;
+ pxdlock->index = 1;
+
+ /* update sibling pointers */
+ if ((rc = dtRelink(tid, ip, p)))
+ return rc;
+
+ xlen = lengthPXD(&p->header.self);
+ ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen);
+
+ /* free/invalidate its buffer page */
+ discard_metapage(mp);
+
+ /* propagate up */
+ continue;
+ }
+ }
+
+ /*
+ * the parent has other entries remaining:
+ *
+ * delete the router entry from the parent page.
+ */
+ BT_MARK_DIRTY(mp, ip);
+ /*
+ * acquire a transaction lock on the page
+ *
+ * action: router entry deletion
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /* linelock header */
+ if (dtlck->index >= dtlck->maxcnt)
+ dtlck = (dtlock_t *) txLinelock(dtlck);
+ lv = (lv_t *) & dtlck->lv[dtlck->index];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ /* linelock stbl of non-root leaf page */
+ if (!(p->header.flag & BT_ROOT)) {
+ if (dtlck->index < dtlck->maxcnt)
+ lv++;
+ else {
+ dtlck = (dtlock_t *) txLinelock(dtlck);
+ lv = (lv_t *) & dtlck->lv[0];
+ }
+ i = index >> L2DTSLOTSIZE;
+ lv->offset = p->header.stblindex + i;
+ lv->length =
+ ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
+ i + 1;
+ dtlck->index++;
+ }
+
+ /* free the router entry */
+ dtDeleteEntry(p, index, &dtlck);
+
+ /* reset key of new leftmost entry of level (for consistency) */
+ if (index == 0 &&
+ ((p->header.flag & BT_ROOT) || p->header.prev == 0))
+ dtTruncateEntry(p, 0, &dtlck);
+
+ /* unpin the parent page */
+ DT_PUTPAGE(mp);
+
+ /* exit propagation up */
+ break;
+ }
+
+ return 0;
+}
+
+
+#ifdef _STILL_TO_PORT
+/*
+ * NAME: dtRelocate()
+ *
+ * FUNCTION: relocate dtpage (internal or leaf) of directory;
+ * This function is mainly used by defragfs utility.
+ */
+int dtRelocate(int tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
+ s64 nxaddr)
+{
+ int rc = 0;
+ int cmp;
+ metapage_t *mp, *pmp, *lmp, *rmp;
+ dtpage_t *p, *pp, *rp, *lp;
+ s64 bn;
+ int index;
+ btstack_t btstack;
+ pxd_t *pxd;
+ s64 oxaddr, nextbn, prevbn;
+ int xlen, xsize;
+ tlock_t *tlck;
+ dtlock_t *dtlck;
+ pxdlock_t *pxdlock;
+ s8 *stbl;
+ lv_t *lv;
+
+ oxaddr = addressPXD(opxd);
+ xlen = lengthPXD(opxd);
+
+ jEVENT(0, ("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d\n",
+ lmxaddr, oxaddr, nxaddr, xlen));
+
+ /*
+ * 1. get the internal parent dtpage covering
+ * router entry for the target page to be relocated;
+ */
+ rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
+ if (rc)
+ return rc;
+
+ /* retrieve search result */
+ DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
+ jEVENT(0, ("dtRelocate: parent router entry validated.\n"));
+
+ /*
+ * 2. 
relocate the target dtpage
+ */
+ /* read in the target page from src extent */
+ DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
+ if (rc) {
+ /* release the pinned parent page */
+ DT_PUTPAGE(pmp);
+ return rc;
+ }
+
+ /*
+ * read in sibling pages if any to update sibling pointers;
+ */
+ rmp = NULL;
+ if (p->header.next) {
+ nextbn = le64_to_cpu(p->header.next);
+ DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
+ if (rc) {
+ DT_PUTPAGE(mp);
+ DT_PUTPAGE(pmp);
+ return (rc);
+ }
+ }
+
+ lmp = NULL;
+ if (p->header.prev) {
+ prevbn = le64_to_cpu(p->header.prev);
+ DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
+ if (rc) {
+ DT_PUTPAGE(mp);
+ DT_PUTPAGE(pmp);
+ if (rmp)
+ DT_PUTPAGE(rmp);
+ return (rc);
+ }
+ }
+
+ /* at this point, all dtpages to be updated are in memory */
+
+ /*
+ * update sibling pointers of sibling dtpages if any;
+ */
+ if (lmp) {
+ tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
+ dtlck = (dtlock_t *) & tlck->lock;
+ /* linelock header */
+ ASSERT(dtlck->index == 0);
+ lv = (lv_t *) & dtlck->lv[0];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ lp->header.next = cpu_to_le64(nxaddr);
+ DT_PUTPAGE(lmp);
+ }
+
+ if (rmp) {
+ tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
+ dtlck = (dtlock_t *) & tlck->lock;
+ /* linelock header */
+ ASSERT(dtlck->index == 0);
+ lv = (lv_t *) & dtlck->lv[0];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ rp->header.prev = cpu_to_le64(nxaddr);
+ DT_PUTPAGE(rmp);
+ }
+
+ /*
+ * update the target dtpage to be relocated
+ *
+ * write LOG_REDOPAGE of LOG_NEW type for dst page
+ * for the whole target page (logredo() will apply
+ * after image and update bmap for allocation of the
+ * dst extent), and update bmap for allocation of
+ * the dst extent;
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
+ dtlck = (dtlock_t *) & tlck->lock;
+ /* linelock header */
+ ASSERT(dtlck->index == 0);
+ lv = (lv_t *) & dtlck->lv[0];
+
+ /* update the self address in the dtpage header */
+ pxd = &p->header.self;
+ PXDaddress(pxd, nxaddr);
+
+ /* the dst page is the same as the src page, i.e.,
+ * linelock for afterimage of the whole page;
+ */
+ lv->offset = 0;
+ lv->length = p->header.maxslot;
+ dtlck->index++;
+
+ /* update the buffer extent descriptor of the dtpage */
+ xsize = xlen << JFS_SBI(sb)->l2bsize;
+ bmSetXD(mp, nxaddr, xsize);
+
+ /* unpin the relocated page */
+ DT_PUTPAGE(mp);
+ jEVENT(0, ("dtRelocate: target dtpage relocated.\n"));
+
+ /* since the moved extent is a dtpage, a LOG_NOREDOPAGE log rec
+ * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
+ * will also force a bmap update).
+ */
+
+ /*
+ * 3. acquire maplock for the source extent to be freed;
+ */
+ out:
+ /* for dtpage relocation, write a LOG_NOREDOPAGE record
+ * for the source dtpage (logredo() will init NoRedoPage
+ * filter and will also update bmap for free of the source
+ * dtpage), and update bmap for free of the source dtpage;
+ */
+ tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
+ pxdlock = (pxdlock_t *) & tlck->lock;
+ pxdlock->flag = mlckFREEPXD;
+ PXDaddress(&pxdlock->pxd, oxaddr);
+ PXDlength(&pxdlock->pxd, xlen);
+ pxdlock->index = 1;
+
+ /*
+ * 4. 
update the parent router entry for relocation;
+ *
+ * acquire tlck for the parent entry covering the target dtpage;
+ * write LOG_REDOPAGE to apply after image only;
+ */
+ jEVENT(0, ("dtRelocate: update parent router entry.\n"));
+ tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
+ dtlck = (dtlock_t *) & tlck->lock;
+ lv = (lv_t *) & dtlck->lv[dtlck->index];
+
+ /* update the PXD with the new address */
+ stbl = DT_GETSTBL(pp);
+ pxd = (pxd_t *) & pp->slot[stbl[index]];
+ PXDaddress(pxd, nxaddr);
+ lv->offset = stbl[index];
+ lv->length = 1;
+ dtlck->index++;
+
+ /* unpin the parent dtpage */
+ DT_PUTPAGE(pmp);
+
+ return rc;
+}
+
+
+/*
+ * NAME: dtSearchNode()
+ *
+ * FUNCTION: Search for a dtpage containing a specified address
+ * This function is mainly used by defragfs utility.
+ *
+ * NOTE: Search result on stack, the found page is pinned at exit.
+ * The result page must be an internal dtpage.
+ * lmxaddr gives the address of the leftmost page of the
+ * dtree level, in which the required dtpage resides.
+ */
+static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
+ btstack_t * btstack)
+{
+ int rc = 0;
+ s64 bn;
+ metapage_t *mp;
+ dtpage_t *p;
+ int psize = 288; /* initial in-line directory */
+ s8 *stbl;
+ int i;
+ pxd_t *pxd;
+ btframe_t *btsp;
+
+ BT_CLR(btstack); /* reset stack */
+
+ /*
+ * descend tree to the level with specified leftmost page
+ *
+ * by convention, root bn = 0.
+ */
+ for (bn = 0;;) {
+ /* get/pin the page to search */
+ DT_GETPAGE(ip, bn, mp, psize, p, rc);
+ if (rc)
+ return rc;
+
+ /* does the xaddr of the leftmost page of the level
+ * match the level search key ?
+ */
+ if (p->header.flag & BT_ROOT) {
+ if (lmxaddr == 0)
+ break;
+ } else if (addressPXD(&p->header.self) == lmxaddr)
+ break;
+
+ /*
+ * descend down to leftmost child page
+ */
+ if (p->header.flag & BT_LEAF)
+ return ESTALE;
+
+ /* get the leftmost entry */
+ stbl = DT_GETSTBL(p);
+ pxd = (pxd_t *) & p->slot[stbl[0]];
+
+ /* get the child page block address */
+ bn = addressPXD(pxd);
+ psize = lengthPXD(pxd) << ip->i_ipmnt->i_l2bsize;
+
+ /* unpin the parent page */
+ DT_PUTPAGE(mp);
+ }
+
+ /*
+ * search each page at the current level
+ */
+ loop:
+ stbl = DT_GETSTBL(p);
+ for (i = 0; i < p->header.nextindex; i++) {
+ pxd = (pxd_t *) & p->slot[stbl[i]];
+
+ /* found the specified router entry */
+ if (addressPXD(pxd) == addressPXD(kpxd) &&
+ lengthPXD(pxd) == lengthPXD(kpxd)) {
+ btsp = btstack->top;
+ btsp->bn = bn;
+ btsp->index = i;
+ btsp->mp = mp;
+
+ return 0;
+ }
+ }
+
+ /* get the right sibling page if any */
+ if (p->header.next)
+ bn = le64_to_cpu(p->header.next);
+ else {
+ DT_PUTPAGE(mp);
+ return ESTALE;
+ }
+
+ /* unpin current page */
+ DT_PUTPAGE(mp);
+
+ /* get the right sibling page */
+ DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ goto loop;
+}
+#endif /* _STILL_TO_PORT */
+
+
+/*
+ * dtRelink()
+ *
+ * function:
+ * link around a freed page.
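+ *
+ * the sibling chain is a doubly-linked list of block numbers kept
+ * in the page headers; for a freed page F with neighbors P and N,
+ * this is the usual list unlink, except that each neighbor is read,
+ * transaction-locked and logged before its pointer is rewritten:
+ *
+ *     N->header.prev = F->header.prev;        (first if-block below)
+ *     P->header.next = F->header.next;        (second if-block below)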
+ *
+ * parameter:
+ * p: page to be freed
+ *
+ * return:
+ */
+static int dtRelink(int tid, struct inode *ip, dtpage_t * p)
+{
+ int rc;
+ metapage_t *mp;
+ s64 nextbn, prevbn;
+ tlock_t *tlck;
+ dtlock_t *dtlck;
+ lv_t *lv;
+
+ nextbn = le64_to_cpu(p->header.next);
+ prevbn = le64_to_cpu(p->header.prev);
+
+ /* update prev pointer of the next page */
+ if (nextbn != 0) {
+ DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ BT_MARK_DIRTY(mp, ip);
+ /*
+ * acquire a transaction lock on the next page
+ *
+ * action: update prev pointer;
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
+ jEVENT(0,
+ ("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n",
+ tlck, ip, mp));
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /* linelock header */
+ if (dtlck->index >= dtlck->maxcnt)
+ dtlck = (dtlock_t *) txLinelock(dtlck);
+ lv = (lv_t *) & dtlck->lv[dtlck->index];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ p->header.prev = cpu_to_le64(prevbn);
+ DT_PUTPAGE(mp);
+ }
+
+ /* update next pointer of the previous page */
+ if (prevbn != 0) {
+ DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ BT_MARK_DIRTY(mp, ip);
+ /*
+ * acquire a transaction lock on the prev page
+ *
+ * action: update next pointer;
+ */
+ tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
+ jEVENT(0,
+ ("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n",
+ tlck, ip, mp));
+ dtlck = (dtlock_t *) & tlck->lock;
+
+ /* linelock header */
+ if (dtlck->index >= dtlck->maxcnt)
+ dtlck = (dtlock_t *) txLinelock(dtlck);
+ lv = (lv_t *) & dtlck->lv[dtlck->index];
+ lv->offset = 0;
+ lv->length = 1;
+ dtlck->index++;
+
+ p->header.next = cpu_to_le64(nextbn);
+ DT_PUTPAGE(mp);
+ }
+
+ return 0;
+}
+
+
+/*
+ * dtInitRoot()
+ *
+ * initialize directory root (inline in inode)
+ */
+void dtInitRoot(int tid, struct inode *ip, u32 idotdot)
+{
+ struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+ dtroot_t *p;
+ int fsi;
+ dtslot_t *f;
+ tlock_t *tlck;
+ dtlock_t *dtlck;
+ lv_t *lv;
+ u16 xflag_save;
+
+ /*
+ * If this was previously a non-empty directory, we need to remove
+ * the old directory table.
+ */
+ if (DO_INDEX(ip)) {
+ if (jfs_ip->next_index > (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
+ /*
+ * We're playing games with the tid's xflag. If
+ * we're removing a regular file, the file's xtree
+ * is committed with COMMIT_PMAP, but we always
+ * commit the directory's xtree with COMMIT_PWMAP.
+ */
+ xflag_save = TxBlock[tid].xflag;
+ TxBlock[tid].xflag = 0;
+ /*
+ * xtTruncate isn't guaranteed to fully truncate
+ * the xtree. The caller needs to check i_size
+ * after committing the transaction to see if
+ * additional truncation is needed.
+ */
+ xtTruncate(tid, ip, 0, COMMIT_PWMAP);
+ TxBlock[tid].xflag = xflag_save;
+ /*
+ * Tell the jfs_metapage code that the metadata pages
+ * for the index table are no longer useful, and
+ * remove them from the page cache.
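+ * (concretely, set_cflag() below raises COMMIT_Stale on the
+ * inode, and truncate_inode_pages() empties ip->i_mapping,
+ * where the index table pages were cached.)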
+ */ + set_cflag(COMMIT_Stale, ip); + truncate_inode_pages(ip->i_mapping, 0); + } else + ip->i_size = 1; + + jfs_ip->next_index = 2; + } else + ip->i_size = IDATASIZE; + + mark_inode_dirty(ip); + /* + * acquire a transaction lock on the root + * + * action: directory initialization; + */ + tlck = txLock(tid, ip, (metapage_t *) & jfs_ip->bxflag, + tlckDTREE | tlckENTRY | tlckBTROOT); + dtlck = (dtlock_t *) & tlck->lock; + + /* linelock root */ + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + lv->offset = 0; + lv->length = DTROOTMAXSLOT; + dtlck->index++; + + p = &jfs_ip->i_dtroot; + + p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; + + p->header.nextindex = 0; + + /* init freelist */ + fsi = 1; + f = &p->slot[fsi]; + + /* init data area of root */ + for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) + f->next = fsi; + f->next = -1; + + p->header.freelist = 1; + p->header.freecnt = 8; + + /* init '..' entry */ + p->header.idotdot = cpu_to_le32(idotdot); + +#if 0 + ip->i_blocks = LBLK2PBLK(ip->i_sb, + ((jfs_ip->ea.flag & DXD_EXTENT) ? + lengthDXD(&jfs_ip->ea) : 0) + + ((jfs_ip->acl.flag & DXD_EXTENT) ? + lengthDXD(&jfs_ip->acl) : 0)); +#endif + + return; +} + +/* + * jfs_readdir() + * + * function: read directory entries sequentially + * from the specified entry offset + * + * parameter: + * + * return: offset = (pn, index) of start entry + * of next jfs_readdir()/dtRead() + */ +int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *ip = filp->f_dentry->d_inode; + struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; + int rc = 0; + struct dtoffset { + s16 pn; + s16 index; + s32 unused; + } *dtoffset = (struct dtoffset *) &filp->f_pos; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + s8 *stbl; + btstack_t btstack; + int i, next; + ldtentry_t *d; + dtslot_t *t; + int d_namleft, d_namlen, len; + char *d_name, *name_ptr; + int dtlhdrdatalen; + u32 dir_index; + int do_index = 0; + uint loop_count = 0; + + if (filp->f_pos == -1) + return 0; + + if (DO_INDEX(ip)) { + /* + * persistent index is stored in directory entries. + * Special cases: 0 = . + * 1 = .. + * -1 = End of directory + */ + do_index = 1; + dtlhdrdatalen = DTLHDRDATALEN; + + dir_index = (u32) filp->f_pos; + + if (dir_index > 1) { + dir_table_slot_t dirtab_slot; + + if (dtEmpty(ip)) { + filp->f_pos = -1; + return 0; + } + repeat: + rc = get_index(ip, dir_index, &dirtab_slot); + if (rc) { + filp->f_pos = -1; + return rc; + } + if (dirtab_slot.flag == DIR_INDEX_FREE) { + if (loop_count++ > JFS_IP(ip)->next_index) { + jERROR(1, ("jfs_readdir detected " + "infinite loop!\n")); + filp->f_pos = -1; + return 0; + } + dir_index = le32_to_cpu(dirtab_slot.addr2); + if (dir_index == -1) { + filp->f_pos = -1; + return 0; + } + goto repeat; + } + bn = addressDTS(&dirtab_slot); + index = dirtab_slot.slot; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) { + filp->f_pos = -1; + return 0; + } + } else { + if (dir_index == 0) { + /* + * self "." + */ + filp->f_pos = 0; + if (filldir(dirent, ".", 1, 0, ip->i_ino, + DT_DIR)) + return 0; + } + /* + * parent ".." + */ + filp->f_pos = 1; + if (filldir + (dirent, "..", 2, 1, PARENT(ip), DT_DIR)) + return 0; + + /* + * Find first entry of left-most leaf + */ + if (dtEmpty(ip)) { + filp->f_pos = -1; + return 0; + } + + if ((rc = dtReadFirst(ip, &btstack))) + return -rc; + + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + } + } else { + /* + * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 + * + * pn = index = 0: First entry "." 
+ * pn = 0; index = 1: Second entry ".." + * pn > 0: Real entries, pn=1 -> leftmost page + * pn = index = -1: No more entries + */ + dtlhdrdatalen = DTLHDRDATALEN_LEGACY; + + if (filp->f_pos == 0) { + /* build "." entry */ + + if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, + DT_DIR)) + return 0; + dtoffset->index = 1; + } + + if (dtoffset->pn == 0) { + if (dtoffset->index == 1) { + /* build ".." entry */ + + if (filldir(dirent, "..", 2, filp->f_pos, + PARENT(ip), DT_DIR)) + return 0; + } else { + jERROR(1, + ("jfs_readdir called with invalid offset!\n")); + } + dtoffset->pn = 1; + dtoffset->index = 0; + } + + if (dtEmpty(ip)) { + filp->f_pos = -1; + return 0; + } + + if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { + jERROR(1, + ("jfs_readdir: unexpected rc = %d from dtReadNext\n", + rc)); + filp->f_pos = -1; + return 0; + } + /* get start leaf page and index */ + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* offset beyond directory eof ? */ + if (bn < 0) { + filp->f_pos = -1; + return 0; + } + } + + d_name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); + if (d_name == NULL) { + DT_PUTPAGE(mp); + jERROR(1, ("jfs_readdir: kmalloc failed!\n")); + filp->f_pos = -1; + return 0; + } + while (1) { + stbl = DT_GETSTBL(p); + + for (i = index; i < p->header.nextindex; i++) { + d = (ldtentry_t *) & p->slot[stbl[i]]; + + d_namleft = d_namlen = d->namlen; + name_ptr = d_name; + + if (do_index) { + filp->f_pos = le32_to_cpu(d->index); + len = min(d_namleft, DTLHDRDATALEN); + } else + len = min(d_namleft, DTLHDRDATALEN_LEGACY); + + /* copy the name of head/only segment */ + jfs_strfromUCS_le(name_ptr, d->name, len, + codepage); + + /* copy name in the additional segment(s) */ + next = d->next; + while (next >= 0) { + t = (dtslot_t *) & p->slot[next]; + name_ptr += len; + d_namleft -= len; + len = min(d_namleft, DTSLOTDATALEN); + jfs_strfromUCS_le(name_ptr, t->name, len, + codepage); + + next = t->next; + } + + if (filldir(dirent, d_name, d_namlen, filp->f_pos, + le32_to_cpu(d->inumber), DT_UNKNOWN)) + goto out; + if (!do_index) + dtoffset->index++; + } + + /* + * get next leaf page + */ + + if (p->header.flag & BT_ROOT) { + filp->f_pos = -1; + break; + } + + bn = le64_to_cpu(p->header.next); + if (bn == 0) { + filp->f_pos = -1; + break; + } + + /* unpin previous leaf page */ + DT_PUTPAGE(mp); + + /* get next leaf page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) { + kfree(d_name); + return -rc; + } + + /* update offset (pn:index) for new page */ + index = 0; + if (!do_index) { + dtoffset->pn++; + dtoffset->index = 0; + } + + } + + out: + kfree(d_name); + DT_PUTPAGE(mp); + + return rc; +} + + +/* + * dtReadFirst() + * + * function: get the leftmost page of the directory + */ +static int dtReadFirst(struct inode *ip, btstack_t * btstack) +{ + int rc = 0; + s64 bn; + int psize = 288; /* initial in-line directory */ + metapage_t *mp; + dtpage_t *p; + s8 *stbl; + btframe_t *btsp; + pxd_t *xd; + + BT_CLR(btstack); /* reset stack */ + + /* + * descend leftmost path of the tree + * + * by convention, root bn = 0. 
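+ *
+ * e.g., for a two-level tree: the root (bn 0, inline in the inode)
+ * is internal; stbl[0] of each internal page selects the pxd of its
+ * leftmost child, so the loop below walks root -> leftmost child ->
+ * ... until a page flagged BT_LEAF is reached, which is returned
+ * pinned with index 0.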
+ */ + for (bn = 0;;) { + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + return rc; + + /* + * leftmost leaf page + */ + if (p->header.flag & BT_LEAF) { + /* return leftmost entry */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = 0; + btsp->mp = mp; + + return 0; + } + + /* + * descend down to leftmost child page + */ + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, 0); + + /* get the leftmost entry */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[0]]; + + /* get the child page block address */ + bn = addressPXD(xd); + psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; + + /* unpin the parent page */ + DT_PUTPAGE(mp); + } +} + + +/* + * dtReadNext() + * + * function: get the page of the specified offset (pn:index) + * + * return: if (offset > eof), bn = -1; + * + * note: if index > nextindex of the target leaf page, + * start with 1st entry of next leaf page; + */ +static int dtReadNext(struct inode *ip, loff_t * offset, btstack_t * btstack) +{ + int rc = 0; + struct dtoffset { + s16 pn; + s16 index; + s32 unused; + } *dtoffset = (struct dtoffset *) offset; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + int pn; + s8 *stbl; + btframe_t *btsp, *parent; + pxd_t *xd; + + /* + * get leftmost leaf page pinned + */ + if ((rc = dtReadFirst(ip, btstack))) + return rc; + + /* get leaf page */ + DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); + + /* get the start offset (pn:index) */ + pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ + index = dtoffset->index; + + /* start at leftmost page ? */ + if (pn == 0) { + /* offset beyond eof ? */ + if (index < p->header.nextindex) + goto out; + + if (p->header.flag & BT_ROOT) { + bn = -1; + goto out; + } + + /* start with 1st entry of next leaf page */ + dtoffset->pn++; + dtoffset->index = index = 0; + goto a; + } + + /* start at non-leftmost page: scan parent pages for large pn */ + if (p->header.flag & BT_ROOT) { + bn = -1; + goto out; + } + + /* start after next leaf page ? */ + if (pn > 1) + goto b; + + /* get leaf page pn = 1 */ + a: + bn = le64_to_cpu(p->header.next); + + /* unpin leaf page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? */ + if (bn == 0) { + bn = -1; + goto out; + } + + goto c; + + /* + * scan last internal page level to get target leaf page + */ + b: + /* unpin leftmost leaf page */ + DT_PUTPAGE(mp); + + /* get left most parent page */ + btsp = btstack->top; + parent = btsp - 1; + bn = parent->bn; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* scan parent pages at last internal page level */ + while (pn >= p->header.nextindex) { + pn -= p->header.nextindex; + + /* get next parent page address */ + bn = le64_to_cpu(p->header.next); + + /* unpin current parent page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? */ + if (bn == 0) { + bn = -1; + goto out; + } + + /* get next parent page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* update parent page stack frame */ + parent->bn = bn; + } + + /* get leaf page address */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[pn]]; + bn = addressPXD(xd); + + /* unpin parent page */ + DT_PUTPAGE(mp); + + /* + * get target leaf page + */ + c: + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * leaf page has been completed: + * start with 1st entry of next leaf page + */ + if (index >= p->header.nextindex) { + bn = le64_to_cpu(p->header.next); + + /* unpin leaf page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? 
*/ + if (bn == 0) { + bn = -1; + goto out; + } + + /* get next leaf page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* start with 1st entry of next leaf page */ + dtoffset->pn++; + dtoffset->index = 0; + } + + out: + /* return target leaf page pinned */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = dtoffset->index; + btsp->mp = mp; + + return 0; +} + + +/* + * dtCompare() + * + * function: compare search key with an internal entry + * + * return: + * < 0 if k is < record + * = 0 if k is = record + * > 0 if k is > record + */ +static int dtCompare(component_t * key, /* search key */ + dtpage_t * p, /* directory page */ + int si) +{ /* entry slot index */ + register int rc; + register wchar_t *kname, *name; + register int klen, namlen, len; + idtentry_t *ih; + dtslot_t *t; + + /* + * force the left-most key on internal pages, at any level of + * the tree, to be less than any search key. + * this obviates having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything that has been stored. + * + * (? if/when dtSearch() narrows down to 1st entry (index = 0), + * at any internal page at any level of the tree, + * it descends to child of the entry anyway - + * ? make the entry as min size dummy entry) + * + * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) + * return (1); + */ + + kname = key->name; + klen = key->namlen; + + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + name = ih->name; + namlen = ih->namlen; + len = min(namlen, DTIHDRDATALEN); + + /* compare with head/only segment */ + len = min(klen, len); + if ((rc = UniStrncmp_le(kname, name, len))) + return rc; + + klen -= len; + namlen -= len; + + /* compare with additional segment(s) */ + kname += len; + while (klen > 0 && namlen > 0) { + /* compare with next name segment */ + t = (dtslot_t *) & p->slot[si]; + len = min(namlen, DTSLOTDATALEN); + len = min(klen, len); + name = t->name; + if ((rc = UniStrncmp_le(kname, name, len))) + return rc; + + klen -= len; + namlen -= len; + kname += len; + si = t->next; + } + + return (klen - namlen); +} + + + + +/* + * ciCompare() + * + * function: compare search key with an (leaf/internal) entry + * + * return: + * < 0 if k is < record + * = 0 if k is = record + * > 0 if k is > record + */ +static int ciCompare(component_t * key, /* search key */ + dtpage_t * p, /* directory page */ + int si, /* entry slot index */ + int flag) +{ + register int rc; + register wchar_t *kname, *name, x; + register int klen, namlen, len; + ldtentry_t *lh; + idtentry_t *ih; + dtslot_t *t; + int i; + + /* + * force the left-most key on internal pages, at any level of + * the tree, to be less than any search key. + * this obviates having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything that has been stored. + * + * (? if/when dtSearch() narrows down to 1st entry (index = 0), + * at any internal page at any level of the tree, + * it descends to child of the entry anyway - + * ? 
make the entry as min size dummy entry) + * + * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) + * return (1); + */ + + kname = key->name; + klen = key->namlen; + + /* + * leaf page entry + */ + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[si]; + si = lh->next; + name = lh->name; + namlen = lh->namlen; + if (flag & JFS_DIR_INDEX) + len = min(namlen, DTLHDRDATALEN); + else + len = min(namlen, DTLHDRDATALEN_LEGACY); + } + /* + * internal page entry + */ + else { + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + name = ih->name; + namlen = ih->namlen; + len = min(namlen, DTIHDRDATALEN); + } + + /* compare with head/only segment */ + len = min(klen, len); + for (i = 0; i < len; i++, kname++, name++) { + /* only uppercase if case-insensitive support is on */ + if ((flag & JFS_OS2) == JFS_OS2) + x = UniToupper(le16_to_cpu(*name)); + else + x = le16_to_cpu(*name); + if ((rc = *kname - x)) + return rc; + } + + klen -= len; + namlen -= len; + + /* compare with additional segment(s) */ + while (klen > 0 && namlen > 0) { + /* compare with next name segment */ + t = (dtslot_t *) & p->slot[si]; + len = min(namlen, DTSLOTDATALEN); + len = min(klen, len); + name = t->name; + for (i = 0; i < len; i++, kname++, name++) { + /* only uppercase if case-insensitive support is on */ + if ((flag & JFS_OS2) == JFS_OS2) + x = UniToupper(le16_to_cpu(*name)); + else + x = le16_to_cpu(*name); + + if ((rc = *kname - x)) + return rc; + } + + klen -= len; + namlen -= len; + si = t->next; + } + + return (klen - namlen); +} + + +/* + * ciGetLeafPrefixKey() + * + * function: compute prefix of suffix compression + * from two adjacent leaf entries + * across page boundary + * + * return: + * Number of prefix bytes needed to distinguish b from a. 
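+ *
+ * e.g., with left key "CAR" and right key "COST" (after ciToUpper()
+ * under JFS_OS2), the first mismatch is at position 1 ('A' vs 'O'),
+ * so the router key is "CO"; "CAR" < "CO" <= "COST", which routes
+ * searches correctly. if the left key is a prefix of the right one
+ * ("CAR" vs "CARS"), one extra character is kept ("CARS"); equal
+ * keys are retained whole.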
+ */ +static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, + int ri, component_t * key, int flag) +{ + register int klen, namlen; + register wchar_t *pl, *pr, *kname; + wchar_t lname[JFS_NAME_MAX + 1]; + component_t lkey = { 0, lname }; + wchar_t rname[JFS_NAME_MAX + 1]; + component_t rkey = { 0, rname }; + + /* get left and right key */ + dtGetKey(lp, li, &lkey, flag); + lkey.name[lkey.namlen] = 0; + + if ((flag & JFS_OS2) == JFS_OS2) + ciToUpper(&lkey); + + dtGetKey(rp, ri, &rkey, flag); + rkey.name[rkey.namlen] = 0; + + + if ((flag & JFS_OS2) == JFS_OS2) + ciToUpper(&rkey); + + /* compute prefix */ + klen = 0; + kname = key->name; + namlen = min(lkey.namlen, rkey.namlen); + for (pl = lkey.name, pr = rkey.name; + namlen; pl++, pr++, namlen--, klen++, kname++) { + *kname = *pr; + if (*pl != *pr) { + key->namlen = klen + 1; + return; + } + } + + /* l->namlen <= r->namlen since l <= r */ + if (lkey.namlen < rkey.namlen) { + *kname = *pr; + key->namlen = klen + 1; + } else /* l->namelen == r->namelen */ + key->namlen = klen; + + return; +} + + + +/* + * dtGetKey() + * + * function: get key of the entry + */ +static void dtGetKey(dtpage_t * p, int i, /* entry index */ + component_t * key, int flag) +{ + int si; + s8 *stbl; + ldtentry_t *lh; + idtentry_t *ih; + dtslot_t *t; + int namlen, len; + wchar_t *name, *kname; + + /* get entry */ + stbl = DT_GETSTBL(p); + si = stbl[i]; + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[si]; + si = lh->next; + namlen = lh->namlen; + name = lh->name; + if (flag & JFS_DIR_INDEX) + len = min(namlen, DTLHDRDATALEN); + else + len = min(namlen, DTLHDRDATALEN_LEGACY); + } else { + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + namlen = ih->namlen; + name = ih->name; + len = min(namlen, DTIHDRDATALEN); + } + + key->namlen = namlen; + kname = key->name; + + /* + * move head/only segment + */ + UniStrncpy_le(kname, name, len); + + /* + * move additional segment(s) + */ + while (si >= 0) { + /* get next segment */ + t = &p->slot[si]; + kname += len; + namlen -= len; + len = min(namlen, DTSLOTDATALEN); + UniStrncpy_le(kname, t->name, len); + + si = t->next; + } +} + + +/* + * dtInsertEntry() + * + * function: allocate free slot(s) and + * write a leaf/internal entry + * + * return: entry slot index + */ +static void dtInsertEntry(dtpage_t * p, int index, component_t * key, + ddata_t * data, dtlock_t ** dtlock) +{ + dtslot_t *h, *t; + ldtentry_t *lh = 0; + idtentry_t *ih = 0; + int hsi, fsi, klen, len, nextindex; + wchar_t *kname, *name; + s8 *stbl; + pxd_t *xd; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int xsi, n; + s64 bn = 0; + metapage_t *mp = 0; + + klen = key->namlen; + kname = key->name; + + /* allocate a free slot */ + hsi = fsi = p->header.freelist; + h = &p->slot[fsi]; + p->header.freelist = h->next; + --p->header.freecnt; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + + lv = (lv_t *) & dtlck->lv[dtlck->index]; + lv->offset = hsi; + + /* write head/only segment */ + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) h; + lh->next = h->next; + lh->inumber = data->leaf.ino; /* little-endian */ + lh->namlen = klen; + name = lh->name; + if (data->leaf.ip) { + len = min(klen, DTLHDRDATALEN); + if (!(p->header.flag & BT_ROOT)) + bn = addressPXD(&p->header.self); + lh->index = cpu_to_le32(add_index(data->leaf.tid, + data->leaf.ip, + bn, index)); + } else + len = min(klen, DTLHDRDATALEN_LEGACY); + } else { + ih = (idtentry_t *) h; + ih->next = h->next; + xd = 
(pxd_t *) ih; + *xd = data->xd; + ih->namlen = klen; + name = ih->name; + len = min(klen, DTIHDRDATALEN); + } + + UniStrncpy_le(name, kname, len); + + n = 1; + xsi = hsi; + + /* write additional segment(s) */ + t = h; + klen -= len; + while (klen) { + /* get free slot */ + fsi = p->header.freelist; + t = &p->slot[fsi]; + p->header.freelist = t->next; + --p->header.freecnt; + + /* is next slot contiguous ? */ + if (fsi != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = fsi; + n = 0; + } + + kname += len; + len = min(klen, DTSLOTDATALEN); + UniStrncpy_le(t->name, kname, len); + + n++; + xsi = fsi; + klen -= len; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; + + /* terminate last/only segment */ + if (h == t) { + /* single segment entry */ + if (p->header.flag & BT_LEAF) + lh->next = -1; + else + ih->next = -1; + } else + /* multi-segment entry */ + t->next = -1; + + /* if insert into middle, shift right succeeding entries in stbl */ + stbl = DT_GETSTBL(p); + nextindex = p->header.nextindex; + if (index < nextindex) { + memmove(stbl + index + 1, stbl + index, nextindex - index); + + if ((p->header.flag & BT_LEAF) && data->leaf.ip) { + /* + * Need to update slot number for entries that moved + * in the stbl + */ + mp = 0; + for (n = index + 1; n <= nextindex; n++) { + lh = (ldtentry_t *) & (p->slot[stbl[n]]); + modify_index(data->leaf.tid, data->leaf.ip, + le32_to_cpu(lh->index), bn, n, + &mp); + } + if (mp) + release_metapage(mp); + } + } + + stbl[index] = hsi; + + /* advance next available entry index of stbl */ + ++p->header.nextindex; +} + + +/* + * dtMoveEntry() + * + * function: move entries from split/left page to new/right page + * + * nextindex of dst page and freelist/freecnt of both pages + * are updated. + */ +static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, + dtlock_t ** sdtlock, dtlock_t ** ddtlock, + int do_index) +{ + int ssi, next; /* src slot index */ + int di; /* dst entry index */ + int dsi; /* dst slot index */ + s8 *sstbl, *dstbl; /* sorted entry table */ + int snamlen, len; + ldtentry_t *slh, *dlh = 0; + idtentry_t *sih, *dih = 0; + dtslot_t *h, *s, *d; + dtlock_t *sdtlck = *sdtlock, *ddtlck = *ddtlock; + lv_t *slv, *dlv; + int xssi, ns, nd; + int sfsi; + + sstbl = (s8 *) & sp->slot[sp->header.stblindex]; + dstbl = (s8 *) & dp->slot[dp->header.stblindex]; + + dsi = dp->header.freelist; /* first (whole page) free slot */ + sfsi = sp->header.freelist; + + /* linelock destination entry slot */ + dlv = (lv_t *) & ddtlck->lv[ddtlck->index]; + dlv->offset = dsi; + + /* linelock source entry slot */ + slv = (lv_t *) & sdtlck->lv[sdtlck->index]; + slv->offset = sstbl[si]; + xssi = slv->offset - 1; + + /* + * move entries + */ + ns = nd = 0; + for (di = 0; si < sp->header.nextindex; si++, di++) { + ssi = sstbl[si]; + dstbl[di] = dsi; + + /* is next slot contiguous ? 
*/ + if (ssi != xssi + 1) { + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + + /* open new linelock */ + if (sdtlck->index < sdtlck->maxcnt) + slv++; + else { + sdtlck = (dtlock_t *) txLinelock(sdtlck); + slv = (lv_t *) & sdtlck->lv[0]; + } + + slv->offset = ssi; + ns = 0; + } + + /* + * move head/only segment of an entry + */ + /* get dst slot */ + h = d = &dp->slot[dsi]; + + /* get src slot and move */ + s = &sp->slot[ssi]; + if (sp->header.flag & BT_LEAF) { + /* get source entry */ + slh = (ldtentry_t *) s; + dlh = (ldtentry_t *) h; + snamlen = slh->namlen; + + if (do_index) { + len = min(snamlen, DTLHDRDATALEN); + dlh->index = slh->index; /* little-endian */ + } else + len = min(snamlen, DTLHDRDATALEN_LEGACY); + + memcpy(dlh, slh, 6 + len * 2); + + next = slh->next; + + /* update dst head/only segment next field */ + dsi++; + dlh->next = dsi; + } else { + sih = (idtentry_t *) s; + snamlen = sih->namlen; + + len = min(snamlen, DTIHDRDATALEN); + dih = (idtentry_t *) h; + memcpy(dih, sih, 10 + len * 2); + next = sih->next; + + dsi++; + dih->next = dsi; + } + + /* free src head/only segment */ + s->next = sfsi; + s->cnt = 1; + sfsi = ssi; + + ns++; + nd++; + xssi = ssi; + + /* + * move additional segment(s) of the entry + */ + snamlen -= len; + while ((ssi = next) >= 0) { + /* is next slot contiguous ? */ + if (ssi != xssi + 1) { + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + + /* open new linelock */ + if (sdtlck->index < sdtlck->maxcnt) + slv++; + else { + sdtlck = + (dtlock_t *) + txLinelock(sdtlck); + slv = (lv_t *) & sdtlck->lv[0]; + } + + slv->offset = ssi; + ns = 0; + } + + /* get next source segment */ + s = &sp->slot[ssi]; + + /* get next destination free slot */ + d++; + + len = min(snamlen, DTSLOTDATALEN); + UniStrncpy(d->name, s->name, len); + + ns++; + nd++; + xssi = ssi; + + dsi++; + d->next = dsi; + + /* free source segment */ + next = s->next; + s->next = sfsi; + s->cnt = 1; + sfsi = ssi; + + snamlen -= len; + } /* end while */ + + /* terminate dst last/only segment */ + if (h == d) { + /* single segment entry */ + if (dp->header.flag & BT_LEAF) + dlh->next = -1; + else + dih->next = -1; + } else + /* multi-segment entry */ + d->next = -1; + } /* end for */ + + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + *sdtlock = sdtlck; + + dlv->length = nd; + ddtlck->index++; + *ddtlock = ddtlck; + + /* update source header */ + sp->header.freelist = sfsi; + sp->header.freecnt += nd; + + /* update destination header */ + dp->header.nextindex = di; + + dp->header.freelist = dsi; + dp->header.freecnt -= nd; +} + + +/* + * dtDeleteEntry() + * + * function: free a (leaf/internal) entry + * + * log freelist header, stbl, and each segment slot of entry + * (even though last/only segment next field is modified, + * physical image logging requires all segment slots of + * the entry logged to avoid applying previous updates + * to the same slots) + */ +static void dtDeleteEntry(dtpage_t * p, int fi, dtlock_t ** dtlock) +{ + int fsi; /* free entry slot index */ + s8 *stbl; + dtslot_t *t; + int si, freecnt; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int xsi, n; + + /* get free entry slot index */ + stbl = DT_GETSTBL(p); + fsi = stbl[fi]; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + lv->offset = fsi; + + /* get the head/only segment */ + t = &p->slot[fsi]; + if (p->header.flag & BT_LEAF) + si = ((ldtentry_t *) 
t)->next; + else + si = ((idtentry_t *) t)->next; + t->next = si; + t->cnt = 1; + + n = freecnt = 1; + xsi = fsi; + + /* find the last/only segment */ + while (si >= 0) { + /* is next slot contiguous ? */ + if (si != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = si; + n = 0; + } + + n++; + xsi = si; + freecnt++; + + t = &p->slot[si]; + t->cnt = 1; + si = t->next; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; + + /* update freelist */ + t->next = p->header.freelist; + p->header.freelist = fsi; + p->header.freecnt += freecnt; + + /* if delete from middle, + * shift left the succedding entries in the stbl + */ + si = p->header.nextindex; + if (fi < si - 1) + memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); + + p->header.nextindex--; +} + + +/* + * dtTruncateEntry() + * + * function: truncate a (leaf/internal) entry + * + * log freelist header, stbl, and each segment slot of entry + * (even though last/only segment next field is modified, + * physical image logging requires all segment slots of + * the entry logged to avoid applying previous updates + * to the same slots) + */ +static void dtTruncateEntry(dtpage_t * p, int ti, dtlock_t ** dtlock) +{ + int tsi; /* truncate entry slot index */ + s8 *stbl; + dtslot_t *t; + int si, freecnt; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int fsi, xsi, n; + + /* get free entry slot index */ + stbl = DT_GETSTBL(p); + tsi = stbl[ti]; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + lv->offset = tsi; + + /* get the head/only segment */ + t = &p->slot[tsi]; + ASSERT(p->header.flag & BT_INTERNAL); + ((idtentry_t *) t)->namlen = 0; + si = ((idtentry_t *) t)->next; + ((idtentry_t *) t)->next = -1; + + n = 1; + freecnt = 0; + fsi = si; + xsi = tsi; + + /* find the last/only segment */ + while (si >= 0) { + /* is next slot contiguous ? */ + if (si != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = si; + n = 0; + } + + n++; + xsi = si; + freecnt++; + + t = &p->slot[si]; + t->cnt = 1; + si = t->next; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; + + /* update freelist */ + if (freecnt == 0) + return; + t->next = p->header.freelist; + p->header.freelist = fsi; + p->header.freecnt += freecnt; +} + + +/* + * dtLinelockFreelist() + */ +static void dtLinelockFreelist(dtpage_t * p, /* directory page */ + int m, /* max slot index */ + dtlock_t ** dtlock) +{ + int fsi; /* free entry slot index */ + dtslot_t *t; + int si; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int xsi, n; + + /* get free entry slot index */ + fsi = p->header.freelist; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + lv->offset = fsi; + + n = 1; + xsi = fsi; + + t = &p->slot[fsi]; + si = t->next; + + /* find the last/only segment */ + while (si < m && si >= 0) { + /* is next slot contiguous ? 
*/ + if (si != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = si; + n = 0; + } + + n++; + xsi = si; + + t = &p->slot[si]; + si = t->next; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; +} + + +/* + * NAME: dtModify + * + * FUNCTION: Modify the inode number part of a directory entry + * + * PARAMETERS: + * tid - Transaction id + * ip - Inode of parent directory + * key - Name of entry to be modified + * orig_ino - Original inode number expected in entry + * new_ino - New inode number to put into entry + * flag - JFS_RENAME + * + * RETURNS: + * ESTALE - If entry found does not match orig_ino passed in + * ENOENT - If no entry can be found to match key + * 0 - If successfully modified entry + */ +int dtModify(int tid, struct inode *ip, + component_t * key, ino_t * orig_ino, ino_t new_ino, int flag) +{ + int rc; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + btstack_t btstack; + tlock_t *tlck; + dtlock_t *dtlck; + lv_t *lv; + s8 *stbl; + int entry_si; /* entry slot index */ + ldtentry_t *entry; + + /* + * search for the entry to modify: + * + * dtSearch() returns (leaf page pinned, index at which to modify). + */ + if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) + return rc; + + /* retrieve search result */ + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page of named entry + */ + tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + + /* get slot index of the entry */ + stbl = DT_GETSTBL(p); + entry_si = stbl[index]; + + /* linelock entry */ + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + lv->offset = entry_si; + lv->length = 1; + dtlck->index++; + + /* get the head/only segment */ + entry = (ldtentry_t *) & p->slot[entry_si]; + + /* substitute the inode number of the entry */ + entry->inumber = cpu_to_le32(new_ino); + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + + return 0; +} + +#ifdef _JFS_STATISTICS +/* + * dtStatistics + */ +void dtStatistics() +{ + /* report statistics */ +} +#endif /* _JFS_STATISTICS */ + + +#ifdef _JFS_DEBUG_DTREE +/* + * dtDisplayTree() + * + * function: traverse forward + */ +int dtDisplayTree(struct inode *ip) +{ + int rc; + metapage_t *mp; + dtpage_t *p; + s64 bn, pbn; + int index, lastindex, v, h; + pxd_t *xd; + btstack_t btstack; + btframe_t *btsp; + btframe_t *parent; + u8 *stbl; + int psize = 256; + + printk("display B+-tree.\n"); + + /* clear stack */ + btsp = btstack.stack; + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + v = h = 0; + + /* + * first access of each page: + */ + newPage: + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + return rc; + + /* process entries forward from first index */ + index = 0; + lastindex = p->header.nextindex - 1; + + if (p->header.flag & BT_INTERNAL) { + /* + * first access of each internal page + */ + printf("internal page "); + dtDisplayPage(ip, bn, p); + + goto getChild; + } else { /* (p->header.flag & BT_LEAF) */ + + /* + * first access of each leaf page + */ + printf("leaf page "); + dtDisplayPage(ip, bn, p); + + /* + * process leaf page entries + * + for ( ; index <= lastindex; index++) + { + } + */ + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + } + + /* + * go back up to the parent page 
+ */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) + /* current page must have been root */ + return; + + /* + * parent page scan completed + */ + if ((index = parent->index) == (lastindex = parent->lastindex)) { + /* go back up to the parent page */ + goto getParent; + } + + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + /* v = parent->level; */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* get next parent entry */ + index++; + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* push/save current parent entry for the child page */ + btsp->bn = pbn = bn; + btsp->index = index; + btsp->lastindex = lastindex; + /* btsp->level = v; */ + /* btsp->node = h; */ + ++btsp; + + /* get current entry for the child page */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[index]]; + + /* + * first access of each internal entry: + */ + + /* get child page */ + bn = addressPXD(xd); + psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize; + + printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn); + v++; + h = index; + + /* release parent page */ + DT_PUTPAGE(mp); + + /* process the child page */ + goto newPage; +} + + +/* + * dtDisplayPage() + * + * function: display page + */ +int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p) +{ + int rc; + metapage_t *mp; + ldtentry_t *lh; + idtentry_t *ih; + pxd_t *xd; + int i, j; + u8 *stbl; + wchar_t name[JFS_NAME_MAX + 1]; + component_t key = { 0, name }; + int freepage = 0; + + if (p == NULL) { + freepage = 1; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + } + + /* display page control */ + printk("bn:0x%Lx flag:0x%08x nextindex:%d\n", + bn, p->header.flag, p->header.nextindex); + + /* display entries */ + stbl = DT_GETSTBL(p); + for (i = 0, j = 1; i < p->header.nextindex; i++, j++) { + dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag); + key.name[key.namlen] = '\0'; + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[stbl[i]]; + printf("\t[%d] %s:%d", i, key.name, + le32_to_cpu(lh->inumber)); + } else { + ih = (idtentry_t *) & p->slot[stbl[i]]; + xd = (pxd_t *) ih; + bn = addressPXD(xd); + printf("\t[%d] %s:0x%Lx", i, key.name, bn); + } + + if (j == 4) { + printf("\n"); + j = 0; + } + } + + printf("\n"); + + if (freepage) + DT_PUTPAGE(mp); + + return 0; +} +#endif /* _JFS_DEBUG_DTREE */ diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_extent.c linuxppc64_2_4/fs/jfs/jfs_extent.c --- ../kernel.org/linux/fs/jfs/jfs_extent.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_extent.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,637 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * + * Module: jfs_extent.c: + */ + +#include +#include +#include +#include +#include + +/* + * forward references + */ +static int extBalloc(struct inode *, s64, s64 *, s64 *); +static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); +int extRecord(struct inode *, xad_t *); +static s64 extRoundDown(s64 nb); + +/* + * external references + */ +extern int dbExtend(struct inode *, s64, s64, s64); +extern int jfs_commit_inode(struct inode *, int); + + +#define DPD(a) (printk("(a): %d\n",(a))) +#define DPC(a) (printk("(a): %c\n",(a))) +#define DPL1(a) \ +{ \ + if ((a) >> 32) \ + printk("(a): %x%08x ",(a)); \ + else \ + printk("(a): %x ",(a) << 32); \ +} +#define DPL(a) \ +{ \ + if ((a) >> 32) \ + printk("(a): %x%08x\n",(a)); \ + else \ + printk("(a): %x\n",(a) << 32); \ +} + +#define DPD1(a) (printk("(a): %d ",(a))) +#define DPX(a) (printk("(a): %08x\n",(a))) +#define DPX1(a) (printk("(a): %08x ",(a))) +#define DPS(a) (printk("%s\n",(a))) +#define DPE(a) (printk("\nENTERING: %s\n",(a))) +#define DPE1(a) (printk("\nENTERING: %s",(a))) +#define DPS1(a) (printk(" %s ",(a))) + + +/* + * NAME: extAlloc() + * + * FUNCTION: allocate an extent for a specified page range within a + * file. + * + * PARAMETERS: + * ip - the inode of the file. + * xlen - requested extent length. + * pno - the starting page number with the file. + * xp - pointer to an xad. on entry, xad describes an + * extent that is used as an allocation hint if the + * xaddr of the xad is non-zero. on successful exit, + * the xad describes the newly allocated extent. + * abnr - boolean_t indicating whether the newly allocated extent + * should be marked as allocated but not recorded. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int +extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + s64 nxlen, nxaddr, xoff, hint, xaddr = 0; + int rc, nbperpage; + int xflag; + + /* This blocks if we are low on resources */ + txBeginAnon(ip->i_sb); + + /* validate extent length */ + if (xlen > MAXXLEN) + xlen = MAXXLEN; + + /* get the number of blocks per page */ + nbperpage = sbi->nbperpage; + + /* get the page's starting extent offset */ + xoff = pno << sbi->l2nbperpage; + + /* check if an allocation hint was provided */ + if ((hint = addressXAD(xp))) { + /* get the size of the extent described by the hint */ + nxlen = lengthXAD(xp); + + /* check if the hint is for the portion of the file + * immediately previous to the current allocation + * request and if hint extent has the same abnr + * value as the current request. if so, we can + * extend the hint extent to include the current + * extent if we can allocate the blocks immediately + * following the hint extent. + */ + if (offsetXAD(xp) + nxlen == xoff && + abnr == ((xp->flag & XAD_NOTRECORDED) ? TRUE : FALSE)) + xaddr = hint + nxlen; + + /* adjust the hint to the last block of the extent */ + hint += (nxlen - 1); + } + + /* allocate the disk blocks for the extent. initially, extBalloc() + * will try to allocate disk blocks for the requested size (xlen). 
* if this fails (xlen contiguous free blocks not available), it'll
+	 * try to allocate a smaller number of blocks (producing a smaller
+	 * extent), with this smaller number of blocks consisting of the
+	 * requested number of blocks rounded down to the next smaller
+	 * power of 2 number (i.e. 16 -> 8).  it'll continue to round down
+	 * and retry the allocation until the number of blocks to allocate
+	 * is smaller than the number of blocks per page.
+	 */
+	nxlen = xlen;
+	if ((rc =
+	     extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
+		return (rc);
+	}
+
+	/* determine the value of the extent flag */
+	xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0;
+
+	/* if we can extend the hint extent to cover the current request,
+	 * extend it.  otherwise, insert a new extent to
+	 * cover the current request.
+	 */
+	if (xaddr && xaddr == nxaddr)
+		rc = xtExtend(0, ip, xoff, (int) nxlen, 0);
+	else
+		rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0);
+
+	/* if the extend or insert failed,
+	 * free the newly allocated blocks and return the error.
+	 */
+	if (rc) {
+		dbFree(ip, nxaddr, nxlen);
+		return (rc);
+	}
+
+	/* update the number of blocks allocated to the file */
+	ip->i_blocks += LBLK2PBLK(ip->i_sb, nxlen);
+
+	/* set the results of the extent allocation */
+	XADaddress(xp, nxaddr);
+	XADlength(xp, nxlen);
+	XADoffset(xp, xoff);
+	xp->flag = xflag;
+
+	mark_inode_dirty(ip);
+
+	/*
+	 * COMMIT_SyncList flags an anonymous tlock on page that is on
+	 * sync list.
+	 * We need to commit the inode to get the page written to disk.
+	 */
+	if (test_and_clear_cflag(COMMIT_Synclist,ip))
+		jfs_commit_inode(ip, 0);
+
+	return (0);
+}
+
+
+/*
+ * NAME:	extRealloc()
+ *
+ * FUNCTION:	extend the allocation of a file extent containing a
+ *		partially backed last page.
+ *
+ * PARAMETERS:
+ *	ip	- the inode of the file.
+ *	cp	- cbuf for the partially backed last page.
+ *	xlen	- requested size of the resulting extent.
+ *	xp	- pointer to an xad.  on successful exit, the xad
+ *		  describes the newly allocated extent.
+ *	abnr	- boolean_t indicating whether the newly allocated extent
+ *		  should be marked as allocated but not recorded.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	EIO	- i/o error.
+ *	ENOSPC	- insufficient disk resources.
+ */
+int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
+{
+	struct super_block *sb = ip->i_sb;
+	s64 xaddr, xlen, nxaddr, delta, xoff;
+	s64 ntail, nextend, ninsert;
+	int rc, nbperpage = JFS_SBI(sb)->nbperpage;
+	int xflag;
+
+	/* This blocks if we are low on resources */
+	txBeginAnon(ip->i_sb);
+
+	/* validate extent length */
+	if (nxlen > MAXXLEN)
+		nxlen = MAXXLEN;
+
+	/* get the extend (partial) page's disk block address and
+	 * number of blocks.
+	 */
+	xaddr = addressXAD(xp);
+	xlen = lengthXAD(xp);
+	xoff = offsetXAD(xp);
+
+	/* if the extend page is abnr and if the request is for
+	 * the extent to be allocated and recorded,
+	 * make the page allocated and recorded.
+	 */
+	if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
+		xp->flag = 0;
+		if ((rc = xtUpdate(0, ip, xp)))
+			return (rc);
+	}
+
+	/* try to allocate the requested number of blocks for the
+	 * extent.  dbRealloc() first tries to satisfy the request
+	 * by extending the allocation in place.  otherwise, it will
+	 * try to allocate a new set of blocks large enough for the
+	 * request.  in satisfying a request, dbReAlloc() may allocate
+	 * less than what was requested but will always allocate enough
+	 * space as to satisfy the extend page.
+ */ + if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr))) + return (rc); + + delta = nxlen - xlen; + + /* check if the extend page is not abnr but the request is abnr + * and the allocated disk space is for more than one page. if this + * is the case, there is a miss match of abnr between the extend page + * and the one or more pages following the extend page. as a result, + * two extents will have to be manipulated. the first will be that + * of the extent of the extend page and will be manipulated thru + * an xtExtend() or an xtTailgate(), depending upon whether the + * disk allocation occurred as an inplace extension. the second + * extent will be manipulated (created) through an xtInsert() and + * will be for the pages following the extend page. + */ + if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) { + ntail = nbperpage; + nextend = ntail - xlen; + ninsert = nxlen - nbperpage; + + xflag = XAD_NOTRECORDED; + } else { + ntail = nxlen; + nextend = delta; + ninsert = 0; + + xflag = xp->flag; + } + + /* if we were able to extend the disk allocation in place, + * extend the extent. otherwise, move the extent to a + * new disk location. + */ + if (xaddr == nxaddr) { + /* extend the extent */ + if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { + dbFree(ip, xaddr + xlen, delta); + return (rc); + } + } else { + /* + * move the extent to a new location: + * + * xtTailgate() accounts for relocated tail extent; + */ + if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { + dbFree(ip, nxaddr, nxlen); + return (rc); + } + } + + + /* check if we need to also insert a new extent */ + if (ninsert) { + /* perform the insert. if it fails, free the blocks + * to be inserted and make it appear that we only did + * the xtExtend() or xtTailgate() above. + */ + xaddr = nxaddr + ntail; + if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert, + &xaddr, 0)) { + dbFree(ip, xaddr, (s64) ninsert); + delta = nextend; + nxlen = ntail; + xflag = 0; + } + } + + /* update the inode with the number of blocks allocated */ + ip->i_blocks += LBLK2PBLK(sb, delta); + + /* set the return results */ + XADaddress(xp, nxaddr); + XADlength(xp, nxlen); + XADoffset(xp, xoff); + xp->flag = xflag; + + mark_inode_dirty(ip); + + return (0); +} + + +/* + * NAME: extHint() + * + * FUNCTION: produce an extent allocation hint for a file offset. + * + * PARAMETERS: + * ip - the inode of the file. + * offset - file offset for which the hint is needed. + * xp - pointer to the xad that is to be filled in with + * the hint. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + */ +int extHint(struct inode *ip, s64 offset, xad_t * xp) +{ + struct super_block *sb = ip->i_sb; + xadlist_t xadl; + lxdlist_t lxdl; + lxd_t lxd; + s64 prev; + int rc, nbperpage = JFS_SBI(sb)->nbperpage; + + /* init the hint as "no hint provided" */ + XADaddress(xp, 0); + + /* determine the starting extent offset of the page previous + * to the page containing the offset. + */ + prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; + + /* if the offsets in the first page of the file, + * no hint provided. 
+ */ + if (prev < 0) + return (0); + + /* prepare to lookup the previous page's extent info */ + lxdl.maxnlxd = 1; + lxdl.nlxd = 1; + lxdl.lxd = &lxd; + LXDoffset(&lxd, prev) + LXDlength(&lxd, nbperpage); + + xadl.maxnxad = 1; + xadl.nxad = 0; + xadl.xad = xp; + + /* perform the lookup */ + if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) + return (rc); + + /* check if not extent exists for the previous page. + * this is possible for sparse files. + */ + if (xadl.nxad == 0) { +// assert(ISSPARSE(ip)); + return (0); + } + + /* only preserve the abnr flag within the xad flags + * of the returned hint. + */ + xp->flag &= XAD_NOTRECORDED; + + assert(xadl.nxad == 1); + assert(lengthXAD(xp) == nbperpage); + + return (0); +} + + +/* + * NAME: extRecord() + * + * FUNCTION: change a page with a file from not recorded to recorded. + * + * PARAMETERS: + * ip - inode of the file. + * cp - cbuf of the file page. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int extRecord(struct inode *ip, xad_t * xp) +{ + int rc; + + txBeginAnon(ip->i_sb); + + /* update the extent */ + if ((rc = xtUpdate(0, ip, xp))) + return (rc); + +#ifdef _STILL_TO_PORT + /* no longer abnr */ + cp->cm_abnr = FALSE; + + /* mark the cbuf as modified */ + cp->cm_modified = TRUE; +#endif /* _STILL_TO_PORT */ + + return (0); +} + + +/* + * NAME: extFill() + * + * FUNCTION: allocate disk space for a file page that represents + * a file hole. + * + * PARAMETERS: + * ip - the inode of the file. + * cp - cbuf of the file page represent the hole. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int extFill(struct inode *ip, xad_t * xp) +{ + int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; + s64 blkno = offsetXAD(xp) >> ip->i_blksize; + +// assert(ISSPARSE(ip)); + + /* initialize the extent allocation hint */ + XADaddress(xp, 0); + + /* allocate an extent to fill the hole */ + if ((rc = extAlloc(ip, nbperpage, blkno, xp, FALSE))) + return (rc); + + assert(lengthPXD(xp) == nbperpage); + + return (0); +} + + +/* + * NAME: extBalloc() + * + * FUNCTION: allocate disk blocks to form an extent. + * + * initially, we will try to allocate disk blocks for the + * requested size (nblocks). if this fails (nblocks + * contigious free blocks not avaliable), we'll try to allocate + * a smaller number of blocks (producing a smaller extent), with + * this smaller number of blocks consisting of the requested + * number of blocks rounded down to the next smaller power of 2 + * number (i.e. 16 -> 8). we'll continue to round down and + * retry the allocation until the number of blocks to allocate + * is smaller than the number of blocks per page. + * + * PARAMETERS: + * ip - the inode of the file. + * hint - disk block number to be used as an allocation hint. + * *nblocks - pointer to an s64 value. on entry, this value specifies + * the desired number of block to be allocated. on successful + * exit, this value is set to the number of blocks actually + * allocated. + * blkno - pointer to a block address that is filled in on successful + * return with the starting block number of the newly + * allocated block range. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. 
+ */ +static int +extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) +{ + s64 nb, nblks, daddr, max; + int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; + bmap_t *mp = JFS_SBI(ip->i_sb)->bmap; + + /* get the number of blocks to initially attempt to allocate. + * we'll first try the number of blocks requested unless this + * number is greater than the maximum number of contigious free + * blocks in the map. in that case, we'll start off with the + * maximum free. + */ + max = (s64) 1 << mp->db_maxfreebud; + if (*nblocks >= max && *nblocks > nbperpage) + nb = nblks = (max > nbperpage) ? max : nbperpage; + else + nb = nblks = *nblocks; + + /* try to allocate blocks */ + while ((rc = dbAlloc(ip, hint, nb, &daddr))) { + /* if something other than an out of space error, + * stop and return this error. + */ + if (rc != ENOSPC) + return (rc); + + /* decrease the allocation request size */ + nb = min(nblks, extRoundDown(nb)); + + /* give up if we cannot cover a page */ + if (nb < nbperpage) + return (rc); + } + + *nblocks = nb; + *blkno = daddr; + + return (0); +} + + +/* + * NAME: extBrealloc() + * + * FUNCTION: attempt to extend an extent's allocation. + * + * initially, we will try to extend the extent's allocation + * in place. if this fails, we'll try to move the extent + * to a new set of blocks. if moving the extent, we initially + * will try to allocate disk blocks for the requested size + * (nnew). if this fails (nnew contigious free blocks not + * avaliable), we'll try to allocate a smaller number of + * blocks (producing a smaller extent), with this smaller + * number of blocks consisting of the requested number of + * blocks rounded down to the next smaller power of 2 + * number (i.e. 16 -> 8). we'll continue to round down and + * retry the allocation until the number of blocks to allocate + * is smaller than the number of blocks per page. + * + * PARAMETERS: + * ip - the inode of the file. + * blkno - starting block number of the extents current allocation. + * nblks - number of blocks within the extents current allocation. + * newnblks - pointer to a s64 value. on entry, this value is the + * the new desired extent size (number of blocks). on + * successful exit, this value is set to the extent's actual + * new size (new number of blocks). + * newblkno - the starting block number of the extents new allocation. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +static int +extBrealloc(struct inode *ip, + s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno) +{ + int rc; + + /* try to extend in place */ + if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) { + *newblkno = blkno; + return (0); + } else { + if (rc != ENOSPC) + return (rc); + } + + /* in place extension not possible. + * try to move the extent to a new set of blocks. + */ + return (extBalloc(ip, blkno, newnblks, newblkno)); +} + + +/* + * NAME: extRoundDown() + * + * FUNCTION: round down a specified number of blocks to the next + * smallest power of 2 number. + * + * PARAMETERS: + * nb - the inode of the file. + * + * RETURN VALUES: + * next smallest power of 2 number. + */ +static s64 extRoundDown(s64 nb) +{ + int i; + u64 m, k; + + for (i = 0, m = (u64) 1 << 63; i < 64; i++, m >>= 1) { + if (m & nb) + break; + } + + i = 63 - i; + k = (u64) 1 << i; + k = ((k - 1) & nb) ? 
diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_imap.c linuxppc64_2_4/fs/jfs/jfs_imap.c
--- ../kernel.org/linux/fs/jfs/jfs_imap.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_imap.c	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,3242 @@
+/*
+
+ *
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * Change History :
+ *
+ */
+
+/*
+ * jfs_imap.c: inode allocation map manager
+ *
+ * Serialization:
+ *   Each AG has a simple lock which is used to control the serialization of
+ *	the AG level lists.  This lock should be taken first whenever an AG
+ *	level list will be modified or accessed.
+ *
+ *   Each IAG is locked by obtaining the buffer for the IAG page.
+ *
+ *   There is also an inode lock for the inode map inode.  A read lock needs to
+ *	be taken whenever an IAG is read from the map or the global level
+ *	information is read.  A write lock needs to be taken whenever the global
+ *	level information is modified or an atomic operation needs to be used.
+ *
+ *	If more than one IAG is read at one time, the read lock may not
+ *	be given up until all of the IAGs are read.  Otherwise, a deadlock
+ *	may occur when trying to obtain the read lock while another thread
+ *	holding the read lock is waiting on the IAG already being held.
+ *
+ *   The control page of the inode map is read into memory by diMount().
+ *	Thereafter it should only be modified in memory and then it will be
+ *	written out when the filesystem is unmounted by diUnmount().
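+ *
+ *   Editorial note on ordering: code below that needs both locks, such as
+ *	diFree(), takes the AG lock first and the map inode read lock
+ *	second, and releases them in the reverse order.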
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * imap locks + */ +/* iag free list lock */ +#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) +#define IAGFREE_LOCK(imap) down(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) + +/* per ag iag list locks */ +#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) + +/* + * external references + */ +extern struct address_space_operations jfs_aops; +extern kmem_cache_t *jfs_inode_cachep; + +/* + * forward references + */ +static int diAllocAG(imap_t *, int, boolean_t, struct inode *); +static int diAllocAny(imap_t *, int, boolean_t, struct inode *); +static int diAllocBit(imap_t *, iag_t *, int); +static int diAllocExt(imap_t *, int, struct inode *); +static int diAllocIno(imap_t *, int, struct inode *); +static int diFindFree(u32, int); +static int diNewExt(imap_t *, iag_t *, int); +static int diNewIAG(imap_t *, int *, int, metapage_t **); +static void duplicateIXtree(struct super_block *, s64, int, s64 *); + +static int diIAGRead(imap_t * imap, int, metapage_t **); +static int copy_from_dinode(dinode_t *, struct inode *); +void copy_to_dinode(dinode_t *, struct inode *); + +/* + * debug code for double-checking inode map + */ +/* #define _JFS_DEBUG_IMAP 1 */ + +#ifdef _JFS_DEBUG_IMAP +#define DBG_DIINIT(imap) DBGdiInit(imap) +#define DBG_DIALLOC(imap, ino) DBGdiAlloc(imap, ino) +#define DBG_DIFREE(imap, ino) DBGdiFree(imap, ino) + +static void *DBGdiInit(imap_t * imap); +static void DBGdiAlloc(imap_t * imap, ino_t ino); +static void DBGdiFree(imap_t * imap, ino_t ino); +#else +#define DBG_DIINIT(imap) +#define DBG_DIALLOC(imap, ino) +#define DBG_DIFREE(imap, ino) +#endif /* _JFS_DEBUG_IMAP */ + +/* + * NAME: diMount() + * + * FUNCTION: initialize the incore inode map control structures for + * a fileset or aggregate init time. + * + * the inode map's control structure (dinomap_t) is + * brought in from disk and placed in virtual memory. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient free virtual memory. + * EIO - i/o error. + */ +int diMount(struct inode *ipimap) +{ + imap_t *imap; + metapage_t *mp; + int index; + dinomap_t *dinom_le; + + /* + * allocate/initialize the in-memory inode map control structure + */ + /* allocate the in-memory inode map control structure. */ + imap = (imap_t *) kmalloc(sizeof(imap_t), GFP_KERNEL); + if (imap == NULL) { + jERROR(1, ("diMount: kmalloc returned NULL!\n")); + return (ENOMEM); + } + + /* read the on-disk inode map control structure. */ + + mp = read_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + kfree(imap); + return (EIO); + } + + /* copy the on-disk version to the in-memory version. 
*/ + dinom_le = (dinomap_t *) mp->data; + imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); + imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); + atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); + atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); + imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); + imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + imap->im_agctl[index].inofree = + le32_to_cpu(dinom_le->in_agctl[index].inofree); + imap->im_agctl[index].extfree = + le32_to_cpu(dinom_le->in_agctl[index].extfree); + imap->im_agctl[index].numinos = + le32_to_cpu(dinom_le->in_agctl[index].numinos); + imap->im_agctl[index].numfree = + le32_to_cpu(dinom_le->in_agctl[index].numfree); + } + + /* release the buffer. */ + release_metapage(mp); + + /* + * allocate/initialize inode allocation map locks + */ + /* allocate and init iag free list lock */ + IAGFREE_LOCK_INIT(imap); + + /* allocate and init ag list locks */ + for (index = 0; index < MAXAG; index++) { + AG_LOCK_INIT(imap, index); + } + + /* bind the inode map inode and inode map control structure + * to each other. + */ + imap->im_ipimap = ipimap; + JFS_IP(ipimap)->i_imap = imap; + +// DBG_DIINIT(imap); + + return (0); +} + + +/* + * NAME: diUnmount() + * + * FUNCTION: write to disk the incore inode map control structures for + * a fileset or aggregate at unmount time. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient free virtual memory. + * EIO - i/o error. + */ +int diUnmount(struct inode *ipimap, int mounterror) +{ + imap_t *imap = JFS_IP(ipimap)->i_imap; + + /* + * update the on-disk inode map control structure + */ + + if (!(mounterror || isReadOnly(ipimap))) + diSync(ipimap); + + /* + * Invalidate the page cache buffers + */ + truncate_inode_pages(ipimap->i_mapping, 0); + + /* + * free in-memory control structure + */ + kfree(imap); + + return (0); +} + + +/* + * diSync() + */ +int diSync(struct inode *ipimap) +{ + dinomap_t *dinom_le; + imap_t *imp = JFS_IP(ipimap)->i_imap; + metapage_t *mp; + int index; + + /* + * write imap global conrol page + */ + /* read the on-disk inode map control structure */ + mp = get_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + jERROR(1,("diSync: get_metapage failed!\n")); + return EIO; + } + + /* copy the in-memory version to the on-disk version */ + //memcpy(mp->data, &imp->im_imap,sizeof(dinomap_t)); + dinom_le = (dinomap_t *) mp->data; + dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); + dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); + dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); + dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); + dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); + dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + dinom_le->in_agctl[index].inofree = + cpu_to_le32(imp->im_agctl[index].inofree); + dinom_le->in_agctl[index].extfree = + cpu_to_le32(imp->im_agctl[index].extfree); + dinom_le->in_agctl[index].numinos = + cpu_to_le32(imp->im_agctl[index].numinos); + dinom_le->in_agctl[index].numfree = + cpu_to_le32(imp->im_agctl[index].numfree); + } + + /* write out the control structure */ + write_metapage(mp); + + /* + * write out dirty pages of imap + */ + fsync_inode_data_buffers(ipimap); + + 
diWriteSpecial(ipimap); + + return (0); +} + + +/* + * NAME: diRead() + * + * FUNCTION: initialize an incore inode from disk. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). + * + * this routine handles incore inode initialization for + * both "special" and "regular" inodes. special inodes + * are those required early in the mount process and + * require special handling since much of the file system + * is not yet initialized. these "special" inodes are + * identified by a NULL inode map inode pointer and are + * actually initialized by a call to diReadSpecial(). + * + * for regular inodes, the iag describing the disk inode + * is read from disk to determine the inode extent address + * for the disk inode. with the inode extent address in + * hand, the page of the extent that contains the disk + * inode is read and the disk inode is copied to the + * incore inode. + * + * PARAMETERS: + * ip - pointer to incore inode to be initialized from disk. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOMEM - insufficient memory + * + */ +int diRead(struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + int iagno, ino, extno, rc; + struct inode *ipimap; + dinode_t *dp; + iag_t *iagp; + metapage_t *mp; + s64 blkno, agstart; + imap_t *imap; + int block_offset; + int inodes_left; + uint pageno; + int rel_inode; + + jFYI(1, ("diRead: ino = %ld\n", ip->i_ino)); + + ipimap = sbi->ipimap; + JFS_IP(ip)->ipimap = ipimap; + + /* determine the iag number for this inode (number) */ + iagno = INOTOIAG(ip->i_ino); + + /* read the iag */ + imap = JFS_IP(ipimap)->i_imap; + IREAD_LOCK(ipimap); + rc = diIAGRead(imap, iagno, &mp); + IREAD_UNLOCK(ipimap); + if (rc) { + jERROR(1, ("diRead: diIAGRead returned %d\n", rc)); + return (rc); + } + + iagp = (iag_t *) mp->data; + + /* determine inode extent that holds the disk inode */ + ino = ip->i_ino & (INOSPERIAG - 1); + extno = ino >> L2INOSPEREXT; + + if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || + (addressPXD(&iagp->inoext[extno]) == 0)) { + jERROR(1, ("diRead: Bad inoext: 0x%lx, 0x%lx\n", + (ulong) addressPXD(&iagp->inoext[extno]), + (ulong) lengthPXD(&iagp->inoext[extno]))); + release_metapage(mp); + updateSuper(ip->i_sb, FM_DIRTY); + return ESTALE; + } + + /* get disk block number of the page within the inode extent + * that holds the disk inode. 
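+	 *
+	 * (a worked example, assuming 1K aggregate blocks and 4K pages:
+	 * nbperpage = 4 and two 512-byte inodes fit per block, so an
+	 * extent page starting at blkno = 6 gives pageno = 1 and
+	 * block_offset = 2, and rel_inode below is shifted up by
+	 * block_offset << l2niperblk = 4 inode slots.)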
+	 */
+	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
+
+	/* get the ag for the iag */
+	agstart = le64_to_cpu(iagp->agstart);
+
+	release_metapage(mp);
+
+	rel_inode = (ino & (INOSPERPAGE - 1));
+	pageno = blkno >> sbi->l2nbperpage;
+
+	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
+		/*
+		 * OS/2 didn't always align inode extents on page boundaries
+		 */
+		inodes_left =
+		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
+
+		if (rel_inode < inodes_left)
+			rel_inode += block_offset << sbi->l2niperblk;
+		else {
+			pageno += 1;
+			rel_inode -= inodes_left;
+		}
+	}
+
+	/* read the page of disk inode */
+	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
+	if (mp == 0) {
+		jERROR(1, ("diRead: read_metapage failed\n"));
+		return EIO;
+	}
+
+	/* locate the disk inode requested */
+	dp = (dinode_t *) mp->data;
+	dp += rel_inode;
+
+	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
+		jERROR(1, ("diRead: i_ino != di_number\n"));
+		updateSuper(ip->i_sb, FM_DIRTY);
+		rc = EIO;
+	} else if (le32_to_cpu(dp->di_nlink) == 0) {
+		jERROR(1,
+		       ("diRead: di_nlink is zero. ino=%ld\n", ip->i_ino));
+		updateSuper(ip->i_sb, FM_DIRTY);
+		rc = ESTALE;
+	} else
+		/* copy the disk inode to the in-memory inode */
+		rc = copy_from_dinode(dp, ip);
+
+	release_metapage(mp);
+
+	/* set the ag for the inode */
+	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	diReadSpecial()
+ *
+ * FUNCTION:	initialize a 'special' inode from disk.
+ *
+ *		this routine handles aggregate level inodes.  The
+ *		inode cache cannot differentiate between the
+ *		aggregate inodes and the filesystem inodes, so we
+ *		handle these here.  We don't actually use the aggregate
+ *		inode map, since these inodes are at a fixed location
+ *		and in some cases the aggregate inode map isn't initialized
+ *		yet.
+ *
+ * PARAMETERS:
+ *	sb - filesystem superblock
+ *	inum - aggregate inode number
+ *
+ * RETURN VALUES:
+ *	new inode	- success
+ *	NULL		- i/o error.
+ */
+struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct jfs_inode_info *jfs_ip;
+	uint address;
+	dinode_t *dp;
+	struct inode *ip;
+	metapage_t *mp;
+
+	ip = new_inode(sb);
+	if (ip == NULL) {
+		jERROR(1,
+		       ("diReadSpecial: new_inode returned NULL!\n"));
+		return ip;
+	}
+
+	jfs_ip = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
+	JFS_IP(ip) = jfs_ip;
+	if (!jfs_ip) {
+		iput(ip);
+		return NULL;
+	}
+	memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
+
+	/*
+	 * If ip->i_number >= 32 (INOSPEREXT), then read from secondary
+	 * aggregate inode table.
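+	 *
+	 * (a worked example: aggregate inode 40 falls in the secondary
+	 * table, so inum becomes 40 - 32 = 8 and, at 8 inodes per 4K
+	 * page, address advances by 8 >> 3 = 1 page into that table.)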
+ */ + + if (inum >= INOSPEREXT) { + address = + addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + inum -= INOSPEREXT; + ASSERT(inum < INOSPEREXT); + JFS_IP(ip)->ipimap = sbi->ipaimap2; + } else { + address = AITBL_OFF >> L2PSIZE; + JFS_IP(ip)->ipimap = sbi->ipaimap; + } + ip->i_ino = inum; + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + jEVENT(0, + ("Reading aggregate inode %d from block %d\n", (uint) inum, + address)); + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + ip->i_sb = NULL; + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + return (NULL); + } + + /* get the pointer to the disk inode of interest */ + dp = (dinode_t *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + if ((copy_from_dinode(dp, ip)) != 0) { + /* handle bad return by returning NULL for ip */ + ip->i_sb = NULL; + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + /* release the page */ + release_metapage(mp); + return (NULL); + + } + + ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->gfp_mask = GFP_NOFS | __GFP_HIGHMEM; + + if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { + sbi->gengen = le32_to_cpu(dp->di_gengen); + sbi->inostamp = le32_to_cpu(dp->di_inostamp); + } + + /* release the page */ + release_metapage(mp); + + return (ip); +} + +/* + * NAME: diWriteSpecial() + * + * FUNCTION: Write the special inode to disk + * + * PARAMETERS: + * ip - special inode + * + * RETURN VALUES: none + */ + +void diWriteSpecial(struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uint address; + dinode_t *dp; + ino_t inum = ip->i_ino; + metapage_t *mp; + + /* + * If ip->i_number >= 32 (INOSPEREXT), then write to secondary + * aggregate inode table. + */ + + if (!(ip->i_state & I_DIRTY)) + return; + + ip->i_state &= ~I_DIRTY; + + if (inum >= INOSPEREXT) { + address = + addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + inum -= INOSPEREXT; + ASSERT(inum < INOSPEREXT); + } else { + address = AITBL_OFF >> L2PSIZE; + } + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + jEVENT(0, + ("Reading aggregate inode %d from block %d\n", (uint) inum, + address)); + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + jERROR(1, + ("diWriteSpecial: failed to read aggregate inode extent!\n")); + return; + } + + /* get the pointer to the disk inode of interest */ + dp = (dinode_t *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + copy_to_dinode(dp, ip); + memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); + + if (inum == FILESYSTEM_I) + dp->di_gengen = cpu_to_le32(sbi->gengen); + + /* write the page */ + write_metapage(mp); +} + +/* + * NAME: diFreeSpecial() + * + * FUNCTION: Free allocated space for special inode + */ +void diFreeSpecial(struct inode *ip) +{ + if (ip == NULL) { + jERROR(1, ("diFreeSpecial called with NULL ip!\n")); + return; + } + fsync_inode_data_buffers(ip); + truncate_inode_pages(ip->i_mapping, 0); + iput(ip); +} + + + +/* + * NAME: diWrite() + * + * FUNCTION: write the on-disk inode portion of the in-memory inode + * to its corresponding on-disk inode. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). 
+ * + * the inode contains the inode extent address for the disk + * inode. with the inode extent address in hand, the + * page of the extent that contains the disk inode is + * read and the disk inode portion of the incore inode + * is copied to the disk inode. + * + * PARAMETERS: + * tid - transacation id + * ip - pointer to incore inode to be written to the inode extent. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + */ +int diWrite(int tid, struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int rc = 0; + s32 ino; + dinode_t *dp; + s64 blkno; + int block_offset; + int inodes_left; + metapage_t *mp; + uint pageno; + int rel_inode; + int dioffset; + struct inode *ipimap; + uint type; + int lid; + tlock_t *ditlck, *tlck; + linelock_t *dilinelock, *ilinelock; + lv_t *lv; + int n; + + ipimap = jfs_ip->ipimap; + + ino = ip->i_ino & (INOSPERIAG - 1); + + assert(lengthPXD(&(jfs_ip->ixpxd)) == + JFS_IP(ipimap)->i_imap->im_nbperiext); + assert(addressPXD(&(jfs_ip->ixpxd))); + + /* + * read the page of disk inode containing the specified inode: + */ + /* compute the block address of the page */ + blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); + + rel_inode = (ino & (INOSPERPAGE - 1)); + pageno = blkno >> sbi->l2nbperpage; + + if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { + /* + * OS/2 didn't always align inode extents on page boundaries + */ + inodes_left = + (sbi->nbperpage - block_offset) << sbi->l2niperblk; + + if (rel_inode < inodes_left) + rel_inode += block_offset << sbi->l2niperblk; + else { + pageno += 1; + rel_inode -= inodes_left; + } + } + /* read the page of disk inode */ + retry: + mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); + if (mp == 0) + return (EIO); + + /* get the pointer to the disk inode */ + dp = (dinode_t *) mp->data; + dp += rel_inode; + + dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; + + /* + * acquire transaction lock on the on-disk inode; + * N.B. 
tlock is acquired on ipimap not ip;
+	 */
+	if ((ditlck =
+	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
+		goto retry;
+	dilinelock = (linelock_t *) & ditlck->lock;
+
+	/*
+	 * copy btree root from in-memory inode to on-disk inode
+	 *
+	 * (tlock is taken from inline B+-tree root in in-memory
+	 * inode when the B+-tree root is updated, which is pointed
+	 * by jfs_ip->blid as well as being on tx tlock list)
+	 *
+	 * further processing of btree root is based on the copy
+	 * in in-memory inode, where txLog() will log from, and,
+	 * for xtree root, txUpdateMap() will update map and reset
+	 * XAD_NEW bit;
+	 */
+
+	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
+		/*
+		 * This is the special xtree inside the directory for storing
+		 * the directory table
+		 */
+		xtpage_t *p, *xp;
+		xad_t *xad;
+
+		jfs_ip->xtlid = 0;
+		tlck = &TxLock[lid];
+		assert(tlck->type & tlckXTREE);
+		tlck->type |= tlckBTROOT;
+		tlck->mp = mp;
+		ilinelock = (linelock_t *) & tlck->lock;
+
+		/*
+		 * copy xtree root from inode to dinode:
+		 */
+		p = &jfs_ip->i_xtroot;
+		xp = (xtpage_t *) &dp->di_dirtable;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
+			       lv->length << L2XTSLOTSIZE);
+		}
+
+		/* reset on-disk (metadata page) xtree XAD_NEW bit */
+		xad = &xp->xad[XTENTRYSTART];
+		for (n = XTENTRYSTART;
+		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+	}
+
+	if ((lid = jfs_ip->blid) == 0)
+		goto inlineData;
+	jfs_ip->blid = 0;
+
+	tlck = &TxLock[lid];
+	type = tlck->type;
+	tlck->type |= tlckBTROOT;
+	tlck->mp = mp;
+	ilinelock = (linelock_t *) & tlck->lock;
+
+	/*
+	 *	regular file: 16 byte (XAD slot) granularity
+	 */
+	if (type & tlckXTREE) {
+		xtpage_t *p, *xp;
+		xad_t *xad;
+
+		/*
+		 * copy xtree root from inode to dinode:
+		 */
+		p = &jfs_ip->i_xtroot;
+		xp = &dp->di_xtroot;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
+			       lv->length << L2XTSLOTSIZE);
+		}
+
+		/* reset on-disk (metadata page) xtree XAD_NEW bit */
+		xad = &xp->xad[XTENTRYSTART];
+		for (n = XTENTRYSTART;
+		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+	}
+	/*
+	 *	directory: 32 byte (directory entry slot) granularity
+	 */
+	else if (type & tlckDTREE) {
+		dtpage_t *p, *xp;
+
+		/*
+		 * copy dtree root from inode to dinode:
+		 */
+		p = (dtpage_t *) &jfs_ip->i_dtroot;
+		xp = (dtpage_t *) & dp->di_dtroot;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
+			       lv->length << L2DTSLOTSIZE);
+		}
+	} else {
+		jERROR(1, ("diWrite: UFO tlock\n"));
+	}
+
+      inlineData:
+#ifdef _STILL_TO_PORT
+	/*
+	 *	copy inline data from in-memory inode to on-disk inode:
+	 *	128 byte slot granularity
+	 */
+	if (test_cflag(COMMIT_Inlineea, ip)) {
+		lv = (lv_t *) & dilinelock->lv[dilinelock->index];
+		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
+		lv->length = 1;
+		memcpy(&dp->di_inlineea, &ip->i_inlineea, INODESLOTSIZE);
+		dilinelock->index++;
+
+		clear_cflag(COMMIT_Inlineea, ip);
+	}
+#endif	/* _STILL_TO_PORT */
+
+	/*
+	 *	lock/copy inode base: 128 byte slot granularity
+	 */
+// baseDinode:
+	lv = (lv_t *) & dilinelock->lv[dilinelock->index];
+	lv->offset = dioffset >> L2INODESLOTSIZE;
+	copy_to_dinode(dp, ip);
+	if
(test_and_clear_cflag(COMMIT_Dirtable, ip)) { + lv->length = 2; + memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); + } else + lv->length = 1; + dilinelock->index++; + +#ifdef _JFS_FASTDASD + /* + * We aren't logging changes to the DASD used in directory inodes, + * but we need to write them to disk. If we don't unmount cleanly, + * mount will recalculate the DASD used. + */ + if (S_ISDIR(ip->i_mode) + && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) + bcopy(&ip->i_DASD, &dp->di_DASD, sizeof(dasd_t)); +#endif /* _JFS_FASTDASD */ + + /* release the buffer holding the updated on-disk inode. + * the buffer will be later written by commit processing. + */ + write_metapage(mp); + + return (rc); +} + + +/* + * NAME: diFree(ip) + * + * FUNCTION: free a specified inode from the inode working map + * for a fileset or aggregate. + * + * if the inode to be freed represents the first (only) + * free inode within the iag, the iag will be placed on + * the ag free inode list. + * + * freeing the inode will cause the inode extent to be + * freed if the inode is the only allocated inode within + * the extent. in this case all the disk resource backing + * up the inode extent will be freed. in addition, the iag + * will be placed on the ag extent free list if the extent + * is the first free extent in the iag. if freeing the + * extent also means that no free inodes will exist for + * the iag, the iag will also be removed from the ag free + * inode list. + * + * the iag describing the inode will be freed if the extent + * is to be freed and it is the only backed extent within + * the iag. in this case, the iag will be removed from the + * ag free extent list and ag free inode list and placed on + * the inode map's free iag list. + * + * a careful update approach is used to provide consistency + * in the face of updates to multiple buffers. under this + * approach, all required buffers are obtained before making + * any updates and are held until all updates are complete. + * + * PARAMETERS: + * ip - inode to be freed. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + */ +int diFree(struct inode *ip) +{ + int rc; + ino_t inum = ip->i_ino; + iag_t *iagp, *aiagp, *biagp, *ciagp, *diagp; + metapage_t *mp, *amp, *bmp, *cmp, *dmp; + int iagno, ino, extno, bitno, sword, agno; + int back, fwd; + u32 bitmap, mask; + struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; + imap_t *imap = JFS_IP(ipimap)->i_imap; + s64 xaddr; + s64 xlen; + pxd_t freepxd; + int tid; + struct inode *iplist[3]; + tlock_t *tlck; + pxdlock_t *pxdlock; + + /* + * This is just to suppress compiler warnings. The same logic that + * references these variables is used to initialize them. + */ + aiagp = biagp = ciagp = diagp = NULL; + + /* get the iag number containing the inode. + */ + iagno = INOTOIAG(inum); + + /* make sure that the iag is contained within + * the map. + */ + //assert(iagno < imap->im_nextiag); + if (iagno >= imap->im_nextiag) { + jERROR(1, ("diFree: inum = %d, iagno = %d, nextiag = %d\n", + (uint) inum, iagno, imap->im_nextiag)); + dump_mem("imap", imap, 32); + updateSuper(ip->i_sb, FM_DIRTY); + return EIO; + } + + /* get the allocation group for this ino. + */ + agno = JFS_IP(ip)->agno; + + /* Lock the AG specific inode map information + */ + AG_LOCK(imap, agno); + + /* Obtain read lock in imap inode. Don't release it until we have + * read all of the IAG's that we are going to. + */ + IREAD_LOCK(ipimap); + + /* read the iag. 
+	 */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* get the inode number and extent number of the inode within
+	 * the iag and the inode number within the extent.
+	 */
+	ino = inum & (INOSPERIAG - 1);
+	extno = ino >> L2INOSPEREXT;
+	bitno = ino & (INOSPEREXT - 1);
+	mask = HIGHORDER >> bitno;
+
+	assert(le32_to_cpu(iagp->wmap[extno]) & mask);
+#ifdef _STILL_TO_PORT
+	assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0);
+#endif	/* _STILL_TO_PORT */
+	assert(addressPXD(&iagp->inoext[extno]));
+
+	/* compute the bitmap for the extent reflecting the freed inode.
+	 */
+	bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
+
+	/*
+	 *	inode extent still has some inodes or below low water mark:
+	 *	keep the inode extent;
+	 */
+	if (bitmap ||
+	    imap->im_agctl[agno].numfree < 96 ||
+	    (imap->im_agctl[agno].numfree < 288 &&
+	     (((imap->im_agctl[agno].numfree * 100) /
+	       imap->im_agctl[agno].numinos) <= 25))) {
+		/* if the iag currently has no free inodes (i.e.,
+		 * the inode being freed is the first free inode of iag),
+		 * insert the iag at the head of the inode free list
+		 * for the ag.
+		 */
+		if (iagp->nfreeinos == 0) {
+			/* check if there are any iags on the ag inode
+			 * free list.  if so, read the first one so that
+			 * we can link the current iag onto the list at
+			 * the head.
+			 */
+			if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
+				/* read the iag that currently is the head
+				 * of the list.
+				 */
+				if ((rc = diIAGRead(imap, fwd, &amp))) {
+					IREAD_UNLOCK(ipimap);
+					AG_UNLOCK(imap, agno);
+					release_metapage(mp);
+					return (rc);
+				}
+				aiagp = (iag_t *) amp->data;
+
+				/* make current head point back to the iag.
+				 */
+				aiagp->inofreeback = cpu_to_le32(iagno);
+
+				write_metapage(amp);
+			}
+
+			/* iag points forward to current head and iag
+			 * becomes the new head of the list.
+			 */
+			iagp->inofreefwd =
+			    cpu_to_le32(imap->im_agctl[agno].inofree);
+			iagp->inofreeback = -1;
+			imap->im_agctl[agno].inofree = iagno;
+		}
+		IREAD_UNLOCK(ipimap);
+
+		/* update the free inode summary map for the extent if
+		 * freeing the inode means the extent will now have free
+		 * inodes (i.e., the inode being freed is the first free
+		 * inode of extent),
+		 */
+		if (iagp->wmap[extno] == ONES) {
+			sword = extno >> L2EXTSPERSUM;
+			bitno = extno & (EXTSPERSUM - 1);
+			iagp->inosmap[sword] &=
+			    cpu_to_le32(~(HIGHORDER >> bitno));
+		}
+
+		/* update the bitmap.
+		 */
+		iagp->wmap[extno] = cpu_to_le32(bitmap);
+		DBG_DIFREE(imap, inum);
+
+		/* update the free inode counts at the iag, ag and
+		 * map level.
+		 */
+		iagp->nfreeinos =
+		    cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
+		imap->im_agctl[agno].numfree += 1;
+		atomic_inc(&imap->im_numfree);
+
+		/* release the AG inode map lock
+		 */
+		AG_UNLOCK(imap, agno);
+
+		/* write the iag */
+		write_metapage(mp);
+
+		return (0);
+	}
+
+
+	/*
+	 *	inode extent has become free and above low water mark:
+	 *	free the inode extent;
+	 */
+
+	/*
+	 *	prepare to update iag list(s) (careful update step 1)
+	 */
+	amp = bmp = cmp = dmp = NULL;
+	fwd = back = -1;
+
+	/* check if the iag currently has no free extents.  if so,
+	 * it will be placed on the head of the ag extent free list.
+	 */
+	if (iagp->nfreeexts == 0) {
+		/* check if the ag extent free list has any iags.
+		 * if so, read the iag at the head of the list now.
+		 * this (head) iag will be updated later to reflect
+		 * the addition of the current iag at the head of
+		 * the list.
+		 */
+		if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
+			if ((rc = diIAGRead(imap, fwd, &amp)))
+				goto error_out;
+			aiagp = (iag_t *) amp->data;
+		}
+	} else {
+		/* iag has free extents. check if the addition of a free
+		 * extent will cause all extents to be free within this
+		 * iag. if so, the iag will be removed from the ag extent
+		 * free list and placed on the inode map's free iag list.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
+			/* in preparation for removing the iag from the
+			 * ag extent free list, read the iags preceding
+			 * and following the iag on the ag extent free
+			 * list.
+			 */
+			if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
+				if ((rc = diIAGRead(imap, fwd, &amp)))
+					goto error_out;
+				aiagp = (iag_t *) amp->data;
+			}
+
+			if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
+				if ((rc = diIAGRead(imap, back, &bmp)))
+					goto error_out;
+				biagp = (iag_t *) bmp->data;
+			}
+		}
+	}
+
+	/* remove the iag from the ag inode free list if freeing
+	 * this extent causes the iag to have no free inodes.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
+		int inofreeback = le32_to_cpu(iagp->inofreeback);
+		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
+
+		/* in preparation for removing the iag from the
+		 * ag inode free list, read the iags preceding
+		 * and following the iag on the ag inode free
+		 * list. before reading these iags, we must make
+		 * sure that we already don't have them in hand
+		 * from up above, since re-reading an iag (buffer)
+		 * we are currently holding would cause a deadlock.
+		 */
+		if (inofreefwd >= 0) {
+
+			if (inofreefwd == fwd)
+				ciagp = (iag_t *) amp->data;
+			else if (inofreefwd == back)
+				ciagp = (iag_t *) bmp->data;
+			else {
+				if ((rc =
+				     diIAGRead(imap, inofreefwd, &cmp)))
+					goto error_out;
+				assert(cmp != NULL);
+				ciagp = (iag_t *) cmp->data;
+			}
+			assert(ciagp != NULL);
+		}
+
+		if (inofreeback >= 0) {
+			if (inofreeback == fwd)
+				diagp = (iag_t *) amp->data;
+			else if (inofreeback == back)
+				diagp = (iag_t *) bmp->data;
+			else {
+				if ((rc =
+				     diIAGRead(imap, inofreeback, &dmp)))
+					goto error_out;
+				assert(dmp != NULL);
+				diagp = (iag_t *) dmp->data;
+			}
+			assert(diagp != NULL);
+		}
+	}
+
+	IREAD_UNLOCK(ipimap);
+
+	/*
+	 * invalidate any page of the inode extent freed from buffer cache;
+	 */
+	freepxd = iagp->inoext[extno];
+	xaddr = addressPXD(&iagp->inoext[extno]);
+	xlen = lengthPXD(&iagp->inoext[extno]);
+	invalidate_metapages(JFS_SBI(ip->i_sb)->direct_inode, xaddr, xlen);
+
+	/*
+	 * update iag list(s) (careful update step 2)
+	 */
+	/* add the iag to the ag extent free list if this is the
+	 * first free extent for the iag.
+	 */
+	if (iagp->nfreeexts == 0) {
+		if (fwd >= 0)
+			aiagp->extfreeback = cpu_to_le32(iagno);
+
+		iagp->extfreefwd =
+		    cpu_to_le32(imap->im_agctl[agno].extfree);
+		iagp->extfreeback = -1;
+		imap->im_agctl[agno].extfree = iagno;
+	} else {
+		/* remove the iag from the ag extent list if all extents
+		 * are now free and place it on the inode map iag free list.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
+			if (fwd >= 0)
+				aiagp->extfreeback = iagp->extfreeback;
+
+			if (back >= 0)
+				biagp->extfreefwd = iagp->extfreefwd;
+			else
+				imap->im_agctl[agno].extfree =
+				    le32_to_cpu(iagp->extfreefwd);
+
+			iagp->extfreefwd = iagp->extfreeback = -1;
+
+			IAGFREE_LOCK(imap);
+			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
+			imap->im_freeiag = iagno;
+			IAGFREE_UNLOCK(imap);
+		}
+	}
+
+	/* remove the iag from the ag inode free list if freeing
+	 * this extent causes the iag to have no free inodes.
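+	 * (nfreeinos == INOSPEREXT - 1 means every free inode in this
+	 * iag belongs to the extent being freed: that extent holds the
+	 * remaining INOSPEREXT - 1 free inodes, so once it is released
+	 * the iag has no free backed inodes left.)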
+ */ + if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { + if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) + ciagp->inofreeback = iagp->inofreeback; + + if ((int) le32_to_cpu(iagp->inofreeback) >= 0) + diagp->inofreefwd = iagp->inofreefwd; + else + imap->im_agctl[agno].inofree = + le32_to_cpu(iagp->inofreefwd); + + iagp->inofreefwd = iagp->inofreeback = -1; + } + + /* update the inode extent address and working map + * to reflect the free extent. + * the permanent map should have been updated already + * for the inode being freed. + */ + assert(iagp->pmap[extno] == 0); + iagp->wmap[extno] = 0; + DBG_DIFREE(imap, inum); + PXDlength(&iagp->inoext[extno], 0); + PXDaddress(&iagp->inoext[extno], 0); + + /* update the free extent and free inode summary maps + * to reflect the freed extent. + * the inode summary map is marked to indicate no inodes + * available for the freed extent. + */ + sword = extno >> L2EXTSPERSUM; + bitno = extno & (EXTSPERSUM - 1); + mask = HIGHORDER >> bitno; + iagp->inosmap[sword] |= cpu_to_le32(mask); + iagp->extsmap[sword] &= cpu_to_le32(~mask); + + /* update the number of free inodes and number of free extents + * for the iag. + */ + iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - + (INOSPEREXT - 1)); + iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); + + /* update the number of free inodes and backed inodes + * at the ag and inode map level. + */ + imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); + imap->im_agctl[agno].numinos -= INOSPEREXT; + atomic_sub(INOSPEREXT - 1, &imap->im_numfree); + atomic_sub(INOSPEREXT, &imap->im_numinos); + + if (amp) + write_metapage(amp); + if (bmp) + write_metapage(bmp); + if (cmp) + write_metapage(cmp); + if (dmp) + write_metapage(dmp); + + /* + * start transaction to update block allocation map + * for the inode extent freed; + * + * N.B. AG_LOCK is released and iag will be released below, and + * other thread may allocate inode from/reusing the ixad freed + * BUT with new/different backing inode extent from the extent + * to be freed by the transaction; + */ + txBegin(ipimap->i_sb, &tid, COMMIT_FORCE); + + /* acquire tlock of the iag page of the freed ixad + * to force the page NOHOMEOK (even though no data is + * logged from the iag page) until NOREDOPAGE|FREEXTENT log + * for the free of the extent is committed; + * write FREEXTENT|NOREDOPAGE log record + * N.B. linelock is overlaid as freed extent descriptor; + */ + tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + pxdlock->pxd = freepxd; + pxdlock->index = 1; + + write_metapage(mp); + + iplist[0] = ipimap; + + /* + * logredo needs the IAG number and IAG extent index in order + * to ensure that the IMap is consistent. The least disruptive + * way to pass these values through to the transaction manager + * is in the iplist array. + * + * It's not pretty, but it works. + */ + iplist[1] = (struct inode *) (size_t)iagno; + iplist[2] = (struct inode *) (size_t)extno; + + rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); // D233382 + + txEnd(tid); + + /* unlock the AG inode map information */ + AG_UNLOCK(imap, agno); + + return (0); + + error_out: + IREAD_UNLOCK(ipimap); + + if (amp) + release_metapage(amp); + if (bmp) + release_metapage(bmp); + if (cmp) + release_metapage(cmp); + if (dmp) + release_metapage(dmp); + + AG_UNLOCK(imap, agno); + + release_metapage(mp); + + return (rc); +} + +/* + * There are several places in the diAlloc* routines where we initialize + * the inode. 
We also need to take a reference on the page containing the
+ * inode extent. This way we can write the inode without having to allocate
+ * a page from the page cache if that page had been swapped out. This can
+ * cause a deadlock if jfs_write_inode had been called by kupdate or kswapd.
+ */
+static inline void
+diInitInode(struct inode *ip, int iagno, int ino, int extno, iag_t * iagp)
+{
+	s64 blkno;
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
+	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+
+	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
+	DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
+	jfs_ip->ixpxd = iagp->inoext[extno];
+	jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+
+	/* Now grab a reference on the extent page */
+	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
+#ifndef _JFS_4K
+	{
+		/* If volume shared with OS/2, inode extent may not be
+		 * page-aligned
+		 */
+		int block_offset;
+		int inodes_left;
+		int rel_inode;
+
+		if ((block_offset = (blkno & (sbi->nbperpage - 1)))) {
+			rel_inode = ino & (INOSPERPAGE - 1);
+			inodes_left =
+			    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
+
+			if (rel_inode >= inodes_left)
+				blkno += sbi->nbperpage;
+		}
+	}
+#endif
+}
+
+
+/*
+ * NAME: diAlloc(pip,dir,ip)
+ *
+ * FUNCTION: allocate a disk inode from the inode working map
+ *	for a fileset or aggregate.
+ *
+ * PARAMETERS:
+ *	pip	- pointer to incore inode for the parent inode.
+ *	dir	- TRUE if the new disk inode is for a directory.
+ *	ip	- pointer to a new inode
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
+{
+	int rc, ino, iagno, addext, extno, bitno, sword;
+	int nwords, rem, i, agno;
+	u32 mask, inosmap, extsmap;
+	struct inode *ipimap;
+	metapage_t *mp;
+	ino_t inum;
+	iag_t *iagp;
+	imap_t *imap;
+
+	/* get the pointers to the inode map inode and the
+	 * corresponding imap control structure.
+	 */
+	ipimap = JFS_SBI(pip->i_sb)->ipimap;
+	imap = JFS_IP(ipimap)->i_imap;
+	JFS_IP(ip)->ipimap = ipimap;
+	JFS_IP(ip)->fileset = FILESYSTEM_I;
+
+	/* for a directory, the allocation policy is to start
+	 * at the ag level using the preferred ag.
+	 */
+	if (dir == TRUE) {
+		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
+		AG_LOCK(imap, agno);
+		goto tryag;
+	}
+
+	/* for files, the policy starts off by trying to allocate from
+	 * the same iag containing the parent disk inode:
+	 * try to allocate the new disk inode close to the parent disk
+	 * inode, using parent disk inode number + 1 as the allocation
+	 * hint. (we use a left-to-right policy to attempt to avoid
+	 * moving backward on the disk.) compute the hint within the
+	 * file system and the iag.
+	 */
+	inum = pip->i_ino + 1;
+	ino = inum & (INOSPERIAG - 1);
+
+	/* back off the hint if it is outside of the iag */
+	if (ino == 0)
+		inum = pip->i_ino;
+
+	/* get the ag number of this iag */
+	agno = JFS_IP(pip)->agno;
+
+	/* lock the AG inode map information */
+	AG_LOCK(imap, agno);
+
+	/* Get read lock on imap inode */
+	IREAD_LOCK(ipimap);
+
+	/* get the iag number and read the iag */
+	iagno = INOTOIAG(inum);
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* determine if new inode extent is allowed to be added to the iag.
+	 * new inode extent can be added to the iag if the ag
+	 * has less than 32 free disk inodes and the iag has free extents.
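+	 * (i.e. addext below evaluates
+	 * (numfree < 32 && iagp->nfreeexts), a heuristic that adds a
+	 * new extent only when the ag is nearly out of free inodes.)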
+ */ + addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); + + /* + * try to allocate from the IAG + */ + /* check if the inode may be allocated from the iag + * (i.e. the inode has free inodes or new extent can be added). + */ + if (iagp->nfreeinos || addext) { + /* determine the extent number of the hint. + */ + extno = ino >> L2INOSPEREXT; + + /* check if the extent containing the hint has backed + * inodes. if so, try to allocate within this extent. + */ + if (addressPXD(&iagp->inoext[extno])) { + bitno = ino & (INOSPEREXT - 1); + if ((bitno = + diFindFree(le32_to_cpu(iagp->wmap[extno]), + bitno)) + < INOSPEREXT) { + ino = (extno << L2INOSPEREXT) + bitno; + + /* a free inode (bit) was found within this + * extent, so allocate it. + */ + rc = diAllocBit(imap, iagp, ino); + IREAD_UNLOCK(ipimap); + if (rc) { + assert(rc == EIO); + } else { + /* set the results of the allocation + * and write the iag. + */ + diInitInode(ip, iagno, ino, extno, + iagp); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + /* free the AG lock and return. + */ + AG_UNLOCK(imap, agno); + return (rc); + } + + if (!addext) + extno = + (extno == + EXTSPERIAG - 1) ? 0 : extno + 1; + } + + /* + * no free inodes within the extent containing the hint. + * + * try to allocate from the backed extents following + * hint or, if appropriate (i.e. addext is true), allocate + * an extent of free inodes at or following the extent + * containing the hint. + * + * the free inode and free extent summary maps are used + * here, so determine the starting summary map position + * and the number of words we'll have to examine. again, + * the approach is to allocate following the hint, so we + * might have to initially ignore prior bits of the summary + * map that represent extents prior to the extent containing + * the hint and later revisit these bits. + */ + bitno = extno & (EXTSPERSUM - 1); + nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; + sword = extno >> L2EXTSPERSUM; + + /* mask any prior bits for the starting words of the + * summary map. + */ + mask = ONES << (EXTSPERSUM - bitno); + inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; + extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; + + /* scan the free inode and free extent summary maps for + * free resources. + */ + for (i = 0; i < nwords; i++) { + /* check if this word of the free inode summary + * map describes an extent with free inodes. + */ + if (~inosmap) { + /* an extent with free inodes has been + * found. determine the extent number + * and the inode number within the extent. + */ + rem = diFindFree(inosmap, 0); + extno = (sword << L2EXTSPERSUM) + rem; + rem = + diFindFree(le32_to_cpu + (iagp->wmap[extno]), 0); + assert(rem < INOSPEREXT); + + /* determine the inode number within the + * iag and allocate the inode from the + * map. + */ + ino = (extno << L2INOSPEREXT) + rem; + rc = diAllocBit(imap, iagp, ino); + IREAD_UNLOCK(ipimap); + if (rc) { + assert(rc == EIO); + } else { + /* set the results of the allocation + * and write the iag. + */ + diInitInode(ip, iagno, ino, extno, + iagp); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + /* free the AG lock and return. + */ + AG_UNLOCK(imap, agno); + return (rc); + + } + + /* check if we may allocate an extent of free + * inodes and whether this word of the free + * extents summary map describes a free extent. + */ + if (addext && ~extsmap) { + /* a free extent has been found. determine + * the extent number. 
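+				 * (extno = (sword << L2EXTSPERSUM) + rem
+				 * below; e.g. taking EXTSPERSUM == 32 just
+				 * for illustration, first clear bit 5 of
+				 * summary word 2 names extent 2*32+5 = 69.)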
+				 */
+				rem = diFindFree(extsmap, 0);
+				extno = (sword << L2EXTSPERSUM) + rem;
+
+				/* allocate an extent of free inodes.
+				 */
+				if ((rc = diNewExt(imap, iagp, extno))) {
+					/* if there is no disk space for a
+					 * new extent, try to allocate the
+					 * disk inode from somewhere else.
+					 */
+					if (rc == ENOSPC)
+						break;
+
+					assert(rc == EIO);
+				} else {
+					/* set the results of the allocation
+					 * and write the iag.
+					 */
+					diInitInode(ip, iagno,
+						    extno << L2INOSPEREXT,
+						    extno, iagp);
+					mark_metapage_dirty(mp);
+				}
+				release_metapage(mp);
+				/* free the imap inode & the AG lock & return.
+				 */
+				IREAD_UNLOCK(ipimap);
+				AG_UNLOCK(imap, agno);
+				return (rc);
+			}
+
+			/* move on to the next set of summary map words.
+			 */
+			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
+			inosmap = le32_to_cpu(iagp->inosmap[sword]);
+			extsmap = le32_to_cpu(iagp->extsmap[sword]);
+		}
+	}
+	/* unlock imap inode */
+	IREAD_UNLOCK(ipimap);
+
+	/* nothing doing in this iag, so release it. */
+	release_metapage(mp);
+
+      tryag:
+	/*
+	 * try to allocate anywhere within the same AG as the parent inode.
+	 */
+	rc = diAllocAG(imap, agno, dir, ip);
+
+	AG_UNLOCK(imap, agno);
+
+	if (rc != ENOSPC)
+		return (rc);
+
+	/*
+	 * try to allocate in any AG.
+	 */
+	return (diAllocAny(imap, agno, dir, ip));
+}
+
+
+/*
+ * NAME: diAllocAG(imap,agno,dir,ip)
+ *
+ * FUNCTION: allocate a disk inode from the allocation group.
+ *
+ *	this routine first determines if a new extent of free
+ *	inodes should be added for the allocation group, with
+ *	the current request satisfied from this extent. if this
+ *	is the case, an attempt will be made to do just that. if
+ *	this attempt fails or it has been determined that a new
+ *	extent should not be added, an attempt is made to satisfy
+ *	the request by allocating an existing (backed) free inode
+ *	from the allocation group.
+ *
+ * PRE CONDITION: Already have the AG lock for this AG.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group to allocate from.
+ *	dir	- TRUE if the new disk inode is for a directory.
+ *	ip	- pointer to the new inode to be filled in on successful return
+ *		  with the disk inode number allocated, its extent address
+ *		  and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int
+diAllocAG(imap_t * imap, int agno, boolean_t dir, struct inode *ip)
+{
+	int rc, addext, numfree, numinos;
+
+	/* get the number of free and the number of backed disk
+	 * inodes currently within the ag.
+	 */
+	numfree = imap->im_agctl[agno].numfree;
+	numinos = imap->im_agctl[agno].numinos;
+
+	if (numfree > numinos) {
+		jERROR(1, ("diAllocAG: numfree > numinos\n"));
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+
+	/* determine if we should allocate a new extent of free inodes
+	 * within the ag: for directory inodes, add a new extent
+	 * if there are a small number of free inodes or number of free
+	 * inodes is a small percentage of the number of backed inodes.
+	 */
+	if (dir == TRUE)
+		addext = (numfree < 64 ||
+			  (numfree < 256
+			   && ((numfree * 100) / numinos) <= 20));
+	else
+		addext = (numfree == 0);
+
+	/*
+	 * try to allocate a new extent of free inodes.
+	 */
+	if (addext) {
+		/* if free space is not available for this new extent, try
+		 * below to allocate a free and existing (already backed)
+		 * inode from the ag.
+		 */
+		if ((rc = diAllocExt(imap, agno, ip)) != ENOSPC)
+			return (rc);
+	}
+
+	/*
+	 * try to allocate an existing free inode from the ag.
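+	 * (reached both when addext was false and when diAllocExt()
+	 * returned ENOSPC above, so failure to add a new extent falls
+	 * back to reusing an already-backed free inode.)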
+	 */
+	return (diAllocIno(imap, agno, ip));
+}
+
+
+/*
+ * NAME: diAllocAny(imap,agno,dir,ip)
+ *
+ * FUNCTION: allocate a disk inode from any other allocation group.
+ *
+ *	this routine is called when an allocation attempt within
+ *	the primary allocation group has failed. it attempts to
+ *	allocate an inode from any allocation group other than the
+ *	specified primary group.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	agno	- primary allocation group (to avoid).
+ *	dir	- TRUE if the new disk inode is for a directory.
+ *	ip	- pointer to a new inode to be filled in on successful return
+ *		  with the disk inode number allocated, its extent address
+ *		  and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int
+diAllocAny(imap_t * imap, int agno, boolean_t dir, struct inode *ip)
+{
+	int ag, rc;
+	int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
+
+
+	/* try to allocate from the ags following agno up to
+	 * the maximum ag number.
+	 */
+	for (ag = agno + 1; ag <= maxag; ag++) {
+		AG_LOCK(imap, ag);
+
+		rc = diAllocAG(imap, ag, dir, ip);
+
+		AG_UNLOCK(imap, ag);
+
+		if (rc != ENOSPC)
+			return (rc);
+	}
+
+	/* try to allocate from the ags in front of agno.
+	 */
+	for (ag = 0; ag < agno; ag++) {
+		AG_LOCK(imap, ag);
+
+		rc = diAllocAG(imap, ag, dir, ip);
+
+		AG_UNLOCK(imap, ag);
+
+		if (rc != ENOSPC)
+			return (rc);
+	}
+
+	/* no free disk inodes.
+	 */
+	return (ENOSPC);
+}
+
+
+/*
+ * NAME: diAllocIno(imap,agno,ip)
+ *
+ * FUNCTION: allocate a disk inode from the allocation group's free
+ *	inode list, returning an error if this free list is
+ *	empty (i.e. no iags on the list).
+ *
+ *	allocation occurs from the first iag on the list using
+ *	the iag's free inode summary map to find the leftmost
+ *	free inode in the iag.
+ *
+ * PRE CONDITION: Already have AG lock for this AG.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group.
+ *	ip	- pointer to new inode to be filled in on successful return
+ *		  with the disk inode number allocated, its extent address
+ *		  and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int diAllocIno(imap_t * imap, int agno, struct inode *ip)
+{
+	int iagno, ino, rc, rem, extno, sword;
+	metapage_t *mp;
+	iag_t *iagp;
+
+	/* check if there are iags on the ag's free inode list.
+	 */
+	if ((iagno = imap->im_agctl[agno].inofree) < 0)
+		return (ENOSPC);
+
+	/* obtain read lock on imap inode */
+	IREAD_LOCK(imap->im_ipimap);
+
+	/* read the iag at the head of the list.
+	 */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(imap->im_ipimap);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* better be free inodes in this iag if it is on the
+	 * list.
+	 */
+	//assert(iagp->nfreeinos);
+	if (!iagp->nfreeinos) {
+		jERROR(1,
+		       ("diAllocIno: nfreeinos = 0, but iag on freelist\n"));
+		jERROR(1, ("  agno = %d, iagno = %d\n", agno, iagno));
+		dump_mem("iag", iagp, 64);
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+
+	/* scan the free inode summary map to find an extent
+	 * with free inodes.
+	 */
+	for (sword = 0;; sword++) {
+		assert(sword < SMAPSZ);
+
+		if (~iagp->inosmap[sword])
+			break;
+	}
+
+	/* found an extent with free inodes. determine
+	 * the extent number.
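+	 * (diFindFree() returns the leftmost clear bit, so allocation
+	 * proceeds left to right through the summary words and through
+	 * each extent's working map.)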
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
+	assert(rem < EXTSPERSUM);
+	extno = (sword << L2EXTSPERSUM) + rem;
+
+	/* find the first free inode in the extent.
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
+	assert(rem < INOSPEREXT);
+
+	/* compute the inode number within the iag.
+	 */
+	ino = (extno << L2INOSPEREXT) + rem;
+
+	/* allocate the inode.
+	 */
+	rc = diAllocBit(imap, iagp, ino);
+	IREAD_UNLOCK(imap->im_ipimap);
+	if (rc) {
+		release_metapage(mp);
+		return (rc);
+	}
+
+	/* set the results of the allocation and write the iag.
+	 */
+	diInitInode(ip, iagno, ino, extno, iagp);
+	write_metapage(mp);
+
+	return (0);
+}
+
+
+/*
+ * NAME: diAllocExt(imap,agno,ip)
+ *
+ * FUNCTION: add a new extent of free inodes to an iag, allocating
+ *	an inode from this extent to satisfy the current allocation
+ *	request.
+ *
+ *	this routine first tries to find an existing iag with free
+ *	extents through the ag free extent list. if list is not
+ *	empty, the head of the list will be selected as the home
+ *	of the new extent of free inodes. otherwise (the list is
+ *	empty), a new iag will be allocated for the ag to contain
+ *	the extent.
+ *
+ *	once an iag has been selected, the free extent summary map
+ *	is used to locate a free extent within the iag and diNewExt()
+ *	is called to initialize the extent, with initialization
+ *	including the allocation of the first inode of the extent
+ *	for the purpose of satisfying this request.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group number.
+ *	ip	- pointer to new inode to be filled in on successful return
+ *		  with the disk inode number allocated, its extent address
+ *		  and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int diAllocExt(imap_t * imap, int agno, struct inode *ip)
+{
+	int rem, iagno, sword, extno, rc;
+	metapage_t *mp;
+	iag_t *iagp;
+
+	/* check if the ag has any iags with free extents. if not,
+	 * allocate a new iag for the ag.
+	 */
+	if ((iagno = imap->im_agctl[agno].extfree) < 0) {
+		/* If successful, diNewIAG will obtain the read lock on the
+		 * imap inode.
+		 */
+		if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
+			return (rc);
+		}
+		iagp = (iag_t *) mp->data;
+
+		/* set the ag number if this is a brand new iag
+		 */
+		iagp->agstart =
+		    cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
+	} else {
+		/* read the iag.
+		 */
+		IREAD_LOCK(imap->im_ipimap);
+		if ((rc = diIAGRead(imap, iagno, &mp))) {
+			assert(0);
+		}
+		iagp = (iag_t *) mp->data;
+	}
+
+	/* using the free extent summary map, find a free extent.
+	 */
+	for (sword = 0;; sword++) {
+		assert(sword < SMAPSZ);
+		if (~iagp->extsmap[sword])
+			break;
+	}
+
+	/* determine the extent number of the free extent.
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
+	assert(rem < EXTSPERSUM);
+	extno = (sword << L2EXTSPERSUM) + rem;
+
+	/* initialize the new extent.
+	 */
+	rc = diNewExt(imap, iagp, extno);
+	IREAD_UNLOCK(imap->im_ipimap);
+	if (rc) {
+		/* something bad happened. if a new iag was allocated,
+		 * place it back on the inode map's iag free list, and
+		 * clear the ag number information.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
+			IAGFREE_LOCK(imap);
+			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
+			imap->im_freeiag = iagno;
+			IAGFREE_UNLOCK(imap);
+		}
+		write_metapage(mp);
+		return (rc);
+	}
+
+	/* set the results of the allocation and write the iag.
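+	 * (diNewExt() has already marked the extent's first inode
+	 * allocated in the working map, so extno << L2INOSPEREXT is the
+	 * inode number passed to diInitInode() below.)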
+	 */
+	diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
+
+	write_metapage(mp);
+
+	return (0);
+}
+
+
+/*
+ * NAME: diAllocBit(imap,iagp,ino)
+ *
+ * FUNCTION: allocate a backed inode from an iag.
+ *
+ *	this routine performs the mechanics of allocating a
+ *	specified inode from a backed extent.
+ *
+ *	if the inode to be allocated represents the last free
+ *	inode within the iag, the iag will be removed from the
+ *	ag free inode list.
+ *
+ *	a careful update approach is used to provide consistency
+ *	in the face of updates to multiple buffers. under this
+ *	approach, all required buffers are obtained before making
+ *	any updates and are held until all updates are complete.
+ *
+ * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
+ *	this AG. Must have read lock on imap inode.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	iagp	- pointer to iag.
+ *	ino	- inode number to be allocated within the iag.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int diAllocBit(imap_t * imap, iag_t * iagp, int ino)
+{
+	int extno, bitno, agno, sword, rc;
+	metapage_t *amp, *bmp;
+	iag_t *aiagp = 0, *biagp = 0;
+	u32 mask;
+
+	/* check if this is the last free inode within the iag.
+	 * if so, it will have to be removed from the ag free
+	 * inode list, so get the iags preceding and following
+	 * it on the list.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(1)) {
+		amp = bmp = NULL;
+
+		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
+			if ((rc =
+			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
+				       &amp)))
+				return (rc);
+			aiagp = (iag_t *) amp->data;
+		}
+
+		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
+			if ((rc =
+			     diIAGRead(imap,
+				       le32_to_cpu(iagp->inofreeback),
+				       &bmp))) {
+				if (amp)
+					release_metapage(amp);
+				return (rc);
+			}
+			biagp = (iag_t *) bmp->data;
+		}
+	}
+
+	/* get the ag number, extent number, inode number within
+	 * the extent.
+	 */
+	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
+	extno = ino >> L2INOSPEREXT;
+	bitno = ino & (INOSPEREXT - 1);
+
+	/* compute the mask for setting the map.
+	 */
+	mask = HIGHORDER >> bitno;
+
+	/* the inode should be free and backed.
+	 */
+	assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0);
+	assert((le32_to_cpu(iagp->wmap[extno]) & mask) == 0);
+	assert(addressPXD(&iagp->inoext[extno]) != 0);
+
+	/* mark the inode as allocated in the working map.
+	 */
+	iagp->wmap[extno] |= cpu_to_le32(mask);
+
+	/* check if all inodes within the extent are now
+	 * allocated. if so, update the free inode summary
+	 * map to reflect this.
+	 */
+	if (iagp->wmap[extno] == ONES) {
+		sword = extno >> L2EXTSPERSUM;
+		bitno = extno & (EXTSPERSUM - 1);
+		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
+	}
+
+	/* if this was the last free inode in the iag, remove the
+	 * iag from the ag free inode list.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(1)) {
+		if (amp) {
+			aiagp->inofreeback = iagp->inofreeback;
+			write_metapage(amp);
+		}
+
+		if (bmp) {
+			biagp->inofreefwd = iagp->inofreefwd;
+			write_metapage(bmp);
+		} else {
+			imap->im_agctl[agno].inofree =
+			    le32_to_cpu(iagp->inofreefwd);
+		}
+		iagp->inofreefwd = iagp->inofreeback = -1;
+	}
+
+	/* update the free inode count at the iag, ag, inode
+	 * map levels.
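+	 * (the same count is kept at three scopes: nfreeinos in the
+	 * on-disk iag, numfree in the per-ag control array, and the
+	 * atomic im_numfree for the whole map.)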
+	 */
+	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1);
+	imap->im_agctl[agno].numfree -= 1;
+	atomic_dec(&imap->im_numfree);
+
+	return (0);
+}
+
+
+/*
+ * NAME: diNewExt(imap,iagp,extno)
+ *
+ * FUNCTION: initialize a new extent of inodes for an iag, allocating
+ *	the first inode of the extent for use for the current
+ *	allocation request.
+ *
+ *	disk resources are allocated for the new extent of inodes
+ *	and the inodes themselves are initialized to reflect their
+ *	existence within the extent (i.e. their inode numbers and
+ *	inode extent addresses are set) and their initial state
+ *	(mode and link count are set to zero).
+ *
+ *	if the iag is new, it is not yet on an ag extent free list
+ *	but will now be placed on this list.
+ *
+ *	if the allocation of the new extent causes the iag to
+ *	have no free extent, the iag will be removed from the
+ *	ag extent free list.
+ *
+ *	if the iag has no free backed inodes, it will be placed
+ *	on the ag free inode list, since the addition of the new
+ *	extent will now cause it to have free inodes.
+ *
+ *	a careful update approach is used to provide consistency
+ *	(i.e. list consistency) in the face of updates to multiple
+ *	buffers. under this approach, all required buffers are
+ *	obtained before making any updates and are held until all
+ *	updates are complete.
+ *
+ * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
+ *	this AG. Must have read lock on imap inode.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	iagp	- pointer to iag.
+ *	extno	- extent number.
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ */
+static int diNewExt(imap_t * imap, iag_t * iagp, int extno)
+{
+	int agno, iagno, fwd, back, freei = 0, sword, rc;
+	iag_t *aiagp = 0, *biagp = 0, *ciagp = 0;
+	metapage_t *amp, *bmp, *cmp, *dmp;
+	struct inode *ipimap;
+	s64 blkno, hint;
+	int i, j;
+	u32 mask;
+	ino_t ino;
+	dinode_t *dp;
+	struct jfs_sb_info *sbi;
+
+	/* better have free extents.
+	 */
+	assert(iagp->nfreeexts);
+
+	/* get the inode map inode.
+	 */
+	ipimap = imap->im_ipimap;
+	sbi = JFS_SBI(ipimap->i_sb);
+
+	amp = bmp = cmp = NULL;
+
+	/* get the ag and iag numbers for this iag.
+	 */
+	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+	iagno = le32_to_cpu(iagp->iagnum);
+
+	/* check if this is the last free extent within the
+	 * iag. if so, the iag must be removed from the ag
+	 * free extent list, so get the iags preceding and
+	 * following the iag on this list.
+	 */
+	if (iagp->nfreeexts == cpu_to_le32(1)) {
+		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
+			if ((rc = diIAGRead(imap, fwd, &amp)))
+				return (rc);
+			aiagp = (iag_t *) amp->data;
+		}
+
+		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
+			if ((rc = diIAGRead(imap, back, &bmp)))
+				goto error_out;
+			biagp = (iag_t *) bmp->data;
+		}
+	} else {
+		/* the iag has free extents. if all extents are free
+		 * (as is the case for a newly allocated iag), the iag
+		 * must be added to the ag free extent list, so get
+		 * the iag at the head of the list in preparation for
+		 * adding this iag to this list.
+		 */
+		fwd = back = -1;
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
+			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
+				if ((rc = diIAGRead(imap, fwd, &amp)))
+					goto error_out;
+				aiagp = (iag_t *) amp->data;
+			}
+		}
+	}
+
+	/* check if the iag has no free inodes.
if so, the iag + * will have to be added to the ag free inode list, so get + * the iag at the head of the list in preparation for + * adding this iag to this list. in doing this, we must + * check if we already have the iag at the head of + * the list in hand. + */ + if (iagp->nfreeinos == 0) { + freei = imap->im_agctl[agno].inofree; + + if (freei >= 0) { + if (freei == fwd) { + ciagp = aiagp; + } else if (freei == back) { + ciagp = biagp; + } else { + if ((rc = diIAGRead(imap, freei, &cmp))) + goto error_out; + ciagp = (iag_t *) cmp->data; + } + assert(ciagp != NULL); + } + } + + /* allocate disk space for the inode extent. + */ + if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) + hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; + else + hint = addressPXD(&iagp->inoext[extno - 1]) + + lengthPXD(&iagp->inoext[extno - 1]) - 1; + + if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) + goto error_out; + + /* compute the inode number of the first inode within the + * extent. + */ + ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); + + /* initialize the inodes within the newly allocated extent a + * page at a time. + */ + for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { + /* get a buffer for this page of disk inodes. + */ + dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); + if (dmp == NULL) { + rc = EIO; + goto error_out; + } + dp = (dinode_t *) dmp->data; + + /* initialize the inode number, mode, link count and + * inode extent address. + */ + for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { + dp->di_inostamp = cpu_to_le32(sbi->inostamp); + dp->di_number = cpu_to_le32(ino); + dp->di_fileset = cpu_to_le32(FILESYSTEM_I); + dp->di_mode = 0; + dp->di_nlink = 0; + PXDaddress(&(dp->di_ixpxd), blkno); + PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); + } + write_metapage(dmp); + } + + /* if this is the last free extent within the iag, remove the + * iag from the ag free extent list. + */ + if (iagp->nfreeexts == cpu_to_le32(1)) { + if (fwd >= 0) + aiagp->extfreeback = iagp->extfreeback; + + if (back >= 0) + biagp->extfreefwd = iagp->extfreefwd; + else + imap->im_agctl[agno].extfree = + le32_to_cpu(iagp->extfreefwd); + + iagp->extfreefwd = iagp->extfreeback = -1; + } else { + /* if the iag has all free extents (newly allocated iag), + * add the iag to the ag free extent list. + */ + if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { + if (fwd >= 0) + aiagp->extfreeback = cpu_to_le32(iagno); + + iagp->extfreefwd = cpu_to_le32(fwd); + iagp->extfreeback = -1; + imap->im_agctl[agno].extfree = iagno; + } + } + + /* if the iag has no free inodes, add the iag to the + * ag free inode list. + */ + if (iagp->nfreeinos == 0) { + if (freei >= 0) + ciagp->inofreeback = cpu_to_le32(iagno); + + iagp->inofreefwd = + cpu_to_le32(imap->im_agctl[agno].inofree); + iagp->inofreeback = -1; + imap->im_agctl[agno].inofree = iagno; + } + + /* initialize the extent descriptor of the extent. */ + PXDlength(&iagp->inoext[extno], imap->im_nbperiext); + PXDaddress(&iagp->inoext[extno], blkno); + + /* initialize the working and persistent map of the extent. + * the working map will be initialized such that + * it indicates the first inode of the extent is allocated. + */ + iagp->wmap[extno] = cpu_to_le32(HIGHORDER); + iagp->pmap[extno] = 0; + + /* update the free inode and free extent summary maps + * for the extent to indicate the extent has free inodes + * and no longer represents a free extent. 
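+	 * (one summary bit covers one extent: a clear inosmap bit
+	 * advertises free inodes in the extent, while a set extsmap bit
+	 * marks the extent as backed rather than free.)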
+	 */
+	sword = extno >> L2EXTSPERSUM;
+	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
+	iagp->extsmap[sword] |= cpu_to_le32(mask);
+	iagp->inosmap[sword] &= cpu_to_le32(~mask);
+
+	/* update the free inode and free extent counts for the
+	 * iag.
+	 */
+	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) +
+				      (INOSPEREXT - 1));
+	iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1);
+
+	/* update the free and backed inode counts for the ag.
+	 */
+	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
+	imap->im_agctl[agno].numinos += INOSPEREXT;
+
+	/* update the free and backed inode counts for the inode map.
+	 */
+	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
+	atomic_add(INOSPEREXT, &imap->im_numinos);
+
+	/* write the iags.
+	 */
+	if (amp)
+		write_metapage(amp);
+	if (bmp)
+		write_metapage(bmp);
+	if (cmp)
+		write_metapage(cmp);
+
+	return (0);
+
+      error_out:
+
+	/* release the iags.
+	 */
+	if (amp)
+		release_metapage(amp);
+	if (bmp)
+		release_metapage(bmp);
+	if (cmp)
+		release_metapage(cmp);
+
+	return (rc);
+}
+
+
+/*
+ * NAME: diNewIAG(imap,iagnop,agno)
+ *
+ * FUNCTION: allocate a new iag for an allocation group.
+ *
+ *	first tries to allocate the iag from the inode map
+ *	iagfree list:
+ *	if the list has free iags, the head of the list is removed
+ *	and returned to satisfy the request.
+ *	if the inode map's iag free list is empty, the inode map
+ *	is extended to hold a new iag. this new iag is initialized
+ *	and returned to satisfy the request.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	iagnop	- pointer to an iag number set with the number of the
+ *		  newly allocated iag upon successful return.
+ *	agno	- allocation group number.
+ *	mpp	- Buffer pointer to be filled in with new IAG's buffer
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	ENOSPC	- insufficient disk resources.
+ *	EIO	- i/o error.
+ *
+ * serialization:
+ *	AG lock held on entry/exit;
+ *	write lock on the map is held inside;
+ *	read lock on the map is held on successful completion;
+ *
+ * note: new iag transaction:
+ * . synchronously write iag;
+ * . write log of xtree and inode of imap;
+ * . commit;
+ * . synchronous write of xtree (right to left, bottom to top);
+ * . at start of logredo(): init in-memory imap with one additional iag page;
+ * . at end of logredo(): re-read imap inode to determine
+ *   new imap size;
+ */
+static int
+diNewIAG(imap_t * imap, int *iagnop, int agno, metapage_t ** mpp)
+{
+	int rc;
+	int iagno, i, xlen;
+	struct inode *ipimap;
+	struct super_block *sb;
+	struct jfs_sb_info *sbi;
+	metapage_t *mp;
+	iag_t *iagp;
+	s64 xaddr = 0;
+	s64 blkno;
+	int tid = 0;
+#ifdef _STILL_TO_PORT
+	xad_t xad;
+#endif	/* _STILL_TO_PORT */
+	struct inode *iplist[1];
+
+	/* pick up pointers to the inode map and mount inodes */
+	ipimap = imap->im_ipimap;
+	sb = ipimap->i_sb;
+	sbi = JFS_SBI(sb);
+
+	/* acquire the free iag lock */
+	IAGFREE_LOCK(imap);
+
+	/* if there are any iags on the inode map free iag list,
+	 * allocate the iag from the head of the list.
+	 */
+	if (imap->im_freeiag >= 0) {
+		/* pick up the iag number at the head of the list */
+		iagno = imap->im_freeiag;
+
+		/* determine the logical block number of the iag */
+		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
+	} else {
+		/* no free iags. the inode map will have to be extended
+		 * to include a new iag.
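+		 * (the map file grows by one page: dbAlloc() supplies the
+		 * disk extent, xtInsert() links it into the imap inode's
+		 * xtree, and i_size grows by PSIZE, all under IWRITE_LOCK.)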
+		 */
+
+		/* acquire inode map lock */
+		IWRITE_LOCK(ipimap);
+
+		assert(ipimap->i_size >> L2PSIZE == imap->im_nextiag + 1);
+
+		/* get the next available iag number */
+		iagno = imap->im_nextiag;
+
+		/* make sure that we have not exceeded the maximum inode
+		 * number limit.
+		 */
+		if (iagno > (MAXIAGS - 1)) {
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = ENOSPC;
+			goto out;
+		}
+
+		/*
+		 * synchronously append new iag page.
+		 */
+		/* determine the logical address of iag page to append */
+		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
+
+		/* Allocate extent for new iag page */
+		xlen = sbi->nbperpage;
+		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			goto out;
+		}
+
+		/* assign a buffer for the page */
+		mp = get_metapage(ipimap, xaddr, PSIZE, 1);
+		//bp = bmAssign(ipimap, blkno, xaddr, PSIZE, bmREAD_PAGE);
+		if (!mp) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = EIO;
+			goto out;
+		}
+		iagp = (iag_t *) mp->data;
+
+		/* init the iag */
+		memset(iagp, 0, sizeof(iag_t));
+		iagp->iagnum = cpu_to_le32(iagno);
+		iagp->inofreefwd = iagp->inofreeback = -1;
+		iagp->extfreefwd = iagp->extfreeback = -1;
+		iagp->iagfree = -1;
+		iagp->nfreeinos = 0;
+		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
+
+		/* initialize the free inode summary map (free extent
+		 * summary map initialization handled by bzero).
+		 */
+		for (i = 0; i < SMAPSZ; i++)
+			iagp->inosmap[i] = ONES;
+
+		flush_metapage(mp);
+#ifdef _STILL_TO_PORT
+		/* synchronously write the iag page */
+		if (bmWrite(bp)) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = EIO;
+			goto out;
+		}
+
+		/* Now the iag is on disk */
+
+		/*
+		 * start transaction of update of the inode map
+		 * addressing structure pointing to the new iag page;
+		 */
+#endif	/* _STILL_TO_PORT */
+		txBegin(sb, &tid, COMMIT_FORCE);
+
+		/* update the inode map addressing structure to point to it */
+		if ((rc =
+		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			goto out;
+		}
+
+		/* update the inode map's inode to reflect the extension */
+		ipimap->i_size += PSIZE;
+		ipimap->i_blocks += LBLK2PBLK(sb, xlen);
+
+		/*
+		 * txCommit(COMMIT_FORCE) will synchronously write address
+		 * index pages and inode after commit in careful update order
+		 * of address index pages (right to left, bottom up);
+		 */
+		iplist[0] = ipimap;
+		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
+
+		txEnd(tid);
+
+		duplicateIXtree(sb, blkno, xlen, &xaddr);
+
+		/* update the next available iag number */
+		imap->im_nextiag += 1;
+
+		/* Add the iag to the iag free list so we don't lose the iag
+		 * if a failure happens now.
+		 */
+		imap->im_freeiag = iagno;
+
+		/* Until we have logredo working, we want the imap inode &
+		 * control page to be up to date.
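+		 * (hence the synchronous diSync() below; with logredo in
+		 * place this write could presumably be left to the log.)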
+		 */
+		diSync(ipimap);
+
+		/* release the inode map lock */
+		IWRITE_UNLOCK(ipimap);
+	}
+
+	/* obtain read lock on map */
+	IREAD_LOCK(ipimap);
+
+	/* read the iag */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		rc = EIO;
+		goto out;
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* remove the iag from the iag free list */
+	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
+	iagp->iagfree = -1;
+
+	/* set the return iag number and buffer pointer */
+	*iagnop = iagno;
+	*mpp = mp;
+
+      out:
+	/* release the iag free lock */
+	IAGFREE_UNLOCK(imap);
+
+	return (rc);
+}
+
+/*
+ * NAME: diIAGRead()
+ *
+ * FUNCTION: get the buffer for the specified iag within a fileset
+ *	or aggregate inode map.
+ *
+ * PARAMETERS:
+ *	imap	- pointer to inode map control structure.
+ *	iagno	- iag number.
+ *	mpp	- pointer to the metapage pointer to be filled in on
+ *		  successful exit.
+ *
+ * SERIALIZATION:
+ *	must have read lock on imap inode
+ *	(When called by diExtendFS, the filesystem is quiesced, therefore
+ *	the read lock is unnecessary.)
+ *
+ * RETURN VALUES:
+ *	0	- success.
+ *	EIO	- i/o error.
+ */
+static int diIAGRead(imap_t * imap, int iagno, metapage_t ** mpp)
+{
+	struct inode *ipimap = imap->im_ipimap;
+	s64 blkno;
+
+	/* compute the logical block number of the iag. */
+	blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
+
+	/* read the iag. */
+	*mpp = read_metapage(ipimap, blkno, PSIZE, 0);
+	if (*mpp == NULL) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * NAME: diFindFree()
+ *
+ * FUNCTION: find the first free bit in a word starting at
+ *	the specified bit position.
+ *
+ * PARAMETERS:
+ *	word	- word to be examined.
+ *	start	- starting bit position.
+ *
+ * RETURN VALUES:
+ *	bit position of first free bit in the word or 32 if
+ *	no free bits were found.
+ */
+static int diFindFree(u32 word, int start)
+{
+	int bitno;
+	assert(start < 32);
+	/* scan the word for the first free bit. */
+	for (word <<= start, bitno = start; bitno < 32;
+	     bitno++, word <<= 1) {
+		if ((word & HIGHORDER) == 0)
+			break;
+	}
+	return (bitno);
+}
+
+/*
+ * NAME: diUpdatePMap()
+ *
+ * FUNCTION: Update the persistent map in an IAG for the allocation or
+ *	freeing of the specified inode.
+ *
+ * PRE CONDITIONS: Working map has already been updated for allocate.
+ *
+ * PARAMETERS:
+ *	ipimap	- Incore inode map inode
+ *	inum	- Number of inode to mark in permanent map
+ *	is_free	- If TRUE indicates inode should be marked freed, otherwise
+ *		  indicates inode should be marked allocated.
+ *	tblk	- transaction block containing this update
+ *
+ * RETURNS: 0 for success
+ */
+int
+diUpdatePMap(struct inode *ipimap,
+	     unsigned long inum, boolean_t is_free, tblock_t * tblk)
+{
+	int rc;
+	iag_t *iagp;
+	metapage_t *mp;
+	int iagno, ino, extno, bitno;
+	imap_t *imap;
+	u32 mask;
+	log_t *log;
+	int lsn, difft, diffp;
+
+	imap = JFS_IP(ipimap)->i_imap;
+	/* get the iag number containing the inode */
+	iagno = INOTOIAG(inum);
+	/* make sure that the iag is contained within the map */
+	assert(iagno < imap->im_nextiag);
+	/* read the iag */
+	IREAD_LOCK(ipimap);
+	rc = diIAGRead(imap, iagno, &mp);
+	IREAD_UNLOCK(ipimap);
+	if (rc)
+		return (rc);
+	iagp = (iag_t *) mp->data;
+	/* get the inode number and extent number of the inode within
+	 * the iag and the inode number within the extent.
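+	 * (e.g. with INOSPERIAG == 4096 and INOSPEREXT == 32, values
+	 * assumed here only for illustration, inum 5000 yields
+	 * ino = 5000 & 4095 = 904, extno = 904 >> 5 = 28, and
+	 * bitno = 904 & 31 = 8.)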
+ */ + ino = inum & (INOSPERIAG - 1); + extno = ino >> L2INOSPEREXT; + bitno = ino & (INOSPEREXT - 1); + mask = HIGHORDER >> bitno; + /* + * mark the inode free in persistent map: + */ + if (is_free == TRUE) { + /* The inode should have been allocated both in working + * map and in persistent map; + * the inode will be freed from working map at the release + * of last reference release; + */ +// assert(le32_to_cpu(iagp->wmap[extno]) & mask); + if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { + jERROR(1, + ("diUpdatePMap: inode %ld not marked as allocated in wmap!\n", + inum)); + updateSuper(ipimap->i_sb, FM_DIRTY); + } +// assert(le32_to_cpu(iagp->pmap[extno]) & mask); + if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { + jERROR(1, + ("diUpdatePMap: inode %ld not marked as allocated in pmap!\n", + inum)); + updateSuper(ipimap->i_sb, FM_DIRTY); + } + /* update the bitmap for the extent of the freed inode */ + iagp->pmap[extno] &= cpu_to_le32(~mask); + } + /* + * mark the inode allocated in persistent map: + */ + else { + /* The inode should be already allocated in the working map + * and should be free in persistent map; + */ + assert(le32_to_cpu(iagp->wmap[extno]) & mask); + assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0); + /* update the bitmap for the extent of the allocated inode */ + iagp->pmap[extno] |= cpu_to_le32(mask); + } + /* + * update iag lsn + */ + lsn = tblk->lsn; + log = JFS_SBI(tblk->sb)->log; + if (mp->lsn != 0) { + /* inherit older/smaller lsn */ + logdiff(difft, lsn, log); + logdiff(diffp, mp->lsn, log); + if (difft < diffp) { + mp->lsn = lsn; + /* move mp after tblock in logsync list */ + LOGSYNC_LOCK(log); + list_del(&mp->synclist); + list_add(&mp->synclist, &tblk->synclist); + LOGSYNC_UNLOCK(log); + } + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + assert(mp->clsn); + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } else { + mp->log = log; + mp->lsn = lsn; + /* insert mp after tblock in logsync list */ + LOGSYNC_LOCK(log); + log->count++; + list_add(&mp->synclist, &tblk->synclist); + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } +// bmLazyWrite(mp, log->flag & JFS_COMMIT); + write_metapage(mp); + return (0); +} + +/* + * diExtendFS() + * + * function: update imap for extendfs(); + * + * note: AG size has been increased s.t. each k old contiguous AGs are + * coalesced into a new AG; + */ +int diExtendFS(struct inode *ipimap, struct inode *ipbmap) +{ + int rc, rcx = 0; + imap_t *imap = JFS_IP(ipimap)->i_imap; + iag_t *iagp = 0, *hiagp = 0; + bmap_t *mp = JFS_SBI(ipbmap->i_sb)->bmap; + metapage_t *bp, *hbp; + int i, n, head; + int numinos, xnuminos = 0, xnumfree = 0; + s64 agstart; + + jEVENT(0, ("diExtendFS: nextiag:%d numinos:%d numfree:%d\n", + imap->im_nextiag, atomic_read(&imap->im_numinos), + atomic_read(&imap->im_numfree))); + + /* + * reconstruct imap + * + * coalesce contiguous k (newAGSize/oldAGSize) AGs; + * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; + * note: new AG size = old AG size * (2**x). + */ + + /* init per AG control information im_agctl[] */ + for (i = 0; i < MAXAG; i++) { + imap->im_agctl[i].inofree = -1; /* free inode list */ + imap->im_agctl[i].extfree = -1; /* free extent list */ + imap->im_agctl[i].numinos = 0; /* number of backed inodes */ + imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ + } + + /* + * process each iag_t page of the map. 
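+	 * (all im_nextiag iags are visited; wholly-free iags stay on the
+	 * free iag list, the rest are rethreaded onto the free lists of
+	 * whichever ag their agstart now computes to.)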
+ * + * rebuild AG Free Inode List, AG Free Inode Extent List; + */ + for (i = 0; i < imap->im_nextiag; i++) { + if ((rc = diIAGRead(imap, i, &bp))) { + rcx = rc; + continue; + } + iagp = (iag_t *) bp->data; + assert(le32_to_cpu(iagp->iagnum) == i); + + /* leave free iag in the free iag list */ + if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { + release_metapage(bp); + continue; + } + + /* agstart that computes to the same ag is treated as same; */ + agstart = le64_to_cpu(iagp->agstart); + /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ + n = agstart >> mp->db_agl2size; +/* +printf("diExtendFS: iag:%d agstart:%Ld agno:%d\n", i, agstart, n); +*/ + + /* compute backed inodes */ + numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) + << L2INOSPEREXT; + if (numinos > 0) { + /* merge AG backed inodes */ + imap->im_agctl[n].numinos += numinos; + xnuminos += numinos; + } + + /* if any backed free inodes, insert at AG free inode list */ + if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { + if ((head = imap->im_agctl[n].inofree) == -1) + iagp->inofreefwd = iagp->inofreeback = -1; + else { + if ((rc = diIAGRead(imap, head, &hbp))) { + rcx = rc; + goto nextiag; + } + hiagp = (iag_t *) hbp->data; + hiagp->inofreeback = + le32_to_cpu(iagp->iagnum); + iagp->inofreefwd = cpu_to_le32(head); + iagp->inofreeback = -1; + write_metapage(hbp); + } + + imap->im_agctl[n].inofree = + le32_to_cpu(iagp->iagnum); + + /* merge AG backed free inodes */ + imap->im_agctl[n].numfree += + le32_to_cpu(iagp->nfreeinos); + xnumfree += le32_to_cpu(iagp->nfreeinos); + } + + /* if any free extents, insert at AG free extent list */ + if (le32_to_cpu(iagp->nfreeexts) > 0) { + if ((head = imap->im_agctl[n].extfree) == -1) + iagp->extfreefwd = iagp->extfreeback = -1; + else { + if ((rc = diIAGRead(imap, head, &hbp))) { + rcx = rc; + goto nextiag; + } + hiagp = (iag_t *) hbp->data; + hiagp->extfreeback = iagp->iagnum; + iagp->extfreefwd = cpu_to_le32(head); + iagp->extfreeback = -1; + write_metapage(hbp); + } + + imap->im_agctl[n].extfree = + le32_to_cpu(iagp->iagnum); + } + + nextiag: + write_metapage(bp); + } + + ASSERT(xnuminos == atomic_read(&imap->im_numinos) && + xnumfree == atomic_read(&imap->im_numfree)); + + return rcx; +} + + +/* + * duplicateIXtree() + * + * serialization: IWRITE_LOCK held on entry/exit + * + * note: shadow page with regular inode (rel.2); + */ +static void +duplicateIXtree(struct super_block *sb, s64 blkno, int xlen, s64 * xaddr) +{ + int rc; + int tid; + struct inode *ip; + metapage_t *mpsuper; + struct jfs_superblock *j_sb; + + /* if AIT2 ipmap2 is bad, do not try to update it */ + if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ + return; + ip = diReadSpecial(sb, FILESYSTEM_I + INOSPEREXT); + if (ip == 0) { + JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; + if ((rc = readSuper(sb, &mpsuper))) + return; + j_sb = (struct jfs_superblock *) (mpsuper->data); + j_sb->s_flag |= JFS_BAD_SAIT; + write_metapage(mpsuper); + return; + } + + /* start transaction */ + txBegin(sb, &tid, COMMIT_FORCE); + /* update the inode map addressing structure to point to it */ + if ((rc = xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0))) { + JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; + txAbort(tid, 1); + goto cleanup; + + } + /* update the inode map's inode to reflect the extension */ + ip->i_size += PSIZE; + ip->i_blocks += LBLK2PBLK(sb, xlen); + rc = txCommit(tid, 1, &ip, COMMIT_FORCE); + cleanup: + txEnd(tid); + diFreeSpecial(ip); +} + +/* + * NAME: copy_from_dinode() + * + * FUNCTION: Copies inode info from disk inode to in-memory 
inode + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient memory + */ +static int copy_from_dinode(dinode_t * dip, struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + + jfs_ip->fileset = le32_to_cpu(dip->di_fileset); + jfs_ip->mode2 = le32_to_cpu(dip->di_mode); + + ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; + ip->i_nlink = le32_to_cpu(dip->di_nlink); + ip->i_uid = le32_to_cpu(dip->di_uid); + ip->i_gid = le32_to_cpu(dip->di_gid); + ip->i_size = le64_to_cpu(dip->di_size); + ip->i_atime = le32_to_cpu(dip->di_atime.tv_sec); + ip->i_mtime = le32_to_cpu(dip->di_mtime.tv_sec); + ip->i_ctime = le32_to_cpu(dip->di_ctime.tv_sec); + ip->i_blksize = ip->i_sb->s_blocksize; + ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); + ip->i_version = ++event; + ip->i_generation = le32_to_cpu(dip->di_gen); + + jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ + jfs_ip->acl = dip->di_acl; /* as are dxd's */ + jfs_ip->ea = dip->di_ea; + jfs_ip->next_index = le32_to_cpu(dip->di_next_index); + if (!S_ISFIFO(ip->i_mode)) { + jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); + jfs_ip->acltype = le32_to_cpu(dip->di_acltype); + } + /* + * We may only need to do this for "special" inodes (dmap, imap) + */ + RDWRLOCK_INIT(&jfs_ip->rdwrlock); + + if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) + ip->i_rdev = le32_to_cpu(dip->di_rdev); + else if (S_ISDIR(ip->i_mode)) { + memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); + } else if (!S_ISFIFO(ip->i_mode)) { + memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); + } + return (0); +} + +/* + * NAME: copy_to_dinode() + * + * FUNCTION: Copies inode info from in-memory inode to disk inode + */ +void copy_to_dinode(dinode_t * dip, struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + + dip->di_fileset = cpu_to_le32(jfs_ip->fileset); + dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); + dip->di_number = cpu_to_le32(ip->i_ino); + dip->di_gen = cpu_to_le32(ip->i_generation); + dip->di_size = cpu_to_le64(ip->i_size); + dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); + dip->di_nlink = cpu_to_le32(ip->i_nlink); + dip->di_uid = cpu_to_le32(ip->i_uid); + dip->di_gid = cpu_to_le32(ip->i_gid); + /* + * mode2 is only needed for storing the higher order bits. 
+ * Trust i_mode for the lower order ones + */ + dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); + dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime); + dip->di_atime.tv_nsec = 0; + dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime); + dip->di_ctime.tv_nsec = 0; + dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime); + dip->di_mtime.tv_nsec = 0; + dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ + dip->di_acl = jfs_ip->acl; /* as are dxd's */ + dip->di_ea = jfs_ip->ea; + dip->di_next_index = cpu_to_le32(jfs_ip->next_index); + if (!S_ISFIFO(ip->i_mode)) { + dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); + dip->di_otime.tv_nsec = 0; + dip->di_acltype = cpu_to_le32(jfs_ip->acltype); + } + + if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) + dip->di_rdev = cpu_to_le32(ip->i_rdev); +} + +void diClearExtension(struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + + jFYI(1, ("diClearExtension called ip = 0x%p\n", ip)); + + if (jfs_ip->atlhead) { + jERROR(1, + ("diClearExtension: inode 0x%p has anonymous tlocks\n", + ip)); + } + + kmem_cache_free(jfs_inode_cachep, jfs_ip); + ip->u.generic_ip = 0; +} + +#ifdef _JFS_DEBUG_IMAP +/* + * DBGdiInit() + */ +static void *DBGdiInit(imap_t * imap) +{ + u32 *dimap; + int size; + size = 64 * 1024; + if ((dimap = (u32 *) xmalloc(size, L2PSIZE, kernel_heap)) == NULL) + assert(0); + bzero((void *) dimap, size); + imap->im_DBGdimap = dimap; +} + +/* + * DBGdiAlloc() + */ +static void DBGdiAlloc(imap_t * imap, ino_t ino) +{ + u32 *dimap = imap->im_DBGdimap; + int w, b; + u32 m; + w = ino >> 5; + b = ino & 31; + m = 0x80000000 >> b; + assert(w < 64 * 256); + if (dimap[w] & m) { + printk("DEBUG diAlloc: duplicate alloc ino:0x%x\n", ino); + } + dimap[w] |= m; +} + +/* + * DBGdiFree() + */ +static void DBGdiFree(imap_t * imap, ino_t ino) +{ + u32 *dimap = imap->im_DBGdimap; + int w, b; + u32 m; + w = ino >> 5; + b = ino & 31; + m = 0x80000000 >> b; + assert(w < 64 * 256); + if ((dimap[w] & m) == 0) { + printk("DEBUG diFree: duplicate free ino:0x%x\n", ino); + } + dimap[w] &= ~m; +} + +static void dump_cp(imap_t * ipimap, char *function, int line) +{ + printk("\n* ********* *\nControl Page %s %d\n", function, line); + printk("FreeIAG %d\tNextIAG %d\n", ipimap->im_freeiag, + ipimap->im_nextiag); + printk("NumInos %d\tNumFree %d\n", + atomic_read(&ipimap->im_numinos), + atomic_read(&ipimap->im_numfree)); + printk("AG InoFree %d\tAG ExtFree %d\n", + ipimap->im_agctl[0].inofree, ipimap->im_agctl[0].extfree); + printk("AG NumInos %d\tAG NumFree %d\n", + ipimap->im_agctl[0].numinos, ipimap->im_agctl[0].numfree); +} + +static void dump_iag(iag_t * iag, char *function, int line) +{ + printk("\n* ********* *\nIAG %s %d\n", function, line); + printk("IagNum %d\tIAG Free %d\n", le32_to_cpu(iag->iagnum), + le32_to_cpu(iag->iagfree)); + printk("InoFreeFwd %d\tInoFreeBack %d\n", + le32_to_cpu(iag->inofreefwd), + le32_to_cpu(iag->inofreeback)); + printk("ExtFreeFwd %d\tExtFreeBack %d\n", + le32_to_cpu(iag->extfreefwd), + le32_to_cpu(iag->extfreeback)); + printk("NFreeInos %d\tNFreeExts %d\n", le32_to_cpu(iag->nfreeinos), + le32_to_cpu(iag->nfreeexts)); +} +#endif /* _JFS_DEBUG_IMAP */ diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_inode.c linuxppc64_2_4/fs/jfs/jfs_inode.c --- ../kernel.org/linux/fs/jfs/jfs_inode.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_inode.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,142 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +extern kmem_cache_t *jfs_inode_cachep; + +/* + * NAME: ialloc() + * + * FUNCTION: Allocate a new inode + * + */ +struct inode *ialloc(struct inode *parent, umode_t mode) +{ + struct super_block *sb = parent->i_sb; + struct inode *inode; + struct jfs_inode_info *jfs_inode; + int rc; + + inode = new_inode(sb); + if (!inode) { + jERROR(1, ("ialloc: new_inode returned NULL!\n")); + return inode; + } + + jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); + JFS_IP(inode) = jfs_inode; + if (!jfs_inode) { + inode->i_nlink = 0; + inode->i_sb = 0; + iput(inode); + return NULL; + } + memset(jfs_inode, 0, sizeof(struct jfs_inode_info)); + + rc = diAlloc(parent, S_ISDIR(mode), inode); + if (rc) { + jERROR(1, ("ialloc: diAlloc returned %d!\n", rc)); + kmem_cache_free(jfs_inode_cachep, jfs_inode); + inode->i_sb = 0; + inode->i_nlink = 0; + iput(inode); + return NULL; + } + + inode->i_uid = current->fsuid; + if (parent->i_mode & S_ISGID) { + inode->i_gid = parent->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + + inode->i_mode = mode; + if (S_ISDIR(mode)) + jfs_inode->mode2 = IDIRECTORY | mode; + else + jfs_inode->mode2 = INLINEEA | ISPARSE | mode; + inode->i_blksize = sb->s_blocksize; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = + CURRENT_TIME; + /* + * otime is an OS/2 thing. In 2.2 kernels, we can't afford + * the space in a FIFO inode, because the same space is used + * by the pipe code. OS/2 won't recognize a pipe anyway. + */ + if (!S_ISFIFO(mode)) + jfs_inode->otime = inode->i_ctime; + inode->i_version = ++event; + inode->i_generation = JFS_SBI(sb)->gengen++; + + set_cflag(COMMIT_New, inode); + + RDWRLOCK_INIT(&jfs_inode->rdwrlock); + + insert_inode_hash(inode); + + jFYI(1, ("ialloc returns inode = 0x%p\n", inode)); + + return inode; +} + +/* + * NAME: iwritelocklist() + * + * FUNCTION: Lock multiple inodes in sorted order to avoid deadlock + * + */ +void iwritelocklist(int n, ...)
+{ + va_list ilist; + struct inode *sort[4]; /* holds up to 4 inodes; callers must not pass more */ + struct inode *ip; + int k, m; + + va_start(ilist, n); + for (k = 0; k < n; k++) + sort[k] = va_arg(ilist, struct inode *); + va_end(ilist); + + /* Bubble sort in descending order */ + do { + m = 0; + for (k = 0; k < n; k++) + if ((k + 1) < n + && sort[k + 1]->i_ino > sort[k]->i_ino) { + ip = sort[k]; + sort[k] = sort[k + 1]; + sort[k + 1] = ip; + m++; + } + } while (m); + + /* Lock them */ + for (k = 0; k < n; k++) { + IWRITE_LOCK(sort[k]); + } +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_logmgr.c linuxppc64_2_4/fs/jfs/jfs_logmgr.c --- ../kernel.org/linux/fs/jfs/jfs_logmgr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_logmgr.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,2540 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ + +/* + * jfs_logmgr.c: log manager + * + * for related information, see transaction manager (jfs_txnmgr.c), and + * recovery manager (jfs_logredo.c). + * + * note: for detail, RTFS. + * + * log buffer manager: + * special purpose buffer manager supporting log i/o requirements. + * per log serial pageout of logpage + * queuing i/o requests and redriving i/o at iodone + * maintain current logpage buffer + * no caching since append only + * appropriate jfs buffer cache buffers as needed + * + * group commit: + * transactions which wrote COMMIT records in the same in-memory + * log page during the pageout of previous/current log page(s) are + * committed together by the pageout of the page. + * + * TBD lazy commit: + * transactions are committed asynchronously when the log page + * containing its COMMIT is paged out when it becomes full; + * + * serialization: + * . a per log lock serializes log write. + * . a per log lock serializes group commit. + * . a per log lock serializes log open/close; + * + * TBD log integrity: + * careful-write (ping-pong) of last logpage to recover from crash + * in overwrite. + * detection of split (out-of-order) write of physical sectors + * of last logpage via timestamp at end of each sector + * with its mirror data array at trailer. + * + * alternatives: + * lsn - 64-bit monotonically increasing integer vs + * 32-bit lspn and page eor. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * log manager + */ +struct lCache { + int nLog; /* number of active logs */ + unsigned int writes_submitted; + unsigned int writes_completed; +} lCache; + +/* + * lbuf's ready to be redriven.
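+ * queued by lbmWrite() and lbmIODone() and drained by + * jfs_logredrive() on the jfsIO thread.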
Protected by async_lock (jfsIOtask) + */ +lbuf_t *log_redrive_list; + +extern spinlock_t async_lock; + + +/* + * log read/write serialization (per log) + */ +#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) +#define LOG_LOCK(log) down(&((log)->loglock)) +#define LOG_UNLOCK(log) up(&((log)->loglock)) + + +/* + * log group commit serialization (per log) + */ + +#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) +#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) +#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) +#define LOGGC_WAKEUP(tblk) wake_up(&(tblk)->gcwait) + +/* + * log sync serialization (per log) + */ +#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) +#define LOGSYNC_BARRIER(logsize) ((logsize)/4) +/* +#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) +#define LOGSYNC_BARRIER(logsize) ((logsize)/2) +*/ + + +/* + * log buffer cache synchronization + */ +static spinlock_t jfsLCacheLock; + +#define LCACHE_LOCK_INIT() spin_lock_init(&jfsLCacheLock) +#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) +#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) + +/* + * See __SLEEP_COND in jfs_locks.h + */ +#define LCACHE_SLEEP_COND(wq, cond, flags) \ +do { \ + if (cond) \ + break; \ + __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ +} while (0) + +#define LCACHE_WAKEUP(event) wake_up(event) + + +/* + * lbuf buffer cache (lCache) control + */ +/* log buffer manager pageout control (cumulative, inclusive) */ +#define lbmREAD 0x0001 +#define lbmWRITE 0x0002 /* enqueue at tail of write queue; + * init pageout if at head of queue; + */ +#define lbmRELEASE 0x0004 /* remove from write queue + * at completion of pageout; + * do not free/recycle it yet: + * caller will free it; + */ +#define lbmSYNC 0x0008 /* do not return to freelist + * when removed from write queue; + */ +#define lbmFREE 0x0010 /* return to freelist + * at completion of pageout; + * the buffer may be recycled; + */ +#define lbmDONE 0x0020 +#define lbmERROR 0x0040 +#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing + * of log page + */ +#define lbmDIRECT 0x0100 + +/* + * external references + */ +extern void vPut(struct inode *ip); +extern void txLazyUnlock(tblock_t * tblk); +extern struct task_struct *jfsIOtask; + +/* + * forward references + */ +static int lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd, + tlock_t * tlck); + +static int lmNextPage(log_t * log); +static int lmLogInit(log_t * log); +static int lmLogShutdown(log_t * log); + +static int lbmLogInit(log_t * log); +static void lbmLogShutdown(log_t * log); +static lbuf_t *lbmAllocate(log_t * log, int); +static void lbmFree(lbuf_t * bp); +static void lbmfree(lbuf_t * bp); +static int lbmRead(log_t * log, int pn, lbuf_t ** bpp); +static void lbmWrite(log_t * log, lbuf_t * bp, int flag, int cant_block); +static void lbmDirectWrite(log_t * log, lbuf_t * bp, int flag); +static int lbmIOWait(lbuf_t * bp, int flag); +static void lbmIODone(struct buffer_head *bh, int); +#ifdef _STILL_TO_PORT +static void lbmDirectIODone(iobuf_t * ddbp); +#endif /* _STILL_TO_PORT */ +void lbmStartIO(lbuf_t * bp); +void lmGCwrite(log_t * log, int cant_block); + + +#ifdef _JFS_STATISTICS +/* + * statistics + */ +struct statLCache { + uint commit; /* # of commits */ + uint pageinit; /* # of pages written */ + uint pagedone; /* # of page writes */ + uint sync; /* # of logsync() */ + uint maxbufcnt; /* max # of buffers allocated */ +} statLCache; +#endif /* _JFS_STATISTICS
*/ + + +/* + * NAME: lmLog() + * + * FUNCTION: write a log record; + * + * PARAMETER: + * + * RETURN: lsn - offset to the next log record to write (end-of-log); + * -1 - error; + * + * note: todo: log error handler + */ +int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + int lsn; + int diffp, difft; + metapage_t *mp = NULL; + + jFYI(1, ("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p\n", + log, tblk, lrd, tlck)); + + LOG_LOCK(log); + + /* log by (out-of-transaction) JFS ? */ + if (tblk == NULL) + goto writeRecord; + + /* log from page ? */ + if (tlck == NULL || + tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) + goto writeRecord; + + /* + * initialize/update page/transaction recovery lsn + */ + lsn = log->lsn; + + LOGSYNC_LOCK(log); + + /* + * initialize page lsn if first log write of the page + */ + if (mp->lsn == 0) { + mp->log = log; + mp->lsn = lsn; + log->count++; + + /* insert page at tail of logsynclist */ + list_add_tail(&mp->synclist, &log->synclist); + } + + /* + * initialize/update lsn of tblock of the page + * + * transaction inherits oldest lsn of pages associated + * with allocation/deallocation of resources (their + * log records are used to reconstruct allocation map + * at recovery time: inode for inode allocation map, + * B+-tree index of extent descriptors for block + * allocation map); + * allocation map pages inherit transaction lsn at + * commit time to allow forwarding log syncpt past log + * records associated with allocation/deallocation of + * resources only after persistent map of these map pages + * has been updated and propagated to home. + */ + /* + * initialize transaction lsn: + */ + if (tblk->lsn == 0) { + /* inherit lsn of its first page logged */ + tblk->lsn = mp->lsn; + log->count++; + + /* insert tblock after the page on logsynclist */ + list_add(&tblk->synclist, &mp->synclist); + } + /* + * update transaction lsn: + */ + else { + /* inherit oldest/smallest lsn of page */ + logdiff(diffp, mp->lsn, log); + logdiff(difft, tblk->lsn, log); + if (diffp < difft) { + /* update tblock lsn with page lsn */ + tblk->lsn = mp->lsn; + + /* move tblock after page on logsynclist */ + list_del(&tblk->synclist); + list_add(&tblk->synclist, &mp->synclist); + } + } + + LOGSYNC_UNLOCK(log); + + /* + * write the log record + */ + writeRecord: + lsn = lmWriteRecord(log, tblk, lrd, tlck); + + /* + * forward log syncpt if log reached next syncpt trigger + */ + logdiff(diffp, lsn, log); + if (diffp >= log->nextsync) + lsn = lmLogSync(log, 0); + + /* update end-of-log lsn */ + log->lsn = lsn; + + LOG_UNLOCK(log); + + /* return end-of-log address */ + return lsn; +} + + +/* + * NAME: lmWriteRecord() + * + * FUNCTION: move the log record to current log page + * + * PARAMETER: cd - commit descriptor + * + * RETURN: end-of-log address + * + * serialization: LOG_LOCK() held on entry/exit + */ +static int +lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + int lsn = 0; /* end-of-log address */ + lbuf_t *bp; /* dst log page buffer */ + logpage_t *lp; /* dst log page */ + caddr_t dst; /* destination address in log page */ + int dstoffset; /* end-of-log offset in log page */ + int freespace; /* free space in log page */ + caddr_t p; /* src meta-data page */ + caddr_t src; + int srclen; + int nbytes; /* number of bytes to move */ + int i; + int len; + linelock_t *linelock; + lv_t *lv; + lvd_t *lvd; + int l2linesize; + + len = 0; + + /* retrieve destination log page to write */ + bp = (lbuf_t *) log->bp; + lp = (logpage_t *)
bp->l_ldata; + dstoffset = log->eor; + + /* any log data to write ? */ + if (tlck == NULL) + goto moveLrd; + + /* + * move log record data + */ + /* retrieve source meta-data page to log */ + if (tlck->flag & tlckPAGELOCK) { + p = (caddr_t) (tlck->mp->data); + linelock = (linelock_t *) & tlck->lock; + } + /* retrieve source in-memory inode to log */ + else if (tlck->flag & tlckINODELOCK) { + if (tlck->type & tlckDTREE) + p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; + else + p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; + linelock = (linelock_t *) & tlck->lock; + } +#ifdef _JFS_WIP + else if (tlck->flag & tlckINLINELOCK) { + + inlinelock = (inlinelock_t *) & tlck; + p = (caddr_t) & inlinelock->pxd; + linelock = (linelock_t *) & tlck; + } +#endif /* _JFS_WIP */ + else { + jERROR(2, ("lmWriteRecord: UFO tlck:0x%p\n", tlck)); + return 0; /* Probably should trap */ + } + l2linesize = linelock->l2linesize; + + moveData: + ASSERT(linelock->index <= linelock->maxcnt); + + lv = (lv_t *) & linelock->lv; + for (i = 0; i < linelock->index; i++, lv++) { + if (lv->length == 0) + continue; + + /* is page full ? */ + if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { + /* page becomes full: move on to next page */ + lmNextPage(log); + + bp = log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + } + + /* + * move log vector data + */ + src = (u8 *) p + (lv->offset << l2linesize); + srclen = lv->length << l2linesize; + len += srclen; + while (srclen > 0) { + freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; + nbytes = min(freespace, srclen); + dst = (caddr_t) lp + dstoffset; + memcpy(dst, src, nbytes); + dstoffset += nbytes; + + /* is page not full ? */ + if (dstoffset < LOGPSIZE - LOGPTLRSIZE) + break; + + /* page becomes full: move on to next page */ + lmNextPage(log); + + bp = (lbuf_t *) log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + + srclen -= nbytes; + src += nbytes; + } + + /* + * move log vector descriptor + */ + len += 4; + lvd = (lvd_t *) ((caddr_t) lp + dstoffset); + lvd->offset = cpu_to_le16(lv->offset); + lvd->length = cpu_to_le16(lv->length); + dstoffset += 4; + jFYI(1, + ("lmWriteRecord: lv offset:%d length:%d\n", + lv->offset, lv->length)); + } + + if ((i = linelock->next)) { + linelock = (linelock_t *) & TxLock[i]; + goto moveData; + } + + /* + * move log record descriptor + */ + moveLrd: + lrd->length = cpu_to_le16(len); + + src = (caddr_t) lrd; + srclen = LOGRDSIZE; + + while (srclen > 0) { + freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; + nbytes = min(freespace, srclen); + dst = (caddr_t) lp + dstoffset; + memcpy(dst, src, nbytes); + + dstoffset += nbytes; + srclen -= nbytes; + + /* are there more to move than freespace of page ? */ + if (srclen) + goto pageFull; + + /* + * end of log record descriptor + */ + + /* update last log record eor */ + log->eor = dstoffset; + bp->l_eor = dstoffset; + lsn = (log->page << L2LOGPSIZE) + dstoffset; + + if (lrd->type & cpu_to_le16(LOG_COMMIT)) { + tblk->clsn = lsn; + jFYI(1, + ("wr: tclsn:0x%x, beor:0x%x\n", tblk->clsn, + bp->l_eor)); + + INCREMENT(statLCache.commit); /* # of commits */ + + /* + * enqueue tblock for group commit: + * + * enqueue tblock of non-trivial/synchronous COMMIT + * at tail of group commit queue + * (trivial/asynchronous COMMITs are ignored by + * group commit.)
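+ * + * an illustrative sketch of the queue built below: after three + * transactions write COMMIT records on the same log page, + * log->cqueue.head -> tblk1 -> tblk2 -> tblk3 <- log->cqueue.tail, + * singly linked through tblk->cqnext; lmGCwrite() later walks + * this list to build the commit group for the page.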
+ */ + LOGGC_LOCK(log); + + /* init tblock gc state */ + tblk->flag = tblkGC_QUEUE; + tblk->bp = log->bp; + tblk->pn = log->page; + tblk->eor = log->eor; + init_waitqueue_head(&tblk->gcwait); + + /* enqueue transaction to commit queue */ + tblk->cqnext = NULL; + if (log->cqueue.head) { + log->cqueue.tail->cqnext = tblk; + log->cqueue.tail = tblk; + } else + log->cqueue.head = log->cqueue.tail = tblk; + + LOGGC_UNLOCK(log); + } + + jFYI(1, + ("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x\n", + le16_to_cpu(lrd->type), log->bp, log->page, + dstoffset)); + + /* page not full ? */ + if (dstoffset < LOGPSIZE - LOGPTLRSIZE) + return lsn; + + pageFull: + /* page becomes full: move on to next page */ + lmNextPage(log); + + bp = (lbuf_t *) log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + src += nbytes; + } + + return lsn; +} + + +/* + * NAME: lmNextPage() + * + * FUNCTION: write current page and allocate next page. + * + * PARAMETER: log + * + * RETURN: 0 + * + * serialization: LOG_LOCK() held on entry/exit + */ +static int lmNextPage(log_t * log) +{ + logpage_t *lp; + int lspn; /* log sequence page number */ + int pn; /* current page number */ + lbuf_t *bp; + lbuf_t *nextbp; + tblock_t *tblk; + + jFYI(1, ("lmNextPage\n")); + + INCREMENT(statLCache.pageinit); /* # of pages written */ + + /* get current log page number and log sequence page number */ + pn = log->page; + bp = log->bp; + lp = (logpage_t *) bp->l_ldata; + lspn = le32_to_cpu(lp->h.page); + + LOGGC_LOCK(log); + + /* + * write or queue the full page at the tail of write queue + */ + /* get the tail tblk on commit queue */ + tblk = log->cqueue.tail; + + /* every tblk that has a COMMIT record on the current page, + * and has not been committed, must be on the commit queue + * since tblk is queued at the commit queue at the time + * of writing its COMMIT record on the page before + * page becomes full (even though the tblk thread + * that wrote the COMMIT record may currently be + * suspended); + */ + + /* is page bound with outstanding tail tblk ? */ + if (tblk && tblk->pn == pn) { + /* mark tblk for end-of-page */ + tblk->flag |= tblkGC_EOP; + + /* if page is not already on write queue, + * just enqueue (no lbmWRITE to prevent redrive) + * buffer to wqueue to ensure correct serial order + * of the pages since log pages will be added + * continuously (tblk bound with the page hasn't + * got around to init write of the page, either + * preempted or the page got filled by its COMMIT + * record); + * pages with COMMIT are paged out explicitly by + * tblk in lmGroupCommit(); + */ + if (bp->l_wqnext == NULL) { + /* bp->l_ceor = bp->l_eor; */ + /* lp->h.eor = lp->t.eor = bp->l_ceor; */ + lbmWrite(log, bp, 0, 0); + } + } + /* page is not bound with outstanding tblk: + * init write or mark it to be redriven (lbmWRITE) + */ + else { + /* finalize the page */ + bp->l_ceor = bp->l_eor; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); + lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); + } + LOGGC_UNLOCK(log); + + /* + * allocate/initialize next page + */ + /* if log wraps, the first data page of log is 2 + * (0 never used, 1 is superblock). + */ + log->page = (pn == log->size - 1) ? 2 : pn + 1; + log->eor = LOGPHDRSIZE; /* ?
valid page empty/full at logRedo() */ + + /* allocate/initialize next log page buffer */ + nextbp = lbmAllocate(log, log->page); + nextbp->l_eor = log->eor; + log->bp = nextbp; + + /* initialize next log page */ + lp = (logpage_t *) nextbp->l_ldata; + lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); + lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); + + jFYI(1, ("lmNextPage done\n")); + return 0; +} + + +/* + * NAME: lmGroupCommit() + * + * FUNCTION: group commit + * initiate pageout of the pages with COMMIT in the order of + * page number - redrive pageout of the page at the head of + * pageout queue until full page has been written. + * + * RETURN: + * + * NOTE: + * LOGGC_LOCK serializes log group commit queue, and + * transaction blocks on the commit queue. + * N.B. LOG_LOCK is NOT held during lmGroupCommit(). + */ +int lmGroupCommit(log_t * log, tblock_t * tblk) +{ + int rc = 0; + + LOGGC_LOCK(log); + + /* group committed already ? */ + if (tblk->flag & tblkGC_COMMITTED) { + if (tblk->flag & tblkGC_ERROR) + rc = EIO; + + LOGGC_UNLOCK(log); + return rc; + } + jFYI(1, + ("lmGroup Commit: tblk = 0x%p, gcrtc = %d\n", tblk, + log->gcrtc)); + + /* + * group commit pageout in progress + */ + if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head) { + /* + * only transaction in the commit queue: + * + * start one-transaction group commit as + * its group leader. + */ + log->cflag |= logGC_PAGEOUT; + + lmGCwrite(log, 0); + } + /* lmGCwrite gives up LOGGC_LOCK, check again */ + + if (tblk->flag & tblkGC_COMMITTED) { + if (tblk->flag & tblkGC_ERROR) + rc = EIO; + + LOGGC_UNLOCK(log); + return rc; + } + + /* upcount transaction waiting for completion + */ + log->gcrtc++; + + if (tblk->xflag & COMMIT_LAZY) { + tblk->flag |= tblkGC_LAZY; + LOGGC_UNLOCK(log); + return 0; + } + tblk->flag |= tblkGC_READY; + + __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), + LOGGC_LOCK(log), LOGGC_UNLOCK(log)); + + /* removed from commit queue */ + if (tblk->flag & tblkGC_ERROR) + rc = EIO; + + LOGGC_UNLOCK(log); + return rc; +} + +/* + * NAME: lmGCwrite() + * + * FUNCTION: group commit write + * initiate write of log page, building a group of all transactions + * with commit records on that page. + * + * RETURN: None + * + * NOTE: + * LOGGC_LOCK must be held by caller. + * N.B. LOG_LOCK is NOT held during lmGroupCommit(). + */ +void lmGCwrite(log_t * log, int cant_write) +{ + lbuf_t *bp; + logpage_t *lp; + int gcpn; /* group commit page number */ + tblock_t *tblk; + tblock_t *xtblk; + + /* + * build the commit group of a log page + * + * scan commit queue and make a commit group of all + * transactions with COMMIT records on the same log page. + */ + /* get the head tblk on the commit queue */ + tblk = xtblk = log->cqueue.head; + gcpn = tblk->pn; + + while (tblk && tblk->pn == gcpn) { + xtblk = tblk; + + /* state transition: (QUEUE, READY) -> COMMIT */ + tblk->flag |= tblkGC_COMMIT; + tblk = tblk->cqnext; + } + tblk = xtblk; /* last tblk of the page */ + + /* + * pageout to commit transactions on the log page. + */ + bp = (lbuf_t *) tblk->bp; + lp = (logpage_t *) bp->l_ldata; + /* is page already full ? 
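+ * (i.e., did lmNextPage() already mark the tail tblk + * tblkGC_EOP when its COMMIT record filled the page)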
*/ + if (tblk->flag & tblkGC_EOP) { + /* mark page to free at end of group commit of the page */ + tblk->flag &= ~tblkGC_EOP; + tblk->flag |= tblkGC_FREE; + bp->l_ceor = bp->l_eor; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); + jEVENT(0, + ("gc: tclsn:0x%x, bceor:0x%x\n", tblk->clsn, + bp->l_ceor)); + lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, + cant_write); + } + /* page is not yet full */ + else { + bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); + jEVENT(0, + ("gc: tclsn:0x%x, bceor:0x%x\n", tblk->clsn, + bp->l_ceor)); + lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); + } +} + +/* + * NAME: lmPostGC() + * + * FUNCTION: group commit post-processing + * Processes transactions after their commit records have been written + * to disk, redriving log I/O if necessary. + * + * RETURN: None + * + * NOTE: + * This routine is called at interrupt time by lbmIODone + */ +void lmPostGC(lbuf_t * bp) +{ + unsigned long flags; + log_t *log = bp->l_log; + logpage_t *lp; + tblock_t *tblk; + + //LOGGC_LOCK(log); + spin_lock_irqsave(&log->gclock, flags); + /* + * current pageout of group commit completed. + * + * remove/wakeup transactions from commit queue that were + * group committed with the current log page + */ + while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) { + /* if transaction was marked GC_COMMIT then + * it has been shipped in the current pageout + * and made it to disk - it is committed. + */ + + if (bp->l_flag & lbmERROR) + tblk->flag |= tblkGC_ERROR; + + /* remove it from the commit queue */ + log->cqueue.head = tblk->cqnext; + if (log->cqueue.head == NULL) + log->cqueue.tail = NULL; + tblk->flag &= ~tblkGC_QUEUE; + tblk->cqnext = 0; + + jEVENT(0, + ("lmPostGC: tblk = 0x%p, flag = 0x%x\n", tblk, + tblk->flag)); + + if (!(tblk->xflag & COMMIT_FORCE)) + /* + * Hand tblk over to lazy commit thread + */ + txLazyUnlock(tblk); + else { + /* state transition: COMMIT -> COMMITTED */ + tblk->flag |= tblkGC_COMMITTED; + + if (tblk->flag & tblkGC_READY) { + log->gcrtc--; + LOGGC_WAKEUP(tblk); + } + } + + /* was page full before pageout ? + * (and this is the last tblk bound with the page) + */ + if (tblk->flag & tblkGC_FREE) + lbmFree(bp); + /* did page become full after pageout ? + * (and this is the last tblk bound with the page) + */ + else if (tblk->flag & tblkGC_EOP) { + /* finalize the page */ + lp = (logpage_t *) bp->l_ldata; + bp->l_ceor = bp->l_eor; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); + jEVENT(0, ("lmPostGC: calling lbmWrite\n")); + lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, + 1); + } + + } + + /* are there any transactions that have entered lmGroupCommit() + * (whose COMMITs are after that of the last log page written)? + * They are waiting for new group commit (above at (SLEEP 1)): + * select the latest ready transaction as new group leader and + * wake her up to lead her group. + */ + if ((log->gcrtc > 0) && log->cqueue.head) + /* + * Call lmGCwrite with new group leader + */ + lmGCwrite(log, 1); + + /* no transactions are ready yet (transactions are only just + * queued (GC_QUEUE) and not entered for group commit yet). + * the first transaction entering group commit + * will elect itself as new group leader.
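+ * (logGC_PAGEOUT is dropped below; the next caller of + * lmGroupCommit() that finds it clear restarts the pageout + * cycle by calling lmGCwrite() itself.)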
+ */ + else + log->cflag &= ~logGC_PAGEOUT; + + //LOGGC_UNLOCK(log); + spin_unlock_irqrestore(&log->gclock, flags); + return; +} + +/* + * NAME: lmLogSync() + * + * FUNCTION: write log SYNCPT record for specified log + * if new sync address is available + * (normally the case if sync() is executed by background + * process). + * if not, explicitly run jfs_blogsync() to initiate + * getting of new sync address. + * calculate new value of i_nextsync which determines when + * this code is called again. + * + * this is called only from lmLog(). + * + * PARAMETER: log - pointer to the log structure. + * + * RETURN: 0 + * + * serialization: LOG_LOCK() held on entry/exit + */ +int lmLogSync(log_t * log, int nosyncwait) +{ + int logsize; + int written; /* written since last syncpt */ + int free; /* free space left available */ + int delta; /* additional delta to write normally */ + int more; /* additional write granted */ + lrd_t lrd; + int lsn; + struct logsyncblk *lp; + + /* + * forward syncpt + */ + /* if last sync is same as last syncpt, + * invoke sync point forward processing to update sync. + */ + + if (log->sync == log->syncpt) { + LOGSYNC_LOCK(log); + /* ToDo: push dirty metapages out to disk */ +// bmLogSync(log); + + if (list_empty(&log->synclist)) + log->sync = log->lsn; + else { + lp = list_entry(log->synclist.next, + struct logsyncblk, synclist); + log->sync = lp->lsn; + } + LOGSYNC_UNLOCK(log); + + } + + /* if sync is different from last syncpt, + * write a SYNCPT record with syncpt = sync. + * reset syncpt = sync + */ + if (log->sync != log->syncpt) { + struct jfs_sb_info *sbi = JFS_SBI(log->sb); + /* + * We need to make sure all of the "written" metapages + * actually make it to disk + */ + fsync_inode_data_buffers(sbi->ipbmap); + fsync_inode_data_buffers(sbi->ipimap); + fsync_inode_data_buffers(sbi->direct_inode); + + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_SYNCPT); + lrd.length = 0; + lrd.log.syncpt.sync = cpu_to_le32(log->sync); + lsn = lmWriteRecord(log, NULL, &lrd, NULL); + + log->syncpt = log->sync; + } else + lsn = log->lsn; + + /* + * setup next syncpt trigger (SWAG) + */ + logsize = log->logsize; + + logdiff(written, lsn, log); + free = logsize - written; + delta = LOGSYNC_DELTA(logsize); + more = min(free / 2, delta); + if (more < 2 * LOGPSIZE) { + jEVENT(1, + ("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n\n")); + /* + * log wrapping + * + * option 1 - panic ? No! + * option 2 - shutdown file systems + * associated with log ? + * option 3 - extend log ? + */ + /* + * option 4 - second chance + * + * mark log wrapped, and continue. + * when all active transactions are completed, + * mark log valid for recovery. + * if crashed during invalid state, log state + * implies invalid log, forcing fsck(). + */ + /* mark log state log wrap in log superblock */ + /* log->state = LOGWRAP; */ + + /* reset sync point computation */ + log->syncpt = log->sync = lsn; + log->nextsync = delta; + } else + /* next syncpt trigger = written + more */ + log->nextsync = written + more; + + /* return if lmLogSync() from outside of transaction, e.g., sync() */ + if (nosyncwait) + return lsn; + + /* if number of bytes written from last sync point is more + * than 1/4 of the log size, stop new transactions from + * starting until all current transactions are completed + * by setting syncbarrier flag.
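+ * + * a worked example, assuming the usual 4K LOGPSIZE and an 8M + * inline log: LOGSYNC_DELTA = min(8M/8, 128*4K) = 512K written + * triggers the next syncpt, while the barrier below is raised + * once LOGSYNC_BARRIER = 8M/4 = 2M has been written since the + * last syncpt (logsize > 32*LOGPSIZE = 128K holds).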
+ */ + if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { + log->syncbarrier = 1; + jFYI(1, ("log barrier on: lsn=0x%x syncpt=0x%x\n", lsn, + log->syncpt)); + } + + return lsn; +} + + +/* + * NAME: lmLogOpen() + * + * FUNCTION: open the log on first open; + * insert filesystem in the active list of the log. + * + * PARAMETER: sb - superblock of the file system + * logptr - log structure (out) + * + * RETURN: + * + * serialization: + */ +int lmLogOpen(struct super_block *sb, log_t ** logptr) +{ + int rc; + kdev_t logdev; /* dev_t of log device */ + log_t *log; + + logdev = sb->s_dev; + +#ifdef _STILL_TO_PORT + /* + * open the inode representing the log device (aka log inode) + */ + if (logdev != fsdev) + goto externalLog; +#endif /* _STILL_TO_PORT */ + + /* + * in-line log in host file system + * + * file system and log have a 1-to-1 relationship; + */ +// inlineLog: + + *logptr = log = kmalloc(sizeof(log_t), GFP_KERNEL); + if (log == 0) + return ENOMEM; + + memset(log, 0, sizeof(log_t)); + log->sb = sb; /* This should be a list */ + log->flag = JFS_INLINELOG; + log->dev = logdev; + log->base = addressPXD(&JFS_SBI(sb)->logpxd); + log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> + (L2LOGPSIZE - sb->s_blocksize_bits); + log->l2bsize = sb->s_blocksize_bits; + ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); + /* + * initialize log. + */ + if ((rc = lmLogInit(log))) + goto errout10; + +#ifdef _STILL_TO_PORT + goto out; + + /* + * external log as separate logical volume + * + * file systems to log may have n-to-1 relationship; + */ + externalLog: + /* + * open log inode + * + * log inode is reserved inode of (dev_t = log device, + * fileset number = 0, i_number = 0), which acquires + * one i_count for each open by file system. + * + * hand craft dummy vfs to force iget() the special case of + * an in-memory inode allocation without on-disk inode + */ + memset(&dummyvfs, 0, sizeof(struct vfs)); + dummyvfs.filesetvfs.vfs_data = NULL; + dummyvfs.dummyvfs.dev = logdev; + dummyvfs.dummyvfs.ipmnt = NULL; + ICACHE_LOCK(); + rc = iget((struct vfs *) &dummyvfs, 0, (inode_t **) & log, 0); + ICACHE_UNLOCK(); + if (rc) + return rc; + + log->flag = 0; + log->dev = logdev; + log->base = 0; + log->size = 0; + + /* + * serialize open/close between multiple file systems + * bound with the log; + */ + ip = (inode_t *) log; + IWRITE_LOCK(ip); + + /* + * subsequent open: add file system to log active file system list + */ +#ifdef _JFS_OS2 + if (log->strat2p) +#endif /* _JFS_OS2 */ + { + if (rc = lmLogFileSystem(log, fsdev, 1)) + goto errout10; + + IWRITE_UNLOCK(ip); + + *iplog = ip; + jFYI(1, ("lmLogOpen: exit(0)\n")); + return 0; + } + + /* decouple log inode from dummy vfs */ + vPut(ip); + + /* + * first open: + */ +#ifdef _JFS_OS2 + /* + * establish access to the single/shared (already open) log device + */ + logdevfp = (void *) logStrat2; + log->strat2p = logStrat2; + log->strat3p = logStrat3; + + log->l2pbsize = 9; /* todo: when OS/2 has multiple external logs */ +#endif /* _JFS_OS2 */ + + /* + * initialize log: + */ + if (rc = lmLogInit(log)) + goto errout20; + + /* + * add file system to log active file system list + */ + if (rc = lmLogFileSystem(log, fsdev, 1)) + goto errout30; + + /* + * insert log device into log device list + */ + out: +#endif /* _STILL_TO_PORT */ + jFYI(1, ("lmLogOpen: exit(0)\n")); + return 0; + + /* + * unwind on error + */ +#ifdef _STILL_TO_PORT + errout30: /* unwind lbmLogInit() */ + lbmLogShutdown(log); + + errout20: /* close external log device */ + +#endif /* _STILL_TO_PORT */ +
errout10: /* free log inode */ + kfree(log); + + jFYI(1, ("lmLogOpen: exit(%d)\n", rc)); + return rc; +} + + +/* + * NAME: lmLogInit() + * + * FUNCTION: log initialization at first log open. + * + * logredo() (or logformat()) should have been run previously. + * initialize the log inode from log superblock. + * set the log state in the superblock to LOGMOUNT and + * write SYNCPT log record. + * + * PARAMETER: log - log structure + * + * RETURN: 0 - if ok + * EINVAL - bad log magic number or superblock dirty + * error returned from logwait() + * + * serialization: single first open thread + */ +static int lmLogInit(log_t * log) +{ + int rc = 0; + lrd_t lrd; + logsuper_t *logsuper; + lbuf_t *bpsuper; + lbuf_t *bp; + logpage_t *lp; + int lsn; + + jFYI(1, ("lmLogInit: log:0x%p\n", log)); + + /* + * log inode is overlaid on generic inode where + * dinode has been zeroed out by iRead(); + */ + + /* + * initialize log i/o + */ + if ((rc = lbmLogInit(log))) + return rc; + + /* + * validate log superblock + */ + if ((rc = lbmRead(log, 1, &bpsuper))) + goto errout10; + + logsuper = (logsuper_t *) bpsuper->l_ldata; + + if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { + jERROR(1, ("*** Log Format Error ! ***\n")); + rc = EINVAL; + goto errout20; + } + + /* logredo() should have been run successfully. */ + if (logsuper->state != cpu_to_le32(LOGREDONE)) { + jERROR(1, ("*** Log Is Dirty ! ***\n")); + rc = EINVAL; + goto errout20; + } + + /* initialize log inode from log superblock */ + if (log->flag & JFS_INLINELOG) { + if (log->size != le32_to_cpu(logsuper->size)) { + rc = EINVAL; + goto errout20; + } + jFYI(0, + ("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x\n", + log, (unsigned long long) log->base, log->size)); + } else { + log->size = le32_to_cpu(logsuper->size); + jFYI(0, + ("lmLogInit: external log:0x%p base:0x%Lx size:0x%x\n", + log, (unsigned long long) log->base, log->size)); + } + + log->flag |= JFS_GROUPCOMMIT; +/* + log->flag |= JFS_LAZYCOMMIT; +*/ + log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; + log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); + + /* + * initialize for log append write mode + */ + /* establish current/end-of-log page/buffer */ + if ((rc = lbmRead(log, log->page, &bp))) + goto errout20; + + lp = (logpage_t *) bp->l_ldata; + + jFYI(1, ("lmLogInit: lsn:0x%x page:%d eor:%d:%d\n", + le32_to_cpu(logsuper->end), log->page, log->eor, + le16_to_cpu(lp->h.eor))); + +// ASSERT(log->eor == lp->h.eor); + + log->bp = bp; + bp->l_pn = log->page; + bp->l_eor = log->eor; + + /* initialize the group commit serialization lock */ + LOGGC_LOCK_INIT(log); + + /* if current page is full, move on to next page */ + if (log->eor >= LOGPSIZE - LOGPTLRSIZE) + lmNextPage(log); + + /* allocate/initialize the log write serialization lock */ + LOG_LOCK_INIT(log); + + /* + * initialize log syncpoint + */ + /* + * write the first SYNCPT record with syncpoint = 0 + * (i.e., log redo up to HERE !); + * remove current page from lbm write queue at end of pageout + * (to write log superblock update), but do not release to freelist; + */ + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_SYNCPT); + lrd.length = 0; + lrd.log.syncpt.sync = 0; + lsn = lmWriteRecord(log, NULL, &lrd, NULL); + bp = log->bp; + bp->l_ceor = bp->l_eor; + lp = (logpage_t *) bp->l_ldata; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); + lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); + if ((rc = lbmIOWait(bp, 0))) + goto errout30; + + /* initialize logsync parameters */ + log->logsize = (log->size - 2)
<< L2LOGPSIZE; + log->lsn = lsn; + log->syncpt = lsn; + log->sync = log->syncpt; + log->nextsync = LOGSYNC_DELTA(log->logsize); + init_waitqueue_head(&log->syncwait); + + jFYI(1, ("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x\n", + log->lsn, log->syncpt, log->sync)); + + LOGSYNC_LOCK_INIT(log); + + INIT_LIST_HEAD(&log->synclist); + + log->cqueue.head = log->cqueue.tail = 0; + + log->count = 0; + log->yah = NULL; + + /* + * initialize for lazy/group commit + */ + log->clsn = lsn; + + /* + * update/write superblock + */ + logsuper->state = cpu_to_le32(LOGMOUNT); + log->serial = le32_to_cpu(logsuper->serial) + 1; + logsuper->serial = cpu_to_le32(log->serial); + lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); + if ((rc = lbmIOWait(bpsuper, lbmFREE))) + goto errout30; + + jFYI(1, ("lmLogInit: exit(%d)\n", rc)); + return 0; + + /* + * unwind on error + */ + errout30: /* release log page */ + lbmFree(bp); + + errout20: /* release log superblock */ + lbmFree(bpsuper); + + errout10: /* unwind lbmLogInit() */ + lbmLogShutdown(log); + + jFYI(1, ("lmLogInit: exit(%d)\n", rc)); + return rc; +} + + +/* + * NAME: lmLogClose() + * + * FUNCTION: remove file system from active list of log + * and close it on last close. + * + * PARAMETER: sb - superblock + * log - log inode + * + * RETURN: errors from subroutines + * + * serialization: + */ +int lmLogClose(struct super_block *sb, log_t * log) +{ + int rc; + + jFYI(1, ("lmLogClose: log:0x%p\n", log)); + + /* + * in-line log in host file system + */ +// inlineLog: +#ifdef _STILL_TO_PORT + if (log->flag & JFS_INLINELOG) { + rc = lmLogShutdown(log); + + goto out1; + } + + /* + * external log as separate logical volume + */ + externalLog: + + /* serialize open/close between multiple file systems + * associated with the log + */ + IWRITE_LOCK(iplog); + + /* + * remove file system from log active file system list + */ + rc = lmLogFileSystem(log, fsdev, 0); + + if (iplog->i_count > 1) + goto out2; + + /* + * last close: shut down log + */ + rc = ((rc1 = lmLogShutdown(log)) && rc == 0) ? rc1 : rc; + + out1: +#else /* _STILL_TO_PORT */ + rc = lmLogShutdown(log); +#endif /* _STILL_TO_PORT */ + +// out2: + + jFYI(0, ("lmLogClose: exit(%d)\n", rc)); + return rc; +} + + +/* + * NAME: lmLogShutdown() + * + * FUNCTION: log shutdown at last LogClose(). + * + * write log syncpt record. + * update super block to set redone flag to 0. + * + * PARAMETER: log - log inode + * + * RETURN: 0 - success + * + * serialization: single last close thread + */ +static int lmLogShutdown(log_t * log) +{ + int rc; + lrd_t lrd; + int lsn; + logsuper_t *logsuper; + lbuf_t *bpsuper; + lbuf_t *bp; + logpage_t *lp; + + jFYI(1, ("lmLogShutdown: log:0x%p\n", log)); + + if (log->cqueue.head || !list_empty(&log->synclist)) { + /* + * If there was very recent activity, we may need to wait + * for the lazycommit thread to catch up + */ + int i; + + for (i = 0; i < 100; i++) { /* Too much? 
*/ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ / 10); + if ((log->cqueue.head == NULL) && + list_empty(&log->synclist)) + break; + } + } + assert(log->cqueue.head == NULL); + assert(list_empty(&log->synclist)); + + /* + * We need to make sure all of the "written" metapages + * actually make it to disk + */ +#if ( (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8)) || \ + ( (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,8)) && defined(MODULE) ) ) + /* + * fsync_no_super not added until 2.4.8, not exported until 2.4.9 + */ + { + struct jfs_sb_info *sbi = JFS_SBI(log->sb); + + fsync_inode_data_buffers(sbi->ipbmap); + fsync_inode_data_buffers(sbi->ipimap); + fsync_inode_data_buffers(sbi->direct_inode); + } +#else + fsync_no_super(log->sb->s_dev); +#endif + + /* + * write the last SYNCPT record with syncpoint = 0 + * (i.e., log redo up to HERE !) + */ + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_SYNCPT); + lrd.length = 0; + lrd.log.syncpt.sync = 0; + lsn = lmWriteRecord(log, NULL, &lrd, NULL); + bp = log->bp; + lp = (logpage_t *) bp->l_ldata; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); + lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); + lbmIOWait(log->bp, lbmFREE); + + /* + * synchronous update log superblock + * mark log state as shutdown cleanly + * (i.e., Log does not need to be replayed). + */ + if ((rc = lbmRead(log, 1, &bpsuper))) + goto out; + + logsuper = (logsuper_t *) bpsuper->l_ldata; + logsuper->state = cpu_to_le32(LOGREDONE); + logsuper->end = cpu_to_le32(lsn); + lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); + rc = lbmIOWait(bpsuper, lbmFREE); + + jFYI(1, ("lmLogShutdown: lsn:0x%x page:%d eor:%d\n", + lsn, log->page, log->eor)); + + out: + /* + * shutdown per log i/o + */ + lbmLogShutdown(log); + + if (rc) { + jFYI(1, ("lmLogShutdown: exit(%d)\n", rc)); + } + return rc; +} + + +#ifdef _STILL_TO_PORT +/* + * NAME: lmLogFileSystem() + * + * FUNCTION: insert (activate = true)/remove (activate = false) + * file system into/from log active file system list. + * + * PARAMETER: log - pointer to log's inode. + * fsdev - dev_t of filesystem. + * serial - pointer to returned log serial number + * activate - insert/remove device from active list. + * + * RETURN: 0 - success + * errors returned by vms_iowait(). + * + * serialization: IWRITE_LOCK(log inode) held on entry/exit + */ +static int lmLogFileSystem(log_t * log, dev_t fsdev, int activate) +{ + int rc = 0; + int bit, word; + logsuper_t *logsuper; + lbuf_t *bpsuper; + + /* + * insert/remove file system device into/from the log active + * file system list. + */ + if ((rc = lbmRead(log, 1, &bpsuper))) + return rc; + + logsuper = (logsuper_t *) bpsuper->l_ldata; + bit = MINOR(fsdev); + word = bit / 32; + bit -= 32 * word; + if (activate) + logsuper->active[word] |= + cpu_to_le32((LEFTMOSTONE >> bit)); + else + logsuper->active[word] &= + cpu_to_le32((~(LEFTMOSTONE >> bit))); + + /* + * synchronous write log superblock: + * + * write sidestream bypassing write queue: + * at file system mount, log super block is updated for + * activation of the file system before any log record + * (MOUNT record) of the file system, and at file system + * unmount, all meta data for the file system has been + * flushed before log super block is updated for deactivation + * of the file system.
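+ * + * e.g. a file system whose MINOR(fsdev) is 40 occupies word 1, + * bit 8 of the active map (mask LEFTMOSTONE >> 8): set on + * activate, cleared on deactivate.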
+ */ + lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); + rc = lbmIOWait(bpsuper, lbmFREE); + + return rc; +} +#endif /* _STILL_TO_PORT */ + + +/* + * lmLogQuiesce() + */ +int lmLogQuiesce(log_t * log) +{ + int rc; + + rc = lmLogShutdown(log); + + return rc; +} + + +/* + * lmLogResume() + */ +int lmLogResume(log_t * log, struct super_block *sb) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + int rc; + + log->base = addressPXD(&sbi->logpxd); + log->size = + (lengthPXD(&sbi->logpxd) << sb->s_blocksize_bits) >> L2LOGPSIZE; + rc = lmLogInit(log); + + return rc; +} + + +/* + * lmInit() + * + * initialization at JFS bringup + */ +int lmInit() +{ + /* + * allocate/initialize per system log resources + */ + /* allocate/initialize active log device list */ + lCache.nLog = 0; + lCache.writes_submitted = 0; + lCache.writes_completed = 0; + + /* + * initialize log buffer manager (lbm) + */ + /* initialize log buffer cache lock */ + LCACHE_LOCK_INIT(); + + log_redrive_list = NULL; + + return 0; +} + + +/* + * log buffer manager (lbm) + * ------------------------ + * + * special purpose buffer manager supporting log i/o requirements. + * + * per log write queue: + * log pageout occurs in serial order by fifo write queue and + * restricts to a single i/o in progress at any one time. + * a circular singly-linked list + * (log->wqueue points to the tail, and buffers are linked via + * the bp->l_wqnext field), and + * maintains log pages in pageout or waiting for pageout in serial pageout. + */ + +/* + * lbmLogInit() + * + * initialize per log I/O setup at lmLogInit() + */ +static int lbmLogInit(log_t * log) +{ /* log inode */ + int i; + lbuf_t *lbuf; + + jFYI(1, ("lbmLogInit: log:0x%p\n", log)); + + /* initialize current buffer cursor */ + log->bp = NULL; + + /* initialize log device write queue */ + log->wqueue = NULL; + + /* + * Each log has its own buffer pages allocated to it. These are + * not managed by the page cache. This ensures that a transaction + * writing to the log does not block trying to allocate a page from + * the page cache (for the log). This would be bad, since page + * allocation waits on the kswapd thread that may be committing inodes + * which would cause log activity. Was that clear? I'm trying to + * avoid deadlock here.
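+ * + * (the pool built below is LOGPAGES lbuf_t's, each backed by a + * private page from __get_free_page() and chained through + * l_freelist onto log->lbuf_free; lbmAllocate()/lbmfree() + * recycle them for the life of the log.)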
+ */ + init_waitqueue_head(&log->free_wait); + + log->lbuf_free = NULL; + + for (i = 0; i < LOGPAGES; i++) { + lbuf = kmalloc(sizeof(lbuf_t), GFP_KERNEL); + if (lbuf == 0) + goto error; + lbuf->l_bh.b_data = lbuf->l_ldata = + (char *) __get_free_page(GFP_KERNEL); + if (lbuf->l_ldata == 0) { + kfree(lbuf); + goto error; + } + lbuf->l_log = log; + init_waitqueue_head(&lbuf->l_ioevent); + + lbuf->l_bh.b_size = LOGPSIZE; + lbuf->l_bh.b_dev = log->dev; + lbuf->l_bh.b_end_io = lbmIODone; + lbuf->l_bh.b_private = lbuf; + lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata); + lbuf->l_bh.b_state = 0; + init_waitqueue_head(&lbuf->l_bh.b_wait); + + lbuf->l_freelist = log->lbuf_free; + log->lbuf_free = lbuf; + } + + return (0); + + error: + lbmLogShutdown(log); + return (ENOMEM); +} + + +/* + * lbmLogShutdown() + * + * finalize per log I/O setup at lmLogShutdown() + */ +static void lbmLogShutdown(log_t * log) +{ + lbuf_t *lbuf; + + jFYI(1, ("lbmLogShutdown: log:0x%p\n", log)); + + lbuf = log->lbuf_free; + while (lbuf) { + lbuf_t *next = lbuf->l_freelist; + free_page((unsigned long) lbuf->l_ldata); + kfree(lbuf); + lbuf = next; + } + + log->bp = NULL; +} + + +/* + * lbmAllocate() + * + * allocate an empty log buffer + */ +static lbuf_t *lbmAllocate(log_t * log, int pn) +{ + lbuf_t *bp; + unsigned long flags; + + /* + * recycle from log buffer freelist if any + */ + LCACHE_LOCK(flags); + LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); + log->lbuf_free = bp->l_freelist; + LCACHE_UNLOCK(flags); + + bp->l_flag = 0; + + bp->l_wqnext = NULL; + bp->l_freelist = NULL; + + bp->l_pn = pn; + bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); + bp->l_bh.b_blocknr = bp->l_blkno; + bp->l_ceor = 0; + + return bp; +} + + +/* + * lbmFree() + * + * release a log buffer to freelist + */ +static void lbmFree(lbuf_t * bp) +{ + unsigned long flags; + + LCACHE_LOCK(flags); + + lbmfree(bp); + + LCACHE_UNLOCK(flags); +} + +static void lbmfree(lbuf_t * bp) +{ + log_t *log = bp->l_log; + + assert(bp->l_wqnext == NULL); + + /* + * return the buffer to head of freelist + */ + bp->l_freelist = log->lbuf_free; + log->lbuf_free = bp; + + wake_up(&log->free_wait); + return; +} + + +#ifdef _THIS_IS_NOT_USED +/* + * lbmRelease() + * + * remove the log buffer from log device write queue; + */ +static void lbmRelease(log_t * log, uint flag) +{ + lbuf_t *bp, *tail; + unsigned long flags; + + bp = log->bp; + + LCACHE_LOCK(flags); + + tail = log->wqueue; + + /* single element queue */ + if (bp == tail) { + log->wqueue = NULL; + bp->l_wqnext = NULL; + } + /* multi element queue */ + else { + tail->l_wqnext = bp->l_wqnext; + bp->l_wqnext = NULL; + } + + if (flag & lbmFREE) + lbmfree(bp); + + LCACHE_UNLOCK(flags); +} +#endif /* _THIS_IS_NOT_USED */ + + +/* + * lbmRead() + */ +static int lbmRead(log_t * log, int pn, lbuf_t ** bpp) +{ + lbuf_t *bp; + + /* + * allocate a log buffer + */ + *bpp = bp = lbmAllocate(log, pn); + jFYI(1, ("lbmRead: bp:0x%p pn:0x%x\n", bp, pn)); + + bp->l_flag |= lbmREAD; + bp->l_bh.b_reqnext = NULL; + clear_bit(BH_Uptodate, &bp->l_bh.b_state); + lock_buffer(&bp->l_bh); + set_bit(BH_Mapped, &bp->l_bh.b_state); + set_bit(BH_Req, &bp->l_bh.b_state); + bp->l_bh.b_rdev = bp->l_bh.b_dev; + bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9); + generic_make_request(READ, &bp->l_bh); + run_task_queue(&tq_disk); + + wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); + + return 0; +} + + +/* + * lbmWrite() + * + * buffer at head of pageout queue stays after completion of + * partial-page pageout and 
is redriven by explicit initiation of + * pageout by caller until full-page pageout is completed and + * released. + * + * device driver i/o done redrives pageout of new buffer at + * head of pageout queue when current buffer at head of pageout + * queue is released at the completion of its full-page pageout. + * + * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). + * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() + */ +static void lbmWrite(log_t * log, lbuf_t * bp, int flag, int cant_block) +{ + lbuf_t *tail; + unsigned long flags; + + jFYI(1, ("lbmWrite: bp:0x%p flag:0x%x pn:0x%x\n", + bp, flag, bp->l_pn)); + + /* map the logical block address to physical block address */ + bp->l_blkno = + log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); + + LCACHE_LOCK(flags); /* disable+lock */ + + /* + * initialize buffer for device driver + */ + bp->l_flag = flag; + + /* + * insert bp at tail of write queue associated with log + * + * (request is either for bp already/currently at head of queue + * or new bp to be inserted at tail) + */ + tail = log->wqueue; + + /* is buffer not already on write queue ? */ + if (bp->l_wqnext == NULL) { + /* insert at tail of wqueue */ + if (tail == NULL) { + log->wqueue = bp; + bp->l_wqnext = bp; + } else { + log->wqueue = bp; + bp->l_wqnext = tail->l_wqnext; + tail->l_wqnext = bp; + } + + tail = bp; + } + + /* is buffer at head of wqueue and for write ? */ + if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { + LCACHE_UNLOCK(flags); /* unlock+enable */ + return; + } + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + if (cant_block) { + spin_lock_irqsave(&async_lock, flags); + bp->l_redrive_next = log_redrive_list; + log_redrive_list = bp; + spin_unlock_irqrestore(&async_lock, flags); + wake_up_process(jfsIOtask); + } else { + if (!(flag & lbmSYNC)) { + /* + * Can't hold spinlock during I/O + */ + LOGGC_UNLOCK(log); + } + + /* + * initiate pageout of the page at head of write queue + */ + lbmStartIO(bp); + + if (!(flag & lbmSYNC)) { + LOGGC_LOCK(log); + } + } +} + + +/* + * lbmDirectWrite() + * + * initiates pageout bypassing write queue for sidestream + * (e.g., log superblock) write; + */ +static void lbmDirectWrite(log_t * log, lbuf_t * bp, int flag) +{ + jEVENT(0, ("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x\n", + bp, flag, bp->l_pn)); + + /* + * initialize buffer for device driver + */ + bp->l_flag = flag | lbmDIRECT; + + /* map the logical block address to physical block address */ + bp->l_blkno = + log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); + + /* + * initiate pageout of the page + */ + lbmStartIO(bp); +} + + +/* + * NAME: lbmStartIO() + * + * FUNCTION: Interface to DD strategy routine + * + * RETURN: none + * + * serialization: LCACHE_LOCK() is NOT held during log i/o; + */ +void lbmStartIO(lbuf_t * bp) +{ + jFYI(1, ("lbmStartIO\n")); + + bp->l_bh.b_reqnext = NULL; + set_bit(BH_Dirty, &bp->l_bh.b_state); +// lock_buffer(&bp->l_bh); + assert(!test_bit(BH_Lock, &bp->l_bh.b_state)); + set_bit(BH_Lock, &bp->l_bh.b_state); + + set_bit(BH_Mapped, &bp->l_bh.b_state); + set_bit(BH_Req, &bp->l_bh.b_state); + bp->l_bh.b_rdev = bp->l_bh.b_dev; + bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9); + generic_make_request(WRITE, &bp->l_bh); + + lCache.writes_submitted++; + run_task_queue(&tq_disk); + + jFYI(1, ("lbmStartIO done\n")); +} + + +/* + * lbmIOWait() + */ +static int lbmIOWait(lbuf_t * bp, int flag) +{ + unsigned long flags; + int rc = 0; + + jFYI(1, + ("lbmIOWait1: bp:0x%p flag:0x%x:0x%x\n", bp,
bp->l_flag, + flag)); + + LCACHE_LOCK(flags); /* disable+lock */ + + LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); + + rc = (bp->l_flag & lbmERROR) ? EIO : 0; + + if (flag & lbmFREE) + lbmfree(bp); + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + jFYI(1, + ("lbmIOWait2: bp:0x%p flag:0x%x:0x%x\n", bp, bp->l_flag, + flag)); + return rc; +} + +/* + * lbmIODone() + * + * executed at INTIODONE level + */ +static void lbmIODone(struct buffer_head *bh, int uptodate) +{ + lbuf_t *bp = bh->b_private; + lbuf_t *nextbp, *tail; + log_t *log; + unsigned long flags; + + /* + * get back jfs buffer bound to the i/o buffer + */ + jEVENT(0, ("lbmIODone: bp:0x%p flag:0x%x\n", bp, bp->l_flag)); + + LCACHE_LOCK(flags); /* disable+lock */ + + unlock_buffer(&bp->l_bh); + bp->l_flag |= lbmDONE; + + if (!uptodate) { + bp->l_flag |= lbmERROR; + + jERROR(1, ("lbmIODone: I/O error in JFS log\n")); + } + + /* + * pagein completion + */ + if (bp->l_flag & lbmREAD) { + bp->l_flag &= ~lbmREAD; + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + /* wakeup I/O initiator */ + LCACHE_WAKEUP(&bp->l_ioevent); + + return; + } + + lCache.writes_completed++; + /* + * pageout completion + * + * the bp at the head of write queue has completed pageout. + * + * if single-commit/full-page pageout, remove the current buffer + * from head of pageout queue, and redrive pageout with + * the new buffer at head of pageout queue; + * otherwise, the partial-page pageout buffer stays at + * the head of pageout queue to be redriven for pageout + * by lmGroupCommit() until full-page pageout is completed. + */ + bp->l_flag &= ~lbmWRITE; +// INCREMENT(statLCache.pagedone); + + /* update committed lsn */ + log = bp->l_log; + log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; + + if (bp->l_flag & lbmDIRECT) { + LCACHE_WAKEUP(&bp->l_ioevent); + LCACHE_UNLOCK(flags); + return; + } + + tail = log->wqueue; + + /* single element queue */ + if (bp == tail) { + /* remove head buffer of full-page pageout + * from log device write queue + */ + if (bp->l_flag & lbmRELEASE) { + log->wqueue = NULL; + bp->l_wqnext = NULL; + } + } + /* multi element queue */ + else { + /* remove head buffer of full-page pageout + * from log device write queue + */ + if (bp->l_flag & lbmRELEASE) { + nextbp = tail->l_wqnext = bp->l_wqnext; + bp->l_wqnext = NULL; + + /* + * redrive pageout of next page at head of write queue: + * redrive next page without any bound tblk + * (i.e., page w/o any COMMIT records), or + * first page of new group commit which has been + * queued after current page (subsequent pageout + * is performed synchronously, except page without + * any COMMITs) by lmGroupCommit() as indicated + * by lbmWRITE flag; + */ + if (nextbp->l_flag & lbmWRITE) { + /* + * We can't do the I/O at interrupt time. 
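+ * (lbmStartIO() ends in generic_make_request(), which may + * block waiting for a free request slot.)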
+ * The jfsIO thread can do it + */ + jEVENT(0, + ("lbmRedrive: bp:0x%p flag:0x%x\n", + bp, bp->l_flag)); + /* + * Don't need irqsave, we're already holding + * LCACHE_LOCK + */ + spin_lock(&async_lock); + nextbp->l_redrive_next = log_redrive_list; + log_redrive_list = nextbp; + spin_unlock(&async_lock); + wake_up_process(jfsIOtask); + } + } + } + + /* + * synchronous pageout: + * + * buffer has not necessarily been removed from write queue + * (e.g., synchronous write of partial-page with COMMIT): + * leave buffer for i/o initiator to dispose + */ + if (bp->l_flag & lbmSYNC) { + LCACHE_UNLOCK(flags); /* unlock+enable */ + + /* wakeup I/O initiator */ + LCACHE_WAKEUP(&bp->l_ioevent); + } + + /* + * Group Commit pageout: + */ + else if (bp->l_flag & lbmGC) { + LCACHE_UNLOCK(flags); + lmPostGC(bp); + } + + /* + * asynchronous pageout: + * + * buffer must have been removed from write queue: + * insert buffer at head of freelist where it can be recycled + */ + else { + assert(bp->l_flag & lbmRELEASE); + assert(bp->l_flag & lbmFREE); + lbmfree(bp); + + LCACHE_UNLOCK(flags); /* unlock+enable */ + } +} + +/* + * We cannot redrive the log I/O at interrupt time, so this is called by + * the jfsIO thread with async_lock held. + */ +void jfs_logredrive(void) +{ + lbuf_t *bp; + + while ((bp = log_redrive_list)) { + log_redrive_list = bp->l_redrive_next; + bp->l_redrive_next = NULL; + spin_unlock_irq(&async_lock); + lbmStartIO(bp); + spin_lock_irq(&async_lock); + } +} + + +#ifdef _STILL_TO_PORT +/* + * lbmDirectIODone() + * + * iodone() for lbmDirectWrite() to bypass write queue; + * executed at INTIODONE level; + */ +static void lbmDirectIODone(iobuf_t * iobp) +{ + lbuf_t *bp; + unsigned long flags; + + /* + * get back jfs buffer bound to the io buffer + */ + bp = (lbuf_t *) iobp->b_jfsbp; + jEVENT(0, + ("lbmDirectIODone: bp:0x%p flag:0x%x\n", bp, bp->l_flag)); + + LCACHE_LOCK(flags); /* disable+lock */ + + bp->l_flag |= lbmDONE; + + if (iobp->b_flags & B_ERROR) { + bp->l_flag |= lbmERROR; +#ifdef _JFS_OS2 + SysLogError(); +#endif + } + + /* + * pageout completion + */ + bp->l_flag &= ~lbmWRITE; + + /* + * synchronous pageout: + */ + if (bp->l_flag & lbmSYNC) { + LCACHE_UNLOCK(flags); /* unlock+enable */ + + /* wakeup I/O initiator */ + LCACHE_WAKEUP(&bp->l_ioevent); + } + /* + * asynchronous pageout: + */ + else { + assert(bp->l_flag & lbmRELEASE); + assert(bp->l_flag & lbmFREE); + lbmfree(bp); + + LCACHE_UNLOCK(flags); /* unlock+enable */ + } +} +#endif /* _STILL_TO_PORT */ + +#ifdef _STILL_TO_PORT +/* + * NAME: lmLogFormat()/jfs_logform() + * + * FUNCTION: format file system log (ref. jfs_logform()). 
+ * + * PARAMETERS: + * log - log inode (with common mount inode base); + * logAddress - start address of log space in FS block; + * logSize - length of log space in FS block; + * + * RETURN: 0 - success + * -1 - i/o error + */ +int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize) +{ + int rc = 0; + cbuf_t *bp; + logsuper_t *logsuper; + logpage_t *lp; + int lspn; /* log sequence page number */ + struct lrd *lrd_ptr; + int npbperpage, npages; + + jFYI(0, ("lmLogFormat: logAddress:%Ld logSize:%d\n", + logAddress, logSize)); + + /* allocate a JFS buffer */ + bp = rawAllocate(); + + /* map the logical block address to physical block address */ + bp->cm_blkno = logAddress << ipmnt->i_l2bfactor; + + npbperpage = LOGPSIZE >> ipmnt->i_l2pbsize; + npages = logSize / (LOGPSIZE >> ipmnt->i_l2bsize); + + /* + * log space: + * + * page 0 - reserved; + * page 1 - log superblock; + * page 2 - log data page: A SYNC log record is written + * into this page at logform time; + * pages 3-N - log data page: set to empty log data pages; + */ + /* + * init log superblock: log page 1 + */ + logsuper = (logsuper_t *) bp->cm_cdata; + + logsuper->magic = cpu_to_le32(LOGMAGIC); + logsuper->version = cpu_to_le32(LOGVERSION); + logsuper->state = cpu_to_le32(LOGREDONE); + logsuper->flag = cpu_to_le32(ipmnt->i_mntflag); /* ? */ + logsuper->size = cpu_to_le32(npages); + logsuper->bsize = cpu_to_le32(ipmnt->i_bsize); + logsuper->l2bsize = cpu_to_le32(ipmnt->i_l2bsize); + logsuper->end = + cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + + /* + * init pages 2 to npages-1 as log data pages: + * + * log page sequence number (lpsn) initialization: + * + * pn: 0 1 2 3 n-1 + * +-----+-----+=====+=====+===.....===+=====+ + * lspn: N-1 0 1 N-2 + * <--- N page circular file ----> + * + * the N (= npages-2) data pages of the log is maintained as + * a circular file for the log records; + * lpsn grows by 1 monotonically as each log page is written + * to the circular file of the log; + * Since the AIX DUMMY log record is dropped for this XJFS, + * and setLogpage() will not reset the page number even if + * the eor is equal to LOGPHDRSIZE. In order for binary search + * still work in find log end process, we have to simulate the + * log wrap situation at the log format time. + * The 1st log page written will have the highest lpsn. Then + * the succeeding log pages will have ascending order of + * the lspn starting from 0, ... 
(N-2) + */ + lp = (logpage_t *) bp->cm_cdata; + + /* + * initialize 1st log page to be written: lpsn = N - 1, + * write a SYNCPT log record is written to this page + */ + lp->h.page = lp->t.page = cpu_to_le32(npages - 3); + lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); + + lrd_ptr = (struct lrd *) &lp->data; + lrd_ptr->logtid = 0; + lrd_ptr->backchain = 0; + lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); + lrd_ptr->length = 0; + lrd_ptr->log.syncpt.sync = 0; + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + + /* + * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) + */ + for (lspn = 0; lspn < npages - 3; lspn++) { + lp->h.page = lp->t.page = cpu_to_le32(lspn); + lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + } + + /* + * finalize log + */ + /* release the buffer */ + rawRelease(bp); + + return rc; +} +#endif /* _STILL_TO_PORT */ + + +#ifdef _JFS_STATISTICS +/* + * lmStatistics() + */ +lmStatistics(caddr_t arg, int flag) +{ + int rc; + + /* copy out the argument */ + if (rc = copyout((caddr_t) & statLCache, (caddr_t) arg, + sizeof(struct statLCache))) + return rc; + + /* reset the counters */ + if (flag) { + statLCache.commit = 0; + statLCache.pageinit = 0; + statLCache.pagedone = 0; + statLCache.sync = 0; + statLCache.maxbufcnt = 0; + } + + return 0; +} +#endif /* _JFS_STATISTICS */ + +#if CONFIG_PROC_FS +int jfs_logmgr_read(char *buffer, char **start, off_t offset, int length, + int *eof, void *data) +{ + int len = 0; + off_t begin; + unsigned long flags; + + LCACHE_LOCK(flags); + len += sprintf(buffer, + "JFS Logmgr\n" + "============\n" + "redrive_list = 0x%p\n" + "writes_submitted = %d\n" + "writes_completed = %d\n", + log_redrive_list, + lCache.writes_submitted, lCache.writes_completed); + LCACHE_UNLOCK(flags); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_metapage.c linuxppc64_2_4/fs/jfs/jfs_metapage.c --- ../kernel.org/linux/fs/jfs/jfs_metapage.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_metapage.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,642 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Module: jfs/jfs_metapage.c + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern void jfs_logredrive(void); +extern struct task_struct *jfsIOtask; +extern struct task_struct *jfsCommitTask; +extern struct semaphore jfsIOsem; +extern lbuf_t *log_redrive_list; + +static struct list_head async_list; +spinlock_t async_lock; + +static unsigned int metapages = 1024; /* ??? 
Need a better number */ +static unsigned int free_metapages; +static metapage_t *metapage_buf; +static unsigned long meta_order; +static metapage_t *meta_free_list = NULL; +static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED; +static wait_queue_head_t meta_wait; + +#define HASH_BITS 10 /* This makes hash_table 1 4K page */ +#define HASH_SIZE (1 << HASH_BITS) +static metapage_t **hash_table = NULL; +static unsigned long hash_order; + +#define LOCK_METAPAGE(mp) set_bit(META_locked, &(mp)->flag) +#define UNLOCK_METAPAGE(mp) clear_bit(META_locked, &(mp)->flag) +#define METAPAGE_LOCKED(mp) test_bit(META_locked, &(mp)->flag) + +/* We're currently re-evaluating the method we use to write metadata + * pages. Currently, we have to make sure there no dirty buffer_heads + * hanging around after we free the metadata page, since the same + * physical disk blocks may be used in a different address space and we + * can't write old data over the good data. + * + * The best way to do this now is with block_invalidate_page. However, + * this is only available in the newer kernels and is not exported + * to modules. block_flushpage is the next best, but it too is not exported + * to modules. + * + * In a module, about the best we have is generic_buffer_fdatasync. This + * synchronously writes any dirty buffers. This is not optimal, but it will + * keep old dirty buffers from overwriting newer data. + */ +static inline void invalidate_page(metapage_t *mp) +{ +#ifdef MODULE + generic_buffer_fdatasync(mp->mapping->host, mp->index, mp->index + 1); +#else + while (TryLockPage(mp->page)) + wait_on_page(mp->page); + block_flushpage(mp->page, 0); + UnlockPage(mp->page); +#endif + page_cache_release(mp->page); +} +/* + * meta_lock must be held by caller + */ +static inline void wait_on_metapage(metapage_t * mp) +{ + if (!METAPAGE_LOCKED(mp)) + return; + + __SLEEP_COND(mp->wait, !METAPAGE_LOCKED(mp), spin_lock(&meta_lock), + spin_unlock(&meta_lock)); +} + +int __init metapage_init(void) +{ + int i; + metapage_t *last = NULL; + metapage_t *mp; + + /* + * Initialize wait queue + */ + init_waitqueue_head(&meta_wait); + + INIT_LIST_HEAD(&async_list); + async_lock = SPIN_LOCK_UNLOCKED; + + /* + * Allocate the metapage structures + */ + for (meta_order = 0; + ((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages; + meta_order++); + metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t); + + jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n", + sizeof(metapage_t), metapages)); + + metapage_buf = + (metapage_t *) __get_free_pages(GFP_KERNEL, meta_order); + assert(metapage_buf); + memset(metapage_buf, 0, PAGE_SIZE << meta_order); + + mp = metapage_buf; + for (i = 0; i < metapages; i++, mp++) { + mp->flag = 0; + set_bit(META_free, &mp->flag); + init_waitqueue_head(&mp->wait); + mp->hash_next = last; + last = mp; + } + meta_free_list = last; + free_metapages = metapages; + + /* + * Now the hash list + */ + for (hash_order = 0; + ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; + hash_order++); + hash_table = + (metapage_t **) __get_free_pages(GFP_KERNEL, hash_order); + assert(hash_table); + memset(hash_table, 0, PAGE_SIZE << hash_order); + + return 0; +} + +void __exit metapage_exit(void) +{ + free_pages((unsigned long) metapage_buf, meta_order); + free_pages((unsigned long) hash_table, hash_order); + metapage_buf = 0; /* This is a signal to the jfsIOwait thread */ +} + +/* + * Get metapage structure from freelist + * + * Caller holds meta_lock + */ +static metapage_t *alloc_metapage(int 
*dropped_lock) +{ + metapage_t *new; + + *dropped_lock = FALSE; + + /* + * Reserve two metapages for the lazy commit thread. Otherwise + * we may deadlock with holders of metapages waiting for tlocks + * that lazy thread should be freeing. + */ + if ((free_metapages < 3) && (current != jfsCommitTask)) { + *dropped_lock = TRUE; + __SLEEP_COND(meta_wait, (free_metapages > 2), + spin_lock(&meta_lock), spin_unlock(&meta_lock)); + } + + assert(meta_free_list); + + new = meta_free_list; + meta_free_list = new->hash_next; + free_metapages--; + + return new; +} + +/* + * Put metapage on freelist (holding meta_lock) + */ +static inline void __free_metapage(metapage_t * mp) +{ + mp->flag = 0; + set_bit(META_free, &mp->flag); + mp->hash_next = meta_free_list; + meta_free_list = mp; + free_metapages++; + wake_up(&meta_wait); +} + +/* + * Put metapage on freelist (not holding meta_lock) + */ +static inline void free_metapage(metapage_t * mp) +{ + spin_lock(&meta_lock); + __free_metapage(mp); + spin_unlock(&meta_lock); +} + +/* + * Basically same hash as in pagemap.h, but using our hash table + */ +static metapage_t **meta_hash(struct address_space *mapping, + unsigned long index) +{ +#define i (((unsigned long)mapping)/ \ + (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) +#define s(x) ((x) + ((x) >> HASH_BITS)) + return hash_table + (s(i + index) & (HASH_SIZE - 1)); +#undef i +#undef s +} + +static metapage_t *search_hash(metapage_t ** hash_ptr, + struct address_space *mapping, + unsigned long index) +{ + metapage_t *ptr; + + for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { + if ((ptr->mapping == mapping) && (ptr->index == index)) + return ptr; + } + + return NULL; +} + +static void add_to_hash(metapage_t * mp, metapage_t ** hash_ptr) +{ + if (*hash_ptr) + (*hash_ptr)->hash_prev = mp; + + mp->hash_prev = NULL; + mp->hash_next = *hash_ptr; + *hash_ptr = mp; +} + +static void remove_from_hash(metapage_t * mp, metapage_t ** hash_ptr) +{ + if (mp->hash_prev) + mp->hash_prev->hash_next = mp->hash_next; + else { + assert(*hash_ptr == mp); + *hash_ptr = mp->hash_next; + } + + if (mp->hash_next) + mp->hash_next->hash_prev = mp->hash_prev; +} + +/* + * Direct address space operations + */ + +static int direct_get_block(struct inode *ip, long lblock, + struct buffer_head *bh_result, int create) +{ + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = lblock; + if (create) + bh_result->b_state |= (1UL << BH_Mapped) | (1UL << BH_New); + else + bh_result->b_state |= (1UL << BH_Mapped); + + return 0; +} + +static int direct_writepage(struct page *page) +{ + return block_write_full_page(page, direct_get_block); +} + +static int direct_readpage(struct file *fp, struct page *page) +{ + return block_read_full_page(page, direct_get_block); +} + +static int direct_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, direct_get_block); +} + +static int direct_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping, block, direct_get_block); +} + +struct address_space_operations direct_aops = { + readpage: direct_readpage, + writepage: direct_writepage, + sync_page: block_sync_page, + prepare_write: direct_prepare_write, + commit_write: generic_commit_write, + bmap: direct_bmap, +}; + +metapage_t *__get_metapage(struct inode *inode, + unsigned long lblock, unsigned int size, + int absolute, unsigned long new) +{ + int dropped_lock; + metapage_t **hash_ptr; + int l2BlocksPerPage; + int l2bsize; + struct 
address_space *mapping; + metapage_t *mp; + unsigned long page_index; + unsigned long page_offset; + + jFYI(1, ("__get_metapage: inode = 0x%p, lblock = 0x%lx\n", + inode, lblock)); + + if (absolute) + mapping = JFS_SBI(inode->i_sb)->direct_mapping; + else + mapping = inode->i_mapping; + + spin_lock(&meta_lock); + + hash_ptr = meta_hash(mapping, lblock); + + mp = search_hash(hash_ptr, mapping, lblock); + if (mp) { + page_found: + if (test_bit(META_discard, &mp->flag)) { + assert(new); /* It's okay to reuse a discarded + * if we expect it to be empty + */ + clear_bit(META_discard, &mp->flag); + } + mp->count++; + jFYI(1, ("__get_metapage: found 0x%p, in hash\n", mp)); + wait_on_metapage(mp); + assert(mp->logical_size == size); + LOCK_METAPAGE(mp); + spin_unlock(&meta_lock); + } else { + l2bsize = inode->i_sb->s_blocksize_bits; + l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + page_index = lblock >> l2BlocksPerPage; + page_offset = (lblock - (page_index << l2BlocksPerPage)) << + l2bsize; + if ((page_offset + size) > PAGE_SIZE) { + spin_unlock(&meta_lock); + jERROR(1, ("MetaData crosses page boundary!!\n")); + return NULL; + } + + mp = alloc_metapage(&dropped_lock); + if (dropped_lock) { + /* alloc_metapage blocked, we need to search the hash + * again. (The goto is ugly, maybe we'll clean this + * up in the future.) + */ + metapage_t *mp2; + mp2 = search_hash(hash_ptr, mapping, lblock); + if (mp2) { + __free_metapage(mp); + mp = mp2; + goto page_found; + } + } + mp->flag = 0; + LOCK_METAPAGE(mp); + if (absolute) + set_bit(META_absolute, &mp->flag); + mp->xflag = COMMIT_PAGE; + mp->count = 1; + atomic_set(&mp->nohomeok,0); + mp->mapping = mapping; + mp->index = lblock; + mp->page = 0; + mp->logical_size = size; + add_to_hash(mp, hash_ptr); + spin_unlock(&meta_lock); + + if (new) { + jFYI(1, + ("__get_metapage: Calling grab_cache_page\n")); + mp->page = grab_cache_page(mapping, page_index); + if (!mp->page) { + jERROR(1, ("grab_cache_page failed!\n")); + spin_lock(&meta_lock); + remove_from_hash(mp, hash_ptr); + __free_metapage(mp); + spin_unlock(&meta_lock); + return NULL; + } + UnlockPage(mp->page); + } else { + jFYI(1, + ("__get_metapage: Calling read_cache_page\n")); + mp->page = + read_cache_page(mapping, lblock, + (filler_t *) mapping->a_ops-> + readpage, NULL); + if (!mp->page) { + jERROR(1, ("read_cache_page failed!\n")); + spin_lock(&meta_lock); + remove_from_hash(mp, hash_ptr); + __free_metapage(mp); + spin_unlock(&meta_lock); + return NULL; + } + wait_on_page(mp->page); + } + mp->data = (void *) (kmap(mp->page) + page_offset); + } + jFYI(1, ("__get_metapage: returning = 0x%p\n", mp)); + return mp; +} + +void hold_metapage(metapage_t * mp, int force) +{ + spin_lock(&meta_lock); + + mp->count++; + + if (force) { + ASSERT (!(test_bit(META_forced, &mp->flag))); + if (METAPAGE_LOCKED(mp)) + set_bit(META_forced, &mp->flag); + else + LOCK_METAPAGE(mp); + } else { + wait_on_metapage(mp); + LOCK_METAPAGE(mp); + } + + spin_unlock(&meta_lock); +} + +static void __write_metapage(metapage_t * mp) +{ + struct inode *ip = (struct inode *) mp->mapping->host; + unsigned long page_index; + unsigned long page_offset; + int rc; + int l2bsize = ip->i_sb->s_blocksize_bits; + int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + + jFYI(1, ("__write_metapage: mp = 0x%p\n", mp)); + + if (test_cflag(COMMIT_Stale, ip) || test_bit(META_discard, &mp->flag)) { + /* + * This metadata is no longer valid + */ + clear_bit(META_dirty, &mp->flag); + return; + } + while (TryLockPage(mp->page)) + wait_on_page(mp->page); + 
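+	/*
+	 * The page is locked at this point.  The code below works out
+	 * which byte range of the cache page this metapage occupies,
+	 * then pushes the update out through the address_space
+	 * operations, using the same prepare_write/commit_write
+	 * protocol that ordinary file writes use.
+	 */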
+ page_index = mp->page->index; + page_offset = + (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + + rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, + page_offset + + mp->logical_size); + if (rc) { + jERROR(1, ("prepare_write return %d!\n", rc)); + ClearPageUptodate(mp->page); + kunmap(mp->page); + UnlockPage(mp->page); + clear_bit(META_dirty, &mp->flag); + return; + } + rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, + page_offset + + mp->logical_size); + if (rc) { + jERROR(1, ("commit_write returned %d\n", rc)); + } + UnlockPage(mp->page); + + clear_bit(META_dirty, &mp->flag); + + jFYI(1, ("__write_metapage done\n")); +} + +void release_metapage(metapage_t * mp) +{ + log_t *log; + struct inode *ip; + + jFYI(1, + ("release_metapage: mp = 0x%p, flag = 0x%lx\n", mp, + mp->flag)); + + spin_lock(&meta_lock); + if (test_bit(META_forced, &mp->flag)) { + clear_bit(META_forced, &mp->flag); + mp->count--; + spin_unlock(&meta_lock); + return; + } + + ip = (struct inode *) mp->mapping->host; + + /* + * Don't write obsolete metadata + */ + if (test_cflag(COMMIT_Stale, ip)) + clear_bit(META_dirty, &mp->flag); + + assert(mp->count); + if (--mp->count || atomic_read(&mp->nohomeok)) { + UNLOCK_METAPAGE(mp); + wake_up(&mp->wait); + spin_unlock(&meta_lock); + } else { + kunmap(mp->page); + mp->data = 0; + remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); + spin_unlock(&meta_lock); + if (test_bit(META_dirty, &mp->flag)) + __write_metapage(mp); + if (test_bit(META_sync, &mp->flag)) { + __sync_metapage(mp); + clear_bit(META_sync, &mp->flag); + } + + if (mp->lsn) { + /* + * Remove metapage from logsynclist. + */ + log = mp->log; + LOGSYNC_LOCK(log); + mp->log = 0; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del(&mp->synclist); + LOGSYNC_UNLOCK(log); + } + + if (test_bit(META_discard, &mp->flag)) + invalidate_page(mp); + else + page_cache_release(mp->page); + free_metapage(mp); + } + jFYI(1, ("release_metapage: done\n")); +} + +void invalidate_metapages(struct inode *ip, unsigned long addr, + unsigned long len) +{ + metapage_t **hash_ptr; + unsigned long lblock; + struct address_space *mapping = ip->i_mapping; + int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_sb->s_blocksize_bits; + metapage_t *mp; + + /* + * First, mark metapages to discard. They will eventually be + * released, but should not be written. + */ + spin_lock(&meta_lock); + for (lblock = addr; lblock < addr + len; + lblock += 1 << l2BlocksPerPage) { + hash_ptr = meta_hash(mapping, lblock); + mp = search_hash(hash_ptr, mapping, lblock); + if (mp) + set_bit(META_discard, &mp->flag); + } + spin_unlock(&meta_lock); + + /* Now force any initiated I/O to the disk. Dirty buffer_heads + * would be a problem if these disk blocks were re-used in another + * address space. 
+ */ + generic_buffer_fdatasync(ip, addr << l2BlocksPerPage, + (addr + len) << l2BlocksPerPage); +} + +int jfsIOWait(void *arg) +{ + siginfo_t info; + unsigned long signr; + + jFYI(1, ("jfsIOWait is here!\n")); + + lock_kernel(); + + daemonize(); + current->tty = NULL; + strcpy(current->comm, "jfsIO"); + + unlock_kernel(); + + jfsIOtask = current; + + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, + sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) + | sigmask(SIGCONT)); + spin_unlock_irq(¤t->sigmask_lock); + + up(&jfsIOsem); + + spin_lock_irq(&async_lock); + while (1) { + if (log_redrive_list) + jfs_logredrive(); + else { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irq(&async_lock); + schedule(); + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + switch (signr) { + case SIGKILL: + if (metapage_buf) + /* Not our SIGKILL signal */ + break; + + jFYI(1,("jfsIOWait being killed!\n")); + up(&jfsIOsem); + return 0; + } + spin_lock_irq(&async_lock); + + } + } +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_mount.c linuxppc64_2_4/fs/jfs/jfs_mount.c --- ../kernel.org/linux/fs/jfs/jfs_mount.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_mount.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,547 @@ +/* + * MODULE_NAME: jfs_mount.c + * + * COMPONENT_NAME: sysjfs + * + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +/* + * Module: jfs_mount.c + * + * note: file system in transition to aggregate/fileset: + * + * file system mount is interpreted as the mount of aggregate, + * if not already mounted, and mount of the single/only fileset in + * the aggregate; + * + * a file system/aggregate is represented by an internal inode + * (aka mount inode) initialized with aggregate superblock; + * each vfs represents a fileset, and points to its "fileset inode + * allocation map inode" (aka fileset inode): + * (an aggregate itself is structured recursively as a filset: + * an internal vfs is constructed and points to its "fileset inode + * allocation map inode" (aka aggregate inode) where each inode + * represents a fileset inode) so that inode number is mapped to + * on-disk inode in uniform way at both aggregate and fileset level; + * + * each vnode/inode of a fileset is linked to its vfs (to facilitate + * per fileset inode operations, e.g., unmount of a fileset, etc.); + * each inode points to the mount inode (to facilitate access to + * per aggregate information, e.g., block size, etc.) as well as + * its file set inode. + * + * aggregate + * ipmnt + * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; + * fileset vfs -> vp(1) <-> ... 
<-> vp(n) <->vproot; */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * forward references
+ */
+static int chkSuper(struct super_block *);
+static int logMOUNT(struct super_block *sb);
+
+/*
+ * NAME:	jfs_mount(vfsp, crp)
+ *
+ * FUNCTION:	vfs_mount()
+ *
+ * PARAMETER:	vfsp	- virtual file system pointer
+ *		crp	- credential
+ *
+ * RETURN:	EBUSY	- device already mounted or open for write
+ *		EBUSY	- cvrdvp already mounted;
+ *		EBUSY	- mount table full
+ *		ENOTDIR	- cvrdvp not directory on a device mount
+ *		ENXIO	- device open failure
+ */
+int jfs_mount(struct super_block *sb, char *options, int silent)
+{
+	int rc = 0;		/* Return code */
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct inode *ipaimap = NULL;
+	struct inode *ipaimap2 = NULL;
+	struct inode *ipimap = NULL;
+	struct inode *ipbmap = NULL;
+
+	jFYI(1, ("\nMount JFS\n"));
+
+	/*
+	 * get the file system device being mounted
+	 */
+
+	jFYI(1, ("ToDo: Parse mount options: \"%s\"\n", (char *) options));
+
+	/*
+	 * read/validate superblock
+	 * (initialize mount inode from the superblock)
+	 */
+	if ((rc = chkSuper(sb))) {
+		goto errout20;
+	}
+
+	ipaimap = diReadSpecial(sb, AGGREGATE_I);
+	if (ipaimap == NULL) {
+		jERROR(1, ("jfs_mount: Failed to read AGGREGATE_I\n"));
+		rc = EIO;
+		goto errout20;
+	}
+	sbi->ipaimap = ipaimap;
+
+	jFYI(1, ("jfs_mount: ipaimap:0x%p\n", ipaimap));
+
+	/*
+	 * initialize aggregate inode allocation map
+	 */
+	if ((rc = diMount(ipaimap))) {
+		jERROR(1,
+		       ("jfs_mount: diMount(ipaimap) failed w/rc = %d\n",
+			rc));
+		goto errout21;
+	}
+
+	/*
+	 * open aggregate block allocation map
+	 */
+	ipbmap = diReadSpecial(sb, BMAP_I);
+	if (ipbmap == NULL) {
+		rc = EIO;
+		goto errout22;
+	}
+
+	jFYI(1, ("jfs_mount: ipbmap:0x%p\n", ipbmap));
+
+	sbi->ipbmap = ipbmap;
+
+	/*
+	 * initialize aggregate block allocation map
+	 */
+	if ((rc = dbMount(ipbmap))) {
+		jERROR(1, ("jfs_mount: dbMount failed w/rc = %d\n", rc));
+		goto errout22;
+	}
+
+	/*
+	 * open the secondary aggregate inode allocation map
+	 *
+	 * This is a duplicate of the aggregate inode allocation map.
+	 *
+	 * hand craft a vfs in the same fashion as we did to read ipaimap.
+	 * By adding INOSPEREXT (32) to the inode number, we are telling
+	 * diReadSpecial that we are reading from the secondary aggregate
+	 * inode table.  This also creates a unique entry in the inode hash
+	 * table.
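+	 * (INOSPEREXT is the number of inodes per inode extent; offsetting
+	 * by it gives the duplicate table's special inodes their own inode
+	 * numbers, which is what keeps their hash entries distinct from
+	 * the primary's.)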
+	 */
+	if ((sbi->mntflag & JFS_BAD_SAIT) == 0) {
+		ipaimap2 = diReadSpecial(sb, AGGREGATE_I + INOSPEREXT);
+		if (ipaimap2 == 0) {
+			jERROR(1,
+			       ("jfs_mount: Failed to read AGGREGATE_I\n"));
+			rc = EIO;
+			goto errout35;
+		}
+		sbi->ipaimap2 = ipaimap2;
+
+		jFYI(1, ("jfs_mount: ipaimap2:0x%p\n", ipaimap2));
+
+		/*
+		 * initialize secondary aggregate inode allocation map
+		 */
+		if ((rc = diMount(ipaimap2))) {
+			jERROR(1,
+			       ("jfs_mount: diMount(ipaimap2) failed, rc = %d\n",
+				rc));
+			goto errout35;
+		}
+	} else
+		/* Secondary aggregate inode table is not valid */
+		sbi->ipaimap2 = 0;
+
+	/*
+	 * mount (the only/single) fileset
+	 */
+	/*
+	 * open fileset inode allocation map (aka fileset inode)
+	 */
+	ipimap = diReadSpecial(sb, FILESYSTEM_I);
+	if (ipimap == NULL) {
+		jERROR(1, ("jfs_mount: Failed to read FILESYSTEM_I\n"));
+		/* open fileset secondary inode allocation map */
+		rc = EIO;
+		goto errout40;
+	}
+	jFYI(1, ("jfs_mount: ipimap:0x%p\n", ipimap));
+
+	/* map further access of per fileset inodes by the fileset inode */
+	sbi->ipimap = ipimap;
+
+	/* initialize fileset inode allocation map */
+	if ((rc = diMount(ipimap))) {
+		jERROR(1, ("jfs_mount: diMount failed w/rc = %d\n", rc));
+		goto errout41;
+	}
+
+	jFYI(1, ("Mount JFS Complete.\n"));
+	goto out;
+
+	/*
+	 * unwind on error
+	 */
+//errout42: /* close fileset inode allocation map */
+	diUnmount(ipimap, 1);
+
+      errout41:		/* close fileset inode allocation map inode */
+	diFreeSpecial(ipimap);
+
+      errout40:		/* fileset closed */
+
+	/* close secondary aggregate inode allocation map */
+	if (ipaimap2) {
+		diUnmount(ipaimap2, 1);
+		diFreeSpecial(ipaimap2);
+	}
+
+      errout35:
+
+	/* close aggregate block allocation map */
+	dbUnmount(ipbmap, 1);
+	diFreeSpecial(ipbmap);
+
+      errout22:		/* close aggregate inode allocation map */
+
+	diUnmount(ipaimap, 1);
+
+      errout21:		/* close aggregate inodes */
+	diFreeSpecial(ipaimap);
+      errout20:		/* aggregate closed */
+
+      out:
+
+	if (rc) {
+		jERROR(1, ("Mount JFS Failure: %d\n", rc));
+	}
+	return rc;
+}
+
+/*
+ * NAME:	jfs_mount_rw(sb, remount)
+ *
+ * FUNCTION:	Completes read-write mount, or remounts read-only volume
+ *		as read-write
+ */
+int jfs_mount_rw(struct super_block *sb, int remount)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	log_t *log;
+	int rc;
+
+	/*
+	 * If we are re-mounting a previously read-only volume, we want to
+	 * re-read the inode and block maps, since fsck.jfs may have updated
+	 * them.
+	 */
+	if (remount) {
+		if (chkSuper(sb) || (sbi->state != FM_CLEAN))
+			return -EINVAL;
+
+		truncate_inode_pages(sbi->ipimap->i_mapping, 0);
+		truncate_inode_pages(sbi->ipbmap->i_mapping, 0);
+		diUnmount(sbi->ipimap, 1);
+		if ((rc = diMount(sbi->ipimap))) {
+			jERROR(1,("jfs_mount_rw: diMount failed!\n"));
+			return rc;
+		}
+
+		dbUnmount(sbi->ipbmap, 1);
+		if ((rc = dbMount(sbi->ipbmap))) {
+			jERROR(1,("jfs_mount_rw: dbMount failed!\n"));
+			return rc;
+		}
+	}
+#ifdef _STILL_TO_PORT
+	/*
+	 * get log device associated with the fs being mounted;
+	 */
+	if (ipmnt->i_mntflag & JFS_INLINELOG) {
+		vfsp->vfs_logVPB = vfsp->vfs_hVPB;
+		vfsp->vfs_logvpfs = vfsp->vfs_vpfsi;
+	} else if (vfsp->vfs_logvpfs == NULL) {
+		/*
+		 * XXX: there's only one external log per system;
+		 */
+		jERROR(1, ("jfs_mount: Mount Failure! 
No Log Device.\n")); + goto errout30; + } + + logdev = vfsp->vfs_logvpfs->vpi_unit; + ipmnt->i_logdev = logdev; +#endif /* _STILL_TO_PORT */ + + /* + * open/initialize log + */ + if ((rc = lmLogOpen(sb, &log))) + return rc; + + JFS_SBI(sb)->log = log; + + /* + * update file system superblock; + */ + if ((rc = updateSuper(sb, FM_MOUNT))) { + jERROR(1, + ("jfs_mount: updateSuper failed w/rc = %d\n", rc)); + lmLogClose(sb, log); + JFS_SBI(sb)->log = 0; + return rc; + } + + /* + * write MOUNT log record of the file system + */ + logMOUNT(sb); + + return rc; +} + +/* + * chkSuper() + * + * validate the superblock of the file system to be mounted and + * get the file system parameters. + * + * returns + * 0 with fragsize set if check successful + * error code if not successful + */ +static int chkSuper(struct super_block *sb) +{ + int rc = 0; + metapage_t *mp; + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_superblock *j_sb; + int AIM_bytesize, AIT_bytesize; + int expected_AIM_bytesize, expected_AIT_bytesize; + s64 AIM_byte_addr, AIT_byte_addr, fsckwsp_addr; + s64 byte_addr_diff0, byte_addr_diff1; + s32 bsize; + + if ((rc = readSuper(sb, &mp))) + return rc; + j_sb = (struct jfs_superblock *) (mp->data); + + /* + * validate superblock + */ + /* validate fs signature */ + if (strncmp(j_sb->s_magic, JFS_MAGIC, 4) || + j_sb->s_version != cpu_to_le32(JFS_VERSION)) { + //rc = EFORMAT; + rc = EINVAL; + goto out; + } + + bsize = le32_to_cpu(j_sb->s_bsize); +#ifdef _JFS_4K + if (bsize != PSIZE) { + jERROR(1, ("Currently only 4K block size supported!\n")); + rc = EINVAL; + goto out; + } +#endif /* _JFS_4K */ + + jFYI(1, ("superblock: flag:0x%08x state:0x%08x size:0x%Lx\n", + le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state), + (unsigned long long) le64_to_cpu(j_sb->s_size))); + + /* validate the descriptors for Secondary AIM and AIT */ + if ((j_sb->s_flag & cpu_to_le32(JFS_BAD_SAIT)) != + cpu_to_le32(JFS_BAD_SAIT)) { + expected_AIM_bytesize = 2 * PSIZE; + AIM_bytesize = lengthPXD(&(j_sb->s_aim2)) * bsize; + expected_AIT_bytesize = 4 * PSIZE; + AIT_bytesize = lengthPXD(&(j_sb->s_ait2)) * bsize; + AIM_byte_addr = addressPXD(&(j_sb->s_aim2)) * bsize; + AIT_byte_addr = addressPXD(&(j_sb->s_ait2)) * bsize; + byte_addr_diff0 = AIT_byte_addr - AIM_byte_addr; + fsckwsp_addr = addressPXD(&(j_sb->s_fsckpxd)) * bsize; + byte_addr_diff1 = fsckwsp_addr - AIT_byte_addr; + if ((AIM_bytesize != expected_AIM_bytesize) || + (AIT_bytesize != expected_AIT_bytesize) || + (byte_addr_diff0 != AIM_bytesize) || + (byte_addr_diff1 <= AIT_bytesize)) + j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); + } + + /* in release 1, the flag MUST reflect inline log, and group commit */ + if ((j_sb->s_flag & cpu_to_le32(JFS_INLINELOG)) != + cpu_to_le32(JFS_INLINELOG)) + j_sb->s_flag |= cpu_to_le32(JFS_INLINELOG); + if ((j_sb->s_flag & cpu_to_le32(JFS_GROUPCOMMIT)) != + cpu_to_le32(JFS_GROUPCOMMIT)) + j_sb->s_flag |= cpu_to_le32(JFS_GROUPCOMMIT); + jFYI(0, ("superblock: flag:0x%08x state:0x%08x size:0x%Lx\n", + le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state), + (unsigned long long) le64_to_cpu(j_sb->s_size))); + + /* validate fs state */ + if (j_sb->s_state != cpu_to_le32(FM_CLEAN) && + !(sb->s_flags & MS_RDONLY)) { + jERROR(1, + ("jfs_mount: Mount Failure: File System Dirty.\n")); + rc = EINVAL; + goto out; + } + + sbi->state = le32_to_cpu(j_sb->s_state); + sbi->mntflag = le32_to_cpu(j_sb->s_flag); + + /* + * JFS always does I/O by 4K pages. Don't tell the buffer cache + * that we use anything else (leave s_blocksize alone). 
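+	 * (For example, under a build without the _JFS_4K restriction, a
+	 * 1K-block aggregate would keep s_blocksize at 4K and simply
+	 * record bsize = 1024, l2bsize = 10 here, making nbperpage below
+	 * come out to PSIZE >> 10 = 4 filesystem blocks per page.)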
+ */ + sbi->bsize = bsize; + sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize); + + /* + * For now, ignore s_pbsize, l2bfactor. All I/O going through buffer + * cache. + */ + sbi->nbperpage = PSIZE >> sbi->l2bsize; + sbi->l2nbperpage = L2PSIZE - sbi->l2bsize; + sbi->l2niperblk = sbi->l2bsize - L2DISIZE; + if (sbi->mntflag & JFS_INLINELOG) + sbi->logpxd = j_sb->s_logpxd; + sbi->ait2 = j_sb->s_ait2; + + out: + release_metapage(mp); + + return rc; +} + + +/* + * updateSuper() + * + * update synchronously superblock if it is mounted read-write. + */ +int updateSuper(struct super_block *sb, uint state) +{ + int rc; + metapage_t *mp; + struct jfs_superblock *j_sb; + + /* + * Only fsck can fix dirty state + */ + if (JFS_SBI(sb)->state == FM_DIRTY) + return 0; + + if ((rc = readSuper(sb, &mp))) + return rc; + + j_sb = (struct jfs_superblock *) (mp->data); + + j_sb->s_state = cpu_to_le32(state); + JFS_SBI(sb)->state = state; + + if (state == FM_MOUNT) { + /* record log's dev_t and mount serial number */ + j_sb->s_logdev = cpu_to_le32(JFS_SBI(sb)->log->dev); + j_sb->s_logserial = cpu_to_le32(JFS_SBI(sb)->log->serial); + } else if (state == FM_CLEAN) { + /* + * If this volume is shared with OS/2, OS/2 will need to + * recalculate DASD usage, since we don't deal with it. + */ + if (j_sb->s_flag & cpu_to_le32(JFS_DASD_ENABLED)) + j_sb->s_flag |= cpu_to_le32(JFS_DASD_PRIME); + } + + write_metapage(mp); + + return 0; +} + + +/* + * readSuper() + * + * read superblock by raw sector address + */ +int readSuper(struct super_block *sb, metapage_t ** mpp) +{ + /* read in primary superblock */ + *mpp = read_metapage(JFS_SBI(sb)->direct_inode, + SUPER1_OFF >> sb->s_blocksize_bits, PSIZE, 1); + if (*mpp == NULL) { + /* read in secondary/replicated superblock */ + *mpp = read_metapage(JFS_SBI(sb)->direct_inode, + SUPER2_OFF >> sb->s_blocksize_bits, + PSIZE, 1); + } + return *mpp ? 0 : 1; +} + + +/* + * logMOUNT() + * + * function: write a MOUNT log record for file system. + * + * MOUNT record keeps logredo() from processing log records + * for this file system past this point in log. + * it is harmless if mount fails. + * + * note: MOUNT record is at aggregate level, not at fileset level, + * since log records of previous mounts of a fileset + * (e.g., AFTER record of extent allocation) have to be processed + * to update block allocation map at aggregate level. + */ +static int logMOUNT(struct super_block *sb) +{ + log_t *log = JFS_SBI(sb)->log; + lrd_t lrd; + + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_MOUNT); + lrd.length = 0; + lrd.aggregate = cpu_to_le32(sb->s_dev); + lmLog(log, NULL, &lrd, NULL); + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_txnmgr.c linuxppc64_2_4/fs/jfs/jfs_txnmgr.c --- ../kernel.org/linux/fs/jfs/jfs_txnmgr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_txnmgr.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,3110 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * jfs_txnmgr.c: transaction manager
+ *
+ * notes:
+ * transaction starts with txBegin() and ends with txCommit()
+ * or txAbort().
+ *
+ * tlock is acquired at the time of update;
+ * (obviate scan at commit time for xtree and dtree)
+ * tlock and mp point to each other;
+ * (no hashlist for mp -> tlock).
+ *
+ * special cases:
+ * tlock on in-memory inode:
+ * in-place tlock in the in-memory inode itself;
+ * converted to page lock by iWrite() at commit time.
+ *
+ * tlock during write()/mmap() under anonymous transaction (tid = 0):
+ * transferred (?) to transaction at commit time.
+ *
+ * use the page itself to update allocation maps
+ * (obviate intermediate replication of allocation/deallocation data)
+ * hold on to mp+lock thru update of maps
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * transaction management structures
+ */
+static struct {
+	/* tblock */
+	int freetid;		/* 4: index of a free tid structure */
+	wait_queue_head_t freewait;	/* 4: eventlist of free tblock */
+
+	/* tlock */
+	int freelock;		/* 4: index first free lock word */
+	wait_queue_head_t freelockwait;	/* 4: eventlist of free tlock */
+	wait_queue_head_t lowlockwait;	/* 4: eventlist of ample tlocks */
+	int tlocksInUse;	/* 4: Number of tlocks in use */
+	spinlock_t LazyLock;	/* 4: synchronize sync_queue & unlock_queue */
+/*	tblock_t *sync_queue;	 * 4: Transactions waiting for data sync */
+	tblock_t *unlock_queue;	/* 4: Transactions waiting to be released */
+	tblock_t *unlock_tail;	/* 4: Tail of unlock_queue */
+	int lazyQsize;		/* 4: # of tblocks in unlock queue */
+	int lazyQwait;		/* 4: Make new transactions wait until unlock */
+	struct inode *anon_list;	/* 4: inodes having anonymous txns */
+	struct inode *anon_list2;	/* 4: inodes having anonymous txns
+					   that couldn't be sync'ed */
+} TxAnchor;
+
+static int nTxBlock = 512;	/* number of transaction blocks */
+struct tblock *TxBlock;		/* transaction block table */
+
+static int nTxLock = 2048;	/* number of transaction locks */
+static int TxLockLWM = 2048*.4;	/* Low water mark for number of txLocks used */
+static int TxLockHWM = 2048*.8;	/* High water mark for number of txLocks used */
+struct tlock *TxLock;		/* transaction lock table */
+static int TlocksLow = 0;	/* Indicates low number of available tlocks */
+static int LazyHWM = 64;	/* Slow down when we have this many outstanding
+				 * transactions in unlock queue */
+static int LazyLWM = 8;		/* Get going again when we're down to this many
+				 * in unlock queue */
+
+
+/*
+ * transaction management lock
+ */
+static spinlock_t jfsTxnLock;
+
+#define TXN_LOCK_INIT()	spin_lock_init(&jfsTxnLock)
+#define TXN_LOCK()	spin_lock(&jfsTxnLock)
+#define TXN_UNLOCK()	spin_unlock(&jfsTxnLock)
+
+#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
+#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
+#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
+
+/*
+ * Retry logic exists outside these macros to protect from spurious wakeups.
+ */ +static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(event, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + TXN_UNLOCK(); + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(event, &wait); +} + +#define TXN_SLEEP(event)\ +{\ + TXN_SLEEP_DROP_LOCK(event);\ + TXN_LOCK();\ +} + +#define TXN_WAKEUP(event) wake_up_all(event) +/* + * Get a transaction lock from the free list. If the number in use is + * greater than the high water mark, wake up the sync daemon. This should + * free some anonymous transaction locks. (TXN_LOCK must be held.) + */ + +#define TXLOCK_ALLOC(LID)\ +{\ + while ((LID = TxAnchor.freelock) == 0)\ + {\ + TXN_SLEEP(&TxAnchor.freelockwait);\ + }\ + TxAnchor.freelock = TxLock[LID].next;\ + HIGHWATERMARK(stattx.maxlid,LID);\ + if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TlocksLow == 0))\ + {\ + jEVENT(0,("TXLOCK_ALLOC TlocksLow\n"));\ + TlocksLow = 1;\ + wake_up_process(jfsSyncTask);\ + }\ +} + +#define TXLOCK_FREE(LID)\ +{\ + TxLock[LID].next = TxAnchor.freelock;\ + TxAnchor.freelock = LID;\ + TxAnchor.tlocksInUse--;\ + if (TlocksLow && (TxAnchor.tlocksInUse < TxLockLWM))\ + {\ + jEVENT(0,("TXLOCK_FREE TlocksLow no more\n"));\ + TlocksLow = 0;\ + TXN_WAKEUP(&TxAnchor.lowlockwait);\ + }\ + TXN_WAKEUP(&TxAnchor.freelockwait);\ +} + +/* + * statistics + */ +struct { + int maxtid; /* 4: biggest tid ever used */ + int maxlid; /* 4: biggest lid ever used */ + int ntid; /* 4: # of transactions performed */ + int nlid; /* 4: # of tlocks acquired */ + int waitlock; /* 4: # of tlock wait */ +} stattx; + + +/* + * external references + */ +extern int lmGroupCommit(log_t * log, tblock_t * tblk); +extern void lmSync(log_t *); +extern int readSuper(struct super_block *sb, metapage_t ** bpp); +extern int jfs_commit_inode(struct inode *, int); + +extern struct task_struct *jfsCommitTask; +extern struct semaphore jfsIOsem; +extern struct task_struct *jfsSyncTask; + +/* + * forward references + */ +int diLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck, + commit_t * cd); +int dataLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void dtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void inlineLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void mapLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void txAbortCommit(commit_t * cd, int exval); +static void txAllocPMap(struct inode *ip, maplock_t * maplock, + tblock_t * tblk); +void txForce(tblock_t * tblk); +static int txLog(log_t * log, tblock_t * tblk, commit_t * cd); +int txMoreLock(void); +static void txUpdateMap(tblock_t * tblk); +static void txRelease(tblock_t * tblk); +void xtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +static void LogSyncRelease(metapage_t * mp); + +/* + * transaction block/lock management + * --------------------------------- + */ + +/* + * NAME: txInit() + * + * FUNCTION: initialize transaction management structures + * + * RETURN: + * + * serialization: single thread at jfs_init() + */ +int txInit(void) +{ + int k, size; + + /* + * initialize the transaction management lock + */ + TXN_LOCK_INIT(); + + /* + * initialize transaction block (tblock) table + * + * transaction id (tid) = tblock index + * tid = 0 is reserved. 
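
The TXLOCK_ALLOC/TXLOCK_FREE macros above implement watermark hysteresis: allocating past the high-water mark sets TlocksLow and wakes the sync daemon so anonymous tlocks get committed and freed, and only once usage falls back under the low-water mark does TXLOCK_FREE clear the flag and release throttled callers (txBegin() checks TlocksLow before admitting new work). A compilable user-space sketch of that discipline, all names hypothetical and not part of the patch:

#include <stdio.h>
#include <pthread.h>

#define NLOCKS	2048
#define HWM	((int)(NLOCKS * 0.8))	/* wake the reclaimer here */
#define LWM	((int)(NLOCKS * 0.4))	/* lift the throttle here */

static int in_use;			/* locks handed out */
static int low;				/* hysteresis flag, like TlocksLow */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t low_wait = PTHREAD_COND_INITIALIZER;

static void lock_alloc(void)
{
	pthread_mutex_lock(&lock);
	/* new work throttles while reclaim is in progress; the kernel
	 * exempts forced/committing callers so the reclaimer itself
	 * never waits here */
	while (low)
		pthread_cond_wait(&low_wait, &lock);
	if (++in_use > HWM && !low) {
		low = 1;	/* kernel: wake_up_process(jfsSyncTask) */
		printf("past high-water mark, reclaim starts\n");
	}
	pthread_mutex_unlock(&lock);
}

static void lock_free(void)
{
	pthread_mutex_lock(&lock);
	if (--in_use < LWM && low) {
		low = 0;	/* kernel: TXN_WAKEUP(&TxAnchor.lowlockwait) */
		pthread_cond_broadcast(&low_wait);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	int i;

	for (i = 0; i <= HWM; i++)
		lock_alloc();	/* the last call trips the high-water mark */
	for (i = 0; i <= HWM; i++)
		lock_free();	/* dropping under LWM lifts the throttle */
	printf("in_use back to %d\n", in_use);
	return 0;
}
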
+ */ + size = sizeof(tblock_t) * nTxBlock; + TxBlock = (tblock_t *) vmalloc(size); + if (TxBlock == NULL) + return ENOMEM; + + for (k = 1; k < nTxBlock - 1; k++) { + TxBlock[k].next = k + 1; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + } + TxBlock[k].next = 0; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + + TxAnchor.freetid = 1; + init_waitqueue_head(&TxAnchor.freewait); + + stattx.maxtid = 1; /* statistics */ + + /* + * initialize transaction lock (tlock) table + * + * transaction lock id = tlock index + * tlock id = 0 is reserved. + */ + size = sizeof(tlock_t) * nTxLock; + TxLock = (tlock_t *) vmalloc(size); + if (TxLock == NULL) { + vfree(TxBlock); + return ENOMEM; + } + + /* initialize tlock table */ + for (k = 1; k < nTxLock - 1; k++) + TxLock[k].next = k + 1; + TxLock[k].next = 0; + init_waitqueue_head(&TxAnchor.freelockwait); + init_waitqueue_head(&TxAnchor.lowlockwait); + + TxAnchor.freelock = 1; + TxAnchor.tlocksInUse = 0; + TxAnchor.anon_list = 0; + TxAnchor.anon_list2 = 0; + + stattx.maxlid = 1; /* statistics */ + + return 0; +} + +/* + * NAME: txExit() + * + * FUNCTION: clean up when module is unloaded + */ +void __exit txExit(void) +{ + vfree(TxLock); + TxLock = 0; + vfree(TxBlock); + TxBlock = 0; +} + + +/* + * NAME: txBegin() + * + * FUNCTION: start a transaction. + * + * PARAMETER: sb - superblock + * tid - transaction id (out) + * flag - force for nested tx; + * + * RETURN: + * + * note: flag force allows to start tx for nested tx + * to prevent deadlock on logsync barrier; + */ +void txBegin(struct super_block *sb, int *tid, int flag) +{ + int t; + tblock_t *tblk; + log_t *log; + + jFYI(1, ("txBegin: flag = 0x%x\n", flag)); + log = (log_t *) JFS_SBI(sb)->log; + + TXN_LOCK(); + + retry: + if (flag != COMMIT_FORCE) { + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier) { + TXN_SLEEP(&log->syncwait); + goto retry; + } + if (TxAnchor.lazyQwait) { + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + } + if (flag == 0) { + /* + * Don't begin transaction if we're getting starved for tlocks + * unless COMMIT_FORCE (imap changes) or COMMIT_INODE (which + * may ultimately free tlocks) + */ + if (TlocksLow) { + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + } + + /* + * allocate transaction id/block + */ + if ((t = TxAnchor.freetid) == 0) { + jFYI(1, ("txBegin: waiting for free tid\n")); + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + + tblk = &TxBlock[t]; + TxAnchor.freetid = tblk->next; + + /* + * initialize transaction + */ + + /* + * We can't zero the whole thing or we screw up another thread being + * awakened after sleeping on tblk->waitor + * + * memset(tblk, 0, sizeof(tblock_t)); + */ + tblk->next = tblk->xflag = tblk->flag = tblk->lsn = 0; + + tblk->sb = sb; + ++log->logtid; + tblk->logtid = log->logtid; + + ++log->active; + + *tid = t; + + HIGHWATERMARK(stattx.maxtid, t); /* statistics */ + INCREMENT(stattx.ntid); /* statistics */ + + TXN_UNLOCK(); + + jFYI(1, ("txBegin: returning tid = %d\n", t)); + +} + + +/* + * NAME: txBeginAnon() + * + * FUNCTION: start an anonymous transaction. + * Blocks if logsync or available tlocks are low to prevent + * anonymous tlocks from depleting supply. 
+ * + * PARAMETER: sb - superblock + * + * RETURN: none + */ +void txBeginAnon(struct super_block *sb) +{ + log_t *log; + + log = (log_t *) JFS_SBI(sb)->log; + + TXN_LOCK(); + + retry: + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier) { + TXN_SLEEP(&log->syncwait); + goto retry; + } + + /* + * Don't begin transaction if we're getting starved for tlocks + */ + if (TlocksLow) { + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + TXN_UNLOCK(); +} + + +/* + * txEnd() + * + * function: free specified transaction block. + * + * logsync barrier processing: + * + * serialization: + */ +void txEnd(int tid) +{ + tblock_t *tblk = &TxBlock[tid]; + log_t *log; + + jFYI(1, ("txEnd: tid = %d\n", tid)); + TXN_LOCK(); + + /* + * wakeup transactions waiting on the page locked + * by the current transaction + */ + TXN_WAKEUP(&tblk->waitor); + + log = (log_t *) JFS_SBI(tblk->sb)->log; + + /* + * Lazy commit thread can't free this guy until we mark it UNLOCKED, + * otherwise, we would be left with a transaction that may have been + * reused. + * + * Lazy commit thread will turn off tblkGC_LAZY before calling this + * routine. + */ + if (tblk->flag & tblkGC_LAZY) { + jFYI(1, + ("txEnd called w/lazy tid: %d, tblk = 0x%p\n", + tid, tblk)); + TXN_UNLOCK(); + + spin_lock_irq(&log->gclock); // LOGGC_LOCK + tblk->flag |= tblkGC_UNLOCKED; + spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK + return; + } + + jFYI(1, ("txEnd: tid: %d, tblk = 0x%p\n", tid, tblk)); + + assert(tblk->next == 0); + + /* + * insert tblock back on freelist + */ + tblk->next = TxAnchor.freetid; + TxAnchor.freetid = tid; + + /* + * mark the tblock not active + */ + --log->active; + + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier && log->active == 0) { + /* forward log syncpt */ + /* lmSync(log); */ + + jFYI(1, (" log barrier off: 0x%x\n", log->lsn)); + + /* enable new transactions start */ + log->syncbarrier = 0; + + /* wakeup all waitors for logsync barrier */ + TXN_WAKEUP(&log->syncwait); + } + + /* + * wakeup all waitors for a free tblock + */ + TXN_WAKEUP(&TxAnchor.freewait); + + TXN_UNLOCK(); + jFYI(1, ("txEnd: exitting\n")); +} + + +/* + * txLock() + * + * function: acquire a transaction lock on the specified + * + * parameter: + * + * return: transaction lock id + * + * serialization: + */ +tlock_t *txLock(int tid, struct inode *ip, metapage_t * mp, int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int dir_xtree = 0; + int lid, xtid; + tlock_t *tlck; + xtlock_t *xtlck; + linelock_t *linelock; + xtpage_t *p; + + TXN_LOCK(); + + if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && + !(mp->xflag & COMMIT_PAGE)) { + /* + * Directory inode is special. It can have both an xtree tlock + * and a dtree tlock associated with it. + */ + dir_xtree = 1; + lid = jfs_ip->xtlid; + } else + lid = mp->lid; + + /* is page not locked by a transaction ? */ + if (lid == 0) + goto allocateLock; + + jFYI(1, ("txLock: tid:%d ip:0x%p mp:0x%p lid:%d\n", + tid, ip, mp, lid)); + + /* is page locked by the requester transaction ? */ + tlck = &TxLock[lid]; + if ((xtid = tlck->tid) == tid) + goto grantLock; + + /* + * is page locked by anonymous transaction/lock ? 
+ * + * (page update without transaction (i.e., file write) is + * locked under anonymous transaction tid = 0: + * anonymous tlocks maintained on anonymous tlock list of + * the inode of the page and available to all anonymous + * transactions until txCommit() time at which point + * they are transferred to the transaction tlock list of + * the commiting transaction of the inode) + */ + if (xtid == 0) { + tlck->tid = tid; + goto grantLock; + } + + goto waitLock; + + /* + * allocate a tlock + */ + allocateLock: + TXLOCK_ALLOC(lid); + tlck = &TxLock[lid]; + + /* + * initialize tlock + */ + tlck->tid = tid; + + /* mark tlock for meta-data page */ + if (mp->xflag & COMMIT_PAGE) { + + tlck->flag = tlckPAGELOCK; + + /* mark the page dirty and nohomeok */ + mark_metapage_dirty(mp); + atomic_inc(&mp->nohomeok); + + jFYI(1, + ("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p\n", + mp, atomic_read(&mp->nohomeok), tid, tlck)); + + /* if anonymous transaction, and buffer is on the group + * commit synclist, mark inode to show this. This will + * prevent the buffer from being marked nohomeok for too + * long a time. + */ + if ((tid == 0) && mp->lsn) + set_cflag(COMMIT_Synclist, ip); + } + /* mark tlock for in-memory inode */ + else + tlck->flag = tlckINODELOCK; + + tlck->type = 0; + + /* bind the tlock and the page */ + tlck->ip = ip; + tlck->mp = mp; + if (dir_xtree) + jfs_ip->xtlid = lid; + else + mp->lid = lid; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at head of transaction tlock list */ + if (tid) { + tlck->next = TxBlock[tid].next; + TxBlock[tid].next = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + jfs_ip->atlnext = TxAnchor.anon_list; + jfs_ip->atlprev = 0; + TxAnchor.anon_list = ip; + if (jfs_ip->atlnext) + JFS_IP(jfs_ip->atlnext)->atlprev = ip; + } + } + + /* initialize type dependent area for linelock */ + linelock = (linelock_t *) & tlck->lock; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKSHORT; + linelock->index = 0; + + switch (type & tlckTYPE) { + case tlckDTREE: + linelock->l2linesize = L2DTSLOTSIZE; + break; + + case tlckXTREE: + linelock->l2linesize = L2XTSLOTSIZE; + + xtlck = (xtlock_t *) linelock; + xtlck->header.offset = 0; + xtlck->header.length = 2; + + if (type & tlckNEW) { + xtlck->lwm.offset = XTENTRYSTART; + } else { + if (mp->xflag & COMMIT_PAGE) + p = (xtpage_t *) mp->data; + else + p = &jfs_ip->i_xtroot; + xtlck->lwm.offset = + le16_to_cpu(p->header.nextindex); + } + xtlck->lwm.length = 0; /* ! 
*/
+
+		xtlck->index = 2;
+		break;
+
+	case tlckINODE:
+		linelock->l2linesize = L2INODESLOTSIZE;
+		break;
+
+	case tlckDATA:
+		linelock->l2linesize = L2DATASLOTSIZE;
+		break;
+
+	default:
+		jERROR(1, ("UFO tlock:0x%p\n", tlck));
+	}
+
+	/*
+	 * update tlock vector
+	 */
+      grantLock:
+	tlck->type |= type;
+
+	TXN_UNLOCK();
+
+	return tlck;
+
+	/*
+	 * page is being locked by another transaction:
+	 */
+      waitLock:
+	/* Only locks on ipimap or ipaimap should reach here */
+	/* assert(jfs_ip->fileset == AGGREGATE_I); */
+	if (jfs_ip->fileset != AGGREGATE_I) {
+		jERROR(1, ("txLock: trying to lock locked page!\n"));
+		dump_mem("ip", ip, sizeof(struct inode));
+		dump_mem("mp", mp, sizeof(metapage_t));
+		dump_mem("Locker's tblk", &TxBlock[xtid],
+			 sizeof(tblock_t));
+		dump_mem("Tlock", tlck, sizeof(tlock_t));
+		BUG();
+	}
+	INCREMENT(stattx.waitlock);	/* statistics */
+	release_metapage(mp);
+
+	TxBlock[tid].locker = xtid;
+	jEVENT(0, ("txLock: in waitLock, tid = %d, xtid = %d, lid = %d\n",
+		   tid, xtid, lid));
+	TXN_SLEEP_DROP_LOCK(&TxBlock[xtid].waitor);
+	jEVENT(0, ("txLock: awakened tid = %d, lid = %d\n", tid, lid));
+
+	return NULL;
+}
+
+
+/*
+ * NAME:	txRelease()
+ *
+ * FUNCTION:	Release buffers associated with transaction locks, but don't
+ *		mark homeok yet.  This allows other transactions to modify
+ *		buffers, but won't let them go to disk until commit record
+ *		actually gets written.
+ *
+ * PARAMETER:
+ *		tblk	-
+ *
+ * RETURN:	Errors from subroutines.
+ */
+static void txRelease(tblock_t * tblk)
+{
+	metapage_t *mp;
+	int lid;
+	tlock_t *tlck;
+
+	TXN_LOCK();
+
+	for (lid = tblk->next; lid > 0; lid = tlck->next) {
+		tlck = &TxLock[lid];
+		if ((mp = tlck->mp) != NULL &&
+		    (tlck->type & tlckBTROOT) == 0) {
+			assert(mp->xflag & COMMIT_PAGE);
+			mp->lid = 0;
+		}
+	}
+
+	/*
+	 * wakeup transactions waiting on a page locked
+	 * by the current transaction
+	 */
+	TXN_WAKEUP(&tblk->waitor);
+
+	TXN_UNLOCK();
+}
+
+
+/*
+ * NAME:	txUnlock()
+ *
+ * FUNCTION:	Initiates pageout of pages modified by tid in journalled
+ *		objects and frees their lockwords.
+ *
+ * PARAMETER:
+ *		flag	-
+ *
+ * RETURN:	Errors from subroutines.
+ */
+static void txUnlock(tblock_t * tblk, int flag)
+{
+	tlock_t *tlck;
+	linelock_t *linelock;
+	int lid, next, llid, k;
+	metapage_t *mp;
+	log_t *log;
+	int force;
+	int difft, diffp;
+
+	jFYI(1, ("txUnlock: tblk = 0x%p\n", tblk));
+	log = (log_t *) JFS_SBI(tblk->sb)->log;
+	force = flag & COMMIT_FLUSH;
+	if (log->syncbarrier)
+		force |= COMMIT_FORCE;
+
+	/*
+	 * mark page under tlock homeok (its log has been written):
+	 * if caller has specified FORCE (e.g., iRecycle()), or
+	 * if syncwait for the log is set (i.e., the log sync point
+	 * has fallen behind), or
+	 * if syncpt is set for the page, or
+	 * if the page is new, initiate pageout;
+	 * otherwise, leave the page in memory.
+	 */
+	for (lid = tblk->next; lid > 0; lid = next) {
+		tlck = &TxLock[lid];
+		next = tlck->next;
+
+		jFYI(1, ("unlocking lid = %d, tlck = 0x%p\n", lid, tlck));
+
+		/* unbind page from tlock */
+		if ((mp = tlck->mp) != NULL &&
+		    (tlck->type & tlckBTROOT) == 0) {
+			assert(mp->xflag & COMMIT_PAGE);
+
+			/* hold buffer
+			 *
+			 * It's possible that someone else has the metapage.
+			 * The only things we're changing are nohomeok, which
+			 * is handled atomically, and clsn which is protected
+			 * by the LOGSYNC_LOCK.
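
The clsn bookkeeping just below compares log sequence numbers that live in a circular log, so a plain greater-than test is wrong once the write pointer wraps. JFS instead measures each lsn as its forward distance from the sync point, which is what the logdiff() macro used below computes, and lets the larger distance (the younger lsn) win. A compilable sketch of that comparison; the logdiff definition here is an assumption modeled on its use in this function, not a copy of the kernel macro:

#include <stdio.h>

/* distance of lsn ahead of the sync point in a circular log */
static int logdiff(int lsn, int syncpt, int logsize)
{
	int diff = lsn - syncpt;

	if (diff < 0)
		diff += logsize;
	return diff;
}

int main(void)
{
	int logsize = 0x1000, syncpt = 0xf00;
	int tblk_clsn = 0x010;	/* already wrapped past the end */
	int mp_clsn = 0xf80;	/* not wrapped yet */

	/* the wrapped lsn is farther from the syncpt, hence younger */
	if (logdiff(tblk_clsn, syncpt, logsize) >
	    logdiff(mp_clsn, syncpt, logsize))
		printf("metapage inherits clsn 0x%x\n", tblk_clsn);
	return 0;
}
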
+ */ + hold_metapage(mp, 1); + + assert(atomic_read(&mp->nohomeok) > 0); + atomic_dec(&mp->nohomeok); + + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + if (mp->clsn) { + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + } else + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + + assert(!(tlck->flag & tlckFREEPAGE)); + + if (tlck->flag & tlckWRITEPAGE) { + write_metapage(mp); + } else { + /* release page which has been forced */ + release_metapage(mp); + } + } + + /* insert tlock, and linelock(s) of the tlock if any, + * at head of freelist + */ + TXN_LOCK(); + + llid = ((linelock_t *) & tlck->lock)->next; + while (llid) { + linelock = (linelock_t *) & TxLock[llid]; + k = linelock->next; + TXLOCK_FREE(llid); + llid = k; + } + TXLOCK_FREE(lid); + + TXN_UNLOCK(); + } + tblk->next = 0; + + /* + * remove tblock from logsynclist + * (allocation map pages inherited lsn of tblk and + * has been inserted in logsync list at txUpdateMap()) + */ + if (tblk->lsn) { + LOGSYNC_LOCK(log); + log->count--; + list_del(&tblk->synclist); + LOGSYNC_UNLOCK(log); + } +} + + +/* + * txMaplock() + * + * function: allocate a transaction lock for freed page/entry; + * for freed page, maplock is used as xtlock/dtlock type; + */ +tlock_t *txMaplock(int tid, struct inode *ip, int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int lid; + tlock_t *tlck; + maplock_t *maplock; + + TXN_LOCK(); + + /* + * allocate a tlock + */ + TXLOCK_ALLOC(lid); + tlck = &TxLock[lid]; + + /* + * initialize tlock + */ + tlck->tid = tid; + + /* bind the tlock and the object */ + tlck->flag = tlckINODELOCK; + tlck->ip = ip; + tlck->mp = NULL; + + tlck->type = type; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at head of transaction tlock list */ + if (tid) { + tlck->next = TxBlock[tid].next; + TxBlock[tid].next = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + jfs_ip->atlnext = TxAnchor.anon_list; + jfs_ip->atlprev = 0; + TxAnchor.anon_list = ip; + if (jfs_ip->atlnext) + JFS_IP(jfs_ip->atlnext)->atlprev = ip; + } + } + + TXN_UNLOCK(); + + /* initialize type dependent area for maplock */ + maplock = (maplock_t *) & tlck->lock; + maplock->next = 0; + maplock->maxcnt = 0; + maplock->index = 0; + + return tlck; +} + + +/* + * txLinelock() + * + * function: allocate a transaction lock for log vector list + */ +linelock_t *txLinelock(linelock_t * tlock) +{ + int lid; + tlock_t *tlck; + linelock_t *linelock; + + TXN_LOCK(); + + /* allocate a TxLock structure */ + TXLOCK_ALLOC(lid); + tlck = &TxLock[lid]; + + TXN_UNLOCK(); + + /* initialize linelock */ + linelock = (linelock_t *) tlck; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKLONG; + linelock->index = 0; + + /* append linelock after tlock */ + linelock->next = tlock->next; + tlock->next = lid; + + return linelock; +} + + + +/* + * transaction commit management + * ----------------------------- + */ + +/* + * NAME: txCommit() + * + * FUNCTION: commit the changes to the objects specified in + * clist. For journalled segments only the + * changes of the caller are committed, ie by tid. 
+ * for non-journalled segments the data are flushed to + * disk and then the change to the disk inode and indirect + * blocks committed (so blocks newly allocated to the + * segment will be made a part of the segment atomically). + * + * all of the segments specified in clist must be in + * one file system. no more than 6 segments are needed + * to handle all unix svcs. + * + * if the i_nlink field (i.e. disk inode link count) + * is zero, and the type of inode is a regular file or + * directory, or symbolic link , the inode is truncated + * to zero length. the truncation is committed but the + * VM resources are unaffected until it is closed (see + * iput and iclose). + * + * PARAMETER: + * + * RETURN: + * + * serialization: + * on entry the inode lock on each segment is assumed + * to be held. + * + * i/o error: + */ +int txCommit(int tid, /* transaction identifier */ + int nip, /* number of inodes to commit */ + struct inode **iplist, /* list of inode to commit */ + int flag) +{ + int rc = 0, rc1 = 0; + commit_t cd; + log_t *log; + tblock_t *tblk; + lrd_t *lrd; + int lsn; + struct inode *ip; + struct jfs_inode_info *jfs_ip; + int k, n; + ino_t top; + struct super_block *sb; + unsigned long flags; + + jFYI(1, ("txCommit, tid = %d, flag = %d\n", tid, flag)); + /* is read-only file system ? */ + if (isReadOnly(iplist[0])) { + rc = EROFS; + goto TheEnd; + } + + sb = cd.sb = iplist[0]->i_sb; + + if (tid == 0) + txBegin(sb, &tid, 0); + tblk = &TxBlock[tid]; + + /* + * initialize commit structure + */ + log = (log_t *) JFS_SBI(sb)->log; + cd.log = log; + + /* initialize log record descriptor in commit */ + lrd = &cd.lrd; + lrd->logtid = cpu_to_le32(tblk->logtid); + lrd->backchain = 0; + + tblk->xflag |= flag; + + if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) + tblk->xflag |= COMMIT_LAZY; + /* + * prepare non-journaled objects for commit + * + * flush data pages of non-journaled file + * to prevent the file getting non-initialized disk blocks + * in case of crash. + * (new blocks - ) + */ + cd.iplist = iplist; + cd.nip = nip; + + /* + * acquire transaction lock on (on-disk) inodes + * + * update on-disk inode from in-memory inode + * acquiring transaction locks for AFTER records + * on the on-disk inode of file object + * + * sort the inodes array by inode number in descending order + * to prevent deadlock when acquiring transaction lock + * of on-disk inodes on multiple on-disk inode pages by + * multiple concurrent transactions + */ + for (k = 0; k < cd.nip; k++) { + top = (cd.iplist[k])->i_ino; + for (n = k + 1; n < cd.nip; n++) { + ip = cd.iplist[n]; + if (ip->i_ino > top) { + top = ip->i_ino; + cd.iplist[n] = cd.iplist[k]; + cd.iplist[k] = ip; + } + } + + ip = cd.iplist[k]; + jfs_ip = JFS_IP(ip); + + /* + * BUGBUG - Should we call filemap_fdatasync here instead + * of fsync_inode_data? + * If we do, we have a deadlock condition since we may end + * up recursively calling jfs_get_block with the IWRITELOCK + * held. We may be able to do away with IWRITELOCK while + * committing transactions and use i_sem instead. + */ + if ((!S_ISDIR(ip->i_mode)) + && (tblk->flag & COMMIT_DELETE) == 0) + fsync_inode_data_buffers(ip); + + /* + * Mark inode as not dirty. 
It will still be on the dirty + * inode list, but we'll know not to commit it again unless + * it gets marked dirty again + */ + clear_cflag(COMMIT_Dirty, ip); + + /* inherit anonymous tlock(s) of inode */ + if (jfs_ip->atlhead) { + TxLock[jfs_ip->atltail].next = tblk->next; + tblk->next = jfs_ip->atlhead; + jfs_ip->atlhead = jfs_ip->atltail = 0; + TXN_LOCK(); + if (jfs_ip->atlprev) + JFS_IP(jfs_ip->atlprev)->atlnext = + jfs_ip->atlnext; + else if (TxAnchor.anon_list == ip) + TxAnchor.anon_list = jfs_ip->atlnext; + else if (TxAnchor.anon_list2 == ip) + TxAnchor.anon_list2 = jfs_ip->atlnext; + else { + jERROR(1, + ("ip = 0x%p not found on anon_list\n", + ip)); + } + if (jfs_ip->atlnext) + JFS_IP(jfs_ip->atlnext)->atlprev = + jfs_ip->atlprev; + jfs_ip->atlnext = jfs_ip->atlprev = 0; + TXN_UNLOCK(); + } + + /* + * acquire transaction lock on on-disk inode page + * (become first tlock of the tblk's tlock list) + */ + if (((rc = diWrite(tid, ip)))) + goto out; + } + + /* + * write log records from transaction locks + * + * txUpdateMap() resets XAD_NEW in XAD. + */ + if ((rc = txLog(log, tblk, &cd))) + goto TheEnd; + + /* + * Ensure that inode isn't reused before + * lazy commit thread finishes processing + */ + if (tblk->xflag & (COMMIT_CREATE | COMMIT_DELETE)) + atomic_inc(&tblk->ip->i_count); + if (tblk->xflag & COMMIT_DELETE) { + ip = tblk->ip; + assert((ip->i_nlink == 0) && !test_cflag(COMMIT_Nolink, ip)); + set_cflag(COMMIT_Nolink, ip); + } + + /* + * write COMMIT log record + */ + lrd->type = cpu_to_le16(LOG_COMMIT); + lrd->length = 0; + lsn = lmLog(log, tblk, lrd, NULL); + + if (!(tblk->xflag & COMMIT_FORCE)) { + LAZY_LOCK(flags); + TxAnchor.lazyQsize++; + if ((TxAnchor.lazyQwait == 0) && + (TxAnchor.lazyQsize > LazyHWM)) { + jFYI(1, ("Lazy Commit Queue hit HWM\n")); + TxAnchor.lazyQwait = 1; + } + LAZY_UNLOCK(flags); + } + lmGroupCommit(log, tblk); + + /* + * - transaction is now committed - + */ + + /* + * force pages in careful update + * (imap addressing structure update) + */ + if (flag & COMMIT_FORCE) + txForce(tblk); + + /* + * update allocation map. + * + * update inode allocation map and inode: + * free pager lock on memory object of inode if any. + * update block allocation map. + * + * txUpdateMap() resets XAD_NEW in XAD. + */ + if (tblk->xflag & COMMIT_FORCE) + txUpdateMap(tblk); + + /* + * free transaction locks and pageout/free pages + */ + txRelease(tblk); + + if ((tblk->flag & tblkGC_LAZY) == 0) + txUnlock(tblk, flag); + + + /* + * reset in-memory object state + */ + for (k = 0; k < cd.nip; k++) { + ip = cd.iplist[k]; + jfs_ip = JFS_IP(ip); + + /* + * reset in-memory inode state + */ + jfs_ip->bxflag = 0; + jfs_ip->blid = 0; + } + + out: + if (rc != 0) + txAbortCommit(&cd, rc); + else + rc = rc1; + + TheEnd: + jFYI(1, ("txCommit: tid = %d, returning %d\n", tid, rc)); + return rc; +} + + +/* + * NAME: txLog() + * + * FUNCTION: Writes AFTER log records for all lines modified + * by tid for segments specified by inodes in comdata. + * Code assumes only WRITELOCKS are recorded in lockwords. 
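+ *		The routine dispatches on tlck->type to the per-object
+ *		record formatters below (xtLog/dtLog/diLog/mapLog/dataLog).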
+ * + * PARAMETERS: + * + * RETURN : + */ +static int txLog(log_t * log, tblock_t * tblk, commit_t * cd) +{ + int rc = 0; + struct inode *ip; + int lid; + tlock_t *tlck; + lrd_t *lrd = &cd->lrd; + + /* + * write log record(s) for each tlock of transaction, + */ + for (lid = tblk->next; lid > 0; lid = tlck->next) { + tlck = &TxLock[lid]; + + tlck->flag |= tlckLOG; + + /* initialize lrd common */ + ip = tlck->ip; + lrd->aggregate = cpu_to_le32(ip->i_dev); + lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); + lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); + + if (tlck->mp) + hold_metapage(tlck->mp, 0); + + /* write log record of page from the tlock */ + switch (tlck->type & tlckTYPE) { + case tlckXTREE: + xtLog(log, tblk, lrd, tlck); + break; + + case tlckDTREE: + dtLog(log, tblk, lrd, tlck); + break; + + case tlckINODE: + diLog(log, tblk, lrd, tlck, cd); + break; + + case tlckMAP: + mapLog(log, tblk, lrd, tlck); + break; + + case tlckDATA: + dataLog(log, tblk, lrd, tlck); + break; + + default: + jERROR(1, ("UFO tlock:0x%p\n", tlck)); + } + if (tlck->mp) + release_metapage(tlck->mp); + } + + return rc; +} + + +/* + * diLog() + * + * function: log inode tlock and format maplock to update bmap; + */ +int diLog(log_t * log, + tblock_t * tblk, lrd_t * lrd, tlock_t * tlck, commit_t * cd) +{ + int rc = 0; + metapage_t *mp; + pxd_t *pxd; + pxdlock_t *pxdlock; + + mp = tlck->mp; + + /* initialize as REDOPAGE record format */ + lrd->log.redopage.type = cpu_to_le16(LOG_INODE); + lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); + + pxd = &lrd->log.redopage.pxd; + + /* + * inode after image + */ + if (tlck->type & tlckENTRY) { + /* log after-image for logredo(): */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); +// *pxd = mp->cm_pxd; + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + } else if (tlck->type & tlckFREE) { + /* + * free inode extent + * + * (pages of the freed inode extent have been invalidated and + * a maplock for free of the extent has been formatted at + * txLock() time); + * + * the tlock had been acquired on the inode allocation map page + * (iag) that specifies the freed extent, even though the map + * page is not itself logged, to prevent pageout of the map + * page before the log; + */ + assert(tlck->type & tlckFREE); + + /* log LOG_NOREDOINOEXT of the freed inode extent for + * logredo() to start NoRedoPage filters, and to update + * imap and bmap for free of the extent; + */ + lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); + /* + * For the LOG_NOREDOINOEXT record, we need + * to pass the IAG number and inode extent + * index (within that IAG) from which the + * the extent being released. These have been + * passed to us in the iplist[1] and iplist[2]. 
+ */ + lrd->log.noredoinoext.iagnum = + cpu_to_le32((u32) (size_t) cd->iplist[1]); + lrd->log.noredoinoext.inoext_idx = + cpu_to_le32((u32) (size_t) cd->iplist[2]); + + pxdlock = (pxdlock_t *) & tlck->lock; + *pxd = pxdlock->pxd; + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + + /* update bmap */ + tlck->flag |= tlckUPDATEMAP; + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + } else { + jERROR(2, ("diLog: UFO type tlck:0x%p\n", tlck)); + } +#ifdef _JFS_WIP + /* + * alloc/free external EA extent + * + * a maplock for txUpdateMap() to update bPWMAP for alloc/free + * of the extent has been formatted at txLock() time; + */ + else { + assert(tlck->type & tlckEA); + + /* log LOG_UPDATEMAP for logredo() to update bmap for + * alloc of new (and free of old) external EA extent; + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + pxdlock = (pxdlock_t *) & tlck->lock; + nlock = pxdlock->index; + for (i = 0; i < nlock; i++, pxdlock++) { + if (pxdlock->flag & mlckALLOCPXD) + lrd->log.updatemap.type = + cpu_to_le16(LOG_ALLOCPXD); + else + lrd->log.updatemap.type = + cpu_to_le16(LOG_FREEPXD); + lrd->log.updatemap.nxd = cpu_to_le16(1); + lrd->log.updatemap.pxd = pxdlock->pxd; + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + } + + /* update bmap */ + tlck->flag |= tlckUPDATEMAP; + } +#endif /* _JFS_WIP */ + + return rc; +} + + +/* + * dataLog() + * + * function: log data tlock + */ +int dataLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + metapage_t *mp; + pxd_t *pxd; + int rc; + s64 xaddr; + int xflag; + s32 xlen; + + mp = tlck->mp; + + /* initialize as REDOPAGE record format */ + lrd->log.redopage.type = cpu_to_le16(LOG_DATA); + lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); + + pxd = &lrd->log.redopage.pxd; + + /* log after-image for logredo(): */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + + if (JFS_IP(tlck->ip)->next_index < MAX_INLINE_DIRTABLE_ENTRY) { + /* + * The table has been truncated, we've must have deleted + * the last entry, so don't bother logging this + */ + mp->lid = 0; + atomic_dec(&mp->nohomeok); + discard_metapage(mp); + tlck->mp = 0; + return 0; + } + + rc = xtLookup(tlck->ip, mp->index, 1, &xflag, &xaddr, &xlen, 1); + if (rc || (xlen == 0)) { + jERROR(1, ("dataLog: can't find physical address\n")); + return 0; + } + + PXDaddress(pxd, xaddr); + PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); + + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + + return 0; +} + + +/* + * dtLog() + * + * function: log dtree tlock and format maplock to update bmap; + */ +void dtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + struct inode *ip; + metapage_t *mp; + pxdlock_t *pxdlock; + pxd_t *pxd; + + ip = tlck->ip; + mp = tlck->mp; + + /* initialize as REDOPAGE/NOREDOPAGE record format */ + lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); + lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); + + pxd = &lrd->log.redopage.pxd; + + if (tlck->type & tlckBTROOT) + lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); + + /* + * page extension via relocation: entry insertion; + * page extension in-place: entry insertion; + * new right page from page split, reinitialized in-line + * root from root page split: entry insertion; + */ + if (tlck->type & (tlckNEW | tlckEXTEND)) { + /* log after-image of the new page for logredo(): + * mark log (LOG_NEW) for logredo() to initialize + * freelist and update bmap for alloc 
of the new page; + */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + if (tlck->type & tlckEXTEND) + lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); + else + lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); +// *pxd = mp->cm_pxd; + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* format a maplock for txUpdateMap() to update bPMAP for + * alloc of the new page; + */ + if (tlck->type & tlckBTROOT) + return; + tlck->flag |= tlckUPDATEMAP; + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckALLOCPXD; + pxdlock->pxd = *pxd; + + pxdlock->index = 1; + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + return; + } + + /* + * entry insertion/deletion, + * sibling page link update (old right page before split); + */ + if (tlck->type & (tlckENTRY | tlckRELINK)) { + /* log after-image for logredo(): */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + return; + } + + /* + * page deletion: page has been invalidated + * page relocation: source extent + * + * a maplock for free of the page has been formatted + * at txLock() time); + */ + if (tlck->type & (tlckFREE | tlckRELOCATE)) { + /* log LOG_NOREDOPAGE of the deleted page for logredo() + * to start NoRedoPage filter and to update bmap for free + * of the deletd page + */ + lrd->type = cpu_to_le16(LOG_NOREDOPAGE); + pxdlock = (pxdlock_t *) & tlck->lock; + *pxd = pxdlock->pxd; + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + + /* a maplock for txUpdateMap() for free of the page + * has been formatted at txLock() time; + */ + tlck->flag |= tlckUPDATEMAP; + } + return; +} + + +/* + * xtLog() + * + * function: log xtree tlock and format maplock to update bmap; + */ +void xtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + struct inode *ip; + metapage_t *mp; + xtpage_t *p; + xtlock_t *xtlck; + maplock_t *maplock; + xdlistlock_t *xadlock; + pxdlock_t *pxdlock; + pxd_t *pxd; + int next, lwm, hwm; + + ip = tlck->ip; + mp = tlck->mp; + + /* initialize as REDOPAGE/NOREDOPAGE record format */ + lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); + lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); + + pxd = &lrd->log.redopage.pxd; + + if (tlck->type & tlckBTROOT) { + lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); + p = &JFS_IP(ip)->i_xtroot; + if (S_ISDIR(ip->i_mode)) + lrd->log.redopage.type |= + cpu_to_le16(LOG_DIR_XTREE); + } else + p = (xtpage_t *) mp->data; + next = le16_to_cpu(p->header.nextindex); + + xtlck = (xtlock_t *) & tlck->lock; + + maplock = (maplock_t *) & tlck->lock; + xadlock = (xdlistlock_t *) maplock; + + /* + * entry insertion/extension; + * sibling page link update (old right page before split); + */ + if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { + /* log after-image for logredo(): + * logredo() will update bmap for alloc of new/extended + * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from + * after-image of XADlist; + * logredo() resets (XAD_NEW|XAD_EXTEND) flag when + * applying the after-image to the meta-data page. 
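+	 * (XAD[lwm:next) is the run of slots this transaction dirtied:
+	 * lwm is the lowest slot written, next is the page's current
+	 * nextindex.)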
+ */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); +// *pxd = mp->cm_pxd; + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* format a maplock for txUpdateMap() to update bPMAP + * for alloc of new/extended extents of XAD[lwm:next) + * from the page itself; + * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. + */ + lwm = xtlck->lwm.offset; + if (lwm == 0) + lwm = XTPAGEMAXSLOT; + + if (lwm == next) + goto out; + assert(lwm < next); + tlck->flag |= tlckUPDATEMAP; + xadlock->flag = mlckALLOCXADLIST; + xadlock->count = next - lwm; + if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + int i; + /* + * Lazy commit may allow xtree to be modified before + * txUpdateMap runs. Copy xad into linelock to + * preserve correct data. + */ + xadlock->xdlist = &xtlck->pxdlock; + memcpy(xadlock->xdlist, &p->xad[lwm], + sizeof(xad_t) * xadlock->count); + + for (i = 0; i < xadlock->count; i++) + p->xad[lwm + i].flag &= + ~(XAD_NEW | XAD_EXTENDED); + } else { + /* + * xdlist will point to into inode's xtree, ensure + * that transaction is not committed lazily. + */ + xadlock->xdlist = &p->xad[lwm]; + tblk->xflag &= ~COMMIT_LAZY; + } + jFYI(1, + ("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d\n", + tlck->ip, mp, tlck, lwm, xadlock->count)); + + maplock->index = 1; + + out: + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + + return; + } + + /* + * page deletion: file deletion/truncation (ref. xtTruncate()) + * + * (page will be invalidated after log is written and bmap + * is updated from the page); + */ + if (tlck->type & tlckFREE) { + /* LOG_NOREDOPAGE log for NoRedoPage filter: + * if page free from file delete, NoRedoFile filter from + * inode image of zero link count will subsume NoRedoPage + * filters for each page; + * if page free from file truncattion, write NoRedoPage + * filter; + * + * upadte of block allocation map for the page itself: + * if page free from deletion and truncation, LOG_UPDATEMAP + * log for the page itself is generated from processing + * its parent page xad entries; + */ + /* if page free from file truncation, log LOG_NOREDOPAGE + * of the deleted page for logredo() to start NoRedoPage + * filter for the page; + */ + if (tblk->xflag & COMMIT_TRUNCATE) { + /* write NOREDOPAGE for the page */ + lrd->type = cpu_to_le16(LOG_NOREDOPAGE); + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb-> + s_blocksize_bits); + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + + if (tlck->type & tlckBTROOT) { + /* Empty xtree must be logged */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + } + } + + /* init LOG_UPDATEMAP of the freed extents + * XAD[XTENTRYSTART:hwm) from the deleted page itself + * for logredo() to update bmap; + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); + xtlck = (xtlock_t *) & tlck->lock; + hwm = xtlck->hwm.offset; + lrd->log.updatemap.nxd = + cpu_to_le16(hwm - XTENTRYSTART + 1); + /* reformat linelock for lmLog() */ + xtlck->header.offset = XTENTRYSTART; + xtlck->header.length = hwm - XTENTRYSTART + 1; + xtlck->index = 1; + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* format a maplock for txUpdateMap() to update bmap + * to free extents of XAD[XTENTRYSTART:hwm) from the + * deleted page itself; + */ + tlck->flag |= tlckUPDATEMAP; + xadlock->flag = 
mlckFREEXADLIST; + xadlock->count = hwm - XTENTRYSTART + 1; + if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + /* + * Lazy commit may allow xtree to be modified before + * txUpdateMap runs. Copy xad into linelock to + * preserve correct data. + */ + xadlock->xdlist = &xtlck->pxdlock; + memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART], + sizeof(xad_t) * xadlock->count); + } else { + /* + * xdlist will point to into inode's xtree, ensure + * that transaction is not committed lazily unless + * we're deleting the inode (unlink). In that case + * we have special logic for the inode to be + * unlocked by the lazy commit thread. + */ + xadlock->xdlist = &p->xad[XTENTRYSTART]; + if ((tblk->xflag & COMMIT_LAZY) && + (tblk->xflag & COMMIT_DELETE) && + (tblk->ip == ip)) + set_cflag(COMMIT_Holdlock, ip); + else + tblk->xflag &= ~COMMIT_LAZY; + } + jFYI(1, + ("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2\n", + tlck->ip, mp, xadlock->count)); + + maplock->index = 1; + + /* mark page as invalid */ + if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) + && !(tlck->type & tlckBTROOT)) + tlck->flag |= tlckFREEPAGE; + /* + else (tblk->xflag & COMMIT_PMAP) + ? release the page; + */ + return; + } + + /* + * page/entry truncation: file truncation (ref. xtTruncate()) + * + * |----------+------+------+---------------| + * | | | + * | | hwm - hwm before truncation + * | next - truncation point + * lwm - lwm before truncation + * header ? + */ + if (tlck->type & tlckTRUNCATE) { + pxd_t tpxd; /* truncated extent of xad */ + + /* + * For truncation the entire linelock may be used, so it would + * be difficult to store xad list in linelock itself. + * Therefore, we'll just force transaction to be committed + * synchronously, so that xtree pages won't be changed before + * txUpdateMap runs. + */ + tblk->xflag &= ~COMMIT_LAZY; + lwm = xtlck->lwm.offset; + if (lwm == 0) + lwm = XTPAGEMAXSLOT; + hwm = xtlck->hwm.offset; + + /* + * write log records + */ + /* + * allocate entries XAD[lwm:next]: + */ + if (lwm < next) { + /* log after-image for logredo(): + * logredo() will update bmap for alloc of new/extended + * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from + * after-image of XADlist; + * logredo() resets (XAD_NEW|XAD_EXTEND) flag when + * applying the after-image to the meta-data page. 
+ */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb-> + s_blocksize_bits); + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + } + + /* + * truncate entry XAD[hwm == next - 1]: + */ + if (hwm == next - 1) { + /* init LOG_UPDATEMAP for logredo() to update bmap for + * free of truncated delta extent of the truncated + * entry XAD[next - 1]: + * (xtlck->pxdlock = truncated delta extent); + */ + pxdlock = (pxdlock_t *) & xtlck->pxdlock; + /* assert(pxdlock->type & tlckTRUNCATE); */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); + lrd->log.updatemap.nxd = cpu_to_le16(1); + lrd->log.updatemap.pxd = pxdlock->pxd; + tpxd = pxdlock->pxd; /* save to format maplock */ + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + } + + /* + * free entries XAD[next:hwm]: + */ + if (hwm >= next) { + /* init LOG_UPDATEMAP of the freed extents + * XAD[next:hwm] from the deleted page itself + * for logredo() to update bmap; + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = + cpu_to_le16(LOG_FREEXADLIST); + xtlck = (xtlock_t *) & tlck->lock; + hwm = xtlck->hwm.offset; + lrd->log.updatemap.nxd = + cpu_to_le16(hwm - next + 1); + /* reformat linelock for lmLog() */ + xtlck->header.offset = next; + xtlck->header.length = hwm - next + 1; + xtlck->index = 1; + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + } + + /* + * format maplock(s) for txUpdateMap() to update bmap + */ + maplock->index = 0; + + /* + * allocate entries XAD[lwm:next): + */ + if (lwm < next) { + /* format a maplock for txUpdateMap() to update bPMAP + * for alloc of new/extended extents of XAD[lwm:next) + * from the page itself; + * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
+ */ + tlck->flag |= tlckUPDATEMAP; + xadlock->flag = mlckALLOCXADLIST; + xadlock->count = next - lwm; + xadlock->xdlist = &p->xad[lwm]; + + jFYI(1, + ("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d\n", + tlck->ip, mp, xadlock->count, lwm, next)); + maplock->index++; + xadlock++; + } + + /* + * truncate entry XAD[hwm == next - 1]: + */ + if (hwm == next - 1) { + pxdlock_t *pxdlock; + + /* format a maplock for txUpdateMap() to update bmap + * to free truncated delta extent of the truncated + * entry XAD[next - 1]; + * (xtlck->pxdlock = truncated delta extent); + */ + tlck->flag |= tlckUPDATEMAP; + pxdlock = (pxdlock_t *) xadlock; + pxdlock->flag = mlckFREEPXD; + pxdlock->count = 1; + pxdlock->pxd = tpxd; + + jFYI(1, + ("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d\n", + ip, mp, pxdlock->count, hwm)); + maplock->index++; + xadlock++; + } + + /* + * free entries XAD[next:hwm]: + */ + if (hwm >= next) { + /* format a maplock for txUpdateMap() to update bmap + * to free extents of XAD[next:hwm] from thedeleted + * page itself; + */ + tlck->flag |= tlckUPDATEMAP; + xadlock->flag = mlckFREEXADLIST; + xadlock->count = hwm - next + 1; + xadlock->xdlist = &p->xad[next]; + + jFYI(1, + ("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d\n", + tlck->ip, mp, xadlock->count, next, hwm)); + maplock->index++; + } + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + } + return; +} + + +/* + * mapLog() + * + * function: log from maplock of freed data extents; + */ +void mapLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + pxdlock_t *pxdlock; + int i, nlock; + pxd_t *pxd; + + /* + * page relocation: free the source page extent + * + * a maplock for txUpdateMap() for free of the page + * has been formatted at txLock() time saving the src + * relocated page address; + */ + if (tlck->type & tlckRELOCATE) { + /* log LOG_NOREDOPAGE of the old relocated page + * for logredo() to start NoRedoPage filter; + */ + lrd->type = cpu_to_le16(LOG_NOREDOPAGE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxd = &lrd->log.redopage.pxd; + *pxd = pxdlock->pxd; + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + + /* (N.B. 
currently, logredo() does NOT update bmap + * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); + * if page free from relocation, LOG_UPDATEMAP log is + * specifically generated now for logredo() + * to update bmap for free of src relocated page; + * (new flag LOG_RELOCATE may be introduced which will + * inform logredo() to start NORedoPage filter and also + * update block allocation map at the same time, thus + * avoiding an extra log write); + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); + lrd->log.updatemap.nxd = cpu_to_le16(1); + lrd->log.updatemap.pxd = pxdlock->pxd; + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + + /* a maplock for txUpdateMap() for free of the page + * has been formatted at txLock() time; + */ + tlck->flag |= tlckUPDATEMAP; + return; + } + /* + + * Otherwise it's not a relocate request + * + */ + else { + /* log LOG_UPDATEMAP for logredo() to update bmap for + * free of truncated/relocated delta extent of the data; + * e.g.: external EA extent, relocated/truncated extent + * from xtTailgate(); + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + pxdlock = (pxdlock_t *) & tlck->lock; + nlock = pxdlock->index; + for (i = 0; i < nlock; i++, pxdlock++) { + if (pxdlock->flag & mlckALLOCPXD) + lrd->log.updatemap.type = + cpu_to_le16(LOG_ALLOCPXD); + else + lrd->log.updatemap.type = + cpu_to_le16(LOG_FREEPXD); + lrd->log.updatemap.nxd = cpu_to_le16(1); + lrd->log.updatemap.pxd = pxdlock->pxd; + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + jFYI(1, ("mapLog: xaddr:0x%lx xlen:0x%x\n", + (ulong) addressPXD(&pxdlock->pxd), + lengthPXD(&pxdlock->pxd))); + } + + /* update bmap */ + tlck->flag |= tlckUPDATEMAP; + } +} + + +/* + * txEA() + * + * function: acquire maplock for EA/ACL extents or + * set COMMIT_INLINE flag; + */ +void txEA(int tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) +{ + tlock_t *tlck = NULL; + pxdlock_t *maplock = NULL, *pxdlock = NULL; + + /* + * format maplock for alloc of new EA extent + */ + if (newea) { + /* Since the newea could be a completely zeroed entry we need to + * check for the two flags which indicate we should actually + * commit new EA data + */ + if (newea->flag & DXD_EXTENT) { + tlck = txMaplock(tid, ip, tlckMAP); + maplock = (pxdlock_t *) & tlck->lock; + pxdlock = (pxdlock_t *) maplock; + pxdlock->flag = mlckALLOCPXD; + PXDaddress(&pxdlock->pxd, addressDXD(newea)); + PXDlength(&pxdlock->pxd, lengthDXD(newea)); + pxdlock++; + maplock->index = 1; + } else if (newea->flag & DXD_INLINE) { + tlck = NULL; + + set_cflag(COMMIT_Inlineea, ip); + } + } + + /* + * format maplock for free of old EA extent + */ + if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { + if (tlck == NULL) { + tlck = txMaplock(tid, ip, tlckMAP); + maplock = (pxdlock_t *) & tlck->lock; + pxdlock = (pxdlock_t *) maplock; + maplock->index = 0; + } + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, addressDXD(oldea)); + PXDlength(&pxdlock->pxd, lengthDXD(oldea)); + maplock->index++; + } +} + + +/* + * txForce() + * + * function: synchronously write pages locked by transaction + * after txLog() but before txUpdateMap(); + */ +void txForce(tblock_t * tblk) +{ + tlock_t *tlck; + int lid, next; + metapage_t *mp; + + /* + * reverse the order of transaction tlocks in + * careful update order of address index pages + * (right to left, bottom up) + */ + tlck = &TxLock[tblk->next]; + lid = tlck->next; + tlck->next = 0; + while (lid) { + tlck = &TxLock[lid]; + next = 
tlck->next; + tlck->next = tblk->next; + tblk->next = lid; + lid = next; + } + + /* + * synchronously write the page, and + * hold the page for txUpdateMap(); + */ + for (lid = tblk->next; lid > 0; lid = next) { + tlck = &TxLock[lid]; + next = tlck->next; + + if ((mp = tlck->mp) != NULL && + (tlck->type & tlckBTROOT) == 0) { + assert(mp->xflag & COMMIT_PAGE); + + if (tlck->flag & tlckWRITEPAGE) { + tlck->flag &= ~tlckWRITEPAGE; + + /* do not release page to freelist */ + assert(atomic_read(&mp->nohomeok)); + hold_metapage(mp, 0); + write_metapage(mp); + } + } + } +} + + +/* + * txUpdateMap() + * + * function: update persistent allocation map (and working map + * if appropriate); + * + * parameter: + */ +static void txUpdateMap(tblock_t * tblk) +{ + struct inode *ip; + struct inode *ipimap; + int lid; + tlock_t *tlck; + maplock_t *maplock; + pxdlock_t pxdlock; + int maptype; + int k, nlock; + metapage_t *mp = 0; + + ipimap = JFS_SBI(tblk->sb)->ipimap; + + maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; + + + /* + * update block allocation map + * + * update allocation state in pmap (and wmap) and + * update lsn of the pmap page; + */ + /* + * scan each tlock/page of transaction for block allocation/free: + * + * for each tlock/page of transaction, update map. + * ? are there tlock for pmap and pwmap at the same time ? + */ + for (lid = tblk->next; lid > 0; lid = tlck->next) { + tlck = &TxLock[lid]; + + if ((tlck->flag & tlckUPDATEMAP) == 0) + continue; + + if (tlck->flag & tlckFREEPAGE) { + /* + * Another thread may attempt to reuse freed space + * immediately, so we want to get rid of the metapage + * before anyone else has a chance to get it. + * Lock metapage, update maps, then invalidate + * the metapage. + */ + mp = tlck->mp; + ASSERT(mp->xflag & COMMIT_PAGE); + hold_metapage(mp, 0); + } + + /* + * extent list: + * . in-line PXD list: + * . out-of-line XAD list: + */ + maplock = (maplock_t *) & tlck->lock; + nlock = maplock->index; + + for (k = 0; k < nlock; k++, maplock++) { + /* + * allocate blocks in persistent map: + * + * blocks have been allocated from wmap at alloc time; + */ + if (maplock->flag & mlckALLOC) { + txAllocPMap(ipimap, maplock, tblk); + } + /* + * free blocks in persistent and working map: + * blocks will be freed in pmap and then in wmap; + * + * ? 
tblock specifies the PMAP/PWMAP based upon
+			 * the transaction
+			 *
+			 * free blocks in persistent map:
+			 * blocks will be freed from wmap at last reference
+			 * release of the object for regular files;
+			 *
+			 * Always free blocks from both persistent & working
+			 * maps for directories
+			 */
+			else {	/* (maplock->flag & mlckFREE) */
+
+				if (S_ISDIR(tlck->ip->i_mode))
+					txFreeMap(ipimap, maplock,
+						  tblk, COMMIT_PWMAP);
+				else
+					txFreeMap(ipimap, maplock,
+						  tblk, maptype);
+			}
+		}
+		if (tlck->flag & tlckFREEPAGE) {
+			if (!(tblk->flag & tblkGC_LAZY)) {
+				/* This is equivalent to txRelease */
+				ASSERT(mp->lid == lid);
+				tlck->mp->lid = 0;
+			}
+			assert(atomic_read(&mp->nohomeok) == 1);
+			atomic_dec(&mp->nohomeok);
+			discard_metapage(mp);
+			tlck->mp = 0;
+		}
+	}
+	/*
+	 * update inode allocation map
+	 *
+	 * update allocation state in pmap and
+	 * update lsn of the pmap page;
+	 * update in-memory inode flag/state
+	 *
+	 * unlock mapper/write lock
+	 */
+	if (tblk->xflag & COMMIT_CREATE) {
+		ip = tblk->ip;
+
+		ASSERT(test_cflag(COMMIT_New, ip));
+		clear_cflag(COMMIT_New, ip);
+
+		diUpdatePMap(ipimap, ip->i_ino, FALSE, tblk);
+		ipimap->i_state |= I_DIRTY;
+		/* update persistent block allocation map
+		 * for the allocation of inode extent;
+		 */
+		pxdlock.flag = mlckALLOCPXD;
+		pxdlock.pxd = JFS_IP(ip)->ixpxd;
+		pxdlock.index = 1;
+		txAllocPMap(ip, (maplock_t *) & pxdlock, tblk);
+		iput(ip);
+	} else if (tblk->xflag & COMMIT_DELETE) {
+		ip = tblk->ip;
+		diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
+		ipimap->i_state |= I_DIRTY;
+		if (test_and_clear_cflag(COMMIT_Holdlock, ip)) {
+			if (tblk->flag & tblkGC_LAZY)
+				IWRITE_UNLOCK(ip);
+		}
+		iput(ip);
+	}
+}
+
+
+/*
+ * txAllocPMap()
+ *
+ * function: allocate from persistent map;
+ *
+ * parameter:
+ *	ipbmap	-
+ *	maplock	-
+ *		xad list:
+ *		pxd:
+ *
+ *	maptype -
+ *		allocate from persistent map;
+ *		free from persistent map;
+ *		(e.g., tmp file - free from working map at release
+ *		of last reference);
+ *		free from persistent and working map;
+ *
+ *	lsn	- log sequence number;
+ */
+static void txAllocPMap(struct inode *ip, maplock_t * maplock,
+			tblock_t * tblk)
+{
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	xdlistlock_t *xadlistlock;
+	xad_t *xad;
+	s64 xaddr;
+	int xlen;
+	pxdlock_t *pxdlock;
+	xdlistlock_t *pxdlistlock;
+	pxd_t *pxd;
+	int n;
+
+	/*
+	 * allocate from persistent map;
+	 */
+	if (maplock->flag & mlckALLOCXADLIST) {
+		xadlistlock = (xdlistlock_t *) maplock;
+		xad = xadlistlock->xdlist;
+		for (n = 0; n < xadlistlock->count; n++, xad++) {
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
+				xaddr = addressXAD(xad);
+				xlen = lengthXAD(xad);
+				dbUpdatePMap(ipbmap, FALSE, xaddr,
+					     (s64) xlen, tblk);
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+				jFYI(1,
+				     ("allocPMap: xaddr:0x%lx xlen:%d\n",
+				      (ulong) xaddr, xlen));
+			}
+		}
+	} else if (maplock->flag & mlckALLOCPXD) {
+		pxdlock = (pxdlock_t *) maplock;
+		xaddr = addressPXD(&pxdlock->pxd);
+		xlen = lengthPXD(&pxdlock->pxd);
+		dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk);
+		jFYI(1,
+		     ("allocPMap: xaddr:0x%lx xlen:%d\n", (ulong) xaddr,
+		      xlen));
+	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
+
+		pxdlistlock = (xdlistlock_t *) maplock;
+		pxd = pxdlistlock->xdlist;
+		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
+			xaddr = addressPXD(pxd);
+			xlen = lengthPXD(pxd);
+			dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen,
+				     tblk);
+			jFYI(1,
+			     ("allocPMap: xaddr:0x%lx xlen:%d\n",
+			      (ulong) xaddr, xlen));
+		}
+	}
+}
+
+
+/*
+ * txFreeMap()
+ *
+ * function: free from persistent and/or working map;
+ *
+ * 
todo: optimization + */ +void txFreeMap(struct inode *ip, + maplock_t * maplock, tblock_t * tblk, int maptype) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + xdlistlock_t *xadlistlock; + xad_t *xad; + s64 xaddr; + int xlen; + pxdlock_t *pxdlock; + xdlistlock_t *pxdlistlock; + pxd_t *pxd; + int n; + + jFYI(1, + ("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x\n", + tblk, maplock, maptype)); + + /* + * free from persistent map; + */ + if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { + if (maplock->flag & mlckFREEXADLIST) { + xadlistlock = (xdlistlock_t *) maplock; + xad = xadlistlock->xdlist; + for (n = 0; n < xadlistlock->count; n++, xad++) { + if (!(xad->flag & XAD_NEW)) { + xaddr = addressXAD(xad); + xlen = lengthXAD(xad); + dbUpdatePMap(ipbmap, TRUE, xaddr, + (s64) xlen, tblk); + jFYI(1, + ("freePMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } + } + } else if (maplock->flag & mlckFREEPXD) { + pxdlock = (pxdlock_t *) maplock; + xaddr = addressPXD(&pxdlock->pxd); + xlen = lengthPXD(&pxdlock->pxd); + dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, + tblk); + jFYI(1, + ("freePMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } else { /* (maplock->flag & mlckALLOCPXDLIST) */ + + pxdlistlock = (xdlistlock_t *) maplock; + pxd = pxdlistlock->xdlist; + for (n = 0; n < pxdlistlock->count; n++, pxd++) { + xaddr = addressPXD(pxd); + xlen = lengthPXD(pxd); + dbUpdatePMap(ipbmap, TRUE, xaddr, + (s64) xlen, tblk); + jFYI(1, + ("freePMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } + } + } + + /* + * free from working map; + */ + if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { + if (maplock->flag & mlckFREEXADLIST) { + xadlistlock = (xdlistlock_t *) maplock; + xad = xadlistlock->xdlist; + for (n = 0; n < xadlistlock->count; n++, xad++) { + xaddr = addressXAD(xad); + xlen = lengthXAD(xad); + dbFree(ip, xaddr, (s64) xlen); + xad->flag = 0; + jFYI(1, + ("freeWMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } + } else if (maplock->flag & mlckFREEPXD) { + pxdlock = (pxdlock_t *) maplock; + xaddr = addressPXD(&pxdlock->pxd); + xlen = lengthPXD(&pxdlock->pxd); + dbFree(ip, xaddr, (s64) xlen); + jFYI(1, + ("freeWMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } else { /* (maplock->flag & mlckFREEPXDLIST) */ + + pxdlistlock = (xdlistlock_t *) maplock; + pxd = pxdlistlock->xdlist; + for (n = 0; n < pxdlistlock->count; n++, pxd++) { + xaddr = addressPXD(pxd); + xlen = lengthPXD(pxd); + dbFree(ip, xaddr, (s64) xlen); + jFYI(1, + ("freeWMap: xaddr:0x%lx xlen:%d\n", + (ulong) xaddr, xlen)); + } + } + } +} + + +/* + * txFreelock() + * + * function: remove tlock from inode anonymous locklist + */ +void txFreelock(struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + tlock_t *xtlck, *tlck; + int xlid = 0, lid; + + xtlck = (tlock_t *) &jfs_ip->atlhead; + + while ((lid = xtlck->next)) { + tlck = &TxLock[lid]; + if (tlck->flag & tlckFREELOCK) { + xtlck->next = tlck->next; + TXLOCK_FREE(lid); + } else { + xtlck = tlck; + xlid = lid; + } + } + + if (jfs_ip->atlhead) + jfs_ip->atltail = xlid; + else { + jfs_ip->atltail = 0; + /* + * If inode was on anon_list, remove it + */ + TXN_LOCK(); + if (jfs_ip->atlnext) + JFS_IP(jfs_ip->atlnext)->atlprev = jfs_ip->atlprev; + if (jfs_ip->atlprev) + JFS_IP(jfs_ip->atlprev)->atlnext = jfs_ip->atlnext; + else if (TxAnchor.anon_list == ip) + TxAnchor.anon_list = jfs_ip->atlnext; + else if (TxAnchor.anon_list2 == ip) + TxAnchor.anon_list2 = jfs_ip->atlnext; + jfs_ip->atlnext = jfs_ip->atlprev = 0; + 
TXN_UNLOCK();
+	}
+}
+
+
+#ifdef _JFS_WIP
+/*
+ * NAME:        txFreeLock(lw)
+ *
+ * FUNCTION:    removes tlock from transaction locklist
+ *		and inserts it on the freelist.
+ *
+ * PARAMETERS:  lw	- index of tlock.
+ *
+ * RETURN:	0
+ *
+ * SERIALIZATION:
+ */
+int txFreeLock(lid)
+int lid;
+{
+	int tid, prev, next;
+
+	/*
+	 * remove tlock from tid locklist
+	 */
+	tid = TxLock[lid].tid;
+	prev = -1;
+	next = TxBlock[tid].next;
+	while (next != lid) {
+		prev = next;
+		next = TxLock[next].tidnxt;
+	}
+
+	if (prev < 0)
+		TxBlock[tid].next = TxLock[lid].next;
+	else
+		TxLock[prev].next = TxLock[lid].next;
+
+	/*
+	 * insert tlock at head of freelist
+	 */
+	TXLOCK_FREE(lid);
+
+	return 0;
+}
+#endif				/* _JFS_WIP */
+
+
+/*
+ * txAbort()
+ *
+ * function: abort tx before commit;
+ *
+ * frees line-locks and segment locks for all
+ * segments in comdata structure.
+ * Optionally sets state of file-system to FM_DIRTY in super-block.
+ * The log ages of page-frames in memory held by the caller are
+ * reset to 0 (to avoid logwrap).
+ */
+void txAbort(int tid, int dirty)
+{
+	int lid, next;
+	metapage_t *mp;
+
+	jEVENT(1, ("txAbort: tid:%d dirty:0x%x\n", tid, dirty));
+
+	/*
+	 * free tlocks of the transaction
+	 */
+	for (lid = TxBlock[tid].next; lid > 0; lid = next) {
+		next = TxLock[lid].next;
+
+		mp = TxLock[lid].mp;
+
+		if (mp) {
+			mp->lid = 0;
+
+			/*
+			 * reset lsn of page to avoid logwrap:
+			 *
+			 * (page may have been previously committed by another
+			 * transaction(s) but has not been paged, i.e.,
+			 * it may be on logsync list even though it has not
+			 * been logged for the current tx.)
+			 */
+			if (mp->xflag & COMMIT_PAGE && mp->lsn)
+				LogSyncRelease(mp);
+		}
+		/* insert tlock at head of freelist */
+		TXN_LOCK();
+		TXLOCK_FREE(lid);
+		TXN_UNLOCK();
+	}
+
+	/* caller will free the transaction block */
+
+	TxBlock[tid].next = 0;
+
+	/*
+	 * mark filesystem dirty
+	 */
+	if (dirty)
+		updateSuper(TxBlock[tid].sb, FM_DIRTY);
+
+	return;
+}
+
+
+/*
+ * txAbortCommit()
+ *
+ * function: abort commit.
+ *
+ * frees tlocks of transaction; line-locks and segment locks for all
+ * segments in comdata structure; frees malloc storage;
+ * sets state of file-system to FM_DIRTY in super-block.
+ * The log ages of page-frames in memory held by the caller are
+ * reset to 0 (to avoid logwrap).
+ */
+void txAbortCommit(commit_t * cd, int exval)
+{
+	int tid, lid, next;
+	metapage_t *mp;
+
+	assert(exval == EIO || exval == ENOMEM);
+	jEVENT(1, ("txAbortCommit: cd:0x%p\n", cd));
+
+	/*
+	 * free tlocks of the transaction
+	 */
+	tid = cd->tid;
+	for (lid = TxBlock[tid].next; lid > 0; lid = next) {
+		next = TxLock[lid].next;
+
+		mp = TxLock[lid].mp;
+		if (mp) {	/* 207090 */
+			mp->lid = 0;
+
+			/*
+			 * reset lsn of page to avoid logwrap;
+			 */
+			if (mp->xflag & COMMIT_PAGE)
+				LogSyncRelease(mp);
+		}
+
+		/* 207090 */
+		/* insert tlock at head of freelist */
+		TXN_LOCK();
+		TXLOCK_FREE(lid);
+		TXN_UNLOCK();
+	}
+
+	TxBlock[tid].next = 0;
+
+	/* free the transaction block */
+	txEnd(tid);
+
+	/*
+	 * mark filesystem dirty
+	 */
+	updateSuper(cd->sb, FM_DIRTY);
+}
+
+
+/*
+ * txLazyCommit(void)
+ *
+ * All transactions except those changing ipimap (COMMIT_FORCE) are
+ * processed by this routine.  This ensures that the inode and block
+ * allocation maps are updated in order.  For synchronous transactions,
+ * let the user thread finish processing after txUpdateMap() is called.
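+ *	Lazy transactions are queued FIFO by txLazyUnlock() and drained
+ *	here by the jfs_lazycommit daemon, which is what keeps the map
+ *	updates ordered.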
+ */ +void txLazyCommit(tblock_t * tblk) +{ + log_t *log; + unsigned long flags; + + while (((tblk->flag & tblkGC_READY) == 0) && + ((tblk->flag & tblkGC_UNLOCKED) == 0)) { + /* We must have gotten ahead of the user thread + */ + jFYI(1, + ("jfs_lazycommit: tblk 0x%p not unlocked\n", tblk)); + schedule(); + } + + TXN_LOCK(); + LAZY_LOCK(flags); + + TxAnchor.lazyQsize--; + if (TxAnchor.lazyQwait && (TxAnchor.lazyQsize < LazyLWM)) { + TxAnchor.lazyQwait = 0; + TXN_WAKEUP(&TxAnchor.freewait); + } + + LAZY_UNLOCK(flags); + TXN_UNLOCK(); + + jFYI(1, ("txLazyCommit: processing tblk 0x%p\n", tblk)); + + txUpdateMap(tblk); + + log = (log_t *) JFS_SBI(tblk->sb)->log; + + spin_lock_irq(&log->gclock); // LOGGC_LOCK + + tblk->flag |= tblkGC_COMMITTED; + + if ((tblk->flag & tblkGC_READY) || (tblk->flag & tblkGC_LAZY)) + log->gcrtc--; + + if (tblk->flag & tblkGC_READY) + wake_up(&tblk->gcwait); // LOGGC_WAKEUP + + spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK + + if (tblk->flag & tblkGC_LAZY) { + txUnlock(tblk, 0); + tblk->flag &= ~tblkGC_LAZY; + txEnd(tblk - TxBlock); /* Convert back to tid */ + } + + jFYI(1, ("txLazyCommit: done: tblk = 0x%p\n", tblk)); +} + +/* + * jfs_lazycommit(void) + * + * To be run as a kernel daemon. If lbmIODone is called in an interrupt + * context, or where blocking is not wanted, this routine will process + * committed transactions from the unlock queue. + */ +int jfs_lazycommit(void) +{ + int WorkDone; + tblock_t *tblk; + siginfo_t info; + unsigned long signr; + unsigned long flags; + + lock_kernel(); + + daemonize(); + current->tty = NULL; + strcpy(current->comm, "jfsCommit"); + + unlock_kernel(); + + jfsCommitTask = current; + + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, + sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) + | sigmask(SIGCONT)); + spin_unlock_irq(¤t->sigmask_lock); + + LAZY_LOCK_INIT(); + TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0; + TxAnchor.lazyQsize = TxAnchor.lazyQwait = 0; + + up(&jfsIOsem); + + LAZY_LOCK(flags); + while (TRUE) { + WorkDone = 0; + + while ((tblk = TxAnchor.unlock_queue)) { + /* + * We can't get ahead of user thread. Spinning is + * simpler than blocking/waking. We shouldn't spin + * very long, since user thread shouldn't be blocking + * between lmGroupCommit & txEnd. 
+ */ + WorkDone = 1; + + /* + * Remove first transaction from queue + */ + TxAnchor.unlock_queue = tblk->cqnext; + tblk->cqnext = 0; + if (TxAnchor.unlock_tail == tblk) + TxAnchor.unlock_tail = 0; + + LAZY_UNLOCK(flags); + txLazyCommit(tblk); + + /* + * We can be running indefinately if other processors + * are adding transactions to this list + */ + if (current->need_resched) + schedule(); + + LAZY_LOCK(flags); + } + +/* if (TxAnchor.sync_queue) + { + * XXXXXX * + } + */ + if (!WorkDone) { + jFYI(1, ("jfs_lazycommit: sleeping\n")); + set_current_state(TASK_INTERRUPTIBLE); + LAZY_UNLOCK(flags); + schedule(); + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + switch (signr) { + case SIGKILL: + if (TxLock) + /* Not our SIGKILL */ + break; + + if (TxAnchor.unlock_queue) { + jERROR(1, + ("jfs_lazycommit being killed with pending transactions!\n")); + } else { + jFYI(1, + ("jfs_lazycommit being killed\n")); + } + up(&jfsIOsem); + return 0; + } + LAZY_LOCK(flags); + } + } +} + +void txLazyUnlock(tblock_t * tblk) +{ + unsigned long flags; + + LAZY_LOCK(flags); + + if (TxAnchor.unlock_tail) + TxAnchor.unlock_tail->cqnext = tblk; + else + TxAnchor.unlock_queue = tblk; + TxAnchor.unlock_tail = tblk; + tblk->cqnext = 0; + LAZY_UNLOCK(flags); + wake_up_process(jfsCommitTask); +} + +static void LogSyncRelease(metapage_t * mp) +{ + log_t *log = mp->log; + + assert(atomic_read(&mp->nohomeok)); + assert(log); + atomic_dec(&mp->nohomeok); + + if (atomic_read(&mp->nohomeok)) + return; + + hold_metapage(mp, 0); + + LOGSYNC_LOCK(log); + mp->log = NULL; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del_init(&mp->synclist); + LOGSYNC_UNLOCK(log); + + release_metapage(mp); +} + +/* + * jfs_sync(void) + * + * To be run as a kernel daemon. This is awakened when tlocks run low. + * We write any inodes that have anonymous tlocks so they will become + * available. + */ +int jfs_sync(void) +{ + siginfo_t info; + unsigned long signr; + struct inode *ip; + struct jfs_inode_info *jfs_ip; + + lock_kernel(); + + daemonize(); + current->tty = NULL; + strcpy(current->comm, "jfsSync"); + + unlock_kernel(); + + jfsSyncTask = current; + + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, + sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) + | sigmask(SIGCONT)); + spin_unlock_irq(¤t->sigmask_lock); + + up(&jfsIOsem); + + while (TRUE) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + switch (signr) { + case SIGKILL: + if (TxLock) + /* Not our SIGKILL */ + break; + + jFYI(1, ("jfs_sync being killed\n")); + up(&jfsIOsem); + return 0; + } + + /* + * write each inode on the anonymous inode list + */ + TXN_LOCK(); + while (TxAnchor.anon_list && TlocksLow) { + ip = TxAnchor.anon_list; + jfs_ip = JFS_IP(ip); + + /* + * We must release the TXN_LOCK since our + * IWRITE_TRYLOCK implementation may still block + */ + TXN_UNLOCK(); + if (IWRITE_TRYLOCK(ip)) { + /* + * inode will be removed from anonymous list + * when it is committed + */ + jfs_commit_inode(ip, 0); + IWRITE_UNLOCK(ip); + /* + * Just to be safe. I don't know how + * long we can run without blocking + */ + if (current->need_resched) + schedule(); + TXN_LOCK(); + } else { + /* We can't get the write lock. It may + * be held by a thread waiting for tlock's + * so let's not block here. Save it to + * put back on the anon_list. 
+ */ + + /* + * We released TXN_LOCK, let's make sure + * this inode is still there + */ + TXN_LOCK(); + if (ip != TxAnchor.anon_list) + continue; + + /* Take off anon_list */ + TxAnchor.anon_list = jfs_ip->atlnext; + if (jfs_ip->atlnext) + JFS_IP(jfs_ip->atlnext)->atlprev = 0; + + /* Put on anon_list2 */ + if (TxAnchor.anon_list2) + JFS_IP(TxAnchor.anon_list2)-> + atlprev = ip; + jfs_ip->atlnext = TxAnchor.anon_list2; + TxAnchor.anon_list2 = ip; + } + } + /* Add anon_list2 back to anon_list */ + if (TxAnchor.anon_list2) { + if (TxAnchor.anon_list) { + /* Find last member of anon_list */ + ip = TxAnchor.anon_list; + while (JFS_IP(ip)->atlnext) + ip = JFS_IP(ip)->atlnext; + + JFS_IP(ip)->atlnext = TxAnchor.anon_list2; + JFS_IP(TxAnchor.anon_list2)->atlprev = ip; + } else + TxAnchor.anon_list = TxAnchor.anon_list2; + TxAnchor.anon_list2 = 0; + } + TXN_UNLOCK(); + } +} + +#if CONFIG_PROC_FS +int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, + int *eof, void *data) +{ + int len = 0; + off_t begin; + char *freewait; + char *freelockwait; + char *lowlockwait; + + freewait = + waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; + freelockwait = + waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; + lowlockwait = + waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; + + len += sprintf(buffer, + "JFS TxAnchor\n" + "============\n" + "freetid = %d\n" + "freewait = %s\n" + "freelock = %d\n" + "freelockwait = %s\n" + "lowlockwait = %s\n" + "tlocksInUse = %d\n" + "unlock_queue = 0x%p\n" + "unlock_tail = 0x%p\n" + "lazyQsize = %d\n" + "lazyQwait = %d\n", + TxAnchor.freetid, + freewait, + TxAnchor.freelock, + freelockwait, + lowlockwait, + TxAnchor.tlocksInUse, + TxAnchor.unlock_queue, + TxAnchor.unlock_tail, + TxAnchor.lazyQsize, TxAnchor.lazyQwait); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_umount.c linuxppc64_2_4/fs/jfs/jfs_umount.c --- ../kernel.org/linux/fs/jfs/jfs_umount.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_umount.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,158 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + */ + +/* + * jfs_umount.c + * + * note: file system in transition to aggregate/fileset: + * (ref. 
jfs_mount.c) + * + * file system unmount is interpreted as mount of the single/only + * fileset in the aggregate and, if unmount of the last fileset, + * as unmount of the aggerate; + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * NAME: jfs_umount(vfsp, flags, crp) + * + * FUNCTION: vfs_umount() + * + * PARAMETERS: vfsp - virtual file system pointer + * flags - unmount for shutdown + * crp - credential + * + * RETURN : EBUSY - device has open files + */ +int jfs_umount(struct super_block *sb) +{ + int rc = 0; + log_t *log; + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct inode *ipbmap = sbi->ipbmap; + struct inode *ipimap = sbi->ipimap; + struct inode *ipaimap = sbi->ipaimap; + struct inode *ipaimap2 = sbi->ipaimap2; + + jFYI(1, ("\n UnMount JFS: sb:0x%p\n", sb)); + + /* + * update superblock and close log + * + * if mounted read-write and log based recovery was enabled + */ + if ((log = sbi->log)) { + /* + * close log: + * + * remove file system from log active file system list. + */ + log = sbi->log; + rc = lmLogClose(sb, log); + } + + /* + * close fileset inode allocation map (aka fileset inode) + */ + jEVENT(0, ("jfs_umount: close ipimap:0x%p\n", ipimap)); + diUnmount(ipimap, 0); + + diFreeSpecial(ipimap); + sbi->ipimap = NULL; + + /* + * close secondary aggregate inode allocation map + */ + ipaimap2 = sbi->ipaimap2; + if (ipaimap2) { + jEVENT(0, ("jfs_umount: close ipaimap2:0x%p\n", ipaimap2)); + diUnmount(ipaimap2, 0); + diFreeSpecial(ipaimap2); + sbi->ipaimap2 = NULL; + } + + /* + * close aggregate inode allocation map + */ + ipaimap = sbi->ipaimap; + jEVENT(0, ("jfs_umount: close ipaimap:0x%p\n", ipaimap)); + diUnmount(ipaimap, 0); + diFreeSpecial(ipaimap); + sbi->ipaimap = NULL; + + /* + * close aggregate block allocation map + */ + jEVENT(0, ("jfs_umount: close ipbmap:%p\n", ipbmap)); + dbUnmount(ipbmap, 0); + + diFreeSpecial(ipbmap); + sbi->ipimap = NULL; + + /* + * ensure all file system file pages are propagated to their + * home blocks on disk (and their in-memory buffer pages are + * invalidated) BEFORE updating file system superblock state + * (to signify file system is unmounted cleanly, and thus in + * consistent state) and log superblock active file system + * list (to signify skip logredo()). + */ + if (log) /* log = NULL if read-only mount */ + rc = updateSuper(sb, FM_CLEAN); + + + jFYI(0, (" UnMount JFS Complete: %d\n", rc)); + return rc; +} + + +int jfs_umount_rw(struct super_block *sb) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + + if (!sbi->log) + return 0; + + /* + * close log: + * + * remove file system from log active file system list. + */ + lmLogClose(sb, sbi->log); + + dbSync(sbi->ipbmap); + diSync(sbi->ipimap); + + sbi->log = 0; + + return updateSuper(sb, FM_CLEAN); + +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_unicode.c linuxppc64_2_4/fs/jfs/jfs_unicode.c --- ../kernel.org/linux/fs/jfs/jfs_unicode.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_unicode.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,110 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include + +/* + * NAME: jfs_strfromUCS() + * + * FUNCTION: Convert little-endian unicode string to character string + * + */ +int jfs_strfromUCS_le(char *to, const wchar_t * from, /* LITTLE ENDIAN */ + int len, struct nls_table *codepage) +{ + int i; + int outlen = 0; + + for (i = 0; (i < len) && from[i]; i++) { + int charlen; + charlen = + codepage->uni2char(le16_to_cpu(from[i]), &to[outlen], + NLS_MAX_CHARSET_SIZE); + if (charlen > 0) { + outlen += charlen; + } else { + to[outlen++] = '?'; + } + } + to[outlen] = 0; + jEVENT(0, ("jfs_strfromUCS returning %d - '%s'\n", outlen, to)); + return outlen; +} + +/* + * NAME: jfs_strtoUCS() + * + * FUNCTION: Convert character string to unicode string + * + */ +int jfs_strtoUCS(wchar_t * to, + const char *from, int len, struct nls_table *codepage) +{ + int charlen; + int i; + + jEVENT(0, ("jfs_strtoUCS - '%s'\n", from)); + + for (i = 0; len && *from; i++, from += charlen, len -= charlen) { + charlen = codepage->char2uni(from, len, &to[i]); + if (charlen < 1) { + jERROR(1, ("jfs_strtoUCS: char2uni returned %d.\n", + charlen)); + jERROR(1, ("charset = %s, char = 0x%x\n", + codepage->charset, (unsigned char) *from)); + to[i] = 0x003f; /* a question mark */ + charlen = 1; + } + } + + jEVENT(0, (" returning %d\n", i)); + + to[i] = 0; + return i; +} + +/* + * NAME: get_UCSname() + * + * FUNCTION: Allocate and translate to unicode string + * + */ +int get_UCSname(component_t * uniName, struct dentry *dentry, + struct nls_table *nls_tab) +{ + int length = dentry->d_name.len; + + if (length > JFS_NAME_MAX) + return ENAMETOOLONG; + + uniName->name = + kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); + + if (uniName->name == NULL) + return ENOSPC; + + uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, + length, nls_tab); + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_uniupr.c linuxppc64_2_4/fs/jfs/jfs_uniupr.c --- ../kernel.org/linux/fs/jfs/jfs_uniupr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_uniupr.c Wed Nov 14 10:22:29 2001 @@ -0,0 +1,136 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * jfs_uniupr.c - Unicode compressed case ranges + * +*/ + +#include + +/* + * Latin upper case + */ +signed char UniUpperTable[512] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */ + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 060-06f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, 0, 0, 0, 0, 0, /* 070-07f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 0e0-0ef */ + -32,-32,-32,-32,-32,-32,-32, 0,-32,-32,-32,-32,-32,-32,-32,121, /* 0f0-0ff */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */ + 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */ + -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */ + 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */ + 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */ + 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */ + -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */ + 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */ + -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,-79, 0, -1, /* 1d0-1df */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */ + 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */ +}; + +/* Upper case range - Greek */ +static signed char UniCaseRangeU03a0[47] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-38,-37,-37,-37, /* 3a0-3af */ + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 3b0-3bf */ + -32,-32,-31,-32,-32,-32,-32,-32,-32,-32,-32,-32,-64,-63,-63, +}; + +/* Upper case range - Cyrillic */ +static signed char UniCaseRangeU0430[48] = { + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 430-43f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 440-44f */ + 0,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80, 0,-80,-80, /* 450-45f */ +}; + +/* Upper case range - Extended cyrillic */ +static signed char UniCaseRangeU0490[61] = { + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */ + 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, +}; 
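[Editorial sketch, not part of the patch.] The signed tables above and below hold the delta to add to a code point to reach its upper-case form (0 means the character is unchanged). A minimal sketch of the lookup they are built for, assuming UNICASERANGE has fields named start, end and table, as the initializers at the bottom of this file suggest; the real consumer lives in the JFS unicode header, which this hunk does not include.

static wchar_t example_toupper(wchar_t uc)
{
	UNICASERANGE *rp;

	if (uc < 512)			/* dense Latin table */
		return uc + UniUpperTable[uc];

	for (rp = UniUpperRange; rp->start; rp++) {
		if (uc < rp->start)	/* ranges are ascending */
			break;
		if (uc <= rp->end)	/* sparse range table */
			return uc + rp->table[uc - rp->start];
	}
	return uc;			/* no upper-case form */
}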
+ +/* Upper case range - Extended latin and greek */ +static signed char UniCaseRangeU1e00[509] = { + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */ + 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0,-59, 0, -1, 0, -1, /* 1e90-1e9f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */ + 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */ + 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */ + 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */ + 74, 74, 86, 86, 86, 86,100,100, 0, 0,112,112,126,126, 0, 0, /* 1f70-1f7f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */ + 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */ + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */ + 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */ + 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */ + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Upper case range - Wide latin */ +static signed char UniCaseRangeUff40[27] = { + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* ff40-ff4f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, +}; + +/* + * Upper Case Range + */ +UNICASERANGE UniUpperRange[] = { + { 0x03a0, 0x03ce, UniCaseRangeU03a0 }, + { 0x0430, 0x045f, UniCaseRangeU0430 }, + { 0x0490, 0x04cc, UniCaseRangeU0490 }, + { 0x1e00, 0x1ffc, UniCaseRangeU1e00 }, + { 0xff40, 0xff5a, UniCaseRangeUff40 }, + { 0, 0, 0 } +}; diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/jfs_xtree.c linuxppc64_2_4/fs/jfs/jfs_xtree.c --- ../kernel.org/linux/fs/jfs/jfs_xtree.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_xtree.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,4391 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * jfs_xtree.c: extent allocation descriptor B+-tree manager + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * xtree local flag + */ +#define XT_INSERT 0x00000001 + +/* + * xtree key/entry comparison: extent offset + * + * return: + * -1: k < start of extent + * 0: start_of_extent <= k <= end_of_extent + * 1: k > end_of_extent + */ +#define XT_CMP(CMP, K, X, OFFSET64)\ +{\ + OFFSET64 = offsetXAD(X);\ + (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ + ((K) < OFFSET64) ? -1 : 0;\ +} + +/* write a xad entry */ +#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ +{\ + (XAD)->flag = (FLAG);\ + XADoffset((XAD), (OFF));\ + XADlength((XAD), (LEN));\ + XADaddress((XAD), (ADDR));\ +} + +#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) + +/* get page buffer for specified block address */ +#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ +{\ + BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ + if (!(RC))\ + {\ + if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ + (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ + (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ + {\ + jERROR(1,("XT_GETPAGE: xtree page corrupt\n"));\ + BT_PUTPAGE(MP);\ + MP = NULL;\ + RC = EIO;\ + }\ + }\ +} + +/* for consistency */ +#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) + +#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ + BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) +/* xtree entry parameter descriptor */ +typedef struct { + metapage_t *mp; + s16 index; + u8 flag; + s64 off; + s64 addr; + int len; + pxdlist_t *pxdlist; +} xtsplit_t; + + +#ifdef _JFS_STATISTICS +/* + * statistics + */ +static struct { + uint search; + uint fastSearch; + uint split; +} xtStat; +#endif /* _JFS_STATISTICS */ + + +/* + * forward references + */ +static int xtSearch(struct inode *ip, + s64 xoff, int *cmpp, btstack_t * btstack, int flag); + +static int xtSplitUp(int tid, + struct inode *ip, + xtsplit_t * split, btstack_t * btstack); + +static int xtSplitPage(int tid, + struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp, s64 * rbnp); + +static int xtSplitRoot(int tid, + struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp); + +#ifdef _STILL_TO_PORT +static int xtDeleteUp(int tid, + struct inode *ip, + metapage_t * fmp, + xtpage_t * fp, btstack_t * btstack); + +static int xtSearchNode(struct inode *ip, + xad_t * xad, + int *cmpp, btstack_t * btstack, int flag); + +static int xtRelink(int tid, struct inode *ip, xtpage_t * fp); +#endif /* _STILL_TO_PORT */ + +/* External references */ + +/* + * debug control + */ +/* #define _JFS_DEBUG_XTREE 1 */ + + +/* + * xtLookup() + * + * function: map a single page into a physical extent; + */ +int xtLookup(struct inode *ip, s64 lstart, + s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index; + xad_t *xad; + s64 size, xoff, xend; + int xlen; + s64 xaddr; + + *plen = 0; + + if 
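/*
 * (Editorial aside, not in the original patch.)  Worked example of the
 * XT_CMP() classification defined above, for an xad with offset 100
 * and length 8, i.e. covering blocks 100..107:
 *
 *    K =  99  ->  CMP = -1   (key lies left of the extent)
 *    K = 100  ->  CMP =  0   (first covered block)
 *    K = 107  ->  CMP =  0   (last covered block)
 *    K = 108  ->  CMP = +1   (key lies right of the extent)
 */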
(!no_check) { + /* is lookup offset beyond eof ? */ + size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + if (lstart >= size) { + jERROR(1, + ("xtLookup: lstart (0x%lx) >= size (0x%lx)\n", + (ulong) lstart, (ulong) size)); + return 0; + } + } + + /* + * search for the xad entry covering the logical extent + */ +//search: + if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { + jERROR(1, ("xtLookup: xtSearch returned %d\n", rc)); + return rc; + } + + /* + * compute the physical extent covering logical extent + * + * N.B. search may have failed (e.g., hole in sparse file), + * and returned the index of the next entry. + */ + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* is xad found covering start of logical extent ? + * lstart is a page start address, + * i.e., lstart cannot start in a hole; + */ + if (cmp) { + jFYI(1, ("xtLookup: cmp = %d\n", cmp)); + goto out; + } + + /* + * lxd covered by xad + */ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xend = xoff + xlen; + xaddr = addressXAD(xad); + + jEVENT(0, + ("index = %d, xoff = 0x%lx, xlen = 0x%x, xaddr = 0x%lx\n", + index, (ulong) xoff, xlen, (ulong) xaddr)); + + /* initialize new pxd */ + *pflag = xad->flag; + *paddr = xaddr + (lstart - xoff); + /* a page must be fully covered by an xad */ + *plen = min(xend - lstart, llen); + + out: + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtLookupList() + * + * function: map a single logical extent into a list of physical extent; + * + * parameter: + * struct inode *ip, + * lxdlist_t *lxdlist, lxd list (in) + * xadlist_t *xadlist, xad list (in/out) + * int flag) + * + * coverage of lxd by xad under assumption of + * . lxd's are ordered and disjoint. + * . xad's are ordered and disjoint. + * + * return: + * 0: success + * + * note: a page being written (even a single byte) is backed fully, + * except the last page which is only backed with blocks + * required to cover the last byte; + * the extent backing a page is fully contained within an xad; + */ +int xtLookupList(struct inode *ip, lxdlist_t * lxdlist, /* lxd list (in) */ + xadlist_t * xadlist, /* xad list (in/out) */ + int flag) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index; + lxd_t *lxd; + xad_t *xad, *pxd; + s64 size, lstart, lend, xstart, xend, pstart; + s64 llen, xlen, plen; + s64 xaddr, paddr; + int nlxd, npxd, maxnpxd; + + npxd = xadlist->nxad = 0; + maxnpxd = xadlist->maxnxad; + pxd = xadlist->xad; + + nlxd = lxdlist->nlxd; + lxd = lxdlist->lxd; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + + size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + + /* + * search for the xad entry covering the logical extent + */ + search: + if (lstart >= size) + return 0; + + if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) + return rc; + + /* + * compute the physical extent covering logical extent + * + * N.B. search may have failed (e.g., hole in sparse file), + * and returned the index of the next entry. + */ +//map: + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* is xad on the next sibling page ? 
*/ + if (index == le16_to_cpu(p->header.nextindex)) { + if (p->header.flag & BT_ROOT) + goto mapend; + + if ((bn = le64_to_cpu(p->header.next)) == 0) + goto mapend; + + XT_PUTPAGE(mp); + + /* get next sibling page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = XTENTRYSTART; + } + + xad = &p->xad[index]; + + /* + * is lxd covered by xad ? + */ + compare: + xstart = offsetXAD(xad); + xlen = lengthXAD(xad); + xend = xstart + xlen; + xaddr = addressXAD(xad); + + compare1: + if (xstart < lstart) + goto compare2; + + /* (lstart <= xstart) */ + + /* lxd is NOT covered by xad */ + if (lend <= xstart) { + /* + * get next lxd + */ + if (--nlxd == 0) + goto mapend; + lxd++; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + if (lstart >= size) + goto mapend; + + /* compare with the current xad */ + goto compare1; + } + /* lxd is covered by xad */ + else { /* (xstart < lend) */ + + /* initialize new pxd */ + pstart = xstart; + plen = min(lend - xstart, xlen); + paddr = xaddr; + + goto cover; + } + + /* (xstart < lstart) */ + compare2: + /* lxd is covered by xad */ + if (lstart < xend) { + /* initialize new pxd */ + pstart = lstart; + plen = min(xend - lstart, llen); + paddr = xaddr + (lstart - xstart); + + goto cover; + } + /* lxd is NOT covered by xad */ + else { /* (xend <= lstart) */ + + /* + * get next xad + * + * linear search next xad covering lxd on + * the current xad page, and then tree search + */ + if (index == le16_to_cpu(p->header.nextindex) - 1) { + if (p->header.flag & BT_ROOT) + goto mapend; + + XT_PUTPAGE(mp); + goto search; + } else { + index++; + xad++; + + /* compare with new xad */ + goto compare; + } + } + + /* + * lxd is covered by xad and a new pxd has been initialized + * (lstart <= xstart < lend) or (xstart < lstart < xend) + */ + cover: + /* finalize pxd corresponding to current xad */ + XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); + + if (++npxd >= maxnpxd) + goto mapend; + pxd++; + + /* + * lxd is fully covered by xad + */ + if (lend <= xend) { + /* + * get next lxd + */ + if (--nlxd == 0) + goto mapend; + lxd++; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + if (lstart >= size) + goto mapend; + + /* + * test for old xad covering new lxd + * (old xstart < new lstart) + */ + goto compare2; + } + /* + * lxd is partially covered by xad + */ + else { /* (xend < lend) */ + + /* + * get next xad + * + * linear search next xad covering lxd on + * the current xad page, and then next xad page search + */ + if (index == le16_to_cpu(p->header.nextindex) - 1) { + if (p->header.flag & BT_ROOT) + goto mapend; + + if ((bn = le64_to_cpu(p->header.next)) == 0) + goto mapend; + + XT_PUTPAGE(mp); + + /* get next sibling page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = XTENTRYSTART; + xad = &p->xad[index]; + } else { + index++; + xad++; + } + + /* + * test for new xad covering old lxd + * (old lstart < new xstart) + */ + goto compare; + } + + mapend: + xadlist->nxad = npxd; + +//out: + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtSearch() + * + * function: search for the xad entry covering specified offset. + * + * parameters: + * ip - file object; + * xoff - extent offset; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag (XT_INSERT); + * + * returns: + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. 
+ * the page containing the entry is pinned at exit. + */ +static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ + int *cmpp, btstack_t * btstack, int flag) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int rc = 0; + int cmp = 1; /* init for empty page */ + s64 bn; /* block number */ + metapage_t *mp; /* page buffer */ + xtpage_t *p; /* page */ + xad_t *xad; + int base, index, lim, btindex; + btframe_t *btsp; + int nsplit = 0; /* number of pages to split */ + s64 t64; + + INCREMENT(xtStat.search); + + BT_CLR(btstack); + + btstack->nsplit = 0; + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* try sequential access heuristics with the previous + * access entry in target leaf page: + * once search narrowed down into the target leaf, + * key must either match an entry in the leaf or + * key entry does not exist in the tree; + */ +//fastSearch: + if ((jfs_ip->btorder & BT_SEQUENTIAL) && + (p->header.flag & BT_LEAF) && + (index = jfs_ip->btindex) < + le16_to_cpu(p->header.nextindex)) { + xad = &p->xad[index]; + t64 = offsetXAD(xad); + if (xoff < t64 + lengthXAD(xad)) { + if (xoff >= t64) { + *cmpp = 0; + goto out; + } + + /* stop sequential access heuristics */ + goto binarySearch; + } else { /* (t64 + lengthXAD(xad)) <= xoff */ + + /* try next sequential entry */ + index++; + if (index < + le16_to_cpu(p->header.nextindex)) { + xad++; + t64 = offsetXAD(xad); + if (xoff < t64 + lengthXAD(xad)) { + if (xoff >= t64) { + *cmpp = 0; + goto out; + } + + /* miss: key falls between + * previous and this entry + */ + *cmpp = 1; + goto out; + } + + /* (xoff >= t64 + lengthXAD(xad)); + * matching entry may be further out: + * stop heuristic search + */ + /* stop sequential access heuristics */ + goto binarySearch; + } + + /* (index == p->header.nextindex); + * miss: key entry does not exist in + * the target leaf/tree + */ + *cmpp = 1; + goto out; + } + + /* + * if hit, return index of the entry found, and + * if miss, where new entry with search key is + * to be inserted; + */ + out: + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == /* little-endian */ + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + /* update sequential access heuristics */ + jfs_ip->btindex = index; + + INCREMENT(xtStat.fastSearch); + return 0; + } + + /* well, ... 
full search now */ + binarySearch: + lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + + /* + * binary search with search key K on the current page + */ + for (base = XTENTRYSTART; lim; lim >>= 1) { + index = base + (lim >> 1); + + XT_CMP(cmp, xoff, &p->xad[index], t64); + if (cmp == 0) { + /* + * search hit + */ + /* search hit - leaf page: + * return the entry found + */ + if (p->header.flag & BT_LEAF) { + *cmpp = cmp; + + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + /* init sequential access heuristics */ + btindex = jfs_ip->btindex; + if (index == btindex || + index == btindex + 1) + jfs_ip->btorder = BT_SEQUENTIAL; + else + jfs_ip->btorder = BT_RANDOM; + jfs_ip->btindex = index; + + return 0; + } + + /* search hit - internal page: + * descend/search its child page + */ + goto next; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or maxentry index. + */ + /* + * search miss - leaf page: + * + * return location of entry (base) where new entry with + * search key K is to be inserted. + */ + if (p->header.flag & BT_LEAF) { + *cmpp = cmp; + + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = base; + btsp->mp = mp; + + /* init sequential access heuristics */ + btindex = jfs_ip->btindex; + if (base == btindex || base == btindex + 1) + jfs_ip->btorder = BT_SEQUENTIAL; + else + jfs_ip->btorder = BT_RANDOM; + jfs_ip->btindex = base; + + return 0; + } + + /* + * search miss - non-leaf page: + * + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? 
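/*
 * (Editorial aside, not in the original patch.)  Example of this
 * non-leaf miss: with router keys {0, 100, 200} and search key
 * K == 150, the binary search ends with base at the key-200 entry,
 * the smallest key greater than K, so the child to descend into is
 * the one routed by key 100, hence the base - 1 below.
 */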
base - 1 : base; + + /* + * go down to child page + */ + next: + /* update number of pages to split */ + if (p->header.nextindex == p->header.maxentry) + nsplit++; + else + nsplit = 0; + + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, index); + + /* get the child page block number */ + bn = addressXAD(&p->xad[index]); + + /* unpin the parent page */ + XT_PUTPAGE(mp); + } +} + +/* + * xtInsert() + * + * function: + * + * parameter: + * tid - transaction id; + * ip - file object; + * xflag - extent flag (XAD_NOTRECORDED): + * xoff - extent offset; + * xlen - extent length; + * xaddrp - extent address pointer (in/out): + * if (*xaddrp) + * caller allocated data extent at *xaddrp; + * else + * allocate data extent and return its xaddr; + * flag - + * + * return: + */ +int xtInsert(int tid, /* transaction id */ + struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, + int flag) +{ + int rc = 0; + s64 xaddr, hint; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + int cmp; + tlock_t *tlck; + xtlock_t *xtlck; + + jFYI(1, + ("xtInsert: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); + + /* + * search for the entry location at which to insert: + * + * xtFastSearch() and xtSearch() both returns (leaf page + * pinned, index at which to insert). + * n.b. xtSearch() may return index of maxentry of + * the full page. + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* This test must follow XT_GETSEARCH since mp must be valid if + * we branch to out: */ + if (cmp == 0) { + rc = EEXIST; + goto out; + } + + /* + * allocate data extent requested + * + * allocation hint: last xad + */ + if ((xaddr = *xaddrp) == 0) { + if (index > XTENTRYSTART) { + xad = &p->xad[index - 1]; + hint = addressXAD(xad) + lengthXAD(xad) - 1; + } else + hint = 0; + if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) + goto out; + } + + /* + * insert entry for new extent + */ + xflag |= XAD_NEW; + + /* + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. + */ + nextindex = le16_to_cpu(p->header.nextindex); + if (nextindex == le16_to_cpu(p->header.maxentry)) { + split.mp = mp; + split.index = index; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { + /* undo data extent allocation */ + if (*xaddrp == 0) + dbFree(ip, xaddr, (s64) xlen); + return rc; + } + + *xaddrp = xaddr; + return 0; + } + + /* + * insert the new entry into the leaf page + */ + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + BT_MARK_DIRTY(mp, ip); + + /* if insert into middle, shift right remaining entries. 
*/ + if (index < nextindex) + memmove(&p->xad[index + 1], &p->xad[index], + (nextindex - index) * sizeof(xad_t)); + + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + } + + *xaddrp = xaddr; + + out: + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtSplitUp() + * + * function: + * split full pages as propagating insertion up the tree + * + * parameter: + * tid - transaction id; + * ip - file object; + * split - entry parameter descriptor; + * btstack - traverse stack from xtSearch() + * + * return: + */ +static int +xtSplitUp(int tid, + struct inode *ip, xtsplit_t * split, btstack_t * btstack) +{ + int rc = 0; + metapage_t *smp; + xtpage_t *sp; /* split page */ + metapage_t *rmp; + s64 rbn; /* new right page block number */ + metapage_t *rcmp; + xtpage_t *rcp; /* right child page */ + s64 rcbn; /* right child page block number */ + int skip; /* index of entry of insertion */ + int nextindex; /* next available entry index of p */ + btframe_t *parent; /* parent page entry on traverse stack */ + xad_t *xad; + s64 xaddr; + int xlen; + int nsplit; /* number of pages split */ + pxdlist_t pxdlist; + pxd_t *pxd; + tlock_t *tlck; + xtlock_t *xtlck; + + smp = split->mp; + sp = XT_PAGE(ip, smp); + + /* is inode xtree root extension/inline EA area free ? */ + if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && + (sp->header.maxentry < cpu_to_le16(XTROOTMAXSLOT)) && + (JFS_IP(ip)->mode2 & INLINEEA)) { + sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); + JFS_IP(ip)->mode2 &= ~INLINEEA; + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + + /* if insert into middle, shift right remaining entries. */ + skip = split->index; + nextindex = le16_to_cpu(sp->header.nextindex); + if (skip < nextindex) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (nextindex - skip) * sizeof(xad_t)); + + /* insert the new entry: mark the entry NEW */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* advance next available entry index */ + sp->header.nextindex = + cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(skip, (int)xtlck->lwm.offset) : skip; + xtlck->lwm.length = + le16_to_cpu(sp->header.nextindex) - + xtlck->lwm.offset; + } + + return 0; + } + + /* + * allocate new index blocks to cover index page split(s) + * + * allocation hint: ? 
+ */ + if (split->pxdlist == NULL) { + nsplit = btstack->nsplit; + split->pxdlist = &pxdlist; + pxdlist.maxnpxd = pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + xlen = JFS_SBI(ip->i_sb)->nbperpage; + for (; nsplit > 0; nsplit--, pxd++) { + if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) + == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + + pxdlist.maxnpxd++; + + continue; + } + + /* undo allocation */ + + XT_PUTPAGE(smp); + return rc; + } + } + + /* + * Split leaf page into and a new right page . + * + * The split routines insert the new entry into the leaf page, + * and acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? + xtSplitRoot(tid, ip, split, &rmp) : + xtSplitPage(tid, ip, split, &rmp, &rbn); + if (rc) + return EIO; + + XT_PUTPAGE(smp); + + /* + * propagate up the router entry for the leaf page just split + * + * insert a router entry for the new page into the parent page, + * propagate the insert/split up the tree by walking back the stack + * of (bn of parent page, index of child page entry in parent page) + * that were traversed during the search for the page that split. + * + * the propagation of insert/split up the tree stops if the root + * splits or the page inserted into doesn't have to split to hold + * the new entry. + * + * the parent entry for the split page remains the same, and + * a new entry is inserted at its right with the first key and + * block number of the new right page. + * + * There are a maximum of 3 pages pinned at any time: + * right child, left parent and right parent (when the parent splits) + * to keep the child page pinned while working on the parent. + * make sure that all pins are released at exit. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* parent page specified by stack frame */ + + /* keep current child pages pinned */ + rcmp = rmp; + rcbn = rbn; + rcp = XT_PAGE(ip, rcmp); + + /* + * insert router entry in parent for new right child page + */ + /* get/pin the parent page */ + XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); + if (rc) + goto errout2; + + /* + * The new key entry goes ONE AFTER the index of parent entry, + * because the split was to the right. + */ + skip = parent->index + 1; + + /* + * split or shift right remaining entries of the parent page + */ + nextindex = le16_to_cpu(sp->header.nextindex); + /* + * parent page is full - split the parent page + */ + if (nextindex == le16_to_cpu(sp->header.maxentry)) { + /* init for parent page split */ + split->mp = smp; + split->index = skip; /* index at insert */ + split->flag = XAD_NEW; + split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); + split->len = JFS_SBI(ip->i_sb)->nbperpage; + split->addr = rcbn; + + /* unpin previous right child page */ + XT_PUTPAGE(rcmp); + + /* The split routines insert the new entry, + * and acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? 
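/*
 * (Editorial note, not in the original patch.)  This dispatch picks
 * xtSplitRoot() when the full page is the root: the root stays put
 * in the inode, its entries are copied into a single new child, so
 * only the new page comes back through rmp.  xtSplitPage() splits an
 * ordinary page in two and must also report the new right page's
 * block number through rbn.
 */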
+ xtSplitRoot(tid, ip, split, &rmp) : + xtSplitPage(tid, ip, split, &rmp, &rbn); + if (rc) + goto errout1; + + XT_PUTPAGE(smp); + /* keep new child page pinned */ + } + /* + * parent page is not full - insert in parent page + */ + else { + /* + * insert router entry in parent for the right child + * page from the first entry of the right child page: + */ + /* + * acquire a transaction lock on the parent page; + * + * action: router xad insertion; + */ + BT_MARK_DIRTY(smp, ip); + + /* + * if insert into middle, shift right remaining entries + */ + if (skip < nextindex) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (nextindex - + skip) << L2XTSLOTSIZE); + + /* insert the router entry */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, XAD_NEW, + offsetXAD(&rcp->xad[XTENTRYSTART]), + JFS_SBI(ip->i_sb)->nbperpage, rcbn); + + /* advance next available entry index. */ + sp->header.nextindex = + cpu_to_le16(le16_to_cpu(sp->header.nextindex) + + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, smp, + tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(skip, (int)xtlck->lwm.offset) : skip; + xtlck->lwm.length = + le16_to_cpu(sp->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin parent page */ + XT_PUTPAGE(smp); + + /* exit propagate up */ + break; + } + } + + /* unpin current right page */ + XT_PUTPAGE(rmp); + + return 0; + + /* + * If something fails in the above loop we were already walking back + * up the tree and the tree is now inconsistent. + * release all pages we're holding. + */ + errout1: + XT_PUTPAGE(smp); + + errout2: + XT_PUTPAGE(rcmp); + + return rc; +} + + +/* + * xtSplitPage() + * + * function: + * split a full non-root page into + * original/split/left page and new right page + * i.e., the original/split page remains as left page. + * + * parameter: + * int tid, + * struct inode *ip, + * xtsplit_t *split, + * metapage_t **rmpp, + * u64 *rbnp, + * + * return: + * Pointer to page in which to insert or NULL on error. 
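 *
 * (Editorial note, not in the original patch: as ported to Linux the
 * function in fact returns 0 or an errno and hands the pinned new
 * right page back through rmpp; the "Pointer to page ... or NULL"
 * wording above is left over from an earlier interface.)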
+ */ +static int +xtSplitPage(int tid, struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp, s64 * rbnp) +{ + int rc = 0; + metapage_t *smp; + xtpage_t *sp; + metapage_t *rmp; + xtpage_t *rp; /* new right page allocated */ + s64 rbn; /* new right page block number */ + metapage_t *mp; + xtpage_t *p; + s64 nextbn; + int skip, maxentry, middle, righthalf, n; + xad_t *xad; + pxdlist_t *pxdlist; + pxd_t *pxd; + tlock_t *tlck; + xtlock_t *sxtlck = 0, *rxtlck = 0; + + smp = split->mp; + sp = XT_PAGE(ip, smp); + + INCREMENT(xtStat.split); + + /* + * allocate the new right page for the split + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, + ("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp)); + + BT_MARK_DIRTY(rmp, ip); + /* + * action: new page; + */ + + rp = (xtpage_t *) rmp->data; + rp->header.self = *pxd; + rp->header.flag = sp->header.flag & BT_TYPE; + rp->header.maxentry = sp->header.maxentry; /* little-endian */ + rp->header.nextindex = cpu_to_le16(XTENTRYSTART); + + BT_MARK_DIRTY(smp, ip); + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + /* + * acquire a transaction lock on the new right page; + */ + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); + rxtlck = (xtlock_t *) & tlck->lock; + rxtlck->lwm.offset = XTENTRYSTART; + /* + * acquire a transaction lock on the split page + */ + tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); + sxtlck = (xtlock_t *) & tlck->lock; + } + + /* + * initialize/update sibling pointers of and + */ + nextbn = le64_to_cpu(sp->header.next); + rp->header.next = cpu_to_le64(nextbn); + rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); + sp->header.next = cpu_to_le64(rbn); + + skip = split->index; + + /* + * sequential append at tail (after last entry of last page) + * + * if splitting the last page on a level because of appending + * a entry to it (skip is maxentry), it's likely that the access is + * sequential. adding an empty page on the side of the level is less + * work and can push the fill factor much higher than normal. + * if we're wrong it's no big deal - we will do the split the right + * way next time. + * (it may look like it's equally easy to do a similar hack for + * reverse sorted data, that is, split the tree left, but it's not. + * Be my guest.) 
+ */ + if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { + /* + * acquire a transaction lock on the new/right page; + * + * action: xad insertion; + */ + /* insert entry at the first entry of the new right page */ + xad = &rp->xad[XTENTRYSTART]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + /* rxtlck->lwm.offset = XTENTRYSTART; */ + rxtlck->lwm.length = 1; + } + + *rmpp = rmp; + *rbnp = rbn; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp)); + return 0; + } + + /* + * non-sequential insert (at possibly middle page) + */ + + /* + * update previous pointer of old next/right page of + */ + if (nextbn != 0) { + XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); + if (rc) { + XT_PUTPAGE(rmp); + return rc; + } + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the next page; + * + * action:sibling pointer update; + */ + if (!test_cflag(COMMIT_Nolink, ip)) + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + p->header.prev = cpu_to_le64(rbn); + + /* sibling page may have been updated previously, or + * it may be updated later; + */ + + XT_PUTPAGE(mp); + } + + /* + * split the data between the split and new/right pages + */ + maxentry = le16_to_cpu(sp->header.maxentry); + middle = maxentry >> 1; + righthalf = maxentry - middle; + + /* + * skip index in old split/left page - insert into left page: + */ + if (skip <= middle) { + /* move right half of split page to the new right page */ + memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], + righthalf << L2XTSLOTSIZE); + + /* shift right tail of left half to make room for new entry */ + if (skip < middle) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (middle - skip) << L2XTSLOTSIZE); + + /* insert new entry */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* update page header */ + sp->header.nextindex = cpu_to_le16(middle + 1); + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.offset = (sxtlck->lwm.offset) ? + min(skip, (int)sxtlck->lwm.offset) : skip; + } + + rp->header.nextindex = + cpu_to_le16(XTENTRYSTART + righthalf); + } + /* + * skip index in new right page - insert into right page: + */ + else { + /* move left head of right half to right page */ + n = skip - middle; + memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], + n << L2XTSLOTSIZE); + + /* insert new entry */ + n += XTENTRYSTART; + xad = &rp->xad[n]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* move right tail of right half to right page */ + if (skip < maxentry) + memmove(&rp->xad[n + 1], &sp->xad[skip], + (maxentry - skip) << L2XTSLOTSIZE); + + /* update page header */ + sp->header.nextindex = cpu_to_le16(middle); + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 
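/*
 * (Editorial aside, not in the original patch.)  The split
 * arithmetic, assuming a 4K page so that maxentry == 256,
 * middle == 128, righthalf == 128: an insert at skip == 40 takes the
 * branch above, moving entries 128..255 to the right page and
 * opening slot 40 on the left; an insert at skip == 200 takes this
 * branch, moving entries 128..199 right, placing the new entry after
 * them, then moving entries 200..255 in behind it.
 */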
+ min(middle, (int)sxtlck->lwm.offset) : middle; + } + + rp->header.nextindex = cpu_to_le16(XTENTRYSTART + + righthalf + 1); + } + + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - + sxtlck->lwm.offset; + + /* rxtlck->lwm.offset = XTENTRYSTART; */ + rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - + XTENTRYSTART; + } + + *rmpp = rmp; + *rbnp = rbn; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp)); + return rc; +} + + +/* + * xtSplitRoot() + * + * function: + * split the full root page into + * original/root/split page and new right page + * i.e., root remains fixed in tree anchor (inode) and + * the root is copied to a single new right child page + * since root page << non-root page, and + * the split root page contains a single entry for the + * new right child page. + * + * parameter: + * int tid, + * struct inode *ip, + * xtsplit_t *split, + * metapage_t **rmpp) + * + * return: + * Pointer to page in which to insert or NULL on error. + */ +static int +xtSplitRoot(int tid, + struct inode *ip, xtsplit_t * split, metapage_t ** rmpp) +{ + xtpage_t *sp; + metapage_t *rmp; + xtpage_t *rp; + s64 rbn; + int skip, nextindex; + xad_t *xad; + pxd_t *pxd; + pxdlist_t *pxdlist; + tlock_t *tlck; + xtlock_t *xtlck; + + sp = &JFS_IP(ip)->i_xtroot; + + INCREMENT(xtStat.split); + + /* + * allocate a single (right) child page + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, ("xtSplitRoot: ip:0x%p rmp:0x%p\n", ip, rmp)); + + /* + * acquire a transaction lock on the new right page; + * + * action: new page; + */ + BT_MARK_DIRTY(rmp, ip); + + rp = (xtpage_t *) rmp->data; + rp->header.flag = + (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; + rp->header.self = *pxd; + rp->header.nextindex = cpu_to_le16(XTENTRYSTART); + rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); + + /* initialize sibling pointers */ + rp->header.next = 0; + rp->header.prev = 0; + + /* + * copy the in-line root page into new right page extent + */ + nextindex = le16_to_cpu(sp->header.maxentry); + memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], + (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); + + /* + * insert the new entry into the new right/child page + * (skip index in the new right page will not change) + */ + skip = split->index; + /* if insert into middle, shift right remaining entries */ + if (skip != nextindex) + memmove(&rp->xad[skip + 1], &rp->xad[skip], + (nextindex - skip) * sizeof(xad_t)); + + xad = &rp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); + + /* update page header */ + rp->header.nextindex = cpu_to_le16(nextindex + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = XTENTRYSTART; + xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - + XTENTRYSTART; + } + + /* + * reset the root + * + * init root with the single entry for the new right page + * set the 1st entry offset to 0, which force the left-most key + * at any level of the tree to be less than any search key. 
+ */ + /* + * acquire a transaction lock on the root page (in-memory inode); + * + * action: root split; + */ + BT_MARK_DIRTY(split->mp, ip); + + xad = &sp->xad[XTENTRYSTART]; + XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); + + /* update page header of root */ + sp->header.flag &= ~BT_LEAF; + sp->header.flag |= BT_INTERNAL; + + sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = XTENTRYSTART; + xtlck->lwm.length = 1; + } + + *rmpp = rmp; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitRoot: sp:0x%p rp:0x%p\n", sp, rp)); + return 0; +} + + +/* + * xtExtend() + * + * function: extend in-place; + * + * note: existing extent may or may not have been committed. + * caller is responsible for pager buffer cache update, and + * working block allocation map update; + * update pmap: alloc whole extended extent; + */ +int xtExtend(int tid, /* transaction id */ + struct inode *ip, s64 xoff, /* delta extent offset */ + s32 xlen, /* delta extent length */ + int flag) +{ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex, len; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + s64 xaddr; + tlock_t *tlck; + xtlock_t *xtlck = 0; + int rootsplit = 0; + + jFYI(1, + ("xtExtend: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); + + /* there must exist extent to be extended */ + if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, 0))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* extension must be contiguous */ + xad = &p->xad[index]; + jFYI(0, ("xtExtend: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + (ulong) offsetXAD(xad), lengthXAD(xad), + (ulong) addressXAD(xad))); + assert((offsetXAD(xad) + lengthXAD(xad)) == xoff); + + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + /* extend will overflow extent ? */ + xlen = lengthXAD(xad) + xlen; + if ((len = xlen - MAXXLEN) <= 0) + goto extendOld; + + /* + * extent overflow: insert entry for new extent + */ +//insertNew: + xoff = offsetXAD(xad) + MAXXLEN; + xaddr = addressXAD(xad) + MAXXLEN; + nextindex = le16_to_cpu(p->header.nextindex); + + /* + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. 
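 *
 * (Editorial example, not in the original patch: extending an entry
 * whose length is already MAXXLEN - 2 by 10 blocks leaves len == 8
 * after the overflow test above; the old entry is capped at MAXXLEN
 * and a new XAD_NEW entry for the remaining 8 blocks, starting
 * MAXXLEN past the old entry's offset and address, is inserted
 * after it.)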
+ */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = index + 1; + split.flag = XAD_NEW; + split.off = xoff; /* split offset */ + split.len = len; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } + /* + * insert the new entry into the leaf page + */ + else { + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index + 1]; + XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* get back old entry */ + xad = &p->xad[index]; + xlen = MAXXLEN; + + /* + * extend old extent + */ + extendOld: + XADlength(xad, xlen); + if (!(xad->flag & XAD_NEW)) + xad->flag |= XAD_EXTENDED; + + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtTailgate() + * + * function: split existing 'tail' extent + * (split offset >= start offset of tail extent), and + * relocate and extend the split tail half; + * + * note: existing extent may or may not have been committed. + * caller is responsible for pager buffer cache update, and + * working block allocation map update; + * update pmap: free old split tail extent, alloc new extent; + */ +int xtTailgate(int tid, /* transaction id */ + struct inode *ip, s64 xoff, /* split/new extent offset */ + s32 xlen, /* new extent length */ + s64 xaddr, /* new extent address */ + int flag) +{ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex, llen, rlen; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + tlock_t *tlck; + xtlock_t *xtlck = 0; + tlock_t *mtlck; + maplock_t *pxdlock; + int rootsplit = 0; + +/* +printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", + (ulong)xoff, xlen, (ulong)xaddr); +*/ + + /* there must exist extent to be tailgated */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* entry found must be last entry */ + nextindex = le16_to_cpu(p->header.nextindex); + assert(index == nextindex - 1); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire tlock of the leaf page containing original entry + */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + /* completely replace extent ? 
*/ + xad = &p->xad[index]; +/* +printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); +*/ + if ((llen = xoff - offsetXAD(xad)) == 0) + goto updateOld; + + /* + * partially replace extent: insert entry for new extent + */ +//insertNew: + /* + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. + */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = index + 1; + split.flag = XAD_NEW; + split.off = xoff; /* split offset */ + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } + /* + * insert the new entry into the leaf page + */ + else { + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index + 1]; + XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* get back old XAD */ + xad = &p->xad[index]; + + /* + * truncate/relocate old extent at split offset + */ + updateOld: + /* update dmap for old/committed/truncated extent */ + rlen = lengthXAD(xad) - llen; + if (!(xad->flag & XAD_NEW)) { + /* free from PWMAP at commit */ + if (!test_cflag(COMMIT_Nolink, ip)) { + mtlck = txMaplock(tid, ip, tlckMAP); + pxdlock = (maplock_t *) & mtlck->lock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); + PXDlength(&pxdlock->pxd, rlen); + pxdlock->index = 1; + } + jEVENT(0, + ("xtTailgate: free extent xaddr:0x%lx xlen:0x%x\n", + (ulong) addressPXD(&pxdlock->pxd), + lengthPXD(&pxdlock->pxd))); + } else + /* free from WMAP */ + dbFree(ip, addressXAD(xad) + llen, (s64) rlen); + + if (llen) + /* truncate */ + XADlength(xad, llen); + else + /* replace */ + XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); + + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? 
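/*
 * (Editorial aside, not in the original patch.)  Net effect of the
 * tailgate for an existing xad covering offsets 100..149 and a call
 * with xoff == 120, xlen == 30: llen == 20, so the old entry is
 * truncated to cover 100..119; its 30-block tail is freed, through
 * the pmap at commit if the extent was already committed, else
 * straight back to the working map, and a new XAD_NEW entry for
 * 120..149 at the caller's xaddr is inserted after it.
 */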
+ min(index, (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtUpdate() + * + * function: update XAD; + * + * update extent for allocated_but_not_recorded or + * compressed extent; + * + * parameter: + * nxad - new XAD; + * logical extent of the specified XAD must be completely + * contained by an existing XAD; + */ +int xtUpdate(int tid, struct inode *ip, xad_t * nxad) +{ /* new XAD */ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index0, index, newindex, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad, *lxad, *rxad; + int xflag; + s64 nxoff, xoff; + int nxlen, xlen, lxlen, rxlen; + s64 nxaddr, xaddr; + tlock_t *tlck; + xtlock_t *xtlck = 0; + int rootsplit = 0, newpage = 0; + + /* there must exist extent to be tailgated */ + nxoff = offsetXAD(nxad); + nxlen = lengthXAD(nxad); + nxaddr = addressXAD(nxad); +/* +printf("xtUpdate: nxflag:0x%x nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", + nxad->flag, (ulong)nxoff, nxlen, (ulong)nxaddr); +*/ + if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire tlock of the leaf page containing original entry + */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + xad = &p->xad[index0]; + xflag = xad->flag; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); +/* +printf("xtUpdate: xflag:0x%x xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + xflag, (ulong)xoff, xlen, (ulong)xaddr); +*/ + + /* nXAD must be completely contained within XAD */ + assert(xoff <= nxoff); + assert(nxoff + nxlen <= xoff + xlen); + + index = index0; + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + +#ifdef _JFS_WIP_NOCOALESCE + if (xoff < nxoff) + goto updateRight; + + /* + * replace XAD with nXAD + */ + replace: /* (nxoff == xoff) */ + if (nxlen == xlen) { + /* replace XAD with nXAD:recorded */ + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + goto out; + } else /* (nxlen < xlen) */ + goto updateLeft; +#endif /* _JFS_WIP_NOCOALESCE */ + +/* #ifdef _JFS_WIP_COALESCE */ + if (xoff < nxoff) + goto coalesceRight; + + /* + * coalesce with left XAD + */ +//coalesceLeft: /* (xoff == nxoff) */ + /* is XAD first entry of page ? */ + if (index == XTENTRYSTART) + goto replace; + + /* is nXAD logically and physically contiguous with lXAD ? */ + lxad = &p->xad[index - 1]; + lxlen = lengthXAD(lxad); + if (!(lxad->flag & XAD_NOTRECORDED) && + (nxoff == offsetXAD(lxad) + lxlen) && + (nxaddr == addressXAD(lxad) + lxlen) && + (lxlen + nxlen < MAXXLEN)) { + /* extend right lXAD */ + index0 = index - 1; + XADlength(lxad, lxlen + nxlen); + + /* If we just merged two extents together, need to make sure the + * right extent gets logged. If the left one is marked XAD_NEW, + * then we know it will be logged. 
Otherwise, mark as + * XAD_EXTENDED + */ + if (!(lxad->flag & XAD_NEW)) + lxad->flag |= XAD_EXTENDED; + + if (xlen > nxlen) { + /* truncate XAD */ + XADoffset(xad, xoff + nxlen); + XADlength(xad, xlen - nxlen); + XADaddress(xad, xaddr + nxlen); + goto out; + } else { /* (xlen == nxlen) */ + + /* remove XAD */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - + 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + + index = index0; + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + xoff = nxoff = offsetXAD(lxad); + xlen = nxlen = lxlen + nxlen; + xaddr = nxaddr = addressXAD(lxad); + goto coalesceRight; + } + } + + /* + * replace XAD with nXAD + */ + replace: /* (nxoff == xoff) */ + if (nxlen == xlen) { + /* replace XAD with nXAD:recorded */ + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + goto coalesceRight; + } else /* (nxlen < xlen) */ + goto updateLeft; + + /* + * coalesce with right XAD + */ + coalesceRight: /* (xoff <= nxoff) */ + /* is XAD last entry of page ? */ + if (newindex == nextindex) { + if (xoff == nxoff) + goto out; + goto updateRight; + } + + /* is nXAD logically and physically contiguous with rXAD ? */ + rxad = &p->xad[index + 1]; + rxlen = lengthXAD(rxad); + if (!(rxad->flag & XAD_NOTRECORDED) && + (nxoff + nxlen == offsetXAD(rxad)) && + (nxaddr + nxlen == addressXAD(rxad)) && + (rxlen + nxlen < MAXXLEN)) { + /* extend left rXAD */ + XADoffset(rxad, nxoff); + XADlength(rxad, rxlen + nxlen); + XADaddress(rxad, nxaddr); + + /* If we just merged two extents together, need to make sure + * the left extent gets logged. If the right one is marked + * XAD_NEW, then we know it will be logged. Otherwise, mark as + * XAD_EXTENDED + */ + if (!(rxad->flag & XAD_NEW)) + rxad->flag |= XAD_EXTENDED; + + if (xlen > nxlen) + /* truncate XAD */ + XADlength(xad, xlen - nxlen); + else { /* (xlen == nxlen) */ + + /* remove XAD */ + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + } + + goto out; + } else if (xoff == nxoff) + goto out; + + assert(xoff < nxoff); +/* #endif _JFS_WIP_COALESCE */ + + /* + * split XAD into (lXAD, nXAD): + * + * |---nXAD---> + * --|----------XAD----------|-- + * |-lXAD-| + */ + updateRight: /* (xoff < nxoff) */ + /* truncate old XAD as lXAD:not_recorded */ + xad = &p->xad[index]; + XADlength(xad, nxoff - xoff); + + /* insert nXAD:recorded */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { +/* +printf("xtUpdate.updateRight.split p:0x%p\n", p); +*/ + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = newindex; + split.flag = xflag & ~XAD_NOTRECORDED; + split.off = nxoff; + split.len = nxlen; + split.addr = nxaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else { + /* get back old 
page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + /* is nXAD on new page ? */ + if (newindex > + (le16_to_cpu(p->header.maxentry) >> 1)) { + newindex = + newindex - + le16_to_cpu(p->header.nextindex) + + XTENTRYSTART; + newpage = 1; + } + } + } else { + /* if insert into middle, shift right remaining entries */ + if (newindex < nextindex) + memmove(&p->xad[newindex + 1], &p->xad[newindex], + (nextindex - newindex) << L2XTSLOTSIZE); + + /* insert the entry */ + xad = &p->xad[newindex]; + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + /* advance next available entry index. */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* + * does nXAD force 3-way split ? + * + * |---nXAD--->| + * --|----------XAD-------------|-- + * |-lXAD-| |-rXAD -| + */ + if (nxoff + nxlen == xoff + xlen) + goto out; + + /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ + if (newpage) { + /* close out old page */ + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index0, (int)xtlck->lwm.offset) : index0; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + bn = le64_to_cpu(p->header.next); + XT_PUTPAGE(mp); + + /* get new right page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + index0 = index = newindex; + } else + index++; + + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + xlen = xlen - (nxoff - xoff); + xoff = nxoff; + xaddr = nxaddr; + + /* recompute split pages */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { +/* +printf("xtUpdate: updateRight+Left recompute split pages: p:0x%p\n", p); +*/ + XT_PUTPAGE(mp); + + if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); + assert(index0 == index); + } + + /* + * split XAD into (nXAD, rXAD) + * + * ---nXAD---| + * --|----------XAD----------|-- + * |-rXAD-| + */ + updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ + /* update old XAD with nXAD:recorded */ + xad = &p->xad[index]; + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + /* insert rXAD:not_recorded */ + xoff = xoff + nxlen; + xlen = xlen - nxlen; + xaddr = xaddr + nxlen; + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + +/* +printf("xtUpdate.updateLeft.split p:0x%p\n", p); +*/ + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = newindex; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } else { + /* if insert into middle, shift right remaining entries */ + if (newindex < 
nextindex) + memmove(&p->xad[newindex + 1], &p->xad[newindex], + (nextindex - newindex) << L2XTSLOTSIZE); + + /* insert the entry */ + xad = &p->xad[newindex]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index. */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + out: + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index0, (int)xtlck->lwm.offset) : index0; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +#ifdef _STILL_TO_PORT +/* + * xtAppend() + * + * function: grow in append mode from contiguous region specified ; + * + * parameter: + * tid - transaction id; + * ip - file object; + * xflag - extent flag: + * xoff - extent offset; + * maxblocks - max extent length; + * xlen - extent length (in/out); + * xaddrp - extent address pointer (in/out): + * flag - + * + * return: + */ +int xtAppend(int tid, /* transaction id */ + struct inode *ip, int xflag, s64 xoff, s32 maxblocks, /* @GD1 */ + s32 * xlenp, /* (in/out) */ + s64 * xaddrp, /* (in/out) */ + int flag) +{ + int rc = 0; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn, xaddr; + int index, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + int cmp; + tlock_t *tlck; + xtlock_t *xtlck; + int nsplit, nblocks, xlen; + pxdlist_t pxdlist; + pxd_t *pxd; + + xaddr = *xaddrp; + xlen = *xlenp; + jEVENT(0, + ("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx\n", + (ulong) xoff, maxblocks, xlen, (ulong) xaddr)); + + /* + * search for the entry location at which to insert: + * + * xtFastSearch() and xtSearch() both returns (leaf page + * pinned, index at which to insert). + * n.b. xtSearch() may return index of maxentry of + * the full page. + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + if (cmp == 0) { + rc = EEXIST; + goto out; + } +//insert: + /* + * insert entry for new extent + */ + xflag |= XAD_NEW; + + /* + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. 
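+	 *
+	 * A condensed, illustrative sketch of the reservation loop that
+	 * follows (not a separate helper; names are those used below):
+	 *
+	 *	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
+	 *	for (nsplit = btstack.nsplit; nsplit > 0; nsplit--) {
+	 *		dbAllocBottomUp(ip, xaddr, (s64) nblocks);
+	 *		xaddr += nblocks;
+	 *	}
+	 *
+	 * so every anticipated index-page split owns a backing extent
+	 * before the data extent itself is allocated.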
+ */ + nextindex = le16_to_cpu(p->header.nextindex); + if (nextindex < le16_to_cpu(p->header.maxentry)) + goto insertLeaf; + + /* + * allocate new index blocks to cover index page split(s) + */ + nsplit = btstack.nsplit; + split.pxdlist = &pxdlist; + pxdlist.maxnpxd = pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + nblocks = JFS_SBI(ip->i_sb)->nbperpage; + for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { /* @GD1 */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, nblocks); + + pxdlist.maxnpxd++; + + continue; + } + + /* undo allocation */ + + goto out; + } + + xlen = min(xlen, maxblocks); /* @GD1 */ + + /* + * allocate data extent requested + */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) + goto out; + + split.mp = mp; + split.index = index; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { + /* undo data extent allocation */ + dbFree(ip, *xaddrp, (s64) * xlenp); + + return rc; + } + + *xaddrp = xaddr; + *xlenp = xlen; + return 0; + + /* + * insert the new entry into the leaf page + */ + insertLeaf: + /* + * allocate data extent requested + */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) + goto out; + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + + *xaddrp = xaddr; + *xlenp = xlen; + + out: + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* - TBD for defragmentaion/reorganization - + * + * xtDelete() + * + * function: + * delete the entry with the specified key. + * + * N.B.: whole extent of the entry is assumed to be deleted. + * + * parameter: + * + * return: + * ENOENT: if the entry is not found. + * + * exception: + */ +int xtDelete(int tid, struct inode *ip, s64 xoff, s32 xlen, int flag) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index, nextindex; + tlock_t *tlck; + xtlock_t *xtlck; + + /* + * find the matching entry; xtSearch() pins the page + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + return rc; + + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + if (cmp) { + /* unpin the leaf page */ + XT_PUTPAGE(mp); + return ENOENT; + } + + /* + * delete the entry from the leaf page + */ + nextindex = le16_to_cpu(p->header.nextindex); + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); + + /* + * if the leaf page bocome empty, free the page + */ + if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) + return (xtDeleteUp(tid, ip, mp, p, &btstack)); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action:xad deletion; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? 
min(index, xtlck->lwm.offset) : index; + + /* if delete from middle, shift left/compact the remaining entries */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - 1) * sizeof(xad_t)); + + XT_PUTPAGE(mp); + + return 0; +} + + +/* - TBD for defragmentaion/reorganization - + * + * xtDeleteUp() + * + * function: + * free empty pages as propagating deletion up the tree + * + * parameter: + * + * return: + */ +static int +xtDeleteUp(int tid, + struct inode *ip, + metapage_t * fmp, xtpage_t * fp, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; + xtpage_t *p; + int index, nextindex; + s64 xaddr; + int xlen; + btframe_t *parent; + tlock_t *tlck; + xtlock_t *xtlck; + + /* + * keep root leaf page which has become empty + */ + if (fp->header.flag & BT_ROOT) { + /* keep the root page */ + fp->header.flag &= ~BT_INTERNAL; + fp->header.flag |= BT_LEAF; + fp->header.nextindex = cpu_to_le16(XTENTRYSTART); + + /* XT_PUTPAGE(fmp); */ + + return 0; + } + + /* + * free non-root leaf page + */ + if ((rc = xtRelink(tid, ip, fp))) + return rc; + + xaddr = addressPXD(&fp->header.self); + xlen = lengthPXD(&fp->header.self); + /* free the page extent */ + dbFree(ip, xaddr, (s64) xlen); + + /* free the buffer page */ + discard_metapage(fmp); + + /* + * propagate page deletion up the index tree + * + * If the delete from the parent page makes it empty, + * continue all the way up the tree. + * stop if the root page is reached (which is never deleted) or + * if the entry deletion does not empty the page. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* get/pin the parent page */ + XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = parent->index; + + /* delete the entry for the freed child page from parent. + */ + nextindex = le16_to_cpu(p->header.nextindex); + + /* + * the parent has the single entry being deleted: + * free the parent page which has become empty. + */ + if (nextindex == 1) { + if (p->header.flag & BT_ROOT) { + /* keep the root page */ + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = + cpu_to_le16(XTENTRYSTART); + + /* XT_PUTPAGE(fmp); */ + + break; + } else { + /* free the parent page */ + if ((rc = xtRelink(tid, ip, p))) + return rc; + + xaddr = addressPXD(&p->header.self); + /* free the page extent */ + dbFree(ip, xaddr, + (s64) JFS_SBI(ip->i_sb)->nbperpage); + + /* unpin/free the buffer page */ + discard_metapage(fmp); + + /* propagate up */ + continue; + } + } + /* + * the parent has other entries remaining: + * delete the router entry from the parent page. + */ + else { + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action:xad deletion; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + xtlck->lwm. 
+ offset) : index; + + /* if delete from middle, + * shift left/compact the remaining entries in the page + */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - + 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + jEVENT(0, + ("xtDeleteUp(entry): 0x%lx[%d]\n", + (ulong) parent->bn, index)); + } + + /* unpin the parent page */ + XT_PUTPAGE(mp); + + /* exit propagation up */ + break; + } + + return 0; +} + + +/* + * NAME: xtRelocate() + * + * FUNCTION: relocate xtpage or data extent of regular file; + * This function is mainly used by defragfs utility. + * + * NOTE: This routine does not have the logic to handle + * uncommitted allocated extent. The caller should call + * txCommit() to commit all the allocation before call + * this routine. + */ +xtRelocate(int tid, struct inode * ip, xad_t * oxad, /* old XAD */ + s64 nxaddr, /* new xaddr */ + int xtype) +{ /* extent type: XTPAGE or DATAEXT */ + int rc = 0; + tblock_t *tblk; + tlock_t *tlck; + xtlock_t *xtlck; + metapage_t *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ + xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ + xad_t *xad; + pxd_t *pxd; + s64 xoff, xsize; + int xlen; + s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; + cbuf_t *cp; + s64 offset, nbytes, nbrd, pno; + int nb, npages, nblks; + s64 bn; + int cmp; + int index; + pxdlock_t *pxdlock; + btstack_t btstack; /* traverse stack */ + + xtype = xtype & EXTENT_TYPE; + + xoff = offsetXAD(oxad); + oxaddr = addressXAD(oxad); + xlen = lengthXAD(oxad); + + /* validate extent offset */ + offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; + if (offset >= ip->i_size) + return ESTALE; /* stale extent */ + + jEVENT(0, + ("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx\n", + xtype, (ulong) xoff, xlen, (ulong) oxaddr, + (ulong) nxaddr)); + + /* + * 1. get and validate the parent xtpage/xad entry + * covering the source extent to be relocated; + */ + if (xtype == DATAEXT) { + /* search in leaf entry */ + rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + if (rc) + return rc; + if (cmp) { + XT_PUTPAGE(pmp); + return ESTALE; + } + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + + /* validate for exact match with a single entry */ + xad = &pp->xad[index]; + if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { + XT_PUTPAGE(pmp); + return ESTALE; + } + } else { /* (xtype == XTPAGE) */ + + /* search in internal entry */ + rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); + if (rc) + return rc; + if (cmp) { + XT_PUTPAGE(pmp); + return ESTALE; + } + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + + /* xtSearchNode() validated for exact match with a single entry + */ + xad = &pp->xad[index]; + } + jEVENT(0, ("xtRelocate: parent xad entry validated.\n")); + + /* + * 2. 
relocate the extent + */ + if (xtype == DATAEXT) { + /* if the extent is allocated-but-not-recorded + * there is no real data to be moved in this extent, + */ + if (xad->flag & XAD_NOTRECORDED) + goto out; + else + /* release xtpage for cmRead()/xtLookup() */ + XT_PUTPAGE(pmp); + + /* + * cmRelocate() + * + * copy target data pages to be relocated; + * + * data extent must start at page boundary and + * multiple of page size (except the last data extent); + * read in each page of the source data extent into cbuf, + * update the cbuf extent descriptor of the page to be + * homeward bound to new dst data extent + * copy the data from the old extent to new extent. + * copy is essential for compressed files to avoid problems + * that can arise if there was a change in compression + * algorithms. + * it is a good strategy because it may disrupt cache + * policy to keep the pages in memory afterwards. + */ + offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; + assert((offset & CM_OFFSET) == 0); + nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; + pno = offset >> CM_L2BSIZE; + npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; +/* + npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - + (offset >> CM_L2BSIZE) + 1; +*/ + sxaddr = oxaddr; + dxaddr = nxaddr; + + /* process the request one cache buffer at a time */ + for (nbrd = 0; nbrd < nbytes; nbrd += nb, + offset += nb, pno++, npages--) { + /* compute page size */ + nb = min(nbytes - nbrd, CM_BSIZE); + + /* get the cache buffer of the page */ + if (rc = cmRead(ip, offset, npages, &cp)) + break; + + assert(addressPXD(&cp->cm_pxd) == sxaddr); + assert(!cp->cm_modified); + + /* bind buffer with the new extent address */ + nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; + cmSetXD(ip, cp, pno, dxaddr, nblks); + + /* release the cbuf, mark it as modified */ + cmPut(cp, TRUE); + + dxaddr += nblks; + sxaddr += nblks; + } + + /* get back parent page */ + rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + jEVENT(0, ("xtRelocate: target data extent relocated.\n")); + } else { /* (xtype == XTPAGE) */ + + /* + * read in the target xtpage from the source extent; + */ + XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); + if (rc) { + XT_PUTPAGE(pmp); + return rc; + } + + /* + * read in sibling pages if any to update sibling pointers; + */ + rmp = NULL; + if (p->header.next) { + nextbn = le64_to_cpu(p->header.next); + XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); + if (rc) { + XT_PUTPAGE(pmp); + XT_PUTPAGE(mp); + return (rc); + } + } + + lmp = NULL; + if (p->header.prev) { + prevbn = le64_to_cpu(p->header.prev); + XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); + if (rc) { + XT_PUTPAGE(pmp); + XT_PUTPAGE(mp); + if (rmp) + XT_PUTPAGE(rmp); + return (rc); + } + } + + /* at this point, all xtpages to be updated are in memory */ + + /* + * update sibling pointers of sibling xtpages if any; + */ + if (lmp) { + BT_MARK_DIRTY(lmp, ip); + tlck = + txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); + lp->header.next = cpu_to_le64(nxaddr); + XT_PUTPAGE(lmp); + } + + if (rmp) { + BT_MARK_DIRTY(rmp, ip); + tlck = + txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); + rp->header.prev = cpu_to_le64(nxaddr); + XT_PUTPAGE(rmp); + } + + /* + * update the target xtpage to be relocated + * + * update the self address of the target page + * and write to destination extent; + * redo image covers the whole xtpage since it is new page + * to the destination extent; + * update of bmap for the free of source extent + * of the target xtpage itself: + * update of bmap for the allocation 
of destination extent + * of the target xtpage itself: + * update of bmap for the extents covered by xad entries in + * the target xtpage is not necessary since they are not + * updated; + * if not committed before this relocation, + * target page may contain XAD_NEW entries which must + * be scanned for bmap update (logredo() always + * scan xtpage REDOPAGE image for bmap update); + * if committed before this relocation (tlckRELOCATE), + * scan may be skipped by commit() and logredo(); + */ + BT_MARK_DIRTY(mp, ip); + /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ + tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); + xtlck = (xtlock_t *) & tlck->lock; + + /* update the self address in the xtpage header */ + pxd = &p->header.self; + PXDaddress(pxd, nxaddr); + + /* linelock for the after image of the whole page */ + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + + /* update the buffer extent descriptor of target xtpage */ + xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; + bmSetXD(mp, nxaddr, xsize); + + /* unpin the target page to new homeward bound */ + XT_PUTPAGE(mp); + jEVENT(0, ("xtRelocate: target xtpage relocated.\n")); + } + + /* + * 3. acquire maplock for the source extent to be freed; + * + * acquire a maplock saving the src relocated extent address; + * to free of the extent at commit time; + */ + out: + /* if DATAEXT relocation, write a LOG_UPDATEMAP record for + * free PXD of the source data extent (logredo() will update + * bmap for free of source data extent), and update bmap for + * free of the source data extent; + */ + if (xtype == DATAEXT) + tlck = txMaplock(tid, ip, tlckMAP); + /* if XTPAGE relocation, write a LOG_NOREDOPAGE record + * for the source xtpage (logredo() will init NoRedoPage + * filter and will also update bmap for free of the source + * xtpage), and update bmap for free of the source xtpage; + * N.B. We use tlckMAP instead of tlkcXTREE because there + * is no buffer associated with this lock since the buffer + * has been redirected to the target location. + */ + else /* (xtype == XTPAGE) */ + tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); + + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, oxaddr); + PXDlength(&pxdlock->pxd, xlen); + pxdlock->index = 1; + + /* + * 4. update the parent xad entry for relocation; + * + * acquire tlck for the parent entry with XAD_NEW as entry + * update which will write LOG_REDOPAGE and update bmap for + * allocation of XAD_NEW destination extent; + */ + jEVENT(0, ("xtRelocate: update parent xad entry.\n")); + BT_MARK_DIRTY(pmp, ip); + tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + + /* update the XAD with the new destination extent; */ + xad = &pp->xad[index]; + xad->flag |= XAD_NEW; + XADaddress(xad, nxaddr); + + xtlck->lwm.offset = min(index, xtlck->lwm.offset); + xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - + xtlck->lwm.offset; + + /* unpin the parent xtpage */ + XT_PUTPAGE(pmp); + + return rc; +} + + +/* + * xtSearchNode() + * + * function: search for the internal xad entry covering specified extent. + * This function is mainly used by defragfs utility. + * + * parameters: + * ip - file object; + * xad - extent to find; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag; + * + * returns: + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. 
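+ *	(for example, the caller in xtRelocate() above does:
+ *
+ *		rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
+ *		if (rc == 0 && cmp == 0)
+ *			pp->xad[index] is the matching internal entry;
+ *
+ *	 which is the intended usage pattern.)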
+ * the page containing the entry is pinned at exit. + */ +static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ + int *cmpp, btstack_t * btstack, int flag) +{ + int rc = 0; + s64 xoff, xaddr; + int xlen; + int cmp = 1; /* init for empty page */ + s64 bn; /* block number */ + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* page */ + int base, index, lim; + btframe_t *btsp; + s64 t64; + + BT_CLR(btstack); + + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + if (p->header.flag & BT_LEAF) + return ESTALE; + + lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + + /* + * binary search with search key K on the current page + */ + for (base = XTENTRYSTART; lim; lim >>= 1) { + index = base + (lim >> 1); + + XT_CMP(cmp, xoff, &p->xad[index], t64); + if (cmp == 0) { + /* + * search hit + * + * verify for exact match; + */ + if (xaddr == addressXAD(&p->xad[index]) && + xoff == offsetXAD(&p->xad[index])) { + *cmpp = cmp; + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + return 0; + } + + /* descend/search its child page */ + goto next; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss - non-leaf page: + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or maxentry index. + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? base - 1 : base; + + /* + * go down to child page + */ + next: + /* get the child page block number */ + bn = addressXAD(&p->xad[index]); + + /* unpin the parent page */ + XT_PUTPAGE(mp); + } +} + + +/* + * xtRelink() + * + * function: + * link around a freed page. 
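+ *
+ *	sketch of the pointer surgery (illustration only):
+ *
+ *	before:	... <-> prev <-> p <-> next <-> ...
+ *	after:	... <-> prev <--------> next <-> ...
+ *
+ *	prev's header.next and next's header.prev are each rewritten
+ *	under a tlckXTREE|tlckRELINK transaction lock to bypass p.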
+ * + * Parameter: + * int tid, + * struct inode *ip, + * xtpage_t *p) + * + * returns: + */ +static int xtRelink(int tid, struct inode *ip, xtpage_t * p) +{ + int rc = 0; + metapage_t *mp; + s64 nextbn, prevbn; + tlock_t *tlck; + + nextbn = le64_to_cpu(p->header.next); + prevbn = le64_to_cpu(p->header.prev); + + /* update prev pointer of the next page */ + if (nextbn != 0) { + XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * acquire a transaction lock on the page; + * + * action: update prev pointer; + */ + BT_MARK_DIRTY(mp, ip); + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + /* the page may already have been tlock'd */ + + p->header.prev = cpu_to_le64(prevbn); + + XT_PUTPAGE(mp); + } + + /* update next pointer of the previous page */ + if (prevbn != 0) { + XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * acquire a transaction lock on the page; + * + * action: update next pointer; + */ + BT_MARK_DIRTY(mp, ip); + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + /* the page may already have been tlock'd */ + + p->header.next = le64_to_cpu(nextbn); + + XT_PUTPAGE(mp); + } + + return 0; +} +#endif /* _STILL_TO_PORT */ + + +/* + * xtInitRoot() + * + * initialize file root (inline in inode) + */ +void xtInitRoot(int tid, struct inode *ip) +{ + xtpage_t *p; + tlock_t *tlck; + + /* + * acquire a transaction lock on the root + * + * action: + */ + tlck = txLock(tid, ip, (metapage_t *) &JFS_IP(ip)->bxflag, + tlckXTREE | tlckNEW); + p = &JFS_IP(ip)->i_xtroot; + + p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + + if (S_ISDIR(ip->i_mode)) + p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); + else { + p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); + ip->i_size = 0; + } + + + return; +} + + +/* + * We can run into a deadlock truncating a file with a large number of + * xtree pages (large fragmented file). A robust fix would entail a + * reservation system where we would reserve a number of metadata pages + * and tlocks which we would be guaranteed without a deadlock. Without + * this, a partial fix is to limit number of metadata pages we will lock + * in a single transaction. Currently we will truncate the file so that + * no more than 50 leaf pages will be locked. The caller of xtTruncate + * will be responsible for ensuring that the current transaction gets + * committed, and that subsequent transactions are created to truncate + * the file further if needed. + */ +#define MAX_TRUNCATE_LEAVES 50 + +/* + * xtTruncate() + * + * function: + * traverse for truncation logging backward bottom up; + * terminate at the last extent entry at the current subtree + * root page covering new down size. + * truncation may occur within the last extent entry. + * + * parameter: + * int tid, + * struct inode *ip, + * s64 newsize, + * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} + * + * return: + * + * note: + * PWMAP: + * 1. truncate (non-COMMIT_NOLINK file) + * by jfs_truncate() or jfs_open(O_TRUNC): + * xtree is updated; + * 2. truncate index table of directory when last entry removed + * map update via tlock at commit time; + * PMAP: + * Call xtTruncate_pmap instead + * WMAP: + * 1. remove (free zero link count) on last reference release + * (pmap has been freed at commit zero link count); + * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): + * xtree is updated; + * map update directly at truncation time; + * + * if (DELETE) + * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); + * else if (TRUNCATE) + * must write LOG_NOREDOPAGE for deleted index page; + * + * pages may already have been tlocked by anonymous transactions + * during file growth (i.e., write) before truncation; + * + * except last truncated entry, deleted entries remains as is + * in the page (nextindex is updated) for other use + * (e.g., log/update allocation map): this avoid copying the page + * info but delay free of pages; + * + */ +s64 xtTruncate(int tid, struct inode *ip, s64 newsize, int flag) +{ + int rc = 0; + s64 teof; + metapage_t *mp; + xtpage_t *p; + s64 bn; + int index, nextindex; + xad_t *xad; + s64 xoff, xaddr; + int xlen, len, freexlen; + btstack_t btstack; + btframe_t *parent; + tblock_t *tblk; + tlock_t *tlck = 0; + xtlock_t *xtlck = 0; + xdlistlock_t xadlock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + int lid; + s64 nfreed; + int freed, log; + int locked_leaves = 0; + + /* save object truncation type */ + if (tid) { + tblk = &TxBlock[tid]; + tblk->xflag |= flag; + } + + nfreed = 0; + + flag &= COMMIT_MAP; + assert(flag != COMMIT_PMAP); + + if (flag == COMMIT_PWMAP) + log = 1; + else { + log = 0; + xadlock.flag = mlckFREEXADLIST; + xadlock.index = 1; + } + + /* + * if the newsize is not an integral number of pages, + * the file between newsize and next page boundary will + * be cleared. + * if truncating into a file hole, it will cause + * a full block to be allocated for the logical block. + */ + + /* + * release page blocks of truncated region + * + * free the data blocks from the leaf index blocks. + * delete the parent index entries corresponding to + * the freed child data/index blocks. + * free the index blocks themselves which aren't needed + * in new sized file. + * + * index blocks are updated only if the blocks are to be + * retained in the new sized file. + * if type is PMAP, the data and index pages are NOT + * freed, and the data and index blocks are NOT freed + * from working map. + * (this will allow continued access of data/index of + * temporary file (zerolink count file truncated to zero-length)). + */ + teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + + /* clear stack */ + BT_CLR(&btstack); + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) + goto getChild; + + /* + * leaf page + */ + + /* Since this is the rightmost leaf, and we may have already freed + * a page that was formerly to the right, let's make sure that the + * next pointer is zero. + */ + p->header.next = 0; + + freed = 0; + + /* does region covered by leaf page precede Teof ? 
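+	 * (with teof being the first block past the new size, the test
+	 *  below reads:
+	 *
+	 *	teof >= xoff + xlen	whole page precedes teof: keep it
+	 *	teof <  xoff + xlen	truncation lands in this page
+	 *
+	 *  an illustrative restatement of the comparison that follows)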
*/ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + if (teof >= xoff + xlen) { + XT_PUTPAGE(mp); + goto getParent; + } + + /* (re)acquire tlock of the leaf page */ + if (log) { + if (++locked_leaves > MAX_TRUNCATE_LEAVES) { + /* + * We need to limit the size of the transaction + * to avoid exhausting pagecache & tlocks + */ + XT_PUTPAGE(mp); + newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + goto getParent; + } + tlck = txLock(tid, ip, mp, tlckXTREE); + tlck->type = tlckXTREE | tlckTRUNCATE; + xtlck = (xtlock_t *) & tlck->lock; + xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; + } + BT_MARK_DIRTY(mp, ip); + + /* + * scan backward leaf page entries + */ + for (; index >= XTENTRYSTART; index--) { + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); + + /* + * entry beyond eof: continue scan of current page + * xad + * ---|---=======-------> + * eof + */ + if (teof < xoff) { + nfreed += xlen; + continue; + } + + /* + * (xoff <= teof): last entry to be deleted from page; + * If other entries remain in page: keep and update the page. + */ + + /* + * eof == entry_start: delete the entry + * xad + * -------|=======-------> + * eof + * + */ + if (teof == xoff) { + nfreed += xlen; + + if (index == XTENTRYSTART) + break; + + nextindex = index; + } + /* + * eof within the entry: truncate the entry. + * xad + * -------===|===-------> + * eof + */ + else if (teof < xoff + xlen) { + /* update truncated entry */ + len = teof - xoff; + freexlen = xlen - len; + XADlength(xad, len); + + /* save pxd of truncated extent in tlck */ + xaddr += len; + if (log) { /* COMMIT_PWMAP */ + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index, (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = index + 1 - + xtlck->lwm.offset; + pxdlock = (pxdlock_t *) & xtlck->pxdlock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, freexlen); + } + /* free truncated extent */ + else { /* COMMIT_WMAP */ + + pxdlock = (pxdlock_t *) & xadlock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, freexlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + + /* reset map lock */ + xadlock.flag = mlckFREEXADLIST; + } + + /* current entry is new last entry; */ + nextindex = index + 1; + + nfreed += freexlen; + } + /* + * eof beyond the entry: + * xad + * -------=======---|---> + * eof + */ + else { /* (xoff + xlen < teof) */ + + nextindex = index + 1; + } + + if (nextindex < le16_to_cpu(p->header.nextindex)) { + if (!log) { /* COMMIT_WAMP */ + xadlock.xdlist = &p->xad[nextindex]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + nextindex; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + p->header.nextindex = cpu_to_le16(nextindex); + } + + XT_PUTPAGE(mp); + + /* assert(freed == 0); */ + goto getParent; + } /* end scan of leaf page entries */ + + freed = 1; + + /* + * leaf page become empty: free the page if type != PMAP + */ + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckFREE: + * free data extents covered by leaf [XTENTRYSTART:hwm); + * invalidate leaf if COMMIT_PWMAP; + * if (TRUNCATE), will write LOG_NOREDOPAGE; + */ + tlck->type = tlckXTREE | tlckFREE; + } else { /* COMMIT_WAMP */ + + /* free data extents covered by leaf */ + xadlock.xdlist = &p->xad[XTENTRYSTART]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + txFreeMap(ip, (maplock_t *) & xadlock, 0, COMMIT_WMAP); + } + + if (p->header.flag & BT_ROOT) 
{ + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + + XT_PUTPAGE(mp); /* debug */ + goto out; + } else { + if (log) { /* COMMIT_PWMAP */ + /* page will be invalidated at tx completion + */ + XT_PUTPAGE(mp); + } else { /* COMMIT_WMAP */ + + if ((lid = mp->lid)) + TxLock[lid].flag |= tlckFREELOCK; + + /* invalidate empty leaf page */ + discard_metapage(mp); + } + } + + /* + * the leaf page become empty: delete the parent entry + * for the leaf page if the parent page is to be kept + * in the new sized file. + */ + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + goto out; + + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + index = parent->index; + + /* + * child page was not empty: + */ + if (freed == 0) { + /* has any entry deleted from parent ? */ + if (index < le16_to_cpu(p->header.nextindex) - 1) { + /* (re)acquire tlock on the parent page */ + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckTRUNCATE: + * free child extents covered by parent [); + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + if (!(tlck->type & tlckTRUNCATE)) { + xtlck->hwm.offset = + le16_to_cpu(p->header. + nextindex) - 1; + tlck->type = + tlckXTREE | tlckTRUNCATE; + } + } else { /* COMMIT_WMAP */ + + /* free child extents covered by parent */ + xadlock.xdlist = &p->xad[index + 1]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + index - 1; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + BT_MARK_DIRTY(mp, ip); + + p->header.nextindex = cpu_to_le16(index + 1); + + /* freed = 0; */ + XT_PUTPAGE(mp); + goto getParent; + } else { + XT_PUTPAGE(mp); + goto out; + } + } + + /* + * child page was empty: + */ + nfreed += lengthXAD(&p->xad[index]); + + /* + * parent page become empty: free the page + */ + if (index == XTENTRYSTART) { + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckFREE: + * free child extents covered by parent; + * invalidate parent if COMMIT_PWMAP; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + xtlck->hwm.offset = + le16_to_cpu(p->header.nextindex) - 1; + tlck->type = tlckXTREE | tlckFREE; + } else { /* COMMIT_WMAP */ + + /* free child extents covered by parent */ + xadlock.xdlist = &p->xad[XTENTRYSTART]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + XTENTRYSTART; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + BT_MARK_DIRTY(mp, ip); + + if (p->header.flag & BT_ROOT) { + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { + /* + * Shrink root down to allow inline + * EA (otherwise fsck complains) + */ + p->header.maxentry = + cpu_to_le16(XTROOTINITSLOT); + JFS_IP(ip)->mode2 |= INLINEEA; + } + + XT_PUTPAGE(mp); /* debug */ + goto out; + } else { + if (log) { /* COMMIT_PWMAP */ + /* page will be invalidated at tx completion + */ + XT_PUTPAGE(mp); + } else { /* COMMIT_WMAP */ + + if ((lid = mp->lid)) + TxLock[lid].flag |= tlckFREELOCK; + + /* invalidate parent page */ + discard_metapage(mp); + } + + /* parent has become empty and freed: + * go back up to its parent page + */ + /* freed = 1; */ + goto getParent; + 
} + } + /* + * parent page still has entries for front region; + */ + else { + /* try truncate region covered by preceding entry + * (process backward) + */ + index--; + + /* go back down to the child page corresponding + * to the entry + */ + goto getChild; + } + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + /* process the child page */ + goto getPage; + + out: + /* + * update file resource stat + */ + /* set size + */ + if (S_ISDIR(ip->i_mode) && !newsize) + ip->i_size = 1; /* fsck hates zero-length directories */ + else + ip->i_size = newsize; + + /* update nblocks to reflect freed blocks */ + ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed); + + /* + * free tlock of invalidated pages + */ + if (flag == COMMIT_WMAP) + txFreelock(ip); + + return newsize; +} + + +/* + * xtTruncate_pmap() + * + * function: + * Perform truncate to zero length for a deleted file, leaving + * the xtree and working map untouched. This allows the file to + * be accessed via open file handles, while the delete of the file + * is committed to disk. + * + * parameter: + * int tid, + * struct inode *ip, + * s64 committed_size) + * + * return: new committed size + * + * note: + * + * To avoid deadlock by holding too many transaction locks, the + * truncation may be broken up into multiple transactions. + * The committed_size keeps track of how much of the file has been + * freed from the pmaps. + */ +s64 xtTruncate_pmap(int tid, struct inode *ip, s64 committed_size) +{ + s64 bn; + btstack_t btstack; + int cmp; + int index; + int locked_leaves = 0; + metapage_t *mp; + xtpage_t *p; + btframe_t *parent; + int rc; + tblock_t *tblk; + tlock_t *tlck = 0; + xad_t *xad; + int xlen; + s64 xoff; + xtlock_t *xtlck = 0; + + /* save object truncation type */ + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_PMAP; + + /* clear stack */ + BT_CLR(&btstack); + + if (committed_size) { + rc = xtSearch(ip, committed_size - 1, &cmp, &btstack, 0); + if (rc) + return -rc; + assert(cmp == 0); + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + } else { + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) + goto getChild; + } + + /* + * leaf page + */ + + if (++locked_leaves > MAX_TRUNCATE_LEAVES) { + /* + * We need to limit the size of the transaction + * to avoid exhausting pagecache & tlocks + */ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + XT_PUTPAGE(mp); + return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + } + tlck = txLock(tid, ip, mp, tlckXTREE); + tlck->type = tlckXTREE | tlckTRUNCATE; + xtlck = (xtlock_t *) & tlck->lock; + xtlck->hwm.offset = index; + + tlck->type = tlckXTREE | tlckFREE; + + XT_PUTPAGE(mp); + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + goto out; + + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + index = 
parent->index; + + /* + * parent page become empty: free the page + */ + if (index == XTENTRYSTART) { + /* txCommit() with tlckFREE: + * free child extents covered by parent; + * invalidate parent if COMMIT_PWMAP; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + xtlck->hwm.offset = + le16_to_cpu(p->header.nextindex) - 1; + tlck->type = tlckXTREE | tlckFREE; + + XT_PUTPAGE(mp); + + if (p->header.flag & BT_ROOT) { + + goto out; + } else { + goto getParent; + } + } + /* + * parent page still has entries for front region; + */ + else + index--; + /* + * internal page: go down to child page of current entry + */ + getChild: + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + /* process the child page */ + goto getPage; + + out: + + return 0; +} + + +#ifdef _JFS_STATISTICS +/* + * xtStatistics + */ +void xtStatistics() +{ + /* report statistics */ +} + +#endif /* _JFS_STATISTICS */ + + +#ifdef _JFS_DEBUG_XTREE +/* + * xtDisplayTree() + * + * function: traverse forward + */ +int xtDisplayTree(struct inode *ip) +{ + int rc = 0; + metapage_t *mp; + xtpage_t *p; + s64 bn, pbn; + int index, lastindex, v, h; + xad_t *xad; + btstack_t btstack; + btframe_t *btsp; + btframe_t *parent; + + printk("display B+-tree.\n"); + + /* clear stack */ + btsp = btstack.stack; + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + v = h = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* process entries forward from first index */ + index = XTENTRYSTART; + lastindex = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) { + /* + * first access of each internal page + */ + goto getChild; + } else { /* (p->header.flag & BT_LEAF) */ + + /* + * first access of each leaf page + */ + printf("leaf page "); + xtDisplayPage(ip, bn, p); + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + } + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = (btsp == btstack.stack ? 
NULL : --btsp)) == NULL) + /* current page must have been root */ + return; + + /* + * parent page scan completed + */ + if ((index = parent->index) == (lastindex = parent->lastindex)) { + /* go back up to the parent page */ + goto getParent; + } + + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + /* v = parent->level; */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* get next parent entry */ + index++; + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* push/save current parent entry for the child page */ + btsp->bn = pbn = bn; + btsp->index = index; + btsp->lastindex = lastindex; + /* btsp->level = v; */ + /* btsp->node = h; */ + ++btsp; + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index, + (ulong) bn); + v++; + h = index; + + /* process the child page */ + goto getPage; +} + + +/* + * xtDisplayPage() + * + * function: display page + */ +int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p) +{ + int rc = 0; + metapage_t *mp; + xad_t *xad; + s64 xaddr, xoff; + int xlen, i, j; + + if (p == NULL) { + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + } + + /* display page control */ + printf("bn:0x%lx flag:0x%x nextindex:%d\n", + (ulong) bn, p->header.flag, + le16_to_cpu(p->header.nextindex)); + + /* display entries */ + xad = &p->xad[XTENTRYSTART]; + for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex); + i++, xad++, j++) { + xoff = offsetXAD(xad); + xaddr = addressXAD(xad); + xlen = lengthXAD(xad); + printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff, + (ulong) xaddr, xlen); + + if (j == 4) { + printf("\n"); + j = 0; + } + } + + printf("\n"); +} +#endif /* _JFS_DEBUG_XTREE */ + + +#ifdef _JFS_WIP +/* + * xtGather() + * + * function: + * traverse for allocation acquiring tlock at commit time + * (vs at the time of update) logging backward top down + * + * note: + * problem - establishing that all new allocation have been + * processed both for append and random write in sparse file + * at the current entry at the current subtree root page + * + */ +int xtGather(t) +btree_t *t; +{ + int rc = 0; + xtpage_t *p; + u64 bn; + int index; + btentry_t *e; + btstack_t btstack; + struct btsf *parent; + + /* clear stack */ + BT_CLR(&btstack); + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* new root is NOT pointed by a new entry + if (p->header.flag & NEW) + allocate new page lock; + write a NEWPAGE log; + */ + + dopage: + /* + * first access of each page: + */ + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_LEAF) { + /* + * first access of each leaf page + */ + NOISE(0, ("commit(leaf): bn:%d\n", bn)); + /* process leaf page entries backward */ + for (; index >= XTENTRYSTART; index--) { + e = &p->xad[index]; + /* + * if newpage, log NEWPAGE. + * + if (e->flag & XAD_NEW) { + nfound =+ entry->length; + update current page lock for the entry; + newpage(entry); + * + * if moved, log move. 
+ * + } else if (e->flag & XAD_MOVED) { + reset flag; + update current page lock for the entry; + } + */ + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + /* + * go back up to the parent page + */ + getParent: + /* restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + return 0; + + if ((index = parent->index) == XTENTRYSTART) { + /* + * parent page scan completed + */ + /* go back up to the parent page */ + goto getParent; + } else { + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return EIO; + + /* first subroot page which + * covers all new allocated blocks + * itself not new/modified. + * (if modified from split of descendent, + * go down path of split page) + + if (nfound == nnew && + !(p->header.flag & (NEW | MOD))) + exit scan; + */ + + /* process parent page entries backward */ + index--; + } + } else { + /* + * first access of each internal page + */ + } + + /* + * internal page: go down to child page of current entry + */ + NOISE(0, ("commit(internal): bn:%d(%d)\n", bn, index)); + + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get current entry for the child page */ + e = &p->xad[index]; + + /* + * first access of each internal entry: + */ + /* + * if new entry, log btree_tnewentry. + * + if (e->flag & XAD_NEW) + update parent page lock for the entry; + */ + + /* release parent page */ + XT_PUTPAGE(mp); + + /* get child page */ + bn = e->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * first access of each non-root page: + */ + /* + * if new, log btree_newpage. + * + if (p->header.flag & NEW) + allocate new page lock; + write a NEWPAGE log (next, prev); + */ + + /* process the child page */ + goto dopage; + + out: + return 0; +} +#endif /* _JFS_WIP */ diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/namei.c linuxppc64_2_4/fs/jfs/namei.c --- ../kernel.org/linux/fs/jfs/namei.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/namei.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,1502 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Module: jfs/namei.c + * + */ + +/* + * Change History : + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct inode_operations jfs_file_inode_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct file_operations jfs_file_operations; +extern struct address_space_operations jfs_aops; + +extern int jfs_fsync(struct file *, struct dentry *, int); +extern void jfs_truncate_nolock(struct inode *, loff_t); + +/* + * forward references + */ +struct inode_operations jfs_dir_inode_operations; +struct file_operations jfs_dir_operations; + +s64 commitZeroLink(int, struct inode *); + +/* + * NAME: jfs_create(dip, dentry, mode) + * + * FUNCTION: create a regular file in the parent directory + * with name = and mode = + * + * PARAMETER: dip - parent directory vnode + * dentry - dentry of new file + * mode - create mode (rwxrwxrwx). + * + * RETURN: Errors from subroutines + * + */ +int jfs_create(struct inode *dip, struct dentry *dentry, int mode) +{ + int rc = 0; + int tid = 0; /* transaction id */ + struct inode *ip = NULL; /* child directory inode */ + ino_t ino; + component_t dname; /* child directory name */ + btstack_t btstack; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_create: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK(dip); + + /* + * search parent directory for entry/freespace + * (dtSearch() returns parent directory page pinned) + */ + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out1; + + /* + * Either iAlloc() or txBegin() may block. Deadlock can occur if we + * block there while holding dtree page, so we allocate the inode & + * begin the transaction before we search the directory. 
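+	 *
+	 * The resulting call order below is therefore (sketch):
+	 *
+	 *	ip = ialloc(dip, mode);		may block; no page held yet
+	 *	txBegin(dip->i_sb, &tid, 0);	may block; no page held yet
+	 *	dtSearch(dip, &dname, ...);	pins the dtree page last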
+ */ + ip = ialloc(dip, mode); + if (ip == NULL) { + rc = ENOSPC; + goto out2; + } + + IWRITE_LOCK(ip); + + txBegin(dip->i_sb, &tid, 0); + + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { + jERROR(1, ("jfs_create: dtSearch returned %d\n", rc)); + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + txEnd(tid); + goto out2; + } + + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + iplist[0] = dip; + iplist[1] = ip; + + /* + * initialize the child XAD tree root in-line in inode + */ + xtInitRoot(tid, ip); + + /* + * create entry in parent directory for child directory + * (dtInsert() releases parent directory page) + */ + ino = ip->i_ino; + if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { + jERROR(1, ("jfs_create: dtInsert returned %d\n", rc)); + /* discard new inode */ + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + + if (rc == EIO) + txAbort(tid, 1); /* Marks Filesystem dirty */ + else + txAbort(tid, 0); /* Filesystem full */ + txEnd(tid); + goto out2; + } + + ip->i_op = &jfs_file_inode_operations; + ip->i_fop = &jfs_file_operations; + ip->i_mapping->a_ops = &jfs_aops; + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + dip->i_version = ++event; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + mark_inode_dirty(dip); + + rc = txCommit(tid, 2, &iplist[0], 0); + txEnd(tid); + + IWRITE_UNLOCK(ip); + out2: + free_UCSname(&dname); + + out1: + + IWRITE_UNLOCK(dip); + jFYI(1, ("jfs_create: rc:%d\n", -rc)); + return -rc; +} + + +/* + * NAME: jfs_mkdir(dip, dentry, mode) + * + * FUNCTION: create a child directory in the parent directory + * with name = and mode = + * + * PARAMETER: dip - parent directory vnode + * dentry - dentry of child directory + * mode - create mode (rwxrwxrwx). + * + * RETURN: Errors from subroutines + * + * note: + * EACCESS: user needs search+write permission on the parent directory + */ +int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) +{ + int rc = 0; + int tid = 0; /* transaction id */ + struct inode *ip = NULL; /* child directory inode */ + ino_t ino; + component_t dname; /* child directory name */ + btstack_t btstack; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_mkdir: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK(dip); + + /* link count overflow on parent directory ? */ + if (dip->i_nlink == JFS_LINK_MAX) { + rc = EMLINK; + goto out1; + } + + /* + * search parent directory for entry/freespace + * (dtSearch() returns parent directory page pinned) + */ + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out1; + + /* + * Either iAlloc() or txBegin() may block. Deadlock can occur if we + * block there while holding dtree page, so we allocate the inode & + * begin the transaction before we search the directory. 
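+	 *
+	 * (Same ordering as in jfs_create() above. Specific to mkdir,
+	 *  the link counts set further down on success are:
+	 *
+	 *	ip->i_nlink = 2;	parent's entry plus the child's "."
+	 *	dip->i_nlink++;		".." back-reference from the child
+	 *
+	 *  so an empty directory always carries a link count of 2.)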
+ */ + ip = ialloc(dip, S_IFDIR | mode); + if (ip == NULL) { + rc = ENOSPC; + goto out2; + } + + IWRITE_LOCK(ip); + + txBegin(dip->i_sb, &tid, 0); + + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { + jERROR(1, ("jfs_mkdir: dtSearch returned %d\n", rc)); + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + txEnd(tid); + goto out2; + } + + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + iplist[0] = dip; + iplist[1] = ip; + + /* + * initialize the child directory in-line in inode + */ + dtInitRoot(tid, ip, dip->i_ino); + + /* + * create entry in parent directory for child directory + * (dtInsert() releases parent directory page) + */ + ino = ip->i_ino; + if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { + jERROR(1, ("jfs_mkdir: dtInsert returned %d\n", rc)); + /* discard new directory inode */ + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + + if (rc == EIO) + txAbort(tid, 1); /* Marks Filesystem dirty */ + else + txAbort(tid, 0); /* Filesystem full */ + txEnd(tid); + goto out2; + } + + ip->i_nlink = 2; /* for '.' */ + ip->i_op = &jfs_dir_inode_operations; + ip->i_fop = &jfs_dir_operations; + ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->gfp_mask = GFP_NOFS | __GFP_HIGHMEM; + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + /* update parent directory inode */ + dip->i_nlink++; /* for '..' from child directory */ + dip->i_version = ++event; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + mark_inode_dirty(dip); + + rc = txCommit(tid, 2, &iplist[0], 0); + txEnd(tid); + + IWRITE_UNLOCK(ip); + + out2: + free_UCSname(&dname); + + out1: + + IWRITE_UNLOCK(dip); + + jFYI(1, ("jfs_mkdir: rc:%d\n", -rc)); + return -rc; +} + +/* + * NAME: jfs_rmdir(dip, dentry) + * + * FUNCTION: remove a link to child directory + * + * PARAMETER: dip - parent inode + * dentry - child directory dentry + * + * RETURN: EINVAL - if name is . or .. + * EINVAL - if . or .. exist but are invalid. + * errors from subroutines + * + * note: + * if other threads have the directory open when the last link + * is removed, the "." and ".." entries, if present, are removed before + * rmdir() returns and no new entries may be created in the directory, + * but the directory is not removed until the last reference to + * the directory is released (cf.unlink() of regular file). + */ +int jfs_rmdir(struct inode *dip, struct dentry *dentry) +{ + int rc; + int tid = 0; /* transaction id */ + struct inode *ip = dentry->d_inode; + ino_t ino; + component_t dname; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_rmdir: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK_LIST(2, dip, ip); + + /* directory must be empty to be removed */ + if (!dtEmpty(ip)) { + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + rc = ENOTEMPTY; + goto out; + } + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) { + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + goto out; + } + + txBegin(dip->i_sb, &tid, 0); + + iplist[0] = dip; + iplist[1] = ip; + + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_DELETE; + tblk->ip = ip; + + /* + * delete the entry of target directory from parent directory + */ + ino = ip->i_ino; + if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { + jERROR(1, ("jfs_rmdir: dtDelete returned %d\n", rc)); + if (rc == EIO) + txAbort(tid, 1); + txEnd(tid); + + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + + goto out2; + } + + /* update parent directory's link count corresponding + * to ".." 
entry of the target directory deleted + */ + dip->i_nlink--; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + dip->i_version = ++event; + mark_inode_dirty(dip); + + /* + * OS/2 could have created EA and/or ACL + */ + /* free EA from both persistent and working map */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { + /* free EA pages */ + txEA(tid, ip, &JFS_IP(ip)->ea, NULL); + } + JFS_IP(ip)->ea.flag = 0; + + /* free ACL from both persistent and working map */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { + /* free ACL pages */ + txEA(tid, ip, &JFS_IP(ip)->acl, NULL); + } + JFS_IP(ip)->acl.flag = 0; + + /* mark the target directory as deleted */ + ip->i_nlink = 0; + mark_inode_dirty(ip); + + rc = txCommit(tid, 2, &iplist[0], 0); + + txEnd(tid); + + IWRITE_UNLOCK(ip); + + /* + * Truncating the directory index table is not guaranteed. It + * may need to be done iteratively + */ + if (test_cflag(COMMIT_Stale, dip) && (dip->i_size > 1)) + jfs_truncate_nolock(dip, 0); + + IWRITE_UNLOCK(dip); + + d_delete(dentry); + + out2: + free_UCSname(&dname); + + out: + jFYI(1, ("jfs_rmdir: rc:%d\n", rc)); + return -rc; +} + +/* + * NAME: jfs_unlink(dip, dentry) + * + * FUNCTION: remove a link to object named by + * from parent directory + * + * PARAMETER: dip - inode of parent directory + * dentry - dentry of object to be removed + * + * RETURN: errors from subroutines + * + * note: + * temporary file: if one or more processes have the file open + * when the last link is removed, the link will be removed before + * unlink() returns, but the removal of the file contents will be + * postponed until all references to the files are closed. + * + * JFS does NOT support unlink() on directories. + * + */ +int jfs_unlink(struct inode *dip, struct dentry *dentry) +{ + int rc; + int tid = 0; /* transaction id */ + struct inode *ip = dentry->d_inode; + ino_t ino; + component_t dname; /* object name */ + struct inode *iplist[2]; + tblock_t *tblk; + s64 new_size = 0; + + jFYI(1, ("jfs_unlink: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out; + + IWRITE_LOCK_LIST(2, ip, dip); + + txBegin(dip->i_sb, &tid, 0); + + iplist[0] = dip; + iplist[1] = ip; + + /* + * delete the entry of target file from parent directory + */ + ino = ip->i_ino; + if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { + jERROR(1, ("jfs_unlink: dtDelete returned %d\n", rc)); + if (rc == EIO) + txAbort(tid, 1); /* Marks FS Dirty */ + txEnd(tid); + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + goto out1; + } + + ASSERT(ip->i_nlink); + + ip->i_ctime = dip->i_ctime = dip->i_mtime = CURRENT_TIME; + dip->i_version = ++event; + mark_inode_dirty(dip); + + /* update target's inode */ + ip->i_nlink--; + mark_inode_dirty(ip); + + /* + * commit zero link count object + */ + if (ip->i_nlink == 0) { + assert(!test_cflag(COMMIT_Nolink, ip)); + /* free block resources */ + if ((new_size = commitZeroLink(tid, ip)) < 0) { + txAbort(tid, 1); /* Marks FS Dirty */ + txEnd(tid); + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + rc = -new_size; /* We return -rc */ + goto out1; + } + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_DELETE; + tblk->ip = ip; + } + + /* + * If xtTruncate was incomplete, commit synchronously to avoid + * timing complications + */ + rc = txCommit(tid, 2, &iplist[0], new_size ? 
COMMIT_SYNC : 0); + + txEnd(tid); + + while (new_size && (rc == 0)) { + txBegin(dip->i_sb, &tid, 0); + new_size = xtTruncate_pmap(tid, ip, new_size); + if (new_size < 0) { + txAbort(tid, 1); /* Marks FS Dirty */ + rc = -new_size; /* We return -rc */ + } else + rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); + txEnd(tid); + } + + if (!test_cflag(COMMIT_Holdlock, ip)) + IWRITE_UNLOCK(ip); + + /* + * Truncating the directory index table is not guaranteed. It + * may need to be done iteratively + */ + if (test_cflag(COMMIT_Stale, dip) && (dip->i_size > 1)) + jfs_truncate_nolock(dip, 0); + + IWRITE_UNLOCK(dip); + + d_delete(dentry); + + out1: + free_UCSname(&dname); + out: + jFYI(1, ("jfs_unlink: rc:%d\n", -rc)); + return -rc; +} + +/* + * NAME: commitZeroLink() + * + * FUNCTION: for non-directory, called by jfs_remove(), + * truncate a regular file, directory or symbolic + * link to zero length. return 0 if type is not + * one of these. + * + * if the file is currently associated with a VM segment + * only permanent disk and inode map resources are freed, + * and neither the inode nor indirect blocks are modified + * so that the resources can be later freed in the work + * map by ctrunc1. + * if there is no VM segment on entry, the resources are + * freed in both work and permanent map. + * (? for temporary file - memory object is cached even + * after no reference: + * reference count > 0 - ) + * + * PARAMETERS: cd - pointer to commit data structure. + * current inode is the one to truncate. + * + * RETURN : Errors from subroutines + */ +s64 commitZeroLink(int tid, struct inode *ip) +{ + int filetype, committype; + tblock_t *tblk; + + jFYI(1, ("commitZeroLink: tid = %d, ip = 0x%p\n", tid, ip)); + + filetype = ip->i_mode & S_IFMT; + switch (filetype) { + case S_IFREG: + break; + case S_IFLNK: + /* fast symbolic link */ + if (ip->i_size <= 256) { + ip->i_size = 0; + return 0; + } + break; + default: + assert(filetype != S_IFDIR); + return 0; + } + +#ifdef _STILL_TO_PORT + /* + * free from block allocation map: + * + * if there is no cache control element associated with + * the file, free resources in both persistent and work map; + * otherwise just persistent map. 
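+	 * (COMMIT_PMAP frees the blocks only in the persistent map and
+	 * flags the inode with COMMIT_Freewmap so that freeZeroLink()
+	 * can free the working map later; COMMIT_PWMAP frees both maps
+	 * in one step.)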
+ */ + if (ip->i_cacheid) { + committype = COMMIT_PMAP; + + /* mark for iClose() to free from working map */ + set_cflag(COMMIT_Freewmap, ip); + } else + committype = COMMIT_PWMAP; +#else /* _STILL_TO_PORT */ + + set_cflag(COMMIT_Freewmap, ip); + committype = COMMIT_PMAP; +#endif /* _STILL_TO_PORT */ + + /* mark transaction of block map update type */ + tblk = &TxBlock[tid]; + tblk->xflag |= committype; + + /* + * free EA + */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { +#ifdef _STILL_TO_PORT + /* free EA pages from cache */ + if (committype == COMMIT_PWMAP) + bmExtentInvalidate(ip, addressDXD(&ip->i_ea), + lengthDXD(&ip->i_ea)); +#endif /* _STILL_TO_PORT */ + + /* acquire maplock on EA to be freed from block map */ + txEA(tid, ip, &JFS_IP(ip)->ea, NULL); + + if (committype == COMMIT_PWMAP) + JFS_IP(ip)->ea.flag = 0; + } + + /* + * free ACL + */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { +#ifdef _STILL_TO_PORT + /* free ACL pages from cache */ + if (committype == COMMIT_PWMAP) + bmExtentInvalidate(ip, addressDXD(&ip->i_acl), + lengthDXD(&ip->i_acl)); +#endif /* _STILL_TO_PORT */ + + /* acquire maplock on EA to be freed from block map */ + txEA(tid, ip, &JFS_IP(ip)->acl, NULL); + + if (committype == COMMIT_PWMAP) + JFS_IP(ip)->acl.flag = 0; + } + + /* + * free xtree/data (truncate to zero length): + * free xtree/data pages from cache if COMMIT_PWMAP, + * free xtree/data blocks from persistent block map, and + * free xtree/data blocks from working block map if COMMIT_PWMAP; + */ + if (ip->i_size) + return xtTruncate_pmap(tid, ip, 0); + + return 0; +} + + +/* + * NAME: freeZeroLink() + * + * FUNCTION: for non-directory, called by iClose(), + * free resources of a file from cache and WORKING map + * for a file previously committed with zero link count + * while associated with a pager object, + * + * PARAMETER: ip - pointer to inode of file. + * + * RETURN: 0 -ok + */ +int freeZeroLink(struct inode *ip) +{ + int rc = 0; + int type; + + jFYI(1, ("freeZeroLink: ip = 0x%p\n", ip)); + + /* return if not reg or symbolic link or if size is + * already ok. 
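+	 * (A fast symbolic link keeps its target inline in the on-disk
+	 * inode, so at 256 bytes or less there are no extents to free.)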
+ */ + type = ip->i_mode & S_IFMT; + + switch (type) { + case S_IFREG: + break; + case S_IFLNK: + /* if its contained in inode nothing to do */ + if (ip->i_size <= 256) + return 0; + break; + default: + return 0; + } + + /* + * free EA + */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { + s64 xaddr; + int xlen; + maplock_t maplock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + + /* free EA pages from cache */ + xaddr = addressDXD(&JFS_IP(ip)->ea); + xlen = lengthDXD(&JFS_IP(ip)->ea); +#ifdef _STILL_TO_PORT + bmExtentInvalidate(ip, xaddr, xlen); +#endif + + /* free EA extent from working block map */ + maplock.index = 1; + pxdlock = (pxdlock_t *) & maplock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, xlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + } + + /* + * free ACL + */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { + s64 xaddr; + int xlen; + maplock_t maplock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + + /* free ACL pages from cache */ + xaddr = addressDXD(&JFS_IP(ip)->acl); + xlen = lengthDXD(&JFS_IP(ip)->acl); +#ifdef _STILL_TO_PORT + bmExtentInvalidate(ip, xaddr, xlen); +#endif + + /* free ACL extent from working block map */ + maplock.index = 1; + pxdlock = (pxdlock_t *) & maplock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, xlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + } + + /* + * free xtree/data (truncate to zero length): + * free xtree/data pages from cache, and + * free xtree/data blocks from working block map; + */ + if (ip->i_size) + rc = xtTruncate(0, ip, 0, COMMIT_WMAP); + + return rc; +} + +/* + * NAME: jfs_link(vp, dvp, name, crp) + * + * FUNCTION: create a link to by the name = + * in the parent directory + * + * PARAMETER: vp - target object + * dvp - parent directory of new link + * name - name of new link to target object + * crp - credential + * + * RETURN: Errors from subroutines + * + * note: + * JFS does NOT support link() on directories (to prevent circular + * path in the directory hierarchy); + * EPERM: the target object is a directory, and either the caller + * does not have appropriate privileges or the implementation prohibits + * using link() on directories [XPG4.2]. + * + * JFS does NOT support links between file systems: + * EXDEV: target object and new link are on different file systems and + * implementation does not support links between file systems [XPG4.2]. 
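+ *
+ * (Both conditions are screened by the VFS before this routine runs;
+ * e.g. a link() whose two paths resolve to different mounts fails
+ * with EXDEV without ever reaching jfs_link(), so only the directory
+ * check is repeated below.)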
+ */
+int jfs_link(struct dentry *old_dentry,
+	     struct inode *dir, struct dentry *dentry)
+{
+	int rc;
+	int tid = 0;
+	struct inode *ip = old_dentry->d_inode;
+	ino_t ino;
+	component_t dname;
+	btstack_t btstack;
+	struct inode *iplist[2];
+
+	jFYI(1,
+	     ("jfs_link: %s %s\n", old_dentry->d_name.name,
+	      dentry->d_name.name));
+/* The checks for links between filesystems and permissions are
+   handled by the VFS layer */
+
+	/* JFS does NOT support link() on directories */
+	if (S_ISDIR(ip->i_mode))
+		return -EPERM;
+
+	IWRITE_LOCK_LIST(2, dir, ip);
+
+	txBegin(ip->i_sb, &tid, 0);
+
+	if (ip->i_nlink == JFS_LINK_MAX) {
+		rc = EMLINK;
+		goto out;
+	}
+
+	/*
+	 * scan parent directory for entry/freespace
+	 */
+	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(ip->i_sb)->nls_tab)))
+		goto out;
+
+	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+		goto out;
+
+	/*
+	 * create entry for new link in parent directory
+	 */
+	ino = ip->i_ino;
+	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+		goto out;
+
+	dir->i_version = ++event;
+
+	/* update object inode */
+	ip->i_nlink++;		/* for new link */
+	ip->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(dir);
+	atomic_inc(&ip->i_count);
+	d_instantiate(dentry, ip);
+
+	iplist[0] = ip;
+	iplist[1] = dir;
+	rc = txCommit(tid, 2, &iplist[0], 0);
+
+      out:
+	IWRITE_UNLOCK(dir);
+	IWRITE_UNLOCK(ip);
+
+	txEnd(tid);
+
+	jFYI(1, ("jfs_link: rc:%d\n", rc));
+	return -rc;
+}
+
+/*
+ * NAME:	jfs_symlink(dip, dentry, name)
+ *
+ * FUNCTION:	creates a symbolic link to <symlink> by name <name>
+ *		in directory <dip>
+ *
+ * PARAMETER:	dip	- parent directory vnode
+ *		dentry	- dentry of symbolic link
+ *		name	- the path name of the existing object
+ *			  that will be the source of the link
+ *
+ * RETURN:	errors from subroutines
+ *
+ * note:
+ * ENAMETOOLONG: pathname resolution of a symbolic link produced
+ * an intermediate result whose length exceeds PATH_MAX [XPG4.2]
+ */
+
+int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
+{
+	int rc;
+	int tid = 0;
+	ino_t ino = 0;
+	component_t dname;
+	int ssize;		/* source pathname size */
+	btstack_t btstack;
+	struct inode *ip = dentry->d_inode;
+	unchar *i_fastsymlink;
+	s64 xlen = 0;
+	int bmask = 0, xsize;
+	s64 xaddr;
+	metapage_t *mp;
+	struct super_block *sb;
+	tlock_t *tlck;
+	xtlock_t *xtlck;
+	tblock_t *tblk;
+
+	struct inode *iplist[2];
+
+	jFYI(1, ("jfs_symlink: dip:0x%p name:%s\n", dip, name));
+
+	IWRITE_LOCK(dip);
+
+	ssize = strlen(name) + 1;
+
+	txBegin(dip->i_sb, &tid, 0);
+
+	/*
+	 * search parent directory for entry/freespace
+	 * (dtSearch() returns parent directory page pinned)
+	 */
+	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
+		goto out1;
+
+	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
+		goto out2;
+
+	/*
+	 * allocate on-disk/in-memory inode for symbolic link:
+	 * (iAlloc() returns new, locked inode)
+	 */
+	ip = ialloc(dip, S_IFLNK | 0777);
+	if (ip == NULL) {
+		BT_PUTSEARCH(&btstack);
+		rc = ENOSPC;
+		goto out2;
+	}
+	IWRITE_LOCK(ip);
+
+	tblk = &TxBlock[tid];
+	tblk->xflag |= COMMIT_CREATE;
+	tblk->ip = ip;
+
+	/*
+	 * create entry for symbolic link in parent directory
+	 */
+	ino = ip->i_ino;
+	if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
+		jERROR(1, ("jfs_symlink: dtInsert returned %d\n", rc));
+		/* discard new inode */
+		ip->i_nlink = 0;
+		IWRITE_UNLOCK(ip);
+		iput(ip);
+		goto out2;
+	}
+
+	/* fix symlink access permission
+	 * (dir_create() ANDs in the u.u_cmask,
+	 * but symlinks really need to be 777 access)
+	 */
+	
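/* e.g. with a umask of 022, ialloc() above produced mode 0755;
+	 * OR-ing 0777 back in gives the symlink the expected 0777
+	 */
+	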
ip->i_mode |= 0777; + + /* + * write symbolic link target path name + */ + xtInitRoot(tid, ip); + + /* + * write source path name inline in on-disk inode (fast symbolic link) + */ + + if (ssize <= IDATASIZE) { + ip->i_op = &jfs_symlink_inode_operations; + + i_fastsymlink = JFS_IP(ip)->i_inline; + memcpy(i_fastsymlink, name, ssize); + ip->i_size = ssize - 1; + /* + * This could probably be done cleaner, but for now, treat + * the transaction like an xtree update. + */ + tlck = txLock(tid, ip, (metapage_t *) & JFS_IP(ip)->bxflag, + tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.length = + (ssize + XTSLOTSIZE - 1) >> L2XTSLOTSIZE; + jFYI(1, + ("jfs_symlink: fast symlink added ssize:%d name:%s \n", + ssize, name)); + } + /* + * write source path name in a single extent + */ + else { + jFYI(1, ("jfs_symlink: allocate extent ip:0x%p\n", ip)); + + ip->i_op = &page_symlink_inode_operations; + ip->i_mapping->a_ops = &jfs_aops; + + /* + * even though the data of symlink object (source + * path name) is treated as non-journaled user data, + * it is read/written thru buffer cache for performance. + */ + sb = ip->i_sb; + bmask = JFS_SBI(sb)->bsize - 1; + xsize = (ssize + bmask) & ~bmask; + xaddr = 0; + xlen = xsize >> JFS_SBI(sb)->l2bsize; + if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0)) == 0) { + ip->i_size = ssize - 1; + while (ssize) { + int copy_size = min(ssize, PSIZE); + + mp = get_metapage(ip, xaddr, PSIZE, 1); + + if (mp == NULL) { + dtDelete(tid, dip, &dname, &ino, + JFS_REMOVE); + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + rc = EIO; + goto out2; + } + memcpy(mp->data, name, copy_size); + flush_metapage(mp); +#if 0 + mark_buffer_uptodate(bp, 1); + mark_buffer_dirty(bp, 1); + if (IS_SYNC(dip)) { + ll_rw_block(WRITE, 1, &bp); + wait_on_buffer(bp); + } + brelse(bp); +#endif /* 0 */ + ssize -= copy_size; + xaddr += JFS_SBI(sb)->nbperpage; + } + ip->i_blocks = LBLK2PBLK(sb, xlen); + } else { + dtDelete(tid, dip, &dname, &ino, JFS_REMOVE); + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + rc = ENOSPC; + goto out2; + } + } + dip->i_version = ++event; + + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + /* + * commit update of parent directory and link object + * + * if extent allocation failed (ENOSPC), + * the parent inode is committed regardless to avoid + * backing out parent directory update (by dtInsert()) + * and subsequent dtDelete() which is harmless wrt + * integrity concern. + * the symlink inode will be freed by iput() at exit + * as it has a zero link count (by dtDelete()) and + * no permanant resources. 
+ */ + + iplist[0] = dip; + if (rc == 0) { + iplist[1] = ip; + rc = txCommit(tid, 2, &iplist[0], 0); + } else + rc = txCommit(tid, 1, &iplist[0], 0); + + IWRITE_UNLOCK(ip); + + out2: + + free_UCSname(&dname); + out1: + IWRITE_UNLOCK(dip); + + txEnd(tid); + + jFYI(1, ("jfs_symlink: rc:%d\n", -rc)); + return -rc; +} + + +/* + * NAME: jfs_rename + * + * FUNCTION: rename a file or directory + */ +int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + btstack_t btstack; + ino_t ino; + component_t new_dname; + struct inode *new_ip; + component_t old_dname; + struct inode *old_ip; + int rc; + int tid = 0; + tlock_t *tlck; + dtlock_t *dtlck; + lv_t *lv; + int ipcount; + struct inode *iplist[4]; + tblock_t *tblk; + s64 new_size = 0; + + + jFYI(1, + ("jfs_rename: %s %s\n", old_dentry->d_name.name, + new_dentry->d_name.name)); + + old_ip = old_dentry->d_inode; + new_ip = new_dentry->d_inode; + + if (old_dir == new_dir) { + if (new_ip) + IWRITE_LOCK_LIST(3, old_dir, old_ip, new_ip); + else + IWRITE_LOCK_LIST(2, old_dir, old_ip); + } else { + if (new_ip) + IWRITE_LOCK_LIST(4, old_dir, new_dir, old_ip, + new_ip); + else + IWRITE_LOCK_LIST(3, old_dir, new_dir, old_ip); + } + + if ((rc = get_UCSname(&old_dname, old_dentry, + JFS_SBI(old_dir->i_sb)->nls_tab))) + goto out1; + + if ((rc = get_UCSname(&new_dname, new_dentry, + JFS_SBI(old_dir->i_sb)->nls_tab))) + goto out2; + + /* + * Make sure source inode number is what we think it is + */ + rc = dtSearch(old_dir, &old_dname, &ino, &btstack, JFS_LOOKUP); + if (rc || (ino != old_ip->i_ino)) { + rc = ENOENT; + goto out3; + } + + /* + * Make sure dest inode number (if any) is what we think it is + */ + rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); + if (rc == 0) { + if ((new_ip == 0) || (ino != new_ip->i_ino)) { + rc = ESTALE; + goto out3; + } + } else if (rc != ENOENT) + goto out3; + else if (new_ip) { + /* no entry exists, but one was expected */ + rc = ESTALE; + goto out3; + } + + if (S_ISDIR(old_ip->i_mode)) { + if (new_ip) { + if (!dtEmpty(new_ip)) { + rc = ENOTEMPTY; + goto out3; + } + } else if ((new_dir != old_dir) && + (new_dir->i_nlink == JFS_LINK_MAX)) { + rc = EMLINK; + goto out3; + } + } + + /* + * The real work starts here + */ + txBegin(new_dir->i_sb, &tid, 0); + + if (new_ip) { + /* + * Change existing directory entry to new inode number + */ + ino = new_ip->i_ino; + rc = dtModify(tid, new_dir, &new_dname, &ino, + old_ip->i_ino, JFS_RENAME); + if (rc) + goto out4; + new_ip->i_nlink--; + if (S_ISDIR(new_ip->i_mode)) { + new_ip->i_nlink--; + assert(new_ip->i_nlink == 0); + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_DELETE; + tblk->ip = new_ip; + } else if (new_ip->i_nlink == 0) { + assert(!test_cflag(COMMIT_Nolink, new_ip)); + /* free block resources */ + if ((new_size = commitZeroLink(tid, new_ip)) < 0) { + txAbort(tid, 1); /* Marks FS Dirty */ + rc = -new_size; /* We return -rc */ + goto out4; + } + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_DELETE; + tblk->ip = new_ip; + } else { + new_ip->i_ctime = CURRENT_TIME; + mark_inode_dirty(new_ip); + } + } else { + /* + * Add new directory entry + */ + rc = dtSearch(new_dir, &new_dname, &ino, &btstack, + JFS_CREATE); + if (rc) { + jERROR(1, + ("jfs_rename didn't expect dtSearch to fail w/rc = %d\n", + rc)); + goto out4; + } + + ino = old_ip->i_ino; + rc = dtInsert(tid, new_dir, &new_dname, &ino, &btstack); + if (rc) { + jERROR(1, + ("jfs_rename: dtInsert failed w/rc = %d\n", + rc)); + goto out4; + } + if 
(S_ISDIR(old_ip->i_mode)) + new_dir->i_nlink++; + } + /* + * Remove old directory entry + */ + + ino = old_ip->i_ino; + rc = dtDelete(tid, old_dir, &old_dname, &ino, JFS_REMOVE); + if (rc) { + jERROR(1, + ("jfs_rename did not expect dtDelete to return rc = %d\n", + rc)); + txAbort(tid, 1); /* Marks Filesystem dirty */ + goto out4; + } + if (S_ISDIR(old_ip->i_mode)) { + old_dir->i_nlink--; + if (old_dir != new_dir) { + /* + * Change inode number of parent for moved directory + */ + + JFS_IP(old_ip)->i_dtroot.header.idotdot = + cpu_to_le32(new_dir->i_ino); + + /* Linelock header of dtree */ + tlck = txLock(tid, old_ip, + (metapage_t *) & JFS_IP(old_ip)->bxflag, + tlckDTREE | tlckBTROOT); + dtlck = (dtlock_t *) & tlck->lock; + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + lv->offset = 0; + lv->length = 1; + dtlck->index++; + } + } + + /* + * Update ctime on changed/moved inodes & mark dirty + */ + old_ip->i_ctime = CURRENT_TIME; + mark_inode_dirty(old_ip); + + new_dir->i_version = ++event; + new_dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(new_dir); + + /* Build list of inodes modified by this transaction */ + ipcount = 0; + iplist[ipcount++] = old_ip; + if (new_ip) + iplist[ipcount++] = new_ip; + iplist[ipcount++] = old_dir; + + if (old_dir != new_dir) { + iplist[ipcount++] = new_dir; + old_dir->i_version = ++event; + old_dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(old_dir); + } + + /* + * if new_size > 0, truncate was not complete. committing + * synchronously avoids some timing problems with COMMIT_Holdlock + */ + rc = txCommit(tid, ipcount, iplist, new_size ? COMMIT_SYNC : 0); + + /* + * Don't unlock new_ip if COMMIT_HOLDLOCK is set + */ + if (new_ip && test_cflag(COMMIT_Holdlock, new_ip)) + new_ip = 0; + + out4: + txEnd(tid); + + while (new_size && (rc == 0)) { + txBegin(new_ip->i_sb, &tid, 0); + new_size = xtTruncate_pmap(tid, new_ip, new_size); + if (new_size < 0) { + txAbort(tid, 1); + rc = -new_size; /* We return -rc */ + } else + rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); + txEnd(tid); + } + out3: + free_UCSname(&new_dname); + out2: + free_UCSname(&old_dname); + out1: + IWRITE_UNLOCK(old_ip); + if (old_dir != new_dir) + IWRITE_UNLOCK(new_dir); + if (new_ip) + IWRITE_UNLOCK(new_ip); + + /* + * Truncating the directory index table is not guaranteed. 
It + * may need to be done iteratively + */ + if ((rc == 0) && test_cflag(COMMIT_Stale, old_dir) && + (old_dir->i_size > 1)) + jfs_truncate_nolock(old_dir, 0); + + IWRITE_UNLOCK(old_dir); + + jFYI(1, ("jfs_rename: returning %d\n", rc)); + return -rc; +} + + +/* + * NAME: jfs_mknod + * + * FUNCTION: Create a special file (device) + */ +int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) +{ + btstack_t btstack; + component_t dname; + ino_t ino; + struct inode *ip; + struct inode *iplist[2]; + int rc; + int tid; + tblock_t *tblk; + + jFYI(1, ("jfs_mknod: %s\n", dentry->d_name.name)); + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dir->i_sb)->nls_tab))) + goto out; + + IWRITE_LOCK(dir); + + ip = ialloc(dir, mode); + if (ip == NULL) { + rc = ENOSPC; + goto out1; + } + + IWRITE_LOCK(ip); + + txBegin(dir->i_sb, &tid, 0); + + if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) { + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + txEnd(tid); + goto out1; + } + + tblk = &TxBlock[tid]; + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + ino = ip->i_ino; + if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) { + ip->i_nlink = 0; + IWRITE_UNLOCK(ip); + iput(ip); + txEnd(tid); + goto out1; + } + + if (S_ISREG(ip->i_mode)) { + ip->i_op = &jfs_file_inode_operations; + ip->i_fop = &jfs_file_operations; + ip->i_mapping->a_ops = &jfs_aops; + } else + init_special_inode(ip, ip->i_mode, rdev); + + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + dir->i_version = ++event; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + mark_inode_dirty(dir); + + iplist[0] = dir; + iplist[1] = ip; + rc = txCommit(tid, 2, iplist, 0); + txEnd(tid); + + IWRITE_UNLOCK(ip); + out1: + IWRITE_UNLOCK(dir); + free_UCSname(&dname); + + out: + jFYI(1, ("jfs_mknod: returning %d\n", rc)); + return -rc; +} + +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +{ + btstack_t btstack; + ino_t inum; + struct inode *ip; + component_t key; + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + int rc; + + jFYI(1, ("jfs_lookup: name = %s\n", name)); + + + if ((name[0] == '.') && (len == 1)) + inum = dip->i_ino; + else if (strcmp(name, "..") == 0) + inum = PARENT(dip); + else { + if ((rc = + get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + return ERR_PTR(-rc); + IREAD_LOCK(dip); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + IREAD_UNLOCK(dip); + free_UCSname(&key); + if (rc == ENOENT) { + d_add(dentry, NULL); + return ERR_PTR(0); + } else if (rc) { + jERROR(1, + ("jfs_lookup: dtSearch returned %d\n", rc)); + return ERR_PTR(-rc); + } + } + + ip = iget(dip->i_sb, inum); + if (ip == NULL) { + jERROR(1, + ("jfs_lookup: iget failed on inum %d\n", + (uint) inum)); + return ERR_PTR(-EACCES); + } + + d_add(dentry, ip); + + return ERR_PTR(0); +} + +struct inode_operations jfs_dir_inode_operations = { + create: jfs_create, + lookup: jfs_lookup, + link: jfs_link, + unlink: jfs_unlink, + symlink: jfs_symlink, + mkdir: jfs_mkdir, + rmdir: jfs_rmdir, + mknod: jfs_mknod, + rename: jfs_rename, +}; + +struct file_operations jfs_dir_operations = { + read: generic_read_dir, + readdir: jfs_readdir, + fsync: jfs_fsync, +}; diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/super.c linuxppc64_2_4/fs/jfs/super.c --- ../kernel.org/linux/fs/jfs/super.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/super.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,403 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); +MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); +MODULE_LICENSE("GPL"); + +static pid_t jfsIOthread; +static pid_t jfsCommitThread; +static pid_t jfsSyncThread; +struct task_struct *jfsIOtask; +struct task_struct *jfsCommitTask; +struct task_struct *jfsSyncTask; +DECLARE_MUTEX_LOCKED(jfsIOsem); + +#ifdef CONFIG_JFS_DEBUG +int jfsFYI = 0; +int jfsERROR = 1; + +MODULE_PARM(jfsFYI, "i"); +MODULE_PARM_DESC(jfsFYI, "Print noisy debugging messages"); +#endif + +/* + * External declarations + */ +extern int jfs_mount(struct super_block *, char *); +extern int jfs_mount_rw(struct super_block *, int); +extern int jfs_umount(struct super_block *); +extern int jfs_umount_rw(struct super_block *); + +extern int jfsIOWait(void *); +extern int jfs_lazycommit(void *); +extern int jfs_sync(void *); +extern void jfs_put_inode(struct inode *inode); +extern void jfs_read_inode(struct inode *inode); +extern void jfs_dirty_inode(struct inode *inode); +extern void jfs_delete_inode(struct inode *inode); +extern void jfs_write_inode(struct inode *inode, int wait); + +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) +extern void jfs_proc_init(void); +extern void jfs_proc_clean(void); +#endif + +extern kmem_cache_t *jfs_inode_cachep; + +static int jfs_statfs(struct super_block *sb, struct statfs *buf) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + s64 maxinodes; + imap_t *imap = JFS_IP(sbi->ipimap)->i_imap; + + jFYI(1, ("In jfs_statfs\n")); + buf->f_type = JFS_SUPER_MAGIC; + buf->f_bsize = sbi->bsize; + buf->f_blocks = sbi->bmap->db_mapsize; + buf->f_bfree = sbi->bmap->db_nfree; + buf->f_bavail = sbi->bmap->db_nfree; + /* + * If we really return the number of allocated & free inodes, some + * applications will fail because they won't see enough free inodes. 
+ * We'll try to calculate some guess as to how many inodes we can
+ * really allocate
+ *
+ * buf->f_files = atomic_read(&imap->im_numinos);
+ * buf->f_ffree = atomic_read(&imap->im_numfree);
+ */
+	maxinodes = min((s64) atomic_read(&imap->im_numinos) +
+			((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
+			 << L2INOSPEREXT), (s64)0xffffffffLL);
+	buf->f_files = maxinodes;
+	buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
+				    atomic_read(&imap->im_numfree));
+
+	buf->f_namelen = JFS_NAME_MAX;
+	return 0;
+}
+
+static void jfs_put_super(struct super_block *sb)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	int rc;
+
+	jFYI(1, ("In jfs_put_super\n"));
+	rc = jfs_umount(sb);
+	if (rc) {
+		jERROR(1, ("jfs_umount failed with return code %d\n", rc));
+	}
+	unload_nls(sbi->nls_tab);
+	sbi->nls_tab = NULL;
+
+	/*
+	 * We need to clean out the direct_inode pages since this inode
+	 * is not in the inode hash.
+	 */
+	fsync_inode_data_buffers(sbi->direct_inode);
+	truncate_inode_pages(sbi->direct_mapping, 0);
+	iput(sbi->direct_inode);
+	sbi->direct_inode = NULL;
+	sbi->direct_mapping = NULL;
+
+	JFS_SBI(sb) = 0;
+	kfree(sbi);
+}
+
+int jfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+		/*
+		 * Invalidate any previously read metadata.  fsck may
+		 * have changed the on-disk data since we mounted r/o
+		 */
+		truncate_inode_pages(JFS_SBI(sb)->direct_mapping, 0);
+
+		return jfs_mount_rw(sb, 1);
+	} else if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY))
+		return jfs_umount_rw(sb);
+
+	return 0;
+}
+
+static struct super_operations jfs_sops = {
+	read_inode:	jfs_read_inode,
+	dirty_inode:	jfs_dirty_inode,
+	write_inode:	jfs_write_inode,
+	put_inode:	jfs_put_inode,
+	delete_inode:	jfs_delete_inode,
+	put_super:	jfs_put_super,
+	statfs:		jfs_statfs,
+	remount_fs:	jfs_remount,
+	clear_inode:	diClearExtension,
+};
+
+static struct super_block *jfs_read_super(struct super_block *sb,
+					  void *data, int silent)
+{
+	struct jfs_sb_info *sbi;
+	struct inode *inode;
+	struct jfs_inode_info *jfs_inode;
+	int rc;
+
+	jFYI(1,
+	     ("In jfs_read_super s_dev=0x%x s_flags=0x%lx\n", sb->s_dev,
+	      sb->s_flags));
+
+	sbi = kmalloc(sizeof(struct jfs_sb_info), GFP_KERNEL);
+	JFS_SBI(sb) = sbi;
+	if (!sbi)
+		return NULL;
+	memset(sbi, 0, sizeof(struct jfs_sb_info));
+
+	/*
+	 * Initialize blocksize to 4K.
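+	 * (PSIZE, since metadata is read through 4K metapages
+	 * regardless of the block size recorded in the superblock.)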
+ */ + sb->s_blocksize = PSIZE; + sb->s_blocksize_bits = L2PSIZE; + set_blocksize(sb->s_dev, PSIZE); + + /* + * Initialize direct-mapping inode/address-space + */ + inode = new_inode(sb); + if (inode == NULL) + goto out_kfree; + inode->i_ino = 0; + inode->i_nlink = 1; + inode->i_size = 0x0000010000000000LL; + inode->i_mapping->a_ops = &direct_aops; + inode->i_mapping->gfp_mask = GFP_NOFS | __GFP_HIGHMEM; + + sbi->direct_inode = inode; + sbi->direct_mapping = inode->i_mapping; + + jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); + JFS_IP(inode) = jfs_inode; + if (!jfs_inode) + goto out_free_inode; + memset(jfs_inode, 0, sizeof(struct jfs_inode_info)); + + sb->s_op = &jfs_sops; + rc = jfs_mount(sb, data); + if (rc) { + if (!silent) { + jERROR(1, + ("jfs_mount failed w/return code = %d\n", + rc)); + } + goto out_mount_failed; + } + if (sb->s_flags & MS_RDONLY) + sbi->log = 0; + else { + rc = jfs_mount_rw(sb, 0); + if (rc) { + if (!silent) { + jERROR(1, + ("jfs_mount_rw failed w/return code = %d\n", + rc)); + } + goto out_no_rw; + } + } + + sb->s_magic = JFS_SUPER_MAGIC; + + inode = iget(sb, ROOT_I); + if (!inode) + goto out_no_root; + sb->s_root = d_alloc_root(inode); + if (!sb->s_root) + goto out_no_root; + + sbi->nls_tab = load_nls_default(); + + sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; + + return sb; + +out_no_root: + jEVENT(1, ("jfs_read_super: get root inode failed\n")); + if (inode) + iput(inode); + +out_no_rw: + rc = jfs_umount(sb); + if (rc) { + jERROR(1, ("jfs_umount failed with return code %d\n", rc)); + } +out_mount_failed: + fsync_inode_data_buffers(sbi->direct_inode); + truncate_inode_pages(sbi->direct_mapping, 0); + sb->s_op = NULL; + + kmem_cache_free(jfs_inode_cachep, jfs_inode); + +out_free_inode: + iput(sbi->direct_inode); + sbi->direct_inode = NULL; + sbi->direct_mapping = NULL; +out_kfree: + kfree(sbi); + return NULL; +} + +static DECLARE_FSTYPE_DEV(jfs_fs_type, "jfs", jfs_read_super); + +extern int metapage_init(void); +extern int txInit(void); +extern void txExit(void); +extern void metapage_exit(void); + +static int __init init_jfs_fs(void) +{ + int rc; + + printk("JFS development version: $Name: $\n"); + + jfs_inode_cachep = + kmem_cache_create("jfs_ip", + sizeof(struct jfs_inode_info), + 0, 0, NULL, NULL); + if (jfs_inode_cachep == NULL) + return -ENOMEM; + + /* + * Metapage initialization + */ + rc = metapage_init(); + if (rc) { + jERROR(1, ("metapage_init failed w/rc = %d\n", rc)); + goto free_slab; + } + + /* + * Log Manager initialization + */ + rc = lmInit(); + if (rc) { + jERROR(1, ("lmInit failed w/rc = %d\n", rc)); + goto free_metapage; + } + + /* + * Transaction Manager initialization + */ + rc = txInit(); + if (rc) { + jERROR(1, ("txInit failed w/rc = %d\n", rc)); + goto free_metapage; + } + + /* + * I/O completion thread (endio) + */ + jfsIOthread = kernel_thread(jfsIOWait, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsIOthread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + jfsIOthread)); + goto end_txmngr; + } + down(&jfsIOsem); /* Wait until IO thread starts */ + + jfsCommitThread = kernel_thread(jfs_lazycommit, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsCommitThread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + jfsCommitThread)); + goto kill_iotask; + } + down(&jfsIOsem); /* Wait until IO thread starts */ + + jfsSyncThread = kernel_thread(jfs_sync, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsSyncThread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + 
jfsSyncThread)); + goto kill_committask; + } + down(&jfsIOsem); /* Wait until IO thread starts */ + +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) + jfs_proc_init(); +#endif + + return register_filesystem(&jfs_fs_type); + + +kill_committask: + send_sig(SIGKILL, jfsCommitTask, 1); + down(&jfsIOsem); /* Wait until Commit thread exits */ +kill_iotask: + send_sig(SIGKILL, jfsIOtask, 1); + down(&jfsIOsem); /* Wait until IO thread exits */ +end_txmngr: + txExit(); +free_metapage: + metapage_exit(); +free_slab: + kmem_cache_destroy(jfs_inode_cachep); + return -rc; +} + +static void __exit exit_jfs_fs(void) +{ + jFYI(1, ("exit_jfs_fs called\n")); + txExit(); + metapage_exit(); + send_sig(SIGKILL, jfsIOtask, 1); + down(&jfsIOsem); /* Wait until IO thread exits */ + send_sig(SIGKILL, jfsCommitTask, 1); + down(&jfsIOsem); /* Wait until Commit thread exits */ + send_sig(SIGKILL, jfsSyncTask, 1); + down(&jfsIOsem); /* Wait until Sync thread exits */ +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) + jfs_proc_clean(); +#endif + unregister_filesystem(&jfs_fs_type); + kmem_cache_destroy(jfs_inode_cachep); +} + + +EXPORT_NO_SYMBOLS; + +module_init(init_jfs_fs) +module_exit(exit_jfs_fs) diff -uNr --exclude=CVS ../kernel.org/linux/fs/jfs/symlink.c linuxppc64_2_4/fs/jfs/symlink.c --- ../kernel.org/linux/fs/jfs/symlink.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/symlink.c Wed Nov 14 10:19:36 2001 @@ -0,0 +1,47 @@ + +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * JFS fast symlink handling code + */ + +#include +#include + +static int jfs_readlink(struct dentry *, char *buffer, int buflen); +static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd); + +/* + * symlinks can't do much... 
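+ * (only fast symlinks come through here: the target is stored inline
+ * in JFS_IP(inode)->i_inline, so both operations just hand that
+ * buffer to the generic vfs_ helpers below; longer targets get
+ * page_symlink_inode_operations from jfs_symlink() instead)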
+ */ +struct inode_operations jfs_symlink_inode_operations = { + readlink: jfs_readlink, + follow_link: jfs_follow_link, +}; + +static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *s = JFS_IP(dentry->d_inode)->i_inline; + return vfs_follow_link(nd, s); +} + +static int jfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + char *s = JFS_IP(dentry->d_inode)->i_inline; + return vfs_readlink(dentry, buffer, buflen, s); +} diff -uNr --exclude=CVS ../kernel.org/linux/fs/nls/Config.in linuxppc64_2_4/fs/nls/Config.in --- ../kernel.org/linux/fs/nls/Config.in Wed Jun 27 19:10:55 2001 +++ linuxppc64_2_4/fs/nls/Config.in Thu Sep 13 14:29:39 2001 @@ -12,7 +12,7 @@ # msdos and Joliet want NLS if [ "$CONFIG_JOLIET" = "y" -o "$CONFIG_FAT_FS" != "n" \ -o "$CONFIG_NTFS_FS" != "n" -o "$CONFIG_NCPFS_NLS" = "y" \ - -o "$CONFIG_SMB_NLS" = "y" ]; then + -o "$CONFIG_SMB_NLS" = "y" -o "$CONFIG_JFS_FS" != "n" ]; then define_bool CONFIG_NLS y else define_bool CONFIG_NLS n diff -uNr --exclude=CVS ../kernel.org/linux/fs/proc/root.c linuxppc64_2_4/fs/proc/root.c --- ../kernel.org/linux/fs/proc/root.c Sat Oct 20 21:14:42 2001 +++ linuxppc64_2_4/fs/proc/root.c Tue Aug 14 14:55:53 2001 @@ -16,6 +16,7 @@ #include #include #include +#include struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver; @@ -57,6 +58,12 @@ proc_tty_init(); #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); +#endif +#ifdef CONFIG_PPC_ISERIES + iSeries_proc_create(); +#endif +#ifdef CONFIG_PPC64 + proc_ppc64_init(); #endif #ifdef CONFIG_PPC_RTAS proc_rtas_init(); diff -uNr --exclude=CVS ../kernel.org/linux/fs/reiserfs/Makefile linuxppc64_2_4/fs/reiserfs/Makefile --- ../kernel.org/linux/fs/reiserfs/Makefile Fri Oct 12 16:19:28 2001 +++ linuxppc64_2_4/fs/reiserfs/Makefile Thu Nov 15 03:18:53 2001 @@ -13,13 +13,13 @@ obj-m := $(O_TARGET) -# gcc -O2 (the kernel default) is overaggressive on ppc when many inline +# gcc -O2 (the kernel default) is overaggressive on ppc32 when many inline # functions are used. This causes the compiler to advance the stack # pointer out of the available stack space, corrupting kernel space, -# and causing a panic. Since this behavior only affects ppc, this ifeq +# and causing a panic. Since this behavior only affects ppc32, this ifeq # will work around it. If any other architecture displays this behavior, # add it here. -ifeq ($(shell uname -m),ppc) +ifeq ($(CONFIG_PPC32),y) EXTRA_CFLAGS := -O1 endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/Naca.h linuxppc64_2_4/include/asm-ppc64/Naca.h --- ../kernel.org/linux/include/asm-ppc64/Naca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/Naca.h Tue Nov 13 21:41:37 2001 @@ -0,0 +1,39 @@ +#ifndef _NACA_H +#define _NACA_H + +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include + +struct Naca +{ + void *xItVpdAreas; + void *xRamDisk; + u64 xRamDiskSize; /* In pages */ + struct Paca *paca; /* Ptr to an array of pacas */ + u64 debug_switch; /* Bits to control debug printing */ + u16 processorCount; /* # of physical processors */ + u16 dCacheL1LineSize; /* Line size of L1 DCache in bytes */ + u16 dCacheL1LogLineSize; /* Log-2 of DCache line size */ + u16 dCacheL1LinesPerPage; /* DCache lines per page */ + u16 iCacheL1LineSize; /* Line size of L1 ICache in bytes */ + u16 iCacheL1LogLineSize; /* Log-2 of ICache line size */ + u16 iCacheL1LinesPerPage; /* ICache lines per page */ + u16 slb_size; /* SLB size in entries */ + u64 physicalMemorySize; /* Size of real memory in bytes */ + u64 pftSize; /* Log base 2 of page table size */ + u64 serialPortAddr; /* Phyical address of serial port */ + u8 interrupt_controller; /* Type of interrupt controller */ + u8 resv0[6]; /* Padding */ +}; + +extern struct Naca *naca; + +#endif /* _NACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/Paca.h linuxppc64_2_4/include/asm-ppc64/Paca.h --- ../kernel.org/linux/include/asm-ppc64/Paca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/Paca.h Sat Nov 3 23:52:22 2001 @@ -0,0 +1,164 @@ +#ifndef _PPC64_PACA_H +#define _PPC64_PACA_H + +/*============================================================================ + * Header File Id + * Name______________: Paca.H + * + * Description_______: + * + * This control block defines the PACA which defines the processor + * specific data for each logical processor on the system. + * There are some pointers defined that are utilized by PLIC. + * + * C 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#define N_EXC_STACK 2 + +/*----------------------------------------------------------------------------- + * Other Includes + *----------------------------------------------------------------------------- + */ +#include +#include +#include +#include +#include +#include + +/* A Paca entry is required for each logical processor. On systems + * that support hardware multi-threading, this is equal to twice the + * number of physical processors. On LPAR systems, we are required + * to have space for the maximum number of logical processors we + * could ever possibly have. Currently, we are limited to allocating + * 24 processors to a partition which gives 48 logical processors on + * an HMT box. Therefore, we reserve this many Paca entries. + */ +#define maxProcessors 24 +#define maxPacas maxProcessors * 2 + +extern struct Paca xPaca[]; +#define get_paca() ((struct Paca *)mfspr(SPRG3)) + +/*============================================================================ + * Name_______: Paca + * + * Description: + * + * Defines the layout of the Paca. + * + * This structure is not directly accessed by PLIC or the SP except + * for the first two pointers that point to the ItLpPaca area and the + * ItLpRegSave area for this processor. Both the ItLpPaca and + * ItLpRegSave objects are currently contained within the + * PACA but they do not need to be. 
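+ *
+ * Each processor finds its own Paca via get_paca() above, i.e. by
+ * reading the SPRG3 special register, which boot code presumably
+ * loads with the address of that processor's entry in xPaca[].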
+ * + *============================================================================ + */ +struct Paca +{ +/*===================================================================================== + * CACHE_LINE_1 0x0000 - 0x007F + *===================================================================================== + */ + struct ItLpPaca *xLpPacaPtr; /* Pointer to LpPaca for PLIC 0x00 */ + struct ItLpRegSave *xLpRegSavePtr; /* Pointer to LpRegSave for PLIC 0x08 */ + u64 xCurrent; /* Pointer to current 0x10 */ + u16 xPacaIndex; /* Logical processor number 0x18 */ + u16 xHwProcNum; /* Actual Hardware Processor Number 0x1a */ + u32 default_decr; /* Default decrementer value 0x1c */ + u64 xHrdIntStack; /* Stack for hardware interrupts 0x20 */ + u64 xKsave; /* Saved Kernel stack addr or zero 0x28 */ + u64 pvr; /* Processor version register 0x30 */ + u8 *exception_sp; /* 0x38 */ + + struct ItLpQueue *lpQueuePtr; /* LpQueue handled by this processor 0x40 */ + u64 xTOC; /* Kernel TOC address 0x48 */ + STAB xStab_data; /* Segment table information 0x50,0x58,0x60 */ + u8 xSegments[STAB_CACHE_SIZE]; /* Cache of used stab entries 0x68,0x70 */ + u8 xProcEnabled; /* 1=soft enabled 0x78 */ + u8 xHrdIntCount; /* Count of active hardware interrupts 0x79 */ + u8 prof_enabled; /* 1=iSeries profiling enabled 0x7A */ + u8 resv1[5]; /* 0x7B-0x7F */ + +/*===================================================================================== + * CACHE_LINE_2 0x0080 - 0x00FF + *===================================================================================== + */ + u64 *pgd_cache; /* 0x00 */ + u64 *pmd_cache; /* 0x08 */ + u64 *pte_cache; /* 0x10 */ + u64 pgtable_cache_sz; /* 0x18 */ + u64 next_jiffy_update_tb; /* TB value for next jiffy update 0x20 */ + u32 lpEvent_count; /* lpEvents processed 0x28 */ + u32 prof_multiplier; /* 0x2C */ + u32 prof_counter; /* 0x30 */ + u32 prof_shift; /* iSeries shift for profile bucket size0x34 */ + u32 *prof_buffer; /* iSeries profiling buffer 0x38 */ + u32 *prof_stext; /* iSeries start of kernel text 0x40 */ + u32 prof_len; /* iSeries length of profile buffer -1 0x48 */ + u8 rsvd2[128-76]; /* 0x4C */ + +/*===================================================================================== + * CACHE_LINE_3 0x0100 - 0x017F + *===================================================================================== + */ + u8 xProcStart; /* At startup, processor spins until 0x100 */ + /* xProcStart becomes non-zero. 
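+					 * (presumably set by the boot
+					 * processor during SMP bring-up)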
*/ + u8 rsvd3[127]; + +/*===================================================================================== + * CACHE_LINE_4-8 0x0180 - 0x03FF Contains ItLpPaca + *===================================================================================== + */ + struct ItLpPaca xLpPaca; /* Space for ItLpPaca */ + +/*===================================================================================== + * CACHE_LINE_9-16 0x0400 - 0x07FF Contains ItLpRegSave + *===================================================================================== + */ + struct ItLpRegSave xRegSav; /* Register save for proc */ + +/*===================================================================================== + * CACHE_LINE_17-18 0x0800 - 0x0EFF Reserved + *===================================================================================== + */ + struct rtas_args xRtas; /* Per processor RTAS struct */ + u64 xR1; /* r1 save for RTAS calls */ + u64 xSavedMsr; /* Old msr saved here by HvCall */ + u8 rsvd5[256-16-sizeof(struct rtas_args)]; + +/*===================================================================================== + * CACHE_LINE_19-30 0x0800 - 0x0EFF Reserved + *===================================================================================== + */ + u8 rsvd6[0x600]; + +/*===================================================================================== + * CACHE_LINE_31 0x0F00 - 0x0F7F Exception stack + *===================================================================================== + */ + u8 exception_stack[N_EXC_STACK*EXC_FRAME_SIZE]; + +/*===================================================================================== + * CACHE_LINE_32 0x0F80 - 0x0FFF Reserved + *===================================================================================== + */ + u8 rsvd7[0x80]; /* Give the stack some rope ... */ + +/*===================================================================================== + * Page 2 Reserved for guard page. Also used as a stack early in SMP boots before + * relocation is enabled. + *===================================================================================== + */ + u8 guard[0x1000]; /* ... and then hang 'em */ +}; + +#endif /* _PPC64_PACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/a.out.h linuxppc64_2_4/include/asm-ppc64/a.out.h --- ../kernel.org/linux/include/asm-ppc64/a.out.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/a.out.h Mon Dec 3 20:37:53 2001 @@ -0,0 +1,46 @@ +#ifndef __PPC64_A_OUT_H__ +#define __PPC64_A_OUT_H__ + +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +/* 64-bit user address space is less PGDIR_SIZE due to pgd_index() bug. */ +#define STACK_TOP_USER64 (TASK_SIZE_USER64 - PGDIR_SIZE) + +/* Give 32-bit user space a full 4G address space to live in. */ +#define STACK_TOP_USER32 (TASK_SIZE_USER32) + +#ifndef PPC64_32B_ADDR_SPACE +#define STACK_TOP ((current->thread.flags & PPC_FLAG_32BIT) ? \ + STACK_TOP_USER32 : STACK_TOP_USER64) +#else +#define STACK_TOP STACK_TOP_USER32 +#endif /* PPC64_32B_ADDR_SPACE */ + +#endif /* __KERNEL__ */ + +#endif /* __PPC64_A_OUT_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/abs_addr.h linuxppc64_2_4/include/asm-ppc64/abs_addr.h --- ../kernel.org/linux/include/asm-ppc64/abs_addr.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/abs_addr.h Sat Sep 15 22:16:07 2001 @@ -0,0 +1,119 @@ +#ifndef _ABS_ADDR_H +#define _ABS_ADDR_H + +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +typedef u32 msChunks_entry; +struct msChunks { + unsigned long num_chunks; + unsigned long chunk_size; + unsigned long chunk_shift; + unsigned long chunk_mask; + msChunks_entry *abs; +}; + +extern struct msChunks msChunks; + +extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); +extern unsigned long reloc_offset(void); + +#ifdef CONFIG_MSCHUNKS + +static inline unsigned long +chunk_to_addr(unsigned long chunk) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + return chunk << _msChunks->chunk_shift; +} + +static inline unsigned long +addr_to_chunk(unsigned long addr) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + return addr >> _msChunks->chunk_shift; +} + +static inline unsigned long +chunk_offset(unsigned long addr) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + return addr & _msChunks->chunk_mask; +} + +static inline unsigned long +abs_chunk(unsigned long pchunk) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + if ( pchunk >= _msChunks->num_chunks ) { + return pchunk; + } + return PTRRELOC(_msChunks->abs)[pchunk]; +} + + +static inline unsigned long +phys_to_absolute(unsigned long pa) +{ + return chunk_to_addr(abs_chunk(addr_to_chunk(pa))) + chunk_offset(pa); +} + +static inline unsigned long +physRpn_to_absRpn(unsigned long rpn) +{ + unsigned long pa = rpn << PAGE_SHIFT; + unsigned long aa = phys_to_absolute(pa); + return (aa >> PAGE_SHIFT); +} + +static inline unsigned long +absolute_to_phys(unsigned long aa) +{ + return lmb_abs_to_phys(aa); +} + +#else /* !CONFIG_MSCHUNKS */ + +#define chunk_to_addr(chunk) ((unsigned long)(chunk)) +#define addr_to_chunk(addr) (addr) +#define chunk_offset(addr) (0) +#define abs_chunk(pchunk) (pchunk) + +#define phys_to_absolute(pa) (pa) +#define physRpn_to_absRpn(rpn) (rpn) +#define absolute_to_phys(aa) (aa) + +#endif /* CONFIG_MSCHUNKS */ + + +static inline unsigned long +virt_to_absolute(unsigned long ea) +{ + return phys_to_absolute(__pa(ea)); +} + +static inline unsigned long +absolute_to_virt(unsigned long aa) +{ + return (unsigned long)__va(absolute_to_phys(aa)); +} + +#endif /* _ABS_ADDR_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/atomic.h linuxppc64_2_4/include/asm-ppc64/atomic.h --- ../kernel.org/linux/include/asm-ppc64/atomic.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/atomic.h Mon Nov 19 21:43:25 2001 @@ -0,0 +1,180 @@ +/* + * PowerPC64 atomic operations + * + * Copyright (C) 2001 Paul Mackerras , IBM + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_PPC64_ATOMIC_H_ +#define _ASM_PPC64_ATOMIC_H_ + +#include + +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) + +static __inline__ void atomic_add(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_add\n\ + add %0,%2,%0\n\ + stwcx. 
%0,0,%3\n\ + bne- 1b" + : "=&r" (t), "=m" (v->counter) + : "r" (a), "r" (&v->counter), "m" (v->counter) + : "cc"); +} + +static __inline__ int atomic_add_return(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_add_return\n\ + add %0,%1,%0\n\ + stwcx. %0,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ void atomic_sub(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_sub\n\ + subf %0,%2,%0\n\ + stwcx. %0,0,%3\n\ + bne- 1b" + : "=&r" (t), "=m" (v->counter) + : "r" (a), "r" (&v->counter), "m" (v->counter) + : "cc"); +} + +static __inline__ int atomic_sub_return(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_sub_return\n\ + subf %0,%1,%0\n\ + stwcx. %0,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ void atomic_inc(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_inc\n\ + addic %0,%0,1\n\ + stwcx. %0,0,%2\n\ + bne- 1b" + : "=&r" (t), "=m" (v->counter) + : "r" (&v->counter), "m" (v->counter) + : "cc"); +} + +static __inline__ int atomic_inc_return(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_inc_return\n\ + addic %0,%0,1\n\ + stwcx. %0,0,%1\n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ void atomic_dec(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_dec\n\ + addic %0,%0,-1\n\ + stwcx. %0,0,%2\n\ + bne- 1b" + : "=&r" (t), "=m" (v->counter) + : "r" (&v->counter), "m" (v->counter) + : "cc"); +} + +static __inline__ int atomic_dec_return(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_dec_return\n\ + addic %0,%0,-1\n\ + stwcx. %0,0,%1\n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ int atomic_dec_if_positive(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stwcx. %0,0,%1\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +#endif /* _ASM_PPC64_ATOMIC_H_ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/bitops.h linuxppc64_2_4/include/asm-ppc64/bitops.h --- ../kernel.org/linux/include/asm-ppc64/bitops.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/bitops.h Mon Nov 19 21:43:25 2001 @@ -0,0 +1,432 @@ +/* + * PowerPC64 atomic bit operations. + * Dave Engebretsen, Todd Inglett, Don Reed, Pat McCarthy, Peter Bergner, + * Anton Blanchard + * + * Originally taken from the 32b PPC code. Modified to use 64b values for + * the various counters & memory references. + * + * Bitops are odd when viewed on big-endian systems. 
They were designed + * on little endian so the size of the bitset doesn't matter (low order bytes + * come first) as long as the bit in question is valid. + * + * Bits are "tested" often using the C expression (val & (1< +#include + +/* + * clear_bit doesn't imply a memory barrier + */ +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr) +{ + return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))); +} + +static __inline__ void set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( +"1: ldarx %0,0,%3 # set_bit\n\ + or %0,%0,%2\n\ + stdcx. %0,0,%3\n\ + bne- 1b" + : "=&r" (old), "=m" (*p) + : "r" (mask), "r" (p), "m" (*p) + : "cc"); +} + +static __inline__ void clear_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( +"1: ldarx %0,0,%3 # clear_bit\n\ + andc %0,%0,%2\n\ + stdcx. %0,0,%3\n\ + bne- 1b" + : "=&r" (old), "=m" (*p) + : "r" (mask), "r" (p), "m" (*p) + : "cc"); +} + +static __inline__ void change_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( +"1: ldarx %0,0,%3 # change_bit\n\ + xor %0,%0,%2\n\ + stdcx. %0,0,%3\n\ + bne- 1b" + : "=&r" (old), "=m" (*p) + : "r" (mask), "r" (p), "m" (*p) + : "cc"); +} + +static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old, t; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: ldarx %0,0,%3 # test_and_set_bit\n\ + or %1,%0,%2 \n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old, t; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: ldarx %0,0,%3 # test_and_clear_bit\n\ + andc %1,%0,%2\n\ + stdcx. %1,0,%3\n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr) +{ + unsigned long old, t; + unsigned long mask = 1UL << (nr & 0x3f); + unsigned long *p = ((unsigned long *)addr) + (nr >> 6); + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: ldarx %0,0,%3 # test_and_change_bit\n\ + xor %1,%0,%2\n\ + stdcx. 
%1,0,%3\n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (old), "=&r" (t)
+	: "r" (mask), "r" (p)
+	: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+/*
+ * non-atomic versions
+ */
+static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+
+	*p |= mask;
+}
+
+static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+
+	*p &= ~mask;
+}
+
+static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+
+	*p ^= mask;
+}
+
+static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long mask = 1UL << (nr & 0x3f);
+	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/*
+ * Return the zero-based bit position (from RIGHT TO LEFT, 63 -> 0) of the
+ * most significant (left-most) 1-bit in a double word.
+ */
+static __inline__ int __ilog2(unsigned long x)
+{
+	int lz;
+
+	asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
+	return 63 - lz;
+}
+
+/* Return the number of trailing zeros in an 8-byte area, i.e. the
+ * zero-based bit position (counting from the right, 0 -> 63) of the
+ * least significant 1-bit in the mask.  Returns 64 if the mask is 0.
+ */
+static __inline__ long cnt_trailing_zeros(unsigned long mask)
+{
+	long cnt;
+
+	asm(
+"	addi	%0,%1,-1	\n\
+	andc	%0,%0,%1	\n\
+	cntlzd	%0,%0		\n\
+	subfic	%0,%0,64"
+	: "=r" (cnt)
+	: "r" (mask));
+	return cnt;
+}
+
+/*
+ * ffz = Find First Zero in a double-word.  Determines the bit position
+ * of the LEAST significant (rightmost) 0 bit in the specified value.
+ * The returned bit position is zero-based, starting from the right
+ * side (63 - 0).  Callers that cannot accept 64 for the all-ones case
+ * should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long x)
+{
+	u32 tempRC;
+
+	/* Change all of x's 1s to 0s and 0s to 1s in x,
+	 * and ensure at least one zero exists in the 8 byte area.
+	 */
+	if ((x = ~x) == 0)
+	{	/* no zero exists anywhere in the 8 byte area. */
+		return 64;
+	}
+
+	/* Calculate the bit position of the least significant '1' bit in x
+	 * (since x has been inverted, this is actually the least significant
+	 * '0' bit in the original x).
+	 * Note: (x & -x) gives us a mask that is the LEAST significant
+	 * (RIGHT-most) 1-bit of the value in x.
+	 */
+	tempRC = __ilog2(x & -x);
+
+	return tempRC;
+}
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+	int result = ffz(~x);
+	return x ? result+1 : 0;
+}
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+extern unsigned long find_next_zero_bit(void * addr, unsigned long size,
+					unsigned long offset);
+/*
+ * The optimizer actually does good code for this case..
+ */
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+
+/* Bitmap functions for the ext2 filesystem. */
+#define _EXT2_HAVE_ASM_BITOPS_
+
+static __inline__ int ext2_set_bit(int nr, void* addr)
+{
+	/* This method needs to take into account the fact that the ext2
+	 * filesystem represents its bitmaps as "little endian" unsigned
+	 * integers.
+	 * Note: this method is not atomic, but ext2 does not need it to be.
+	 */
+	int mask;
+	int oldbit;
+	unsigned char* ADDR = (unsigned char*) addr;
+
+	/* Determine the BYTE containing the specified bit (nr) -
+	 * important because once we work on a single byte there are
+	 * no little endian concerns.
+	 */
+	ADDR += nr >> 3;
+	mask = 1 << (nr & 0x07);	/* Create a mask for the bit within this byte. */
+	oldbit = *ADDR & mask;		/* Save the bit's previous value. */
+	*ADDR |= mask;			/* Turn the bit on. */
+	return oldbit;			/* Return the bit's previous value. */
+}
+
+static __inline__ int ext2_clear_bit(int nr, void* addr)
+{
+	/* This method needs to take into account the fact that the ext2
+	 * filesystem represents its bitmaps as "little endian" unsigned
+	 * integers.
+	 * Note: this method is not atomic, but ext2 does not need it to be.
+	 */
+	int mask;
+	int oldbit;
+	unsigned char* ADDR = (unsigned char*) addr;
+
+	/* Determine the BYTE containing the specified bit (nr) -
+	 * important because once we work on a single byte there are
+	 * no little endian concerns.
+	 */
+	ADDR += nr >> 3;
+	mask = 1 << (nr & 0x07);	/* Create a mask for the bit within this byte. */
+	oldbit = *ADDR & mask;		/* Save the bit's previous value. */
+	*ADDR = *ADDR & ~mask;		/* Turn the bit off. */
+	return oldbit;			/* Return the bit's previous value. */
+}
+
+static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
+{
+	/* This method needs to take into account the fact that the ext2
+	 * filesystem represents its bitmaps as "little endian" unsigned
+	 * integers.
+	 * Determine the BYTE containing the specified bit (nr), then shift
+	 * right the correct number of bits and return that bit's value.
+	 */
+	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;
+	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
+}
+
+/* Returns the bit position of the most significant 1 bit in a WORD. */
+static __inline__ int ext2_ilog2(unsigned int x)
+{
+	int lz;
+
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+	return 31 - lz;
+}
+
+/* ext2_ffz = ext2's Find First Zero.
+ * Determines the bit position of the LEAST significant (rightmost)
+ * 0 bit in the specified WORD.
+ * The returned bit position is zero-based, starting from the right
+ * side (31 - 0).
+ */
+static __inline__ int ext2_ffz(unsigned int x)
+{
+	u32 tempRC;
+	/* Change all of x's 1s to 0s and 0s to 1s, and ensure at least
+	 * one zero exists in the word.
+	 */
+	if ((x = ~x) == 0)
+	{	/* no zero exists anywhere in the 4 byte area. */
+		return 32;
+	}
+	/* Calculate the bit position of the least significant '1' bit in x
+	 * (since x has been inverted, this is actually the least significant
+	 * '0' bit in the original x).
+	 * Note: (x & -x) gives us a mask that is the LEAST significant
+	 * (RIGHT-most) 1-bit of the value in x.
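+	 * For example, ext2_ffz(0xFFFF00FF) proceeds as follows:
+	 *	~x     = 0x0000FF00
+	 *	x & -x = 0x00000100	(lowest 1-bit isolated)
+	 *	ext2_ilog2(0x00000100) = 8
+	 * so bit 8 is the first zero bit of the original value.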
+	 */
+	tempRC = ext2_ilog2(x & -x);
+	return tempRC;
+}
+
+static __inline__ u32 ext2_find_next_zero_bit(void* addr, u32 size, u32 offset)
+{
+	/* This method needs to take into account the fact that the ext2
+	 * filesystem represents its bitmaps as "little endian" unsigned
+	 * integers.
+	 */
+	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
+	unsigned int result = offset & ~31;
+	unsigned int tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset &= 31;
+	if (offset) {
+		tmp = cpu_to_le32p(p++);
+		tmp |= ~0U >> (32-offset);	/* bug or feature ? */
+		if (size < 32)
+			goto found_first;
+		if (tmp != ~0)
+			goto found_middle;
+		size -= 32;
+		result += 32;
+	}
+	while (size >= 32) {
+		if ((tmp = cpu_to_le32p(p++)) != ~0)
+			goto found_middle;
+		result += 32;
+		size -= 32;
+	}
+	if (!size)
+		return result;
+	tmp = cpu_to_le32p(p);
+found_first:
+	tmp |= ~0 << size;
+	if (tmp == ~0)		/* Are any bits zero? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + ext2_ffz(tmp);
+}
+
+#define ext2_find_first_zero_bit(addr, size) ext2_find_next_zero_bit((addr), (size), 0)
+
+#endif /* __KERNEL__ */
+#endif /* _PPC64_BITOPS_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/bootinfo.h linuxppc64_2_4/include/asm-ppc64/bootinfo.h
--- ../kernel.org/linux/include/asm-ppc64/bootinfo.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/bootinfo.h	Wed Oct 31 08:32:02 2001
@@ -0,0 +1,74 @@
+/*
+ * Non-machine dependent bootinfo structure.  Basic idea
+ * borrowed from the m68k.
+ *
+ * Copyright (C) 1999 Cort Dougan
+ * Copyright (c) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#ifndef _PPC64_BOOTINFO_H
+#define _PPC64_BOOTINFO_H
+
+#include
+#include
+
+/* We use a u32 for the type of the fields since they're written by
+ * the bootloader which is a 32-bit process and read by the kernel
+ * which is a 64-bit process.  This way they can both agree on the
+ * size of the type.
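+ *
+ * A boot record list is laid out contiguously in memory, opened by a
+ * BI_FIRST record and terminated by BI_LAST.  A hypothetical list
+ * built with the helpers below might look like:
+ *
+ *	{ tag = BI_FIRST,    size = 8 }
+ *	{ tag = BI_CMD_LINE, size = 8 + command line length }
+ *	{ tag = BI_LAST,     size = 8 }
+ *
+ * where 8 is sizeof(struct bi_record) and bi_rec_next() steps from one
+ * record to the next by adding its size.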
+ */ +typedef u32 bi_rec_field; + +struct bi_record { + bi_rec_field tag; /* tag ID */ + bi_rec_field size; /* size of record (in bytes) */ + bi_rec_field data[0]; /* data */ +}; + +#define BI_FIRST 0x1010 /* first record - marker */ +#define BI_LAST 0x1011 /* last record - marker */ +#define BI_CMD_LINE 0x1012 +#define BI_BOOTLOADER_ID 0x1013 +#define BI_INITRD 0x1014 +#define BI_SYSMAP 0x1015 +#define BI_MACHTYPE 0x1016 + +static __inline__ struct bi_record * bi_rec_init(unsigned long addr) +{ + struct bi_record *bi_recs; + bi_recs = (struct bi_record *)_ALIGN(addr, PAGE_SIZE); + bi_recs->size = 0; + return bi_recs; +} + +static __inline__ struct bi_record * bi_rec_alloc(struct bi_record *rec, + unsigned long args) +{ + rec = (struct bi_record *)((unsigned long)rec + rec->size); + rec->size = sizeof(struct bi_record) + args*sizeof(bi_rec_field); + return rec; +} + +static __inline__ struct bi_record * bi_rec_alloc_bytes(struct bi_record *rec, + unsigned long bytes) +{ + rec = (struct bi_record *)((unsigned long)rec + rec->size); + rec->size = sizeof(struct bi_record) + bytes; + return rec; +} + +static __inline__ struct bi_record * bi_rec_next(struct bi_record *rec) +{ + return (struct bi_record *)((unsigned long)rec + rec->size); +} + + + +#endif /* _PPC64_BOOTINFO_H */ + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/bugs.h linuxppc64_2_4/include/asm-ppc64/bugs.h --- ../kernel.org/linux/include/asm-ppc64/bugs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/bugs.h Fri May 4 17:12:34 2001 @@ -0,0 +1,8 @@ +/* + * This file is included by 'init/main.c' to check for architecture-dependent + * bugs. + * + */ + +static void check_bugs(void) { +} diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/byteorder.h linuxppc64_2_4/include/asm-ppc64/byteorder.h --- ../kernel.org/linux/include/asm-ppc64/byteorder.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/byteorder.h Tue Jun 19 08:38:06 2001 @@ -0,0 +1,95 @@ +#ifndef _PPC64_BYTEORDER_H +#define _PPC64_BYTEORDER_H + +/* + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
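+ *
+ * The lhbrx/lwbrx and sthbrx/stwbrx instructions used below load and
+ * store halfwords/words byte-reversed, giving cheap little-endian
+ * accessors on this big-endian CPU.  For example, if the two bytes at
+ * addr are 0x12 0x34, then ld_le16(addr) returns 0x3412.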
+ */ + +#include + +#ifdef __GNUC__ + +static __inline__ __u16 ld_le16(const volatile __u16 *addr) +{ + __u16 val; + + __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); + return val; +} + +static __inline__ void st_le16(volatile __u16 *addr, const __u16 val) +{ + __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); +} + +static __inline__ __u32 ld_le32(const volatile __u32 *addr) +{ + __u32 val; + + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); + return val; +} + +static __inline__ void st_le32(volatile __u32 *addr, const __u32 val) +{ + __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); +} + +#if 0 +static __inline__ __const__ __u16 ___arch__swab16(__u16 value) +{ + __u16 result; + + __asm__("rlwimi %0,%1,8,16,23" + : "=r" (result) + : "r" (value), "0" (value >> 8)); + return result; +} + +static __inline__ __const__ __u32 ___arch__swab32(__u32 value) +{ + __u32 result; + + __asm__("rlwimi %0,%1,24,16,23\n\t" + "rlwimi %0,%1,8,8,15\n\t" + "rlwimi %0,%1,24,0,7" + : "=r" (result) + : "r" (value), "0" (value >> 24)); + return result; +} + +static __inline__ __const__ __u64 ___arch__swab64(__u64 value) +{ + __u64 result; +#error implement me +} + +#define __arch__swab16(x) ___arch__swab16(x) +#define __arch__swab32(x) ___arch__swab32(x) +#define __arch__swab64(x) ___arch__swab64(x) + +#endif + +/* The same, but returns converted value from the location pointer by addr. */ +#define __arch__swab16p(addr) ld_le16(addr) +#define __arch__swab32p(addr) ld_le32(addr) + +/* The same, but do the conversion in situ, ie. put the value back to addr. */ +#define __arch__swab16s(addr) st_le16(addr,*addr) +#define __arch__swab32s(addr) st_le32(addr,*addr) + +#endif /* __GNUC__ */ + +/* MIKEC: What does __BYTEORDER_HAS_U64__ mean? */ +#ifndef __STRICT_ANSI__ +#define __BYTEORDER_HAS_U64__ +#endif + +#include + +#endif /* _PPC64_BYTEORDER_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/cache.h linuxppc64_2_4/include/asm-ppc64/cache.h --- ../kernel.org/linux/include/asm-ppc64/cache.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/cache.h Mon Nov 19 22:46:56 2001 @@ -0,0 +1,17 @@ +/* + * include/asm-ppc/cache.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ARCH_PPC64_CACHE_H +#define __ARCH_PPC64_CACHE_H + +#include + +/* bytes per L1 cache line */ +#define L1_CACHE_BYTES 128 + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/checksum.h linuxppc64_2_4/include/asm-ppc64/checksum.h --- ../kernel.org/linux/include/asm-ppc64/checksum.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/checksum.h Thu Aug 2 16:01:21 2001 @@ -0,0 +1,115 @@ +#ifndef _PPC64_CHECKSUM_H +#define _PPC64_CHECKSUM_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. 
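+ * For example, a minimal 20-byte IPv4 header (no options) is
+ * checksummed with ihl = 5, while a header carrying 12 bytes of
+ * options would use ihl = 8.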
+ */ +extern unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +extern unsigned short csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern unsigned int csum_partial(const unsigned char * buff, int len, + unsigned int sum); + +/* + * the same as csum_partial, but copies from src to dst while it + * checksums + */ +unsigned int csum_partial_copy(const char *src, char *dst, + int len, unsigned int sum); + +extern unsigned int csum_partial_copy_generic(const char *src, char *dst, + int len, unsigned int sum, + int *src_err, int *dst_err); +/* + * the same as csum_partial, but copies from user space. + */ + +unsigned int csum_partial_copy_fromuser(const char *src, + char *dst, + int len, + unsigned int sum, + int *src_err); + +unsigned int csum_partial_copy_nocheck(const char *src, + char *dst, + int len, + unsigned int sum); + +/* + * turns a 32-bit partial checksum (e.g. from csum_partial) into a + * 1's complement 16-bit checksum. + */ +static inline unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + /* swap the two 16-bit halves of sum */ + __asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum)); + /* if there is a carry from adding the two 16-bit halves, + it will carry from the lower half into the upper half, + giving us the correct sum in the upper half. */ + sum = ~(sum + tmp) >> 16; + return sum; +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ + csum_partial_copy_generic((src), (dst), (len), (sum), (errp), 0) + +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy_generic((src), (dst), (len), (sum), 0, 0) + +static inline u32 csum_tcpudp_nofold(u32 saddr, + u32 daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + unsigned long s = sum; + + s += saddr; + s += daddr; + s += (proto << 16) + len; + s += (s >> 32); + return (u32) s; +} + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/current.h linuxppc64_2_4/include/asm-ppc64/current.h --- ../kernel.org/linux/include/asm-ppc64/current.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/current.h Tue Oct 23 22:57:43 2001 @@ -0,0 +1,15 @@ +#ifndef _PPC64_CURRENT_H +#define _PPC64_CURRENT_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
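+ *
+ * Keeping current in a dedicated register means a dereference such as
+ * current->pid compiles to a single load at a fixed offset from r13,
+ * with no prior lookup of the task structure in memory.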
+ * + * Use r13 for current since the ppc64 ABI reserves it - Anton + */ + +register struct task_struct *current asm ("r13"); + +#endif /* !(_PPC64_CURRENT_H) */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/delay.h linuxppc64_2_4/include/asm-ppc64/delay.h --- ../kernel.org/linux/include/asm-ppc64/delay.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/delay.h Thu Aug 30 05:36:32 2001 @@ -0,0 +1,37 @@ +#ifndef _PPC64_DELAY_H +#define _PPC64_DELAY_H + +/* + * Copyright 1996, Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * PPC64 Support added by Dave Engebretsen, Todd Inglett, Mike Corrigan, + * Anton Blanchard. + */ + +#include +#include +#include + +extern unsigned long tb_ticks_per_usec; + +static inline void __delay(unsigned long loops) +{ + unsigned long start = get_tb(); + + while((get_tb()-start) < loops) + HMT_low(); +} + +static __inline__ void udelay(unsigned long usecs) +{ + unsigned long loops = tb_ticks_per_usec * usecs; + __delay(loops); + HMT_medium(); +} + +#endif /* _PPC64_DELAY_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/div64.h linuxppc64_2_4/include/asm-ppc64/div64.h --- ../kernel.org/linux/include/asm-ppc64/div64.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/div64.h Fri May 18 13:55:08 2001 @@ -0,0 +1,18 @@ +#ifndef __PPC_DIV64 +#define __PPC_DIV64 + +/* Copyright 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define do_div(n,base) ({ \ + int __res; \ + __res = ((unsigned long) (n)) % (unsigned) (base); \ + (n) = ((unsigned long) (n)) / (unsigned) (base); \ + __res; }) + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/dma.h linuxppc64_2_4/include/asm-ppc64/dma.h --- ../kernel.org/linux/include/asm-ppc64/dma.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/dma.h Fri Jul 13 00:11:30 2001 @@ -0,0 +1,326 @@ +/* + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + * Changes for ppc sound by Christoph Nadig + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include +#include +#include +#include + +#ifndef MAX_DMA_CHANNELS +#define MAX_DMA_CHANNELS 8 +#endif + +/* The maximum address that we can perform a DMA transfer to on this platform */ +/* Doesn't really apply... */ +#define MAX_DMA_ADDRESS (~0UL) + +#define dma_outb outb +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. 
+ * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * On PReP, DMA transfers are limited to the lower 16MB of _physical_ memory. + * On CHRP, the W83C553F (and VLSI Tollgate?) support full 32 bit addressing. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
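+ *
+ * Worked example (hypothetical values): a 4096-byte transfer to
+ * physical address 0x123456 on byte channel 2 would be programmed as
+ * page = 0x12, address MSB = 0x34, address LSB = 0x56, and
+ * count = 4095 (one less than the byte count).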
+ * + */ + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_LO_PAGE_0 0x87 /* DMA page registers */ +#define DMA_LO_PAGE_1 0x83 +#define DMA_LO_PAGE_2 0x81 +#define DMA_LO_PAGE_3 0x82 +#define DMA_LO_PAGE_5 0x8B +#define DMA_LO_PAGE_6 0x89 +#define DMA_LO_PAGE_7 0x8A + +#define DMA_HI_PAGE_0 0x487 /* DMA page registers */ +#define DMA_HI_PAGE_1 0x483 +#define DMA_HI_PAGE_2 0x481 +#define DMA_HI_PAGE_3 0x482 +#define DMA_HI_PAGE_5 0x48B +#define DMA_HI_PAGE_6 0x489 +#define DMA_HI_PAGE_7 0x48A + +#define DMA1_EXT_REG 0x40B +#define DMA2_EXT_REG 0x4D6 + +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + +extern spinlock_t dma_spin_lock; + +static __inline__ unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ + unsigned char ucDmaCmd=0x00; + + if (dmanr != 4) + { + dma_outb(0, DMA2_MASK_REG); /* This may not be enabled */ + dma_outb(ucDmaCmd, DMA2_CMD_REG); /* Enable group */ + } + if (dmanr<=3) + { + dma_outb(dmanr, DMA1_MASK_REG); + dma_outb(ucDmaCmd, DMA1_CMD_REG); /* Enable group */ + } else + { + dma_outb(dmanr & 3, DMA2_MASK_REG); + } +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. 
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while interrupts are disabled! --- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr<=3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr&3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static __inline__ void set_dma_page(unsigned int dmanr, int pagenr) +{ + switch(dmanr) { + case 0: + dma_outb(pagenr, DMA_LO_PAGE_0); + dma_outb(pagenr>>8, DMA_HI_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_LO_PAGE_1); + dma_outb(pagenr>>8, DMA_HI_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_LO_PAGE_2); + dma_outb(pagenr>>8, DMA_HI_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_LO_PAGE_3); + dma_outb(pagenr>>8, DMA_HI_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5); + dma_outb(pagenr>>8, DMA_HI_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6); + dma_outb(pagenr>>8, DMA_HI_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7); + dma_outb(pagenr>>8, DMA_HI_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys) +{ + if (dmanr <= 3) { + dma_outb( phys & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + dma_outb( (phys>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + } else { + dma_outb( (phys>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + dma_outb( (phys>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + } + set_dma_page(dmanr, phys>>16); +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + } else { + dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr<=3)? 
((dmanr&3)<<1) + 1 + IO_DMA1_BASE + : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3)? count : (count<<1); +} + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ +extern void free_dma(unsigned int dmanr); /* release it again */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif +#endif /* _ASM_DMA_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/eeh.h linuxppc64_2_4/include/asm-ppc64/eeh.h --- ../kernel.org/linux/include/asm-ppc64/eeh.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/eeh.h Tue Nov 13 10:47:33 2001 @@ -0,0 +1,150 @@ +/* + * eeh.h + * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Start Change Log + * 2001/10/27 : engebret : Created. + * End Change Log + */ + +#ifndef _EEH_H +#define _EEH_H + +struct pci_dev; + +#define IO_UNMAPPED_REGION_ID 0xaUL + +#define IO_TOKEN_TO_ADDR(token) ((((unsigned long)(token)) & 0xFFFFFFFF) | (0xEUL << 60)) +/* Flag bits encoded in the 3 unused function bits of devfn */ +#define EEH_TOKEN_DISABLED (1UL << 34UL) /* eeh is disabled for this token */ +#define IS_EEH_TOKEN_DISABLED(token) ((unsigned long)(token) & EEH_TOKEN_DISABLED) + +#define EEH_STATE_OVERRIDE 1 /* IOA does not require eeh traps */ +#define EEH_STATE_FAILURE 16 /* */ + +/* This is for profiling only and should be removed */ +extern unsigned long eeh_total_mmio_reads; +extern unsigned long eeh_total_mmio_ffs; + +void eeh_init(void); +int eeh_get_state(unsigned long ea); +unsigned long eeh_check_failure(void *token, unsigned long val); + +#define EEH_DISABLE 0 +#define EEH_ENABLE 1 +#define EEH_RELEASE_LOADSTORE 2 +#define EEH_RELEASE_DMA 3 +int eeh_set_option(struct pci_dev *dev, int options); + +/* Given a PCI device check if eeh should be configured or not. + * This may look at firmware properties and/or kernel cmdline options. + */ +int is_eeh_configured(struct pci_dev *dev); + +/* Generate an EEH token. + * The high nibble of the offset is cleared, otherwise bounds checking is performed. + * Use IO_TOKEN_TO_ADDR(token) to translate this token back to a mapped virtual addr. + * Do NOT do this to perform IO -- use the read/write macros! + */ +unsigned long eeh_token(unsigned long phb, + unsigned long bus, + unsigned long devfn, + unsigned long offset); + +extern void *memcpy(void *, const void *, unsigned long); +extern void *memset(void *,int, unsigned long); + +/* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. + * + * Order this macro for performance. 
+ * If EEH is off for a device and it is a memory BAR, ioremap will + * map it to the IOREGION. In this case addr == vaddr and since these + * should be in registers we compare them first. Next we check for + * all ones which is perhaps fastest as ~val. Finally we weed out + * EEH disabled IO BARs. + * + * If this macro yields TRUE, the caller relays to eeh_check_failure() + * which does further tests out of line. + */ +/* #define EEH_POSSIBLE_ERROR(addr, vaddr, val) ((vaddr) != (addr) && ~(val) == 0 && !IS_EEH_TOKEN_DISABLED(addr)) */ +/* This version is rearranged to collect some profiling data */ +#define EEH_POSSIBLE_ERROR(addr, vaddr, val) (++eeh_total_mmio_reads, (~(val) == 0 && (++eeh_total_mmio_ffs, (vaddr) != (addr) && !IS_EEH_TOKEN_DISABLED(addr)))) + +/* + * MMIO read/write operations with EEH support. + * + * addr: 64b token of the form 0xA0PPBBDDyyyyyyyy + * 0xA0 : Unmapped MMIO region + * PP : PHB index (starting at zero) + * BB : PCI Bus number under given PHB + * DD : PCI devfn under given bus + * yyyyyyyy : Virtual address offset + * + * An actual virtual address is produced from this token + * by masking into the form: + * 0xE0000000yyyyyyyy + */ +static inline u8 eeh_readb(void *addr) { + volatile u8 *vaddr = (volatile u8 *)IO_TOKEN_TO_ADDR(addr); + u8 val = in_8(vaddr); + if (EEH_POSSIBLE_ERROR(addr, vaddr, val)) + return eeh_check_failure(addr, val); + return val; +} +static inline void eeh_writeb(u8 val, void *addr) { + volatile u8 *vaddr = (volatile u8 *)IO_TOKEN_TO_ADDR(addr); + out_8(vaddr, val); +} +static inline u16 eeh_readw(void *addr) { + volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr); + u16 val = in_le16(vaddr); + if (EEH_POSSIBLE_ERROR(addr, vaddr, val)) + return eeh_check_failure(addr, val); + return val; +} +static inline void eeh_writew(u16 val, void *addr) { + volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr); + out_le16(vaddr, val); +} +static inline u32 eeh_readl(void *addr) { + volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr); + u32 val = in_le32(vaddr); + if (EEH_POSSIBLE_ERROR(addr, vaddr, val)) + return eeh_check_failure(addr, val); + return val; +} +static inline void eeh_writel(u32 val, void *addr) { + volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr); + out_le32(vaddr, val); +} + +static inline void eeh_memset_io(void *addr, int c, unsigned long n) { + void *vaddr = (void *)IO_TOKEN_TO_ADDR(addr); + memset(vaddr, c, n); +} +static inline void eeh_memcpy_fromio(void *dest, void *src, unsigned long n) { + void *vsrc = (void *)IO_TOKEN_TO_ADDR(src); + memcpy(dest, vsrc, n); + /* look for ffff's here at dest[n] */ +} +static inline void eeh_memcpy_toio(void *dest, void *src, unsigned long n) { + void *vdest = (void *)IO_TOKEN_TO_ADDR(dest); + memcpy(vdest, src, n); +} + +#endif /* _EEH_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/elf.h linuxppc64_2_4/include/asm-ppc64/elf.h --- ../kernel.org/linux/include/asm-ppc64/elf.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/elf.h Thu Oct 18 04:28:48 2001 @@ -0,0 +1,140 @@ +#ifndef __PPC64_ELF_H +#define __PPC64_ELF_H + +/* + * ELF register definitions.. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#define ELF_NGREG 48 /* includes nip, msr, lr, etc. 
*/ +#define ELF_NFPREG 33 /* includes fpscr */ +#define ELF_NVRREG 33 /* includes vscr */ + +typedef unsigned long elf_greg_t64; +typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; + +typedef unsigned int elf_greg_t32; +typedef elf_greg_t32 elf_gregset_t32[ELF_NGREG]; + +/* + * These are used to set parameters in the core dumps. + */ +#ifndef ELF_ARCH +# define ELF_ARCH EM_PPC64 +# define ELF_CLASS ELFCLASS64 +# define ELF_DATA ELFDATA2MSB + typedef elf_greg_t64 elf_greg_t; + typedef elf_gregset_t64 elf_gregset_t; +# define elf_addr_t unsigned long +# define elf_caddr_t char * +#else + /* Assumption: ELF_ARCH == EM_PPC and ELF_CLASS == ELFCLASS32 */ + typedef elf_greg_t32 elf_greg_t; + typedef elf_gregset_t32 elf_gregset_t; +# define elf_addr_t u32 +# define elf_caddr_t u32 +#endif + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == ELF_ARCH) + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (0x08000000) + +/* Common routine for both 32-bit and 64-bit processes */ +#define ELF_CORE_COPY_REGS(gregs, regs) elf_core_copy_regs(gregs, regs); +static inline void +elf_core_copy_regs(elf_gregset_t dstRegs, struct pt_regs* srcRegs) +{ + int i; + + int numGPRS = ((sizeof(struct pt_regs)/sizeof(elf_greg_t64)) < ELF_NGREG) ? (sizeof(struct pt_regs)/sizeof(elf_greg_t64)) : ELF_NGREG; + + for (i=0; i < numGPRS; i++) + dstRegs[i] = (elf_greg_t)((elf_greg_t64 *)srcRegs)[i]; +} + +/* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. This could be done in userspace, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM (NULL) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) \ +do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + current->thread.flags |= PPC_FLAG_32BIT; \ + else \ + current->thread.flags &= ~PPC_FLAG_32BIT; \ + if (ibcs2) \ + set_personality(PER_SVR4); \ + else if (current->personality != PER_LINUX32) \ + set_personality(PER_LINUX); \ +} while (0) +#endif + +/* + * We need to put in some extra aux table entries to tell glibc what + * the cache block size is, so it can use the dcbz instruction safely. + */ +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +/* A special ignored type value for PPC, for glibc compatibility. */ +#define AT_IGNOREPPC 22 + +extern int dcache_bsize; +extern int icache_bsize; +extern int ucache_bsize; + +/* + * The requirements here are: + * - keep the final alignment of sp (sp & 0xf) + * - make sure the 32-bit value at the first 16 byte aligned position of + * AUXV is greater than 16 for glibc compatibility. + * AT_IGNOREPPC is used for that. 
+ * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, + * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. + */ +#define DLINFO_ARCH_ITEMS 3 +#define ARCH_DLINFO \ +do { \ + sp -= DLINFO_ARCH_ITEMS * 2; \ + NEW_AUX_ENT(0, AT_DCACHEBSIZE, dcache_bsize); \ + NEW_AUX_ENT(1, AT_ICACHEBSIZE, icache_bsize); \ + NEW_AUX_ENT(2, AT_UCACHEBSIZE, ucache_bsize); \ + /* \ + * Now handle glibc compatibility. \ + */ \ + sp -= 2*2; \ + NEW_AUX_ENT(0, AT_IGNOREPPC, AT_IGNOREPPC); \ + NEW_AUX_ENT(1, AT_IGNOREPPC, AT_IGNOREPPC); \ + } while (0) + +#endif /* __PPC64_ELF_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/errno.h linuxppc64_2_4/include/asm-ppc64/errno.h --- ../kernel.org/linux/include/asm-ppc64/errno.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/errno.h Mon May 7 13:19:28 2001 @@ -0,0 +1,145 @@ +#ifndef _PPC64_ERRNO_H +#define _PPC64_ERRNO_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Arg list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* 
Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ +#define EDEADLOCK 58 /* File locking deadlock error */ +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX 
semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ + +/* Should never be seen by user programs */ +#define ERESTARTSYS 512 +#define ERESTARTNOINTR 513 +#define ERESTARTNOHAND 514 /* restart if no handler.. */ +#define ENOIOCTLCMD 515 /* No ioctl command */ + +#define _LAST_ERRNO 515 + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/fcntl.h linuxppc64_2_4/include/asm-ppc64/fcntl.h --- ../kernel.org/linux/include/asm-ppc64/fcntl.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/fcntl.h Mon May 7 13:19:28 2001 @@ -0,0 +1,110 @@ +#ifndef _PPC64_FCNTL_H +#define _PPC64_FCNTL_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECTORY 040000 /* must be a directory */ +#define O_NOFOLLOW 0100000 /* don't follow links */ +#define O_LARGEFILE 0200000 +#define O_DIRECT 0400000 /* direct disk access hint - currently ignored */ + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... 
Which allows concurrent read & write ops */
+
+#ifdef __KERNEL__
+#define F_POSIX		1
+#define F_FLOCK		2
+#define F_BROKEN	4	/* broken flock() emulation */
+#endif /* __KERNEL__ */
+
+struct flock {
+	short l_type;
+	short l_whence;
+	off_t l_start;
+	off_t l_len;
+	pid_t l_pid;
+};
+
+#ifdef __KERNEL__
+struct flock32 {
+	short l_type;
+	short l_whence;
+	__kernel_off_t32 l_start;
+	__kernel_off_t32 l_len;
+	__kernel_pid_t32 l_pid;
+	short __unused;
+};
+#endif
+
+struct flock64 {
+	short l_type;
+	short l_whence;
+	loff_t l_start;
+	loff_t l_len;
+	pid_t l_pid;
+};
+
+#define F_LINUX_SPECIFIC_BASE	1024
+#endif /* _PPC64_FCNTL_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/flight_recorder.h linuxppc64_2_4/include/asm-ppc64/flight_recorder.h
--- ../kernel.org/linux/include/asm-ppc64/flight_recorder.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/flight_recorder.h	Fri Aug  3 18:43:39 2001
@@ -0,0 +1,56 @@
+/************************************************************************
+ * flight_recorder.h                                                    *
+ ************************************************************************
+ * This code supports a generic flight recorder.                        *
+ * Copyright (C) 20yy                                                   *
+ *                                                                      *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+ *                                                                      *
+ * This program is distributed in the hope that it will be useful,      *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of       *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
+ * GNU General Public License for more details.                         *
+ *                                                                      *
+ * You should have received a copy of the GNU General Public License    *
+ * along with this program; if not, write to the:                       *
+ * Free Software Foundation, Inc.,                                      *
+ * 59 Temple Place, Suite 330,                                          *
+ * Boston, MA 02111-1307 USA                                            *
+ ************************************************************************
+ * See the flight_recorder.c file for usage details.                    *
+ ************************************************************************/
+#include
+#include
+
+/************************************************************************
+ * Generic Flight Recorder Structure                                    *
+ ************************************************************************/
+struct flightRecorder {		/* Structure Definition			*/
+	char	Signature[8];	/* Eye Catcher				*/
+	int	Size;		/* Size of Flight Recorder		*/
+	int	Flags;		/* Format Flags.			*/
+	char	*StartPointer;	/* Buffer Starting Address		*/
+	char	*EndPointer;	/* Buffer Ending Address		*/
+	char	*NextPointer;	/* Next Entry Address			*/
+	char	*WrapPointer;	/* Point at which buffer wraps		*/
+	char	*Buffer;	/* Where the data log is.		*/
+};
+typedef struct flightRecorder FlightRecorder;
+
+/************************************************************************
+ * Forward declarations
+ ************************************************************************/
+FlightRecorder *alloc_Flight_Recorder(FlightRecorder *FrPtr, char *Signature, int SizeOfFr);
+void fr_Log_Entry(FlightRecorder *LogFr, const char *fmt, ...);
+int  fr_Dump(FlightRecorder *Fr, char *Buffer, int BufferLen);
+
+/************************************************************************
+ * Sample macro to make life easier using the flight recorder.
+ * TestFr is a global value.
+ * To use it: LOGFR(TestFr, "Test Loop value is %d", Loop);
+ ************************************************************************/
+#define LOGFR(...) (fr_Log_Entry(__VA_ARGS__))
+
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/floppy.h linuxppc64_2_4/include/asm-ppc64/floppy.h
--- ../kernel.org/linux/include/asm-ppc64/floppy.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/floppy.h	Thu Oct 25 20:35:19 2001
@@ -0,0 +1,105 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ */
+#ifndef __ASM_PPC64_FLOPPY_H
+#define __ASM_PPC64_FLOPPY_H
+
+#define fd_inb(port)		inb_p(port)
+#define fd_outb(port,value)	outb_p(port,value)
+
+#define fd_enable_dma()		enable_dma(FLOPPY_DMA)
+#define fd_disable_dma()	disable_dma(FLOPPY_DMA)
+#define fd_request_dma()	request_dma(FLOPPY_DMA,"floppy")
+#define fd_free_dma()		free_dma(FLOPPY_DMA)
+#define fd_clear_dma_ff()	clear_dma_ff(FLOPPY_DMA)
+#define fd_set_dma_mode(mode)	set_dma_mode(FLOPPY_DMA,mode)
+#define fd_set_dma_count(count)	set_dma_count(FLOPPY_DMA,count)
+#define fd_enable_irq()		enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()	disable_irq(FLOPPY_IRQ)
+#define fd_cacheflush(addr,size) /* nothing */
+#define fd_request_irq()	request_irq(FLOPPY_IRQ, floppy_interrupt, \
+					    SA_INTERRUPT|SA_SAMPLE_RANDOM, \
+					    "floppy", NULL)
+#define fd_free_irq()		free_irq(FLOPPY_IRQ, NULL);
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+
+#define fd_dma_setup(addr,size,mode,io) ppc64_fd_dma_setup(addr,size,mode,io)
+
+extern struct pci_dev *ppc64_floppy_dev;
+
+static __inline__ int
+ppc64_fd_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+	static unsigned long prev_size;
+	static dma_addr_t bus_addr = 0;
+	static char *prev_addr;
+	static int prev_dir;
+	int dir;
+
+	dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
+
+	if (bus_addr
+	    && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
+		/* different from last time -- unmap prev */
+		pci_unmap_single(ppc64_floppy_dev, bus_addr, prev_size, prev_dir);
+		bus_addr = 0;
+	}
+
+	if (!bus_addr)	/* need to map it */ {
+		bus_addr = pci_map_single(ppc64_floppy_dev, addr, size, dir);
+	}
+
+	/* remember this one as prev */
+	prev_addr = addr;
+	prev_size = size;
+	prev_dir = dir;
+
+	fd_clear_dma_ff();
+	fd_cacheflush(addr, size);
+	fd_set_dma_mode(mode);
+	set_dma_addr(FLOPPY_DMA, bus_addr);
+	fd_set_dma_count(size);
+	virtual_dma_port = io;
+	fd_enable_dma();
+
+	return 0;
+}
+
+#endif /* CONFIG_PCI */
+
+__inline__ void virtual_dma_init(void)
+{
+	/* Nothing to do on PowerPC */
+}
+
+static int FDC1 = 0x3f0;
+static int FDC2 = -1;
+
+/*
+ * Again, the CMOS information is not available
+ */
+#define FLOPPY0_TYPE	6
+#define FLOPPY1_TYPE	0
+
+#define N_FDC 2			/* Don't change this! */
+#define N_DRIVE 8
+
+#define FLOPPY_MOTOR_MASK 0xf0
+
+/*
+ * The PowerPC has no problems with floppy DMA crossing 64k borders.
+ */ +#define CROSS_64KB(a,s) (0) + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_PPC64_FLOPPY_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/hardirq.h linuxppc64_2_4/include/asm-ppc64/hardirq.h --- ../kernel.org/linux/include/asm-ppc64/hardirq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/hardirq.h Tue Sep 18 14:25:47 2001 @@ -0,0 +1,100 @@ +#ifdef __KERNEL__ +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +/* + * Use a brlock for the global irq lock, based on sparc64. + * Anton Blanchard + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + + +typedef struct { + unsigned long __softirq_pending; +#ifndef CONFIG_SMP + unsigned int __local_irq_count; +#else + unsigned int __unused_on_SMP; /* We use brlocks on SMP */ +#endif + unsigned int __local_bh_count; + unsigned int __syscall_count; + unsigned long __unused; + struct task_struct * __ksoftirqd_task; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ +/* Note that local_irq_count() is replaced by ppc64 specific version for SMP */ + +#ifndef CONFIG_SMP +#define irq_enter(cpu) (local_irq_count(cpu)++) +#define irq_exit(cpu) (local_irq_count(cpu)--) +#else +#undef local_irq_count +#define local_irq_count(cpu) (__brlock_array[cpu][BR_GLOBALIRQ_LOCK]) +#define irq_enter(cpu) br_read_lock(BR_GLOBALIRQ_LOCK) +#define irq_exit(cpu) br_read_unlock(BR_GLOBALIRQ_LOCK) +#endif + +/* + * Are we in an interrupt context? Either doing bottom half + * or hardware interrupt processing? + */ +#define in_interrupt() ({ int __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) + +/* This tests only the local processors hw IRQ context disposition. */ +#define in_irq() (local_irq_count(smp_processor_id()) != 0) + +#ifndef CONFIG_SMP + +#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) +#define hardirq_endlock(cpu) do { } while (0) + +#define synchronize_irq() barrier() + +#else /* CONFIG_SMP */ + +static __inline__ int irqs_running(void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + if (local_irq_count(cpu_logical_map(i))) + return 1; + return 0; +} + +extern unsigned char global_irq_holder; + +static inline void release_irqlock(int cpu) +{ + /* if we didn't own the irq lock, just ignore... 
*/ + if(global_irq_holder == (unsigned char) cpu) { + global_irq_holder = NO_PROC_ID; + br_write_unlock(BR_GLOBALIRQ_LOCK); + } +} + +static inline int hardirq_trylock(int cpu) +{ + spinlock_t *lock = &__br_write_locks[BR_GLOBALIRQ_LOCK].lock; + + return (!local_irq_count(cpu) && !spin_is_locked(lock)); +} + +#define hardirq_endlock(cpu) do { (void)(cpu); } while (0) + +extern void synchronize_irq(void); + +#endif /* CONFIG_SMP */ + +#endif /* __KERNEL__ */ +#endif /* __ASM_HARDIRQ_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/hdreg.h linuxppc64_2_4/include/asm-ppc64/hdreg.h --- ../kernel.org/linux/include/asm-ppc64/hdreg.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/hdreg.h Mon May 7 13:21:44 2001 @@ -0,0 +1,22 @@ +/* + * linux/include/asm-ppc/hdreg.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This file contains the ppc architecture specific IDE code. + */ + +#ifndef __ASMPPC64_HDREG_H +#define __ASMPPC64_HDREG_H + +typedef unsigned short ide_ioreg_t; + +#endif /* __ASMPPC64_HDREG_H */ + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/hw_irq.h linuxppc64_2_4/include/asm-ppc64/hw_irq.h --- ../kernel.org/linux/include/asm-ppc64/hw_irq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/hw_irq.h Tue Sep 25 12:26:45 2001 @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999 Cort Dougan + * + * Use inline IRQs where possible - Anton Blanchard + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+#ifdef __KERNEL__
+#ifndef _PPC64_HW_IRQ_H
+#define _PPC64_HW_IRQ_H
+
+#include
+#include
+
+int timer_interrupt(struct pt_regs *);
+
+#ifdef CONFIG_PPC_ISERIES
+
+extern void __no_use_sti(void);
+extern void __no_use_cli(void);
+extern void __no_use_restore_flags(unsigned long);
+extern unsigned long __no_use_save_flags(void);
+extern void __no_use_set_lost(unsigned long);
+extern void __no_lpq_restore_flags(unsigned long);
+
+#define __cli() __no_use_cli()
+#define __sti() __no_use_sti()
+#define __save_flags(flags) ((flags) = __no_use_save_flags())
+#define __restore_flags(flags) __no_use_restore_flags((unsigned long)flags)
+#define __save_and_cli(flags) ({__save_flags(flags);__cli();})
+
+#else
+
+#define __save_flags(flags) ((flags) = mfmsr())
+#define __restore_flags(flags) mtmsrd(flags)
+
+static inline void __cli(void)
+{
+	unsigned long msr;
+	msr = mfmsr();
+	mtmsrd(msr & ~MSR_EE);
+	__asm__ __volatile__("": : :"memory");
+}
+
+static inline void __sti(void)
+{
+	unsigned long msr;
+	__asm__ __volatile__("": : :"memory");
+	msr = mfmsr();
+	mtmsrd(msr | MSR_EE);
+}
+
+static inline void __do_save_and_cli(unsigned long *flags)
+{
+	unsigned long msr;
+	msr = mfmsr();
+	*flags = msr;
+	mtmsrd(msr & ~MSR_EE);
+	__asm__ __volatile__("": : :"memory");
+}
+
+#define __save_and_cli(flags) __do_save_and_cli(&flags)
+
+#endif /* CONFIG_PPC_ISERIES */
+
+#define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);})
+#define unmask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->enable) irq_desc[irq].handler->enable(irq);})
+#define ack_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->ack) irq_desc[irq].handler->ack(irq);})
+
+/* Should we handle this via lost interrupts and IPIs or should we not
+ * care like we do now ? --BenH.
+ */
+struct hw_interrupt_type;
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+
+#endif /* _PPC64_HW_IRQ_H */
+#endif /* __KERNEL__ */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCall.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCall.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCall.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCall.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,209 @@
+/*
+ * HvCall.h
+ * Copyright (C) 2001  Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//===========================================================================
+//
+// This file contains the "hypervisor call" interface which is used to
+// drive the hypervisor from the OS.
+// +//=========================================================================== + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +#include + +//------------------------------------------------------------------- +// Constants +//------------------------------------------------------------------- +#ifndef _HVCALL_H +#define _HVCALL_H +/* +enum HvCall_ReturnCode +{ + HvCall_Good = 0, + HvCall_Partial = 1, + HvCall_NotOwned = 2, + HvCall_NotFreed = 3, + HvCall_UnspecifiedError = 4 +}; + +enum HvCall_TypeOfSIT +{ + HvCall_ReduceOnly = 0, + HvCall_Unconditional = 1 +}; + +enum HvCall_TypeOfYield +{ + HvCall_YieldTimed = 0, // Yield until specified time + HvCall_YieldToActive = 1, // Yield until all active procs have run + HvCall_YieldToProc = 2 // Yield until the specified processor has run +}; + +enum HvCall_InterruptMasks +{ + HvCall_MaskIPI = 0x00000001, + HvCall_MaskLpEvent = 0x00000002, + HvCall_MaskLpProd = 0x00000004, + HvCall_MaskTimeout = 0x00000008 +}; + +enum HvCall_VaryOffChunkRc +{ + HvCall_VaryOffSucceeded = 0, + HvCall_VaryOffWithdrawn = 1, + HvCall_ChunkInLoadArea = 2, + HvCall_ChunkInHPT = 3, + HvCall_ChunkNotAccessible = 4, + HvCall_ChunkInUse = 5 +}; +*/ + +/* Type of yield for HvCallBaseYieldProcessor */ +#define HvCall_YieldTimed 0 // Yield until specified time (tb) +#define HvCall_YieldToActive 1 // Yield until all active procs have run +#define HvCall_YieldToProc 2 // Yield until the specified processor has run + +/* interrupt masks for setEnabledInterrupts */ +#define HvCall_MaskIPI 0x00000001 +#define HvCall_MaskLpEvent 0x00000002 +#define HvCall_MaskLpProd 0x00000004 +#define HvCall_MaskTimeout 0x00000008 + +/* Log buffer formats */ +#define HvCall_LogBuffer_ASCII 0 +#define HvCall_LogBuffer_EBCDIC 1 + +#define HvCallBaseAckDeferredInts HvCallBase + 0 +#define HvCallBaseCpmPowerOff HvCallBase + 1 +#define HvCallBaseGetHwPatch HvCallBase + 2 +#define HvCallBaseReIplSpAttn HvCallBase + 3 +#define HvCallBaseSetASR HvCallBase + 4 +#define HvCallBaseSetASRAndRfi HvCallBase + 5 +#define HvCallBaseSetIMR HvCallBase + 6 +#define HvCallBaseSendIPI HvCallBase + 7 +#define HvCallBaseTerminateMachine HvCallBase + 8 +#define HvCallBaseTerminateMachineSrc HvCallBase + 9 +#define HvCallBaseProcessPlicInterrupts HvCallBase + 10 +#define HvCallBaseIsPrimaryCpmOrMsdIpl HvCallBase + 11 +#define HvCallBaseSetVirtualSIT HvCallBase + 12 +#define HvCallBaseVaryOffThisProcessor HvCallBase + 13 +#define HvCallBaseVaryOffMemoryChunk HvCallBase + 14 +#define HvCallBaseVaryOffInteractivePercentage HvCallBase + 15 +#define HvCallBaseSendLpProd HvCallBase + 16 +#define HvCallBaseSetEnabledInterrupts HvCallBase + 17 +#define HvCallBaseYieldProcessor HvCallBase + 18 +#define HvCallBaseVaryOffSharedProcUnits HvCallBase + 19 +#define HvCallBaseSetVirtualDecr HvCallBase + 20 +#define HvCallBaseClearLogBuffer HvCallBase + 21 +#define HvCallBaseGetLogBufferCodePage HvCallBase + 22 +#define HvCallBaseGetLogBufferFormat HvCallBase + 23 +#define HvCallBaseGetLogBufferLength HvCallBase + 24 +#define HvCallBaseReadLogBuffer HvCallBase + 25 +#define HvCallBaseSetLogBufferFormatAndCodePage HvCallBase + 26 +#define HvCallBaseWriteLogBuffer HvCallBase + 27 +#define HvCallBaseRouter28 HvCallBase + 28 +#define HvCallBaseRouter29 HvCallBase + 29 +#define HvCallBaseRouter30 HvCallBase + 30 
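[Each hypervisor function above is addressed as HvCallBase plus a fixed offset; the inline wrappers that follow hand that number, together with up to seven argument registers, to the HvCall0..HvCall7 stubs from HvCallSc.h. As a rough usage sketch, not part of the patch and assuming only the defines above and the wrappers defined below, an iSeries idle loop could yield its virtual processor along these lines:

    /* Sketch: give the virtual processor back to the hypervisor until
     * the other active processors in the partition have run, with the
     * usual interrupt sources enabled so the yield can be cut short. */
    static inline void iseries_idle_yield(void)
    {
            HvCall_setEnabledInterrupts(HvCall_MaskIPI |
                                        HvCall_MaskLpEvent |
                                        HvCall_MaskLpProd |
                                        HvCall_MaskTimeout);
            HvCall_yieldProcessor(HvCall_YieldToActive, 0);
    }
]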
+//===================================================================================== +static inline void HvCall_setVirtualDecr(void) +{ + // Ignore any error return codes - most likely means that the target value for the + // LP has been increased and this vary off would bring us below the new target. + HvCall0(HvCallBaseSetVirtualDecr); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//===================================================================== +static inline void HvCall_yieldProcessor(unsigned typeOfYield, u64 yieldParm) +{ + HvCall2( HvCallBaseYieldProcessor, typeOfYield, yieldParm ); +} +//===================================================================== +static inline void HvCall_setEnabledInterrupts(u64 enabledInterrupts) +{ + HvCall1(HvCallBaseSetEnabledInterrupts,enabledInterrupts); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} + +//===================================================================== +static inline void HvCall_clearLogBuffer(HvLpIndex lpindex) +{ + HvCall1(HvCallBaseClearLogBuffer,lpindex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} + +//===================================================================== +static inline u32 HvCall_getLogBufferCodePage(HvLpIndex lpindex) +{ + u32 retVal = HvCall1(HvCallBaseGetLogBufferCodePage,lpindex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} + +//===================================================================== +static inline int HvCall_getLogBufferFormat(HvLpIndex lpindex) +{ + int retVal = HvCall1(HvCallBaseGetLogBufferFormat,lpindex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} + +//===================================================================== +static inline u32 HvCall_getLogBufferLength(HvLpIndex lpindex) +{ + u32 retVal = HvCall1(HvCallBaseGetLogBufferLength,lpindex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} + +//===================================================================== +static inline void HvCall_setLogBufferFormatAndCodepage(int format, u32 codePage) +{ + HvCall2(HvCallBaseSetLogBufferFormatAndCodePage,format, codePage); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} + +//===================================================================== +int HvCall_readLogBuffer(HvLpIndex lpindex, void *buffer, u64 bufLen); +void HvCall_writeLogBuffer(const void *buffer, u64 bufLen); + +//===================================================================== +static inline void HvCall_sendIPI(struct Paca * targetPaca) +{ + HvCall1( HvCallBaseSendIPI, targetPaca->xPacaIndex ); +} + +//===================================================================== +static inline void HvCall_terminateMachineSrc(void) +{ + HvCall0( HvCallBaseTerminateMachineSrc ); +} + + +#endif // _HVCALL_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallCfg.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallCfg.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallCfg.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallCfg.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,219 @@ +/* + * HvCallCfg.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//===================================================================================== +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from the OS. +// +//===================================================================================== + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +//------------------------------------------------------------------------------------- +// Constants +//------------------------------------------------------------------------------------- +#ifndef _HVCALLCFG_H +#define _HVCALLCFG_H + +enum HvCallCfg_ReqQual +{ + HvCallCfg_Cur = 0, + HvCallCfg_Init = 1, + HvCallCfg_Max = 2, + HvCallCfg_Min = 3 +}; + +#define HvCallCfgGetLps HvCallCfg + 0 +#define HvCallCfgGetActiveLpMap HvCallCfg + 1 +#define HvCallCfgGetLpVrmIndex HvCallCfg + 2 +#define HvCallCfgGetLpMinSupportedPlicVrmIndex HvCallCfg + 3 +#define HvCallCfgGetLpMinCompatablePlicVrmIndex HvCallCfg + 4 +#define HvCallCfgGetLpVrmName HvCallCfg + 5 +#define HvCallCfgGetSystemPhysicalProcessors HvCallCfg + 6 +#define HvCallCfgGetPhysicalProcessors HvCallCfg + 7 +#define HvCallCfgGetSystemMsChunks HvCallCfg + 8 +#define HvCallCfgGetMsChunks HvCallCfg + 9 +#define HvCallCfgGetInteractivePercentage HvCallCfg + 10 +#define HvCallCfgIsBusDedicated HvCallCfg + 11 +#define HvCallCfgGetBusOwner HvCallCfg + 12 +#define HvCallCfgGetBusAllocation HvCallCfg + 13 +#define HvCallCfgGetBusUnitOwner HvCallCfg + 14 +#define HvCallCfgGetBusUnitAllocation HvCallCfg + 15 +#define HvCallCfgGetVirtualBusPool HvCallCfg + 16 +#define HvCallCfgGetBusUnitInterruptProc HvCallCfg + 17 +#define HvCallCfgGetConfiguredBusUnitsForIntProc HvCallCfg + 18 +#define HvCallCfgGetRioSanBusPool HvCallCfg + 19 +#define HvCallCfgGetSharedPoolIndex HvCallCfg + 20 +#define HvCallCfgGetSharedProcUnits HvCallCfg + 21 +#define HvCallCfgGetNumProcsInSharedPool HvCallCfg + 22 +#define HvCallCfgRouter23 HvCallCfg + 23 +#define HvCallCfgRouter24 HvCallCfg + 24 +#define HvCallCfgRouter25 HvCallCfg + 25 +#define HvCallCfgRouter26 HvCallCfg + 26 +#define HvCallCfgRouter27 HvCallCfg + 27 +#define HvCallCfgGetMinRuntimeMsChunks HvCallCfg + 28 +#define HvCallCfgSetMinRuntimeMsChunks HvCallCfg + 29 +#define HvCallCfgGetVirtualLanIndexMap HvCallCfg + 30 +#define HvCallCfgGetLpExecutionMode HvCallCfg + 31 +#define HvCallCfgGetHostingLpIndex HvCallCfg + 32 + +//==================================================================== +static inline HvLpIndex HvCallCfg_getLps(void) +{ + HvLpIndex retVal = HvCall0(HvCallCfgGetLps); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline int HvCallCfg_isBusDedicated(u64 busIndex) +{ + int retVal = HvCall1(HvCallCfgIsBusDedicated,busIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return 
retVal; +} +//==================================================================== +static inline HvLpIndex HvCallCfg_getBusOwner(u64 busIndex) +{ + HvLpIndex retVal = HvCall1(HvCallCfgGetBusOwner,busIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline HvLpIndexMap HvCallCfg_getBusAllocation(u64 busIndex) +{ + HvLpIndexMap retVal = HvCall1(HvCallCfgGetBusAllocation,busIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline HvLpIndexMap HvCallCfg_getActiveLpMap(void) +{ + HvLpIndexMap retVal = HvCall0(HvCallCfgGetActiveLpMap); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline HvLpVirtualLanIndexMap HvCallCfg_getVirtualLanIndexMap(HvLpIndex lp) +{ + // This is a new function in V5R1 so calls to this on older + // hypervisors will return -1 + u64 retVal = HvCall1(HvCallCfgGetVirtualLanIndexMap, lp); + if(retVal == -1) + retVal = 0; + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getSystemMsChunks(void) +{ + u64 retVal = HvCall0(HvCallCfgGetSystemMsChunks); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getMsChunks(HvLpIndex lp,enum HvCallCfg_ReqQual qual) +{ + u64 retVal = HvCall2(HvCallCfgGetMsChunks,lp,qual); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getMinRuntimeMsChunks(HvLpIndex lp) +{ + // NOTE: This function was added in v5r1 so older hypervisors will return a -1 value + u64 retVal = HvCall1(HvCallCfgGetMinRuntimeMsChunks,lp); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_setMinRuntimeMsChunks(u64 chunks) +{ + u64 retVal = HvCall1(HvCallCfgSetMinRuntimeMsChunks,chunks); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getSystemPhysicalProcessors(void) +{ + u64 retVal = HvCall0(HvCallCfgGetSystemPhysicalProcessors); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getPhysicalProcessors(HvLpIndex lp,enum HvCallCfg_ReqQual qual) +{ + u64 retVal = HvCall2(HvCallCfgGetPhysicalProcessors,lp,qual); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline u64 HvCallCfg_getConfiguredBusUnitsForInterruptProc(HvLpIndex lp, + u16 hvLogicalProcIndex) +{ + u64 retVal = HvCall2(HvCallCfgGetConfiguredBusUnitsForIntProc,lp,hvLogicalProcIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; + +} +//================================================================== +static inline HvLpSharedPoolIndex HvCallCfg_getSharedPoolIndex(HvLpIndex lp) +{ + HvLpSharedPoolIndex retVal = + HvCall1(HvCallCfgGetSharedPoolIndex,lp); + // 
getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; + +} +//================================================================== +static inline u64 HvCallCfg_getSharedProcUnits(HvLpIndex lp,enum HvCallCfg_ReqQual qual) +{ + u64 retVal = HvCall2(HvCallCfgGetSharedProcUnits,lp,qual); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; + +} +//================================================================== +static inline u64 HvCallCfg_getNumProcsInSharedPool(HvLpSharedPoolIndex sPI) +{ + u16 retVal = HvCall1(HvCallCfgGetNumProcsInSharedPool,sPI); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; + +} +//================================================================== +static inline HvLpIndex HvCallCfg_getHostingLpIndex(HvLpIndex lp) +{ + u64 retVal = HvCall1(HvCallCfgGetHostingLpIndex,lp); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; + +} + +#endif // _HVCALLCFG_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallEvent.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallEvent.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallEvent.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallEvent.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,335 @@ +/* + * HvCallEvent.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//================================================================== +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from the OS. 
+// +//================================================================== + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include +#endif + +#ifndef _HVTYPES_H +#include +#endif + +#include + +//------------------------------------------------------------------- +// Other Includes +//------------------------------------------------------------------- + +//------------------------------------------------------------------- +// Constants +//------------------------------------------------------------------- +#ifndef _HVCALLEVENT_H +#define _HVCALLEVENT_H + +struct HvLpEvent; + +typedef u8 HvLpEvent_Type; +typedef u8 HvLpEvent_AckInd; +typedef u8 HvLpEvent_AckType; + +struct HvCallEvent_PackedParms +{ + u8 xAckType:1; + u8 xAckInd:1; + u8 xRsvd:1; + u8 xTargetLp:5; + u8 xType; + u16 xSubtype; + HvLpInstanceId xSourceInstId; + HvLpInstanceId xTargetInstId; +}; + +typedef u8 HvLpDma_Direction; +typedef u8 HvLpDma_AddressType; + +struct HvCallEvent_PackedDmaParms +{ + u8 xDirection:1; + u8 xLocalAddrType:1; + u8 xRemoteAddrType:1; + u8 xRsvd1:5; + HvLpIndex xRemoteLp; + u8 xType; + u8 xRsvd2; + HvLpInstanceId xLocalInstId; + HvLpInstanceId xRemoteInstId; +}; + +typedef u64 HvLpEvent_Rc; +typedef u64 HvLpDma_Rc; + +#define HvCallEventAckLpEvent HvCallEvent + 0 +#define HvCallEventCancelLpEvent HvCallEvent + 1 +#define HvCallEventCloseLpEventPath HvCallEvent + 2 +#define HvCallEventDmaBufList HvCallEvent + 3 +#define HvCallEventDmaSingle HvCallEvent + 4 +#define HvCallEventDmaToSp HvCallEvent + 5 +#define HvCallEventGetOverflowLpEvents HvCallEvent + 6 +#define HvCallEventGetSourceLpInstanceId HvCallEvent + 7 +#define HvCallEventGetTargetLpInstanceId HvCallEvent + 8 +#define HvCallEventOpenLpEventPath HvCallEvent + 9 +#define HvCallEventSetLpEventStack HvCallEvent + 10 +#define HvCallEventSignalLpEvent HvCallEvent + 11 +#define HvCallEventSignalLpEventParms HvCallEvent + 12 +#define HvCallEventSetInterLpQueueIndex HvCallEvent + 13 +#define HvCallEventSetLpEventQueueInterruptProc HvCallEvent + 14 +#define HvCallEventRouter15 HvCallEvent + 15 + +//====================================================================== +static inline void HvCallEvent_getOverflowLpEvents(u8 queueIndex) +{ + HvCall1(HvCallEventGetOverflowLpEvents,queueIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//====================================================================== +static inline void HvCallEvent_setInterLpQueueIndex(u8 queueIndex) +{ + HvCall1(HvCallEventSetInterLpQueueIndex,queueIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//====================================================================== +static inline void HvCallEvent_setLpEventStack(u8 queueIndex, + char * eventStackAddr, + u32 eventStackSize) +{ + u64 abs_addr; + abs_addr = virt_to_absolute( (unsigned long) eventStackAddr ); + + HvCall3(HvCallEventSetLpEventStack, queueIndex, abs_addr, eventStackSize); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//====================================================================== +static inline void HvCallEvent_setLpEventQueueInterruptProc(u8 queueIndex, + u16 lpLogicalProcIndex) +{ + HvCall2(HvCallEventSetLpEventQueueInterruptProc,queueIndex,lpLogicalProcIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//===================================================================== +static inline HvLpEvent_Rc 
HvCallEvent_signalLpEvent(struct HvLpEvent* event) +{ + u64 abs_addr; + HvLpEvent_Rc retVal; +#ifdef DEBUG_SENDEVENT + printk("HvCallEvent_signalLpEvent: *event = %016lx\n ", (unsigned long)event); +#endif + abs_addr = virt_to_absolute( (unsigned long) event ); + retVal = (HvLpEvent_Rc)HvCall1(HvCallEventSignalLpEvent, abs_addr); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//===================================================================== +static inline HvLpEvent_Rc HvCallEvent_signalLpEventFast(HvLpIndex targetLp, + HvLpEvent_Type type, + u16 subtype, + HvLpEvent_AckInd ackInd, + HvLpEvent_AckType ackType, + HvLpInstanceId sourceInstanceId, + HvLpInstanceId targetInstanceId, + u64 correlationToken, + u64 eventData1, + u64 eventData2, + u64 eventData3, + u64 eventData4, + u64 eventData5) +{ + HvLpEvent_Rc retVal; + + // Pack the misc bits into a single Dword to pass to PLIC + union + { + struct HvCallEvent_PackedParms parms; + u64 dword; + } packed; + packed.parms.xAckType = ackType; + packed.parms.xAckInd = ackInd; + packed.parms.xRsvd = 0; + packed.parms.xTargetLp = targetLp; + packed.parms.xType = type; + packed.parms.xSubtype = subtype; + packed.parms.xSourceInstId = sourceInstanceId; + packed.parms.xTargetInstId = targetInstanceId; + + retVal = (HvLpEvent_Rc)HvCall7(HvCallEventSignalLpEventParms, + packed.dword, + correlationToken, + eventData1,eventData2, + eventData3,eventData4, + eventData5); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline HvLpEvent_Rc HvCallEvent_ackLpEvent(struct HvLpEvent* event) +{ + u64 abs_addr; + HvLpEvent_Rc retVal; + abs_addr = virt_to_absolute( (unsigned long) event ); + + retVal = (HvLpEvent_Rc)HvCall1(HvCallEventAckLpEvent, abs_addr); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//==================================================================== +static inline HvLpEvent_Rc HvCallEvent_cancelLpEvent(struct HvLpEvent* event) +{ + u64 abs_addr; + HvLpEvent_Rc retVal; + abs_addr = virt_to_absolute( (unsigned long) event ); + + retVal = (HvLpEvent_Rc)HvCall1(HvCallEventCancelLpEvent, abs_addr); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline HvLpInstanceId HvCallEvent_getSourceLpInstanceId(HvLpIndex targetLp, HvLpEvent_Type type) +{ + HvLpInstanceId retVal; + retVal = HvCall2(HvCallEventGetSourceLpInstanceId,targetLp,type); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline HvLpInstanceId HvCallEvent_getTargetLpInstanceId(HvLpIndex targetLp, HvLpEvent_Type type) +{ + HvLpInstanceId retVal; + retVal = HvCall2(HvCallEventGetTargetLpInstanceId,targetLp,type); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//=================================================================== +static inline void HvCallEvent_openLpEventPath(HvLpIndex targetLp, + HvLpEvent_Type type) +{ + HvCall2(HvCallEventOpenLpEventPath,targetLp,type); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//=================================================================== +static inline void HvCallEvent_closeLpEventPath(HvLpIndex targetLp, + HvLpEvent_Type type) +{ + HvCall2(HvCallEventCloseLpEventPath,targetLp,type); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} 
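[The signalLpEventFast() variant above, and the DMA calls that follow, all use the same trick to stay within the hypervisor's register-based calling convention: the miscellaneous small parameters are laid out as bitfields in a struct that is exactly 64 bits wide, and a union overlays that struct with a u64 so the whole set travels to PLIC in one register. A minimal standalone sketch of the idiom, with the helper names invented for illustration:

    /* Pack several sub-register-sized fields into one dword.  The
     * struct must total exactly 64 bits, as HvCallEvent_PackedParms
     * defined above does. */
    union packed_u64 {
            struct HvCallEvent_PackedParms parms;
            u64 dword;
    };

    static inline u64 pack_event_parms(u8 ackType, u8 ackInd, u8 targetLp,
                                       u8 type, u16 subtype)
    {
            union packed_u64 packed;
            packed.parms.xAckType = ackType;
            packed.parms.xAckInd = ackInd;
            packed.parms.xRsvd = 0;
            packed.parms.xTargetLp = targetLp;
            packed.parms.xType = type;
            packed.parms.xSubtype = subtype;
            packed.parms.xSourceInstId = 0;   /* a real caller fills these */
            packed.parms.xTargetInstId = 0;
            return packed.dword;              /* one register for PLIC */
    }
]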
+//=================================================================== +static inline HvLpDma_Rc HvCallEvent_dmaBufList(HvLpEvent_Type type, + HvLpIndex remoteLp, + HvLpDma_Direction direction, + HvLpInstanceId localInstanceId, + HvLpInstanceId remoteInstanceId, + HvLpDma_AddressType localAddressType, + HvLpDma_AddressType remoteAddressType, + // Do these need to be converted to + // absolute addresses? + u64 localBufList, + u64 remoteBufList, + + u32 transferLength) +{ + HvLpDma_Rc retVal; + // Pack the misc bits into a single Dword to pass to PLIC + union + { + struct HvCallEvent_PackedDmaParms parms; + u64 dword; + } packed; + packed.parms.xDirection = direction; + packed.parms.xLocalAddrType = localAddressType; + packed.parms.xRemoteAddrType = remoteAddressType; + packed.parms.xRsvd1 = 0; + packed.parms.xRemoteLp = remoteLp; + packed.parms.xType = type; + packed.parms.xRsvd2 = 0; + packed.parms.xLocalInstId = localInstanceId; + packed.parms.xRemoteInstId = remoteInstanceId; + + retVal = (HvLpDma_Rc)HvCall4(HvCallEventDmaBufList, + packed.dword, + localBufList, + remoteBufList, + transferLength); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//================================================================= +static inline HvLpDma_Rc HvCallEvent_dmaSingle(HvLpEvent_Type type, + HvLpIndex remoteLp, + HvLpDma_Direction direction, + HvLpInstanceId localInstanceId, + HvLpInstanceId remoteInstanceId, + HvLpDma_AddressType localAddressType, + HvLpDma_AddressType remoteAddressType, + u64 localAddrOrTce, + u64 remoteAddrOrTce, + u32 transferLength) +{ + HvLpDma_Rc retVal; + // Pack the misc bits into a single Dword to pass to PLIC + union + { + struct HvCallEvent_PackedDmaParms parms; + u64 dword; + } packed; + packed.parms.xDirection = direction; + packed.parms.xLocalAddrType = localAddressType; + packed.parms.xRemoteAddrType = remoteAddressType; + packed.parms.xRsvd1 = 0; + packed.parms.xRemoteLp = remoteLp; + packed.parms.xType = type; + packed.parms.xRsvd2 = 0; + packed.parms.xLocalInstId = localInstanceId; + packed.parms.xRemoteInstId = remoteInstanceId; + + retVal = (HvLpDma_Rc)HvCall4(HvCallEventDmaSingle, + packed.dword, + localAddrOrTce, + remoteAddrOrTce, + transferLength); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//================================================================= +static inline HvLpDma_Rc HvCallEvent_dmaToSp(void* local, u32 remote, u32 length, HvLpDma_Direction dir) +{ + u64 abs_addr; + HvLpDma_Rc retVal; + abs_addr = virt_to_absolute( (unsigned long) local ); + + retVal = (HvLpDma_Rc)HvCall4(HvCallEventDmaToSp, + abs_addr, + remote, + length, + dir); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//================================================================ + +#endif // _HVCALLEVENT_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallHpt.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallHpt.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallHpt.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallHpt.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,143 @@ +/* + * HvCallHpt.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================ +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from the OS. +// +//============================================================================ + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +//------------------------------------------------------------------- +// Other Includes +//------------------------------------------------------------------- + +#ifndef _PPC_MMU_H +#include +#endif + +//----------------------------------------------------------------------------- +// Constants +//----------------------------------------------------------------------------- +#ifndef _HVCALLHPT_H +#define _HVCALLHPT_H + +#define HvCallHptGetHptAddress HvCallHpt + 0 +#define HvCallHptGetHptPages HvCallHpt + 1 +#define HvCallHptSetPp HvCallHpt + 5 +#define HvCallHptSetSwBits HvCallHpt + 6 +#define HvCallHptUpdate HvCallHpt + 7 +#define HvCallHptInvalidateNoSyncICache HvCallHpt + 8 +#define HvCallHptGet HvCallHpt + 11 +#define HvCallHptFindNextValid HvCallHpt + 12 +#define HvCallHptFindValid HvCallHpt + 13 +#define HvCallHptAddValidate HvCallHpt + 16 +#define HvCallHptInvalidateSetSwBitsGet HvCallHpt + 18 + + +//============================================================================ +static inline u64 HvCallHpt_getHptAddress(void) +{ + u64 retval = HvCall0(HvCallHptGetHptAddress); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retval; +} +//============================================================================ +static inline u64 HvCallHpt_getHptPages(void) +{ + u64 retval = HvCall0(HvCallHptGetHptPages); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retval; +} +//============================================================================= +static inline void HvCallHpt_setPp(u32 hpteIndex, u8 value) +{ + HvCall2( HvCallHptSetPp, hpteIndex, value ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//============================================================================= +static inline void HvCallHpt_setSwBits(u32 hpteIndex, u8 bitson, u8 bitsoff ) +{ + HvCall3( HvCallHptSetSwBits, hpteIndex, bitson, bitsoff ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//============================================================================= +static inline void HvCallHpt_invalidateNoSyncICache(u32 hpteIndex) + +{ + HvCall1( HvCallHptInvalidateNoSyncICache, hpteIndex ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//============================================================================= +static inline u64 HvCallHpt_invalidateSetSwBitsGet(u32 hpteIndex, u8 bitson, u8 bitsoff ) + +{ + u64 compressedStatus; + compressedStatus = HvCall4( HvCallHptInvalidateSetSwBitsGet, hpteIndex, bitson, bitsoff, 1 ); + HvCall1( HvCallHptInvalidateNoSyncICache, 
hpteIndex ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return compressedStatus; +} +//============================================================================= +static inline u64 HvCallHpt_findValid( struct _HPTE *hpte, u64 vpn ) +{ + u64 retIndex = HvCall3Ret16( HvCallHptFindValid, hpte, vpn, 0, 0 ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retIndex; +} +//============================================================================= +static inline u64 HvCallHpt_findNextValid( struct _HPTE *hpte, u32 hpteIndex, u8 bitson, u8 bitsoff ) +{ + u64 retIndex = HvCall3Ret16( HvCallHptFindNextValid, hpte, hpteIndex, bitson, bitsoff ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retIndex; +} +//============================================================================= +static inline void HvCallHpt_get( struct _HPTE *hpte, u32 hpteIndex ) +{ + HvCall2Ret16( HvCallHptGet, hpte, hpteIndex, 0 ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//============================================================================ +static inline void HvCallHpt_addValidate( u32 hpteIndex, + u32 hBit, + struct _HPTE *hpte ) + +{ + HvCall4( HvCallHptAddValidate, hpteIndex, + hBit, (*((u64 *)hpte)), (*(((u64 *)hpte)+1)) ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} + + +//============================================================================= + +#endif // _HVCALLHPT_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallPci.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallPci.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallPci.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallPci.h Fri Nov 30 16:53:36 2001 @@ -0,0 +1,693 @@ +/************************************************************************/ +/* Provides the Hypervisor PCI calls for iSeries Linux Parition. */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, Jan 9, 2001 */ +/************************************************************************/ +//============================================================================ +// Header File Id +// Name______________: HvCallPci.H +// +// Description_______: +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from SLIC. 
+// +//============================================================================ + +//------------------------------------------------------------------- +// Forward declarations +//------------------------------------------------------------------- + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +//------------------------------------------------------------------- +// Other Includes +//------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// Constants +//----------------------------------------------------------------------------- +#ifndef _HVCALLPCI_H +#define _HVCALLPCI_H + +struct HvCallPci_DsaAddr { // make sure this struct size is 64-bits total + u16 busNumber; + u8 subBusNumber; + u8 deviceId; + u8 barNumber; + u8 reserved[3]; +}; +union HvDsaMap { + u64 DsaAddr; + struct HvCallPci_DsaAddr Dsa; +}; + +struct HvCallPci_LoadReturn { + u64 rc; + u64 value; +}; + +enum HvCallPci_DeviceType {HvCallPci_NodeDevice = 1, + HvCallPci_SpDevice = 2, + HvCallPci_IopDevice = 3, + HvCallPci_BridgeDevice = 4, + HvCallPci_MultiFunctionDevice = 5, + HvCallPci_IoaDevice = 6 +}; + + +struct HvCallPci_DeviceInfo { + u32 deviceType; // See DeviceType enum for values +}; + +struct HvCallPci_BusUnitInfo { + u32 sizeReturned; // length of data returned + u32 deviceType; // see DeviceType enum for values +}; + +struct HvCallPci_BridgeInfo { + struct HvCallPci_BusUnitInfo busUnitInfo; // Generic bus unit info + u8 subBusNumber; // Bus number of secondary bus + u8 maxAgents; // Max idsels on secondary bus +}; + + +// Maximum BusUnitInfo buffer size. Provided for clients so they can allocate +// a buffer big enough for any type of bus unit. Increase as needed. 
+enum {HvCallPci_MaxBusUnitInfoSize = 128}; + +struct HvCallPci_BarParms { + u64 vaddr; + u64 raddr; + u64 size; + u64 protectStart; + u64 protectEnd; + u64 relocationOffset; + u64 pciAddress; + u64 reserved[3]; +}; + +enum HvCallPci_VpdType { + HvCallPci_BusVpd = 1, + HvCallPci_BusAdapterVpd = 2 +}; + +#define HvCallPciConfigLoad8 HvCallPci + 0 +#define HvCallPciConfigLoad16 HvCallPci + 1 +#define HvCallPciConfigLoad32 HvCallPci + 2 +#define HvCallPciConfigStore8 HvCallPci + 3 +#define HvCallPciConfigStore16 HvCallPci + 4 +#define HvCallPciConfigStore32 HvCallPci + 5 +#define HvCallPciEoi HvCallPci + 16 +#define HvCallPciGetBarParms HvCallPci + 18 +#define HvCallPciMaskFisr HvCallPci + 20 +#define HvCallPciUnmaskFisr HvCallPci + 21 +#define HvCallPciSetSlotReset HvCallPci + 25 +#define HvCallPciGetDeviceInfo HvCallPci + 27 +#define HvCallPciGetCardVpd HvCallPci + 28 +#define HvCallPciBarLoad8 HvCallPci + 40 +#define HvCallPciBarLoad16 HvCallPci + 41 +#define HvCallPciBarLoad32 HvCallPci + 42 +#define HvCallPciBarLoad64 HvCallPci + 43 +#define HvCallPciBarStore8 HvCallPci + 44 +#define HvCallPciBarStore16 HvCallPci + 45 +#define HvCallPciBarStore32 HvCallPci + 46 +#define HvCallPciBarStore64 HvCallPci + 47 +#define HvCallPciMaskInterrupts HvCallPci + 48 +#define HvCallPciUnmaskInterrupts HvCallPci + 49 +#define HvCallPciGetBusUnitInfo HvCallPci + 50 + +//============================================================================ +static inline u64 HvCallPci_configLoad8(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u8 *value) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + HvCall3Ret16(HvCallPciConfigLoad8, &retVal, *(u64 *)&dsa, offset, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *value = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_configLoad16(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u16 *value) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + HvCall3Ret16(HvCallPciConfigLoad16, &retVal, *(u64 *)&dsa, offset, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *value = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_configLoad32(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u32 *value) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + HvCall3Ret16(HvCallPciConfigLoad32, &retVal, *(u64 *)&dsa, offset, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *value = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_configStore8(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u8 value) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + retVal = HvCall4(HvCallPciConfigStore8, *(u64 *)&dsa, offset, value, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return 
retVal; +} +//============================================================================ +static inline u64 HvCallPci_configStore16(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u16 value) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + retVal = HvCall4(HvCallPciConfigStore16, *(u64 *)&dsa, offset, value, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_configStore32(u16 busNumber, u8 subBusNumber, + u8 deviceId, u32 offset, + u32 value) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumber; + dsa.subBusNumber = subBusNumber; + dsa.deviceId = deviceId; + + retVal = HvCall4(HvCallPciConfigStore32, *(u64 *)&dsa, offset, value, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_barLoad8(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u8* valueParm) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + HvCall3Ret16(HvCallPciBarLoad8, &retVal, *(u64 *)&dsa, offsetParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *valueParm = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_barLoad16(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u16* valueParm) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + HvCall3Ret16(HvCallPciBarLoad16, &retVal, *(u64 *)&dsa, offsetParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *valueParm = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_barLoad32(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u32* valueParm) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + HvCall3Ret16(HvCallPciBarLoad32, &retVal, *(u64 *)&dsa, offsetParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *valueParm = retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_barLoad64(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u64* valueParm) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + HvCall3Ret16(HvCallPciBarLoad64, &retVal, *(u64 *)&dsa, offsetParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + *valueParm = 
retVal.value; + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_barStore8(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u8 valueParm) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + retVal = HvCall4(HvCallPciBarStore8, *(u64 *)&dsa, offsetParm, valueParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_barStore16(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u16 valueParm) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + retVal = HvCall4(HvCallPciBarStore16, *(u64 *)&dsa, offsetParm, valueParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_barStore32(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u32 valueParm) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + retVal = HvCall4(HvCallPciBarStore32, *(u64 *)&dsa, offsetParm, valueParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_barStore64(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 offsetParm, + u64 valueParm) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + retVal = HvCall4(HvCallPciBarStore64, *(u64 *)&dsa, offsetParm, valueParm, 0); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_eoi(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm) +{ + struct HvCallPci_DsaAddr dsa; + struct HvCallPci_LoadReturn retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + HvCall1Ret16(HvCallPciEoi, &retVal, *(u64*)&dsa); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal.rc; +} +//============================================================================ +static inline u64 HvCallPci_getBarParms(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u8 barNumberParm, + u64 parms, + u32 sizeofParms) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + dsa.barNumber = barNumberParm; + + retVal = HvCall3(HvCallPciGetBarParms, *(u64*)&dsa, parms, sizeofParms); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ 
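[Every wrapper in this file targets its device the same way: an HvCallPci_DsaAddr is zeroed on the stack, the bus, sub-bus, and device fields (plus the BAR number for the BAR calls) are filled in, and the struct is reinterpreted as a u64 with *(u64 *)&dsa, which works because the struct is declared to be exactly 64 bits. A hypothetical helper that factors the idiom out would look like this; illustration only, the patch deliberately open-codes it in each wrapper:

    /* Hypothetical helper, not part of the patch: build the 64-bit
     * "direct select address" dword passed as the first argument of
     * every HvCallPci function. */
    static inline u64 HvCallPci_buildDsa(u16 busNumber, u8 subBusNumber,
                                         u8 deviceId, u8 barNumber)
    {
            struct HvCallPci_DsaAddr dsa;

            *((u64*)&dsa) = 0;        /* also clears the reserved bytes */
            dsa.busNumber = busNumber;
            dsa.subBusNumber = subBusNumber;
            dsa.deviceId = deviceId;
            dsa.barNumber = barNumber;
            return *((u64*)&dsa);
    }
]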
+static inline u64 HvCallPci_maskFisr(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 fisrMask) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall2(HvCallPciMaskFisr, *(u64*)&dsa, fisrMask); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_unmaskFisr(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 fisrMask) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall2(HvCallPciUnmaskFisr, *(u64*)&dsa, fisrMask); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_setSlotReset(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 onNotOff) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall2(HvCallPciSetSlotReset, *(u64*)&dsa, onNotOff); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_getDeviceInfo(u16 busNumberParm, + u8 subBusParm, + u8 deviceNumberParm, + u64 parms, + u32 sizeofParms) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceNumberParm << 4; + + retVal = HvCall3(HvCallPciGetDeviceInfo, *(u64*)&dsa, parms, sizeofParms); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_maskInterrupts(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 interruptMask) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall2(HvCallPciMaskInterrupts, *(u64*)&dsa, interruptMask); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ +static inline u64 HvCallPci_unmaskInterrupts(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 interruptMask) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall2(HvCallPciUnmaskInterrupts, *(u64*)&dsa, interruptMask); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} +//============================================================================ + +static inline u64 HvCallPci_getBusUnitInfo(u16 busNumberParm, + u8 subBusParm, + u8 deviceIdParm, + u64 parms, + u32 sizeofParms) +{ + struct HvCallPci_DsaAddr dsa; + u64 retVal; + + *((u64*)&dsa) = 0; + + dsa.busNumber = busNumberParm; + dsa.subBusNumber = subBusParm; + dsa.deviceId = deviceIdParm; + + retVal = HvCall3(HvCallPciGetBusUnitInfo, *(u64*)&dsa, parms, sizeofParms); + + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + + return retVal; +} 
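
Note the one asymmetry above: HvCallPci_getDeviceInfo() takes a device number and shifts it left four bits into the deviceId field, while every other wrapper expects a fully formed agent id. A sketch of the implied encoding follows; that the high nibble is the device number follows from the shift, while the low nibble carrying the function number is an assumption.

typedef unsigned char u8;

/* Illustrative helper, not in the patch: build an agent id from a
 * device (IdSel) number and a presumed function number. */
static inline u8 mkAgentId(u8 deviceNumber, u8 functionNumber)
{
	return (u8)((deviceNumber << 4) | (functionNumber & 0x0f));
}
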
+//============================================================================ + +static inline int HvCallPci_getBusVpd(u16 busNumParm, u64 destParm, u16 sizeParm) { + int xRetSize; + u64 xRc = HvCall4(HvCallPciGetCardVpd, busNumParm, destParm, sizeParm, HvCallPci_BusVpd); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + if (xRc == -1) + xRetSize = -1; + else + xRetSize = xRc & 0xFFFF; + return xRetSize; +} +//============================================================================ + +static inline int HvCallPci_getBusAdapterVpd(u16 busNumParm, u64 destParm, u16 sizeParm) { + int xRetSize; + u64 xRc = HvCall4(HvCallPciGetCardVpd, busNumParm, destParm, sizeParm, HvCallPci_BusAdapterVpd); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + if (xRc == -1) + xRetSize = -1; + else + xRetSize = xRc & 0xFFFF; + return xRetSize; +} +//============================================================================ +#endif // _HVCALLPCI_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallSc.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallSc.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallSc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallSc.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,53 @@ +/* + * HvCallSc.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _HVTYPES_H +#include +#endif + +#ifndef _HVCALLSC_H +#define _HVCALLSC_H + +#define HvCallBase 0x8000000000000000 +#define HvCallCfg 0x8002000000000000 +#define HvCallEvent 0x8003000000000000 +#define HvCallHpt 0x8004000000000000 +#define HvCallPci 0x8005000000000000 +#define HvCallSm 0x8007000000000000 +#define HvCallXm 0x8009000000000000 + +u64 HvCall0( u64 ); +u64 HvCall1( u64, u64 ); +u64 HvCall2( u64, u64, u64 ); +u64 HvCall3( u64, u64, u64, u64 ); +u64 HvCall4( u64, u64, u64, u64, u64 ); +u64 HvCall5( u64, u64, u64, u64, u64, u64 ); +u64 HvCall6( u64, u64, u64, u64, u64, u64, u64 ); +u64 HvCall7( u64, u64, u64, u64, u64, u64, u64, u64 ); + +u64 HvCall0Ret16( u64, void * ); +u64 HvCall1Ret16( u64, void *, u64 ); +u64 HvCall2Ret16( u64, void *, u64, u64 ); +u64 HvCall3Ret16( u64, void *, u64, u64, u64 ); +u64 HvCall4Ret16( u64, void *, u64, u64, u64, u64 ); +u64 HvCall5Ret16( u64, void *, u64, u64, u64, u64, u64 ); +u64 HvCall6Ret16( u64, void *, u64, u64, u64, u64, u64, u64 ); +u64 HvCall7Ret16( u64, void *, u64, u64 ,u64 ,u64 ,u64 ,u64 ,u64 ); + +#endif /* _HVCALLSC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallSm.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallSm.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallSm.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallSm.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,58 @@ +/* + * HvCallSm.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================ +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from the OS. 
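
The HvCall0 through HvCall7 primitives declared in HvCallSc.h above take the call token as their first argument. The component bases line up with the HvCallCompIds enum that HvTypes.h defines later in this patch (HvCallCfgCompId == 2 gives 0x8002..., HvCallPciCompId == 5 gives 0x8005..., HvCallSmCompId == 7 gives 0x8007..., and so on), so a token decomposes into a component id in bits 48-55 plus a small function offset, as the "HvCallSm + 11" style defines just below illustrate. A sketch of that decomposition, inferred from the constants:

#include <stdint.h>

typedef uint64_t u64;

/* Rebuild a call token from its parts; mkHvCallToken(7, 11) equals
 * HvCallSm + 11, i.e. the HvCallSmGet64BitsOfAccessMap define below. */
static inline u64 mkHvCallToken(unsigned component, unsigned function)
{
	return 0x8000000000000000ULL | ((u64)component << 48) | function;
}
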
+// +//============================================================================ + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +//----------------------------------------------------------------------------- +// Constants +//----------------------------------------------------------------------------- +#ifndef _HVCALLSM_H +#define _HVCALLSM_H + +#define HvCallSmGet64BitsOfAccessMap HvCallSm + 11 + + +//============================================================================ +static inline u64 HvCallSm_get64BitsOfAccessMap( + HvLpIndex lpIndex, u64 indexIntoBitMap ) +{ + u64 retval = HvCall2(HvCallSmGet64BitsOfAccessMap, lpIndex, + indexIntoBitMap ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retval; +} +//============================================================================ +#endif // _HVCALLSM_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallXm.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallXm.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvCallXm.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCallXm.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,105 @@ +//============================================================================ +// Header File Id +// Name______________: HvCallXm.H +// +// Description_______: +// +// This file contains the "hypervisor call" interface which is used to +// drive the hypervisor from SLIC. +// +//============================================================================ + +//------------------------------------------------------------------- +// Forward declarations +//------------------------------------------------------------------- + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _HVCALLSC_H +#include "HvCallSc.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +//------------------------------------------------------------------- +// Other Includes +//------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// Constants +//----------------------------------------------------------------------------- +#ifndef _HVCALLXM_H +#define _HVCALLXM_H + +#define HvCallXmGetTceTableParms HvCallXm + 0 +#define HvCallXmTestBus HvCallXm + 1 +#define HvCallXmConnectBusUnit HvCallXm + 2 +#define HvCallXmLoadTod HvCallXm + 8 +#define HvCallXmTestBusUnit HvCallXm + 9 +#define HvCallXmSetTce HvCallXm + 11 +#define HvCallXmSetTces HvCallXm + 13 + + + +//============================================================================ +static inline void HvCallXm_getTceTableParms(u64 cb) +{ + HvCall1(HvCallXmGetTceTableParms, cb); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); +} +//============================================================================ +static inline u64 HvCallXm_setTce(u64 tceTableToken, u64 tceOffset, u64 tce) +{ + u64 retval = HvCall3(HvCallXmSetTce, tceTableToken, tceOffset, tce ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retval; +} +//============================================================================ +static inline u64 HvCallXm_setTces(u64 tceTableToken, u64 tceOffset, u64 numTces, u64 tce1, u64 tce2, u64 
tce3, u64 tce4) +{ + u64 retval = HvCall7(HvCallXmSetTces, tceTableToken, tceOffset, numTces, + tce1, tce2, tce3, tce4 ); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retval; +} +//============================================================================= +static inline u64 HvCallXm_testBus(u16 busNumber) +{ + u64 retVal = HvCall1(HvCallXmTestBus, busNumber); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//===================================================================================== +static inline u64 HvCallXm_testBusUnit(u16 busNumber, u8 subBusNumber, u8 deviceId) { + u64 busUnitNumber = (subBusNumber << 8) | deviceId; + u64 retVal = HvCall2(HvCallXmTestBusUnit, busNumber, busUnitNumber); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//===================================================================================== +static inline u64 HvCallXm_connectBusUnit(u16 busNumber, u8 subBusNumber, u8 deviceId, + u64 interruptToken) +{ + u64 busUnitNumber = (subBusNumber << 8) | deviceId; + u64 queueIndex = 0; // HvLpConfig::mapDsaToQueueIndex(HvLpDSA(busNumber, xBoard, xCard)); + + u64 retVal = HvCall5(HvCallXmConnectBusUnit, busNumber, busUnitNumber, + interruptToken, 0, queueIndex); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//===================================================================================== +static inline u64 HvCallXm_loadTod(void) +{ + u64 retVal = HvCall0(HvCallXmLoadTod); + // getPaca()->adjustHmtForNoOfSpinLocksHeld(); + return retVal; +} +//===================================================================================== + +#endif // _HVCALLXM_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvLpConfig.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvLpConfig.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvLpConfig.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvLpConfig.h Mon Sep 24 18:25:00 2001 @@ -0,0 +1,292 @@ +/* + * HvLpConfig.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//=========================================================================== +// +// This file contains the interface to the LPAR configuration data +// to determine which resources should be allocated to each partition. 
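
HvCallXm_testBusUnit() and HvCallXm_connectBusUnit() above both fold the sub-bus number and the agent id into a single bus-unit number. The packing, pulled out as a standalone helper for clarity (illustrative only, not part of the patch):

#include <stdint.h>

typedef uint64_t u64;
typedef uint8_t u8;

/* Sub-bus number in the second byte, agent id in the low byte, exactly
 * as the two wrappers above compute it inline. */
static inline u64 mkBusUnitNumber(u8 subBusNumber, u8 deviceId)
{
	return ((u64)subBusNumber << 8) | deviceId;
}
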
+// +//=========================================================================== + +#ifndef _HVCALLCFG_H +#include "HvCallCfg.h" +#endif + +#ifndef _HVTYPES_H +#include +#endif + +#ifndef _ITLPNACA_H +#include +#endif + +#ifndef _LPARDATA_H +#include +#endif + +#ifndef _HVLPCONFIG_H +#define _HVLPCONFIG_H + +//------------------------------------------------------------------- +// Constants +//------------------------------------------------------------------- + +extern HvLpIndex HvLpConfig_getLpIndex_outline(void); + +//=================================================================== +static inline HvLpIndex HvLpConfig_getLpIndex(void) +{ + return itLpNaca.xLpIndex; +} +//=================================================================== +static inline HvLpIndex HvLpConfig_getPrimaryLpIndex(void) +{ + return itLpNaca.xPrimaryLpIndex; +} +//================================================================= +static inline HvLpIndex HvLpConfig_getLps(void) +{ + return HvCallCfg_getLps(); +} +//================================================================= +static inline HvLpIndexMap HvLpConfig_getActiveLpMap(void) +{ + return HvCallCfg_getActiveLpMap(); +} +//================================================================= +static inline u64 HvLpConfig_getSystemMsMegs(void) +{ + return HvCallCfg_getSystemMsChunks() / HVCHUNKSPERMEG; +} +//================================================================= +static inline u64 HvLpConfig_getSystemMsChunks(void) +{ + return HvCallCfg_getSystemMsChunks(); +} +//================================================================= +static inline u64 HvLpConfig_getSystemMsPages(void) +{ + return HvCallCfg_getSystemMsChunks() * HVPAGESPERCHUNK; +} +//================================================================ +static inline u64 HvLpConfig_getMsMegs(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Cur) / HVCHUNKSPERMEG; +} +//================================================================ +static inline u64 HvLpConfig_getMsChunks(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Cur); +} +//================================================================ +static inline u64 HvLpConfig_getMsPages(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Cur) * HVPAGESPERCHUNK; +} +//================================================================ +static inline u64 HvLpConfig_getMinMsMegs(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Min) / HVCHUNKSPERMEG; +} +//================================================================ +static inline u64 HvLpConfig_getMinMsChunks(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Min); +} +//================================================================ +static inline u64 HvLpConfig_getMinMsPages(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Min) * HVPAGESPERCHUNK; +} +//================================================================ +static inline u64 HvLpConfig_getMinRuntimeMsMegs(void) +{ + return HvCallCfg_getMinRuntimeMsChunks(HvLpConfig_getLpIndex()) / HVCHUNKSPERMEG; +} +//=============================================================== +static inline u64 HvLpConfig_getMinRuntimeMsChunks(void) +{ + return HvCallCfg_getMinRuntimeMsChunks(HvLpConfig_getLpIndex()); +} +//=============================================================== +static inline u64 HvLpConfig_getMinRuntimeMsPages(void) +{ + return 
HvCallCfg_getMinRuntimeMsChunks(HvLpConfig_getLpIndex()) * HVPAGESPERCHUNK; +} +//=============================================================== +static inline u64 HvLpConfig_getMaxMsMegs(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Max) / HVCHUNKSPERMEG; +} +//=============================================================== +static inline u64 HvLpConfig_getMaxMsChunks(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Max); +} +//=============================================================== +static inline u64 HvLpConfig_getMaxMsPages(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Max) * HVPAGESPERCHUNK; +} +//=============================================================== +static inline u64 HvLpConfig_getInitMsMegs(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Init) / HVCHUNKSPERMEG; +} +//=============================================================== +static inline u64 HvLpConfig_getInitMsChunks(void) +{ + return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Init); +} +//=============================================================== +static inline u64 HvLpConfig_getInitMsPages(void) +{ return HvCallCfg_getMsChunks(HvLpConfig_getLpIndex(),HvCallCfg_Init) * HVPAGESPERCHUNK; +} +//=============================================================== +static inline u64 HvLpConfig_getSystemPhysicalProcessors(void) +{ + return HvCallCfg_getSystemPhysicalProcessors(); +} +//=============================================================== +static inline u64 HvLpConfig_getSystemLogicalProcessors(void) +{ + return HvCallCfg_getSystemPhysicalProcessors() * (/*getPaca()->getSecondaryThreadCount() +*/ 1); +} +//=============================================================== +static inline u64 HvLpConfig_getNumProcsInSharedPool(HvLpSharedPoolIndex sPI) +{ + return HvCallCfg_getNumProcsInSharedPool(sPI); +} +//=============================================================== +static inline u64 HvLpConfig_getPhysicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Cur); +} +//=============================================================== +static inline u64 HvLpConfig_getLogicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Cur) * (/*getPaca()->getSecondaryThreadCount() +*/ 1); +} +//=============================================================== +static inline HvLpSharedPoolIndex HvLpConfig_getSharedPoolIndex(void) +{ + return HvCallCfg_getSharedPoolIndex(HvLpConfig_getLpIndex()); +} +//=============================================================== +static inline u64 HvLpConfig_getSharedProcUnits(void) +{ + return HvCallCfg_getSharedProcUnits(HvLpConfig_getLpIndex(),HvCallCfg_Cur); +} +//=============================================================== +static inline u64 HvLpConfig_getMinSharedProcUnits(void) +{ + return HvCallCfg_getSharedProcUnits(HvLpConfig_getLpIndex(),HvCallCfg_Min); +} +//=============================================================== +static inline u64 HvLpConfig_getMaxSharedProcUnits(void) +{ + return HvCallCfg_getSharedProcUnits(HvLpConfig_getLpIndex(),HvCallCfg_Max); +} +//=============================================================== +static inline u64 HvLpConfig_getMinPhysicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Min); +} +//=============================================================== +static inline u64 
HvLpConfig_getMinLogicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Min) * (/*getPaca()->getSecondaryThreadCount() +*/ 1); +} +//=============================================================== +static inline u64 HvLpConfig_getMaxPhysicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Max); +} +//=============================================================== +static inline u64 HvLpConfig_getMaxLogicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Max) * (/*getPaca()->getSecondaryThreadCount() +*/ 1); +} +//=============================================================== +static inline u64 HvLpConfig_getInitPhysicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Init); +} +//=============================================================== +static inline u64 HvLpConfig_getInitLogicalProcessors(void) +{ + return HvCallCfg_getPhysicalProcessors(HvLpConfig_getLpIndex(),HvCallCfg_Init) * (/*getPaca()->getSecondaryThreadCount() +*/ 1); +} +//================================================================ +static inline HvLpVirtualLanIndexMap HvLpConfig_getVirtualLanIndexMap(void) +{ + return HvCallCfg_getVirtualLanIndexMap(HvLpConfig_getLpIndex()); +} +//=============================================================== +static inline HvLpVirtualLanIndexMap HvLpConfig_getVirtualLanIndexMapForLp(HvLpIndex lp) +{ + return HvCallCfg_getVirtualLanIndexMap(lp); +} +//================================================================ +static inline HvLpIndex HvLpConfig_getBusOwner(HvBusNumber busNumber) +{ + return HvCallCfg_getBusOwner(busNumber); +} +//=============================================================== +static inline int HvLpConfig_isBusDedicated(HvBusNumber busNumber) +{ + return HvCallCfg_isBusDedicated(busNumber); +} +//================================================================ +static inline HvLpIndexMap HvLpConfig_getBusAllocation(HvBusNumber busNumber) +{ + return HvCallCfg_getBusAllocation(busNumber); +} +//================================================================ +// returns the absolute real address of the load area +static inline u64 HvLpConfig_getLoadAddress(void) +{ + return itLpNaca.xLoadAreaAddr & 0x7fffffffffffffff; +} +//================================================================ +static inline u64 HvLpConfig_getLoadPages(void) +{ + return itLpNaca.xLoadAreaChunks * HVPAGESPERCHUNK; +} +//================================================================ +static inline int HvLpConfig_isBusOwnedByThisLp(HvBusNumber busNumber) +{ + HvLpIndex busOwner = HvLpConfig_getBusOwner(busNumber); + return (busOwner == HvLpConfig_getLpIndex()); +} +//================================================================ +static inline int HvLpConfig_doLpsCommunicateOnVirtualLan(HvLpIndex lp1, HvLpIndex lp2) +{ + HvLpVirtualLanIndexMap virtualLanIndexMap1 = HvCallCfg_getVirtualLanIndexMap( lp1 ); + HvLpVirtualLanIndexMap virtualLanIndexMap2 = HvCallCfg_getVirtualLanIndexMap( lp2 ); + return ((virtualLanIndexMap1 & virtualLanIndexMap2) != 0); +} +//================================================================ +static inline HvLpIndex HvLpConfig_getHostingLpIndex(HvLpIndex lp) +{ + return HvCallCfg_getHostingLpIndex(lp); +} +//================================================================ + +#endif // _HVLPCONFIG_H diff -uNr --exclude=CVS 
../kernel.org/linux/include/asm-ppc64/iSeries/HvLpEvent.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvLpEvent.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/HvLpEvent.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvLpEvent.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,144 @@ +/* + * HvLpEvent.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//====================================================================== +// +// This file contains the class for HV events in the system. +// +//===================================================================== +#ifndef _HVLPEVENT_H +#define _HVLPEVENT_H + +#include +#include +#include +#ifndef _HVCALLEVENT_H +#include +#endif + + +//===================================================================== +// +// HvLpEvent is the structure for Lp Event messages passed between +// partitions through PLIC. +// +//===================================================================== + +struct HvEventFlags +{ + u8 xValid:1; // Indicates a valid request x00-x00 + u8 xRsvd1:4; // Reserved ... + u8 xAckType:1; // Immediate or deferred ... + u8 xAckInd:1; // Indicates if ACK required ... + u8 xFunction:1; // Interrupt or Acknowledge ... +}; + + +struct HvLpEvent +{ + struct HvEventFlags xFlags; // Event flags x00-x00 + u8 xType; // Type of message x01-x01 + u16 xSubtype; // Subtype for event x02-x03 + u8 xSourceLp; // Source LP x04-x04 + u8 xTargetLp; // Target LP x05-x05 + u8 xSizeMinus1; // Size of Derived class - 1 x06-x06 + u8 xRc; // RC for Ack flows x07-x07 + u16 xSourceInstanceId; // Source sides instance id x08-x09 + u16 xTargetInstanceId; // Target sides instance id x0A-x0B + union { + u32 xSubtypeData; // Data usable by the subtype x0C-x0F + u16 xSubtypeDataShort[2]; // Data as 2 shorts + u8 xSubtypeDataChar[4]; // Data as 4 chars + } x; + + u64 xCorrelationToken; // Unique value for source/type x10-x17 +}; + +// Lp Event handler function +typedef void (*LpEventHandler)(struct HvLpEvent *, struct pt_regs *); + +// Register a handler for an event type +// returns 0 on success +extern int HvLpEvent_registerHandler( HvLpEvent_Type eventType, LpEventHandler hdlr); + +// Unregister a handler for an event type +// returns 0 on success +// Unregister will fail if there are any paths open for the type +extern int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType ); + +// Open an Lp Event Path for an event type +// returns 0 on success +// openPath will fail if there is no handler registered for the event type. 
+// The lpIndex specified is the partition index for the target partition
+// (for VirtualIo, VirtualLan and SessionMgr); other types specify zero
+extern int HvLpEvent_openPath( HvLpEvent_Type eventType, HvLpIndex lpIndex );
+
+
+// Close an Lp Event Path for a type and partition
+// returns 0 on success
+extern int HvLpEvent_closePath( HvLpEvent_Type eventType, HvLpIndex lpIndex );
+
+#define HvLpEvent_Type_Hypervisor 0
+#define HvLpEvent_Type_MachineFac 1
+#define HvLpEvent_Type_SessionMgr 2
+#define HvLpEvent_Type_SpdIo 3
+#define HvLpEvent_Type_VirtualBus 4
+#define HvLpEvent_Type_PciIo 5
+#define HvLpEvent_Type_RioIo 6
+#define HvLpEvent_Type_VirtualLan 7
+#define HvLpEvent_Type_VirtualIo 8
+#define HvLpEvent_Type_NumTypes 9
+
+#define HvLpEvent_Rc_Good 0
+#define HvLpEvent_Rc_BufferNotAvailable 1
+#define HvLpEvent_Rc_Cancelled 2
+#define HvLpEvent_Rc_GenericError 3
+#define HvLpEvent_Rc_InvalidAddress 4
+#define HvLpEvent_Rc_InvalidPartition 5
+#define HvLpEvent_Rc_InvalidSize 6
+#define HvLpEvent_Rc_InvalidSubtype 7
+#define HvLpEvent_Rc_InvalidSubtypeData 8
+#define HvLpEvent_Rc_InvalidType 9
+#define HvLpEvent_Rc_PartitionDead 10
+#define HvLpEvent_Rc_PathClosed 11
+#define HvLpEvent_Rc_SubtypeError 12
+
+#define HvLpEvent_Function_Ack 0
+#define HvLpEvent_Function_Int 1
+
+#define HvLpEvent_AckInd_NoAck 0
+#define HvLpEvent_AckInd_DoAck 1
+
+#define HvLpEvent_AckType_ImmediateAck 0
+#define HvLpEvent_AckType_DeferredAck 1
+
+#define HvLpDma_Direction_LocalToRemote 0
+#define HvLpDma_Direction_RemoteToLocal 1
+
+#define HvLpDma_AddressType_TceIndex 0
+#define HvLpDma_AddressType_RealAddress 1
+
+#define HvLpDma_Rc_Good 0
+#define HvLpDma_Rc_Error 1
+#define HvLpDma_Rc_PartitionDead 2
+#define HvLpDma_Rc_PathClosed 3
+#define HvLpDma_Rc_InvalidAddress 4
+#define HvLpDma_Rc_InvalidLength 5
+
+#endif // _HVLPEVENT_H
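
The comments in HvLpEvent.h above pin down an ordering: a handler must be registered before a path can be opened, and every path must be closed before the handler can be unregistered. A sketch of the open side follows; the handler body and both function names are hypothetical, while the types, constants, and return conventions come from the declarations above.

/* Hypothetical handler; field and constant names are from HvLpEvent.h. */
static void myVlanHandler(struct HvLpEvent * event, struct pt_regs * regs)
{
	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
		/* process the interrupt, using event->xSourceLp etc. */
	}
}

static int openVlanPath(HvLpIndex remoteLp)
{
	int rc = HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
					   &myVlanHandler);
	if (rc != 0)
		return rc;
	/* openPath fails if no handler is registered for the type */
	return HvLpEvent_openPath(HvLpEvent_Type_VirtualLan, remoteLp);
}
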
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvReleaseData.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvReleaseData.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/HvReleaseData.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvReleaseData.h Fri Aug 17 13:03:16 2001
@@ -0,0 +1,70 @@
+/*
+ * HvReleaseData.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//=============================================================================
+//
+// This control block contains the critical information about the
+// release so that it can be changed in the future (ie, the virtual
+// address of the OS's NACA).
+//
+//-----------------------------------------------------------------------------
+// Standard Includes
+//-----------------------------------------------------------------------------
+#ifndef _PPC64_TYPES_H
+#include
+#endif
+
+#ifndef _HVRELEASEDATA_H
+#define _HVRELEASEDATA_H
+
+//=============================================================================
+//
+// When we IPL a secondary partition, we will check if the
+// secondary xMinPlicVrmIndex > the primary xVrmIndex.
+// If it is then this tells PLIC that this secondary is not
+// supported running on this "old" of a level of PLIC.
+//
+// Likewise, we will compare the primary xMinSlicVrmIndex to
+// the secondary xVrmIndex.
+// If the primary xMinSlicVrmDelta > secondary xVrmDelta then we
+// know that this PLIC does not support running an OS "that old".
+//
+//=============================================================================
+
+struct HvReleaseData
+{
+ u32 xDesc; // Descriptor "HvRD" ebcdic x00-x03
+ u16 xSize; // Size of this control block x04-x05
+ u16 xVpdAreasPtrOffset; // Offset in NACA of ItVpdAreas x06-x07
+ struct Naca * xSlicNacaAddr; // Virtual address of SLIC NACA x08-x0F
+ u32 xMsNucDataOffset; // Offset of Linux Mapping Data x10-x13
+ u32 xRsvd1; // Reserved x14-x17
+ u16 xTagsMode:1; // 0 == tags active, 1 == tags inactive
+ u16 xAddressSize:1; // 0 == 64-bit, 1 == 32-bit
+ u16 xNoSharedProcs:1; // 0 == shared procs, 1 == no shared
+ u16 xNoHMT:1; // 0 == allow HMT, 1 == no HMT
+ u16 xRsvd2:12; // Reserved x18-x19
+ u16 xVrmIndex; // VRM Index of OS image x1A-x1B
+ u16 xMinSupportedPlicVrmIndex;// Min PLIC level (soft) x1C-x1D
+ u16 xMinCompatablePlicVrmIndex;// Min PLIC level (hard) x1E-x1F
+ char xVrmName[12]; // Displayable name x20-x2B
+ char xRsvd3[20]; // Reserved x2C-x3F
+};
+
+#endif // _HVRELEASEDATA_H
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/HvTypes.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvTypes.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/HvTypes.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvTypes.h Mon Sep 24 18:25:00 2001
@@ -0,0 +1,127 @@
+/*
+ * HvTypes.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//===========================================================================
+// Header File Id
+// Name______________: HvTypes.H
+//
+// Description_______:
+//
+// General typedefs for the hypervisor.
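
The VRM comparison described in the HvReleaseData comment above reduces to two one-line checks. Spelled out as plain C for clarity (illustrative only; the checks actually run inside PLIC, and the parameter names are taken from that comment rather than from any function in the patch):

typedef unsigned short u16;

/* A secondary partition may not IPL if it demands a newer PLIC... */
static inline int osNeedsNewerPlic(u16 osMinPlicVrmIndex, u16 plicVrmIndex)
{
	return osMinPlicVrmIndex > plicVrmIndex;
}

/* ...and PLIC refuses an OS older than its own supported minimum. */
static inline int plicRefusesOldOs(u16 plicMinSlicVrmIndex, u16 osVrmIndex)
{
	return plicMinSlicVrmIndex > osVrmIndex;
}
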
+// +// Declared Class(es): +// +//=========================================================================== + +#ifndef _PPC_TYPES_H +#include +#endif + + +#ifndef _HVTYPES_H +#define _HVTYPES_H + +//------------------------------------------------------------------- +// Typedefs +//------------------------------------------------------------------- +typedef u8 HvLpIndex; +typedef u16 HvLpInstanceId; +typedef u64 HvLpTOD; +typedef u64 HvLpSystemSerialNum; +typedef u8 HvLpDeviceSerialNum[12]; +typedef u16 HvLpSanHwSet; +typedef u16 HvLpBus; +typedef u16 HvLpBoard; +typedef u16 HvLpCard; +typedef u8 HvLpDeviceType[4]; +typedef u8 HvLpDeviceModel[3]; +typedef u64 HvIoToken; +typedef u8 HvLpName[8]; +typedef u32 HvIoId; +typedef u64 HvRealMemoryIndex; +typedef u32 HvLpIndexMap; // Must hold HvMaxArchitectedLps bits!!! +typedef u16 HvLpVrmIndex; +typedef u32 HvXmGenerationId; +typedef u8 HvLpBusPool; +typedef u8 HvLpSharedPoolIndex; +typedef u16 HvLpSharedProcUnitsX100; +typedef u8 HvLpVirtualLanIndex; +typedef u16 HvLpVirtualLanIndexMap; // Must hold HvMaxArchitectedVirtualLans bits!!! +typedef u16 HvBusNumber; // Hypervisor Bus Number +typedef u8 HvSubBusNumber; // Hypervisor SubBus Number +typedef u8 HvAgentId; // Hypervisor DevFn + + +#define HVMAXARCHITECTEDLPS 32 +#define HVCHUNKSIZE 256 * 1024 +#define HVPAGESIZE 4 * 1024 +#define HVLPMINMEGSPRIMARY 256 +#define HVLPMINMEGSSECONDARY 64 +#define HVCHUNKSPERMEG 4 +#define HVPAGESPERMEG 256 +#define HVPAGESPERCHUNK 64 + +#define HvMaxArchitectedLps ((HvLpIndex)HVMAXARCHITECTEDLPS) +#define HvMaxArchitectedVirtualLans ((HvLpVirtualLanIndex)16) +#define HvLpIndexInvalid ((HvLpIndex)0xff) + +//-------------------------------------------------------------------- +// Enums for the sub-components under PLIC +// Used in HvCall and HvPrimaryCall +//-------------------------------------------------------------------- +enum HvCallCompIds +{ + HvCallCompId = 0, + HvCallCpuCtlsCompId = 1, + HvCallCfgCompId = 2, + HvCallEventCompId = 3, + HvCallHptCompId = 4, + HvCallPciCompId = 5, + HvCallSlmCompId = 6, + HvCallSmCompId = 7, + HvCallSpdCompId = 8, + HvCallXmCompId = 9, + HvCallRioCompId = 10, + HvCallRsvd3CompId = 11, + HvCallRsvd2CompId = 12, + HvCallRsvd1CompId = 13, + HvCallMaxCompId = 14, + HvPrimaryCallCompId = 0, + HvPrimaryCallCfgCompId = 1, + HvPrimaryCallPciCompId = 2, + HvPrimaryCallSmCompId = 3, + HvPrimaryCallSpdCompId = 4, + HvPrimaryCallXmCompId = 5, + HvPrimaryCallRioCompId = 6, + HvPrimaryCallRsvd7CompId = 7, + HvPrimaryCallRsvd6CompId = 8, + HvPrimaryCallRsvd5CompId = 9, + HvPrimaryCallRsvd4CompId = 10, + HvPrimaryCallRsvd3CompId = 11, + HvPrimaryCallRsvd2CompId = 12, + HvPrimaryCallRsvd1CompId = 13, + HvPrimaryCallMaxCompId = HvCallMaxCompId +}; + +struct HvLpBufferList { + u64 addr; + u64 len; +}; + +#endif // _HVTYPES_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/IoHriMainStore.h linuxppc64_2_4/include/asm-ppc64/iSeries/IoHriMainStore.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/IoHriMainStore.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/IoHriMainStore.h Fri Oct 19 15:37:15 2001 @@ -0,0 +1,167 @@ +/* + * IoHriMainStore.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _IOHRIMAINSTORE_H +#define _IOHRIMAINSTORE_H + +/* Main Store Vpd for Condor,iStar,sStar */ +struct IoHriMainStoreSegment4 { + u8 msArea0Exists:1; + u8 msArea1Exists:1; + u8 msArea2Exists:1; + u8 msArea3Exists:1; + u8 reserved1:4; + u8 reserved2; + + u8 msArea0Functional:1; + u8 msArea1Functional:1; + u8 msArea2Functional:1; + u8 msArea3Functional:1; + u8 reserved3:4; + u8 reserved4; + + u32 totalMainStore; + + u64 msArea0Ptr; + u64 msArea1Ptr; + u64 msArea2Ptr; + u64 msArea3Ptr; + + u32 cardProductionLevel; + + u32 msAdrHole; + + u8 msArea0HasRiserVpd:1; + u8 msArea1HasRiserVpd:1; + u8 msArea2HasRiserVpd:1; + u8 msArea3HasRiserVpd:1; + u8 reserved5:4; + u8 reserved6; + u16 reserved7; + + u8 reserved8[28]; + + u64 nonInterleavedBlocksStartAdr; + u64 nonInterleavedBlocksEndAdr; +}; + +/* Main Store VPD for Power4 */ +struct IoHriMainStoreChipInfo1 { + u32 chipMfgID __attribute((packed)); + char chipECLevel[4] __attribute((packed)); +}; + +struct IoHriMainStoreVpdIdData { + char typeNumber[4]; + char modelNumber[4]; + char partNumber[12]; + char serialNumber[12]; +}; + +struct IoHriMainStoreVpdFruData { + char fruLabel[8] __attribute((packed)); + u8 numberOfSlots __attribute((packed)); + u8 pluggingType __attribute((packed)); + u16 slotMapIndex __attribute((packed)); +}; + +struct IoHriMainStoreAdrRangeBlock { + void * blockStart __attribute((packed)); + void * blockEnd __attribute((packed)); + u32 blockProcChipId __attribute((packed)); +}; + +#define MaxAreaAdrRangeBlocks 4 + +struct IoHriMainStoreArea4 { + u32 msVpdFormat __attribute((packed)); + u8 containedVpdType __attribute((packed)); + u8 reserved1 __attribute((packed)); + u16 reserved2 __attribute((packed)); + + u64 msExists __attribute((packed)); + u64 msFunctional __attribute((packed)); + + u32 memorySize __attribute((packed)); + u32 procNodeId __attribute((packed)); + + u32 numAdrRangeBlocks __attribute((packed)); + struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks] __attribute((packed)); + + struct IoHriMainStoreChipInfo1 chipInfo0 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo1 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo2 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo3 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo4 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo5 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo6 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo7 __attribute((packed)); + + void * msRamAreaArray __attribute((packed)); + u32 msRamAreaArrayNumEntries __attribute((packed)); + u32 msRamAreaArrayEntrySize __attribute((packed)); + + u32 numaDimmExists __attribute((packed)); + u32 numaDimmFunctional __attribute((packed)); + void * numaDimmArray __attribute((packed)); + u32 numaDimmArrayNumEntries __attribute((packed)); + u32 numaDimmArrayEntrySize __attribute((packed)); + + struct IoHriMainStoreVpdIdData idData __attribute((packed)); + + u64 powerData __attribute((packed)); + u64 cardAssemblyPartNum 
__attribute((packed)); + u64 chipSerialNum __attribute((packed)); + + u64 reserved3 __attribute((packed)); + char reserved4[16] __attribute((packed)); + + struct IoHriMainStoreVpdFruData fruData __attribute((packed)); + + u8 vpdPortNum __attribute((packed)); + u8 reserved5 __attribute((packed)); + u8 frameId __attribute((packed)); + u8 rackUnit __attribute((packed)); + char asciiKeywordVpd[256] __attribute((packed)); + u32 reserved6 __attribute((packed)); +}; + + +struct IoHriMainStoreSegment5 { + u16 reserved1; + u8 reserved2; + u8 msVpdFormat; + + u32 totalMainStore; + u64 maxConfiguredMsAdr; + + struct IoHriMainStoreArea4* msAreaArray; + u32 msAreaArrayNumEntries; + u32 msAreaArrayEntrySize; + + u32 msAreaExists; + u32 msAreaFunctional; + + u64 reserved3; +}; + + + +#endif // _IOHRIMAINSTORE_H + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/IoHriProcessorVpd.h linuxppc64_2_4/include/asm-ppc64/iSeries/IoHriProcessorVpd.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/IoHriProcessorVpd.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/IoHriProcessorVpd.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,90 @@ +/* + * IoHriProcessorVpd.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//=================================================================== +// +// This struct maps Processor Vpd that is DMAd to SLIC by CSP +// + +#ifndef _TYPES_H +#include +#endif + +#ifndef _IOHRIPROCESSORVPD_H +#define _IOHRIPROCESSORVPD_H + +struct IoHriProcessorVpd +{ + + u8 xFormat; // VPD format indicator x00-x00 + u8 xProcStatus:8; // Processor State x01-x01 + u8 xSecondaryThreadCount; // Secondary thread cnt x02-x02 + u8 xSrcType:1; // Src Type x03-x03 + u8 xSrcSoft:1; // Src stay soft ... + u8 xSrcParable:1; // Src parable ... + u8 xRsvd1:5; // Reserved ... 
+ u16 xHvPhysicalProcIndex; // Hypervisor physical proc index04-x05 + u16 xRsvd2; // Reserved x06-x07 + u32 xHwNodeId; // Hardware node id x08-x0B + u32 xHwProcId; // Hardware processor id x0C-x0F + + u32 xTypeNum; // Card Type/CCIN number x10-x13 + u32 xModelNum; // Model/Feature number x14-x17 + u64 xSerialNum; // Serial number x18-x1F + char xPartNum[12]; // Book Part or FPU number x20-x2B + char xMfgID[4]; // Manufacturing ID x2C-x2F + + u32 xProcFreq; // Processor Frequency x30-x33 + u32 xTimeBaseFreq; // Time Base Frequency x34-x37 + + u32 xChipEcLevel; // Chip EC Levels x38-x3B + u32 xProcIdReg; // PIR SPR value x3C-x3F + u32 xPVR; // PVR value x40-x43 + u8 xRsvd3[12]; // Reserved x44-x4F + + u32 xInstCacheSize; // Instruction cache size in KB x50-x53 + u32 xInstBlockSize; // Instruction cache block size x54-x57 + u32 xDataCacheOperandSize; // Data cache operand size x58-x5B + u32 xInstCacheOperandSize; // Inst cache operand size x5C-x5F + + u32 xDataL1CacheSizeKB; // L1 data cache size in KB x60-x63 + u32 xDataL1CacheLineSize; // L1 data cache block size x64-x67 + u64 xRsvd4; // Reserved x68-x6F + + u32 xDataL2CacheSizeKB; // L2 data cache size in KB x70-x73 + u32 xDataL2CacheLineSize; // L2 data cache block size x74-x77 + u64 xRsvd5; // Reserved x78-x7F + + u32 xDataL3CacheSizeKB; // L3 data cache size in KB x80-x83 + u32 xDataL3CacheLineSize; // L3 data cache block size x84-x87 + u64 xRsvd6; // Reserved x88-x8F + + u64 xFruLabel; // Card Location Label x90-x97 + u8 xSlotsOnCard; // Slots on card (0=no slots) x98-x98 + u8 xPartLocFlag; // Location flag (0-pluggable 1-imbedded) x99-x99 + u16 xSlotMapIndex; // Index in slot map table x9A-x9B + u8 xSmartCardPortNo; // Smart card port number x9C-x9C + u8 xRsvd7; // Reserved x9D-x9D + u16 xFrameIdAndRackUnit; // Frame ID and rack unit adr x9E-x9F + + u8 xRsvd8[24]; // Reserved xA0-xB7 + + char xProcSrc[72]; // CSP format SRC xB8-xFF +}; +#endif // _IOHRIPROCESSORVPD_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItIplParmsReal.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItIplParmsReal.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/ItIplParmsReal.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItIplParmsReal.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,78 @@ +/* + * ItIplParmsReal.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================== +// +// This struct maps the IPL Parameters DMA'd from the SP. 
+// +// Warning: +// This data must map in exactly 64 bytes and match the architecture for +// the IPL parms +// +//============================================================================= + + +//------------------------------------------------------------------- +// Standard Includes +//------------------------------------------------------------------- +#ifndef _PPC_TYPES_H +#include +#endif + +#ifndef _ITIPLPARMSREAL_H +#define _ITIPLPARMSREAL_H + +struct ItIplParmsReal +{ + u8 xFormat; // Defines format of IplParms x00-x00 + u8 xRsvd01:6; // Reserved x01-x01 + u8 xAlternateSearch:1; // Alternate search indicator ... + u8 xUaSupplied:1; // UA Supplied on programmed IPL ... + u8 xLsUaFormat; // Format byte for UA x02-x02 + u8 xRsvd02; // Reserved x03-x03 + u32 xLsUa; // LS UA x04-x07 + u32 xUnusedLsLid; // First OS LID to load x08-x0B + u16 xLsBusNumber; // LS Bus Number x0C-x0D + u8 xLsCardAdr; // LS Card Address x0E-x0E + u8 xLsBoardAdr; // LS Board Address x0F-x0F + u32 xRsvd03; // Reserved x10-x13 + u8 xSpcnPresent:1; // SPCN present x14-x14 + u8 xCpmPresent:1; // CPM present ... + u8 xRsvd04:6; // Reserved ... + u8 xRsvd05:4; // Reserved x15-x15 + u8 xKeyLock:4; // Keylock setting ... + u8 xRsvd06:6; // Reserved x16-x16 + u8 xIplMode:2; // Ipl mode (A|B|C|D) ... + u8 xHwIplType; // Fast v slow v slow EC HW IPL x17-x17 + u16 xCpmEnabledIpl:1; // CPM in effect when IPL initiated x18-x19 + u16 xPowerOnResetIpl:1; // Indicate POR condition ... + u16 xMainStorePreserved:1; // Main Storage is preserved ... + u16 xRsvd07:13; // Reserved ... + u16 xIplSource:16; // Ipl source x1A-x1B + u8 xIplReason:8; // Reason for this IPL x1C-x1C + u8 xRsvd08; // Reserved x1D-x1D + u16 xRsvd09; // Reserved x1E-x1F + u16 xSysBoxType; // System Box Type x20-x21 + u16 xSysProcType; // System Processor Type x22-x23 + u32 xRsvd10; // Reserved x24-x27 + u64 xRsvd11; // Reserved x28-x2F + u64 xRsvd12; // Reserved x30-x37 + u64 xRsvd13; // Reserved x38-x3F +}; +#endif // _ITIPLPARMSREAL_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpNaca.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpNaca.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpNaca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpNaca.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,90 @@ +/* + * ItLpNaca.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================= +// +// This control block contains the data that is shared between the +// hypervisor (PLIC) and the OS. 
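
ItIplParmsReal.h above warns that the struct "must map in exactly 64 bytes and match the architecture". That invariant can be made machine-checked with the negative-array-size idiom common in kernels of this era (a sketch, not part of the patch; the check name is invented here):

/* Fails to compile (array of negative size) if the layout drifts from
 * the architected 64 bytes. */
extern char ItIplParmsReal_fits_64_bytes
	[(sizeof(struct ItIplParmsReal) == 64) ? 1 : -1];
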
+// +//============================================================================= + + +#ifndef _ITLPNACA_H +#define _ITLPNACA_H + +struct ItLpNaca +{ +//============================================================================= +// CACHE_LINE_1 0x0000 - 0x007F Contains read-only data +//============================================================================= + u32 xDesc; // Eye catcher x00-x03 + u16 xSize; // Size of this class x04-x05 + u16 xIntHdlrOffset; // Offset to IntHdlr array x06-x07 + u8 xMaxIntHdlrEntries; // Number of entries in array x08-x08 + u8 xPrimaryLpIndex; // LP Index of Primary x09-x09 + u8 xServiceLpIndex; // LP Ind of Service Focal Pointx0A-x0A + u8 xLpIndex; // LP Index x0B-x0B + u16 xMaxLpQueues; // Number of allocated queues x0C-x0D + u16 xLpQueueOffset; // Offset to start of LP queues x0E-x0F + u8 xPirEnvironMode:8; // Piranha or hardware x10-x10 + u8 xPirConsoleMode:8; // Piranha console indicator x11-x11 + u8 xPirDasdMode:8; // Piranha dasd indicator x12-x12 + u8 xRsvd1_0[5]; // Reserved for Piranha related x13-x17 + u8 xLparInstalled:1; // Is LPAR installed on system x18-x1F + u8 xSysPartitioned:1; // Is the system partitioned ... + u8 xHwSyncedTBs:1; // Hardware synced TBs ... + u8 xIntProcUtilHmt:1; // Utilize HMT for interrupts ... + u8 xRsvd1_1:4; // Reserved ... + u8 xSpVpdFormat:8; // VPD areas are in CSP format ... + u8 xIntProcRatio:8; // Ratio of int procs to procs ... + u8 xRsvd1_2[5]; // Reserved ... + u16 xRsvd1_3; // Reserved x20-x21 + u16 xPlicVrmIndex; // VRM index of PLIC x22-x23 + u16 xMinSupportedSlicVrmInd;// Min supported OS VRM index x24-x25 + u16 xMinCompatableSlicVrmInd;// Min compatable OS VRM index x26-x27 + u64 xLoadAreaAddr; // ER address of load area x28-x2F + u32 xLoadAreaChunks; // Chunks for the load area x30-x33 + u32 xPaseSysCallCRMask; // Mask used to test CR before x34-x37 + // doing an ASR switch on PASE + // system call. + u64 xSlicSegmentTablePtr; // Pointer to Slic seg table. x38-x3f + u8 xRsvd1_4[64]; // x40-x7F + +//============================================================================= +// CACHE_LINE_2 0x0080 - 0x00FF Contains local read-write data +//============================================================================= + u8 xRsvd2_0[128]; // Reserved x00-x7F + +//============================================================================= +// CACHE_LINE_3-6 0x0100 - 0x02FF Contains LP Queue indicators +// NB: Padding required to keep xInterrruptHdlr at x300 which is required +// for v4r4 PLIC. 
+//============================================================================= + u8 xOldLpQueue[128]; // LP Queue needed for v4r4 100-17F + u8 xRsvd3_0[384]; // Reserved 180-2FF +//============================================================================= +// CACHE_LINE_7-8 0x0300 - 0x03FF Contains the address of the OS interrupt +// handlers +//============================================================================= + u64 xInterruptHdlr[32]; // Interrupt handlers 300-x3FF +}; + +//============================================================================= + +#endif // _ITLPNACA_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpPaca.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpPaca.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpPaca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpPaca.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,134 @@ +/* + * ItLpPaca.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================= +// +// This control block contains the data that is shared between the +// hypervisor (PLIC) and the OS. +// +// +//---------------------------------------------------------------------------- +#ifndef _PPC_TYPES_H +#include +#endif + +#ifndef _ITLPPACA_H +#define _ITLPPACA_H + + +struct ItLpPaca +{ +//============================================================================= +// CACHE_LINE_1 0x0000 - 0x007F Contains read-only data +// NOTE: The xDynXyz fields are fields that will be dynamically changed by +// PLIC when preparing to bring a processor online or when dispatching a +// virtual processor! +//============================================================================= + u32 xDesc; // Eye catcher 0xD397D781 x00-x03 + u16 xSize; // Size of this struct x04-x05 + u16 xRsvd1_0; // Reserved x06-x07 + u16 xRsvd1_1:14; // Reserved x08-x09 + u8 xSharedProc:1; // Shared processor indicator ... + u8 xSecondaryThread:1; // Secondary thread indicator ... 
+ volatile u8 xDynProcStatus:8; // Dynamic Status of this proc x0A-x0A + u8 xSecondaryThreadCnt; // Secondary thread count x0B-x0B + volatile u16 xDynHvPhysicalProcIndex;// Dynamic HV Physical Proc Index0C-x0D + volatile u16 xDynHvLogicalProcIndex;// Dynamic HV Logical Proc Indexx0E-x0F + u32 xDecrVal; // Value for Decr programming x10-x13 + u32 xPMCVal; // Value for PMC regs x14-x17 + volatile u32 xDynHwNodeId; // Dynamic Hardware Node id x18-x1B + volatile u32 xDynHwProcId; // Dynamic Hardware Proc Id x1C-x1F + volatile u32 xDynPIR; // Dynamic ProcIdReg value x20-x23 + u32 xDseiData; // DSEI data x24-x27 + u64 xSPRG3; // SPRG3 value x28-x2F + u8 xRsvd1_3[80]; // Reserved x30-x7F + +//============================================================================= +// CACHE_LINE_2 0x0080 - 0x00FF Contains local read-write data +//============================================================================= + // This Dword contains a byte for each type of interrupt that can occur. + // The IPI is a count while the others are just a binary 1 or 0. + union { + u64 xAnyInt; + struct { + u16 xRsvd; // Reserved - cleared by #mpasmbl + u8 xXirrInt; // Indicates xXirrValue is valid or Immed IO + u8 xIpiCnt; // IPI Count + u8 xDecrInt; // DECR interrupt occurred + u8 xPdcInt; // PDC interrupt occurred + u8 xQuantumInt; // Interrupt quantum reached + u8 xOldPlicDeferredExtInt; // Old PLIC has a deferred XIRR pending + } xFields; + } xIntDword; + + // Whenever any fields in this Dword are set then PLIC will defer the + // processing of external interrupts. Note that PLIC will store the + // XIRR directly into the xXirrValue field so that another XIRR will + // not be presented until this one clears. The layout of the low + // 4-bytes of this Dword is upto SLIC - PLIC just checks whether the + // entire Dword is zero or not. A non-zero value in the low order + // 2-bytes will result in SLIC being granted the highest thread + // priority upon return. A 0 will return to SLIC as medium priority. + u64 xPlicDeferIntsArea; // Entire Dword + + // Used to pass the real SRR0/1 from PLIC to SLIC as well as to + // pass the target SRR0/1 from SLIC to PLIC on a SetAsrAndRfid. + u64 xSavedSrr0; // Saved SRR0 x10-x17 + u64 xSavedSrr1; // Saved SRR1 x18-x1F + + // Used to pass parms from the OS to PLIC for SetAsrAndRfid + u64 xSavedGpr3; // Saved GPR3 x20-x27 + u64 xSavedGpr4; // Saved GPR4 x28-x2F + u64 xSavedGpr5; // Saved GPR5 x30-x37 + + u8 xRsvd2_1; // Reserved x38-x38 + u8 xCpuCtlsTaskAttributes; // Task attributes for cpuctls x39-x39 + u8 xFPRegsInUse; // FP regs in use x3A-x3A + u8 xPMCRegsInUse; // PMC regs in use x3B-x3B + volatile u32 xSavedDecr; // Saved Decr Value x3C-x3F + volatile u64 xEmulatedTimeBase; // Emulated TB for this thread x40-x47 + volatile u64 xCurPLICLatency; // Unaccounted PLIC latency x48-x4F + u64 xTotPLICLatency; // Accumulated PLIC latency x50-x57 + u64 xWaitStateCycles; // Wait cycles for this proc x58-x5F + u64 xEndOfQuantum; // TB at end of quantum x60-x67 + u64 xPDCSavedSPRG1; // Saved SPRG1 for PMC int x68-x6F + u64 xPDCSavedSRR0; // Saved SRR0 for PMC int x70-x77 + volatile u32 xVirtualDecr; // Virtual DECR for shared procsx78-x7B + u32 xRsvd2_2; // Reserved x7C-x7F + +//============================================================================= +// CACHE_LINE_3 0x0100 - 0x007F: This line is shared with other processors +//============================================================================= + // This is the xYieldCount. 
An "odd" value (low bit on) means that + // the processor is yielded (either because of an OS yield or a PLIC + // preempt). An even value implies that the processor is currently + // executing. + // NOTE: This value will ALWAYS be zero for dedicated processors and + // will NEVER be zero for shared processors (ie, initialized to a 1). + volatile u32 xYieldCount; // PLIC increments each dispatchx00-x03 + u8 xRsvd3_0[124]; // Reserved x04-x7F + +//============================================================================= +// CACHE_LINE_4-5 0x0100 - 0x01FF Contains PMC interrupt data +//============================================================================= + u8 xPmcSaveArea[256]; // PMC interrupt Area x00-xFF + + +}; +#endif // _ITLPPACA_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpQueue.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpQueue.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpQueue.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpQueue.h Fri Sep 14 10:02:27 2001 @@ -0,0 +1,99 @@ +/* + * ItLpQueue.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//============================================================================= +// +// This control block defines the simple LP queue structure that is +// shared between the hypervisor (PLIC) and the OS in order to send +// events to an LP. +// + +#ifndef _PPC_TYPES_H +#include +#endif +#include + + +struct HvLpEvent; + + +#ifndef _ITLPQUEUE_H +#define _ITLPQUEUE_H + +#define ITMaxLpQueues 8 + +#define NotUsed 0 // Queue will not be used by PLIC +#define DedicatedIo 1 // Queue dedicated to IO processor specified +#define DedicatedLp 2 // Queue dedicated to LP specified +#define Shared 3 // Queue shared for both IO and LP + +#define LpEventStackSize 4096 +#define LpEventMaxSize 256 +#define LpEventAlign 64 + +struct ItLpQueue +{ +// +// The xSlicCurEventPtr is the pointer to the next event stack entry that will +// become valid. The OS must peek at this entry to determine if it is valid. +// PLIC will set the valid indicator as the very last store into that entry. +// +// When the OS has completed processing of the event then it will mark the event +// as invalid so that PLIC knows it can store into that event location again. +// +// If the event stack fills and there are overflow events, then PLIC will set +// the xPlicOverflowIntPending flag in which case the OS will have to fetch the +// additional LP events once they have drained the event stack. +// +// The first 16-bytes are known by both the OS and PLIC. The remainder of the +// cache line is for use by the OS. 
+// +//============================================================================= + u8 xPlicOverflowIntPending; // 0x00 Overflow events are pending + u8 xPlicStatus; // 0x01 DedicatedIo or DedicatedLp or NotUsed + u16 xSlicLogicalProcIndex; // 0x02 Logical Proc Index for correlation + u8 xPlicRsvd[12]; // 0x04 + char* xSlicCurEventPtr; // 0x10 + char* xSlicLastValidEventPtr; // 0x18 + char* xSlicEventStackPtr; // 0x20 + u8 xIndex; // 0x28 unique sequential index. + u8 xSlicRsvd[3]; // 0x29-2b + u32 xInUseWord; // 0x2C + u64 xLpIntCount; // 0x30 Total Lp Int msgs processed + u64 xLpIntCountByType[9]; // 0x38-0x7F Event counts by type +}; + +extern struct ItLpQueue xItLpQueue; + +extern struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * ); +extern int ItLpQueue_isLpIntPending( struct ItLpQueue * ); +extern unsigned ItLpQueue_process( struct ItLpQueue *, struct pt_regs * ); +extern void ItLpQueue_clearValid( struct HvLpEvent * ); + +static __inline__ void process_iSeries_events( void ) +{ + __asm__ __volatile__ ( + " li 0,0x5555 \n\ + sc" + : : : "r0", "r3" ); +} + + +//============================================================================= +#endif // _ITLPQUEUE_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpRegSave.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpRegSave.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/ItLpRegSave.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItLpRegSave.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,87 @@ +/* + * ItLpRegSave.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//=====================================================================================
+//
+//	This control block contains the data that is shared between PLIC
+//	and the OS
+//
+//
+
+#ifndef _ITLPREGSAVE_H
+#define _ITLPREGSAVE_H
+
+struct ItLpRegSave
+{
+	u32	xDesc;		// Eye catcher "LpRS" ebcdic	000-003
+	u16	xSize;		// Size of this class		004-005
+	u8	xInUse;		// Area is live			006-006
+	u8	xRsvd1[9];	// Reserved			007-00F
+
+	u8	xFixedRegSave[352];	// Fixed Register Save Area 010-16F
+	u32	xCTRL;		// Control Register		170-173
+	u32	xDEC;		// Decrementer			174-177
+	u32	xFPSCR;		// FP Status and Control Reg	178-17B
+	u32	xPVR;		// Processor Version Number	17C-17F
+
+	u64	xMMCR0;		// Monitor Mode Control Reg 0	180-187
+	u32	xPMC1;		// Perf Monitor Counter 1	188-18B
+	u32	xPMC2;		// Perf Monitor Counter 2	18C-18F
+	u32	xPMC3;		// Perf Monitor Counter 3	190-193
+	u32	xPMC4;		// Perf Monitor Counter 4	194-197
+	u32	xPIR;		// Processor ID Reg		198-19B
+
+	u32	xMMCR1;		// Monitor Mode Control Reg 1	19C-19F
+	u32	xMMCRA;		// Monitor Mode Control Reg A	1A0-1A3
+	u32	xPMC5;		// Perf Monitor Counter 5	1A4-1A7
+	u32	xPMC6;		// Perf Monitor Counter 6	1A8-1AB
+	u32	xPMC7;		// Perf Monitor Counter 7	1AC-1AF
+	u32	xPMC8;		// Perf Monitor Counter 8	1B0-1B3
+	u32	xTSC;		// Thread Switch Control	1B4-1B7
+	u32	xTST;		// Thread Switch Timeout	1B8-1BB
+	u32	xRsvd;		// Reserved			1BC-1BF
+
+	u64	xACCR;		// Address Compare Control Reg	1C0-1C7
+	u64	xIMR;		// Instruction Match Register	1C8-1CF
+	u64	xSDR1;		// Storage Description Reg 1	1D0-1D7
+	u64	xSPRG0;		// Special Purpose Reg General0	1D8-1DF
+	u64	xSPRG1;		// Special Purpose Reg General1	1E0-1E7
+	u64	xSPRG2;		// Special Purpose Reg General2	1E8-1EF
+	u64	xSPRG3;		// Special Purpose Reg General3	1F0-1F7
+	u64	xTB;		// Time Base Register		1F8-1FF
+
+	u64	xFPR[32];	// Floating Point Registers	200-2FF
+
+	u64	xMSR;		// Machine State Register	300-307
+	u64	xNIA;		// Next Instruction Address	308-30F
+
+	u64	xDABR;		// Data Address Breakpoint Reg	310-317
+	u64	xIABR;		// Inst Address Breakpoint Reg	318-31F
+
+	u64	xHID0;		// HW Implementation Dependent0	320-327
+
+	u64	xHID4;		// HW Implementation Dependent4	328-32F
+	u64	xSCOMd;		// SCOM Data Reg (SPRG4)	330-337
+	u64	xSCOMc;		// SCOM Command Reg (SPRG5)	338-33F
+	u64	xSDAR;		// Sample Data Address Register	340-347
+	u64	xSIAR;		// Sample Inst Address Register	348-34F
+
+	u8	xRsvd3[176];	// Reserved			350-3FF
+};
+#endif // _ITLPREGSAVE_H
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItSpCommArea.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItSpCommArea.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/ItSpCommArea.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItSpCommArea.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,39 @@
+/*
+ * ItSpCommArea.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#ifndef _ITSPCOMMAREA_H
+#define _ITSPCOMMAREA_H
+
+
+struct SpCommArea
+{
+	u32	xDesc;			// Descriptor (only in new formats)	000-003
+	u8	xFormat;		// Format (only in new formats)		004-004
+	u8	xRsvd1[11];		// Reserved				005-00F
+	u64	xRawTbAtIplStart;	// Raw HW TB value when IPL is started	010-017
+	u64	xRawTodAtIplStart;	// Raw HW TOD value when IPL is started	018-01F
+	u64	xBcdTimeAtIplStart;	// BCD time when IPL is started		020-027
+	u64	xBcdTimeAtOsStart;	// BCD time when OS passed control	028-02F
+	u8	xRsvd2[80];		// Reserved				030-07F
+};
+
+extern struct SpCommArea xSpCommArea;
+
+#endif /* _ITSPCOMMAREA_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/ItVpdAreas.h linuxppc64_2_4/include/asm-ppc64/iSeries/ItVpdAreas.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/ItVpdAreas.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/ItVpdAreas.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,100 @@
+/*
+ * ItVpdAreas.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//=====================================================================================
+//
+//	This file defines the address and length of all of the VPD areas passed to
+//	the OS from PLIC (most of which start from the SP).
+//
+
+#ifndef _PPC_TYPES_H
+#include <asm/types.h>
+#endif
+
+
+#ifndef _ITVPDAREAS_H
+#define _ITVPDAREAS_H
+
+// VPD Entry index is carved in stone - cannot be changed (easily).
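(Editor's sketch, not part of the patch: struct ItVpdAreas, defined after the
index list below, carries offset fields that locate its per-index arrays, so a
consumer need not hard-code the array positions. A hypothetical lookup of a
VPD buffer address by index, assuming the layout defined below:)

	/* illustrative only -- helper name and casts are hypothetical */
	static inline void *example_vpd_address(struct ItVpdAreas *vp, unsigned idx)
	{
		/* xSlicVpdAdrsOffset is a byte offset from the start of the
		 * control block to the array of VPD buffer pointers */
		void **adrs = (void **)((char *)vp + vp->xSlicVpdAdrsOffset);
		return (idx < vp->xSlicVpdEntries) ? adrs[idx] : NULL;
	}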
+#define ItVpdCecVpd 0 +#define ItVpdDynamicSpace 1 +#define ItVpdExtVpd 2 +#define ItVpdExtVpdOnPanel 3 +#define ItVpdFirstPaca 4 +#define ItVpdIoVpd 5 +#define ItVpdIplParms 6 +#define ItVpdMsVpd 7 +#define ItVpdPanelVpd 8 +#define ItVpdLpNaca 9 +#define ItVpdBackplaneAndMaybeClockCardVpd 10 +#define ItVpdRecoveryLogBuffer 11 +#define ItVpdSpCommArea 12 +#define ItVpdSpLogBuffer 13 +#define ItVpdSpLogBufferSave 14 +#define ItVpdSpCardVpd 15 +#define ItVpdFirstProcVpd 16 +#define ItVpdApModelVpd 17 +#define ItVpdClockCardVpd 18 +#define ItVpdBusExtCardVpd 19 +#define ItVpdProcCapacityVpd 20 +#define ItVpdInteractiveCapacityVpd 21 +#define ItVpdFirstSlotLabel 22 +#define ItVpdFirstLpQueue 23 +#define ItVpdFirstL3CacheVpd 24 +#define ItVpdFirstProcFruVpd 25 + +#define ItVpdMaxEntries 26 + + +#define ItDmaMaxEntries 10 + +#define ItVpdAreasMaxSlotLabels 192 + + +struct SlicVpdAdrs { + u32 pad1; + void * vpdAddr; +}; + + +struct ItVpdAreas +{ + u32 xSlicDesc; // Descriptor 000-003 + u16 xSlicSize; // Size of this control block 004-005 + u16 xPlicAdjustVpdLens:1; // Flag to indicate new interface 006-007 + u16 xRsvd1:15; // Reserved bits ... + u16 xSlicVpdEntries; // Number of VPD entries 008-009 + u16 xSlicDmaEntries; // Number of DMA entries 00A-00B + u16 xSlicMaxLogicalProcs; // Maximum logical processors 00C-00D + u16 xSlicMaxPhysicalProcs; // Maximum physical processors 00E-00F + u16 xSlicDmaToksOffset; // Offset into this of array 010-011 + u16 xSlicVpdAdrsOffset; // Offset into this of array 012-013 + u16 xSlicDmaLensOffset; // Offset into this of array 014-015 + u16 xSlicVpdLensOffset; // Offset into this of array 016-017 + u16 xSlicMaxSlotLabels; // Maximum number of slot labels 018-019 + u16 xSlicMaxLpQueues; // Maximum number of LP Queues 01A-01B + u8 xRsvd2[4]; // Reserved 01C-01F + u64 xRsvd3[12]; // Reserved 020-07F + u32 xPlicDmaLens[ItDmaMaxEntries];// Array of DMA lengths 080-0A7 + u32 xPlicDmaToks[ItDmaMaxEntries];// Array of DMA tokens 0A8-0CF + u32 xSlicVpdLens[ItVpdMaxEntries];// Array of VPD lengths 0D0-12F + void * xSlicVpdAdrs[ItVpdMaxEntries];// Array of VPD buffers 130-1EF +}; + +#endif // _ITVPDAREAS_H diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/LparData.h linuxppc64_2_4/include/asm-ppc64/iSeries/LparData.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/LparData.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/LparData.h Fri Aug 17 13:03:16 2001 @@ -0,0 +1,57 @@ +/* + * LparData.h + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PPC_TYPES_H
+#include <asm/types.h>
+#endif
+
+#ifndef _LPARDATA_H
+#define _LPARDATA_H
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern struct LparMap		xLparMap;
+extern struct Naca		xNaca;
+extern struct Paca		xPaca[];
+extern struct HvReleaseData	hvReleaseData;
+extern struct ItLpNaca		itLpNaca;
+extern struct ItIplParmsReal	xItIplParmsReal;
+extern struct IoHriProcessorVpd	xIoHriProcessorVpd[];
+extern struct ItLpQueue		xItLpQueue;
+extern struct ItVpdAreas	itVpdAreas;
+extern u64			xMsVpd[];
+extern struct msChunks		msChunks;
+
+
+#endif /* _LPARDATA_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/LparMap.h linuxppc64_2_4/include/asm-ppc64/iSeries/LparMap.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/LparMap.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/LparMap.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,70 @@
+/*
+ * LparMap.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PPC_TYPES_H
+#include <asm/types.h>
+#endif
+
+#ifndef _LPARMAP_H
+#define _LPARMAP_H
+
+/* The iSeries hypervisor will set up mapping for one or more
+ * ESID/VSID pairs (in SLB/segment registers) and will set up
+ * mappings of one or more ranges of pages to VAs.
+ * We will have the hypervisor set up the ESID->VSID mapping
+ * for the four kernel segments (C-F).  With shared processors,
+ * the hypervisor will clear all segment registers and reload
+ * these four whenever the processor is switched from one
+ * partition to another.
+ */
+
+/* The Vsid and Esid identified below will be used by the hypervisor
+ * to set up a memory mapping for part of the load area before giving
+ * control to the Linux kernel.  The load area is 64 MB, but this map
+ * must not attempt to cover the whole load area.  The Hashed Page
+ * Table may need to be located within the load area (if the total
+ * partition size is 64 MB), but cannot be mapped.  Typically, this
+ * should specify to map half (32 MB) of the load area.
+ *
+ * The hypervisor will set up page table entries for the number of
+ * pages specified.
+ *
+ * In 32-bit mode, the hypervisor will load all four of the
+ * segment registers (identified by the low-order four bits of the
+ * Esid field).  In 64-bit mode, the hypervisor will load one SLB
+ * entry to map the Esid to the Vsid.
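(Editor's note, not part of the patch: with the 4 KB page size assumed
throughout, HvPagesToMap = 8192 below maps 8192 * 4096 bytes = 32 MB, i.e.
exactly half of the 64 MB load area, as the comment above requires. A
hypothetical compile-time check of that arithmetic:)

	/* fails to compile if 8192 pages of 4 KB are not 32 MB */
	typedef char hv_maps_half_load_area[(8192UL * 4096UL == 32UL << 20) ? 1 : -1];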
+*/
+
+// Hypervisor initially maps 32MB of the load area
+#define HvPagesToMap	8192
+
+struct LparMap
+{
+	u64	xNumberEsids;	// Number of ESID/VSID pairs (1)
+	u64	xNumberRanges;	// Number of VA ranges to map (1)
+	u64	xSegmentTableOffs; // Page number within load area of seg table (0)
+	u64	xRsvd[5];	// Reserved (0)
+	u64	xKernelEsid;	// Esid used to map kernel load (0x0C00000000)
+	u64	xKernelVsid;	// Vsid used to map kernel load (0x0C00000000)
+	u64	xPages;		// Number of pages to be mapped (8192)
+	u64	xOffset;	// Offset from start of load area (0)
+	u64	xVPN;		// Virtual Page Number (0x000C000000000000)
+};
+
+#endif /* _LPARMAP_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/XmPciLpEvent.h linuxppc64_2_4/include/asm-ppc64/iSeries/XmPciLpEvent.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/XmPciLpEvent.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/XmPciLpEvent.h	Fri Aug 17 13:05:18 2001
@@ -0,0 +1,18 @@
+
+#ifndef __XMPCILPEVENT_H__
+#define __XMPCILPEVENT_H__
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int XmPciLpEvent_init(void);
+void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __XMPCILPEVENT_H__ */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_FlightRecorder.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_FlightRecorder.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_FlightRecorder.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_FlightRecorder.h	Fri Aug 17 13:05:18 2001
@@ -0,0 +1,85 @@
+#ifndef _ISERIES_FLIGHTRECORDER_H
+#define _ISERIES_FLIGHTRECORDER_H
+/************************************************************************/
+/* File iSeries_FlightRecorder.h created by Allan Trautman Jan 22 2001. */
+/************************************************************************/
+/* This code supports the pci interface on the IBM iSeries systems.     */
+/* Copyright (C) 20yy                                                   */
+/*                                                                      */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or    */
+/* (at your option) any later version.                                  */
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU General Public License for more details.                         */
+/*                                                                      */
+/* You should have received a copy of the GNU General Public License    */
+/* along with this program; if not, write to the:                       */
+/* Free Software Foundation, Inc.,                                      */
+/* 59 Temple Place, Suite 330,                                          */
+/* Boston, MA 02111-1307 USA                                            */
+/************************************************************************/
+/* Change Activity:                                                     */
+/* Created, Jan 22, 2001                                                */
+/* Added Time stamp methods. Apr 12, 2001                               */
+/* End Change Activity                                                  */
+/************************************************************************/
+/* This is a generic Flight Recorder, simply stuffs line entries into a */
+/* buffer for debug purposes.                                           */
+/*                                                                      */
+/* To use,                                                              */
+/* 1. Create one, make it global so it isn't on the stack.              */
+/*    FlightRecorder PciFlightRecorder;                                 */
+/*                                                                      */
+/* 2. Optionally create a pointer to it, just makes it easier to use.   */
+/*    FlightRecorder* PciFr = &PciFlightRecorder;                       */
+/*                                                                      */
+/* 3. Initialize with your signature.                                   */
+/*    iSeries_Fr_Initialize(PciFr, "Pci Flight Recorder");              */
+/*                                                                      */
+/* 4. Log entries.                                                      */
+/*    PciFr->logEntry(PciFr,"In Main");                                 */
+/*                                                                      */
+/* 5. Later, you can find the Flight Recorder by looking in the         */
+/*    System.map                                                        */
+/************************************************************************/
+struct iSeries_FlightRecorder;			/* Forward declares    */
+struct rtc_time;
+void logEntry(struct iSeries_FlightRecorder*, char* Text);
+void logTime( struct iSeries_FlightRecorder*, char* Text);
+void logDate( struct iSeries_FlightRecorder*, char* Text);
+#define FlightRecorderSize 4096
+
+/************************************************************************/
+/* Generic Flight Recorder Structure                                    */
+/************************************************************************/
+struct iSeries_FlightRecorder {		/* Structure Definition        */
+	char	Signature[16];		/* Eye Catcher                 */
+	char*	StartingPointer;	/* Buffer Starting Address     */
+	char*	CurrentPointer;		/* Next Entry Address          */
+	int	WrapCount;		/* Number of Buffer Wraps      */
+	void	(*logEntry)(struct iSeries_FlightRecorder*,char*);
+	void	(*logTime) (struct iSeries_FlightRecorder*,char*);
+	void	(*logDate) (struct iSeries_FlightRecorder*,char*);
+	char	Buffer[FlightRecorderSize];
+};
+
+typedef struct iSeries_FlightRecorder FlightRecorder;	/* Short Name   */
+extern void iSeries_Fr_Initialize(FlightRecorder*, char* Signature);
+/************************************************************************/
+/* extern void iSeries_LogFr_Entry( FlightRecorder*, char* Text);       */
+/* extern void iSeries_LogFr_Date(  FlightRecorder*, char* Text);       */
+/* extern void iSeries_LogFr_Time(  FlightRecorder*, char* Text);       */
+/************************************************************************/
+/* PCI Flight Recorder Helpers                                          */
+/************************************************************************/
+extern FlightRecorder* PciFr;		/* Ptr to Pci Fr               */
+extern char*           PciFrBuffer;	/* Ptr to Fr Work Buffer       */
+#define ISERIES_PCI_FR(buffer)      PciFr->logEntry(PciFr,buffer);
+#define ISERIES_PCI_FR_TIME(buffer) PciFr->logTime(PciFr,buffer);
+#define ISERIES_PCI_FR_DATE(buffer) PciFr->logDate(PciFr,buffer);
+
+#endif /* _ISERIES_FLIGHTRECORDER_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_VpdInfo.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_VpdInfo.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_VpdInfo.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_VpdInfo.h	Fri May 25 14:27:42 2001
@@ -0,0 +1,56 @@
+#ifndef _ISERIES_VPDINFO_H
+#define _ISERIES_VPDINFO_H
+/************************************************************************/
+/* File iSeries_VpdInfo.h created by Allan Trautman Feb 08 2001.        */
+/************************************************************************/
+/* This code supports the location data found on the IBM iSeries        */
+/* systems.                                                             */
+/* Copyright (C) 20yy                                                   */
+/*                                                                      */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or    */
+/* (at your option) any later version.                                  */
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU General Public License for more details.                         */
+/*                                                                      */
+/* You should have received a copy of the GNU General Public License    */
+/* along with this program; if not, write to the:                       */
+/* Free Software Foundation, Inc.,                                      */
+/* 59 Temple Place, Suite 330,                                          */
+/* Boston, MA 02111-1307 USA                                            */
+/************************************************************************/
+/* Change Activity:                                                     */
+/* Created, Feb 8, 2001                                                 */
+/* Reformatted for Card, March 8, 2001                                  */
+/* End Change Activity                                                  */
+/************************************************************************/
+
+struct pci_dev;				/* Forward Declare             */
+/************************************************************************/
+/* Location Data extracted from the VPD list and device info.           */
+/************************************************************************/
+struct LocationDataStruct {	/* Location data structure for device  */
+	u16	Bus;			/* iSeries Bus Number	    0x00 */
+	u16	Board;			/* iSeries Board	    0x02 */
+	u8	FrameId;		/* iSeries spcn Frame Id    0x04 */
+	u8	PhbId;			/* iSeries Phb Location	    0x05 */
+	u16	Card;			/* iSeries Card Slot	    0x06 */
+	char	CardLocation[4];	/* Char format of planar vpd 0x08 */
+	u8	AgentId;		/* iSeries AgentId	    0x0C */
+	u8	SecondaryAgentId;	/* iSeries Secondary Agent Id 0x0D */
+	u8	LinuxBus;		/* Linux Bus Number	    0x0E */
+	u8	LinuxDevFn;		/* Linux Device Function    0x0F */
+};
+typedef struct LocationDataStruct LocationData;
+#define LOCATION_DATA_SIZE 16
+
+/************************************************************************/
+/* Prototypes                                                           */
+/************************************************************************/
+extern LocationData* iSeries_GetLocationData(struct pci_dev* PciDev);
+extern int iSeries_Device_Information(struct pci_dev*,char*, int);
+
+#endif /* _ISERIES_VPDINFO_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_dma.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_dma.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_dma.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_dma.h	Fri May 4 17:12:35 2001
@@ -0,0 +1,97 @@
+/*
+ * iSeries_dma.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ISERIES_DMA_H
+#define _ISERIES_DMA_H
+
+#include <asm/types.h>
+#ifndef __LINUX_SPINLOCK_H
+#include <linux/spinlock.h>
+#endif
+
+// NUM_TCE_LEVELS defines the largest contiguous block
+// of dma (tce) space we can get.
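(Editor's note, not part of the patch: reading the sizing comment here
together with struct MultiLevelBitmap below, the allocator appears to keep one
bitmap per level, with the largest level, NUM_TCE_LEVELS - 1 = 9, covering the
2**9-page maximum; the arithmetic is just:)

	/* illustrative arithmetic only */
	/* max contiguous dma space = 2^(NUM_TCE_LEVELS-1) pages
	 *                          = 512 * 4096 bytes = 2 MB   */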
+// NUM_TCE_LEVELS = 10 allows up to 2**9 pages (512 * 4096) = 2 MB
+#define NUM_TCE_LEVELS	10
+
+#define NO_TCE		((dma_addr_t)-1)
+
+// Tces come in two formats, one for the virtual bus and a different
+// format for PCI
+#define TCE_VB	0
+#define TCE_PCI	1
+
+
+union Tce {
+	u64	wholeTce;
+	struct {
+		u64	cacheBits	:6;	/* Cache hash bits - not used */
+		u64	rsvd		:6;
+		u64	rpn		:40;	/* Absolute page number */
+		u64	valid		:1;	/* Tce is valid (vb only) */
+		u64	allIo		:1;	/* Tce is valid for all lps (vb only) */
+		u64	lpIndex		:8;	/* LpIndex for user of TCE (vb only) */
+		u64	pciWrite	:1;	/* Write allowed (pci only) */
+		u64	readWrite	:1;	/* Read allowed (pci), Write allowed (vb) */
+	} tceBits;
+};
+
+struct Bitmap {
+	unsigned long	numBits;
+	unsigned long	numBytes;
+	unsigned char * map;
+};
+
+struct MultiLevelBitmap {
+	unsigned long	maxLevel;
+	struct Bitmap	level[NUM_TCE_LEVELS];
+};
+
+struct TceTable {
+	u64	busNumber;
+	u64	size;
+	u64	startOffset;
+	u64	index;
+	spinlock_t	lock;
+	struct MultiLevelBitmap mlbm;
+};
+
+struct HvTceTableManagerCB {
+	u64	busNumber;		/* Bus number for this tce table */
+	u64	start;			/* Will be NULL for secondary */
+	u64	totalSize;		/* Size (in pages) of whole table */
+	u64	startOffset;		/* Index into real tce table of the
+					   start of our section */
+	u64	size;			/* Size (in pages) of our section */
+	u64	index;			/* Index of this tce table (token?) */
+	u16	maxTceTableIndex;	/* Max number of tables for partition */
+	u8	virtualBusFlag;		/* Flag to indicate virtual bus */
+	u8	rsvd[5];
+};
+
+extern struct TceTable virtBusTceTable;	/* Tce table for virtual bus */
+
+extern struct TceTable * build_tce_table( struct HvTceTableManagerCB *,
+					  struct TceTable *);
+extern void create_virtual_bus_tce_table( void );
+
+extern void create_pci_bus_tce_table( unsigned busNumber );
+
+#endif // _ISERIES_DMA_H
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_fixup.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_fixup.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_fixup.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_fixup.h	Fri Aug 17 13:05:18 2001
@@ -0,0 +1,25 @@
+
+#ifndef __ISERIES_FIXUP_H__
+#define __ISERIES_FIXUP_H__
+#include <linux/pci.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void iSeries_fixup (void);
+void iSeries_fixup_bus (struct pci_bus*);
+unsigned int iSeries_scan_slot (struct pci_dev*, u16, u8, u8);
+
+
+/* Need to store information related to the PHB and make it accessible
+   to the hose */
+struct iSeries_hose_arch_data {
+	u32 hvBusNumber;
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __ISERIES_FIXUP_H__ */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_io.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_io.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_io.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_io.h	Tue Oct 2 07:17:28 2001
@@ -0,0 +1,44 @@
+#ifdef CONFIG_PPC_ISERIES
+#ifndef _ISERIES_IO_H
+#define _ISERIES_IO_H
+#include
+/************************************************************************/
+/* File iSeries_io.h created by Allan Trautman on Thu Dec 28 2000.
*/ +/************************************************************************/ +/* Remaps the io.h for the iSeries Io */ +/* Copyright (C) 20yy Allan H Trautman, IBM Corporation */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created December 28, 2000 */ +/* End Change Activity */ +/************************************************************************/ +extern u8 iSeries_Read_Byte(void* IoAddress); +extern u16 iSeries_Read_Word(void* IoAddress); +extern u32 iSeries_Read_Long(void* IoAddress); +extern void iSeries_Write_Byte(u8 IoData,void* IoAddress); +extern void iSeries_Write_Word(u16 IoData,void* IoAddress); +extern void iSeries_Write_Long(u32 IoData,void* IoAddress); + +extern void* iSeries_memset_io(void *dest, char x, size_t n); +extern void* iSeries_memcpy_toio(void *dest, void *source, size_t n); +extern void* iSeries_memcpy_fromio(void *dest, void *source, size_t n); + +#endif /* _ISERIES_IO_H */ +#endif /* CONFIG_PPC_ISERIES */ + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_irq.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_irq.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_irq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_irq.h Sat Aug 25 08:08:30 2001 @@ -0,0 +1,27 @@ + +#ifndef __ISERIES_IRQ_H__ +#define __ISERIES_IRQ_H__ + + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int iSeries_startup_IRQ(unsigned int); +void iSeries_shutdown_IRQ(unsigned int); +void iSeries_enable_IRQ(unsigned int); +void iSeries_disable_IRQ(unsigned int); +void iSeries_end_IRQ(unsigned int); +void iSeries_init_IRQ(void); +void iSeries_init_irqMap(int); +int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, HvAgentId); +int iSeries_assign_IRQ(int, HvBusNumber, HvSubBusNumber, HvAgentId); +void iSeries_activate_IRQs(void); + +int XmPciLpEvent_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __ISERIES_IRQ_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_pci.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_pci.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_pci.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_pci.h Fri Nov 30 16:53:36 2001 @@ -0,0 +1,126 @@ +#ifndef _ISERIES_64_PCI_H +#define _ISERIES_64_PCI_H +/************************************************************************/ +/* File iSeries_pci.h created by Allan Trautman on Tue Feb 20, 2001. */ +/************************************************************************/ +/* Define some useful macros for the iSeries pci routines. 
*/
+/* Copyright (C) 20yy Allan H Trautman, IBM Corporation                 */
+/*                                                                      */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or    */
+/* (at your option) any later version.                                  */
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU General Public License for more details.                         */
+/*                                                                      */
+/* You should have received a copy of the GNU General Public License    */
+/* along with this program; if not, write to the:                       */
+/* Free Software Foundation, Inc.,                                      */
+/* 59 Temple Place, Suite 330,                                          */
+/* Boston, MA 02111-1307 USA                                            */
+/************************************************************************/
+/* Change Activity:                                                     */
+/* Created Feb 20, 2001                                                 */
+/* Added device reset, March 22, 2001                                   */
+/* Ported to ppc64, May 25, 2001                                        */
+/* End Change Activity                                                  */
+/************************************************************************/
+#include <asm/iSeries/HvCallPci.h>
+#include <linux/list.h>
+
+struct pci_dev;				/* For Forward Reference       */
+struct iSeries_Device_Node;
+/************************************************************************/
+/* Gets iSeries Bus, SubBus, or DevFn using pci_dev* structure          */
+/************************************************************************/
+#define ISERIES_BUS(DevPtr)	DevPtr->DsaAddr.busNumber
+#define ISERIES_SUBBUS(DevPtr)	DevPtr->DsaAddr.subBusNumber
+#define ISERIES_DEVICE(DevPtr)	DevPtr->DsaAddr.deviceId
+#define ISERIES_DEVFUN(DevPtr)	DevPtr->DevFn
+#define ISERIES_DSA(DevPtr)	(*(u64*)&DevPtr->DsaAddr)
+
+#define EADsMaxAgents 7
+/************************************************************************************/
+/* Decodes Linux DevFn to iSeries DevFn, bridge device, or function.                */
+/* For Linux, see PCI_SLOT and PCI_FUNC in include/linux/pci.h                      */
+/************************************************************************************/
+#define ISERIES_DECODE_DEVFN(linuxdevfn)	(((linuxdevfn & 0x71) << 1) | (linuxdevfn & 0x07))
+#define ISERIES_DECODE_DEVICE(linuxdevfn)	(((linuxdevfn & 0x38) >> 3) |(((linuxdevfn & 0x40) >> 2) + 0x10))
+#define ISERIES_DECODE_FUNCTION(linuxdevfn)	(linuxdevfn & 0x07)
+#define ISERIES_PCI_AGENTID(idsel,func)		(((idsel & 0x0F) << 4) | (func & 0x07))
+
+#define ISERIES_GET_DEVICE_FROM_SUBBUS(subbus)	((subbus >> 5) & 0x7)
+#define ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus) ((subbus >> 2) & 0x7)
+
+#define ISERIES_ENCODE_DEVICE(agentid)	((0x10) | ((agentid&0x20)>>2) | (agentid&07))
+/************************************************************************************/
+/* Converts Virtual Address to Real Address for Hypervisor calls                    */
+/************************************************************************************/
+#define REALADDR(virtaddr)	(0x8000000000000000 | (virt_to_absolute((u64)virtaddr) ))
+
+/************************************************************************************/
+/* Define TRUE and FALSE Values for Al                                              */
+/************************************************************************************/
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/************************************************************************/
+/* iSeries Device Information                                           */
+/************************************************************************/
+struct iSeries_Device_Node {
+	struct list_head Device_List;	/* Must be first for cast to work */
+	struct pci_dev*	PciDev;		/* Pointer to pci_dev structure*/
+	struct HvCallPci_DsaAddr DsaAddr;/* Direct Select Address       */
+					/* busNumber,subBusNumber,      */
+					/* deviceId, barNumber          */
+	HvAgentId	AgentId;	/* Hypervisor DevFn             */
+	int		DevFn;		/* Linux devfn                  */
+	int		BarOffset;
+	int		Irq;		/* Assigned IRQ                 */
+	int		ReturnCode;	/* Return Code Holder           */
+	int		IoRetry;	/* Current Retry Count          */
+	int		Flags;		/* Possible flags(disable/bist) */
+	u16		Vendor;		/* Vendor ID                    */
+	struct TceTable* DevTceTable;	/* Device TCE Table             */
+	u8		PhbId;		/* Phb Card is on.              */
+	u16		Board;		/* Board Number                 */
+	u8		FrameId;	/* iSeries spcn Frame Id        */
+	char		CardLocation[4];/* Char format of planar vpd    */
+	char		Location[20];	/* Frame 1, Card C10            */
+};
+/************************************************************************/
+/* Location Data extracted from the VPD list and device info.           */
+/************************************************************************/
+struct LocationDataStruct {	/* Location data structure for device  */
+	u16	Bus;			/* iSeries Bus Number	    0x00 */
+	u16	Board;			/* iSeries Board	    0x02 */
+	u8	FrameId;		/* iSeries spcn Frame Id    0x04 */
+	u8	PhbId;			/* iSeries Phb Location	    0x05 */
+	u8	AgentId;		/* iSeries AgentId	    0x06 */
+	u8	Card;
+	char	CardLocation[4];
+};
+typedef struct LocationDataStruct LocationData;
+#define LOCATION_DATA_SIZE 48
+/************************************************************************/
+/* Flight Recorder tracing                                              */
+/************************************************************************/
+extern int iSeries_Set_PciTraceFlag(int TraceFlag);
+extern int iSeries_Get_PciTraceFlag(void);
+
+/************************************************************************/
+/* Functions                                                            */
+/************************************************************************/
+extern LocationData* iSeries_GetLocationData(struct pci_dev* PciDev);
+extern int  iSeries_Device_Information(struct pci_dev*,char*, int);
+extern void iSeries_Get_Location_Code(struct iSeries_Device_Node*);
+extern int  iSeries_Device_ToggleReset(struct pci_dev* PciDev, int AssertTime, int DelayTime);
+
+#endif /* _ISERIES_64_PCI_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_proc.h linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_proc.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/iSeries_proc.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/iSeries_proc.h	Fri May 4 17:12:35 2001
@@ -0,0 +1,37 @@
+/*
+ * iSeries_proc.h
+ * Copyright (C) 2001 Kyle A. Lucke IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _ISERIES_PROC_H
+#define _ISERIES_PROC_H
+
+#include <linux/proc_fs.h>
+
+extern void iSeries_proc_early_init(void);
+extern void iSeries_proc_create(void);
+
+typedef void (*iSeriesProcFunction)(struct proc_dir_entry *iSeries_proc);
+
+extern void iSeries_proc_callback(iSeriesProcFunction initFunction);
+
+#endif /* _ISERIES_PROC_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/mf.h linuxppc64_2_4/include/asm-ppc64/iSeries/mf.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/mf.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/mf.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,83 @@
+/*
+ * mf.h
+ * Copyright (C) 2001 Troy D. Armstrong IBM Corporation
+ *
+ * This module exists as an interface between a Linux secondary partition
+ * running on an iSeries and the primary partition's Virtual Service
+ * Processor (VSP) object.  The VSP has final authority over powering on/off
+ * all partitions in the iSeries.  It also provides miscellaneous low-level
+ * machine facility type operations.
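(Editor's sketch, not part of the patch: the requests declared in this header
that take an MFCompleteHandler are asynchronous; the handler receives the
caller's token and a return code once the VSP has acted. A hypothetical
caller, with my_done, my_token, and the argument values all invented:)

	static void my_done(void *token, int rc)
	{
		/* runs when the VSP completes the request */
	}

	mf_allocateLpEvents(targetLp, type, size, amount, my_done, my_token);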
+ * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MF_H_INCLUDED +#define MF_H_INCLUDED + +#include +#include + +struct rtc_time; + +typedef void (*MFCompleteHandler)( void * clientToken, int returnCode ); + +extern void mf_allocateLpEvents( HvLpIndex targetLp, + HvLpEvent_Type type, + unsigned size, + unsigned amount, + MFCompleteHandler hdlr, + void * userToken ); + +extern void mf_deallocateLpEvents( HvLpIndex targetLp, + HvLpEvent_Type type, + unsigned count, + MFCompleteHandler hdlr, + void * userToken ); + +extern void mf_powerOff( void ); + +extern void mf_reboot( void ); + +extern void mf_displaySrc( u32 word ); +extern void mf_displayProgress( u16 value ); + +extern void mf_clearSrc( void ); + +extern void mf_init( void ); + +extern void mf_setSide(char side); + +extern char mf_getSide(void); + +extern void mf_setCmdLine(const char *cmdline, int size, u64 side); + +extern int mf_getCmdLine(char *cmdline, int *size, u64 side); + +extern void mf_getSrcHistory(char *buffer, int size); + +extern int mf_setVmlinuxChunk(const char *buffer, int size, int offset, u64 side); + +extern int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side); + +extern int mf_setRtcTime(unsigned long time); + +extern int mf_getRtcTime(unsigned long *time); + +extern int mf_getRtc( struct rtc_time * tm ); + +extern int mf_setRtc( struct rtc_time * tm ); + +#endif /* MF_H_INCLUDED */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/mf_proc.h linuxppc64_2_4/include/asm-ppc64/iSeries/mf_proc.h --- ../kernel.org/linux/include/asm-ppc64/iSeries/mf_proc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/mf_proc.h Fri May 4 17:12:35 2001 @@ -0,0 +1,33 @@ +/* + * mf_proc.h + * Copyright (C) 2001 Kyle A. Lucke IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _MF_PROC_H
+#define _MF_PROC_H
+
+#include <linux/proc_fs.h>
+
+void mf_proc_init(struct proc_dir_entry *iSeries_proc);
+
+
+#endif /* _MF_PROC_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/iSeries/veth-proc.h linuxppc64_2_4/include/asm-ppc64/iSeries/veth-proc.h
--- ../kernel.org/linux/include/asm-ppc64/iSeries/veth-proc.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/iSeries/veth-proc.h	Fri Aug 17 13:03:16 2001
@@ -0,0 +1,32 @@
+/*
+ * veth-proc.h
+ * Copyright (C) 2001 Kyle A. Lucke IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _VETH_PROC_H
+#define _VETH_PROC_H
+
+#include <linux/proc_fs.h>
+
+void veth_proc_init(struct proc_dir_entry *iSeries_proc);
+
+#endif /* _VETH_PROC_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ide.h linuxppc64_2_4/include/asm-ppc64/ide.h
--- ../kernel.org/linux/include/asm-ppc64/ide.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/ide.h	Mon Oct 29 20:36:27 2001
@@ -0,0 +1,198 @@
+/*
+ * linux/include/asm-ppc64/ide.h
+ *
+ * Copyright (C) 1994-1996 Linus Torvalds & authors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This file contains the ppc architecture specific IDE code.
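(Editor's note, not part of the patch: the ppc_ide_md hook table defined below
lets platform code supply its own IDE port accessors, and the inline wrappers
guard every call, falling back to a harmless default when a hook is absent;
the pattern, as the wrappers themselves use it, is:)

	/* illustrative restatement of the wrapper pattern below */
	if (ppc_ide_md.default_irq)
		irq = ppc_ide_md.default_irq(base);
	else
		irq = -1;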
+ */ + +#ifndef __ASMPPC64_IDE_H +#define __ASMPPC64_IDE_H + +#include +#include + +#ifndef MAX_HWIFS +#define MAX_HWIFS 4 +#endif + +#include + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +extern int pmac_ide_ports_known; +extern ide_ioreg_t pmac_ide_regbase[MAX_HWIFS]; +extern int pmac_ide_irq[MAX_HWIFS]; +extern void pmac_ide_probe(void); + +extern int chrp_ide_ports_known; +extern ide_ioreg_t chrp_ide_regbase[MAX_HWIFS]; +extern ide_ioreg_t chrp_idedma_regbase; /* one for both channels */ +extern unsigned int chrp_ide_irq; +extern void chrp_ide_probe(void); + +struct ide_machdep_calls { + void (*insw)(ide_ioreg_t port, void *buf, int ns); + void (*outsw)(ide_ioreg_t port, void *buf, int ns); + int (*default_irq)(ide_ioreg_t base); + ide_ioreg_t (*default_io_base)(int index); + int (*ide_check_region)(ide_ioreg_t from, unsigned int extent); + void (*ide_request_region)(ide_ioreg_t from, + unsigned int extent, + const char *name); + void (*ide_release_region)(ide_ioreg_t from, + unsigned int extent); + void (*fix_driveid)(struct hd_driveid *id); + void (*ide_init_hwif)(hw_regs_t *hw, + ide_ioreg_t data_port, + ide_ioreg_t ctrl_port, + int *irq); + + int io_base; +}; + +extern struct ide_machdep_calls ppc_ide_md; + +void ide_insw(ide_ioreg_t port, void *buf, int ns); +void ide_outsw(ide_ioreg_t port, void *buf, int ns); + +#if 0 +#undef insw +#define insw(port, buf, ns) do { \ + ppc_ide_md.insw((port), (buf), (ns)); \ +} while (0) + +#undef outsw +#define outsw(port, buf, ns) do { \ + ppc_ide_md.outsw((port), (buf), (ns)); \ +} while (0) +#endif + +#undef SUPPORT_SLOW_DATA_PORTS +#define SUPPORT_SLOW_DATA_PORTS 0 +#undef SUPPORT_VLB_SYNC +#define SUPPORT_VLB_SYNC 0 + +#define ide__sti() __sti() + +static __inline__ int ide_default_irq(ide_ioreg_t base) +{ + if ( ppc_ide_md.default_irq ) + return ppc_ide_md.default_irq(base); + else + return -1; +} + +static __inline__ ide_ioreg_t ide_default_io_base(int index) +{ + if ( ppc_ide_md.default_io_base ) + return ppc_ide_md.default_io_base(index); + else + return -1; +} + +static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, + ide_ioreg_t data_port, + ide_ioreg_t ctrl_port, int *irq) +{ + if (ppc_ide_md.ide_init_hwif != NULL) + ppc_ide_md.ide_init_hwif(hw, data_port, ctrl_port, irq); +} + +static __inline__ void ide_init_default_hwifs(void) +{ +#ifndef CONFIG_BLK_DEV_IDEPCI + hw_regs_t hw; + int index; + ide_ioreg_t base; + + for (index = 0; index < MAX_HWIFS; index++) { + base = ide_default_io_base(index); + if (base == 0) + continue; + ide_init_hwif_ports(&hw, base, 0, NULL); + hw.irq = ide_default_irq(base); + ide_register_hw(&hw, NULL); + } +#endif /* CONFIG_BLK_DEV_IDEPCI */ +} + +static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent) +{ + if ( ppc_ide_md.ide_check_region ) + return ppc_ide_md.ide_check_region(from, extent); + else + return -1; +} + +static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name) +{ + if ( ppc_ide_md.ide_request_region ) + ppc_ide_md.ide_request_region(from, extent, name); +} + +static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent) +{ + if ( ppc_ide_md.ide_release_region ) + ppc_ide_md.ide_release_region(from, extent); +} + +static __inline__ void ide_fix_driveid (struct hd_driveid *id) +{ + if ( ppc_ide_md.fix_driveid ) + ppc_ide_md.fix_driveid(id); +} + +#if 0 /* inb/outb from io.h is OK now -- paulus */ +#undef inb +#define inb(port) in_8((unsigned char *)((port) + ppc_ide_md.io_base)) +#undef 
inb_p +#define inb_p(port) inb(port) + +#undef outb +#define outb(val, port) \ + out_8((unsigned char *)((port) + ppc_ide_md.io_base), (val) ) +#undef outb_p +#define outb_p(val, port) outb(val, port) +#endif + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit7 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit5 : 1; /* always 1 */ + unsigned unit : 1; /* drive select number, 0/1 */ + unsigned head : 4; /* always zeros here */ + } b; +} select_t; + +#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) +#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) + +/* + * The following are not needed for the non-m68k ports + */ +#ifdef CONFIG_APUS +#define ide_ack_intr(hwif) (hwif->hw.ack_intr ? hwif->hw.ack_intr(hwif) : 1) +#else +#define ide_ack_intr(hwif) (1) +#endif +#define ide_release_lock(lock) do {} while (0) +#define ide_get_lock(lock, hdlr, data) do {} while (0) + +#endif /* __KERNEL__ */ + +#endif /* __ASMPPC64_IDE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/init.h linuxppc64_2_4/include/asm-ppc64/init.h --- ../kernel.org/linux/include/asm-ppc64/init.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/init.h Tue Aug 21 14:50:07 2001 @@ -0,0 +1,29 @@ +#ifndef _PPC64_INIT_H +#define _PPC64_INIT_H + +#include + +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 90 /* egcs */ +/* DRENG add back in when we get section attribute support */ +#define __chrp __attribute__ ((__section__ (".text.chrp"))) +#define __chrpdata __attribute__ ((__section__ (".data.chrp"))) +#define __chrpfunc(__argchrp) \ + __argchrp __chrp; \ + __argchrp + +/* this is actually just common chrp/pmac code, not OF code -- Cort */ +#define __openfirmware __attribute__ ((__section__ (".text.openfirmware"))) +#define __openfirmwaredata __attribute__ ((__section__ (".data.openfirmware"))) +#define __openfirmwarefunc(__argopenfirmware) \ + __argopenfirmware __openfirmware; \ + __argopenfirmware + +#else /* not egcs */ + +#define __openfirmware +#define __openfirmwaredata +#define __openfirmwarefunc(x) x + +#endif /* egcs */ + +#endif /* _PPC64_INIT_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/io.h linuxppc64_2_4/include/asm-ppc64/io.h --- ../kernel.org/linux/include/asm-ppc64/io.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/io.h Mon Nov 19 21:43:25 2001 @@ -0,0 +1,325 @@ +#ifndef _PPC64_IO_H +#define _PPC64_IO_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <linux/config.h>
+#include
+#include
+#ifdef CONFIG_PPC_ISERIES
+#include <asm/iSeries/iSeries_io.h>
+#endif
+#include
+
+#define SIO_CONFIG_RA	0x398
+#define SIO_CONFIG_RD	0x399
+
+#define SLOW_DOWN_IO
+/* Define this if you want to see virt_to_* messages */
+#undef __IO_DEBUG
+
+extern unsigned long isa_io_base;
+extern unsigned long isa_mem_base;
+extern unsigned long pci_io_base;
+extern unsigned long pci_dram_offset;
+extern int have_print;
+#define _IO_BASE	isa_io_base
+#define _ISA_MEM_BASE	isa_mem_base
+#define PCI_DRAM_OFFSET	pci_dram_offset
+
+#ifdef CONFIG_PPC_ISERIES
+#define readb(addr)		iSeries_Read_Byte((void*)(addr))
+#define readw(addr)		iSeries_Read_Word((void*)(addr))
+#define readl(addr)		iSeries_Read_Long((void*)(addr))
+#define writeb(data, addr)	iSeries_Write_Byte(data,((void*)(addr)))
+#define writew(data, addr)	iSeries_Write_Word(data,((void*)(addr)))
+#define writel(data, addr)	iSeries_Write_Long(data,((void*)(addr)))
+#define memset_io(a,b,c)	iSeries_memset_io((void *)(a),(b),(c))
+#define memcpy_fromio(a,b,c)	iSeries_memcpy_fromio((void *)(a), (void *)(b), (c))
+#define memcpy_toio(a,b,c)	iSeries_memcpy_toio((void *)(a), (void *)(b), (c))
+#define inb(addr)		readb(((unsigned long)(addr)))
+#define inw(addr)		readw(((unsigned long)(addr)))
+#define inl(addr)		readl(((unsigned long)(addr)))
+#define outb(data,addr)		writeb(data,((unsigned long)(addr)))
+#define outw(data,addr)		writew(data,((unsigned long)(addr)))
+#define outl(data,addr)		writel(data,((unsigned long)(addr)))
+#else
+#define IS_MAPPED_VADDR(port)	((unsigned long)(port) >> 60UL)
+#ifdef CONFIG_PPC_EEH
+#define readb(addr)		eeh_readb((void*)(addr))
+#define readw(addr)		eeh_readw((void*)(addr))
+#define readl(addr)		eeh_readl((void*)(addr))
+#define writeb(data, addr)	eeh_writeb((data), ((void*)(addr)))
+#define writew(data, addr)	eeh_writew((data), ((void*)(addr)))
+#define writel(data, addr)	eeh_writel((data), ((void*)(addr)))
+#define memset_io(a,b,c)	eeh_memset((void *)(a),(b),(c))
+#define memcpy_fromio(a,b,c)	eeh_memcpy_fromio((a),(void *)(b),(c))
+#define memcpy_toio(a,b,c)	eeh_memcpy_toio((void *)(a),(b),(c))
+#else
+#define readb(addr)		in_8((volatile u8 *)(addr))
+#define writeb(b,addr)		out_8((volatile u8 *)(addr), (b))
+#define readw(addr)		in_le16((volatile u16 *)(addr))
+#define readl(addr)		in_le32((volatile u32 *)(addr))
+#define writew(b,addr)		out_le16((volatile u16 *)(addr),(b))
+#define writel(b,addr)		out_le32((volatile u32 *)(addr),(b))
+#define memset_io(a,b,c)	memset((void *)(a),(b),(c))
+#define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
+#define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
+#endif
+#define inb(port)		_inb((unsigned long)port)
+#define outb(val, port)		_outb(val, (unsigned long)port)
+#define inw(port)		_inw((unsigned long)port)
+#define outw(val, port)		_outw(val, (unsigned long)port)
+#define inl(port)		_inl((unsigned long)port)
+#define outl(val, port)		_outl(val, (unsigned long)port)
+#endif
+
+/* Pause versions are not special */
+#define inb_p(port)		inb(port)
+#define outb_p(val, port)	outb((val), (port))
+#define inw_p(port)		inw(port)
+#define outw_p(val, port)	outw((val), (port))
+#define inl_p(port)		inl(port)
+#define outl_p(val, port)	outl((val), (port))
+
+/*
+ * The insw/outsw/insl/outsl macros don't do byte-swapping.
+ * They are only used in practice for transferring buffers which
+ * are arrays of bytes, and byte-swapping is not appropriate in
+ * that case.
- paulus + */ +#define insb(port, buf, ns) _insb((u8 *)((port)+_IO_BASE), (buf), (ns)) +#define outsb(port, buf, ns) _outsb((u8 *)((port)+_IO_BASE), (buf), (ns)) +#define insw(port, buf, ns) _insw_ns((u16 *)((port)+_IO_BASE), (buf), (ns)) +#define outsw(port, buf, ns) _outsw_ns((u16 *)((port)+_IO_BASE), (buf), (ns)) +#define insl(port, buf, nl) _insl_ns((u32 *)((port)+_IO_BASE), (buf), (nl)) +#define outsl(port, buf, nl) _outsl_ns((u32 *)((port)+_IO_BASE), (buf), (nl)) + + +extern void _insb(volatile u8 *port, void *buf, int ns); +extern void _outsb(volatile u8 *port, const void *buf, int ns); +extern void _insw(volatile u16 *port, void *buf, int ns); +extern void _outsw(volatile u16 *port, const void *buf, int ns); +extern void _insl(volatile u32 *port, void *buf, int nl); +extern void _outsl(volatile u32 *port, const void *buf, int nl); +extern void _insw_ns(volatile u16 *port, void *buf, int ns); +extern void _outsw_ns(volatile u16 *port, const void *buf, int ns); +extern void _insl_ns(volatile u32 *port, void *buf, int nl); +extern void _outsl_ns(volatile u32 *port, const void *buf, int nl); + +/* + * The *_ns versions below don't do byte-swapping. + * Neither do the standard versions now; these are just here + * for older code. + */ +#define insw_ns(port, buf, ns) _insw_ns((u16 *)((port)+_IO_BASE), (buf), (ns)) +#define outsw_ns(port, buf, ns) _outsw_ns((u16 *)((port)+_IO_BASE), (buf), (ns)) +#define insl_ns(port, buf, nl) _insl_ns((u32 *)((port)+_IO_BASE), (buf), (nl)) +#define outsl_ns(port, buf, nl) _outsl_ns((u32 *)((port)+_IO_BASE), (buf), (nl)) + + +#define IO_SPACE_LIMIT ~(0UL) +#define MEM_SPACE_LIMIT ~(0UL) + + +#ifdef __KERNEL__ +/* + * Map in an area of physical address space, for accessing + * I/O devices etc. + */ +extern void *__ioremap(unsigned long address, unsigned long size, + unsigned long flags); +extern void *ioremap(unsigned long address, unsigned long size); +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +extern void iounmap(void *addr); + +/* + * Change virtual addresses to physical addresses and vv, for + * addresses in the area where the kernel has the RAM mapped. + */ +extern inline unsigned long virt_to_phys(volatile void * address) +{ +#ifdef __IO_DEBUG + printk("virt_to_phys: 0x%08lx -> 0x%08lx\n", + (unsigned long) address, + __pa((unsigned long)address)); +#endif + return __pa((unsigned long)address); +} + +extern inline void * phys_to_virt(unsigned long address) +{ +#ifdef __IO_DEBUG + printk("phys_to_virt: 0x%08lx -> 0x%08lx\n", address, __va(address)); +#endif + return (void *) __va(address); +} + +#endif /* __KERNEL__ */ + +extern inline void iosync(void) +{ + __asm__ __volatile__ ("sync" : : : "memory"); +} + +/* Enforce in-order execution of data I/O. + * No distinction between read/write on PPC; use eieio for all three. + */ +#define iobarrier_rw() eieio() +#define iobarrier_r() eieio() +#define iobarrier_w() eieio() + +/* + * 8, 16 and 32 bit, big and little endian I/O operations, with barrier.
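To make the swap/no-swap distinction concrete, here is a hedged sketch (device layout and names invented) contrasting a little-endian register read, a big-endian register read, and a raw buffer transfer that must never be swapped:

        static void example_accessors(volatile u16 *le_reg, volatile u16 *be_reg,
                                      unsigned long fifo_port, u16 *buf)
        {
                int a = in_le16(le_reg);        /* lhbrx: byte-reversed into CPU order */
                int b = in_be16(be_reg);        /* lhz: taken as-is, no reversal */

                (void)a; (void)b;               /* values unused in this sketch */
                insw(fifo_port, buf, 16);       /* 16 words of raw bytes, never swapped */
        }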
+ */ +extern inline int in_8(volatile unsigned char *addr) +{ + int ret; + + __asm__ __volatile__("eieio; lbz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + return ret; +} + +extern inline void out_8(volatile unsigned char *addr, int val) +{ + __asm__ __volatile__("stb%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); +} + +extern inline int in_le16(volatile unsigned short *addr) +{ + int ret; + + __asm__ __volatile__("eieio; lhbrx %0,0,%1" : "=r" (ret) : + "r" (addr), "m" (*addr)); + return ret; +} + +extern inline int in_be16(volatile unsigned short *addr) +{ + int ret; + + __asm__ __volatile__("eieio; lhz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + return ret; +} + +extern inline void out_le16(volatile unsigned short *addr, int val) +{ + __asm__ __volatile__("sthbrx %1,0,%2" : "=m" (*addr) : + "r" (val), "r" (addr)); +} + +extern inline void out_be16(volatile unsigned short *addr, int val) +{ + __asm__ __volatile__("sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); +} + +extern inline unsigned in_le32(volatile unsigned *addr) +{ + unsigned ret; + + __asm__ __volatile__("eieio; lwbrx %0,0,%1" : "=r" (ret) : + "r" (addr), "m" (*addr)); + return ret; +} + +extern inline unsigned in_be32(volatile unsigned *addr) +{ + unsigned ret; + + __asm__ __volatile__("eieio; lwz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + return ret; +} + +extern inline void out_le32(volatile unsigned *addr, int val) +{ + __asm__ __volatile__("stwbrx %1,0,%2" : "=m" (*addr) : + "r" (val), "r" (addr)); +} + +extern inline void out_be32(volatile unsigned *addr, int val) +{ + __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); +} + +#ifdef CONFIG_PPC_EEH +#include +#endif + +#ifndef CONFIG_PPC_ISERIES +static inline u8 _inb(unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return readb((void *)port); + else if (_IO_BASE) + return in_8((u8 *)((port)+_IO_BASE)); + else + return 0xff; +} +static inline void _outb(u8 val, unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return writeb(val, (void *)port); + else if (_IO_BASE) + out_8((u8 *)((port)+_IO_BASE), val); +} +static inline u16 _inw(unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return readw((void *)port); + else if (_IO_BASE) + return in_le16((u16 *)((port)+_IO_BASE)); + else + return 0xffff; +} +static inline void _outw(u16 val, unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return writew(val, (void *)port); + else if (_IO_BASE) + out_le16((u16 *)((port)+_IO_BASE), val); +} +static inline u32 _inl(unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return readl((void *)port); + else if (_IO_BASE) + return in_le32((u32 *)((port)+_IO_BASE)); + else + return 0xffffffff; +} +static inline void _outl(u32 val, unsigned long port) { + if (IS_MAPPED_VADDR(port)) + return writel(val, (void *)port); + else if (_IO_BASE) + out_le32((u32 *)((port)+_IO_BASE), val); +} +#endif + +#ifdef __KERNEL__ +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; +#ifndef CONFIG_PPC_ISERIES + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: +#endif + return retval; +} + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) + +#endif /* __KERNEL__ */ + +#endif /* _PPC64_IO_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ioctl.h linuxppc64_2_4/include/asm-ppc64/ioctl.h 
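The check_signature() helper above pairs naturally with ioremap()/iounmap() from the same header. A hedged sketch of a probe routine, assuming a hypothetical adapter whose ROM begins with the conventional 0x55 0xAA marker (the physical address and device semantics are invented):

        static int example_probe_card(unsigned long phys_addr)
        {
                static const unsigned char sig[] = { 0x55, 0xaa };
                void *va = ioremap(phys_addr, sizeof(sig));
                int found;

                if (va == NULL)
                        return 0;
                found = check_signature((unsigned long)va, sig, sizeof(sig));
                iounmap(va);
                return found;   /* 1 if the signature matched */
        }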
--- ../kernel.org/linux/include/asm-ppc64/ioctl.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ioctl.h Mon May 7 13:26:08 2001 @@ -0,0 +1,64 @@ +#ifndef _PPC64_IOCTL_H +#define _PPC64_IOCTL_H + + +/* + * This was copied from the alpha as it's a bit cleaner there. + * -- Cort + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 13 +#define _IOC_DIRBITS 3 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. + * And this turns out useful to catch old ioctl numbers in header + * files for us. + */ +#define _IOC_NONE 1U +#define _IOC_READ 2U +#define _IOC_WRITE 4U + +#define _IOC(dir,type,nr,size) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT)) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode them.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* various drivers, such as the pcmcia stuff, need these... */ +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _PPC64_IOCTL_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ioctls.h linuxppc64_2_4/include/asm-ppc64/ioctls.h --- ../kernel.org/linux/include/asm-ppc64/ioctls.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ioctls.h Fri Aug 10 13:25:19 2001 @@ -0,0 +1,115 @@ +#ifndef _ASM_PPC64_IOCTLS_H +#define _ASM_PPC64_IOCTLS_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
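Before the command lists that follow, a worked example of the _IOC encoding defined in ioctl.h above may help. This is a hedged userspace-style sketch; it assumes only the macros above are in scope, and the int payload stands in for a real structure such as struct termios:

        #include <stdio.h>

        int main(void)
        {
                unsigned int cmd = _IOW('t', 20, int);  /* dir=write, type 't', nr 20 */

                printf("dir=%u type=%c nr=%u size=%u\n",
                       _IOC_DIR(cmd),   /* 4, i.e. _IOC_WRITE */
                       _IOC_TYPE(cmd),  /* 't' */
                       _IOC_NR(cmd),    /* 20 */
                       _IOC_SIZE(cmd)); /* sizeof(int) == 4 */
                return 0;
        }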
+ */ + +#include + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +# define TIOCM_LE 0x001 +# define TIOCM_DTR 0x002 +# define TIOCM_RTS 0x004 +# define TIOCM_ST 0x008 +# define TIOCM_SR 0x010 +# define TIOCM_CTS 0x020 +# define TIOCM_CAR 0x040 +# define TIOCM_RNG 0x080 +# define TIOCM_DSR 0x100 +# define TIOCM_CD TIOCM_CAR +# define TIOCM_RI TIOCM_RNG + +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +# define TIOCPKT_DATA 0 +# define TIOCPKT_FLUSHREAD 1 +# define TIOCPKT_FLUSHWRITE 2 +# define TIOCPKT_STOP 4 +# define TIOCPKT_START 8 +# define TIOCPKT_NOSTOP 16 +# define TIOCPKT_DOSTOP 32 + + +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCTTYGSTRUCT 0x5426 /* For debugging only */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ + /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +#endif /* _ASM_PPC64_IOCTLS_H */ diff -uNr 
--exclude=CVS ../kernel.org/linux/include/asm-ppc64/ipc.h linuxppc64_2_4/include/asm-ppc64/ipc.h --- ../kernel.org/linux/include/asm-ppc64/ipc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ipc.h Mon Sep 24 12:30:35 2001 @@ -0,0 +1,33 @@ +#ifndef __PPC64_IPC_H__ +#define __PPC64_IPC_H__ + +/* + * These are used to wrap system calls on PowerPC. + * + * See arch/ppc/kernel/syscalls.c for ugly details.. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +struct ipc_kludge { + u32 msgp; + s32 msgtyp; +}; + +#define SEMOP 1 +#define SEMGET 2 +#define SEMCTL 3 +#define MSGSND 11 +#define MSGRCV 12 +#define MSGGET 13 +#define MSGCTL 14 +#define SHMAT 21 +#define SHMDT 22 +#define SHMGET 23 +#define SHMCTL 24 + +#define IPCCALL(version,op) ((version)<<16 | (op)) + +#endif /* __PPC64_IPC_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ipcbuf.h linuxppc64_2_4/include/asm-ppc64/ipcbuf.h --- ../kernel.org/linux/include/asm-ppc64/ipcbuf.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ipcbuf.h Mon Nov 19 21:26:53 2001 @@ -0,0 +1,28 @@ +#ifndef __PPC64_IPCBUF_H__ +#define __PPC64_IPCBUF_H__ + +/* + * The ipc64_perm structure for the PPC is identical to kern_ipc_perm + * as we have always had 32-bit UIDs and GIDs in the kernel. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid_t uid; + __kernel_gid_t gid; + __kernel_uid_t cuid; + __kernel_gid_t cgid; + __kernel_mode_t mode; + unsigned int seq; + unsigned int __pad1; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __PPC64_IPCBUF_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/irq.h linuxppc64_2_4/include/asm-ppc64/irq.h --- ../kernel.org/linux/include/asm-ppc64/irq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/irq.h Wed Oct 24 10:47:30 2001 @@ -0,0 +1,55 @@ +#ifdef __KERNEL__ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +/* + * this is the maximum number of virtual irqs we will use. + */ +#define NR_IRQS 512 + +#define NUM_8259_INTERRUPTS 16 + +/* Interrupt numbers are virtual in case they are sparsely + * distributed by the hardware. + */ +#define NR_HW_IRQS 8192 +extern unsigned short real_irq_to_virt_map[NR_HW_IRQS]; +extern unsigned short virt_irq_to_real_map[NR_IRQS]; +/* Create a mapping for a real_irq if it doesn't already exist. + * Return the virtual irq as a convenience. 
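A sketch of how a platform interrupt layer might use this mapping (the surrounding driver context is invented; virt_irq_create_mapping() and the lookup helpers are declared just below):

        static unsigned long example_route_hw_irq(unsigned long hw_irq)
        {
                unsigned long virq = virt_irq_create_mapping(hw_irq);

                /* a real caller would now pass virq to request_irq();
                 * NR_IRQS serves as an "invalid" marker in this sketch */
                return (virt_irq_to_real(virq) == hw_irq) ? virq : NR_IRQS;
        }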
+ */ +unsigned long virt_irq_create_mapping(unsigned long real_irq); + +/* These funcs map irqs between real and virtual */ +static inline unsigned long real_irq_to_virt(unsigned long real_irq) { + return real_irq_to_virt_map[real_irq]; +} +static inline unsigned long virt_irq_to_real(unsigned long virt_irq) { + return virt_irq_to_real_map[virt_irq]; +} + +/* + * This gets called from serial.c, which is now used on + * powermacs as well as prep/chrp boxes. + * Prep and chrp both have cascaded 8259 PICs. + */ +static __inline__ int irq_cannonicalize(int irq) +{ + return irq; +} + +#endif /* _ASM_IRQ_H */ +#endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/kdb.h linuxppc64_2_4/include/asm-ppc64/kdb.h --- ../kernel.org/linux/include/asm-ppc64/kdb.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/kdb.h Mon Jun 4 10:26:12 2001 @@ -0,0 +1,61 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H + /* + * KDB_ENTER() is a macro which causes entry into the kernel + * debugger from any point in the kernel code stream. If it + * is intended to be used from interrupt level, it must use + * a non-maskable entry method. + */ +#define KDB_ENTER() asm("\ttrap\n") + + /* + * Define the exception frame for this architecture + */ +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; + + /* + * Needed for exported symbols. + */ +typedef unsigned long kdb_machreg_t; + +#define kdb_machreg_fmt "0x%016lx" +#define kdb_machreg_fmt0 "0x%016lx" +#define kdb_bfd_vma_fmt "0x%016lx" +#define kdb_bfd_vma_fmt0 "0x%016lx" +#define kdb_elfw_addr_fmt "0x%016lx" +#define kdb_elfw_addr_fmt0 "0x%016lx" + + /* + * Per cpu arch specific kdb state. Must be in range 0xff000000. + */ +#define KDB_STATE_A_IF 0x01000000 /* Saved IF flag */ + + /* + * Interface from kernel trap handling code to kernel debugger. + */ +extern int kdba_callback_die(struct pt_regs *, int, long, void*); +extern int kdba_callback_bp(struct pt_regs *, int, long, void*); +extern int kdba_callback_debug(struct pt_regs *, int, long, void *); + +#endif /* ASM_KDB_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/kdbprivate.h linuxppc64_2_4/include/asm-ppc64/kdbprivate.h --- ../kernel.org/linux/include/asm-ppc64/kdbprivate.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/kdbprivate.h Mon Jun 4 10:26:12 2001 @@ -0,0 +1,118 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring.
+ * Keith Owens 2000/05/23 + * KDB v1.2 + */ +#if !defined(_ASM_KDBPRIVATE_H) +#define _ASM_KDBPRIVATE_H + +typedef unsigned char kdb_machinst_t; + + /* + * KDB_MAXBPT describes the total number of breakpoints + * supported by this architecture. + */ +#define KDB_MAXBPT 16 + /* + * KDB_MAXHARDBPT describes the total number of hardware + * breakpoint registers that exist. + */ +#define KDB_MAXHARDBPT 4 + /* + * Provide space for KDB_MAX_COMMANDS commands. + */ +#define KDB_MAX_COMMANDS 125 + + /* + * Platform specific environment entries + */ +#define KDB_PLATFORM_ENV "IDMODE=PPC", "BYTESPERWORD=8", "IDCOUNT=16" + + /* + * Define the direction that the stack grows + */ +#define KDB_STACK_DIRECTION -1 /* Stack grows down */ + + /* + * Support for ia32 debug registers + */ +typedef struct _kdbhard_bp { + kdb_machreg_t bph_reg; /* Register this breakpoint uses */ + + unsigned int bph_free:1; /* Register available for use */ + unsigned int bph_data:1; /* Data Access breakpoint */ + + unsigned int bph_write:1; /* Write Data breakpoint */ + unsigned int bph_mode:2; /* 0=inst, 1=write, 2=io, 3=read */ + unsigned int bph_length:2; /* 0=1, 1=2, 2=BAD, 3=4 (bytes) */ +} kdbhard_bp_t; + +extern kdbhard_bp_t kdb_hardbreaks[/* KDB_MAXHARDBPT */]; + +#define KDB_HAVE_LONGJMP 1 +#ifdef KDB_HAVE_LONGJMP +typedef struct __kdb_jmp_buf { + unsigned int regs[100]; +} kdb_jmp_buf; +extern int kdb_setjmp(kdb_jmp_buf *); +extern void kdb_longjmp(kdb_jmp_buf *, int); +extern kdb_jmp_buf kdbjmpbuf[]; +#endif /* KDB_HAVE_LONGJMP */ + + +/* + A traceback table typically follows each function. + The find_tb_table() func will fill in this struct. Note that the struct + is not an exact match with the encoded table defined by the ABI. It is + defined here more for programming convenience. + */ +typedef struct { + unsigned long flags; /* flags: */ +#define KDBTBTAB_FLAGSGLOBALLINK (1L<<47) +#define KDBTBTAB_FLAGSISEPROL (1L<<46) +#define KDBTBTAB_FLAGSHASTBOFF (1L<<45) +#define KDBTBTAB_FLAGSINTPROC (1L<<44) +#define KDBTBTAB_FLAGSHASCTL (1L<<43) +#define KDBTBTAB_FLAGSTOCLESS (1L<<42) +#define KDBTBTAB_FLAGSFPPRESENT (1L<<41) +#define KDBTBTAB_FLAGSNAMEPRESENT (1L<<38) +#define KDBTBTAB_FLAGSUSESALLOCA (1L<<37) +#define KDBTBTAB_FLAGSSAVESCR (1L<<33) +#define KDBTBTAB_FLAGSSAVESLR (1L<<32) +#define KDBTBTAB_FLAGSSTORESBC (1L<<31) +#define KDBTBTAB_FLAGSFIXUP (1L<<30) +#define KDBTBTAB_FLAGSPARMSONSTK (1L<<0) + unsigned char fp_saved; /* num fp regs saved f(32-n)..f31 */ + unsigned char gpr_saved; /* num gpr's saved */ + unsigned char fixedparms; /* num fixed point parms */ + unsigned char floatparms; /* num float parms */ + unsigned char parminfo[32]; /* types of args.
null terminated */ +#define KDBTBTAB_PARMFIXED 1 +#define KDBTBTAB_PARMSFLOAT 2 +#define KDBTBTAB_PARMDFLOAT 3 + unsigned int tb_offset; /* offset from start of func */ + unsigned long funcstart; /* addr of start of function */ + char name[64]; /* name of function (null terminated)*/ + kdb_symtab_t symtab; /* fake symtab entry */ +} kdbtbtable_t; +int kdba_find_tb_table(kdb_machreg_t eip, kdbtbtable_t *tab); + + +#endif /* !_ASM_KDBPRIVATE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/keyboard.h linuxppc64_2_4/include/asm-ppc64/keyboard.h --- ../kernel.org/linux/include/asm-ppc64/keyboard.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/keyboard.h Wed Oct 24 13:55:10 2001 @@ -0,0 +1,115 @@ +/* + * linux/include/asm-ppc/keyboard.h + * + * Created 3 Nov 1996 by Geert Uytterhoeven + * Modified for Power Macintosh by Paul Mackerras + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This file contains the ppc architecture specific keyboard definitions - + * like the intel pc for prep systems, different for power macs. + */ + +#ifndef __ASMPPC64_KEYBOARD_H +#define __ASMPPC64_KEYBOARD_H + +#ifdef __KERNEL__ + +#include +#include + +#include +#include +#include +#include + +#define KEYBOARD_IRQ 1 +#define DISABLE_KBD_DURING_INTERRUPTS 0 +#define INIT_KBD + +static inline int kbd_setkeycode(unsigned int scancode, unsigned int keycode) +{ + if ( ppc_md.kbd_setkeycode ) + return ppc_md.kbd_setkeycode(scancode, keycode); + else + return 0; +} + +static inline int kbd_getkeycode(unsigned int scancode) +{ + if ( ppc_md.kbd_getkeycode ) + return ppc_md.kbd_getkeycode(scancode); + else + return 0; +} + +static inline int kbd_translate(unsigned char keycode, unsigned char *keycodep, + char raw_mode) +{ + if ( ppc_md.kbd_translate ) + return ppc_md.kbd_translate(keycode, keycodep, raw_mode); + else + return 0; +} + +static inline int kbd_unexpected_up(unsigned char keycode) +{ + if ( ppc_md.kbd_unexpected_up ) + return ppc_md.kbd_unexpected_up(keycode); + else + return 0; +} + +static inline void kbd_leds(unsigned char leds) +{ + if ( ppc_md.kbd_leds ) + ppc_md.kbd_leds(leds); +} + +static inline void kbd_init_hw(void) +{ + if ( ppc_md.kbd_init_hw ) + ppc_md.kbd_init_hw(); +} + +#define kbd_sysrq_xlate (ppc_md.ppc_kbd_sysrq_xlate) + +extern unsigned long SYSRQ_KEY; +#define E1_PAUSE 119 /* PAUSE key */ + +/* resource allocation */ +#define kbd_request_region() +#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ + "keyboard", NULL) + +/* How to access the keyboard macros on this platform. */ +#define kbd_read_input() inb(KBD_DATA_REG) +static inline int kbd_read_status(void) { + int ret = inb(0x64); + return (ret == 0xff) ? 0 : ret; /* translate float to bad status. */ +} +#define kbd_write_output(val) outb(val, KBD_DATA_REG) +#define kbd_write_command(val) outb(val, KBD_CNTL_REG) + +/* Some stoneage hardware needs delays after some operations. 
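The 0xff-to-0 translation in kbd_read_status() above matters on machines with no 8042 keyboard controller: a floating ISA bus reads back 0xff, which would otherwise look like "all status bits set". A hedged sketch of a ready-wait loop built on it; the 0x02 input-buffer-full mask is the conventional 8042 bit, assumed here rather than taken from this header:

        static void example_kbd_wait_ready(void)
        {
                int timeout = 1000;

                /* exits immediately on a floating bus, since 0xff reads as 0 */
                while (timeout-- > 0 && (kbd_read_status() & 0x02))
                        kbd_pause();
        }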
*/ +#define kbd_pause() do { } while(0) + +/* + * Machine specific bits for the PS/2 driver + */ + +#define AUX_IRQ 12 + +#define aux_request_irq(hand, dev_id) \ + request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) + +#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) + +#endif /* __KERNEL__ */ + +#endif /* __ASMPPC64_KEYBOARD_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/kgdb.h linuxppc64_2_4/include/asm-ppc64/kgdb.h --- ../kernel.org/linux/include/asm-ppc64/kgdb.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/kgdb.h Tue Jun 19 08:38:06 2001 @@ -0,0 +1,53 @@ +/* + * kgdb.h: Defines and declarations for serial line source level + * remote debugging of the Linux kernel using gdb. + * + * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu) + * + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _PPC64_KGDB_H +#define _PPC64_KGDB_H + +#ifndef __ASSEMBLY__ +/* To initialize the serial, first thing called */ +extern void zs_kgdb_hook(int tty_num); +/* To init the kgdb engine. (called by serial hook)*/ +extern void set_debug_traps(void); + +/* To enter the debugger explicitly. */ +extern void breakpoint(void); + +/* For taking exceptions + * these are defined in traps.c + */ +extern void (*debugger)(struct pt_regs *regs); +extern int (*debugger_bpt)(struct pt_regs *regs); +extern int (*debugger_sstep)(struct pt_regs *regs); +extern int (*debugger_iabr_match)(struct pt_regs *regs); +extern int (*debugger_dabr_match)(struct pt_regs *regs); +extern void (*debugger_fault_handler)(struct pt_regs *regs); + +/* What we bring to the party */ +int kgdb_bpt(struct pt_regs *regs); +int kgdb_sstep(struct pt_regs *regs); +void kgdb(struct pt_regs *regs); +int kgdb_iabr_match(struct pt_regs *regs); +int kgdb_dabr_match(struct pt_regs *regs); +static void kgdb_fault_handler(struct pt_regs *regs); +static void handle_exception (struct pt_regs *regs); + +/* + * external low-level support routines (ie macserial.c) + */ +extern void kgdb_interruptible(int); /* control interrupts from serial */ +extern void putDebugChar(char); /* write a single character */ +extern char getDebugChar(void); /* read and return a single char */ + +#endif /* !(__ASSEMBLY__) */ +#endif /* !(_PPC64_KGDB_H) */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/linux_logo.h linuxppc64_2_4/include/asm-ppc64/linux_logo.h --- ../kernel.org/linux/include/asm-ppc64/linux_logo.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/linux_logo.h Mon May 7 13:26:08 2001 @@ -0,0 +1,46 @@ +/* + * include/asm-ppc/linux_logo.h: A linux logo to be displayed on boot + * (pinched from the sparc port). + * + * Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu) + * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * + * You can put anything here, but: + * LINUX_LOGO_COLORS has to be less than 224 + * values have to start from 0x20 + * (i.e. linux_logo_{red,green,blue}[0] is color 0x20) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include + +#define linux_logo_banner "Linux/PPC-64 version " UTS_RELEASE + +#define LINUX_LOGO_HEIGHT 80 +#define LINUX_LOGO_WIDTH 80 +#define LINUX_LOGO_COLORS 214 + +#ifdef INCLUDE_LINUX_LOGO_DATA + +#define INCLUDE_LINUX_LOGOBW +#define INCLUDE_LINUX_LOGO16 +#include + +#else + +/* prototypes only */ +extern unsigned char linux_logo_red[]; +extern unsigned char linux_logo_green[]; +extern unsigned char linux_logo_blue[]; +extern unsigned char linux_logo[]; +extern unsigned char linux_logo_bw[]; +extern unsigned char linux_logo16_red[]; +extern unsigned char linux_logo16_green[]; +extern unsigned char linux_logo16_blue[]; +extern unsigned char linux_logo16[]; + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/lmb.h linuxppc64_2_4/include/asm-ppc64/lmb.h --- ../kernel.org/linux/include/asm-ppc64/lmb.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/lmb.h Thu Sep 20 14:25:05 2001 @@ -0,0 +1,106 @@ +#ifndef _PPC64_LMB_H +#define _PPC64_LMB_H + +/* + * Definitions for talking to the Open Firmware PROM on + * Power Macintosh computers. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +extern unsigned long reloc_offset(void); + +#define MAX_LMB_REGIONS 32 + +union lmb_reg_property { + struct reg_property32 addr32[MAX_LMB_REGIONS]; + struct reg_property64 addr64[MAX_LMB_REGIONS]; +}; + +#define LMB_MEMORY_AREA 1 +#define LMB_IO_AREA 2 + +struct lmb_property { + unsigned long base; + unsigned long physbase; + unsigned long size; + unsigned long type; +}; + +struct lmb_region { + unsigned long cnt; + unsigned long size; + unsigned long lcd_size; /* Least Common Denominator */ + struct lmb_property region[MAX_LMB_REGIONS+1]; +}; + +struct lmb { + unsigned long debug; + struct lmb_region memory; + struct lmb_region reserved; +}; + +extern struct lmb lmb; + +extern void lmb_init(void); +extern void lmb_analyze(void); +extern long lmb_add(unsigned long, unsigned long); +extern long lmb_add_io(unsigned long base, unsigned long size); +extern long lmb_reserve(unsigned long, unsigned long); +extern unsigned long lmb_alloc(unsigned long, unsigned long); +extern unsigned long lmb_phys_mem_size(void); +extern unsigned long lmb_end_of_DRAM(void); +extern unsigned long lmb_abs_to_phys(unsigned long); +extern void lmb_dump(char *); + +static inline unsigned long +lmb_addrs_overlap(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static inline long +lmb_regions_overlap(struct lmb_region *rgn, unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_overlap(base1,size1,base2,size2); +} + +static inline long +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if ( base2 == base1 + size1 ) { + return 1; + } else if ( base1 == base2 + size2 ) { + return -1; + } + return 0; +} + +static inline long +lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long 
size1 = rgn->region[r1].size; + unsigned long type1 = rgn->region[r1].type; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + unsigned long type2 = rgn->region[r2].type; + + return (type1 == type2) && lmb_addrs_adjacent(base1,size1,base2,size2); +} + +#endif /* _PPC64_LMB_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/machdep.h linuxppc64_2_4/include/asm-ppc64/machdep.h --- ../kernel.org/linux/include/asm-ppc64/machdep.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/machdep.h Wed Oct 17 07:33:28 2001 @@ -0,0 +1,163 @@ +#ifdef __KERNEL__ +#ifndef _PPC_MACHDEP_H +#define _PPC_MACHDEP_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +struct pt_regs; +struct pci_bus; +struct pci_dev; +struct kbd_repeat; +struct device_node; +struct TceTable; +struct rtc_time; + +struct machdep_calls { + /* High use functions in the first cachelines, low use functions + * follow. DRENG collect profile data. + */ + void (*hpte_invalidate)(unsigned long slot); + + void (*hpte_updatepp)(long slot, + unsigned long newpp, + unsigned long va); + void (*hpte_updateboltedpp)(unsigned long newpp, + unsigned long ea); + unsigned long (*hpte_getword0)(unsigned long slot); + + long (*hpte_find)( unsigned long vpn ); + + long (*hpte_selectslot)(unsigned long vpn); + + void (*hpte_create_valid)(unsigned long slot, + unsigned long vpn, + unsigned long prpn, + unsigned hash, + void * ptep, + unsigned hpteflags, + unsigned bolted); + void (*tce_build)(struct TceTable * tbl, + long tcenum, + unsigned long uaddr, + int direction); + void (*tce_free)(struct TceTable *tbl, + dma_addr_t dma_addr, + unsigned order, + unsigned numPages); + + void (*smp_message_pass)(int target, + int msg, + unsigned long data, + int wait); + int (*smp_probe)(void); + void (*smp_kick_cpu)(int nr); + void (*smp_setup_cpu)(int nr); + + void (*setup_arch)(void); + /* Optional, may be NULL. */ + int (*setup_residual)(char *buffer); + /* Optional, may be NULL. */ + int (*get_cpuinfo)(char *buffer); + /* Optional, may be NULL. */ + unsigned int (*irq_cannonicalize)(unsigned int irq); + void (*init_IRQ)(void); + void (*init_ras_IRQ)(void); + int (*get_irq)(struct pt_regs *); + void (*post_irq)( struct pt_regs *, int ); + + /* A general init function, called by ppc_init in init/main.c. + May be NULL. */ + void (*init)(void); + + void (*restart)(char *cmd); + void (*power_off)(void); + void (*halt)(void); + + long (*time_init)(void); /* Optional, may be NULL */ + int (*set_rtc_time)(struct rtc_time *); + void (*get_rtc_time)(struct rtc_time *); + void (*get_boot_time)(struct rtc_time *); + void (*calibrate_decr)(void); + + void (*progress)(char *, unsigned short); + + unsigned char (*nvram_read_val)(int addr); + void (*nvram_write_val)(int addr, unsigned char val); + +/* Tons of keyboard stuff. */ + int (*kbd_setkeycode)(unsigned int scancode, + unsigned int keycode); + int (*kbd_getkeycode)(unsigned int scancode); + int (*kbd_translate)(unsigned char scancode, + unsigned char *keycode, + char raw_mode); + char (*kbd_unexpected_up)(unsigned char keycode); + void (*kbd_leds)(unsigned char leds); + void (*kbd_init_hw)(void); +#ifdef CONFIG_MAGIC_SYSRQ + unsigned char *ppc_kbd_sysrq_xlate; +#endif + + /* Debug interface. 
Low level I/O to some terminal device */ + void (*udbg_putc)(unsigned char c); + unsigned char (*udbg_getc)(void); + int (*udbg_getc_poll)(void); + + /* PCI interfaces */ + int (*pcibios_read_config_byte)(struct device_node *dn, int offset, u8 *val); + int (*pcibios_read_config_word)(struct device_node *dn, int offset, u16 *val); + int (*pcibios_read_config_dword)(struct device_node *dn, int offset, u32 *val); + int (*pcibios_write_config_byte)(struct device_node *dn, int offset, u8 val); + int (*pcibios_write_config_word)(struct device_node *dn, int offset, u16 val); + int (*pcibios_write_config_dword)(struct device_node *dn, int offset, u32 val); + + /* Called after scanning the bus, before allocating + * resources + */ + void (*pcibios_fixup)(void); + + /* Called for each PCI bus in the system + * when it's probed + */ + void (*pcibios_fixup_bus)(struct pci_bus *); + + /* Called when pci_enable_device() is called (initial=0) or + * when a device with no assigned resource is found (initial=1). + * Returns 0 to allow assignment/enabling of the device + */ + int (*pcibios_enable_device_hook)(struct pci_dev *, int initial); + + void* (*pci_dev_io_base)(unsigned char bus, unsigned char devfn, int physical); + void* (*pci_dev_mem_base)(unsigned char bus, unsigned char devfn); + int (*pci_dev_root_bridge)(unsigned char bus, unsigned char devfn); + + /* this is for modules, since _machine can be a define -- Cort */ + int ppc_machine; +}; + +extern struct machdep_calls ppc_md; +extern char cmd_line[512]; + +extern void setup_pci_ptrs(void); + +/* + * Power macintoshes have either a CUDA or a PMU controlling + * system reset, power, NVRAM, RTC. + */ +typedef enum sys_ctrler_kind { + SYS_CTRLER_UNKNOWN = 0, + SYS_CTRLER_CUDA = 1, + SYS_CTRLER_PMU = 2, +} sys_ctrler_t; + +extern sys_ctrler_t sys_ctrler; + +#endif /* _PPC_MACHDEP_H */ +#endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/mc146818rtc.h linuxppc64_2_4/include/asm-ppc64/mc146818rtc.h --- ../kernel.org/linux/include/asm-ppc64/mc146818rtc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/mc146818rtc.h Mon May 7 15:09:40 2001 @@ -0,0 +1,32 @@ +/* + * Machine dependent access functions for RTC registers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_PPC64_MC146818RTC_H +#define __ASM_PPC64_MC146818RTC_H + +#include + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The machines supported so far all access the RTC index register via + * an ISA port access, but the way to access the data register differs ...
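Stepping back to machdep.h: ppc_md is a function-pointer dispatch table that lets one kernel image drive both iSeries and pSeries. An illustrative sketch of how platform code might fill in a few hooks; the example_* functions are invented stand-ins, not part of the patch:

        extern void example_setup_arch(void);
        extern int example_get_irq(struct pt_regs *);
        extern void example_calibrate_decr(void);

        static void example_platform_init(void)
        {
                ppc_md.setup_arch = example_setup_arch;
                ppc_md.get_irq = example_get_irq;
                ppc_md.calibrate_decr = example_calibrate_decr;
                /* generic code later dispatches via ppc_md.setup_arch(), etc. */
        }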
+ */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#endif /* __ASM_PPC64_MC146818RTC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/md.h linuxppc64_2_4/include/asm-ppc64/md.h --- ../kernel.org/linux/include/asm-ppc64/md.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/md.h Tue Jun 19 08:38:06 2001 @@ -0,0 +1,17 @@ +/* + * md.h: High speed xor_block operation for RAID4/5 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ASM_MD_H +#define __ASM_MD_H + +/* #define HAVE_ARCH_XORBLOCK */ + +#define MD_XORBLOCK_ALIGNMENT sizeof(long) + +#endif /* __ASM_MD_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/memory.h linuxppc64_2_4/include/asm-ppc64/memory.h --- ../kernel.org/linux/include/asm-ppc64/memory.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/memory.h Mon Nov 19 21:43:25 2001 @@ -0,0 +1,44 @@ +#ifndef _ASM_PPC64_MEMORY_H_ +#define _ASM_PPC64_MEMORY_H_ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +/* + * Arguably the bitops and *xchg operations don't imply any memory barrier + * or SMP ordering, but in fact a lot of drivers expect them to imply + * both, since they do on x86 cpus. + */ +#ifdef CONFIG_SMP +#define EIEIO_ON_SMP "eieio\n" +#define ISYNC_ON_SMP "\n\tisync" +#else +#define EIEIO_ON_SMP +#define ISYNC_ON_SMP +#endif + +static inline void eieio(void) +{ + __asm__ __volatile__ ("eieio" : : : "memory"); +} + +static inline void isync(void) +{ + __asm__ __volatile__ ("isync" : : : "memory"); +} + +#ifdef CONFIG_SMP +#define eieio_on_smp() eieio() +#define isync_on_smp() isync() +#else +#define eieio_on_smp() __asm__ __volatile__("": : :"memory") +#define isync_on_smp() __asm__ __volatile__("": : :"memory") +#endif + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/mman.h linuxppc64_2_4/include/asm-ppc64/mman.h --- ../kernel.org/linux/include/asm-ppc64/mman.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/mman.h Mon May 7 15:09:40 2001 @@ -0,0 +1,45 @@ +#ifndef __PPC64_MMAN_H__ +#define __PPC64_MMAN_H__ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
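The eieio()/isync() helpers in memory.h above are the ordering primitives the I/O accessors in io.h lean on. A hedged device sketch (register layout invented) showing an explicit eieio() between two memory-mapped stores, since the out_* accessors themselves do not order store-to-store:

        static void example_ring_doorbell(volatile unsigned *ctrl,
                                          volatile unsigned *doorbell)
        {
                out_be32(ctrl, 0x1);            /* program the (hypothetical) device */
                eieio();                        /* ensure the store above lands first */
                out_be32(doorbell, 0x1);        /* then kick it */
        }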
+ */ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_NONE 0x0 /* page can not be accessed */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ +#define MAP_NORESERVE 0x40 /* don't reserve swap pages */ + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ +#define MCL_FUTURE 0x4000 /* lock all additions to address space */ + +#define MADV_NORMAL 0x0 /* default page-in behavior */ +#define MADV_RANDOM 0x1 /* page-in minimum required */ +#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ +#define MADV_WILLNEED 0x3 /* pre-fault pages */ +#define MADV_DONTNEED 0x4 /* discard these pages */ + +/* compatibility flags */ +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 + +#endif /* __PPC64_MMAN_H__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/mmu.h linuxppc64_2_4/include/asm-ppc64/mmu.h --- ../kernel.org/linux/include/asm-ppc64/mmu.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/mmu.h Thu Nov 8 23:13:41 2001 @@ -0,0 +1,386 @@ +/* + * PowerPC memory management structures + * + * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> + * PPC64 rework. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _PPC64_MMU_H_ +#define _PPC64_MMU_H_ + +#include + +#ifndef __ASSEMBLY__ + +/* Default "unsigned long" context */ +typedef unsigned long mm_context_t; + +/* + * Define the size of the cache used for segment table entries. The first + * entry is used as a cache pointer, therefore the actual number of entries + * stored is one less than defined here. Do not change this value without + * considering the impact it will have on the layout of the paca in Paca.h. + */ +#define STAB_CACHE_SIZE 16 + +/* + * Hardware Segment Lookaside Buffer Entry + * This structure has been padded out to two 64b doublewords (actual SLBE's are + * 94 bits). This padding facilitates use by the segment management + * instructions.
+ */ +typedef struct { + unsigned long esid: 36; /* Effective segment ID */ + unsigned long resv0:20; /* Reserved */ + unsigned long v: 1; /* Entry valid (v=1) or invalid */ + unsigned long resv1: 1; /* Reserved */ + unsigned long ks: 1; /* Supervisor (privileged) state storage key */ + unsigned long kp: 1; /* Problem state storage key */ + unsigned long n: 1; /* No-execute if n=1 */ + unsigned long resv2: 3; /* padding to a 64b boundary */ +} ste_dword0; + +typedef struct { + unsigned long vsid: 52; /* Virtual segment ID */ + unsigned long resv0:12; /* Padding to a 64b boundary */ +} ste_dword1; + +typedef struct _STE { + union { + unsigned long dword0; + ste_dword0 dw0; + } dw0; + + union { + unsigned long dword1; + ste_dword1 dw1; + } dw1; +} STE; + +typedef struct { + unsigned long esid: 36; /* Effective segment ID */ + unsigned long v: 1; /* Entry valid (v=1) or invalid */ + unsigned long null1:15; /* padding to a 64b boundary */ + unsigned long index:12; /* Index to select SLB entry. Used by slbmte */ +} slb_dword0; + +typedef struct { + unsigned long vsid: 52; /* Virtual segment ID */ + unsigned long ks: 1; /* Supervisor (privileged) state storage key */ + unsigned long kp: 1; /* Problem state storage key */ + unsigned long n: 1; /* No-execute if n=1 */ + unsigned long l: 1; /* Virt pages are large (l=1) or 4KB (l=0) */ + unsigned long c: 1; /* Class */ + unsigned long resv0: 7; /* Padding to a 64b boundary */ +} slb_dword1; + +typedef struct _SLBE { + union { + unsigned long dword0; + slb_dword0 dw0; + } dw0; + + union { + unsigned long dword1; + slb_dword1 dw1; + } dw1; +} SLBE; + +/* + * This structure is used in Paca.h where the layout depends on the + * size being 24B. + */ +typedef struct { + unsigned long real; + unsigned long virt; + unsigned long next_round_robin; +} STAB; + +/* Hardware Page Table Entry */ + +#define HPTES_PER_GROUP 8 + +typedef struct { + unsigned long avpn:57; /* vsid | api == avpn */ + unsigned long : 2; /* Software use */ + unsigned long bolted: 1; /* HPTE is "bolted" */ + unsigned long : 1; /* Software use */ + unsigned long : 1; /* Reserved */ + unsigned long h: 1; /* Hash function identifier */ + unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */ + } Hpte_dword0; + +typedef struct { + unsigned long : 6; /* unused - padding */ + unsigned long ac: 1; /* Address compare */ + unsigned long r: 1; /* Referenced */ + unsigned long c: 1; /* Changed */ + unsigned long w: 1; /* Write-thru cache mode */ + unsigned long i: 1; /* Cache inhibited */ + unsigned long m: 1; /* Memory coherence required */ + unsigned long g: 1; /* Guarded */ + unsigned long n: 1; /* No-execute */ + unsigned long pp: 2; /* Page protection bits 1:2 */ +} Hpte_flags; + +typedef struct { + unsigned long pp0: 1; /* Page protection bit 0 */ + unsigned long : 1; /* Reserved */ + unsigned long rpn: 50; /* Real page number */ + unsigned long : 2; /* Reserved */ + unsigned long ac: 1; /* Address compare */ + unsigned long r: 1; /* Referenced */ + unsigned long c: 1; /* Changed */ + unsigned long w: 1; /* Write-thru cache mode */ + unsigned long i: 1; /* Cache inhibited */ + unsigned long m: 1; /* Memory coherence required */ + unsigned long g: 1; /* Guarded */ + unsigned long n: 1; /* No-execute */ + unsigned long pp: 2; /* Page protection bits 1:2 */ +} Hpte_dword1; + +typedef struct { + char padding[6]; /* padding */ + unsigned long : 6; /* padding */ + unsigned long flags: 10; /* HPTE flags */ +} Hpte_dword1_flags; + +typedef struct _HPTE { + union { + unsigned long dword0; + 
Hpte_dword0 dw0; + } dw0; + + union { + unsigned long dword1; + struct { + unsigned long pp0: 1; /* Page protection bit 0 */ + unsigned long ts: 1; /* Tag set bit */ + unsigned long rpn: 50; /* Real page number */ + unsigned long : 2; /* Unused */ + unsigned long ac: 1; /* Address compare bit */ + unsigned long r: 1; /* Referenced */ + unsigned long c: 1; /* Changed */ + unsigned long w: 1; /* Write-thru cache mode */ + unsigned long i: 1; /* Cache inhibited */ + unsigned long m: 1; /* Memory coherence */ + unsigned long g: 1; /* Guarded */ + unsigned long n: 1; /* No-execute page if N=1 */ + unsigned long pp: 2; /* Page protection bit 1:2 */ + } dw1; + } dw1; +} HPTE; + +/* Values for PP (assumes Ks=0, Kp=1) */ +/* pp0 will always be 0 for linux */ +#define PP_RWXX 0 /* Supervisor read/write, User none */ +#define PP_RWRX 1 /* Supervisor read/write, User read */ +#define PP_RWRW 2 /* Supervisor read/write, User read/write */ +#define PP_RXRX 3 /* Supervisor read, User read */ + + +typedef struct { + HPTE * htab; + unsigned long htab_num_ptegs; + unsigned long htab_hash_mask; + unsigned long next_round_robin; + unsigned long last_kernel_address; +} HTAB; + +extern HTAB htab_data; + +void invalidate_hpte( unsigned long slot ); +long select_hpte_slot( unsigned long vpn ); +void create_valid_hpte( unsigned long slot, unsigned long vpn, + unsigned long prpn, unsigned hash, + void * ptep, unsigned hpteflags, + unsigned bolted ); + +#define PD_SHIFT (10+12) /* Page directory */ +#define PD_MASK 0x02FF +#define PT_SHIFT (12) /* Page Table */ +#define PT_MASK 0x02FF + +static inline unsigned long hpt_hash(unsigned long vpn, int large) +{ + unsigned long vsid; + unsigned long page; + + if (large) { + vsid = vpn >> 4; + page = vpn & 0xf; + } else { + vsid = vpn >> 16; + page = vpn & 0xffff; + } + + return (vsid & 0x7fffffffff) ^ page; +} + +#define PG_SHIFT (12) /* Page Entry */ + +extern __inline__ void _tlbie( unsigned long va ) +{ + __asm__ __volatile__ ( " \n\ + clrldi %0,%0,16 \n\ + ptesync \n\ + tlbie %0 \n\ + eieio \n\ + tlbsync \n\ + ptesync" + : : "r" (va) : "memory" ); +} + +#endif /* __ASSEMBLY__ */ + +/* Block size masks */ +#define BL_128K 0x000 +#define BL_256K 0x001 +#define BL_512K 0x003 +#define BL_1M 0x007 +#define BL_2M 0x00F +#define BL_4M 0x01F +#define BL_8M 0x03F +#define BL_16M 0x07F +#define BL_32M 0x0FF +#define BL_64M 0x1FF +#define BL_128M 0x3FF +#define BL_256M 0x7FF + +/* Used to set up SDR1 register */ +#define HASH_TABLE_SIZE_64K 0x00010000 +#define HASH_TABLE_SIZE_128K 0x00020000 +#define HASH_TABLE_SIZE_256K 0x00040000 +#define HASH_TABLE_SIZE_512K 0x00080000 +#define HASH_TABLE_SIZE_1M 0x00100000 +#define HASH_TABLE_SIZE_2M 0x00200000 +#define HASH_TABLE_SIZE_4M 0x00400000 +#define HASH_TABLE_MASK_64K 0x000 +#define HASH_TABLE_MASK_128K 0x001 +#define HASH_TABLE_MASK_256K 0x003 +#define HASH_TABLE_MASK_512K 0x007 +#define HASH_TABLE_MASK_1M 0x00F +#define HASH_TABLE_MASK_2M 0x01F +#define HASH_TABLE_MASK_4M 0x03F + +/* These are the Ks and Kp from the PowerPC books. For proper operation, + * Ks = 0, Kp = 1. + */ +#define MI_AP 786 +#define MI_Ks 0x80000000 /* Should not be set */ +#define MI_Kp 0x40000000 /* Should always be set */ + +/* The effective page number register. When read, contains the information + * about the last instruction TLB miss. When MI_RPN is written, bits in + * this register are used to create the TLB entry. 
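Returning to hpt_hash() above: for a standard 4K page the VPN splits into a VSID and a 16-bit page index, which are XORed to pick a hash-table group. A worked example with an assumed VPN value:

        unsigned long vpn = 0x123450007UL;
        unsigned long hash = hpt_hash(vpn, 0);
        /* vsid = vpn >> 16 = 0x12345, page = vpn & 0xffff = 0x0007,
         * so hash = (0x12345 & 0x7fffffffff) ^ 0x7 = 0x12342 */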
+ */ +#define MI_EPN 787 +#define MI_EPNMASK 0xfffff000 /* Effective page number for entry */ +#define MI_EVALID 0x00000200 /* Entry is valid */ +#define MI_ASIDMASK 0x0000000f /* ASID match value */ + /* Reset value is undefined */ + +/* A "level 1" or "segment" or whatever you want to call it register. + * For the instruction TLB, it contains bits that get loaded into the + * TLB entry when the MI_RPN is written. + */ +#define MI_TWC 789 +#define MI_APG 0x000001e0 /* Access protection group (0) */ +#define MI_GUARDED 0x00000010 /* Guarded storage */ +#define MI_PSMASK 0x0000000c /* Mask of page size bits */ +#define MI_PS8MEG 0x0000000c /* 8M page size */ +#define MI_PS512K 0x00000004 /* 512K page size */ +#define MI_PS4K_16K 0x00000000 /* 4K or 16K page size */ +#define MI_SVALID 0x00000001 /* Segment entry is valid */ + /* Reset value is undefined */ + +/* Real page number. Defined by the pte. Writing this register + * causes a TLB entry to be created for the instruction TLB, using + * additional information from the MI_EPN, and MI_TWC registers. + */ +#define MI_RPN 790 + +/* Define an RPN value for mapping kernel memory to large virtual + * pages for boot initialization. This has real page number of 0, + * large page size, shared page, cache enabled, and valid. + * Also mark all subpages valid and write access. + */ +#define MI_BOOTINIT 0x000001fd + +#define MD_CTR 792 /* Data TLB control register */ +#define MD_GPM 0x80000000 /* Set domain manager mode */ +#define MD_PPM 0x40000000 /* Set subpage protection */ +#define MD_CIDEF 0x20000000 /* Set cache inhibit when MMU dis */ +#define MD_WTDEF 0x10000000 /* Set writethrough when MMU dis */ +#define MD_RSV4I 0x08000000 /* Reserve 4 TLB entries */ +#define MD_TWAM 0x04000000 /* Use 4K page hardware assist */ +#define MD_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ +#define MD_IDXMASK 0x00001f00 /* TLB index to be loaded */ +#define MD_RESETVAL 0x04000000 /* Value of register at reset */ + +#define M_CASID 793 /* Address space ID (context) to match */ +#define MC_ASIDMASK 0x0000000f /* Bits used for ASID value */ + + +/* These are the Ks and Kp from the PowerPC books. For proper operation, + * Ks = 0, Kp = 1. + */ +#define MD_AP 794 +#define MD_Ks 0x80000000 /* Should not be set */ +#define MD_Kp 0x40000000 /* Should always be set */ + +/* The effective page number register. When read, contains the information + * about the last instruction TLB miss. When MD_RPN is written, bits in + * this register are used to create the TLB entry. + */ +#define MD_EPN 795 +#define MD_EPNMASK 0xfffff000 /* Effective page number for entry */ +#define MD_EVALID 0x00000200 /* Entry is valid */ +#define MD_ASIDMASK 0x0000000f /* ASID match value */ + /* Reset value is undefined */ + +/* The pointer to the base address of the first level page table. + * During a software tablewalk, reading this register provides the address + * of the entry associated with MD_EPN. + */ +#define M_TWB 796 +#define M_L1TB 0xfffff000 /* Level 1 table base address */ +#define M_L1INDX 0x00000ffc /* Level 1 index, when read */ + /* Reset value is undefined */ + +/* A "level 1" or "segment" or whatever you want to call it register. + * For the data TLB, it contains bits that get loaded into the TLB entry + * when the MD_RPN is written. It also provides the hardware assist + * for finding the PTE address during software tablewalk.
+ */ +#define MD_TWC 797 +#define MD_L2TB 0xfffff000 /* Level 2 table base address */ +#define MD_L2INDX 0xfffffe00 /* Level 2 index (*pte), when read */ +#define MD_APG 0x000001e0 /* Access protection group (0) */ +#define MD_GUARDED 0x00000010 /* Guarded storage */ +#define MD_PSMASK 0x0000000c /* Mask of page size bits */ +#define MD_PS8MEG 0x0000000c /* 8M page size */ +#define MD_PS512K 0x00000004 /* 512K page size */ +#define MD_PS4K_16K 0x00000000 /* 4K or 16K page size */ +#define MD_WT 0x00000002 /* Use writethrough page attribute */ +#define MD_SVALID 0x00000001 /* Segment entry is valid */ + /* Reset value is undefined */ + + +/* Real page number. Defined by the pte. Writing this register + * causes a TLB entry to be created for the data TLB, using + * additional information from the MD_EPN, and MD_TWC registers. + */ +#define MD_RPN 798 + +/* This is a temporary storage register that could be used to save + * a processor working register during a tablewalk. + */ +#define M_TW 799 + +#endif /* _PPC64_MMU_H_ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/mmu_context.h linuxppc64_2_4/include/asm-ppc64/mmu_context.h --- ../kernel.org/linux/include/asm-ppc64/mmu_context.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/mmu_context.h Thu Oct 18 02:51:46 2001 @@ -0,0 +1,194 @@ +#ifndef __PPC64_MMU_CONTEXT_H +#define __PPC64_MMU_CONTEXT_H + +#include +#include +#include +#include +#include +#include + +/* + * Copyright (C) 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define NO_CONTEXT 0 +#define FIRST_USER_CONTEXT 0x10 /* First 16 reserved for kernel */ +#define LAST_USER_CONTEXT 0x8000 /* Same as PID_MAX for now... */ +#define NUM_USER_CONTEXT (LAST_USER_CONTEXT-FIRST_USER_CONTEXT) + +/* Choose whether we want to implement our context + * number allocator as a LIFO or FIFO queue. + */ +#if 1 +#define MMU_CONTEXT_LIFO +#else +#define MMU_CONTEXT_FIFO +#endif + +struct mmu_context_queue_t { + spinlock_t lock; + long head; + long tail; + long size; + mm_context_t elements[LAST_USER_CONTEXT]; +}; + +extern struct mmu_context_queue_t mmu_context_queue; + +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ +} + +extern void flush_stab(void); + +/* + * The context number queue has underflowed. + * Meaning: we tried to push a context number that was freed + * back onto the context queue and the queue was already full. + */ +static inline void +mmu_context_underflow(void) +{ + printk(KERN_DEBUG "mmu_context_underflow\n"); + panic("mmu_context_underflow"); +} + + +/* + * Set up the context for a new address space. + */ +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + long head, size; + + spin_lock( &mmu_context_queue.lock ); + + if ( (size = mmu_context_queue.size) <= 0 ) { + spin_unlock( &mmu_context_queue.lock ); + return -ENOMEM; + } + + head = mmu_context_queue.head; + mm->context = mmu_context_queue.elements[head]; + + head = (head < LAST_USER_CONTEXT-1) ? head+1 : 0; + mmu_context_queue.head = head; + mmu_context_queue.size = size-1; + + spin_unlock( &mmu_context_queue.lock ); + + return 0; +} + +/* + * We're finished using the context for an address space. 
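The allocator above treats mmu_context_queue as a ring buffer of free context numbers: init_new_context() pops from the head, and destroy_context() below pushes the number back (LIFO by default, which favors reuse of recently freed contexts). A hedged sketch of the round trip; the caller and mm_struct lifetime are invented for illustration:

        static void example_context_roundtrip(struct mm_struct *mm)
        {
                if (init_new_context(NULL, mm) == 0) {
                        /* mm->context now holds a number from the free queue */
                        destroy_context(mm);    /* returns it for reuse */
                }
        }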
+ */
+static inline void
+destroy_context(struct mm_struct *mm)
+{
+	long index, size;
+
+	spin_lock( &mmu_context_queue.lock );
+
+	if ( (size = mmu_context_queue.size) >= NUM_USER_CONTEXT ) {
+		spin_unlock( &mmu_context_queue.lock );
+		mmu_context_underflow();
+	}
+
+#ifdef MMU_CONTEXT_LIFO
+	index = mmu_context_queue.head;
+	index = (index > 0) ? index-1 : LAST_USER_CONTEXT-1;
+	mmu_context_queue.head = index;
+#else
+	index = mmu_context_queue.tail;
+	index = (index < LAST_USER_CONTEXT-1) ? index+1 : 0;
+	mmu_context_queue.tail = index;
+#endif
+
+	mmu_context_queue.size = size+1;
+	mmu_context_queue.elements[index] = mm->context;
+
+	spin_unlock( &mmu_context_queue.lock );
+}
+
+
+/*
+ * switch_mm is the entry point called from the architecture independent
+ * code in kernel/sched.c
+ */
+static inline void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	  struct task_struct *tsk, int cpu)
+{
+	tsk->thread.pgdir = next->pgd; /* cache the pgdir in the thread
+					  maybe not needed any more */
+	flush_stab();
+}
+
+/*
+ * After we have set current->mm to a new value, this activates
+ * the context for the new mm so we see the new mappings.
+ */
+static inline void
+activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
+{
+	current->thread.pgdir = mm->pgd;
+	flush_stab();
+}
+
+
+#define VSID_RANDOMIZER 42470972311
+#define VSID_MASK 0xfffffffff
+
+
+/* This is only valid for kernel (including vmalloc, imalloc and bolted) EA's
+ */
+static inline unsigned long
+get_kernel_vsid( unsigned long ea )
+{
+	unsigned long ordinal, vsid;
+
+	ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | (ea >> 60);
+	vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
+
+	ifppcdebug(PPCDBG_HTABSTRESS) {
+		/* For debug, this path creates a very poor vsid distribution.
+		 * A user program can access virtual addresses in the form
+		 * 0x0yyyyxxxx000 where yyyy = xxxx to cause multiple mappings
+		 * to hash to the same page table group.
+		 */
+		ordinal = ((ea >> 28) & 0x1fff) | (ea >> 44);
+		vsid = ordinal & VSID_MASK;
+	}
+
+	return vsid;
+}
+
+/* This is only valid for user EA's (user EA's do not exceed 2^41 (EADDR_SIZE))
+ */
+static inline unsigned long
+get_vsid( unsigned long context, unsigned long ea )
+{
+	unsigned long ordinal, vsid;
+
+	ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | context;
+	vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
+
+	ifppcdebug(PPCDBG_HTABSTRESS) {
+		/* See comment above. */
+		ordinal = ((ea >> 28) & 0x1fff) | (context << 16);
+		vsid = ordinal & VSID_MASK;
+	}
+
+	return vsid;
+}
+
+#endif /* __PPC64_MMU_CONTEXT_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/module.h linuxppc64_2_4/include/asm-ppc64/module.h
--- ../kernel.org/linux/include/asm-ppc64/module.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/module.h Tue Sep 4 18:04:16 2001
@@ -0,0 +1,18 @@
+#ifndef _ASM_PPC64_MODULE_H
+#define _ASM_PPC64_MODULE_H
+/*
+ * This file contains the PPC architecture specific module code.
+ *
+ * Copyright (C) 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
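
The get_vsid() routine above is a multiplicative hash: the 256MB segment number and the context are packed into one ordinal, multiplied by VSID_RANDOMIZER, and truncated to 36 bits by VSID_MASK. A standalone demonstration with the same constants (assumes a 64-bit unsigned long, as on ppc64):

#include <stdio.h>

#define LAST_USER_CONTEXT 0x8000
#define VSID_RANDOMIZER   42470972311UL
#define VSID_MASK         0xfffffffffUL

/* Same arithmetic as get_vsid() above, outside the kernel. */
static unsigned long vsid(unsigned long context, unsigned long ea)
{
	unsigned long ordinal;

	ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | context;
	return (ordinal * VSID_RANDOMIZER) & VSID_MASK;
}

int main(void)
{
	/* Adjacent 256MB segments of one context hash far apart. */
	printf("%09lx\n", vsid(0x10, 0x00000000UL));
	printf("%09lx\n", vsid(0x10, 0x10000000UL));
	return 0;
}
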
+ */
+
+#define module_map(x) vmalloc(x)
+#define module_unmap(x) vfree(x)
+#define arch_init_modules(x) do { } while (0)
+#define module_arch_init(x) (0)
+#endif /* _ASM_PPC64_MODULE_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/msgbuf.h linuxppc64_2_4/include/asm-ppc64/msgbuf.h
--- ../kernel.org/linux/include/asm-ppc64/msgbuf.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/msgbuf.h Mon Nov 19 21:26:53 2001
@@ -0,0 +1,27 @@
+#ifndef _PPC64_MSGBUF_H
+#define _PPC64_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for the PPC architecture.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime; /* last msgsnd time */
+	__kernel_time_t msg_rtime; /* last msgrcv time */
+	__kernel_time_t msg_ctime; /* last change time */
+	unsigned long msg_cbytes; /* current number of bytes on queue */
+	unsigned long msg_qnum; /* number of messages in queue */
+	unsigned long msg_qbytes; /* max number of bytes on queue */
+	__kernel_pid_t msg_lspid; /* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid; /* last receive pid */
+	unsigned long __unused1;
+	unsigned long __unused2;
+};
+
+#endif /* _PPC64_MSGBUF_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/namei.h linuxppc64_2_4/include/asm-ppc64/namei.h
--- ../kernel.org/linux/include/asm-ppc64/namei.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/namei.h Tue Jun 19 08:38:06 2001
@@ -0,0 +1,23 @@
+/*
+ * linux/include/asm-ppc/namei.h
+ * Adapted from linux/include/asm-alpha/namei.h
+ *
+ * Included from linux/fs/namei.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_NAMEI_H
+#define __PPC64_NAMEI_H
+
+/* This dummy routine may be changed to something useful
+ * for /usr/gnemul/ emulation stuff.
+ * Look at asm-sparc/namei.h for details.
+ */
+
+#define __emul_prefix() NULL
+
+#endif /* __PPC64_NAMEI_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/nvram.h linuxppc64_2_4/include/asm-ppc64/nvram.h
--- ../kernel.org/linux/include/asm-ppc64/nvram.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/nvram.h Mon May 7 15:09:40 2001
@@ -0,0 +1,68 @@
+/*
+ * PreP compliant NVRAM access
+ * This needs to be updated for PPC64
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */ + +#ifndef _PPC64_NVRAM_H +#define _PPC64_NVRAM_H + +#define NVRAM_AS0 0x74 +#define NVRAM_AS1 0x75 +#define NVRAM_DATA 0x77 + + +/* RTC Offsets */ + +#define MOTO_RTC_SECONDS 0x1FF9 +#define MOTO_RTC_MINUTES 0x1FFA +#define MOTO_RTC_HOURS 0x1FFB +#define MOTO_RTC_DAY_OF_WEEK 0x1FFC +#define MOTO_RTC_DAY_OF_MONTH 0x1FFD +#define MOTO_RTC_MONTH 0x1FFE +#define MOTO_RTC_YEAR 0x1FFF +#define MOTO_RTC_CONTROLA 0x1FF8 +#define MOTO_RTC_CONTROLB 0x1FF9 + +#ifndef BCD_TO_BIN +#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) +#endif + +#ifndef BIN_TO_BCD +#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) +#endif + +/* PowerMac specific nvram stuffs */ + +enum { + pmac_nvram_OF, /* Open Firmware partition */ + pmac_nvram_XPRAM, /* MacOS XPRAM partition */ + pmac_nvram_NR /* MacOS Name Registry partition */ +}; + +/* Return partition offset in nvram */ +extern int pmac_get_partition(int partition); + +/* Direct access to XPRAM */ +extern u8 pmac_xpram_read(int xpaddr); +extern void pmac_xpram_write(int xpaddr, u8 data); + +/* Some offsets in XPRAM */ +#define PMAC_XPRAM_MACHINE_LOC 0xe4 +#define PMAC_XPRAM_SOUND_VOLUME 0x08 + +/* Machine location structure in XPRAM */ +struct pmac_machine_location { + u32 latitude; /* 2+30 bit Fractional number */ + u32 longitude; /* 2+30 bit Fractional number */ + u32 delta; /* mix of GMT delta and DLS */ +}; + +/* /dev/nvram ioctls */ +#define PMAC_NVRAM_GET_OFFSET _IOWR('p', 0x40, int) /* Get NVRAM partition offset */ + +#endif /* _PPC64_NVRAM_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/page.h linuxppc64_2_4/include/asm-ppc64/page.h --- ../kernel.org/linux/include/asm-ppc64/page.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/page.h Mon Oct 15 02:54:31 2001 @@ -0,0 +1,225 @@ +#ifndef _PPC64_PAGE_H +#define _PPC64_PAGE_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_OFFSET_MASK (PAGE_SIZE-1) + +#define SID_SHIFT 28 +#define SID_MASK 0xfffffffff +#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) + +/* Define an illegal instr to trap on the bug. + * We don't use 0 because that marks the end of a function + * in the ELF ABI. That's "Boo Boo" in case you wonder... + */ +#define BUG_OPCODE .long 0x00b00b00 /* For asm */ +#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */ + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#include + +#define STRICT_MM_TYPECHECKS + +#define REGION_SIZE 4UL +#define OFFSET_SIZE 60UL +#define REGION_SHIFT 60UL +#define OFFSET_SHIFT 0UL +#define REGION_MASK (((1UL<dCacheL1LineSize; + lines = naca->dCacheL1LinesPerPage; + + __asm__ __volatile__( +" mtctr %1\n\ +1: dcbz 0,%0\n\ + add %0,%0,%3\n\ + bdnz+ 1b" + : "=r" (addr) + : "r" (lines), "0" (addr), "r" (line_size) + : "ctr", "memory"); +} + +extern void copy_page(void *to, void *from); + +#define __HAVE_ARCH_USER_PAGE +struct page; +extern void clear_user_page(struct page *page, unsigned long vaddr); +extern void copy_user_page(struct page *to, struct page *from, unsigned long vaddr); + +#ifdef STRICT_MM_TYPECHECKS +/* + * These are used to make use of C type-checking. 
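
The BCD macros above convert RTC bytes in place. A quick standalone check of the round trip, using the macros exactly as defined:

#include <stdio.h>

#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)

int main(void)
{
	int v = 0x59;      /* an RTC seconds byte read as BCD */
	BCD_TO_BIN(v);     /* v == 59 */
	printf("%d\n", v);
	BIN_TO_BCD(v);     /* back to 0x59 */
	printf("0x%x\n", v);
	return 0;
}
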
+ * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned int pmd; } pmd_t;
+typedef struct { unsigned int pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x) ((x).pte)
+#define pmd_val(x) ((x).pmd)
+#define pgd_val(x) ((x).pgd)
+#define pgprot_val(x) ((x).pgprot)
+
+#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pgd(x) ((pgd_t) { (x) } )
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+#else
+/*
+ * .. while these make it easier on the compiler
+ */
+typedef unsigned long pte_t;
+typedef unsigned int pmd_t;
+typedef unsigned int pgd_t;
+typedef unsigned long pgprot_t;
+
+#define pte_val(x) (x)
+#define pmd_val(x) (x)
+#define pgd_val(x) (x)
+#define pgprot_val(x) (x)
+
+#define __pte(x) (x)
+#define __pmd(x) (x)
+#define __pgd(x) (x)
+#define __pgprot(x) (x)
+
+#endif
+
+#ifdef CONFIG_XMON
+#include
+extern void xmon(struct pt_regs *excp);
+#define BUG() do { \
+	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	xmon(0); \
+} while (0)
+#else
+#define BUG() do { \
+	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	__asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+} while (0)
+#endif
+
+#define PAGE_BUG(page) do { BUG(); } while (0)
+
+/* Pure 2^n version of get_order */
+extern __inline__ int get_order(unsigned long size)
+{
+	int order;
+
+	size = (size-1) >> (PAGE_SHIFT-1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+
+#endif /* __ASSEMBLY__ */
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) double word boundary */
+#define DOUBLEWORD_ALIGN(addr) _ALIGN(addr,sizeof(unsigned long))
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
+
+#ifdef MODULE
+#define __page_aligned __attribute__((__aligned__(PAGE_SIZE)))
+#else
+#define __page_aligned \
+	__attribute__((__aligned__(PAGE_SIZE), \
+		__section__(".data.page_aligned")))
+#endif
+
+
+/* This must match the -Ttext linker address */
+/* Note: tophys & tovirt make assumptions about how */
+/* KERNELBASE is defined for performance reasons. */
+/* When KERNELBASE moves, those macros may have */
+/* to change!
*/ +#define PAGE_OFFSET 0xC000000000000000 +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE 0xD000000000000000 +#define IOREGIONBASE 0xE000000000000000 + +#define IO_REGION_ID (IOREGIONBASE>>REGION_SHIFT) +#define VMALLOC_REGION_ID (VMALLOCBASE>>REGION_SHIFT) +#define KERNEL_REGION_ID (KERNELBASE>>REGION_SHIFT) +#define USER_REGION_ID (0UL) +#define REGION_ID(X) (((unsigned long)(X))>>REGION_SHIFT) + +/* + * Define valid/invalid EA bits (for all ranges) + */ +#define VALID_EA_BITS (0x000001ffffffffffUL) +#define INVALID_EA_BITS (~(REGION_MASK|VALID_EA_BITS)) + +#define IS_VALID_REGION_ID(x) \ + (((x) == USER_REGION_ID) || ((x) >= KERNEL_REGION_ID)) +#define IS_VALID_EA(x) \ + ((!((x) & INVALID_EA_BITS)) && IS_VALID_REGION_ID(REGION_ID(x))) + +#define __bpn_to_ba(x) ((((unsigned long)(x))<> PAGE_SHIFT) + +#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) + +/* Given that physical addresses do not map 1-1 to absolute addresses, we + * use these macros to better specify exactly what we want to do. + * The only restriction on their use is that the absolute address + * macros cannot be used until after the LMB structure has been + * initialized in prom.c. -Peter + */ +#define __v2p(x) ((void *) __pa(x)) +#define __v2a(x) ((void *) phys_to_absolute(__pa(x))) +#define __p2a(x) ((void *) phys_to_absolute(x)) +#define __p2v(x) ((void *) __va(x)) +#define __a2p(x) ((void *) absolute_to_phys(x)) +#define __a2v(x) ((void *) __va(absolute_to_phys(x))) + +#define virt_to_page(kaddr) (mem_map+(__pa((unsigned long)kaddr) >> PAGE_SHIFT)) + +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) + +#define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) + +#endif /* __KERNEL__ */ +#endif /* _PPC64_PAGE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/param.h linuxppc64_2_4/include/asm-ppc64/param.h --- ../kernel.org/linux/include/asm-ppc64/param.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/param.h Thu Oct 4 00:39:50 2001 @@ -0,0 +1,41 @@ +#ifndef _ASM_PPC64_PARAM_H +#define _ASM_PPC64_PARAM_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef HZ +#define HZ 100 +#ifdef __KERNEL__ +#if HZ == 100 +/* ppc (like X86) is defined to provide userspace with a world where HZ=100 + We have to do this, (x*const)/const2 isnt optimised out because its not + a null operation as it might overflow.. */ +#define hz_to_std(a) (a) +#else +#define hz_to_std(a) ((a)*(100/HZ)+((a)*(100%HZ))/HZ) +#endif +#endif +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NGROUPS +#define NGROUPS 32 +#endif + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifdef __KERNEL__ +# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +#endif + +#endif /* _ASM_PPC64_PARAM_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/parport.h linuxppc64_2_4/include/asm-ppc64/parport.h --- ../kernel.org/linux/include/asm-ppc64/parport.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/parport.h Mon Nov 5 12:48:33 2001 @@ -0,0 +1,21 @@ +/* + * BK Id: SCCS/s.parport.h 1.5 05/17/01 18:14:25 cort + */ +/* + * parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. 
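
With the address-space layout above, the top nibble of an effective address selects the region. A standalone check using the same shift:

#include <stdio.h>

#define REGION_SHIFT 60UL
#define REGION_ID(X) (((unsigned long)(X)) >> REGION_SHIFT)

int main(void)
{
	/* Region IDs from the layout above: 0xC kernel, 0xD vmalloc, 0xE I/O. */
	printf("%lx\n", REGION_ID(0xC000000000000000UL)); /* c */
	printf("%lx\n", REGION_ID(0xD000000000001000UL)); /* d */
	printf("%lx\n", REGION_ID(0x0000000010000000UL)); /* 0: user region */
	return 0;
}
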
+ */ + +#ifndef _ASM_PPC64_PARPORT_H +#define _ASM_PPC64_PARPORT_H + +static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + return parport_pc_find_isa_ports (autoirq, autodma); +} + +#endif /* !(_ASM_PPC_PARPORT_H) */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pci-bridge.h linuxppc64_2_4/include/asm-ppc64/pci-bridge.h --- ../kernel.org/linux/include/asm-ppc64/pci-bridge.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pci-bridge.h Fri Sep 21 16:02:33 2001 @@ -0,0 +1,114 @@ +#ifdef __KERNEL__ +#ifndef _ASM_PCI_BRIDGE_H +#define _ASM_PCI_BRIDGE_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +struct device_node; +struct pci_controller; + +/* + * pci_io_base returns the memory address at which you can access + * the I/O space for PCI bus number `bus' (or NULL on error). + */ +extern void *pci_bus_io_base(unsigned int bus); +extern unsigned long pci_bus_io_base_phys(unsigned int bus); +extern unsigned long pci_bus_mem_base_phys(unsigned int bus); + +/* Get the PCI host controller for a bus */ +extern struct pci_controller* pci_bus_to_hose(int bus); + +/* Get the PCI host controller for an OF device */ +extern struct pci_controller* +pci_find_hose_for_OF_device(struct device_node* node); + +enum phb_types { + phb_type_unknown = 0x0, + phb_type_hypervisor = 0x1, + phb_type_python = 0x10, + phb_type_speedwagon = 0x11 +}; + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + char what[8]; /* Eye catcher */ + enum phb_types type; /* Type of hardware */ + struct pci_controller *next; + struct pci_bus *bus; + void *arch_data; + + int first_busno; + int last_busno; + + void *io_base_virt; + unsigned long io_base_phys; + + /* Some machines (PReP) have a non 1:1 mapping of + * the PCI memory space in the CPU bus space + */ + unsigned long pci_mem_offset; + unsigned long pci_io_offset; + + struct pci_ops *ops; + volatile unsigned long *cfg_addr; + volatile unsigned char *cfg_data; + volatile unsigned long *phb_regs; + volatile unsigned long *chip_regs; + + /* Currently, we limit ourselves to 1 IO range and 3 mem + * ranges since the common pci_bus structure can't handle more + */ + struct resource io_resource; + struct resource mem_resources[3]; + int mem_resource_count; + int global_number; + int local_number; + int system_bus_number; + unsigned long buid; + unsigned long dma_window_base_cur; + unsigned long dma_window_size; +}; + + +/* This version handles the new Uni-N host bridge, the iobase is now + * a per-device thing. I also added the memory base so PReP can + * be fixed to return 0xc0000000 (I didn't actually implement it) + * + * pci_dev_io_base() returns either a virtual (ioremap'ed) address or + * a physical address. In-kernel clients will use logical while the + * sys_pciconfig_iobase syscall returns a physical one to userland. 
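
A sketch of how a bus number could be mapped to its host bridge using the first_busno/last_busno fields of the pci_controller structure above. hose_head is a hypothetical list head; the patch's actual pci_bus_to_hose() implementation may differ:

/* Walk the controller chain; each hose claims a range of bus numbers. */
struct pci_controller *hose_head;

struct pci_controller *find_hose(int bus)
{
	struct pci_controller *hose;

	for (hose = hose_head; hose; hose = hose->next)
		if (bus >= hose->first_busno && bus <= hose->last_busno)
			return hose;
	return NULL; /* no controller claims this bus number */
}
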
+ */ +void *pci_dev_io_base(unsigned char bus, unsigned char devfn, int physical); +void *pci_dev_mem_base(unsigned char bus, unsigned char devfn); + +/* Returns the root-bridge number (Uni-N number) of a device */ +int pci_dev_root_bridge(unsigned char bus, unsigned char devfn); + +/* + * pci_device_loc returns the bus number and device/function number + * for a device on a PCI bus, given its device_node struct. + * It returns 0 if OK, -1 on error. + */ +int pci_device_loc(struct device_node *dev, unsigned char *bus_ptr, + unsigned char *devfn_ptr); + +struct bridge_data { + volatile unsigned int *cfg_addr; + volatile unsigned char *cfg_data; + void *io_base; /* virtual */ + unsigned long io_base_phys; + int bus_number; + int max_bus; + struct bridge_data *next; + struct device_node *node; +}; + +#endif +#endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pci.h linuxppc64_2_4/include/asm-ppc64/pci.h --- ../kernel.org/linux/include/asm-ppc64/pci.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pci.h Tue Nov 13 21:01:06 2001 @@ -0,0 +1,130 @@ +#ifndef __PPC64_PCI_H +#define __PPC64_PCI_H +#ifdef __KERNEL__ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Values for the `which' argument to sys_pciconfig_iobase syscall. */ +#define IOBASE_BRIDGE_NUMBER 0 +#define IOBASE_MEMORY 1 +#define IOBASE_IO 2 +#define IOBASE_ISA_IO 3 +#define IOBASE_ISA_MEM 4 + +/* Can be used to override the logic in pci_scan_bus for skipping + * already-configured bus numbers - to be used for buggy BIOSes + * or architectures with incomplete PCI setup by the loader. + */ +extern int pcibios_assign_all_busses(void); + +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +extern inline void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + +extern inline void pcibios_penalize_isa_irq(int irq) +{ + /* We don't do dynamic PCI IRQ allocation */ +} + +#include +#include +#include +#include +#include +#include + +struct pci_dev; +#define REG_SAVE_SIZE 64 +/************************************************************************ + * Structure to hold the data for PCI Register Save/Restore functions. * + ************************************************************************/ +struct pci_config_reg_save_area { + struct pci_dev* PciDev; /* Pointer to device(Sanity Check) */ + int Flags; /* Control & Info Flags */ + int RCode; /* Return Code on Save/Restore */ + int Register; /* Pointer to current register. 
*/ + u8 Regs[REG_SAVE_SIZE]; /* Save Area */ +}; +/************************************************************************ + * Functions to support device reset * + ************************************************************************/ +extern int pci_reset_device(struct pci_dev*, int, int); +extern int pci_save_config_regs(struct pci_dev*,struct pci_config_reg_save_area*); +extern int pci_restore_config_regs(struct pci_dev*,struct pci_config_reg_save_area*); +extern char* pci_card_location(struct pci_dev*); + +extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle); +extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, + size_t size, int direction); +extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, + size_t size, int direction); +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); + +extern void pSeries_pcibios_init_early(void); + +extern inline void pci_dma_sync_single(struct pci_dev *hwdev, + dma_addr_t dma_handle, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* nothing to do */ +} + +extern inline void pci_dma_sync_sg(struct pci_dev *hwdev, + struct scatterlist *sg, + int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* nothing to do */ +} + +/* Return whether the given PCI device DMA address mask can + * be supported properly. For example, if your device can + * only drive the low 24-bits during PCI bus mastering, then + * you would pass 0x00ffffff as the mask to this function. + */ +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) +{ + return 1; +} + +/* Return the index of the PCI controller for device PDEV. */ +extern int pci_controller_num(struct pci_dev *pdev); + +/* Map a range of PCI memory or I/O space for a device into user space */ +int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + +/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ +#define HAVE_PCI_MMAP 1 + +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->dma_length) + +#define pci_map_page(dev, page, off, size, dir) \ + pci_map_single(dev, (page_address(page) + (off)), size, dir) +#define pci_unmap_page(dev,addr,sz,dir) pci_unmap_single(dev,addr,sz,dir) + +#define pci_dac_dma_supported(pci_dev, mask) (0) + +#endif /* __KERNEL__ */ + +#endif /* __PPC64_PCI_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pci_dma.h linuxppc64_2_4/include/asm-ppc64/pci_dma.h --- ../kernel.org/linux/include/asm-ppc64/pci_dma.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pci_dma.h Fri Nov 30 07:46:39 2001 @@ -0,0 +1,98 @@ +/* + * pci_dma.h + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
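
A usage sketch for the reset and config-save interface declared in pci.h above. The header does not document pci_reset_device()'s two integer arguments, so zeros are shown as placeholders; check the implementation before relying on them:

/* Save config space, reset the device, then restore config space. */
int reset_with_config_restore(struct pci_dev *dev)
{
	struct pci_config_reg_save_area save;
	int rc;

	rc = pci_save_config_regs(dev, &save);
	if (rc)
		return rc;
	rc = pci_reset_device(dev, 0 /* flags? */, 0 /* delay? */);
	if (rc)
		return rc;
	return pci_restore_config_regs(dev, &save);
}
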
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PCI_DMA_H +#define _PCI_DMA_H + +#include +#include + +/* + * NUM_TCE_LEVELS defines the largest contiguous block + * of dma (tce) space we can get. NUM_TCE_LEVELS = 10 + * allows up to 2**9 pages (512 * 4096) = 2 MB + */ +#define NUM_TCE_LEVELS 10 + +#define NO_TCE ((dma_addr_t)-1) + +/* + * Tces come in two formats, one for the virtual bus and a different + * format for PCI + */ +#define TCE_VB 0 +#define TCE_PCI 1 + +union Tce { + u64 wholeTce; + struct { + u64 cacheBits :6; /* Cache hash bits - not used */ + u64 rsvd :6; + u64 rpn :40; /* Absolute page number */ + u64 valid :1; /* Tce is valid (vb only) */ + u64 allIo :1; /* Tce is valid for all lps (vb only) */ + u64 lpIndex :8; /* LpIndex for user of TCE (vb only) */ + u64 pciWrite :1; /* Write allowed (pci only) */ + u64 readWrite :1; /* Read allowed (pci), Write allowed (vb) */ + } tceBits; +}; + +struct Bitmap { + unsigned long numBits; + unsigned long numBytes; + unsigned char * map; +}; + +struct MultiLevelBitmap { + unsigned long maxLevel; + struct Bitmap level[NUM_TCE_LEVELS]; +}; + +struct TceTable { + u64 busNumber; + u64 size; + u64 startOffset; + u64 base; /* pSeries native only */ + u64 index; + u64 tceType; + spinlock_t lock; + struct MultiLevelBitmap mlbm; +}; + +struct TceTableManagerCB { + u64 busNumber; /* Bus number for this tce table */ + u64 start; /* Will be NULL for secondary */ + u64 totalSize; /* Size (in pages) of whole table */ + u64 startOffset; /* Index into real tce table of the + start of our section */ + u64 size; /* Size (in pages) of our section */ + u64 index; /* Index of this tce table (token?) */ + u16 maxTceTableIndex; /* Max num of tables for partition */ + u8 virtualBusFlag; /* Flag to indicate virtual bus */ + u8 rsvd[5]; +}; + +extern struct TceTable virtBusTceTable; /* Tce table for virtual bus */ + +extern void create_tce_tables(void); + +void tce_init_pSeries(void); +void tce_init_iSeries(void); + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pgalloc.h linuxppc64_2_4/include/asm-ppc64/pgalloc.h --- ../kernel.org/linux/include/asm-ppc64/pgalloc.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pgalloc.h Mon Dec 3 17:12:15 2001 @@ -0,0 +1,128 @@ +#ifndef _PPC64_PGALLOC_H +#define _PPC64_PGALLOC_H + +#include +#include +#include +#include +#include + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#define quicklists get_paca() + +#define pgd_quicklist (quicklists->pgd_cache) +#define pmd_quicklist (quicklists->pmd_cache) +#define pte_quicklist (quicklists->pte_cache) +#define pgtable_cache_size (quicklists->pgtable_cache_sz) + +static inline pgd_t* +pgd_alloc_one_fast (struct mm_struct *mm) +{ + unsigned long *ret = pgd_quicklist; + + if (ret != NULL) { + pgd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + --pgtable_cache_size; + } else + ret = NULL; + return (pgd_t *) ret; +} + +static inline pgd_t* +pgd_alloc (struct mm_struct *mm) +{ + /* the VM system never calls pgd_alloc_one_fast(), so we do it here. 
*/ + pgd_t *pgd = pgd_alloc_one_fast(mm); + + if (pgd == NULL) { + pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + if (pgd != NULL) + clear_page(pgd); + } + return pgd; +} + +static inline void +pgd_free (pgd_t *pgd) +{ + *(unsigned long *)pgd = (unsigned long) pgd_quicklist; + pgd_quicklist = (unsigned long *) pgd; + ++pgtable_cache_size; +} + +#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) + +static inline pmd_t* +pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr) +{ + unsigned long *ret = (unsigned long *)pmd_quicklist; + + if (ret != NULL) { + pmd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + --pgtable_cache_size; + } + return (pmd_t *)ret; +} + +static inline pmd_t* +pmd_alloc_one (struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + + if (pmd != NULL) + clear_page(pmd); + return pmd; +} + +static inline void +pmd_free (pmd_t *pmd) +{ + *(unsigned long *)pmd = (unsigned long) pmd_quicklist; + pmd_quicklist = (unsigned long *) pmd; + ++pgtable_cache_size; +} + +#define pmd_populate(MM, PMD, PTE) pmd_set(PMD, PTE) + +static inline pte_t* +pte_alloc_one_fast (struct mm_struct *mm, unsigned long addr) +{ + unsigned long *ret = (unsigned long *)pte_quicklist; + + if (ret != NULL) { + pte_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + --pgtable_cache_size; + } + return (pte_t *)ret; +} + + +static inline pte_t* +pte_alloc_one (struct mm_struct *mm, unsigned long addr) +{ + pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + + if (pte != NULL) + clear_page(pte); + return pte; +} + +static inline void +pte_free (pte_t *pte) +{ + *(unsigned long *)pte = (unsigned long) pte_quicklist; + pte_quicklist = (unsigned long *) pte; + ++pgtable_cache_size; +} + +extern int do_check_pgt_cache(int, int); + +#endif /* _PPC64_PGALLOC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pgtable.h linuxppc64_2_4/include/asm-ppc64/pgtable.h --- ../kernel.org/linux/include/asm-ppc64/pgtable.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pgtable.h Tue Nov 20 21:39:22 2001 @@ -0,0 +1,442 @@ +#ifndef _PPC64_PGTABLE_H +#define _PPC64_PGTABLE_H + +#define NEW_HPT 1 + +/* + * This file contains the functions and defines necessary to modify and use + * the ppc64 hashed page table. + */ +#include + +#ifndef __ASSEMBLY__ +#include /* For TASK_SIZE */ +#include +#include +#endif /* __ASSEMBLY__ */ + +/* Certain architectures need to do special things when pte's + * within a page table are directly modified. Thus, the following + * hook is made available. + */ + + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT - 3) + (PAGE_SHIFT - 2)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * Entries per page directory level. The PTE level must use a 64b record + * for each page table entry. The PMD and PGD level use a 32b record for + * each entry by assuming that each entry is page aligned. + */ +#define PTE_INDEX_SIZE 9 +#define PMD_INDEX_SIZE 10 +#define PGD_INDEX_SIZE 10 + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +// #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) DRENG / PPPBBB This is a compiler bug!!! 
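
The quicklists in pgalloc.h above reuse the first word of each free page as a next pointer, giving a LIFO free list with no extra storage. A standalone model of that trick:

#include <stdio.h>
#include <stdlib.h>

static unsigned long *quicklist;

/* Free: the page's first word becomes the link to the old list head. */
static void ql_free(unsigned long *page)
{
	*page = (unsigned long)quicklist;
	quicklist = page;
}

/* Alloc: pop the head and clear the link word, as the kernel code does. */
static unsigned long *ql_alloc(void)
{
	unsigned long *ret = quicklist;

	if (ret) {
		quicklist = (unsigned long *)*ret;
		ret[0] = 0;
	}
	return ret;
}

int main(void)
{
	unsigned long *p = calloc(512, sizeof(*p)); /* stand-in for a 4K page */

	ql_free(p);
	printf("%s\n", ql_alloc() == p ? "page reused" : "bug");
	free(p);
	return 0;
}
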
+#define USER_PTRS_PER_PGD (1024) +#define FIRST_USER_PGD_NR 0 + +#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PGD_INDEX_SIZE + PAGE_SHIFT) + +/* + * Define the address range of the vmalloc VM area. + */ +#define VMALLOC_START (0xD000000000000000) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#define VMALLOC_END (VMALLOC_START + VALID_EA_BITS) + +/* + * Define the address range of the imalloc VM area. + * (used for ioremap) + */ +#define IMALLOC_START (ioremap_bot) +#define IMALLOC_VMADDR(x) ((unsigned long)(x)) +#define IMALLOC_BASE (0xE000000000000000) +#define IMALLOC_END (IMALLOC_BASE + VALID_EA_BITS) + +/* + * Define the address range mapped virt <-> physical + */ +#define KRANGE_START KERNELBASE +#define KRANGE_END (KRANGE_START + VALID_EA_BITS) + +/* + * Define the user address range + */ +#define USER_START (0UL) +#define USER_END (USER_START + VALID_EA_BITS) + + +/* + * Bits in a linux-style PTE. These match the bits in the + * (hardware-defined) PowerPC PTE as closely as possible. + */ +#define _PAGE_PRESENT 0x001UL /* software: pte contains a translation */ +#define _PAGE_USER 0x002UL /* matches one of the PP bits */ +#define _PAGE_RW 0x004UL /* software: user write access allowed */ +#define _PAGE_GUARDED 0x008UL +#define _PAGE_COHERENT 0x010UL /* M: enforce memory coherence (SMP systems) */ +#define _PAGE_NO_CACHE 0x020UL /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */ +#define _PAGE_DIRTY 0x080UL /* C: page changed */ +#define _PAGE_ACCESSED 0x100UL /* R: page referenced */ +#define _PAGE_HPTENOIX 0x200UL /* software: pte HPTE slot unknown */ +#define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */ +#define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */ +#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */ +#define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */ +/* Bits 0x7000 identify the index within an HPT Group */ +#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_HPTENOIX | _PAGE_SECONDARY | _PAGE_GROUP_IX) +/* PAGE_MASK gives the right answer below, but only by accident */ +/* It should be preserving the high 48 bits and then specifically */ +/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS) + +#define _PAGE_BASE _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT + +#define _PAGE_WRENABLE _PAGE_RW | _PAGE_DIRTY + +/* __pgprot defined in asm-ppc64/page.h */ +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) + +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE) +#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \ + _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED) + +/* + * The PowerPC can only do execute protection on a segment (256MB) basis, + * not on a page basis. So we consider execute permission the same as read. + * Also, write permissions imply read permissions. + * This is the closest we can get.. 
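
For reference, PAGE_KERNEL above composes from the individual bits as follows. Note that the _PAGE_BASE and _PAGE_WRENABLE defines above are unparenthesized, so they are only safe inside wrappers like __pgprot(); parentheses are added in this standalone check:

#include <stdio.h>

#define _PAGE_PRESENT  0x001UL
#define _PAGE_RW       0x004UL
#define _PAGE_COHERENT 0x010UL
#define _PAGE_DIRTY    0x080UL
#define _PAGE_ACCESSED 0x100UL

#define _PAGE_BASE     (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
#define _PAGE_WRENABLE (_PAGE_RW | _PAGE_DIRTY)

int main(void)
{
	/* PAGE_KERNEL = base bits plus write-enable bits. */
	printf("PAGE_KERNEL = 0x%lx\n", _PAGE_BASE | _PAGE_WRENABLE); /* 0x195 */
	return 0;
}
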
+ */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY_X +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY_X +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY_X +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED_X +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED_X + +#ifndef __ASSEMBLY__ + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (mem_map + MAP_NR(empty_zero_page)) +#endif /* __ASSEMBLY__ */ + +/* shift to put page number into pte */ +#define PTE_SHIFT (16) + +#ifndef __ASSEMBLY__ + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * mk_pte_phys takes a physical address as input + * + * mk_pte takes a (struct page *) as input + */ + +#define mk_pte_phys(physpage,pgprot) \ +({ \ + pte_t pte; \ + pte_val(pte) = (((physpage)<<(PTE_SHIFT-PAGE_SHIFT)) | pgprot_val(pgprot)); \ + pte; \ +}) + +#define mk_pte(page,pgprot) \ +({ \ + pte_t pte; \ + pte_val(pte) = ((unsigned long)((page) - mem_map) << PTE_SHIFT) | \ + pgprot_val(pgprot); \ + pte; \ +}) + +#define pte_modify(_pte, newprot) \ + (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))) + +#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0) +#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) + +// pte_clear moved to later in this file + +#define pte_pagenr(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) +#define pte_page(x) (mem_map+pte_pagenr(x)) + +#define pmd_set(pmdp, ptep) (pmd_val(*(pmdp)) = (__ba_to_bpn(ptep))) +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) ((pmd_val(pmd)) == 0) +#define pmd_present(pmd) ((pmd_val(pmd)) != 0) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) +#define pmd_page(pmd) (__bpn_to_ba(pmd_val(pmd))) +#define pgd_set(pgdp, pmdp) (pgd_val(*(pgdp)) = (__ba_to_bpn(pmdp))) +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) ((pgd_val(pgd)) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0UL) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +#define pgd_page(pgd) (__bpn_to_ba(pgd_val(pgd))) + +/* + * Find an entry in a page-table-directory. We combine the address region + * (the high order N bits) and the pgd portion of the address. + */ +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD -1)) + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(dir,addr) \ + ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + +/* Find an entry in the third-level page table.. */ +#define pte_offset(dir,addr) \ + ((pte_t *) pmd_page(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + +/* to find an entry in a kernel page-table-directory */ +// This now only contains the vmalloc pages +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* to find an entry in the ioremap page-table-directory */ +#define pgd_offset_i(address) (ioremap_pgd + pgd_index(address)) + +/* + * Given a pointer to an mem_map[] entry, return the kernel virtual + * address corresponding to that page. 
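
Putting the accessors above together, a fault-path-style lookup walks pgd, then pmd, then pte. A sketch only; in-kernel callers also hold the page-table lock, and this ignores the bad-entry checks:

/* Resolve the pte for a user address, or NULL if unmapped. */
pte_t *lookup_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd))
		return NULL;
	return pte_offset(pmd, addr);
}
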
+ */ +#define page_address(page) ((page)->virtual) + +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +extern inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER;} +extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW;} +extern inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;} +extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} +extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} + +extern inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } +extern inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } + +extern inline pte_t pte_rdprotect(pte_t pte) { + pte_val(pte) &= ~_PAGE_USER; return pte; } +extern inline pte_t pte_exprotect(pte_t pte) { + pte_val(pte) &= ~_PAGE_EXEC; return pte; } +extern inline pte_t pte_wrprotect(pte_t pte) { + pte_val(pte) &= ~(_PAGE_RW); return pte; } +extern inline pte_t pte_mkclean(pte_t pte) { + pte_val(pte) &= ~(_PAGE_DIRTY); return pte; } +extern inline pte_t pte_mkold(pte_t pte) { + pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } + +extern inline pte_t pte_mkread(pte_t pte) { + pte_val(pte) |= _PAGE_USER; return pte; } +extern inline pte_t pte_mkexec(pte_t pte) { + pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; } +extern inline pte_t pte_mkwrite(pte_t pte) { + pte_val(pte) |= _PAGE_RW; return pte; } +extern inline pte_t pte_mkdirty(pte_t pte) { + pte_val(pte) |= _PAGE_DIRTY; return pte; } +extern inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_ACCESSED; return pte; } + +/* Atomic PTE updates */ + +static inline unsigned long pte_update( pte_t *p, unsigned long clr, + unsigned long set ) +{ + unsigned long old, tmp; + + __asm__ __volatile__("\n\ +1: ldarx %0,0,%3 \n\ + andc %1,%0,%4 \n\ + or %1,%1,%5 \n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); + return old; +} + +static inline int ptep_test_and_clear_young(pte_t *ptep) +{ + return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0; +} + +static inline int ptep_test_and_clear_dirty(pte_t *ptep) +{ + return (pte_update(ptep, _PAGE_DIRTY, 0) & _PAGE_DIRTY) != 0; +} + +static inline pte_t ptep_get_and_clear(pte_t *ptep) +{ + return __pte(pte_update(ptep, ~_PAGE_HPTEFLAGS, 0)); +} + +static inline void ptep_set_wrprotect(pte_t *ptep) +{ + pte_update(ptep, _PAGE_RW, 0); +} + +static inline void ptep_mkdirty(pte_t *ptep) +{ + pte_update(ptep, 0, _PAGE_DIRTY); +} + +#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) + +/* + * set_pte stores a pte into the Linux page table, removing + * the previous entry. It preserves the _PAGE_HPTEFLAGS flags. 
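
The ldarx/stdcx. loop in pte_update() above is a load-linked/store-conditional read-modify-write: clear some bits, set others, retry if another CPU raced. The same effect written portably with a GCC compare-and-swap builtin, for illustration only:

#include <stdio.h>

static unsigned long pte_update_model(unsigned long *p,
				      unsigned long clr, unsigned long set)
{
	unsigned long old, new;

	do {
		old = *p;
		new = (old & ~clr) | set;
	} while (!__atomic_compare_exchange_n(p, &old, new, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST));
	return old; /* like pte_update(), returns the previous value */
}

int main(void)
{
	unsigned long pte = 0x195UL;        /* PAGE_KERNEL-like bit pattern */

	pte_update_model(&pte, 0x004UL, 0); /* clear _PAGE_RW atomically */
	printf("0x%lx\n", pte);             /* 0x191 */
	return 0;
}
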
+ */ +#if 0 +static inline void set_pte( pte_t * ptep, pte_t pte ) +{ + pte_update( ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS ); +} + +#define pte_clear(pmdp) (set_pte((pmdp), __pte(0) )) + +#else +extern void set_pte(pte_t *ptep, pte_t pte); + +static inline void pte_clear(pte_t * ptep) +{ + pte_update(ptep, ~_PAGE_HPTEFLAGS, 0); +} +#endif + + +struct mm_struct; +struct vm_area_struct; +extern void local_flush_tlb_all(void); +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void local_flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long end); + +#define flush_tlb_all local_flush_tlb_all +#define flush_tlb_mm local_flush_tlb_mm +#define flush_tlb_page local_flush_tlb_page +#define flush_tlb_range local_flush_tlb_range + +extern inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* PPC has hw page tables. */ +} + +/* + * No cache flushing is required when address mappings are + * changed, because the caches on PowerPCs are physically + * addressed. + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(mm, a, b) do { } while (0) +#define flush_cache_page(vma, p) do { } while (0) +#define flush_page_to_ram(page) do { } while (0) +#define flush_icache_page(vma, page) do { } while (0) + +extern void flush_icache_range(unsigned long, unsigned long); +extern void __flush_dcache_icache(unsigned long page_va); +extern void flush_dcache_page(struct page *page); + +extern unsigned long va_to_phys(unsigned long address); +extern pte_t *va_to_pte(unsigned long address); +extern unsigned long ioremap_bot, ioremap_base; + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) + +extern pgd_t swapper_pg_dir[1024]; +extern pgd_t ioremap_dir[1024]; + +extern void paging_init(void); + +/* + * Page tables may have changed. We don't need to do anything here + * as entries are faulted into the hash table by the low-level + * data/instruction access exception handlers. + */ +// We won't be able to use update_mmu_cache to update the +// hardware page table because we need to update the pte +// as well, but we don't get the address of the pte, only +// its value. +#define update_mmu_cache(vma, addr, pte) do { } while (0) + +extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid); +extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte); + +/* Encode and de-code a swap entry */ +#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) +#define SWP_OFFSET(entry) ((entry).val >> 8) +#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT }) +#define swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT }) + +// kern_addr_valid is intended to indicate whether an address is a valid +// kernel address. Most 32-bit archs define it as always true (like this) +// but most 64-bit archs actually perform a test. What should we do here? 
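
In the swap-entry encoding above, the type occupies bits 1-6 and the offset starts at bit 8, so bit 0 (_PAGE_PRESENT) of a swap pte stays clear. A standalone round-trip check of the macros:

#include <stdio.h>

typedef struct { unsigned long val; } swp_entry_t;

#define SWP_TYPE(entry)   (((entry).val >> 1) & 0x3f)
#define SWP_OFFSET(entry) ((entry).val >> 8)
#define SWP_ENTRY(type, offset) \
	((swp_entry_t) { ((type) << 1) | ((offset) << 8) })

int main(void)
{
	swp_entry_t e = SWP_ENTRY(3UL, 12345UL);

	/* Type and offset come back intact, and bit 0 is clear. */
	printf("type=%lu offset=%lu present=%lu\n",
	       SWP_TYPE(e), SWP_OFFSET(e), e.val & 1UL);
	return 0;
}
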
+// The only use is in fs/ncpfs/dir.c
+#define kern_addr_valid(addr) (1)
+
+#define io_remap_page_range remap_page_range
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init() do { } while (0)
+
+extern void updateBoltedHptePP(unsigned long newpp, unsigned long ea);
+extern void hpte_init_pSeries(void);
+extern void hpte_init_iSeries(void);
+
+extern void make_pte(HPTE * htab, unsigned long va, unsigned long pa,
+	int mode, unsigned long hash_mask, int large);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PPC64_PGTABLE_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pmc.h linuxppc64_2_4/include/asm-ppc64/pmc.h
--- ../kernel.org/linux/include/asm-ppc64/pmc.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/pmc.h Tue Sep 25 13:53:55 2001
@@ -0,0 +1,113 @@
+/*
+ * pmc.h
+ * Copyright (C) 2001 Dave Engebretsen & Mike Corrigan IBM Corporation.
+ *
+ * The PPC64 PMC subsystem encompasses both the hardware PMC registers and
+ * a set of software event counters. An interface is provided via the
+ * proc filesystem which can be used to access this subsystem.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Start Change Log
+ * 2001/06/05 : engebret : Created.
+ * End Change Log + */ + +#ifndef _PPC64_TYPES_H +#include +#endif + +#ifndef _PMC_H +#define _PMC_H + +#define STAB_ENTRY_MAX 64 + +struct _pmc_hw +{ + u64 mmcr0; + u64 mmcr1; + u64 mmcra; + + u64 pmc1; + u64 pmc2; + u64 pmc3; + u64 pmc4; + u64 pmc5; + u64 pmc6; + u64 pmc7; + u64 pmc8; +}; + +struct _pmc_sw +{ + u64 stab_faults; /* Count of faults on the stab */ + u64 stab_capacity_castouts;/* Count of castouts from the stab */ + u64 stab_invalidations; /* Count of invalidations from the */ + /* stab, not including castouts */ + u64 stab_entry_use[STAB_ENTRY_MAX]; + + u64 htab_primary_overflows; + u64 htab_capacity_castouts; + u64 htab_read_to_write_fault; +}; + +#define PMC_HW_TEXT_ENTRY_COUNT (sizeof(struct _pmc_hw) / sizeof(u64)) +#define PMC_SW_TEXT_ENTRY_COUNT (sizeof(struct _pmc_sw) / sizeof(u64)) +#define PMC_TEXT_ENTRY_SIZE 64 + +struct _pmc_sw_text { + char buffer[PMC_SW_TEXT_ENTRY_COUNT * PMC_TEXT_ENTRY_SIZE]; +}; + +struct _pmc_hw_text { + char buffer[PMC_HW_TEXT_ENTRY_COUNT * PMC_TEXT_ENTRY_SIZE]; +}; + +extern struct _pmc_sw pmc_sw_system; +extern struct _pmc_sw pmc_sw_cpu[]; + +extern struct _pmc_sw_text pmc_sw_text; +extern struct _pmc_hw_text pmc_hw_text; +extern char *ppc64_pmc_stab(int file); +extern char *ppc64_pmc_htab(int file); +extern char *ppc64_pmc_hw(int file); + +#if 1 +#define PMC_SW_PROCESSOR(F) pmc_sw_cpu[smp_processor_id()].F++ +#define PMC_SW_PROCESSOR_A(F, E) (pmc_sw_cpu[smp_processor_id()].F[(E)])++ +#define PMC_SW_SYSTEM(F) pmc_sw_system.F++ +#else +#define PMC_SW_PROCESSOR(F) do {;} while (0) +#define PMC_SW_PROCESSOR_A(F) do {;} while (0) +#define PMC_SW_SYSTEM(F) do {;} while (0) +#endif + +#define MMCR0 795 +#define MMCR1 798 +#define MMCRA 786 +#define PMC1 787 +#define PMC2 788 +#define PMC3 789 +#define PMC4 790 +#define PMC5 791 +#define PMC6 792 +#define PMC7 793 +#define PMC8 794 + +#define PMC_CONTROL_CPI 1 +#define PMC_CONTROL_TLB 2 + +#endif /* _PMC_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/pnp.h linuxppc64_2_4/include/asm-ppc64/pnp.h --- ../kernel.org/linux/include/asm-ppc64/pnp.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/pnp.h Mon May 7 15:16:11 2001 @@ -0,0 +1,648 @@ +/* 11/02/95 */ +/*----------------------------------------------------------------------------*/ +/* Plug and Play header definitions */ +/*----------------------------------------------------------------------------*/ + +/* Structure map for PnP on PowerPC Reference Platform */ +/* See Plug and Play ISA Specification, Version 1.0, May 28, 1993. It */ +/* (or later versions) is available on Compuserve in the PLUGPLAY area. */ +/* This code has extensions to that specification, namely new short and */ +/* long tag types for platform dependent information */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* Warning: LE notation used throughout this file */ + +/* For enum's: if given in hex then they are bit significant, i.e. 
*/ +/* only one bit is on for each enum */ + +#ifndef _PNP_ +#define _PNP_ + +#ifndef __ASSEMBLY__ +#define MAX_MEM_REGISTERS 9 +#define MAX_IO_PORTS 20 +#define MAX_IRQS 7 +/*#define MAX_DMA_CHANNELS 7*/ + +/* Interrupt controllers */ + +#define PNPinterrupt0 "PNP0000" /* AT Interrupt Controller */ +#define PNPinterrupt1 "PNP0001" /* EISA Interrupt Controller */ +#define PNPinterrupt2 "PNP0002" /* MCA Interrupt Controller */ +#define PNPinterrupt3 "PNP0003" /* APIC */ +#define PNPExtInt "IBM000D" /* PowerPC Extended Interrupt Controller */ + +/* Timers */ + +#define PNPtimer0 "PNP0100" /* AT Timer */ +#define PNPtimer1 "PNP0101" /* EISA Timer */ +#define PNPtimer2 "PNP0102" /* MCA Timer */ + +/* DMA controllers */ + +#define PNPdma0 "PNP0200" /* AT DMA Controller */ +#define PNPdma1 "PNP0201" /* EISA DMA Controller */ +#define PNPdma2 "PNP0202" /* MCA DMA Controller */ + +/* start of August 15, 1994 additions */ +/* CMOS */ +#define PNPCMOS "IBM0009" /* CMOS */ + +/* L2 Cache */ +#define PNPL2 "IBM0007" /* L2 Cache */ + +/* NVRAM */ +#define PNPNVRAM "IBM0008" /* NVRAM */ + +/* Power Management */ +#define PNPPM "IBM0005" /* Power Management */ +/* end of August 15, 1994 additions */ + +/* Keyboards */ + +#define PNPkeyboard0 "PNP0300" /* IBM PC/XT KB Cntlr (83 key, no mouse) */ +#define PNPkeyboard1 "PNP0301" /* Olivetti ICO (102 key) */ +#define PNPkeyboard2 "PNP0302" /* IBM PC/AT KB Cntlr (84 key) */ +#define PNPkeyboard3 "PNP0303" /* IBM Enhanced (101/2 key, PS/2 mouse) */ +#define PNPkeyboard4 "PNP0304" /* Nokia 1050 KB Cntlr */ +#define PNPkeyboard5 "PNP0305" /* Nokia 9140 KB Cntlr */ +#define PNPkeyboard6 "PNP0306" /* Standard Japanese KB Cntlr */ +#define PNPkeyboard7 "PNP0307" /* Microsoft Windows (R) KB Cntlr */ + +/* Parallel port controllers */ + +#define PNPparallel0 "PNP0400" /* Standard LPT Parallel Port */ +#define PNPparallel1 "PNP0401" /* ECP Parallel Port */ +#define PNPepp "IBM001C" /* EPP Parallel Port */ + +/* Serial port controllers */ + +#define PNPserial0 "PNP0500" /* Standard PC Serial port */ +#define PNPSerial1 "PNP0501" /* 16550A Compatible Serial port */ + +/* Disk controllers */ + +#define PNPdisk0 "PNP0600" /* Generic ESDI/IDE/ATA Compat HD Cntlr */ +#define PNPdisk1 "PNP0601" /* Plus Hardcard II */ +#define PNPdisk2 "PNP0602" /* Plus Hardcard IIXL/EZ */ + +/* Diskette controllers */ + +#define PNPdiskette0 "PNP0700" /* PC Standard Floppy Disk Controller */ + +/* Display controllers */ + +#define PNPdisplay0 "PNP0900" /* VGA Compatible */ +#define PNPdisplay1 "PNP0901" /* Video Seven VGA */ +#define PNPdisplay2 "PNP0902" /* 8514/A Compatible */ +#define PNPdisplay3 "PNP0903" /* Trident VGA */ +#define PNPdisplay4 "PNP0904" /* Cirrus Logic Laptop VGA */ +#define PNPdisplay5 "PNP0905" /* Cirrus Logic VGA */ +#define PNPdisplay6 "PNP0906" /* Tseng ET4000 or ET4000/W32 */ +#define PNPdisplay7 "PNP0907" /* Western Digital VGA */ +#define PNPdisplay8 "PNP0908" /* Western Digital Laptop VGA */ +#define PNPdisplay9 "PNP0909" /* S3 */ +#define PNPdisplayA "PNP090A" /* ATI Ultra Pro/Plus (Mach 32) */ +#define PNPdisplayB "PNP090B" /* ATI Ultra (Mach 8) */ +#define PNPdisplayC "PNP090C" /* XGA Compatible */ +#define PNPdisplayD "PNP090D" /* ATI VGA Wonder */ +#define PNPdisplayE "PNP090E" /* Weitek P9000 Graphics Adapter */ +#define PNPdisplayF "PNP090F" /* Oak Technology VGA */ + +/* Peripheral busses */ + +#define PNPbuses0 "PNP0A00" /* ISA Bus */ +#define PNPbuses1 "PNP0A01" /* EISA Bus */ +#define PNPbuses2 "PNP0A02" /* MCA Bus */ +#define PNPbuses3 "PNP0A03" /* 
PCI Bus */ +#define PNPbuses4 "PNP0A04" /* VESA/VL Bus */ + +/* RTC, BIOS, planar devices */ + +#define PNPspeaker0 "PNP0800" /* AT Style Speaker Sound */ +#define PNPrtc0 "PNP0B00" /* AT RTC */ +#define PNPpnpbios0 "PNP0C00" /* PNP BIOS (only created by root enum) */ +#define PNPpnpbios1 "PNP0C01" /* System Board Memory Device */ +#define PNPpnpbios2 "PNP0C02" /* Math Coprocessor */ +#define PNPpnpbios3 "PNP0C03" /* PNP BIOS Event Notification Interrupt */ + +/* PCMCIA controller */ + +#define PNPpcmcia0 "PNP0E00" /* Intel 82365 Compatible PCMCIA Cntlr */ + +/* Mice */ + +#define PNPmouse0 "PNP0F00" /* Microsoft Bus Mouse */ +#define PNPmouse1 "PNP0F01" /* Microsoft Serial Mouse */ +#define PNPmouse2 "PNP0F02" /* Microsoft Inport Mouse */ +#define PNPmouse3 "PNP0F03" /* Microsoft PS/2 Mouse */ +#define PNPmouse4 "PNP0F04" /* Mousesystems Mouse */ +#define PNPmouse5 "PNP0F05" /* Mousesystems 3 Button Mouse - COM2 */ +#define PNPmouse6 "PNP0F06" /* Genius Mouse - COM1 */ +#define PNPmouse7 "PNP0F07" /* Genius Mouse - COM2 */ +#define PNPmouse8 "PNP0F08" /* Logitech Serial Mouse */ +#define PNPmouse9 "PNP0F09" /* Microsoft Ballpoint Serial Mouse */ +#define PNPmouseA "PNP0F0A" /* Microsoft PNP Mouse */ +#define PNPmouseB "PNP0F0B" /* Microsoft PNP Ballpoint Mouse */ + +/* Modems */ + +#define PNPmodem0 "PNP9000" /* Specific IDs TBD */ + +/* Network controllers */ + +#define PNPnetworkC9 "PNP80C9" /* IBM Token Ring */ +#define PNPnetworkCA "PNP80CA" /* IBM Token Ring II */ +#define PNPnetworkCB "PNP80CB" /* IBM Token Ring II/Short */ +#define PNPnetworkCC "PNP80CC" /* IBM Token Ring 4/16Mbs */ +#define PNPnetwork27 "PNP8327" /* IBM Token Ring (All types) */ +#define PNPnetworket "IBM0010" /* IBM Ethernet used by Power PC */ +#define PNPneteisaet "IBM2001" /* IBM Ethernet EISA adapter */ +#define PNPAMD79C970 "IBM0016" /* AMD 79C970 (PCI Ethernet) */ + +/* SCSI controllers */ + +#define PNPscsi0 "PNPA000" /* Adaptec 154x Compatible SCSI Cntlr */ +#define PNPscsi1 "PNPA001" /* Adaptec 174x Compatible SCSI Cntlr */ +#define PNPscsi2 "PNPA002" /* Future Domain 16-700 Compat SCSI Cntlr*/ +#define PNPscsi3 "PNPA003" /* Panasonic CDROM Adapter (SBPro/SB16) */ +#define PNPscsiF "IBM000F" /* NCR 810 SCSI Controller */ +#define PNPscsi825 "IBM001B" /* NCR 825 SCSI Controller */ +#define PNPscsi875 "IBM0018" /* NCR 875 SCSI Controller */ + +/* Sound/Video, Multimedia */ + +#define PNPmm0 "PNPB000" /* Sound Blaster Compatible Sound Device */ +#define PNPmm1 "PNPB001" /* MS Windows Sound System Compat Device */ +#define PNPmmF "IBM000E" /* Crystal CS4231 Audio Device */ +#define PNPv7310 "IBM0015" /* ASCII V7310 Video Capture Device */ +#define PNPmm4232 "IBM0017" /* Crystal CS4232 Audio Device */ +#define PNPpmsyn "IBM001D" /* YMF 289B chip (Yamaha) */ +#define PNPgp4232 "IBM0012" /* Crystal CS4232 Game Port */ +#define PNPmidi4232 "IBM0013" /* Crystal CS4232 MIDI */ + +/* Operator Panel */ +#define PNPopctl "IBM000B" /* Operator's panel */ + +/* Service Processor */ +#define PNPsp "IBM0011" /* IBM Service Processor */ +#define PNPLTsp "IBM001E" /* Lightning/Terlingua Support Processor */ +#define PNPLTmsp "IBM001F" /* Lightning/Terlingua Mini-SP */ + +/* Memory Controller */ +#define PNPmemctl "IBM000A" /* Memory controller */ + +/* Graphics Assist */ +#define PNPg_assist "IBM0014" /* Graphics Assist */ + +/* Miscellaneous Device Controllers */ +#define PNPtablet "IBM0019" /* IBM Tablet Controller */ + +/* PNP Packet Handles */ + +#define S1_Packet 0x0A /* Version resource */ +#define S2_Packet 0x15 /* 
Logical DEVID (without flags) */ +#define S2_Packet_flags 0x16 /* Logical DEVID (with flags) */ +#define S3_Packet 0x1C /* Compatible device ID */ +#define S4_Packet 0x22 /* IRQ resource (without flags) */ +#define S4_Packet_flags 0x23 /* IRQ resource (with flags) */ +#define S5_Packet 0x2A /* DMA resource */ +#define S6_Packet 0x30 /* Depend funct start (w/o priority) */ +#define S6_Packet_priority 0x31 /* Depend funct start (w/ priority) */ +#define S7_Packet 0x38 /* Depend funct end */ +#define S8_Packet 0x47 /* I/O port resource (w/o fixed loc) */ +#define S9_Packet_fixed 0x4B /* I/O port resource (w/ fixed loc) */ +#define S14_Packet 0x71 /* Vendor defined */ +#define S15_Packet 0x78 /* End of resource (w/o checksum) */ +#define S15_Packet_checksum 0x79 /* End of resource (w/ checksum) */ +#define L1_Packet 0x81 /* Memory range */ +#define L1_Shadow 0x20 /* Memory is shadowable */ +#define L1_32bit_mem 0x18 /* 32-bit memory only */ +#define L1_8_16bit_mem 0x10 /* 8- and 16-bit supported */ +#define L1_Decode_Hi 0x04 /* decode supports high address */ +#define L1_Cache 0x02 /* read cacheable, write-through */ +#define L1_Writeable 0x01 /* Memory is writeable */ +#define L2_Packet 0x82 /* ANSI ID string */ +#define L3_Packet 0x83 /* Unicode ID string */ +#define L4_Packet 0x84 /* Vendor defined */ +#define L5_Packet 0x85 /* Large I/O */ +#define L6_Packet 0x86 /* 32-bit Fixed Loc Mem Range Desc */ +#define END_TAG 0x78 /* End of resource */ +#define DF_START_TAG 0x30 /* Dependent function start */ +#define DF_START_TAG_priority 0x31 /* Dependent function start */ +#define DF_END_TAG 0x38 /* Dependent function end */ +#define SUBOPTIMAL_CONFIGURATION 0x2 /* Priority byte sub optimal config */ + +/* Device Base Type Codes */ + +typedef enum _PnP_BASE_TYPE { + Reserved = 0, + MassStorageDevice = 1, + NetworkInterfaceController = 2, + DisplayController = 3, + MultimediaController = 4, + MemoryController = 5, + BridgeController = 6, + CommunicationsDevice = 7, + SystemPeripheral = 8, + InputDevice = 9, + ServiceProcessor = 0x0A, /* 11/2/95 */ + } PnP_BASE_TYPE; + +/* Device Sub Type Codes */ + +typedef enum _PnP_SUB_TYPE { + SCSIController = 0, + IDEController = 1, + FloppyController = 2, + IPIController = 3, + OtherMassStorageController = 0x80, + + EthernetController = 0, + TokenRingController = 1, + FDDIController = 2, + OtherNetworkController = 0x80, + + VGAController= 0, + SVGAController= 1, + XGAController= 2, + OtherDisplayController = 0x80, + + VideoController = 0, + AudioController = 1, + OtherMultimediaController = 0x80, + + RAM = 0, + FLASH = 1, + OtherMemoryDevice = 0x80, + + HostProcessorBridge = 0, + ISABridge = 1, + EISABridge = 2, + MicroChannelBridge = 3, + PCIBridge = 4, + PCMCIABridge = 5, + VMEBridge = 6, + OtherBridgeDevice = 0x80, + + RS232Device = 0, + ATCompatibleParallelPort = 1, + OtherCommunicationsDevice = 0x80, + + ProgrammableInterruptController = 0, + DMAController = 1, + SystemTimer = 2, + RealTimeClock = 3, + L2Cache = 4, + NVRAM = 5, + PowerManagement = 6, + CMOS = 7, + OperatorPanel = 8, + ServiceProcessorClass1 = 9, + ServiceProcessorClass2 = 0xA, + ServiceProcessorClass3 = 0xB, + GraphicAssist = 0xC, + SystemPlanar = 0xF, /* 10/5/95 */ + OtherSystemPeripheral = 0x80, + + KeyboardController = 0, + Digitizer = 1, + MouseController = 2, + TabletController = 3, /* 10/27/95 */ + OtherInputController = 0x80, + + GeneralMemoryController = 0, + } PnP_SUB_TYPE; + +/* Device Interface Type Codes */ + +typedef enum _PnP_INTERFACE { + General = 0, + GeneralSCSI = 0, + 
GeneralIDE = 0, + ATACompatible = 1, + + GeneralFloppy = 0, + Compatible765 = 1, + NS398_Floppy = 2, /* NS Super I/O wired to use index + register at port 398 and data + register at port 399 */ + NS26E_Floppy = 3, /* Ports 26E and 26F */ + NS15C_Floppy = 4, /* Ports 15C and 15D */ + NS2E_Floppy = 5, /* Ports 2E and 2F */ + CHRP_Floppy = 6, /* CHRP Floppy in PR*P system */ + + GeneralIPI = 0, + + GeneralEther = 0, + GeneralToken = 0, + GeneralFDDI = 0, + + GeneralVGA = 0, + GeneralSVGA = 0, + GeneralXGA = 0, + + GeneralVideo = 0, + GeneralAudio = 0, + CS4232Audio = 1, /* CS 4232 Plug 'n Play Configured */ + + GeneralRAM = 0, + GeneralFLASH = 0, + PCIMemoryController = 0, /* PCI Config Method */ + RS6KMemoryController = 1, /* RS6K Config Method */ + + GeneralHostBridge = 0, + GeneralISABridge = 0, + GeneralEISABridge = 0, + GeneralMCABridge = 0, + GeneralPCIBridge = 0, + PCIBridgeDirect = 0, + PCIBridgeIndirect = 1, + PCIBridgeRS6K = 2, + GeneralPCMCIABridge = 0, + GeneralVMEBridge = 0, + + GeneralRS232 = 0, + COMx = 1, + Compatible16450 = 2, + Compatible16550 = 3, + NS398SerPort = 4, /* NS Super I/O wired to use index + register at port 398 and data + register at port 399 */ + NS26ESerPort = 5, /* Ports 26E and 26F */ + NS15CSerPort = 6, /* Ports 15C and 15D */ + NS2ESerPort = 7, /* Ports 2E and 2F */ + + GeneralParPort = 0, + LPTx = 1, + NS398ParPort = 2, /* NS Super I/O wired to use index + register at port 398 and data + register at port 399 */ + NS26EParPort = 3, /* Ports 26E and 26F */ + NS15CParPort = 4, /* Ports 15C and 15D */ + NS2EParPort = 5, /* Ports 2E and 2F */ + + GeneralPIC = 0, + ISA_PIC = 1, + EISA_PIC = 2, + MPIC = 3, + RS6K_PIC = 4, + + GeneralDMA = 0, + ISA_DMA = 1, + EISA_DMA = 2, + + GeneralTimer = 0, + ISA_Timer = 1, + EISA_Timer = 2, + GeneralRTC = 0, + ISA_RTC = 1, + + StoreThruOnly = 1, + StoreInEnabled = 2, + RS6KL2Cache = 3, + + IndirectNVRAM = 0, /* Indirectly addressed */ + DirectNVRAM = 1, /* Memory Mapped */ + IndirectNVRAM24 = 2, /* Indirectly addressed - 24 bit */ + + GeneralPowerManagement = 0, + EPOWPowerManagement = 1, + PowerControl = 2, // d1378 + + GeneralCMOS = 0, + + GeneralOPPanel = 0, + HarddiskLight = 1, + CDROMLight = 2, + PowerLight = 3, + KeyLock = 4, + ANDisplay = 5, /* AlphaNumeric Display */ + SystemStatusLED = 6, /* 3 digit 7 segment LED */ + CHRP_SystemStatusLED = 7, /* CHRP LEDs in PR*P system */ + + GeneralServiceProcessor = 0, + + TransferData = 1, + IGMC32 = 2, + IGMC64 = 3, + + GeneralSystemPlanar = 0, /* 10/5/95 */ + + } PnP_INTERFACE; + +/* PnP resources */ + +/* Compressed ASCII is 5 bits per char; 00001=A ... 11010=Z */ + +typedef struct _SERIAL_ID { + unsigned char VendorID0; /* Bit(7)=0 */ + /* Bits(6:2)=1st character in */ + /* compressed ASCII */ + /* Bits(1:0)=2nd character in */ + /* compressed ASCII bits(4:3) */ + unsigned char VendorID1; /* Bits(7:5)=2nd character in */ + /* compressed ASCII bits(2:0) */ + /* Bits(4:0)=3rd character in */ + /* compressed ASCII */ + unsigned char VendorID2; /* Product number - vendor assigned */ + unsigned char VendorID3; /* Product number - vendor assigned */ + +/* Serial number is to provide uniqueness if more than one board of same */ +/* type is in system. Must be "FFFFFFFF" if feature not supported. 
*/
+
+	unsigned char Serial0;	/* Unique serial number bits (7:0) */
+	unsigned char Serial1;	/* Unique serial number bits (15:8) */
+	unsigned char Serial2;	/* Unique serial number bits (23:16) */
+	unsigned char Serial3;	/* Unique serial number bits (31:24) */
+	unsigned char Checksum;
+	} SERIAL_ID;
+
+typedef enum _PnPItemName {
+	Unused = 0,
+	PnPVersion = 1,
+	LogicalDevice = 2,
+	CompatibleDevice = 3,
+	IRQFormat = 4,
+	DMAFormat = 5,
+	StartDepFunc = 6,
+	EndDepFunc = 7,
+	IOPort = 8,
+	FixedIOPort = 9,
+	Res1 = 10,
+	Res2 = 11,
+	Res3 = 12,
+	SmallVendorItem = 14,
+	EndTag = 15,
+	MemoryRange = 1,
+	ANSIIdentifier = 2,
+	UnicodeIdentifier = 3,
+	LargeVendorItem = 4,
+	MemoryRange32 = 5,
+	MemoryRangeFixed32 = 6,
+	} PnPItemName;
+
+/* Define a bunch of access functions for the bits in the tag field */
+
+/* Tag type - 0 = small; 1 = large */
+#define tag_type(t) (((t) & 0x80)>>7)
+#define set_tag_type(t,v) (t = (t & 0x7f) | ((v)<<7))
+
+/* Small item name is 4 bits - one of PnPItemName enum above */
+#define tag_small_item_name(t) (((t) & 0x78)>>3)
+#define set_tag_small_item_name(t,v) (t = (t & 0x07) | ((v)<<3))
+
+/* Small item count is 3 bits - count of further bytes in packet */
+#define tag_small_count(t) ((t) & 0x07)
+#define set_tag_count(t,v) (t = (t & 0x78) | (v))
+
+/* Large item name is 7 bits - one of PnPItemName enum above */
+#define tag_large_item_name(t) ((t) & 0x7f)
+#define set_tag_large_item_name(t,v) (t = (t | 0x80) | (v))
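/*
 * Illustrative sketch (not part of the original patch): classifying a
 * resource tag byte with the accessors above.  A tag of 0x22 (S4_Packet)
 * has bit 7 clear (small item), IRQFormat (4) in bits 6:3, and the two
 * IRQ-mask bytes that follow counted in bits 2:0.
 */
static inline int is_small_irq_tag(unsigned char tag)
{
	return tag_type(tag) == 0 &&
	       tag_small_item_name(tag) == IRQFormat &&
	       tag_small_count(tag) >= 2;	/* at least the two mask bytes */
}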
+
+/* a PnP resource is a bunch of contiguous TAG packets ending with an end tag */
+
+typedef union _PnP_TAG_PACKET {
+	struct _S1_Pack{		/* VERSION PACKET */
+		unsigned char Tag;		/* small tag = 0x0a */
+		unsigned char Version[2];	/* PnP version, Vendor version */
+		} S1_Pack;
+
+	struct _S2_Pack{		/* LOGICAL DEVICE ID PACKET */
+		unsigned char Tag;		/* small tag = 0x15 or 0x16 */
+		unsigned char DevId[4];		/* Logical device id */
+		unsigned char Flags[2];		/* bit(0) boot device; */
+						/* bit(7:1) cmd in range x31-x37 */
+						/* bit(7:0) cmd in range x28-x3f (opt)*/
+		} S2_Pack;
+
+	struct _S3_Pack{		/* COMPATIBLE DEVICE ID PACKET */
+		unsigned char Tag;		/* small tag = 0x1c */
+		unsigned char CompatId[4];	/* Compatible device id */
+		} S3_Pack;
+
+	struct _S4_Pack{		/* IRQ PACKET */
+		unsigned char Tag;		/* small tag = 0x22 or 0x23 */
+		unsigned char IRQMask[2];	/* bit(0) is IRQ0, ...; */
+						/* bit(0) is IRQ8 ... */
+		unsigned char IRQInfo;		/* optional; assume bit(0)=1; else */
+						/* bit(0) - high true edge sensitive */
+						/* bit(1) - low true edge sensitive */
+						/* bit(2) - high true level sensitive*/
+						/* bit(3) - low true level sensitive */
+						/* bit(7:4) - must be 0 */
+		} S4_Pack;
+
+	struct _S5_Pack{		/* DMA PACKET */
+		unsigned char Tag;		/* small tag = 0x2a */
+		unsigned char DMAMask;		/* bit(0) is channel 0 ... */
+		unsigned char DMAInfo;
+		} S5_Pack;
+
+	struct _S6_Pack{		/* START DEPENDENT FUNCTION PACKET */
+		unsigned char Tag;		/* small tag = 0x30 or 0x31 */
+		unsigned char Priority;		/* Optional; if missing then x01; else*/
+						/* x00 = best possible */
+						/* x01 = acceptable */
+						/* x02 = sub-optimal but functional */
+		} S6_Pack;
+
+	struct _S7_Pack{		/* END DEPENDENT FUNCTION PACKET */
+		unsigned char Tag;		/* small tag = 0x38 */
+		} S7_Pack;
+
+	struct _S8_Pack{		/* VARIABLE I/O PORT PACKET */
+		unsigned char Tag;		/* small tag = 0x47 */
+		unsigned char IOInfo;		/* x0 = decode only bits(9:0); */
+#define ISAAddr16bit 0x01			/* x01 = decode bits(15:0) */
+		unsigned char RangeMin[2];	/* Min base address */
+		unsigned char RangeMax[2];	/* Max base address */
+		unsigned char IOAlign;		/* base alignment, increment in 1-byte blocks */
+		unsigned char IONum;		/* number of contiguous I/O ports */
+		} S8_Pack;
+
+	struct _S9_Pack{		/* FIXED I/O PORT PACKET */
+		unsigned char Tag;		/* small tag = 0x4b */
+		unsigned char Range[2];		/* base address 10 bits */
+		unsigned char IONum;		/* number of contiguous I/O ports */
+		} S9_Pack;
+
+	struct _S14_Pack{		/* VENDOR DEFINED PACKET */
+		unsigned char Tag;		/* small tag = 0x7m, m = 1-7 */
+		union _S14_Data{
+			unsigned char Data[7];	/* Vendor defined */
+			struct _S14_PPCPack{	/* Pr*p S14 pack */
+				unsigned char Type;		/* 00=non-IBM */
+				unsigned char PPCData[6];	/* Vendor defined */
+				} S14_PPCPack;
+			} S14_Data;
+		} S14_Pack;
+
+	struct _S15_Pack{		/* END PACKET */
+		unsigned char Tag;		/* small tag = 0x78 or 0x79 */
+		unsigned char Check;		/* optional - checksum */
+		} S15_Pack;
+
+	struct _L1_Pack{		/* MEMORY RANGE PACKET */
+		unsigned char Tag;		/* large tag = 0x81 */
+		unsigned char Count0;		/* x09 */
+		unsigned char Count1;		/* x00 */
+		unsigned char Data[9];		/* a variable array of bytes, */
+						/* count in tag */
+		} L1_Pack;
+
+	struct _L2_Pack{		/* ANSI ID STRING PACKET */
+		unsigned char Tag;		/* large tag = 0x82 */
+		unsigned char Count0;		/* Length of string */
+		unsigned char Count1;
+		unsigned char Identifier[1];	/* a variable array of bytes, */
+						/* count in tag */
+		} L2_Pack;
+
+	struct _L3_Pack{		/* UNICODE ID STRING PACKET */
+		unsigned char Tag;		/* large tag = 0x83 */
+		unsigned char Count0;		/* Length + 2 of string */
+		unsigned char Count1;
+		unsigned char Country0;		/* TBD */
+		unsigned char Country1;		/* TBD */
+		unsigned char Identifier[1];	/* a variable array of bytes, */
+						/* count in tag */
+		} L3_Pack;
+
+	struct _L4_Pack{		/* VENDOR DEFINED PACKET */
+		unsigned char Tag;		/* large tag = 0x84 */
+		unsigned char Count0;
+		unsigned char Count1;
+		union _L4_Data{
+			unsigned char Data[1];	/* a variable array of bytes, */
+						/* count in tag */
+			struct _L4_PPCPack{	/* Pr*p L4 packet */
+				unsigned char Type;		/* 00=non-IBM */
+				unsigned char PPCData[1];	/* a variable array of bytes, */
+								/* count in tag */
+				} L4_PPCPack;
+			} L4_Data;
+		} L4_Pack;
+
+	struct _L5_Pack{
+		unsigned char Tag;		/* large tag = 0x85 */
+		unsigned char Count0;		/* Count = 17 */
+		unsigned char Count1;
+		unsigned char Data[17];
+		} L5_Pack;
+
+	struct _L6_Pack{
+		unsigned char Tag;		/* large tag = 0x86 */
+		unsigned char Count0;		/* Count = 9 */
+		unsigned char Count1;
+		unsigned char Data[9];
+		} L6_Pack;
+
+	} PnP_TAG_PACKET;
+
+#endif /* __ASSEMBLY__ */
+#endif /* ndef _PNP_ */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/poll.h linuxppc64_2_4/include/asm-ppc64/poll.h
--- ../kernel.org/linux/include/asm-ppc64/poll.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/poll.h	Wed May 23 
10:19:30 2001 @@ -0,0 +1,31 @@ +#ifndef __PPC64_POLL_H +#define __PPC64_POLL_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif /* __PPC64_POLL_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/posix_types.h linuxppc64_2_4/include/asm-ppc64/posix_types.h --- ../kernel.org/linux/include/asm-ppc64/posix_types.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/posix_types.h Thu Sep 20 16:10:01 2001 @@ -0,0 +1,141 @@ +#ifndef _PPC64_POSIX_TYPES_H +#define _PPC64_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +typedef unsigned int __kernel_dev_t; +typedef unsigned int __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned int __kernel_nlink_t; +typedef long __kernel_off_t; +typedef long long __kernel_loff_t; +typedef int __kernel_pid_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned int __kernel_old_uid_t; +typedef unsigned int __kernel_old_gid_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + + +/* These are here to support 32-bit syscalls on a 64-bit kernel. 
*/ +typedef unsigned int __kernel_size_t32; +typedef int __kernel_ssize_t32; +typedef int __kernel_ptrdiff_t32; +typedef int __kernel_time_t32; +typedef int __kernel_clock_t32; +typedef int __kernel_pid_t32; +typedef unsigned short __kernel_ipc_pid_t32; +typedef unsigned int __kernel_uid_t32; +typedef unsigned int __kernel_gid_t32; +typedef unsigned int __kernel_dev_t32; +typedef unsigned int __kernel_ino_t32; +typedef unsigned int __kernel_mode_t32; +typedef unsigned int __kernel_umode_t32; +typedef short __kernel_nlink_t32; +typedef int __kernel_daddr_t32; +typedef int __kernel_off_t32; +typedef unsigned int __kernel_caddr_t32; +typedef int __kernel_loff_t32; +/* typedef __kernel_fsid_t __kernel_fsid_t32; */ + + + +#ifndef __GNUC__ + +#define __FD_SET(d, set) ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d)) +#define __FD_CLR(d, set) ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d)) +#define __FD_ISSET(d, set) ((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) +#define __FD_ZERO(set) \ + ((void) memset ((__ptr_t) (set), 0, sizeof (__kernel_fd_set))) + +#else /* __GNUC__ */ + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) \ + || (__GLIBC__ == 2 && __GLIBC_MINOR__ == 0) +/* With GNU C, use inline functions instead so args are evaluated only once: */ + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] |= (1UL<<_rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); +} + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long fd, __kernel_fd_set *p) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *p) +{ + unsigned long *tmp = (unsigned long *)p->fds_bits; + int i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + + case 8: + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + + case 4: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + return; + } + } + i = __FDSET_LONGS; + while (i) { + i--; + *tmp = 0; + tmp++; + } +} + +#endif /* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */ +#endif /* __GNUC__ */ +#endif /* _PPC64_POSIX_TYPES_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ppcdebug.h linuxppc64_2_4/include/asm-ppc64/ppcdebug.h --- ../kernel.org/linux/include/asm-ppc64/ppcdebug.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ppcdebug.h Wed Oct 3 07:44:54 2001 @@ -0,0 +1,110 @@ +#ifndef __PPCDEBUG_H +#define __PPCDEBUG_H +/******************************************************************** + * Author: Adam Litke, IBM Corp + * (c) 2001 + * + * This file contains definitions and macros for a runtime debugging + * system for ppc64 (This should also work on 32 bit with a few + * adjustments. 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ ********************************************************************/
+
+#include
+#include
+#include "linux/autoconf.h"
+
+#define PPCDBG_BITVAL(X)	((1UL)<<((unsigned long)(X)))
+
+/* Defined below are the bit positions of various debug flags in the
+ * debug_switch variable (defined in Naca.h).
+ * -- When adding new values, please enter them into trace names below --
+ *
+ * Values 62 & 63 can be used to stress the hardware page table management
+ * code.  They must be set statically, any attempt to change them dynamically
+ * would be a very bad idea.
+ */
+#define PPCDBG_MMINIT		PPCDBG_BITVAL(0)
+#define PPCDBG_MM		PPCDBG_BITVAL(1)
+#define PPCDBG_SYS32		PPCDBG_BITVAL(2)
+#define PPCDBG_SYS32NI		PPCDBG_BITVAL(3)
+#define PPCDBG_SYS32X		PPCDBG_BITVAL(4)
+#define PPCDBG_SYS32M		PPCDBG_BITVAL(5)
+#define PPCDBG_SYS64		PPCDBG_BITVAL(6)
+#define PPCDBG_SYS64NI		PPCDBG_BITVAL(7)
+#define PPCDBG_SYS64X		PPCDBG_BITVAL(8)
+#define PPCDBG_SIGNAL		PPCDBG_BITVAL(9)
+#define PPCDBG_SIGNALXMON	PPCDBG_BITVAL(10)
+#define PPCDBG_BINFMT		PPCDBG_BITVAL(11)
+#define PPCDBG_BINFMTXMON	PPCDBG_BITVAL(12)
+#define PPCDBG_ALIGNFIXUP	PPCDBG_BITVAL(13)
+#define PPCDBG_TCEINIT		PPCDBG_BITVAL(14)
+#define PPCDBG_TCE		PPCDBG_BITVAL(15)
+#define PPCDBG_PHBINIT		PPCDBG_BITVAL(16)
+#define PPCDBG_SMP		PPCDBG_BITVAL(17)
+#define PPCDBG_BOOT		PPCDBG_BITVAL(18)
+#define PPCDBG_BUSWALK		PPCDBG_BITVAL(19)
+#define PPCDBG_HTABSTRESS	PPCDBG_BITVAL(62)
+#define PPCDBG_HTABSIZE		PPCDBG_BITVAL(63)
+#define PPCDBG_NONE		(0UL)
+#define PPCDBG_ALL		(0xffffffffUL)
+
+/* The default initial value for the debug switch */
+#define PPC_DEBUG_DEFAULT	PPCDBG_ALIGNFIXUP
+/* #define PPC_DEBUG_DEFAULT	PPCDBG_ALL */
+
+#define PPCDBG_NUM_FLAGS	64
+
+#ifdef WANT_PPCDBG_TAB
+/* A table of debug switch names to allow name lookup in xmon
+ * (and whoever else wants it).
+ */
+char *trace_names[PPCDBG_NUM_FLAGS] = {
+	/* Known debug names */
+	"mminit", "mm",
+	"syscall32", "syscall32_ni", "syscall32x", "syscall32m",
+	"syscall64", "syscall64_ni", "syscall64x",
+	"signal", "signal_xmon",
+	"binfmt", "binfmt_xmon",
+	"alignfixup", "tceinit", "tce", "phb_init",
+	"smp", "boot", "buswalk"
+};
+#else
+extern char *trace_names[64];
+#endif /* WANT_PPCDBG_TAB */
+
+#ifdef CONFIG_PPCDBG
+/* Macro to conditionally print debug based on debug_switch */
+#define PPCDBG(...) udbg_ppcdbg(__VA_ARGS__)
+
+/* Macro to conditionally call a debug routine based on debug_switch */
+#define PPCDBGCALL(FLAGS,FUNCTION) ifppcdebug(FLAGS) FUNCTION
+
+/* Macro to test for debug states */
+#define ifppcdebug(FLAGS) if (udbg_ifdebug(FLAGS))
+
+#ifdef CONFIG_XMON
+#define PPCDBG_ENTER_DEBUGGER() xmon(0)
+#define PPCDBG_ENTER_DEBUGGER_REGS(X) xmon(X)
+#endif
+#ifdef CONFIG_KDB
+#include
+#define PPCDBG_ENTER_DEBUGGER() kdb(KDB_REASON_CALL, 0, 0)
+#endif
+
+#else
+#define PPCDBG(...) do {;} while (0)
+#define PPCDBGCALL(FLAGS,FUNCTION) do {;} while (0)
+#define ifppcdebug(...) if (0)
+#endif /* CONFIG_PPCDBG */
+
+#ifndef PPCDBG_ENTER_DEBUGGER
+#define PPCDBG_ENTER_DEBUGGER() do {;} while(0)
+#endif
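/*
 * Illustrative sketch (not part of the original patch): how a caller
 * might use the hooks above.  PPCDBG() compiles away entirely unless
 * CONFIG_PPCDBG is set; the first argument selects which bit of the
 * runtime debug switch must be on.  The function name is hypothetical,
 * and udbg_ppcdbg() is assumed to take the flag mask followed by
 * printf-style arguments.
 */
static void example_tce_init(void)		/* hypothetical caller */
{
	PPCDBG(PPCDBG_TCEINIT, "building TCE tables\n");
	ifppcdebug(PPCDBG_TCE)
		PPCDBG_ENTER_DEBUGGER();	/* xmon/kdb if configured */
}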
+
+#endif /*__PPCDEBUG_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/proc_fs.h linuxppc64_2_4/include/asm-ppc64/proc_fs.h
--- ../kernel.org/linux/include/asm-ppc64/proc_fs.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/proc_fs.h	Tue Aug 14 14:55:53 2001
@@ -0,0 +1,35 @@
+#ifndef _PPC64_PROC_FS_H
+#define _PPC64_PROC_FS_H
+/*
+ * proc_fs.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Change Activity: */
+/* tgall -- merge of iSeries/iSeries_proc.h and proc_pmc.h */
+/* End Change Activity */
+
+#include <linux/proc_fs.h>
+
+void pmc_proc_init(struct proc_dir_entry *iSeries_proc);
+void proc_ppc64_init(void);
+
+#ifdef CONFIG_PPC_ISERIES
+#include <asm/iSeries/iSeries_proc.h>
+#endif
+
+#endif
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/proc_pmc.h linuxppc64_2_4/include/asm-ppc64/proc_pmc.h
--- ../kernel.org/linux/include/asm-ppc64/proc_pmc.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/proc_pmc.h	Fri Jun 15 13:26:46 2001
@@ -0,0 +1,33 @@
+/*
+ * pmc_proc.h
+ * Copyright (C) 2001 Mike Corrigan IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _PMC_PROC_H
+#define _PMC_PROC_H
+
+#include <linux/proc_fs.h>
+
+void pmc_proc_init(struct proc_dir_entry *iSeries_proc);
+void proc_ppc64_init(void);
+
+#endif /* _PMC_PROC_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/processor.h linuxppc64_2_4/include/asm-ppc64/processor.h
--- ../kernel.org/linux/include/asm-ppc64/processor.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/processor.h	Wed Nov 21 15:38:27 2001
@@ -0,0 +1,775 @@
+#ifndef __ASM_PPC64_PROCESSOR_H
+#define __ASM_PPC64_PROCESSOR_H
+
+/*
+ * Copyright (C) 2001 PPC 64 Team, IBM Corp
+ *
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter"). 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l;})
+
+#include
+
+#ifndef __ASSEMBLY__
+#include
+#include
+#endif
+#include
+#include
+
+/* Machine State Register (MSR) Fields */
+#define MSR_SF_LG	63	/* Enable 64 bit mode */
+#define MSR_ISF_LG	61	/* Interrupt 64b mode valid on 630 */
+#define MSR_HV_LG	60	/* Hypervisor state */
+#define MSR_VEC_LG	25	/* Enable AltiVec */
+#define MSR_POW_LG	18	/* Enable Power Management */
+#define MSR_WE_LG	18	/* Wait State Enable */
+#define MSR_TGPR_LG	17	/* TLB Update registers in use */
+#define MSR_CE_LG	17	/* Critical Interrupt Enable */
+#define MSR_ILE_LG	16	/* Interrupt Little Endian */
+#define MSR_EE_LG	15	/* External Interrupt Enable */
+#define MSR_PR_LG	14	/* Problem State / Privilege Level */
+#define MSR_FP_LG	13	/* Floating Point enable */
+#define MSR_ME_LG	12	/* Machine Check Enable */
+#define MSR_FE0_LG	11	/* Floating Exception mode 0 */
+#define MSR_SE_LG	10	/* Single Step */
+#define MSR_BE_LG	9	/* Branch Trace */
+#define MSR_DE_LG	9	/* Debug Exception Enable */
+#define MSR_FE1_LG	8	/* Floating Exception mode 1 */
+#define MSR_IP_LG	6	/* Exception prefix 0x000/0xFFF */
+#define MSR_IR_LG	5	/* Instruction Relocate */
+#define MSR_DR_LG	4	/* Data Relocate */
+#define MSR_PE_LG	3	/* Protection Enable */
+#define MSR_PX_LG	2	/* Protection Exclusive Mode */
+#define MSR_RI_LG	1	/* Recoverable Exception */
+#define MSR_LE_LG	0	/* Little Endian */
+
+#ifdef __ASSEMBLY__
+#define MASK(X)		(1<<(X))
+#else
+#define MASK(X)		(1UL<<(X))
+#endif
+
+#define MSR_SF		MASK(MSR_SF_LG)		/* Enable 64 bit mode */
+#define MSR_ISF		MASK(MSR_ISF_LG)	/* Interrupt 64b mode valid on 630 */
+#define MSR_HV		MASK(MSR_HV_LG)		/* Hypervisor state */
+#define MSR_VEC		MASK(MSR_VEC_LG)	/* Enable AltiVec */
+#define MSR_POW		MASK(MSR_POW_LG)	/* Enable Power Management */
+#define MSR_WE		MASK(MSR_WE_LG)		/* Wait State Enable */
+#define MSR_TGPR	MASK(MSR_TGPR_LG)	/* TLB Update registers in use */
+#define MSR_CE		MASK(MSR_CE_LG)		/* Critical Interrupt Enable */
+#define MSR_ILE		MASK(MSR_ILE_LG)	/* Interrupt Little Endian */
+#define MSR_EE		MASK(MSR_EE_LG)		/* External Interrupt Enable */
+#define MSR_PR		MASK(MSR_PR_LG)		/* Problem State / Privilege Level */
+#define MSR_FP		MASK(MSR_FP_LG)		/* Floating Point enable */
+#define MSR_ME		MASK(MSR_ME_LG)		/* Machine Check Enable */
+#define MSR_FE0		MASK(MSR_FE0_LG)	/* Floating Exception mode 0 */
+#define MSR_SE		MASK(MSR_SE_LG)		/* Single Step */
+#define MSR_BE		MASK(MSR_BE_LG)		/* Branch Trace */
+#define MSR_DE		MASK(MSR_DE_LG)		/* Debug Exception Enable */
+#define MSR_FE1		MASK(MSR_FE1_LG)	/* Floating Exception mode 1 */
+#define MSR_IP		MASK(MSR_IP_LG)		/* Exception prefix 0x000/0xFFF */
+#define MSR_IR		MASK(MSR_IR_LG)		/* Instruction Relocate */
+#define MSR_DR		MASK(MSR_DR_LG)		/* Data Relocate */
+#define MSR_PE		MASK(MSR_PE_LG)		/* Protection Enable */
+#define MSR_PX		MASK(MSR_PX_LG)		/* Protection Exclusive Mode */
+#define MSR_RI		MASK(MSR_RI_LG)		/* Recoverable Exception */
+#define MSR_LE		MASK(MSR_LE_LG)		/* Little Endian */
+
+#define MSR_		MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF
+#define MSR_KERNEL	MSR_ | MSR_SF | MSR_HV
+
+#define MSR_USER32	MSR_ | MSR_PR | MSR_EE
+#define MSR_USER64	MSR_USER32 | MSR_SF
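/*
 * Illustrative sketch (not part of the original patch): the mask macros
 * above compose and test MSR images.  A saved user MSR with MSR_SF clear
 * identifies a 32-bit task, for example:
 */
static inline int msr_is_32bit_user(unsigned long msr)
{
	return (msr & MSR_PR) && !(msr & MSR_SF);
}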
+
+/* Floating Point Status and Control Register (FPSCR) Fields */
+
+#define FPSCR_FX	0x80000000	/* FPU exception summary */
+#define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
+#define FPSCR_VX	0x20000000	/* Invalid operation summary */
+#define FPSCR_OX	0x10000000	/* Overflow exception summary */
+#define FPSCR_UX	0x08000000	/* Underflow exception summary */
+#define FPSCR_ZX	0x04000000	/* Zero-divide exception summary */
+#define FPSCR_XX	0x02000000	/* Inexact exception summary */
+#define FPSCR_VXSNAN	0x01000000	/* Invalid op for SNaN */
+#define FPSCR_VXISI	0x00800000	/* Invalid op for Inv - Inv */
+#define FPSCR_VXIDI	0x00400000	/* Invalid op for Inv / Inv */
+#define FPSCR_VXZDZ	0x00200000	/* Invalid op for Zero / Zero */
+#define FPSCR_VXIMZ	0x00100000	/* Invalid op for Inv * Zero */
+#define FPSCR_VXVC	0x00080000	/* Invalid op for Compare */
+#define FPSCR_FR	0x00040000	/* Fraction rounded */
+#define FPSCR_FI	0x00020000	/* Fraction inexact */
+#define FPSCR_FPRF	0x0001f000	/* FPU Result Flags */
+#define FPSCR_FPCC	0x0000f000	/* FPU Condition Codes */
+#define FPSCR_VXSOFT	0x00000400	/* Invalid op for software request */
+#define FPSCR_VXSQRT	0x00000200	/* Invalid op for square root */
+#define FPSCR_VXCVI	0x00000100	/* Invalid op for integer convert */
+#define FPSCR_VE	0x00000080	/* Invalid op exception enable */
+#define FPSCR_OE	0x00000040	/* IEEE overflow exception enable */
+#define FPSCR_UE	0x00000020	/* IEEE underflow exception enable */
+#define FPSCR_ZE	0x00000010	/* IEEE zero divide exception enable */
+#define FPSCR_XE	0x00000008	/* FP inexact exception enable */
+#define FPSCR_NI	0x00000004	/* FPU non IEEE-Mode */
+#define FPSCR_RN	0x00000003	/* FPU rounding control */
+
+/* Special Purpose Registers (SPRNs) */
+
+#define SPRN_CDBCR	0x3D7	/* Cache Debug Control Register */
+#define SPRN_CTR	0x009	/* Count Register */
+#define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
+#define SPRN_DAC1	0x3F6	/* Data Address Compare 1 */
+#define SPRN_DAC2	0x3F7	/* Data Address Compare 2 */
+#define SPRN_DAR	0x013	/* Data Address Register */
+#define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
+#define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
+#define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
+#define SPRN_DBAT1U	0x21A	/* Data BAT 1 Upper Register */
+#define SPRN_DBAT2L	0x21D	/* Data BAT 2 Lower Register */
+#define SPRN_DBAT2U	0x21C	/* Data BAT 2 Upper Register */
+#define SPRN_DBAT3L	0x21F	/* Data BAT 3 Lower Register */
+#define SPRN_DBAT3U	0x21E	/* Data BAT 3 Upper Register */
+#define SPRN_DBCR	0x3F2	/* Debug Control Register */
+#define DBCR_EDM	0x80000000
+#define DBCR_IDM	0x40000000
+#define DBCR_RST(x)	(((x) & 0x3) << 28)
+#define DBCR_RST_NONE	0
+#define DBCR_RST_CORE	1
+#define DBCR_RST_CHIP	2
+#define DBCR_RST_SYSTEM	3
+#define DBCR_IC		0x08000000	/* Instruction Completion Debug Event */
+#define DBCR_BT		0x04000000	/* Branch Taken Debug Event */
+#define DBCR_EDE	0x02000000	/* Exception Debug Event */
+#define DBCR_TDE	0x01000000	/* TRAP Debug Event */
+#define DBCR_FER	0x00F80000	/* First Events Remaining Mask */
+#define DBCR_FT		0x00040000	/* Freeze Timers on Debug Event */
+#define DBCR_IA1	0x00020000	/* Instr. Addr. Compare 1 Enable */
+#define DBCR_IA2	0x00010000	/* Instr. Addr. Compare 2 Enable */
+#define DBCR_D1R	0x00008000	/* Data Addr. Compare 1 Read Enable */
+#define DBCR_D1W	0x00004000	/* Data Addr. Compare 1 Write Enable */
+#define DBCR_D1S(x)	(((x) & 0x3) << 12)	/* Data Addr. Compare 1 Size */
+#define DAC_BYTE	0
+#define DAC_HALF	1
+#define DAC_WORD	2
+#define DAC_QUAD	3
+#define DBCR_D2R	0x00000800	/* Data Addr. Compare 2 Read Enable */
+#define DBCR_D2W	0x00000400	/* Data Addr. Compare 2 Write Enable */
+#define DBCR_D2S(x)	(((x) & 0x3) << 8)	/* Data Addr. Compare 2 Size */
+#define DBCR_SBT	0x00000040	/* Second Branch Taken Debug Event */
+#define DBCR_SED	0x00000020	/* Second Exception Debug Event */
+#define DBCR_STD	0x00000010	/* Second Trap Debug Event */
+#define DBCR_SIA	0x00000008	/* Second IAC Enable */
+#define DBCR_SDA	0x00000004	/* Second DAC Enable */
+#define DBCR_JOI	0x00000002	/* JTAG Serial Outbound Int. Enable */
+#define DBCR_JII	0x00000001	/* JTAG Serial Inbound Int. Enable */
+#define SPRN_DBCR0	0x3F2	/* Debug Control Register 0 */
+#define SPRN_DBCR1	0x3BD	/* Debug Control Register 1 */
+#define SPRN_DBSR	0x3F0	/* Debug Status Register */
+#define SPRN_DCCR	0x3FA	/* Data Cache Cacheability Register */
+#define DCCR_NOCACHE	0	/* Noncacheable */
+#define DCCR_CACHE	1	/* Cacheable */
+#define SPRN_DCMP	0x3D1	/* Data TLB Compare Register */
+#define SPRN_DCWR	0x3BA	/* Data Cache Write-thru Register */
+#define DCWR_COPY	0	/* Copy-back */
+#define DCWR_WRITE	1	/* Write-through */
+#define SPRN_DEAR	0x3D5	/* Data Error Address Register */
+#define SPRN_DEC	0x016	/* Decrement Register */
+#define SPRN_DMISS	0x3D0	/* Data TLB Miss Register */
+#define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
+#define SPRN_EAR	0x11A	/* External Address Register */
+#define SPRN_ESR	0x3D4	/* Exception Syndrome Register */
+#define ESR_IMCP	0x80000000	/* Instr. Machine Check - Protection */
+#define ESR_IMCN	0x40000000	/* Instr. Machine Check - Non-config */
+#define ESR_IMCB	0x20000000	/* Instr. Machine Check - Bus error */
+#define ESR_IMCT	0x10000000	/* Instr. Machine Check - Timeout */
+#define ESR_PIL		0x08000000	/* Program Exception - Illegal */
+#define ESR_PPR		0x04000000	/* Program Exception - Privileged */
+#define ESR_PTR		0x02000000	/* Program Exception - Trap */
+#define ESR_DST		0x00800000	/* Storage Exception - Data miss */
+#define ESR_DIZ		0x00400000	/* Storage Exception - Zone fault */
+#define SPRN_EVPR	0x3D6	/* Exception Vector Prefix Register */
+#define SPRN_HASH1	0x3D2	/* Primary Hash Address Register */
+#define SPRN_HASH2	0x3D3	/* Secondary Hash Address Register */
+#define SPRN_HID0	0x3F0	/* Hardware Implementation Register 0 */
+#define HID0_EMCP	(1<<31)	/* Enable Machine Check pin */
+#define HID0_EBA	(1<<29)	/* Enable Bus Address Parity */
+#define HID0_EBD	(1<<28)	/* Enable Bus Data Parity */
+#define HID0_SBCLK	(1<<27)
+#define HID0_EICE	(1<<26)
+#define HID0_ECLK	(1<<25)
+#define HID0_PAR	(1<<24)
+#define HID0_DOZE	(1<<23)
+#define HID0_NAP	(1<<22)
+#define HID0_SLEEP	(1<<21)
+#define HID0_DPM	(1<<20)
+#define HID0_ICE	(1<<15)	/* Instruction Cache Enable */
+#define HID0_DCE	(1<<14)	/* Data Cache Enable */
+#define HID0_ILOCK	(1<<13)	/* Instruction Cache Lock */
+#define HID0_DLOCK	(1<<12)	/* Data Cache Lock */
+#define HID0_ICFI	(1<<11)	/* Instr. Cache Flash Invalidate */
+#define HID0_DCI	(1<<10)	/* Data Cache Invalidate */
+#define HID0_SPD	(1<<9)	/* Speculative disable */
+#define HID0_SGE	(1<<7)	/* Store Gathering Enable */
+#define HID0_SIED	(1<<7)	/* Serial Instr. Execution [Disable] */
+#define HID0_BTIC	(1<<5)	/* Branch Target Instruction Cache Enable */
+#define HID0_ABE	(1<<3)	/* Address Broadcast Enable */
+#define HID0_BHTE	(1<<2)	/* Branch History Table Enable */
+#define HID0_BTCD	(1<<1)	/* Branch target cache disable */
+#define SPRN_MSRDORM	0x3F1	/* Hardware Implementation Register 1 */
+#define SPRN_IABR	0x3F2	/* Instruction Address Breakpoint Register */
+#define SPRN_NIADORM	0x3F3	/* Hardware Implementation Register 2 */
+#define SPRN_TSC	0x3FD	/* Thread switch control */
+#define SPRN_TST	0x3FC	/* Thread switch timeout */
+#define SPRN_IAC1	0x3F4	/* Instruction Address Compare 1 */
+#define SPRN_IAC2	0x3F5	/* Instruction Address Compare 2 */
+#define SPRN_IBAT0L	0x211	/* Instruction BAT 0 Lower Register */
+#define SPRN_IBAT0U	0x210	/* Instruction BAT 0 Upper Register */
+#define SPRN_IBAT1L	0x213	/* Instruction BAT 1 Lower Register */
+#define SPRN_IBAT1U	0x212	/* Instruction BAT 1 Upper Register */
+#define SPRN_IBAT2L	0x215	/* Instruction BAT 2 Lower Register */
+#define SPRN_IBAT2U	0x214	/* Instruction BAT 2 Upper Register */
+#define SPRN_IBAT3L	0x217	/* Instruction BAT 3 Lower Register */
+#define SPRN_IBAT3U	0x216	/* Instruction BAT 3 Upper Register */
+#define SPRN_ICCR	0x3FB	/* Instruction Cache Cacheability Register */
+#define ICCR_NOCACHE	0	/* Noncacheable */
+#define ICCR_CACHE	1	/* Cacheable */
+#define SPRN_ICDBDR	0x3D3	/* Instruction Cache Debug Data Register */
+#define SPRN_ICMP	0x3D5	/* Instruction TLB Compare Register */
+#define SPRN_ICTC	0x3FB	/* Instruction Cache Throttling Control Reg */
+#define SPRN_IMISS	0x3D4	/* Instruction TLB Miss Register */
+#define SPRN_IMMR	0x27E	/* Internal Memory Map Register */
+#define SPRN_L2CR	0x3F9	/* Level 2 Cache Control Register */
+#define SPRN_LR		0x008	/* Link Register */
+#define SPRN_MMCR0	0x3B8	/* Monitor Mode Control Register 0 */
+#define SPRN_MMCR1	0x3BC	/* Monitor Mode Control Register 1 */
+#define SPRN_PBL1	0x3FC	/* Protection Bound Lower 1 */
+#define SPRN_PBL2	0x3FE	/* Protection Bound Lower 2 */
+#define SPRN_PBU1	0x3FD	/* Protection Bound Upper 1 */
+#define SPRN_PBU2	0x3FF	/* Protection Bound Upper 2 */
+#define SPRN_PID	0x3B1	/* Process ID */
+#define SPRN_PIR	0x3FF	/* Processor Identification Register */
+#define SPRN_PIT	0x3DB	/* Programmable Interval Timer */
+#define SPRN_PMC1	0x3B9	/* Performance Counter Register 1 */
+#define SPRN_PMC2	0x3BA	/* Performance Counter Register 2 */
+#define SPRN_PMC3	0x3BD	/* Performance Counter Register 3 */
+#define SPRN_PMC4	0x3BE	/* Performance Counter Register 4 */
+#define SPRN_PVR	0x11F	/* Processor Version Register */
+#define SPRN_RPA	0x3D6	/* Required Physical Address Register */
+#define SPRN_SDA	0x3BF	/* Sampled Data Address Register */
+#define SPRN_SDR1	0x019	/* MMU Hash Base Register */
+#define SPRN_SGR	0x3B9	/* Storage Guarded Register */
+#define SGR_NORMAL	0
+#define SGR_GUARDED	1
+#define SPRN_SIA	0x3BB	/* Sampled Instruction Address Register */
+#define SPRN_SPRG0	0x110	/* Special Purpose Register General 0 */
+#define SPRN_SPRG1	0x111	/* Special Purpose Register General 1 */
+#define SPRN_SPRG2	0x112	/* Special Purpose Register General 2 */
+#define SPRN_SPRG3	0x113	/* Special Purpose Register General 3 */
+#define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
+#define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+#define SPRN_SRR2	0x3DE	/* Save/Restore Register 2 */
+#define SPRN_SRR3	0x3DF	/* Save/Restore Register 3 */
+#define SPRN_TBHI	0x3DC	/* Time Base High */
+#define SPRN_TBHU	0x3CC	/* Time Base High User-mode */
+#define SPRN_TBLO	0x3DD	/* Time Base Low */
+#define SPRN_TBLU	0x3CD	/* Time Base Low User-mode */
+#define SPRN_TBRL	0x10D	/* Time Base Read Lower Register */
+#define SPRN_TBRU	0x10C	/* Time Base Read Upper Register */
+#define SPRN_TBWL	0x11D	/* Time Base Write Lower Register */
+#define SPRN_TBWU	0x11C	/* Time Base Write Upper Register */
+#define SPRN_TCR	0x3DA	/* Timer Control Register */
+#define TCR_WP(x)	(((x)&0x3)<<30)	/* WDT Period */
+#define WP_2_17		0	/* 2^17 clocks */
+#define WP_2_21		1	/* 2^21 clocks */
+#define WP_2_25		2	/* 2^25 clocks */
+#define WP_2_29		3	/* 2^29 clocks */
+#define TCR_WRC(x)	(((x)&0x3)<<28)	/* WDT Reset Control */
+#define WRC_NONE	0	/* No reset will occur */
+#define WRC_CORE	1	/* Core reset will occur */
+#define WRC_CHIP	2	/* Chip reset will occur */
+#define WRC_SYSTEM	3	/* System reset will occur */
+#define TCR_WIE		0x08000000	/* WDT Interrupt Enable */
+#define TCR_PIE		0x04000000	/* PIT Interrupt Enable */
+#define TCR_FP(x)	(((x)&0x3)<<24)	/* FIT Period */
+#define FP_2_9		0	/* 2^9 clocks */
+#define FP_2_13		1	/* 2^13 clocks */
+#define FP_2_17		2	/* 2^17 clocks */
+#define FP_2_21		3	/* 2^21 clocks */
+#define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
+#define TCR_ARE		0x00400000	/* Auto Reload Enable */
+#define SPRN_THRM1	0x3FC	/* Thermal Management Register 1 */
+#define THRM1_TIN	(1<<0)
+#define THRM1_TIV	(1<<1)
+#define THRM1_THRES	(0x7f<<2)
+#define THRM1_TID	(1<<29)
+#define THRM1_TIE	(1<<30)
+#define THRM1_V		(1<<31)
+#define SPRN_THRM2	0x3FD	/* Thermal Management Register 2 */
+#define SPRN_THRM3	0x3FE	/* Thermal Management Register 3 */
+#define THRM3_E		(1<<31)
+#define SPRN_TSR	0x3D8	/* Timer Status Register */
+#define TSR_ENW		0x80000000	/* Enable Next Watchdog */
+#define TSR_WIS		0x40000000	/* WDT Interrupt Status */
+#define TSR_WRS(x)	(((x)&0x3)<<28)	/* WDT Reset Status */
+#define WRS_NONE	0	/* No WDT reset occurred */
+#define WRS_CORE	1	/* WDT forced core reset */
+#define WRS_CHIP	2	/* WDT forced chip reset */
+#define WRS_SYSTEM	3	/* WDT forced system reset */
+#define TSR_PIS		0x08000000	/* PIT Interrupt Status */
+#define TSR_FIS		0x04000000	/* FIT Interrupt Status */
+#define SPRN_UMMCR0	0x3A8	/* User Monitor Mode Control Register 0 */
+#define SPRN_UMMCR1	0x3AC	/* User Monitor Mode Control Register 1 */
+#define SPRN_UPMC1	0x3A9	/* User Performance Counter Register 1 */
+#define SPRN_UPMC2	0x3AA	/* User Performance Counter Register 2 */
+#define SPRN_UPMC3	0x3AD	/* User Performance Counter Register 3 */
+#define SPRN_UPMC4	0x3AE	/* User Performance Counter Register 4 */
+#define SPRN_USIA	0x3AB	/* User Sampled Instruction Address Register */
+#define SPRN_XER	0x001	/* Fixed Point Exception Register */
+#define SPRN_ZPR	0x3B0	/* Zone Protection Register */
+
+/* Short-hand versions for a number of the above SPRNs */
+
+#define CTR	SPRN_CTR	/* Counter Register */
+#define DAR	SPRN_DAR	/* Data Address Register */
+#define DABR	SPRN_DABR	/* Data Address Breakpoint Register */
+#define DBAT0L	SPRN_DBAT0L	/* Data BAT 0 Lower Register */
+#define DBAT0U	SPRN_DBAT0U	/* Data BAT 0 Upper Register */
+#define DBAT1L	SPRN_DBAT1L	/* Data BAT 1 Lower Register */
+#define DBAT1U	SPRN_DBAT1U	/* Data BAT 1 Upper Register */
+#define DBAT2L	SPRN_DBAT2L	/* Data BAT 2 Lower Register */
+#define DBAT2U	SPRN_DBAT2U	/* Data BAT 2 Upper Register */
+#define DBAT3L	SPRN_DBAT3L	/* Data BAT 3 Lower Register */
+#define DBAT3U	SPRN_DBAT3U	/* Data BAT 3 Upper Register */
+#define DCMP	SPRN_DCMP	/* Data TLB Compare Register */
+#define DEC SPRN_DEC /* Decrement Register */ +#define DMISS SPRN_DMISS /* Data TLB Miss Register */ +#define DSISR SPRN_DSISR /* Data Storage Interrupt Status Register */ +#define EAR SPRN_EAR /* External Address Register */ +#define HASH1 SPRN_HASH1 /* Primary Hash Address Register */ +#define HASH2 SPRN_HASH2 /* Secondary Hash Address Register */ +#define HID0 SPRN_HID0 /* Hardware Implementation Register 0 */ +#define MSRDORM SPRN_MSRDORM /* MSR Dormant Register */ +#define NIADORM SPRN_NIADORM /* NIA Dormant Register */ +#define TSC SPRN_TSC /* Thread switch control */ +#define TST SPRN_TST /* Thread switch timeout */ +#define IABR SPRN_IABR /* Instruction Address Breakpoint Register */ +#define IBAT0L SPRN_IBAT0L /* Instruction BAT 0 Lower Register */ +#define IBAT0U SPRN_IBAT0U /* Instruction BAT 0 Upper Register */ +#define IBAT1L SPRN_IBAT1L /* Instruction BAT 1 Lower Register */ +#define IBAT1U SPRN_IBAT1U /* Instruction BAT 1 Upper Register */ +#define IBAT2L SPRN_IBAT2L /* Instruction BAT 2 Lower Register */ +#define IBAT2U SPRN_IBAT2U /* Instruction BAT 2 Upper Register */ +#define IBAT3L SPRN_IBAT3L /* Instruction BAT 3 Lower Register */ +#define IBAT3U SPRN_IBAT3U /* Instruction BAT 3 Upper Register */ +#define ICMP SPRN_ICMP /* Instruction TLB Compare Register */ +#define IMISS SPRN_IMISS /* Instruction TLB Miss Register */ +#define IMMR SPRN_IMMR /* PPC 860/821 Internal Memory Map Register */ +#define L2CR SPRN_L2CR /* PPC 750 L2 control register */ +#define LR SPRN_LR +#define PVR SPRN_PVR /* Processor Version */ +#define PIR SPRN_PIR /* Processor ID */ +#define RPA SPRN_RPA /* Required Physical Address Register */ +#define SDR1 SPRN_SDR1 /* MMU hash base register */ +#define SPR0 SPRN_SPRG0 /* Supervisor Private Registers */ +#define SPR1 SPRN_SPRG1 +#define SPR2 SPRN_SPRG2 +#define SPR3 SPRN_SPRG3 +#define SPRG0 SPRN_SPRG0 +#define SPRG1 SPRN_SPRG1 +#define SPRG2 SPRN_SPRG2 +#define SPRG3 SPRN_SPRG3 +#define SRR0 SPRN_SRR0 /* Save and Restore Register 0 */ +#define SRR1 SPRN_SRR1 /* Save and Restore Register 1 */ +#define TBRL SPRN_TBRL /* Time Base Read Lower Register */ +#define TBRU SPRN_TBRU /* Time Base Read Upper Register */ +#define TBWL SPRN_TBWL /* Time Base Write Lower Register */ +#define TBWU SPRN_TBWU /* Time Base Write Upper Register */ +#define ICTC 1019 +#define THRM1 SPRN_THRM1 /* Thermal Management Register 1 */ +#define THRM2 SPRN_THRM2 /* Thermal Management Register 2 */ +#define THRM3 SPRN_THRM3 /* Thermal Management Register 3 */ +#define XER SPRN_XER + + +/* Device Control Registers */ + +#define DCRN_BEAR 0x090 /* Bus Error Address Register */ +#define DCRN_BESR 0x091 /* Bus Error Syndrome Register */ +#define BESR_DSES 0x80000000 /* Data-Side Error Status */ +#define BESR_DMES 0x40000000 /* DMA Error Status */ +#define BESR_RWS 0x20000000 /* Read/Write Status */ +#define BESR_ETMASK 0x1C000000 /* Error Type */ +#define ET_PROT 0 +#define ET_PARITY 1 +#define ET_NCFG 2 +#define ET_BUSERR 4 +#define ET_BUSTO 6 +#define DCRN_DMACC0 0x0C4 /* DMA Chained Count Register 0 */ +#define DCRN_DMACC1 0x0CC /* DMA Chained Count Register 1 */ +#define DCRN_DMACC2 0x0D4 /* DMA Chained Count Register 2 */ +#define DCRN_DMACC3 0x0DC /* DMA Chained Count Register 3 */ +#define DCRN_DMACR0 0x0C0 /* DMA Channel Control Register 0 */ +#define DCRN_DMACR1 0x0C8 /* DMA Channel Control Register 1 */ +#define DCRN_DMACR2 0x0D0 /* DMA Channel Control Register 2 */ +#define DCRN_DMACR3 0x0D8 /* DMA Channel Control Register 3 */ +#define DCRN_DMACT0 0x0C1 /* DMA Count 
Register 0 */
+#define DCRN_DMACT1	0x0C9	/* DMA Count Register 1 */
+#define DCRN_DMACT2	0x0D1	/* DMA Count Register 2 */
+#define DCRN_DMACT3	0x0D9	/* DMA Count Register 3 */
+#define DCRN_DMADA0	0x0C2	/* DMA Destination Address Register 0 */
+#define DCRN_DMADA1	0x0CA	/* DMA Destination Address Register 1 */
+#define DCRN_DMADA2	0x0D2	/* DMA Destination Address Register 2 */
+#define DCRN_DMADA3	0x0DA	/* DMA Destination Address Register 3 */
+#define DCRN_DMASA0	0x0C3	/* DMA Source Address Register 0 */
+#define DCRN_DMASA1	0x0CB	/* DMA Source Address Register 1 */
+#define DCRN_DMASA2	0x0D3	/* DMA Source Address Register 2 */
+#define DCRN_DMASA3	0x0DB	/* DMA Source Address Register 3 */
+#define DCRN_DMASR	0x0E0	/* DMA Status Register */
+#define DCRN_EXIER	0x042	/* External Interrupt Enable Register */
+#define EXIER_CIE	0x80000000	/* Critical Interrupt Enable */
+#define EXIER_SRIE	0x08000000	/* Serial Port Rx Int. Enable */
+#define EXIER_STIE	0x04000000	/* Serial Port Tx Int. Enable */
+#define EXIER_JRIE	0x02000000	/* JTAG Serial Port Rx Int. Enable */
+#define EXIER_JTIE	0x01000000	/* JTAG Serial Port Tx Int. Enable */
+#define EXIER_D0IE	0x00800000	/* DMA Channel 0 Interrupt Enable */
+#define EXIER_D1IE	0x00400000	/* DMA Channel 1 Interrupt Enable */
+#define EXIER_D2IE	0x00200000	/* DMA Channel 2 Interrupt Enable */
+#define EXIER_D3IE	0x00100000	/* DMA Channel 3 Interrupt Enable */
+#define EXIER_E0IE	0x00000010	/* External Interrupt 0 Enable */
+#define EXIER_E1IE	0x00000008	/* External Interrupt 1 Enable */
+#define EXIER_E2IE	0x00000004	/* External Interrupt 2 Enable */
+#define EXIER_E3IE	0x00000002	/* External Interrupt 3 Enable */
+#define EXIER_E4IE	0x00000001	/* External Interrupt 4 Enable */
+#define DCRN_EXISR	0x040	/* External Interrupt Status Register */
+#define DCRN_IOCR	0x0A0	/* Input/Output Configuration Register */
+#define IOCR_E0TE	0x80000000
+#define IOCR_E0LP	0x40000000
+#define IOCR_E1TE	0x20000000
+#define IOCR_E1LP	0x10000000
+#define IOCR_E2TE	0x08000000
+#define IOCR_E2LP	0x04000000
+#define IOCR_E3TE	0x02000000
+#define IOCR_E3LP	0x01000000
+#define IOCR_E4TE	0x00800000
+#define IOCR_E4LP	0x00400000
+#define IOCR_EDT	0x00080000
+#define IOCR_SOR	0x00040000
+#define IOCR_EDO	0x00008000
+#define IOCR_2XC	0x00004000
+#define IOCR_ATC	0x00002000
+#define IOCR_SPD	0x00001000
+#define IOCR_BEM	0x00000800
+#define IOCR_PTD	0x00000400
+#define IOCR_ARE	0x00000080
+#define IOCR_DRC	0x00000020
+#define IOCR_RDM(x)	(((x) & 0x3) << 3)
+#define IOCR_TCS	0x00000004
+#define IOCR_SCS	0x00000002
+#define IOCR_SPC	0x00000001
+
+
+/* Processor Version Register (PVR) field extraction */
+
+#define PVR_VER(pvr)	(((pvr) >> 16) & 0xFFFF)	/* Version field */
+#define PVR_REV(pvr)	(((pvr) >> 0) & 0xFFFF)		/* Revision field */
+
+/* Processor Version Numbers */
+#define PV_PULSAR	0x0034
+#define PV_POWER4	0x0035
+#define PV_ICESTAR	0x0036
+#define PV_SSTAR	0x0037
+#define PV_630		0x0040
+#define PV_630p		0x0041
+
+/* Platforms supported by PPC64.  _machine is actually a set of flags */
+#define _MACH_pSeriesHW	0x00010000
+#define _MACH_iSeriesHW	0x00020000
+#define _MACH_LPAR	0x00000001
+
+#define _MACH_unknown	0x00000000
+#define _MACH_pSeries	(_MACH_pSeriesHW)
+#define _MACH_pSeriesLP	(_MACH_pSeriesHW | _MACH_LPAR)
+#define _MACH_iSeries	(_MACH_iSeriesHW | _MACH_LPAR)
+
+/* Compat defines for drivers */
+#define _MACH_Pmac	0xf0000000	/* bogus value */
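/*
 * Illustrative sketch (not part of the original patch): identifying the
 * processor by splitting the PVR into its version and revision halves
 * (mfspr() is defined further down in this header).
 */
static inline int cpu_is_power4(void)
{
	unsigned long pvr = mfspr(SPRN_PVR);

	return PVR_VER(pvr) == PV_POWER4;
}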
+
+/*
+ * List of interrupt controllers.
+ */
+#define IC_INVALID	0
+#define IC_OPEN_PIC	1
+#define IC_PPC_XIC	2
+
+#define IOS_INVALID	0
+#define IOS_OPEN_PIC	1
+#define IOS_PPC_XIC	2
+
+
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+#define XGLUE(a,b)	a##b
+#define GLUE(a,b)	XGLUE(a,b)
+
+/*
+ * Beginning of traceback info work for asm functions.
+ */
+#define TB_ASM		0x000C000000000000
+#define TB_GLOBALLINK	0x0000800000000000
+#define TB_IS_EPROL	0x0000400000000000
+#define TB_HAS_TBOFF	0x0000200000000000
+#define TB_INT_PROC	0x0000100000000000
+#define TB_HAS_CTL	0x0000080000000000
+#define TB_TOCLESS	0x0000040000000000
+#define TB_FP_PRESENT	0x0000020000000000
+#define TB_LOG_ABORT	0x0000010000000000
+#define TB_INT_HNDL	0x0000008000000000
+#define TB_NAME_PRESENT	0x0000004000000000
+#define TB_SAVES_CR	0x0000000200000000
+#define TB_SAVES_LR	0x0000000100000000
+#define TB_STORES_BC	0x0000000080000000
+#define TB_PARMINFO	0x000000000000FFFF
+#define TB_DEFAULT	TB_ASM | TB_HAS_TBOFF | TB_NAME_PRESENT
+
+#ifdef __ASSEMBLY__
+
+#define _GLOBAL(name) \
+	.section ".text"; \
+	.align 2 ; \
+	.globl name; \
+	.globl GLUE(.,name); \
+	.section ".opd","aw"; \
+name: \
+	.quad GLUE(.,name); \
+	.quad .TOC.@tocbase; \
+	.quad 0; \
+	.previous; \
+	.type GLUE(.,name),@function; \
+GLUE(.,name):
+
+#define _STATIC(name) \
+	.section ".text"; \
+	.align 2 ; \
+	.section ".opd","aw"; \
+name: \
+	.quad GLUE(.,name); \
+	.quad .TOC.@tocbase; \
+	.quad 0; \
+	.previous; \
+	.type GLUE(.,name),@function; \
+GLUE(.,name):
+
+#define _TRACEBACK(NAME) \
+GLUE(.LT,NAME): ;\
+	.long 0 ;\
+	.llong TB_DEFAULT ;\
+	.long GLUE(.LT,NAME)-GLUE(.,NAME) ;\
+	.short GLUE(GLUE(.LT,NAME),_procname_end)-GLUE(GLUE(.LT,NAME),_procname_start) ;\
+GLUE(GLUE(.LT,NAME),_procname_start): ;\
+	.ascii stringify(NAME) ;\
+GLUE(GLUE(.LT,NAME),_procname_end):
+
+#endif /* __ASSEMBLY__ */
+
+
+/* Macros for setting and retrieving special purpose registers */
+
+#define mfmsr()		({unsigned long rval; \
+			asm volatile("mfmsr %0" : "=r" (rval)); rval;})
+
+#define mtmsrd(v)	asm volatile("mtmsrd %0" : : "r" (v))
+
+#define mfspr(rn)	({unsigned long rval; \
+			asm volatile("mfspr %0," stringify(rn) \
+				     : "=r" (rval)); rval;})
+#define mtspr(rn, v)	asm volatile("mtspr " stringify(rn) ",%0" : : "r" (v))
+
+#define mftb()		({unsigned long rval; \
+			asm volatile("mftb %0" : "=r" (rval)); rval;})
+
+/* iSeries CTRL register (for runlatch) */
+
+#define CTRLT		0x098
+#define CTRLF		0x088
+#define RUNLATCH	0x0001
+#define RUN_FLAG	0x0002
+
+/* Macros for adjusting thread priority (hardware multi-threading) */
+#define HMT_low()	asm volatile("or 1,1,1")
+#define HMT_medium()	asm volatile("or 2,2,2")
+#define HMT_high()	asm volatile("or 3,3,3")
+
+/* Size of an exception stack frame contained in the paca. */
+#define EXC_FRAME_SIZE	64
+
+#define mfasr()		({unsigned long rval; \
+			asm volatile("mfasr %0" : "=r" (rval)); rval;})
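/*
 * Illustrative sketch (not part of the original patch): a busy-wait built
 * from the time-base and thread-priority helpers above.  The tick count is
 * arbitrary; real delays would be derived from the measured time-base
 * frequency.  The function name is hypothetical.
 */
static inline void example_tb_delay(unsigned long ticks)
{
	unsigned long start = mftb();

	HMT_low();	/* drop hardware-thread priority while spinning */
	while (mftb() - start < ticks)
		;
	HMT_medium();
}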
+
+#ifndef __ASSEMBLY__
+extern int _machine;
+extern int have_of;
+
+/*
+ * This is used to identify the board type from a given PReP board
+ * vendor. Board revision is also made available.
+ */
+extern unsigned char ucSystemType;
+extern unsigned char ucBoardRev;
+extern unsigned char ucBoardRevMaj, ucBoardRevMin;
+
+struct task_struct;
+void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
+void release_thread(struct task_struct *);
+
+/*
+ * Create a new kernel thread.
+ */
+extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/*
+ * Bus types
+ */
+#define EISA_bus 0
+#define EISA_bus__is_a_macro /* for versions in ksyms.c */
+#define MCA_bus 0
+#define MCA_bus__is_a_macro /* for versions in ksyms.c */
+
+/* Lazy FPU handling on uni-processor */
+extern struct task_struct *last_task_used_math;
+
+
+#ifdef __KERNEL__
+/* 64-bit user address space is 41-bits (2TBs user VM) */
+#define TASK_SIZE_USER64 (0x0000020000000000UL)
+
+/* 32-bit user address space is 32-bits.  This value must be
+ * at least as large as STACK_TOP.
+ */
+#define TASK_SIZE_USER32 (0x0000000100000000UL)
+
+#define TASK_SIZE ((current->thread.flags & PPC_FLAG_32BIT) ? \
+		TASK_SIZE_USER32 : TASK_SIZE_USER64)
+#endif /* __KERNEL__ */
+
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE_USER32 (STACK_TOP_USER32 / 4)
+#define TASK_UNMAPPED_BASE_USER64 (STACK_TOP_USER64 / 4)
+
+#ifndef PPC64_32B_ADDR_SPACE
+#define TASK_UNMAPPED_BASE ((current->thread.flags & PPC_FLAG_32BIT) ? \
+		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
+#else
+#define TASK_UNMAPPED_BASE TASK_UNMAPPED_BASE_USER32
+#endif
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+struct thread_struct {
+	unsigned long	ksp;		/* Kernel stack pointer */
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+	mm_segment_t	fs;		/* for get_fs() validation */
+	void		*pgdir;		/* root of page-table tree */
+	signed long	last_syscall;
+	unsigned long	flags;
+	double		fpr[32];	/* Complete floating point set */
+	unsigned long	fpscr_pad;	/* fpr ... fpscr must be contiguous */
+	unsigned long	fpscr;		/* Floating point status */
+};
+
+#define PPC_FLAG_32BIT		0x01
+#define PPC_FLAG_RUN_LIGHT	RUN_FLAG
+
+#define INIT_SP		(sizeof(init_stack) + (unsigned long) &init_stack)
+
+#define INIT_THREAD  { \
+	INIT_SP, /* ksp */ \
+	(struct pt_regs *)INIT_SP - 1, /* regs */ \
+	KERNEL_DS, /*fs*/ \
+	swapper_pg_dir, /* pgdir */ \
+	0, /* last_syscall */ \
+	PPC_FLAG_RUN_LIGHT, /* flags */ \
+	{0}, 0, 0 \
+}
+
+/*
+ * Note: the vm_start and vm_end fields here should *not*
+ * be in kernel space.  (Could vm_end == vm_start perhaps?)
+ */
+#define IOREMAP_MMAP { &ioremap_mm, 0, 0x1000, NULL, \
+		PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, \
+		1, NULL, NULL }
+
+extern struct mm_struct ioremap_mm;
+
+/*
+ * Return saved PC of a blocked thread. For now, this is the "user" PC
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+	return (t->regs) ? t->regs->nip : 0;
+}
+
+#define copy_segments(tsk, mm)	do { } while (0)
+#define release_segments(mm)	do { } while (0)
+#define forget_segments()	do { } while (0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk)	((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
+#define KSTK_ESP(tsk)	((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
+
+/*
+ * NOTE! 
The task struct and the stack go together + */ +#define THREAD_SIZE (4*PAGE_SIZE) +struct task_struct * alloc_task_struct(void); + +void free_task_struct(struct task_struct *); +#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + +/* in process.c - for early bootup debug -- Cort */ +int ll_printk(const char *, ...); +void ll_puts(const char *); + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) + +#define cpu_relax() do { } while (0) + +/* In misc.c */ +void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); + +/* + * Prefetch macros. + */ +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("dcbt 0,%0" : : "r" (x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x)); +} + +#define spin_lock_prefetch(x) prefetchw(x) + +#endif /* ASSEMBLY */ + +#endif /* __ASM_PPC64_PROCESSOR_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/prom.h linuxppc64_2_4/include/asm-ppc64/prom.h --- ../kernel.org/linux/include/asm-ppc64/prom.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/prom.h Thu Sep 20 14:25:05 2001 @@ -0,0 +1,202 @@ +#ifndef _PPC64_PROM_H +#define _PPC64_PROM_H + +/* + * Definitions for talking to the Open Firmware PROM on + * Power Macintosh computers. + * + * Copyright (C) 1996 Paul Mackerras. + * + * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset)) +#define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset)) +#define RELOC(x) (*PTRRELOC(&(x))) + +#define LONG_LSW(X) (((unsigned long)X) & 0xffffffff) +#define LONG_MSW(X) (((unsigned long)X) >> 32) + +typedef u32 phandle; +typedef void *ihandle; +typedef u32 phandle32; +typedef u32 ihandle32; + +extern char *prom_display_paths[]; +extern unsigned int prom_num_displays; + +struct address_range { + unsigned long space; + unsigned long address; + unsigned long size; +}; + +struct interrupt_info { + int line; + int sense; /* +ve/-ve logic, edge or level, etc. 
*/ +}; + +struct pci_address { + u32 a_hi; + u32 a_mid; + u32 a_lo; +}; + +struct pci_range32 { + struct pci_address child_addr; + unsigned int parent_addr; + unsigned long size; +}; + +struct pci_range64 { + struct pci_address child_addr; + unsigned long parent_addr; + unsigned long size; +}; + +union pci_range { + struct { + struct pci_address addr; + u32 phys; + u32 size_hi; + } pci32; + struct { + struct pci_address addr; + u32 phys_hi; + u32 phys_lo; + u32 size_hi; + u32 size_lo; + } pci64; +}; + +struct _of_tce_table { + phandle node; + unsigned long base; + unsigned long size; +}; + +struct reg_property { + unsigned long address; + unsigned long size; +}; + +struct reg_property32 { + unsigned int address; + unsigned int size; +}; + +struct reg_property64 { + unsigned long address; + unsigned long size; +}; + +struct translation_property { + unsigned long virt; + unsigned long size; + unsigned long phys; + unsigned int flags; +}; + +struct property { + char *name; + int length; + unsigned char *value; + struct property *next; +}; + +/* NOTE: the device_node contains PCI specific info for pci devices. + * This perhaps could be hung off the device_node with another struct, + * but for now it is directly in the node. The phb ptr is a good + * indication of a real PCI node. Other nodes leave these fields zeroed. + */ +struct pci_controller; +struct TceTable; +struct device_node { + char *name; + char *type; + phandle node; + int n_addrs; + struct address_range *addrs; + int n_intrs; + struct interrupt_info *intrs; + char *full_name; + int busno; /* for pci devices */ + int devfn; /* for pci devices */ + struct pci_controller *phb; /* for pci devices */ + int status; /* current status of device */ + struct TceTable *tce_table; /* for phb's or bridges */ +#define DN_STATUS_BIST_FAILED (1<<0) + struct property *properties; + struct device_node *parent; + struct device_node *child; + struct device_node *sibling; + struct device_node *next; /* next device of same type */ + struct device_node *allnext; /* next in list of all nodes */ +}; + +typedef u32 prom_arg_t; + +struct prom_args { + u32 service; + u32 nargs; + u32 nret; + prom_arg_t args[10]; + prom_arg_t *rets; /* // Pointer to return values in args[16]. 
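Once the firmware tree has been copied into the kernel's device_node list, consumers are expected to go through the accessors declared further below rather than raw prom calls. A minimal sketch, assuming a "pci" node type and a "reg" property (both illustrative Open Firmware names):

    struct device_node *np;
    int len;

    for (np = find_type_devices("pci"); np != NULL; np = np->next) {
        unsigned char *reg = get_property(np, "reg", &len);
        if (reg == NULL)
            continue;
        /* np->full_name, np->n_addrs and np->addrs[] describe
           this host bridge and are ready for setup code */
    }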
*/ +}; + +typedef struct { + u32 printf; /* void (*printf)(char *, ...); */ + u32 memdump; /* void (*memdump)(unsigned char *, unsigned long); */ + u32 dummy; /* void (*dummy)(void); */ +} yaboot_debug_t; + +struct prom_t { + unsigned long entry; + ihandle chosen; + int cpu; + ihandle stdout; + ihandle disp_node; + struct prom_args args; + unsigned long version; + unsigned long encode_phys_size; + struct bi_record *bi_recs; +#ifdef DEBUG_YABOOT + yaboot_debug_t *yaboot; +#endif +}; + +extern struct prom_t prom; + +/* Prototypes */ +extern void abort(void); +extern unsigned long prom_init(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, yaboot_debug_t *); +extern void prom_print(const char *msg); +extern void relocate_nodes(void); +extern void finish_device_tree(void); +extern struct device_node *find_devices(const char *name); +extern struct device_node *find_type_devices(const char *type); +extern struct device_node *find_path_device(const char *path); +extern struct device_node *find_compatible_devices(const char *type, + const char *compat); +extern struct device_node *find_pci_device_OFnode(unsigned char bus, + unsigned char dev_fn); +extern struct device_node *find_all_nodes(void); +extern int device_is_compatible(struct device_node *device, const char *); +extern int machine_is_compatible(const char *compat); +extern unsigned char *get_property(struct device_node *node, const char *name, + int *lenp); +extern void print_properties(struct device_node *node); +extern int prom_n_addr_cells(struct device_node* np); +extern int prom_n_size_cells(struct device_node* np); +extern void prom_get_irq_senses(unsigned char *senses, int off, int max); +extern void prom_drawstring(const char *c); +extern void prom_drawhex(unsigned long v); +extern void prom_drawchar(char c); + +#endif /* _PPC64_PROM_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ptrace.h linuxppc64_2_4/include/asm-ppc64/ptrace.h --- ../kernel.org/linux/include/asm-ppc64/ptrace.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ptrace.h Mon Oct 1 11:16:17 2001 @@ -0,0 +1,148 @@ +#ifndef _PPC64_PTRACE_H +#define _PPC64_PTRACE_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This struct defines the way the registers are stored on the + * kernel stack during a system call or other kernel entry. + * + * this should only contain volatile regs + * since we can keep non-volatile in the thread_struct + * should set this up when only volatiles are saved + * by intr code. + * + * Since this is going on the stack, *CARE MUST BE TAKEN* to insure + * that the overall structure is a multiple of 16 bytes in length. + * + * Note that the offsets of the fields in this struct correspond with + * the PT_* values below. This simplifies arch/ppc/kernel/ptrace.c. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
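Given the 16-byte requirement spelled out above, a build-time check is cheap insurance; this is a sketch using the usual negative-array-size trick, not something the patch itself carries:

    /* fails to compile if pt_regs stops being a multiple of 16 bytes */
    typedef char pt_regs_is_16b_multiple[(sizeof(struct pt_regs) % 16) ? -1 : 1];

As defined below, gpr[32] plus the twelve scalar members are all 8-byte quantities, 44 * 8 = 352 bytes, so the check passes today.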
+ */ + +#include + +#define INIT_TASK_SIZE 4*PAGE_SIZE + +#ifndef __ASSEMBLY__ +#define PPC_REG unsigned long +struct pt_regs { + PPC_REG gpr[32]; + PPC_REG nip; + PPC_REG msr; + PPC_REG orig_gpr3; /* Used for restarting system calls */ + PPC_REG ctr; + PPC_REG link; + PPC_REG xer; + PPC_REG ccr; + PPC_REG softe; /* Soft enabled/disabled */ + PPC_REG trap; /* Reason for being here */ + PPC_REG dar; /* Fault registers */ + PPC_REG dsisr; + PPC_REG result; /* Result of a system call */ +}; + +#define PPC_REG_32 unsigned int +struct pt_regs32 { + PPC_REG_32 gpr[32]; + PPC_REG_32 nip; + PPC_REG_32 msr; + PPC_REG_32 orig_gpr3; /* Used for restarting system calls */ + PPC_REG_32 ctr; + PPC_REG_32 link; + PPC_REG_32 xer; + PPC_REG_32 ccr; + PPC_REG_32 mq; /* 601 only (not used at present) */ + /* Used on APUS to hold IPL value. */ + PPC_REG_32 trap; /* Reason for being here */ + PPC_REG_32 dar; /* Fault registers */ + PPC_REG_32 dsisr; + PPC_REG_32 result; /* Result of a system call */ +}; + +#endif + +#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ + +/* Size of stack frame allocated when calling signal handler. */ +/* FIXME: What should this be on 64-bit kernel (64 for 32-bit) */ +#define __SIGNAL_FRAMESIZE 64 +#define __SIGNAL_FRAMESIZE32 64 + +#define instruction_pointer(regs) ((regs)->nip) +#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) + +/* + * Offsets used by 'ptrace' system call interface. + */ +#define PT_R0 0 +#define PT_R1 1 +#define PT_R2 2 +#define PT_R3 3 +#define PT_R4 4 +#define PT_R5 5 +#define PT_R6 6 +#define PT_R7 7 +#define PT_R8 8 +#define PT_R9 9 +#define PT_R10 10 +#define PT_R11 11 +#define PT_R12 12 +#define PT_R13 13 +#define PT_R14 14 +#define PT_R15 15 +#define PT_R16 16 +#define PT_R17 17 +#define PT_R18 18 +#define PT_R19 19 +#define PT_R20 20 +#define PT_R21 21 +#define PT_R22 22 +#define PT_R23 23 +#define PT_R24 24 +#define PT_R25 25 +#define PT_R26 26 +#define PT_R27 27 +#define PT_R28 28 +#define PT_R29 29 +#define PT_R30 30 +#define PT_R31 31 + +#define PT_NIP 32 +#define PT_MSR 33 +#ifdef __KERNEL__ +#define PT_ORIG_R3 34 +#endif +#define PT_CTR 35 +#define PT_LNK 36 +#define PT_XER 37 +#define PT_CCR 38 +#define PT_SOFTE 39 +#define PT_RESULT 43 + +#define PT_FPR0 48 +#ifdef __KERNEL__ +#define PT_FPSCR (PT_FPR0 + 32 + 1) /* each FP reg occupies 1 slot in this space */ +#define PT_FPSCR32 (PT_FPR0 + 2*32 + 1) /* To the 32-bit user - each FP reg occupies 2 slots in this space */ +#else +#define PT_FPSCR (PT_FPR0 + 2*32 + 1) /* each FP reg occupies 2 slots in this space -- Fix when 64-bit apps. */ +#endif + +// Additional PTRACE requests implemented on PowerPC. +#define PPC_PTRACE_GETREGS 0x99 /* Get GPRs 0 - 31 */ +#define PPC_PTRACE_SETREGS 0x98 /* Set GPRs 0 - 31 */ +#define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */ +#define PPC_PTRACE_SETFPREGS 0x96 /* Set FPRs 0 - 31 */ +#define PPC_PTRACE_PEEKTEXT_3264 0x95 /* Read word at location ADDR on a 64-bit process from a 32-bit process. */ +#define PPC_PTRACE_PEEKDATA_3264 0x94 /* Read word at location ADDR on a 64-bit process from a 32-bit process. */ +#define PPC_PTRACE_POKETEXT_3264 0x93 /* Write word at location ADDR on a 64-bit process from a 32-bit process. */ +#define PPC_PTRACE_POKEDATA_3264 0x92 /* Write word at location ADDR on a 64-bit process from a 32-bit process. */ +#define PPC_PTRACE_PEEKUSR_3264 0x91 /* Read a register (specified by ADDR) out of the "user area" on a 64-bit process from a 32-bit process. 
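For the native requests, the PT_* values above are word indices into the user area, so a debugger scales them by sizeof(long); a userspace sketch (read_nip and child are illustrative names):

    #include <sys/ptrace.h>
    #include <sys/types.h>

    /* fetch the child's NIP through the user-area offsets above */
    long read_nip(pid_t child)
    {
        return ptrace(PTRACE_PEEKUSER, child,
                      (void *)(PT_NIP * sizeof(long)), 0);
    }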
*/ +#define PPC_PTRACE_POKEUSR_3264 0x90 /* Write DATA into location ADDR within the "user area" on a 64-bit process from a 32-bit process. */ + + +#endif /* _PPC64_PTRACE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/resource.h linuxppc64_2_4/include/asm-ppc64/resource.h --- ../kernel.org/linux/include/asm-ppc64/resource.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/resource.h Wed May 23 10:19:30 2001 @@ -0,0 +1,53 @@ +#ifndef _PPC64_RESOURCE_H +#define _PPC64_RESOURCE_H + +/* + * Copyright (C) 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define RLIMIT_CPU 0 /* CPU time in ms */ +#define RLIMIT_FSIZE 1 /* Maximum filesize */ +#define RLIMIT_DATA 2 /* max data size */ +#define RLIMIT_STACK 3 /* max stack size */ +#define RLIMIT_CORE 4 /* max core file size */ +#define RLIMIT_RSS 5 /* max resident set size */ +#define RLIMIT_NPROC 6 /* max number of processes */ +#define RLIMIT_NOFILE 7 /* max number of open files */ +#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ +#define RLIMIT_AS 9 /* address space limit(?) */ +#define RLIMIT_LOCKS 10 /* maximum file locks held */ + +#define RLIM_NLIMITS 11 + +#ifdef __KERNEL__ + +/* + * SuS says limits have to be unsigned. + * Which makes a ton more sense anyway. + */ +#define RLIM_INFINITY (~0UL) + + +#define INIT_RLIMITS \ +{ \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { _STK_LIM, RLIM_INFINITY }, \ + { 0, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { 0, 0 }, \ + { INR_OPEN, INR_OPEN }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ +} + +#endif /* __KERNEL__ */ + +#endif /* _PPC64_RESOURCE_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/rtas.h linuxppc64_2_4/include/asm-ppc64/rtas.h --- ../kernel.org/linux/include/asm-ppc64/rtas.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/rtas.h Wed Nov 7 13:05:40 2001 @@ -0,0 +1,141 @@ +#ifndef _PPC64_RTAS_H +#define _PPC64_RTAS_H + +#include + +/* + * Definitions for talking to the RTAS on CHRP machines. + * + * Copyright (C) 2001 Peter Bergner + * Copyright (C) 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define RTAS_UNKNOWN_SERVICE (-1) +/* + * In general to call RTAS use rtas_token("string") to lookup + * an RTAS token for the given string (e.g. "event-scan"). + * To actually perform the call use + * ret = rtas_call(token, n_in, n_out, ...) + * Where n_in is the number of input parameters and + * n_out is the number of output parameters + * + * If the "string" is invalid on this system, RTAS_UNKOWN_SERVICE + * will be returned as a token. rtas_call() does look for this + * token and error out gracefully so rtas_call(rtas_token("str"), ...) + * may be safely used for one-shot calls to RTAS. 
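A concrete sketch of that pattern, using "event-scan" as the service; the buffer, its size, and the use of __pa() to hand RTAS a physical address are illustrative, and error handling is trimmed:

    char log_buf[1024];
    int token = rtas_token("event-scan");

    if (token != RTAS_UNKNOWN_SERVICE)
        rtas_call(token, 4, 1, NULL,        /* 4 inputs, 1 output */
                  EVENT_SCAN_ALL_EVENTS, 0,
                  __pa(log_buf), sizeof(log_buf));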
+ * + */ + +typedef u32 rtas_arg_t; + +struct rtas_args { + u32 token; + u32 nargs; + u32 nret; + rtas_arg_t args[16]; + spinlock_t lock; + rtas_arg_t *rets; /* Pointer to return values in args[]. */ +}; + +struct rtas_t { + unsigned long entry; /* physical address pointer */ + unsigned long base; /* physical address pointer */ + unsigned long size; + spinlock_t lock; + + struct device_node *dev; /* virtual address pointer */ +}; + +/* Event classes */ +#define INTERNAL_ERROR 0x80000000 /* set bit 0 */ +#define EPOW_WARNING 0x40000000 /* set bit 1 */ +#define POWERMGM_EVENTS 0x20000000 /* set bit 2 */ +#define HOTPLUG_EVENTS 0x10000000 /* set bit 3 */ +#define EVENT_SCAN_ALL_EVENTS 0xf0000000 + +/* event-scan returns */ +#define SEVERITY_FATAL 0x5 +#define SEVERITY_ERROR 0x4 +#define SEVERITY_ERROR_SYNC 0x3 +#define SEVERITY_WARNING 0x2 +#define SEVERITY_EVENT 0x1 +#define SEVERITY_NO_ERROR 0x0 +#define DISP_FULLY_RECOVERED 0x0 +#define DISP_LIMITED_RECOVERY 0x1 +#define DISP_NOT_RECOVERED 0x2 +#define PART_PRESENT 0x0 +#define PART_NOT_PRESENT 0x1 +#define INITIATOR_UNKNOWN 0x0 +#define INITIATOR_CPU 0x1 +#define INITIATOR_PCI 0x2 +#define INITIATOR_ISA 0x3 +#define INITIATOR_MEMORY 0x4 +#define INITIATOR_POWERMGM 0x5 +#define TARGET_UNKNOWN 0x0 +#define TARGET_CPU 0x1 +#define TARGET_PCI 0x2 +#define TARGET_ISA 0x3 +#define TARGET_MEMORY 0x4 +#define TARGET_POWERMGM 0x5 +#define TYPE_RETRY 0x01 +#define TYPE_TCE_ERR 0x02 +#define TYPE_INTERN_DEV_FAIL 0x03 +#define TYPE_TIMEOUT 0x04 +#define TYPE_DATA_PARITY 0x05 +#define TYPE_ADDR_PARITY 0x06 +#define TYPE_CACHE_PARITY 0x07 +#define TYPE_ADDR_INVALID 0x08 +#define TYPE_ECC_UNCORR 0x09 +#define TYPE_ECC_CORR 0x0a +#define TYPE_EPOW 0x40 +/* I don't add PowerMGM events right now, this is a different topic */ +#define TYPE_PMGM_POWER_SW_ON 0x60 +#define TYPE_PMGM_POWER_SW_OFF 0x61 +#define TYPE_PMGM_LID_OPEN 0x62 +#define TYPE_PMGM_LID_CLOSE 0x63 +#define TYPE_PMGM_SLEEP_BTN 0x64 +#define TYPE_PMGM_WAKE_BTN 0x65 +#define TYPE_PMGM_BATTERY_WARN 0x66 +#define TYPE_PMGM_BATTERY_CRIT 0x67 +#define TYPE_PMGM_SWITCH_TO_BAT 0x68 +#define TYPE_PMGM_SWITCH_TO_AC 0x69 +#define TYPE_PMGM_KBD_OR_MOUSE 0x6a +#define TYPE_PMGM_ENCLOS_OPEN 0x6b +#define TYPE_PMGM_ENCLOS_CLOSED 0x6c +#define TYPE_PMGM_RING_INDICATE 0x6d +#define TYPE_PMGM_LAN_ATTENTION 0x6e +#define TYPE_PMGM_TIME_ALARM 0x6f +#define TYPE_PMGM_CONFIG_CHANGE 0x70 +#define TYPE_PMGM_SERVICE_PROC 0x71 + +struct rtas_error_log { + unsigned long version:8; /* Architectural version */ + unsigned long severity:3; /* Severity level of error */ + unsigned long disposition:2; /* Degree of recovery */ + unsigned long extended:1; /* extended log present? 
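(when this bit is set, an extended error log is expected to follow the fixed header below)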
*/ + unsigned long /* reserved */ :2; /* Reserved for future use */ + unsigned long initiator:4; /* Initiator of event */ + unsigned long target:4; /* Target of failed operation */ + unsigned long type:8; /* General event or error*/ + unsigned long extended_log_length:32; /* length in bytes */ + unsigned char buffer[1]; /* allocated by klimit bump */ +}; + +extern struct rtas_t rtas; + +extern void enter_rtas(struct rtas_args *); +extern int rtas_token(const char *service); +extern long rtas_call(int token, int, int, unsigned long *, ...); +extern void phys_call_rtas(int, int, int, ...); +extern void phys_call_rtas_display_status(char); +extern void call_rtas_display_status(char); +extern void rtas_restart(char *cmd); +extern void rtas_power_off(void); +extern void rtas_halt(void); + +#endif /* _PPC64_RTAS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/rwsem.h linuxppc64_2_4/include/asm-ppc64/rwsem.h --- ../kernel.org/linux/include/asm-ppc64/rwsem.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/rwsem.h Mon May 21 18:30:20 2001 @@ -0,0 +1,139 @@ +/* + * include/asm-ppc/rwsem.h: R/W semaphores for PPC using the stuff + * in lib/rwsem.c. Adapted largely from include/asm-i386/rwsem.h + * by Paul Mackerras . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _PPC64_RWSEM_H +#define _PPC64_RWSEM_H + +#ifdef __KERNEL__ +#include +#include +#include +#include + +/* + * the semaphore definition + */ +struct rw_semaphore { + /* XXX this should be able to be an atomic_t -- paulus */ + signed int count; +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + spinlock_t wait_lock; + struct list_head wait_list; +#if RWSEM_DEBUG + int debug; +#endif +}; + +/* + * initialisation + */ +#if RWSEM_DEBUG +#define __RWSEM_DEBUG_INIT , 0 +#else +#define __RWSEM_DEBUG_INIT /* */ +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEBUG_INIT } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +#if RWSEM_DEBUG + sem->debug = 0; +#endif +} + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (atomic_inc_return((atomic_t *)(&sem->count)) >= 0) + smp_wmb(); + else + rwsem_down_read_failed(sem); +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)); + if (tmp == RWSEM_ACTIVE_WRITE_BIAS) + smp_wmb(); + else + rwsem_down_write_failed(sem); +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + int tmp; 
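    /* The barrier below gives release semantics: the read-side
       critical section's accesses are ordered before the count
       update that may hand the lock to a waiting writer. */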
+ + smp_wmb(); + tmp = atomic_dec_return((atomic_t *)(&sem->count)); + if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + smp_wmb(); + if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)) < 0) + rwsem_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +{ + atomic_add(delta, (atomic_t *)(&sem->count)); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +{ + smp_mb(); + return atomic_add_return(delta, (atomic_t *)(&sem->count)); +} + +#endif /* __KERNEL__ */ +#endif /* _PPC_RWSEM_XADD_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/scatterlist.h linuxppc64_2_4/include/asm-ppc64/scatterlist.h --- ../kernel.org/linux/include/asm-ppc64/scatterlist.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/scatterlist.h Sat Nov 3 23:51:44 2001 @@ -0,0 +1,30 @@ +#ifndef _PPC64_SCATTERLIST_H +#define _PPC64_SCATTERLIST_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +struct scatterlist { + char *address; /* Virtual addr data is to be transferred to */ + struct page *page; /* Location for highmem page, if any */ + unsigned int offset; /* for highmem, page offset */ + unsigned int length; + + /* For TCE support */ + u32 dma_address; + u32 dma_length; +}; + +#define ISA_DMA_THRESHOLD (~0UL) + +#endif /* !(_PPC64_SCATTERLIST_H) */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/segment.h linuxppc64_2_4/include/asm-ppc64/segment.h --- ../kernel.org/linux/include/asm-ppc64/segment.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/segment.h Fri May 4 17:12:35 2001 @@ -0,0 +1,7 @@ +#ifndef __PPC64_SEGMENT_H +#define __PPC64_SEGMENT_H + +/* Only here because we have some old header files that expect it.. */ + +#endif /* __PPC64_SEGMENT_H */ +#include diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/semaphore.h linuxppc64_2_4/include/asm-ppc64/semaphore.h --- ../kernel.org/linux/include/asm-ppc64/semaphore.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/semaphore.h Mon May 21 18:59:30 2001 @@ -0,0 +1,135 @@ +#ifndef _PPC64_SEMAPHORE_H +#define _PPC64_SEMAPHORE_H + +/* + * Swiped from asm-sparc/semaphore.h and modified + * -- Cort (cort@cs.nmt.edu) + * + * Stole some rw spinlock-based semaphore stuff from asm-alpha/semaphore.h + * -- Ani Joshi (ajoshi@unixbox.com) + * + * Remove spinlock-based RW semaphores; RW semaphore definitions are + * now in rwsem.h and we use the the generic lib/rwsem.c implementation. + * Rework semaphores to use atomic_dec_if_positive. + * -- Paul Mackerras (paulus@samba.org) + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +struct semaphore { + /* + * Note that any negative value of count is equivalent to 0, + * but additionally indicates that some process(es) might be + * sleeping on `wait'. 
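For reference, the usage this API implies, as a sketch (foo_sem and foo_op are illustrative names; -ERESTARTSYS comes from linux/errno.h):

    static DECLARE_MUTEX(foo_sem);

    int foo_op(void)
    {
        if (down_interruptible(&foo_sem))
            return -ERESTARTSYS;   /* interrupted by a signal */
        /* ... serialized work ... */
        up(&foo_sem);
        return 0;
    }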
+ */ + atomic_t count; + wait_queue_head_t wait; +#if WAITQUEUE_DEBUG + long __magic; +#endif +}; + +#if WAITQUEUE_DEBUG +# define __SEM_DEBUG_INIT(name) \ + , (long)&(name).__magic +#else +# define __SEM_DEBUG_INIT(name) +#endif + +#define __SEMAPHORE_INITIALIZER(name, count) \ + { ATOMIC_INIT(count), \ + __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ + __SEM_DEBUG_INIT(name) } + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name, 1) + +#define __DECLARE_SEMAPHORE_GENERIC(name, count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name, 1) +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name, 0) + +static inline void sema_init (struct semaphore *sem, int val) +{ + atomic_set(&sem->count, val); + init_waitqueue_head(&sem->wait); +#if WAITQUEUE_DEBUG + sem->__magic = (long)&sem->__magic; +#endif +} + +static inline void init_MUTEX (struct semaphore *sem) +{ + sema_init(sem, 1); +} + +static inline void init_MUTEX_LOCKED (struct semaphore *sem) +{ + sema_init(sem, 0); +} + +extern void __down(struct semaphore * sem); +extern int __down_interruptible(struct semaphore * sem); +extern void __up(struct semaphore * sem); + +extern inline void down(struct semaphore * sem) +{ +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + /* + * Try to get the semaphore, take the slow path if we fail. + */ + if (atomic_dec_return(&sem->count) < 0) + __down(sem); + smp_wmb(); +} + +extern inline int down_interruptible(struct semaphore * sem) +{ + int ret = 0; + +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + if (atomic_dec_return(&sem->count) < 0) + ret = __down_interruptible(sem); + smp_wmb(); + return ret; +} + +extern inline int down_trylock(struct semaphore * sem) +{ + int ret; + +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + ret = atomic_dec_if_positive(&sem->count) < 0; + smp_wmb(); + return ret; +} + +extern inline void up(struct semaphore * sem) +{ +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + smp_wmb(); + if (atomic_inc_return(&sem->count) <= 0) + __up(sem); +} + +#endif /* __KERNEL__ */ + +#endif /* !(_PPC64_SEMAPHORE_H) */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/sembuf.h linuxppc64_2_4/include/asm-ppc64/sembuf.h --- ../kernel.org/linux/include/asm-ppc64/sembuf.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/sembuf.h Mon May 7 15:21:49 2001 @@ -0,0 +1,27 @@ +#ifndef _PPC64_SEMBUF_H +#define _PPC64_SEMBUF_H + +/* + * The semid64_ds structure for PPC architecture. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + __kernel_time_t sem_ctime; /* last change time */ + unsigned long sem_nsems; /* no. 
of semaphores in array */ + + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _PPC64_SEMBUF_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/serial.h linuxppc64_2_4/include/asm-ppc64/serial.h --- ../kernel.org/linux/include/asm-ppc64/serial.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/serial.h Mon May 7 15:21:49 2001 @@ -0,0 +1,128 @@ +/* + * include/asm-ppc/serial.h + */ + +#include + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +#ifdef CONFIG_SERIAL_MANY_PORTS +#define RS_TABLE_SIZE 64 +#else +#define RS_TABLE_SIZE 4 +#endif + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) +#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#endif + +#ifdef CONFIG_SERIAL_MANY_PORTS +#define FOURPORT_FLAGS ASYNC_FOURPORT +#define ACCENT_FLAGS 0 +#define BOCA_FLAGS 0 +#define HUB6_FLAGS 0 +#endif + +/* + * The following define the access methods for the HUB6 card. All + * access is through two ports for all 24 possible chips. The card is + * selected through the high 2 bits, the port on that card with the + * "middle" 3 bits, and the register on that port with the bottom + * 3 bits. + * + * While the access port and interrupt is configurable, the default + * port locations are 0x302 for the port control register, and 0x303 + * for the data read/write register. Normally, the interrupt is at irq3 + * but can be anything from 3 to 7 inclusive. Note that using 3 will + * require disabling com2. 
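A worked example of the C_P() encoding defined just below, as a standalone sketch:

    #include <stdio.h>

    #define C_P(card,port) (((card)<<6|(port)<<3) + 1)

    int main(void)
    {
        /* card 1, port 3: ((1<<6) | (3<<3)) + 1 = 0x59 */
        printf("0x%x\n", C_P(1, 3));
        return 0;
    }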
+ */ + +#define C_P(card,port) (((card)<<6|(port)<<3) + 1) + +#define STD_SERIAL_PORT_DEFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ + { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ + { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ + { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ + + +#ifdef CONFIG_SERIAL_MANY_PORTS +#define EXTRA_SERIAL_PORT_DEFNS \ + { 0, BASE_BAUD, 0x1A0, 9, FOURPORT_FLAGS }, /* ttyS4 */ \ + { 0, BASE_BAUD, 0x1A8, 9, FOURPORT_FLAGS }, /* ttyS5 */ \ + { 0, BASE_BAUD, 0x1B0, 9, FOURPORT_FLAGS }, /* ttyS6 */ \ + { 0, BASE_BAUD, 0x1B8, 9, FOURPORT_FLAGS }, /* ttyS7 */ \ + { 0, BASE_BAUD, 0x2A0, 5, FOURPORT_FLAGS }, /* ttyS8 */ \ + { 0, BASE_BAUD, 0x2A8, 5, FOURPORT_FLAGS }, /* ttyS9 */ \ + { 0, BASE_BAUD, 0x2B0, 5, FOURPORT_FLAGS }, /* ttyS10 */ \ + { 0, BASE_BAUD, 0x2B8, 5, FOURPORT_FLAGS }, /* ttyS11 */ \ + { 0, BASE_BAUD, 0x330, 4, ACCENT_FLAGS }, /* ttyS12 */ \ + { 0, BASE_BAUD, 0x338, 4, ACCENT_FLAGS }, /* ttyS13 */ \ + { 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS14 (spare) */ \ + { 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS15 (spare) */ \ + { 0, BASE_BAUD, 0x100, 12, BOCA_FLAGS }, /* ttyS16 */ \ + { 0, BASE_BAUD, 0x108, 12, BOCA_FLAGS }, /* ttyS17 */ \ + { 0, BASE_BAUD, 0x110, 12, BOCA_FLAGS }, /* ttyS18 */ \ + { 0, BASE_BAUD, 0x118, 12, BOCA_FLAGS }, /* ttyS19 */ \ + { 0, BASE_BAUD, 0x120, 12, BOCA_FLAGS }, /* ttyS20 */ \ + { 0, BASE_BAUD, 0x128, 12, BOCA_FLAGS }, /* ttyS21 */ \ + { 0, BASE_BAUD, 0x130, 12, BOCA_FLAGS }, /* ttyS22 */ \ + { 0, BASE_BAUD, 0x138, 12, BOCA_FLAGS }, /* ttyS23 */ \ + { 0, BASE_BAUD, 0x140, 12, BOCA_FLAGS }, /* ttyS24 */ \ + { 0, BASE_BAUD, 0x148, 12, BOCA_FLAGS }, /* ttyS25 */ \ + { 0, BASE_BAUD, 0x150, 12, BOCA_FLAGS }, /* ttyS26 */ \ + { 0, BASE_BAUD, 0x158, 12, BOCA_FLAGS }, /* ttyS27 */ \ + { 0, BASE_BAUD, 0x160, 12, BOCA_FLAGS }, /* ttyS28 */ \ + { 0, BASE_BAUD, 0x168, 12, BOCA_FLAGS }, /* ttyS29 */ \ + { 0, BASE_BAUD, 0x170, 12, BOCA_FLAGS }, /* ttyS30 */ \ + { 0, BASE_BAUD, 0x178, 12, BOCA_FLAGS }, /* ttyS31 */ +#else +#define EXTRA_SERIAL_PORT_DEFNS +#endif + +/* You can have up to four HUB6's in the system, but I've only + * included two cards here for a total of twelve ports. 
+ */ +#if (defined(CONFIG_HUB6) && defined(CONFIG_SERIAL_MANY_PORTS)) +#define HUB6_SERIAL_PORT_DFNS \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,0) }, /* ttyS32 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,1) }, /* ttyS33 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,2) }, /* ttyS34 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,3) }, /* ttyS35 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,4) }, /* ttyS36 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,5) }, /* ttyS37 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,0) }, /* ttyS38 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,1) }, /* ttyS39 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,2) }, /* ttyS40 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,3) }, /* ttyS41 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,4) }, /* ttyS42 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,5) }, /* ttyS43 */ +#else +#define HUB6_SERIAL_PORT_DFNS +#endif + +#define MCA_SERIAL_PORT_DFNS + +#define SERIAL_PORT_DFNS \ + STD_SERIAL_PORT_DEFNS \ + EXTRA_SERIAL_PORT_DEFNS \ + HUB6_SERIAL_PORT_DFNS \ + MCA_SERIAL_PORT_DFNS diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/setup.h linuxppc64_2_4/include/asm-ppc64/setup.h --- ../kernel.org/linux/include/asm-ppc64/setup.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/setup.h Wed May 23 10:38:17 2001 @@ -0,0 +1,6 @@ +#ifndef _PPC_SETUP_H +#define _PPC_SETUP_H + +/* This is a place holder include */ + +#endif /* _PPC_SETUP_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/shmbuf.h linuxppc64_2_4/include/asm-ppc64/shmbuf.h --- ../kernel.org/linux/include/asm-ppc64/shmbuf.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/shmbuf.h Mon Nov 19 21:26:53 2001 @@ -0,0 +1,43 @@ +#ifndef _PPC64_SHMBUF_H +#define _PPC64_SHMBUF_H + +/* + * The shmid64_ds structure for PPC64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + __kernel_time_t shm_atime; /* last attach time */ + __kernel_time_t shm_dtime; /* last detach time */ + __kernel_time_t shm_ctime; /* last change time */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. 
of current attaches */ + unsigned long __unused1; + unsigned long __unused2; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _PPC64_SHMBUF_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/shmparam.h linuxppc64_2_4/include/asm-ppc64/shmparam.h --- ../kernel.org/linux/include/asm-ppc64/shmparam.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/shmparam.h Mon May 7 15:21:49 2001 @@ -0,0 +1,13 @@ +#ifndef _PPC64_SHMPARAM_H +#define _PPC64_SHMPARAM_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _PPC64_SHMPARAM_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/sigcontext.h linuxppc64_2_4/include/asm-ppc64/sigcontext.h --- ../kernel.org/linux/include/asm-ppc64/sigcontext.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/sigcontext.h Mon May 7 15:21:49 2001 @@ -0,0 +1,35 @@ +#ifndef _ASM_PPC64_SIGCONTEXT_H +#define _ASM_PPC64_SIGCONTEXT_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + + +struct sigcontext_struct { + unsigned long _unused[4]; + int signal; + unsigned long handler; + unsigned long oldmask; + struct pt_regs *regs; +}; + +#ifdef __KERNEL__ + +struct sigcontext32_struct { + unsigned int _unused[4]; + int signal; + unsigned int handler; + unsigned int oldmask; + u32 regs; // 4 byte pointer to the pt_regs32 structure. +}; + +#endif /* __KERNEL__ */ + + +#endif /* _ASM_PPC64_SIGCONTEXT_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/siginfo.h linuxppc64_2_4/include/asm-ppc64/siginfo.h --- ../kernel.org/linux/include/asm-ppc64/siginfo.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/siginfo.h Mon May 7 15:21:49 2001 @@ -0,0 +1,306 @@ +#ifndef _PPC64_SIGINFO_H +#define _PPC64_SIGINFO_H + +/* Copied from i386 from alpha. */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include + +typedef union sigval { + int sival_int; + void *sival_ptr; +} sigval_t; + + +#ifdef __KERNEL__ + +typedef union sigval32 { + int sival_int; + unsigned int sival_ptr; +} sigval_t32; + + +#endif /* __KERNEL__ */ + + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + unsigned int _timer1; + unsigned int _timer2; + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + + +#ifdef __KERNEL__ + +typedef struct siginfo32 { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + __kernel_pid_t32 _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + unsigned int _timer1; + unsigned int _timer2; + } _timer; + + /* POSIX.1b signals */ + struct { + __kernel_pid_t32 _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + sigval_t32 _sigval; + } _rt; + + /* SIGCHLD */ + struct { + __kernel_pid_t32 _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + __kernel_clock_t32 _utime; + __kernel_clock_t32 _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ + struct { + u32 _addr; /* faulting insn/memory ref. */ + int _trapno; + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t32; + +#endif /* __KERNEL__ */ + + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd + +#ifdef __KERNEL__ +#define __SI_MASK 0xffff0000 +#define __SI_KILL (0 << 16) +#define __SI_TIMER (1 << 16) +#define __SI_POLL (2 << 16) +#define __SI_FAULT (3 << 16) +#define __SI_CHLD (4 << 16) +#define __SI_RT (5 << 16) +#define __SI_CODE(T,N) ((T) << 16 | ((N) & 0xffff)) +#else +#define __SI_KILL 0 +#define __SI_TIMER 0 +#define __SI_POLL 0 +#define __SI_FAULT 0 +#define __SI_CHLD 0 +#define __SI_RT 0 +#define __SI_CODE(T,N) (N) +#endif + +/* + * si_code values + * Digital reserves positive values for kernel-generated signals. 
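How these codes reach userspace, as a sketch via the glibc sigaction wrapper (handler body illustrative; the SEGV_* values are defined below, and the handler is installed with sa_flags = SA_SIGINFO):

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static void on_segv(int sig, siginfo_t *si, void *uctx)
    {
        /* si_code separates unmapped from permission faults;
           si_addr carries the faulting address */
        fprintf(stderr, "%s fault at %p\n",
                si->si_code == SEGV_MAPERR ? "unmapped" : "protection",
                si->si_addr);
        _exit(1);
    }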
+ */ +#define SI_USER 0 /* sent by kill, sigsend, raise */ +#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ +#define SI_QUEUE -1 /* sent by sigqueue */ +#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ +#define SI_MESGQ -3 /* sent by real time mesq state change */ +#define SI_ASYNCIO -4 /* sent by AIO completion */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ + +#define SI_FROMUSER(siptr) ((siptr)->si_code <= 0) +#define SI_FROMKERNEL(siptr) ((siptr)->si_code > 0) + +/* + * SIGILL si_codes + */ +#define ILL_ILLOPC (__SI_FAULT|1) /* illegal opcode */ +#define ILL_ILLOPN (__SI_FAULT|2) /* illegal operand */ +#define ILL_ILLADR (__SI_FAULT|3) /* illegal addressing mode */ +#define ILL_ILLTRP (__SI_FAULT|4) /* illegal trap */ +#define ILL_PRVOPC (__SI_FAULT|5) /* privileged opcode */ +#define ILL_PRVREG (__SI_FAULT|6) /* privileged register */ +#define ILL_COPROC (__SI_FAULT|7) /* coprocessor error */ +#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ +#define NSIGILL 8 + +/* + * SIGFPE si_codes + */ +#define FPE_INTDIV (__SI_FAULT|1) /* integer divide by zero */ +#define FPE_INTOVF (__SI_FAULT|2) /* integer overflow */ +#define FPE_FLTDIV (__SI_FAULT|3) /* floating point divide by zero */ +#define FPE_FLTOVF (__SI_FAULT|4) /* floating point overflow */ +#define FPE_FLTUND (__SI_FAULT|5) /* floating point underflow */ +#define FPE_FLTRES (__SI_FAULT|6) /* floating point inexact result */ +#define FPE_FLTINV (__SI_FAULT|7) /* floating point invalid operation */ +#define FPE_FLTSUB (__SI_FAULT|8) /* subscript out of range */ +#define NSIGFPE 8 + +/* + * SIGSEGV si_codes + */ +#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ +#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ +#define NSIGSEGV 2 + +/* + * SIGBUS si_codes + */ +#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ +#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ +#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ +#define NSIGBUS 3 + +/* + * SIGTRAP si_codes + */ +#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */ +#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ +#define NSIGTRAP 2 + +/* + * SIGCHLD si_codes + */ +#define CLD_EXITED (__SI_CHLD|1) /* child has exited */ +#define CLD_KILLED (__SI_CHLD|2) /* child was killed */ +#define CLD_DUMPED (__SI_CHLD|3) /* child terminated abnormally */ +#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */ +#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */ +#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */ +#define NSIGCHLD 6 + +/* + * SIGPOLL si_codes + */ +#define POLL_IN (__SI_POLL|1) /* data input available */ +#define POLL_OUT (__SI_POLL|2) /* output buffers available */ +#define POLL_MSG (__SI_POLL|3) /* input message available */ +#define POLL_ERR (__SI_POLL|4) /* i/o error */ +#define POLL_PRI (__SI_POLL|5) /* high priority input available */ +#define POLL_HUP (__SI_POLL|6) /* device disconnected */ +#define NSIGPOLL 6 + +/* + * sigevent definitions + * + * It seems likely that SIGEV_THREAD will have to be handled from + * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the + * thread manager then catches and does the appropriate nonsense. + * However, everything is written out here so as to not get lost. 
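Filling the structure defined just below for signal-based notification looks like this sketch (the values are illustrative):

    sigevent_t sev;

    sev.sigev_notify = SIGEV_SIGNAL;
    sev.sigev_signo  = SIGRTMIN;
    sev.sigev_value.sival_int = 42;   /* delivered as si_int */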
+ */ +#define SIGEV_SIGNAL 0 /* notify via signal */ +#define SIGEV_NONE 1 /* other notification: meaningless */ +#define SIGEV_THREAD 2 /* deliver via thread creation */ + +#define SIGEV_MAX_SIZE 64 +#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3) + +typedef struct sigevent { + sigval_t sigev_value; + int sigev_signo; + int sigev_notify; + union { + int _pad[SIGEV_PAD_SIZE]; + + struct { + void (*_function)(sigval_t); + void *_attribute; /* really pthread_attr_t */ + } _sigev_thread; + } _sigev_un; +} sigevent_t; + +#define sigev_notify_function _sigev_un._sigev_thread._function +#define sigev_notify_attributes _sigev_un._sigev_thread._attribute + +#ifdef __KERNEL__ +#include + +extern inline void copy_siginfo(siginfo_t *to, siginfo_t *from) +{ + if (from->si_code < 0) + memcpy(to, from, sizeof(siginfo_t)); + else + /* _sigchld is currently the largest know union member */ + memcpy(to, from, 3*sizeof(int) + sizeof(from->_sifields._sigchld)); +} + +extern int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from); + +#endif /* __KERNEL__ */ + +#endif /* _PPC64_SIGINFO_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/signal.h linuxppc64_2_4/include/asm-ppc64/signal.h --- ../kernel.org/linux/include/asm-ppc64/signal.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/signal.h Mon May 7 16:01:18 2001 @@ -0,0 +1,202 @@ +#ifndef _ASMPPC64_SIGNAL_H +#define _ASMPPC64_SIGNAL_H + +#include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + + + +#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__) +#define __old_sigset_t32 old_sigset_t32 +#define __old_sigaction32 old_sigaction32 +#else +#endif + + + + +/* + * SA_FLAGS values: + * + * SA_ONSTACK is not currently supported, but will allow sigaltstack(2). + * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
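With _NSIG_BPW of 64 the whole mask fits in sig[0], while the 32-bit ABI further below splits it across two words; compat code therefore narrows it along these lines (a sketch, helper name illustrative):

    static void sigset_64to32(sigset32_t *dst, const sigset_t *src)
    {
        dst->sig[0] = (unsigned int)(src->sig[0] & 0xffffffffUL);
        dst->sig[1] = (unsigned int)(src->sig[0] >> 32);
    }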
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND +#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 +#ifdef __KERNEL__ + +/* + * These values of sa_flags are used only by the kernel as part of the + * irq handling routines. + * + * SA_INTERRUPT is also used by the irq handling routines. + * SA_SHIRQ is for shared interrupt support on PCI and EISA. + */ +#define SA_PROBE SA_ONESHOT +#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_SHIRQ 0x04000000 +#endif + +#define SIG_BLOCK 0 /* for blocking signals */ +#define SIG_UNBLOCK 1 /* for unblocking signals */ +#define SIG_SETMASK 2 /* for setting the signal mask */ + +/* Type of a signal handler. */ +typedef void (*__sighandler_t)(int); + +#define SIG_DFL ((__sighandler_t)0) /* default signal handling */ +#define SIG_IGN ((__sighandler_t)1) /* ignore signal */ +#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ + +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +#ifdef __KERNEL__ + +typedef unsigned int __old_sigset_t32; +struct __old_sigaction32 { + unsigned sa_handler; + __old_sigset_t32 sa_mask; + unsigned int sa_flags; + unsigned sa_restorer; /* not used by Linux/SPARC yet */ +}; + + + +#define _PPC32_NSIG 64 +#define _PPC32_NSIG_BPW 32 +#define _PPC32_NSIG_WORDS (_PPC32_NSIG / _PPC32_NSIG_BPW) + +typedef struct { + unsigned int sig[_PPC32_NSIG_WORDS]; +} sigset32_t; + +struct sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ + sigset32_t sa_mask; /* A 32 bit mask */ +}; + +#endif /* __KERNEL__ */ + + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ +#include + +typedef struct sigaltstack_32 { + unsigned int ss_sp; + int ss_flags; + __kernel_size_t32 ss_size; +} stack_32_t; + + + + +#endif + +#endif /* _ASMPPC64_SIGNAL_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/smp.h linuxppc64_2_4/include/asm-ppc64/smp.h --- ../kernel.org/linux/include/asm-ppc64/smp.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/smp.h Wed Sep 5 13:57:08 2001 @@ -0,0 +1,74 @@ +/* + * smp.h: PPC64 specific SMP code. + * + * Original was a copy of sparc smp.h. Now heavily modified + * for PPC. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996-2001 Cort Dougan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifdef __KERNEL__ +#ifndef _PPC64_SMP_H +#define _PPC64_SMP_H + +#include +#include + + + +#ifdef CONFIG_SMP + +#ifndef __ASSEMBLY__ + +#include + +struct current_set_struct { + struct task_struct *task; + unsigned long *sp_real; +}; + +extern unsigned long cpu_online_map; + +extern void smp_message_pass(int target, int msg, unsigned long data, int wait); +extern void smp_store_cpu_info(int id); +extern void smp_send_tlb_invalidate(int); +extern void smp_send_xmon_break(int cpu); +struct pt_regs; +extern void smp_message_recv(int, struct pt_regs *); + +#define NO_PROC_ID 0xFF /* No processor magic marker */ +#define PROC_CHANGE_PENALTY 20 + +/* 1 to 1 mapping on PPC -- Cort */ +#define cpu_logical_map(cpu) (cpu) +#define cpu_number_map(x) (x) +extern volatile unsigned long cpu_callin_map[NR_CPUS]; + +#define smp_processor_id() (get_paca()->xPacaIndex) +#define hard_smp_processor_id() (get_paca()->xHwProcNum) +#define get_hard_smp_processor_id(CPU) (xPaca[(CPU)].xHwProcNum) + + + +/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. + * + * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up + * in /proc/interrupts will be wrong!!! --Troy */ +#define PPC_MSG_CALL_FUNCTION 0 +#define PPC_MSG_RESCHEDULE 1 +#define PPC_MSG_INVALIDATE_TLB 2 +#define PPC_MSG_XMON_BREAK 3 + +void smp_init_iSeries(void); +void smp_init_pSeries(void); + +#endif /* __ASSEMBLY__ */ +#endif /* !(CONFIG_SMP) */ +#endif /* !(_PPC64_SMP_H) */ +#endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/smplock.h linuxppc64_2_4/include/asm-ppc64/smplock.h --- ../kernel.org/linux/include/asm-ppc64/smplock.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/smplock.h Mon May 7 15:32:23 2001 @@ -0,0 +1,56 @@ +/* + * + * + * Default SMP lock implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +extern spinlock_t kernel_flag; + +#define kernel_locked() spin_is_locked(&kernel_flag) + +/* + * Release global kernel lock and global interrupt lock + */ +#define release_kernel_lock(task, cpu) \ +do { \ + if (task->lock_depth >= 0) \ + spin_unlock(&kernel_flag); \ + release_irqlock(cpu); \ + __sti(); \ +} while (0) + +/* + * Re-acquire the kernel lock + */ +#define reacquire_kernel_lock(task) \ +do { \ + if (task->lock_depth >= 0) \ + spin_lock(&kernel_flag); \ +} while (0) + + +/* + * Getting the big kernel lock. + * + * This cannot happen asynchronously, + * so we only need to worry about other + * CPU's. 
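Since lock_depth starts at -1 for a new task, the counting below makes the lock recursive; a sketch of the resulting behaviour:

    lock_kernel();     /* depth -1 -> 0: kernel_flag taken */
    lock_kernel();     /* depth  0 -> 1: already held, no spin */
    unlock_kernel();   /* depth  1 -> 0: still held */
    unlock_kernel();   /* depth  0 -> -1: kernel_flag released */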
+ */ +extern __inline__ void lock_kernel(void) +{ + if (!++current->lock_depth) + spin_lock(&kernel_flag); +} + +extern __inline__ void unlock_kernel(void) +{ + if (--current->lock_depth < 0) + spin_unlock(&kernel_flag); +} diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/socket.h linuxppc64_2_4/include/asm-ppc64/socket.h --- ../kernel.org/linux/include/asm-ppc64/socket.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/socket.h Fri Aug 10 13:25:19 2001 @@ -0,0 +1,71 @@ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +/* For setsockoptions(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_RCVLOWAT 16 +#define SO_SNDLOWAT 17 +#define SO_RCVTIMEO 18 +#define SO_SNDTIMEO 19 +#define SO_PASSCRED 20 +#define SO_PEERCRED 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +/* Nast libc5 fixup - bletch */ +#if defined(__KERNEL__) +/* Socket types. */ +#define SOCK_STREAM 1 /* stream (connection) socket */ +#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ +#define SOCK_RAW 3 /* raw socket */ +#define SOCK_RDM 4 /* reliably-delivered message */ +#define SOCK_SEQPACKET 5 /* sequential packet socket */ +#define SOCK_PACKET 10 /* linux specific way of */ + /* getting packets at the dev */ + /* level. For writing rarp and */ + /* other similar things on the */ + /* user level. */ +#define SOCK_MAX (SOCK_PACKET+1) +#endif + +#endif /* _ASM_SOCKET_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/sockios.h linuxppc64_2_4/include/asm-ppc64/sockios.h --- ../kernel.org/linux/include/asm-ppc64/sockios.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/sockios.h Mon May 7 15:32:23 2001 @@ -0,0 +1,19 @@ +#ifndef _ASM_PPC64_SOCKIOS_H +#define _ASM_PPC64_SOCKIOS_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Socket-level I/O control calls. 
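For reference, a userspace sketch of one of these calls (sock_fd is assumed to be an open socket):

    #include <sys/ioctl.h>
    #include <sys/time.h>

    /* time at which the last packet arrived on sock_fd */
    static int last_rx_time(int sock_fd, struct timeval *tv)
    {
        return ioctl(sock_fd, SIOCGSTAMP, tv);
    }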
*/ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp */ + +#endif /* _ASM_PPC64_SOCKIOS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/softirq.h linuxppc64_2_4/include/asm-ppc64/softirq.h --- ../kernel.org/linux/include/asm-ppc64/softirq.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/softirq.h Fri Oct 26 02:54:47 2001 @@ -0,0 +1,29 @@ +#ifndef __ASM_SOFTIRQ_H +#define __ASM_SOFTIRQ_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + + +#define local_bh_disable() do { local_bh_count(smp_processor_id())++; barrier(); } while (0) +#define __local_bh_enable() do { barrier(); local_bh_count(smp_processor_id())--; } while (0) + +#define local_bh_enable() \ +do { \ + if (!--local_bh_count(smp_processor_id()) \ + && softirq_pending(smp_processor_id())) { \ + do_softirq(); \ + } \ +} while (0) + + +#define in_softirq() (local_bh_count(smp_processor_id()) != 0) + +#endif /* __ASM_SOFTIRQ_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/spinlock.h linuxppc64_2_4/include/asm-ppc64/spinlock.h --- ../kernel.org/linux/include/asm-ppc64/spinlock.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/spinlock.h Thu Nov 8 23:03:32 2001 @@ -0,0 +1,215 @@ +#ifdef __KERNEL__ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +/* + * Simple spin lock operations. + * + * Copyright (C) 2001 Paul Mackerras , IBM + * Copyright (C) 2001 Anton Blanchard , IBM + * + * Type of int is used as a full 64b word is not necessary. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +typedef struct { + volatile unsigned int lock; +} spinlock_t; + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } + +#define spin_is_locked(x) ((x)->lock != 0) + +static __inline__ int spin_trylock(spinlock_t *lock) +{ + unsigned int tmp; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # spin_trylock\n\ + cmpwi 0,%0,0\n\ + li %0,0\n\ + bne- 2f\n\ + li %0,1\n\ + stwcx. %0,0,%1\n\ + bne- 1b\n\ + isync\n\ +2:" : "=&r"(tmp) + : "r"(&lock->lock) + : "cr0", "memory"); + + return tmp; +} + +static __inline__ void spin_lock(spinlock_t *lock) +{ + unsigned int tmp; + + __asm__ __volatile__( + "b 2f # spin_lock\n\ +1: or 1,1,1 # spin at low priority\n\ + lwzx %0,0,%1\n\ + cmpwi 0,%0,0\n\ + bne+ 1b\n\ + or 2,2,2 # back to medium priority\n\ +2: lwarx %0,0,%1\n\ + cmpwi 0,%0,0\n\ + bne- 1b\n\ + stwcx. %2,0,%1\n\ + bne- 2b\n\ + isync" + : "=&r"(tmp) + : "r"(&lock->lock), "r"(1) + : "cr0", "memory"); +} + +static __inline__ void spin_unlock(spinlock_t *lock) +{ + __asm__ __volatile__("eieio # spin_unlock": : :"memory"); + lock->lock = 0; +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. 
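A sketch of that mix, using the rwlock_t defined just below; the irq-save wrappers come from the generic spinlock layer, and dev_list_lock is illustrative:

    static rwlock_t dev_list_lock = RW_LOCK_UNLOCKED;

    void add_dev(void)                  /* process-context writer */
    {
        unsigned long flags;
        write_lock_irqsave(&dev_list_lock, flags);
        /* ... modify the list ... */
        write_unlock_irqrestore(&dev_list_lock, flags);
    }

    void scan_devs(void)                /* may run from an interrupt */
    {
        read_lock(&dev_list_lock);
        /* ... walk the list ... */
        read_unlock(&dev_list_lock);
    }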
+ */ +typedef struct { + volatile signed int lock; +} rwlock_t; + +#define RW_LOCK_UNLOCKED (rwlock_t) { 0 } + +static __inline__ int read_trylock(rwlock_t *rw) +{ + unsigned int tmp; + unsigned int ret; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # read_trylock\n\ + li %1,0\n\ + extsw %0,%0\n\ + addic. %0,%0,1\n\ + ble- 2f\n\ + stwcx. %0,0,%2\n\ + bne- 1b\n\ + li %1,1\n\ + isync\n\ +2:" : "=&r"(tmp), "=&r"(ret) + : "r"(&rw->lock) + : "cr0", "memory"); + + return ret; +} + +static __inline__ void read_lock(rwlock_t *rw) +{ + unsigned int tmp; + + __asm__ __volatile__( + "b 2f # read_lock\n\ +1: or 1,1,1 # spin at low priority\n\ + lwax %0,0,%1\n\ + cmpwi 0,%0,0\n\ + blt+ 1b\n\ + or 2,2,2 # back to medium priority\n\ +2: lwarx %0,0,%1\n\ + extsw %0,%0\n\ + addic. %0,%0,1\n\ + ble- 1b\n\ + stwcx. %0,0,%1\n\ + bne- 2b\n\ + isync" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "memory"); +} + +static __inline__ void read_unlock(rwlock_t *rw) +{ + unsigned int tmp; + + __asm__ __volatile__( + "eieio # read_unlock\n\ +1: lwarx %0,0,%1\n\ + addic %0,%0,-1\n\ + stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "memory"); +} + +static __inline__ int write_trylock(rwlock_t *rw) +{ + unsigned int tmp; + unsigned int ret; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # write_trylock\n\ + cmpwi 0,%0,0\n\ + li %1,0\n\ + bne- 2f\n\ + stwcx. %3,0,%2\n\ + bne- 1b\n\ + li %1,1\n\ + isync\n\ +2:" : "=&r"(tmp), "=&r"(ret) + : "r"(&rw->lock), "r"(-1) + : "cr0", "memory"); + + return ret; +} + +static __inline__ void write_lock(rwlock_t *rw) +{ + unsigned int tmp; + + __asm__ __volatile__( + "b 2f # write_lock\n\ +1: or 1,1,1 # spin at low priority\n\ + lwax %0,0,%1\n\ + cmpwi 0,%0,0\n\ + bne+ 1b\n\ + or 2,2,2 # back to medium priority\n\ +2: lwarx %0,0,%1\n\ + cmpwi 0,%0,0\n\ + bne- 1b\n\ + stwcx. %2,0,%1\n\ + bne- 2b\n\ + isync" + : "=&r"(tmp) + : "r"(&rw->lock), "r"(-1) + : "cr0", "memory"); +} + +static __inline__ void write_unlock(rwlock_t *rw) +{ + __asm__ __volatile__("eieio # write_unlock": : :"memory"); + rw->lock = 0; +} + +static __inline__ int is_read_locked(rwlock_t *rw) +{ + return rw->lock > 0; +} + +static __inline__ int is_write_locked(rwlock_t *rw) +{ + return rw->lock < 0; +} + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +#endif /* __ASM_SPINLOCK_H */ +#endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/stat.h linuxppc64_2_4/include/asm-ppc64/stat.h --- ../kernel.org/linux/include/asm-ppc64/stat.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/stat.h Tue Jul 10 12:45:31 2001 @@ -0,0 +1,109 @@ +#ifndef _PPC64_STAT_H +#define _PPC64_STAT_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
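+ *
+ * (Aside on the rwlock_t added earlier in this patch: the lock word
+ * counts readers, with -1 marking a writer, so a rough C-level
+ * equivalent of read_trylock is
+ *
+ *	old = rw->lock;
+ *	if (old >= 0 && cmpxchg(&rw->lock, old, old + 1) == old)
+ *		... read lock acquired ...
+ *
+ * purely illustrative; the real code uses the lwarx/stwcx.
+ * reservation loop shown above.)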
+ */ + +#include + +struct stat32 { + __kernel_dev_t32 st_dev; /* 2 */ + /* __kernel_dev_t32 __pad1; // 2 */ + __kernel_ino_t32 st_ino; /* 4 */ + __kernel_mode_t32 st_mode;/* 2 */ + short st_nlink; /* 2 */ + __kernel_uid_t32 st_uid; /* 2 */ + __kernel_gid_t32 st_gid; /* 2 */ + __kernel_dev_t32 st_rdev;/* 2 */ + /* __kernel_dev_t32 __pad2; // 2 */ + __kernel_off_t32 st_size;/* 4 */ + __kernel_off_t32 st_blksize; /* 4 */ + __kernel_off_t32 st_blocks; /* 4 */ + __kernel_time_t32 st_atime;/* 4 */ + unsigned int __unused1;/* 4 */ + __kernel_time_t32 st_mtime;/* 4 */ + unsigned int __unused2;/* 4 */ + __kernel_time_t32 st_ctime;/* 4 */ + unsigned int __unused3;/* 4 */ + unsigned int __unused4[2];/* 2*4 */ +}; + +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct __old_kernel_stat32 +{ + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned int st_size; + unsigned int st_atime; + unsigned int st_mtime; + unsigned int st_ctime; +}; + +struct stat { + dev_t st_dev; + ino_t st_ino; + mode_t st_mode; + nlink_t st_nlink; + uid_t st_uid; + gid_t st_gid; + dev_t st_rdev; + off_t st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long __unused1; + unsigned long st_mtime; + unsigned long __unused2; + unsigned long st_ctime; + unsigned long __unused3; + unsigned long __unused4; + unsigned long __unused5; +}; + +/* This matches struct stat64 in glibc2.1. + */ +struct stat64 { + unsigned long st_dev; /* Device. */ + unsigned long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long st_rdev; /* Device number, if device. */ + unsigned short __pad2; + long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + + long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int __unused1; + int st_mtime; /* Time of last modification. */ + unsigned int __unused2; + int st_ctime; /* Time of last status change. */ + unsigned int __unused3; + unsigned int __unused4; + unsigned int __unused5; +}; +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/statfs.h linuxppc64_2_4/include/asm-ppc64/statfs.h --- ../kernel.org/linux/include/asm-ppc64/statfs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/statfs.h Wed Jun 6 14:50:26 2001 @@ -0,0 +1,53 @@ +#ifndef _PPC64_STATFS_H +#define _PPC64_STATFS_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
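+ *
+ * (On struct stat32 above: it mirrors the 32-bit PowerPC layout so the
+ * 32-bit compatibility syscalls can translate a kernel struct stat
+ * field by field; a rough sketch, all names illustrative:
+ *
+ *	err  = put_user(kstat->st_ino, &ubuf->st_ino);
+ *	err |= put_user(kstat->st_mode, &ubuf->st_mode);
+ *	err |= put_user(kstat->st_size, &ubuf->st_size);
+ *	... one put_user per field, narrowing 64-bit values ...
+ *
+ * rather than one bulk copy, since field sizes differ between the
+ * 64-bit and 32-bit structures.)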
+ */ + +#ifndef __KERNEL_STRICT_NAMES + +#include + +typedef __kernel_fsid_t fsid_t; +typedef __kernel_fsid_t __kernel_fsid_t32; + +#endif + +/* + * Both SPARC64 & IA64 also define the following - + */ + +struct statfs32 { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + __kernel_fsid_t32 f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_spare[6]; +}; + +struct statfs { + long f_type; + long f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; + __kernel_fsid_t f_fsid; + long f_namelen; + long f_spare[6]; +}; + +#endif /* _PPC64_STATFS_H */ + + + diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/string.h linuxppc64_2_4/include/asm-ppc64/string.h --- ../kernel.org/linux/include/asm-ppc64/string.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/string.h Mon May 7 15:32:23 2001 @@ -0,0 +1,36 @@ +#ifndef _PPC64_STRING_H_ +#define _PPC64_STRING_H_ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define __HAVE_ARCH_STRCPY +#define __HAVE_ARCH_STRNCPY +#define __HAVE_ARCH_STRLEN +#define __HAVE_ARCH_STRCMP +#define __HAVE_ARCH_STRCAT +#define __HAVE_ARCH_MEMSET +#define __HAVE_ARCH_BCOPY +#define __HAVE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMMOVE +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_MEMCHR + +extern int strcasecmp(const char *, const char *); +extern int strncasecmp(const char *, const char *, int); +extern char * strcpy(char *,const char *); +extern char * strncpy(char *,const char *, __kernel_size_t); +extern __kernel_size_t strlen(const char *); +extern int strcmp(const char *,const char *); +extern char * strcat(char *, const char *); +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +extern void * memmove(void *,const void *,__kernel_size_t); +extern int memcmp(const void *,const void *,__kernel_size_t); +extern void * memchr(const void *,int,__kernel_size_t); + +#endif /* _PPC64_STRING_H_ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/system.h linuxppc64_2_4/include/asm-ppc64/system.h --- ../kernel.org/linux/include/asm-ppc64/system.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/system.h Mon Nov 19 21:43:25 2001 @@ -0,0 +1,274 @@ +#ifndef __PPC64_SYSTEM_H +#define __PPC64_SYSTEM_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +/* + * System defines. + */ +#define KERNEL_START_PHYS 0x800000 +#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) +#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x00000) + +/* + * Memory barrier. + * The sync instruction guarantees that all memory accesses initiated + * by this processor have been performed (with respect to all other + * mechanisms that access memory). The eieio instruction is a barrier + * providing an ordering (separately) for (a) cacheable stores and (b) + * loads and stores to non-cacheable memory (e.g. I/O devices). + * + * mb() prevents loads and stores being reordered across this point. 
+ * rmb() prevents loads being reordered across this point. + * wmb() prevents stores being reordered across this point. + * + * We can use the eieio instruction for wmb, but since it doesn't + * give any ordering guarantees about loads, we have to use the + * stronger but slower sync instruction for mb and rmb. + */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("lwsync" : : : "memory") +#define wmb() __asm__ __volatile__ ("eieio" : : : "memory") + +#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() __asm__ __volatile__("": : :"memory") +#define smp_rmb() __asm__ __volatile__("": : :"memory") +#define smp_wmb() __asm__ __volatile__("": : :"memory") +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_XMON +extern void xmon_irq(int, void *, struct pt_regs *); +extern void xmon(struct pt_regs *excp); +#endif + +extern void print_backtrace(unsigned long *); +extern void show_regs(struct pt_regs * regs); +extern void flush_instruction_cache(void); +extern void hard_reset_now(void); +extern void poweroff_now(void); +extern int _get_PVR(void); +extern long _get_L2CR(void); +extern void _set_L2CR(unsigned long); +extern void via_cuda_init(void); +extern void pmac_nvram_init(void); +extern void pmac_find_display(void); +extern void giveup_fpu(struct task_struct *); +extern void enable_kernel_fp(void); +extern void giveup_altivec(struct task_struct *); +extern void load_up_altivec(struct task_struct *); +extern void cvt_fd(float *from, double *to, unsigned long *fpscr); +extern void cvt_df(double *from, float *to, unsigned long *fpscr); +extern int abs(int); +extern void cacheable_memzero(void *p, unsigned int nb); + +struct device_node; + +struct task_struct; +#define prepare_to_switch() do { } while(0) +#define switch_to(prev,next,last) _switch_to((prev),(next),&(last)) +extern void _switch_to(struct task_struct *, struct task_struct *, + struct task_struct **); + +struct thread_struct; +extern struct task_struct *_switch(struct thread_struct *prev, + struct thread_struct *next); + +struct pt_regs; +extern void dump_regs(struct pt_regs *); + +#ifndef CONFIG_SMP + +#define cli() __cli() +#define sti() __sti() +#define save_flags(flags) __save_flags(flags) +#define restore_flags(flags) __restore_flags(flags) +#define save_and_cli(flags) __save_and_cli(flags) + +#else /* CONFIG_SMP */ + +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) + +#endif /* !CONFIG_SMP */ + +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() +#define local_irq_save(flags) __save_and_cli(flags) +#define local_irq_restore(flags) __restore_flags(flags) + +static __inline__ int __is_processor(unsigned long pv) +{ + unsigned long pvr; + asm volatile("mfspr %0, 0x11F" : "=r" (pvr)); + return(PVR_VER(pvr) == pv); +} + +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + * + * Inline asm pulled from arch/ppc/kernel/misc.S so ppc64 + * is more like most of the other architectures. 
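+ *
+ * A minimal usage sketch (v is a hypothetical int *): retry an update
+ * until no other cpu has changed the word in between:
+ *
+ *	int old, new;
+ *	do {
+ *		old = *v;
+ *		new = old + 1;
+ *	} while (cmpxchg(v, old, new) != old);
+ *
+ * xchg(ptr, val) similarly stores val and returns the previous
+ * contents in one atomic step; tas(ptr) below is just xchg(ptr, 1).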
+ */ +static __inline__ unsigned long +__xchg_u32(volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: lwarx %0,0,%3 # __xchg_u32\n\ + stwcx. %2,0,%3\n\ +2: bne- 1b" + ISYNC_ON_SMP + : "=&r" (dummy), "=m" (*m) + : "r" (val), "r" (m) + : "cc", "memory"); + + return (dummy); +} + +static __inline__ unsigned long +__xchg_u64(volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: ldarx %0,0,%3 # __xchg_u64\n\ + stdcx. %2,0,%3\n\ +2: bne- 1b" + ISYNC_ON_SMP + : "=&r" (dummy), "=m" (*m) + : "r" (val), "r" (m) + : "cc", "memory"); + + return (dummy); +} + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid xchg(). + */ +extern void __xchg_called_with_bad_pointer(void); + +static __inline__ unsigned long +__xchg(volatile void *ptr, unsigned long x, int size) +{ + switch (size) { + case 4: + return __xchg_u32(ptr, x); + case 8: + return __xchg_u64(ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + }) + +#define tas(ptr) (xchg((ptr),1)) + +#define __HAVE_ARCH_CMPXCHG 1 + +static __inline__ unsigned long +__cmpxchg_u32(volatile int *p, int old, int new) +{ + int prev; + + __asm__ __volatile__ ( + EIEIO_ON_SMP +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n\ + stwcx. %4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "=m" (*p) + : "r" (p), "r" (old), "r" (new), "m" (*p) + : "cc", "memory"); + + return prev; +} + +static __inline__ unsigned long +__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) +{ + int prev; + + __asm__ __volatile__ ( + EIEIO_ON_SMP +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "=m" (*p) + : "r" (p), "r" (old), "r" (new), "m" (*p) + : "cc", "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __inline__ unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/termbits.h linuxppc64_2_4/include/asm-ppc64/termbits.h --- ../kernel.org/linux/include/asm-ppc64/termbits.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/termbits.h Mon May 7 15:32:23 2001 @@ -0,0 +1,192 @@ +#ifndef _PPC64_TERMBITS_H +#define _PPC64_TERMBITS_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +/* + * termios type and macro definitions. 
Be careful about adding stuff + * to this file since it's used in GNU libc and there are strict rules + * concerning namespace pollution. + */ + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VMIN 5 +#define VEOL 6 +#define VTIME 7 +#define VEOL2 8 +#define VSWTC 9 +#define VWERASE 10 +#define VREPRINT 11 +#define VSUSP 12 +#define VSTART 13 +#define VSTOP 14 +#define VLNEXT 15 +#define VDISCARD 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IXON 0001000 +#define IXOFF 0002000 +#define IXANY 0004000 +#define IUCLC 0010000 +#define IMAXBEL 0020000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define ONLCR 0000002 +#define OLCUC 0000004 + +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 + +#define OFILL 00000100 +#define OFDEL 00000200 +#define NLDLY 00001400 +#define NL0 00000000 +#define NL1 00000400 +#define NL2 00001000 +#define NL3 00001400 +#define TABDLY 00006000 +#define TAB0 00000000 +#define TAB1 00002000 +#define TAB2 00004000 +#define TAB3 00006000 +#define CRDLY 00030000 +#define CR0 00000000 +#define CR1 00010000 +#define CR2 00020000 +#define CR3 00030000 +#define FFDLY 00040000 +#define FF0 00000000 +#define FF1 00040000 +#define BSDLY 00100000 +#define BS0 00000000 +#define BS1 00100000 +#define VTDLY 00200000 +#define VT0 00000000 +#define VT1 00200000 +#define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. 
*/ + +/* c_cflag bit meaning */ +#define CBAUD 0000377 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CBAUDEX 0000000 +#define B57600 00020 +#define B115200 00021 +#define B230400 00022 +#define B460800 00023 +#define B500000 00024 +#define B576000 00025 +#define B921600 00026 +#define B1000000 00027 +#define B1152000 00030 +#define B1500000 00031 +#define B2000000 00032 +#define B2500000 00033 +#define B3000000 00034 +#define B3500000 00035 +#define B4000000 00036 + +#define CSIZE 00001400 +#define CS5 00000000 +#define CS6 00000400 +#define CS7 00001000 +#define CS8 00001400 + +#define CSTOPB 00002000 +#define CREAD 00004000 +#define PARENB 00010000 +#define PARODD 00020000 +#define HUPCL 00040000 + +#define CLOCAL 00100000 +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0x00000080 +#define ICANON 0x00000100 +#define XCASE 0x00004000 +#define ECHO 0x00000008 +#define ECHOE 0x00000002 +#define ECHOK 0x00000004 +#define ECHONL 0x00000010 +#define NOFLSH 0x80000000 +#define TOSTOP 0x00400000 +#define ECHOCTL 0x00000040 +#define ECHOPRT 0x00000020 +#define ECHOKE 0x00000001 +#define FLUSHO 0x00800000 +#define PENDIN 0x20000000 +#define IEXTEN 0x00000400 + +/* Values for the ACTION argument to `tcflow'. */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* Values for the QUEUE_SELECTOR argument to `tcflush'. */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _PPC64_TERMBITS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/termios.h linuxppc64_2_4/include/asm-ppc64/termios.h --- ../kernel.org/linux/include/asm-ppc64/termios.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/termios.h Mon May 7 15:32:23 2001 @@ -0,0 +1,236 @@ +#ifndef _PPC64_TERMIOS_H +#define _PPC64_TERMIOS_H + +/* + * Liberally adapted from alpha/termios.h. In particular, the c_cc[] + * fields have been reordered so that termio & termios share the + * common subset in the same order (for brain dead programs that don't + * know or care about the differences). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
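+ *
+ * (The termbits flags above are what userspace drives through
+ * tcgetattr/tcsetattr; a minimal raw-mode sketch, illustrative only,
+ * fd being any open tty:
+ *
+ *	struct termios t;
+ *	tcgetattr(fd, &t);
+ *	t.c_lflag &= ~(ICANON | ECHO);	-- no line buffering, no echo
+ *	t.c_cc[VMIN] = 1;		-- return after one byte
+ *	t.c_cc[VTIME] = 0;		-- no inter-byte timeout
+ *	tcsetattr(fd, TCSANOW, &t);
+ *
+ * with VMIN/VTIME and TCSANOW as defined in termbits.h above.)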
+ */ + +#include +#include + +struct sgttyb { + char sg_ispeed; + char sg_ospeed; + char sg_erase; + char sg_kill; + short sg_flags; +}; + +struct tchars { + char t_intrc; + char t_quitc; + char t_startc; + char t_stopc; + char t_eofc; + char t_brkc; +}; + +struct ltchars { + char t_suspc; + char t_dsuspc; + char t_rprntc; + char t_flushc; + char t_werasc; + char t_lnextc; +}; + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 10 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* c_cc characters */ +#define _VINTR 0 +#define _VQUIT 1 +#define _VERASE 2 +#define _VKILL 3 +#define _VEOF 4 +#define _VMIN 5 +#define _VEOL 6 +#define _VTIME 7 +#define _VEOL2 8 +#define _VSWTC 9 + +/* line disciplines */ +#define N_TTY 0 +#define N_SLIP 1 +#define N_MOUSE 2 +#define N_PPP 3 +#define N_STRIP 4 +#define N_AX25 5 +#define N_X25 6 /* X.25 async */ +#define N_6PACK 7 +#define N_MASC 8 /* Reserved for Mobitex module */ +#define N_R3964 9 /* Reserved for Simatic R3964 module */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus */ +#define N_IRDA 11 /* Linux IrDa - http://www.cs.uit.no/~dagb/irda/irda.html */ +#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */ +#define N_HDLC 13 /* synchronous HDLC */ +#define N_SYNC_PPP 14 + +#ifdef __KERNEL__ +/* ^C ^\ del ^U ^D 1 0 0 0 0 ^W ^R ^Z ^Q ^S ^V ^U */ +#define INIT_C_CC "\003\034\177\025\004\001\000\000\000\000\027\022\032\021\023\026\025" +#endif + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define 
TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 + +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCTTYGSTRUCT 0x5426 /* For debugging only */ + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#ifdef __KERNEL__ + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ + unsigned short __tmp; \ + get_user(__tmp,&(termio)->x); \ + (termios)->x = (0xffff0000 & (termios)->x) | __tmp; \ +} + +#define user_termio_to_kernel_termios(termios, termio) \ +({ \ + SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ + copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +#define kernel_termios_to_user_termio(termio, termios) \ +({ \ + put_user((termios)->c_iflag, &(termio)->c_iflag); \ + put_user((termios)->c_oflag, &(termio)->c_oflag); \ + put_user((termios)->c_cflag, &(termio)->c_cflag); \ + put_user((termios)->c_lflag, &(termio)->c_lflag); \ + put_user((termios)->c_line, &(termio)->c_line); \ + copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ +}) + +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __KERNEL__ */ + +#endif /* _PPC64_TERMIOS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/time.h linuxppc64_2_4/include/asm-ppc64/time.h --- ../kernel.org/linux/include/asm-ppc64/time.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/time.h Wed Oct 17 13:02:10 2001 @@ -0,0 +1,99 @@ +/* + * + * Common time prototypes and such for all ppc machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) to merge + * Paul Mackerras' version and mine for PReP and Pmac. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __PPC64_TIME_H +#define __PPC64_TIME_H + +#ifdef __KERNEL__ +#include +#include +#include + +#include +#include +#include + +/* time.c */ +extern unsigned long tb_ticks_per_jiffy; +extern unsigned long tb_ticks_per_usec; +extern unsigned long tb_ticks_per_sec; +extern unsigned long tb_to_xs; +extern unsigned tb_to_us; +extern unsigned long tb_last_stamp; + +extern void to_tm(int tim, struct rtc_time * tm); +extern time_t last_rtc_update; + +/* + * By putting all of this stuff into a single struct we + * reduce the number of cache lines touched by do_gettimeofday. + * Both by collecting all of the data in one cache line and + * by touching only one TOC entry + */ +struct gettimeofday_struct { + unsigned long tb_orig_stamp; + unsigned long stamp_xsec; + unsigned long tb_ticks_per_sec; + unsigned long tb_to_xs; + unsigned tb_to_us; +}; + +struct div_result { + unsigned long result_high; + unsigned long result_low; +}; + +int via_calibrate_decr(void); + +static __inline__ unsigned long get_tb(void) +{ + return mftb(); +} + +/* Accessor functions for the decrementer register. */ +static __inline__ unsigned int get_dec(void) +{ + return (mfspr(SPRN_DEC)); +} + +static __inline__ void set_dec(int val) +{ + struct Paca * paca; + int cur_dec; + + paca = (struct Paca *)mfspr(SPRG3); + if ( paca->xLpPaca.xSharedProc ) { + paca->xLpPaca.xVirtualDecr = val; + cur_dec = get_dec(); + if ( cur_dec > val ) + HvCall_setVirtualDecr(); + } + else + mtspr(SPRN_DEC, val); +} + +extern __inline__ unsigned long tb_ticks_since(unsigned long tstamp) { + return get_tb() - tstamp; +} + +#define mulhwu(x,y) \ +({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) +#define mulhdu(x,y) \ +({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) + + +unsigned mulhwu_scale_factor(unsigned, unsigned); +void div128_by_32( unsigned long dividend_high, unsigned long dividend_low, + unsigned divisor, struct div_result *dr ); +#endif /* __KERNEL__ */ +#endif /* __PPC64_TIME_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/timex.h linuxppc64_2_4/include/asm-ppc64/timex.h --- ../kernel.org/linux/include/asm-ppc64/timex.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/timex.h Mon May 14 04:22:58 2001 @@ -0,0 +1,33 @@ +/* + * linux/include/asm-ppc/timex.h + * + * PPC64 architecture timex specifications + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
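+ *
+ * (A sketch of how the time.h helpers above fit together; the timed
+ * region is illustrative only:
+ *
+ *	unsigned long t0 = get_tb();
+ *	... work being timed ...
+ *	unsigned long ticks = tb_ticks_since(t0);
+ *	unsigned long usecs = ticks / tb_ticks_per_usec;
+ *
+ * tb_ticks_per_usec is exported from time.c as declared above.)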
+ */ +#ifndef _ASMPPC64_TIMEX_H +#define _ASMPPC64_TIMEX_H + +#include + +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ +#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ + (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ + << (SHIFT_SCALE-SHIFT_HZ)) / HZ) + +typedef unsigned long cycles_t; +extern cycles_t cacheflush_time; + +static inline cycles_t get_cycles(void) +{ + cycles_t ret; + + __asm__ __volatile__("mftb %0" : "=r" (ret) : ); + return ret; +} + +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/tlb.h linuxppc64_2_4/include/asm-ppc64/tlb.h --- ../kernel.org/linux/include/asm-ppc64/tlb.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/tlb.h Wed Sep 26 03:18:35 2001 @@ -0,0 +1 @@ +#include diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/types.h linuxppc64_2_4/include/asm-ppc64/types.h --- ../kernel.org/linux/include/asm-ppc64/types.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/types.h Mon May 7 15:32:23 2001 @@ -0,0 +1,69 @@ +#ifndef _PPC64_TYPES_H +#define _PPC64_TYPES_H + +#ifndef __ASSEMBLY__ + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +typedef unsigned int umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long __s64; +typedef unsigned long __u64; + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#ifdef __KERNEL__ +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long s64; +typedef unsigned long u64; + +typedef __vector128 vector128; + +#define BITS_PER_LONG 64 + +/* PCI dma addresses are 32-bits wide. Ignore PCI64 for now, since + we'll typically be sending it all through iommu tables anyway. */ +typedef u32 dma_addr_t; + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _PPC64_TYPES_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/uaccess.h linuxppc64_2_4/include/asm-ppc64/uaccess.h --- ../kernel.org/linux/include/asm-ppc64/uaccess.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/uaccess.h Wed Jun 6 14:50:26 2001 @@ -0,0 +1,285 @@ +#ifndef _PPC64_UACCESS_H +#define _PPC64_UACCESS_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __ASSEMBLY__ +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define KERNEL_DS ((mm_segment_t) { 0 }) +#define USER_DS ((mm_segment_t) { 1 }) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->thread.fs) +#define set_fs(val) (current->thread.fs = (val)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) +#define __user_ok(addr,size) (((size) <= TASK_SIZE)&&((addr) <= TASK_SIZE-(size))) +#define __access_ok(addr,size) (__kernel_ok || __user_ok((addr),(size))) +#define access_ok(type,addr,size) __access_ok((unsigned long)(addr),(size)) + +extern inline int verify_area(int type, const void * addr, unsigned long size) +{ + return access_ok(type,addr,size) ? 0 : -EFAULT; +} + + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); +extern void sort_exception_table(void); + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the uglyness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + * + * As we use the same address space for kernel and user data on the + * PowerPC, we can just do these as direct assignments. (Of course, the + * exception handling means that it's no longer "just"...) 
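+ *
+ * A minimal sketch of the usual calling pattern (uptr and the
+ * enclosing handler are hypothetical):
+ *
+ *	int val;
+ *	if (get_user(val, (int *)uptr))
+ *		return -EFAULT;		-- faulting user pointer
+ *	val++;
+ *	if (put_user(val, (int *)uptr))
+ *		return -EFAULT;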
+ */ +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr))) +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +extern long __put_user_bad(void); + +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err; \ + __put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_check(x,ptr,size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __put_user_size((x),__pu_addr,(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __put_user_asm(x,ptr,retval,"stb"); break; \ + case 2: __put_user_asm(x,ptr,retval,"sth"); break; \ + case 4: __put_user_asm(x,ptr,retval,"stw"); break; \ + case 8: __put_user_asm(x,ptr,retval,"std"); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct *)(x)) + +/* + * We don't tell gcc that we are accessing memory, but this is OK + * because we do not write to any memory gcc knows about, so there + * are no aliasing issues. + */ +#define __put_user_asm(x, addr, err, op) \ + __asm__ __volatile__( \ + "1: "op" %1,0(%2)\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: li %0,%3\n" \ + " b 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .llong 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : "r"(x), "b"(addr), "i"(-EFAULT), "0"(err)) + + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + long __gu_err, __gu_val; \ + __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x,ptr,size) \ +({ \ + long __gu_err = -EFAULT, __gu_val = 0; \ + const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + if (access_ok(VERIFY_READ,__gu_addr,size)) \ + __get_user_size(__gu_val,__gu_addr,(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern long __get_user_bad(void); + +#define __get_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __get_user_asm(x,ptr,retval,"lbz"); break; \ + case 2: __get_user_asm(x,ptr,retval,"lhz"); break; \ + case 4: __get_user_asm(x,ptr,retval,"lwz"); break; \ + case 8: __get_user_asm(x,ptr,retval,"ld"); break; \ + default: (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, op) \ + __asm__ __volatile__( \ + "1: "op" %1,0(%2)\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: li %0,%3\n" \ + " li %1,0\n" \ + " b 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .llong 1b,3b\n" \ + ".previous" \ + : "=r"(err), "=r"(x) \ + : "b"(addr), "i"(-EFAULT), "0"(err)) + +/* more complex routines */ + +extern unsigned long __copy_tofrom_user(void *to, const void *from, unsigned long size); + +extern inline unsigned long +copy_from_user(void *to, const void *from, unsigned long n) +{ + unsigned long over; + + if (access_ok(VERIFY_READ, from, n)) + return __copy_tofrom_user(to, from, n); + if ((unsigned long)from < TASK_SIZE) { + over = (unsigned long)from + n - TASK_SIZE; + return __copy_tofrom_user(to, from, n - over) + over; + } + return n; +} + +extern inline unsigned long +copy_to_user(void *to, const void *from, 
unsigned long n) +{ + unsigned long over; + + if (access_ok(VERIFY_WRITE, to, n)) + return __copy_tofrom_user(to, from, n); + if ((unsigned long)to < TASK_SIZE) { + over = (unsigned long)to + n - TASK_SIZE; + return __copy_tofrom_user(to, from, n - over) + over; + } + return n; +} + +#define __copy_from_user(to, from, size) \ + __copy_tofrom_user((to), (from), (size)) +#define __copy_to_user(to, from, size) \ + __copy_tofrom_user((to), (from), (size)) + +extern unsigned long __clear_user(void *addr, unsigned long size); + +extern inline unsigned long +clear_user(void *addr, unsigned long size) +{ + if (access_ok(VERIFY_WRITE, addr, size)) + return __clear_user(addr, size); + return size? -EFAULT: 0; +} + +extern int __strncpy_from_user(char *dst, const char *src, long count); + +extern inline long +strncpy_from_user(char *dst, const char *src, long count) +{ + if (access_ok(VERIFY_READ, src, 1)) + return __strncpy_from_user(dst, src, count); + return -EFAULT; +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 for error + */ + +extern int __strnlen_user(const char *str, long len, unsigned long top); + +/* + * Returns the length of the string at str (including the null byte), + * or 0 if we hit a page we can't access, + * or something > len if we didn't find a null byte. + * + * The `top' parameter to __strnlen_user is to make sure that + * we can never overflow from the user area into kernel space. + */ +extern __inline__ int strnlen_user(const char *str, long len) +{ + unsigned long top = __kernel_ok? ~0UL: TASK_SIZE - 1; + + if ((unsigned long)str > top) + return 0; + return __strnlen_user(str, len, top); +} + +#define strlen_user(str) strnlen_user((str), 0x7ffffffe) + +#endif /* __ASSEMBLY__ */ + +#endif /* _PPC64_UACCESS_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/ucontext.h linuxppc64_2_4/include/asm-ppc64/ucontext.h --- ../kernel.org/linux/include/asm-ppc64/ucontext.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ucontext.h Mon May 7 15:32:23 2001 @@ -0,0 +1,34 @@ +#ifndef _ASMPPC64_UCONTEXT_H +#define _ASMPPC64_UCONTEXT_H + +/* Copied from i386. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext_struct uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#ifdef __KERNEL__ + + +struct ucontext32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_32_t uc_stack; + struct sigcontext32_struct uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* __KERNEL__ */ + + +#endif /* _ASMPPC64_UCONTEXT_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/udbg.h linuxppc64_2_4/include/asm-ppc64/udbg.h --- ../kernel.org/linux/include/asm-ppc64/udbg.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/udbg.h Tue Sep 25 13:53:55 2001 @@ -0,0 +1,27 @@ +#ifndef __UDBG_HDR +#define __UDBG_HDR + +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
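+ *
+ * (On the copy_to_user/copy_from_user routines above: they return the
+ * number of bytes that could NOT be copied, so zero means success and
+ * the usual pattern is, with ubuf/kbuf/len as illustrative names,
+ *
+ *	if (copy_to_user(ubuf, kbuf, len))
+ *		return -EFAULT;
+ *
+ * partial copies near the TASK_SIZE boundary return the uncopied
+ * tail.)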
+ */ + +void udbg_init_uart(void *comport); +void udbg_putc(unsigned char c); +unsigned char udbg_getc(void); +int udbg_getc_poll(void); +void udbg_puts(const char *s); +int udbg_write(const char *s, int n); +int udbg_read(char *buf, int buflen); +void udbg_puthex(unsigned long val); +void udbg_printSP(const char *s); +void udbg_printf(const char *fmt, ...); +void udbg_ppcdbg(unsigned long flags, const char *fmt, ...); +unsigned long udbg_ifdebug(unsigned long flags); + +void udbg_init_uart(void *comport); +#endif diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/unaligned.h linuxppc64_2_4/include/asm-ppc64/unaligned.h --- ../kernel.org/linux/include/asm-ppc64/unaligned.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/unaligned.h Mon May 7 15:32:23 2001 @@ -0,0 +1,21 @@ +#ifndef __PPC64_UNALIGNED_H +#define __PPC64_UNALIGNED_H + +/* + * The PowerPC can do unaligned accesses itself in big endian mode. + * + * The strange macros are there to make sure these can't + * be misused in a way that makes them not work on other + * architectures where unaligned accesses aren't as simple. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define get_unaligned(ptr) (*(ptr)) + +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +#endif /* __PPC64_UNALIGNED_H */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/unistd.h linuxppc64_2_4/include/asm-ppc64/unistd.h --- ../kernel.org/linux/include/asm-ppc64/unistd.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/unistd.h Mon May 7 15:32:23 2001 @@ -0,0 +1,422 @@ +#ifndef _ASM_PPC_UNISTD_H_ +#define _ASM_PPC_UNISTD_H_ + +/* + * This file contains the system call numbers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
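+ *
+ * (On get_unaligned/put_unaligned above: big-endian PowerPC handles
+ * misaligned loads and stores in hardware, so the macros reduce to
+ * plain dereferences; a sketch, with buf and the offsets illustrative:
+ *
+ *	u32 v = get_unaligned((u32 *)(buf + 3));
+ *	put_unaligned(v, (u32 *)(buf + 7));
+ *
+ * On architectures without this property the same macros expand to
+ * byte-wise accesses, which is why callers should never open-code the
+ * cast.)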
+ */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 
+#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread 179 +#define __NR_pwrite 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 /* some people actually want streams */ +#define __NR_putpmsg 188 /* some people actually want streams */ +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 + +#define __NR(n) #n + + +#define __syscall_return(type) \ + return (__sc_err & 0x10000000 ? 
errno = __sc_ret, __sc_ret = -1 : 0), \ + (type) __sc_ret + +#define __syscall_clobbers \ + "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + +#define _syscall0(type,name) \ +type name(void) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + +#define _syscall1(type,name,type1,arg1) \ +type name(type1 arg1) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + \ + __sc_3 = (unsigned long) (arg1); \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + +#define _syscall2(type,name,type1,arg1,type2,arg2) \ +type name(type1 arg1, type2 arg2) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + \ + __sc_3 = (unsigned long) (arg1); \ + __sc_4 = (unsigned long) (arg2); \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0), \ + "r" (__sc_4) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + +#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ +type name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + \ + __sc_3 = (unsigned long) (arg1); \ + __sc_4 = (unsigned long) (arg2); \ + __sc_5 = (unsigned long) (arg3); \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0), \ + "r" (__sc_4), \ + "r" (__sc_5) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + +#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + \ + __sc_3 = (unsigned long) (arg1); \ + __sc_4 = (unsigned long) (arg2); \ + __sc_5 = (unsigned long) (arg3); \ + __sc_6 = (unsigned long) (arg4); \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0), \ + "r" (__sc_4), \ + "r" (__sc_5), \ + "r" (__sc_6) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + +#define 
_syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ +{ \ + unsigned long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + \ + __sc_3 = (unsigned long) (arg1); \ + __sc_4 = (unsigned long) (arg2); \ + __sc_5 = (unsigned long) (arg3); \ + __sc_6 = (unsigned long) (arg4); \ + __sc_7 = (unsigned long) (arg5); \ + __sc_0 = __NR_##name; \ + __asm__ __volatile__ \ + ("sc \n\t" \ + "mfcr %1 " \ + : "=&r" (__sc_3), "=&r" (__sc_0) \ + : "0" (__sc_3), "1" (__sc_0), \ + "r" (__sc_4), \ + "r" (__sc_5), \ + "r" (__sc_6), \ + "r" (__sc_7) \ + : __syscall_clobbers); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + __syscall_return (type); \ +} + + +#ifdef __KERNEL_SYSCALLS__ + +/* + * Forking from kernel space will result in the child getting a new, + * empty kernel stack area. Thus the child cannot access automatic + * variables set in the parent unless they are in registers, and the + * procedure where the fork was done cannot return to its caller in + * the child. + */ + +/* + * System call prototypes. + */ +#define __NR__exit __NR_exit +static inline _syscall0(int,pause) +static inline _syscall0(int,sync) +static inline _syscall0(pid_t,setsid) +static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count) +static inline _syscall3(int,read,int,fd,char *,buf,off_t,count) +static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count) +static inline _syscall1(int,dup,int,fd) +static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp) +static inline _syscall3(int,open,const char *,file,int,flag,int,mode) +static inline _syscall1(int,close,int,fd) +static inline _syscall1(int,_exit,int,exitcode) +static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options) +static inline _syscall1(int,delete_module,const char *,name) + +static inline pid_t wait(int * wait_stat) +{ + return waitpid(-1,wait_stat,0); +} + +#endif /* __KERNEL_SYSCALLS__ */ + +#endif /* _ASM_PPC_UNISTD_H_ */ diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/user.h linuxppc64_2_4/include/asm-ppc64/user.h --- ../kernel.org/linux/include/asm-ppc64/user.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/user.h Mon May 7 15:32:23 2001 @@ -0,0 +1,58 @@ +#ifndef _PPC_USER_H +#define _PPC_USER_H + +/* Adapted from + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd, NOT the osf-core). The file contents + * are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. 
+ * data: The data segment follows next. We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed. No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range. All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ * stack: We need the stack information in order to get a meaningful
+ *	backtrace. We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ */
+struct user {
+	struct pt_regs	regs;			/* entire machine state */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	struct regs *	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#define NBPG			PAGE_SIZE
+#define UPAGES			1
+#define HOST_TEXT_START_ADDR	(u.start_code)
+#define HOST_DATA_START_ADDR	(u.start_data)
+#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _PPC_USER_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/vc_ioctl.h linuxppc64_2_4/include/asm-ppc64/vc_ioctl.h
--- ../kernel.org/linux/include/asm-ppc64/vc_ioctl.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/vc_ioctl.h	Mon May 7 15:32:23 2001
@@ -0,0 +1,50 @@
+#ifndef _LINUX_VC_IOCTL_H
+#define _LINUX_VC_IOCTL_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+struct vc_mode {
+	int	height;
+	int	width;
+	int	depth;
+	int	pitch;
+	int	mode;
+	char	name[32];
+	unsigned long fb_address;
+	unsigned long cmap_adr_address;
+	unsigned long cmap_data_address;
+	unsigned long disp_reg_address;
+};
+
+#define VC_GETMODE	0x7667
+#define VC_SETMODE	0x7668
+#define VC_INQMODE	0x7669
+
+#define VC_SETCMAP	0x766a
+#define VC_GETCMAP	0x766b
+
+#define VC_POWERMODE	0x766c
+
+/* Values for the argument to the VC_POWERMODE ioctl */
+#define VC_POWERMODE_INQUIRY	(-1)
+#define VESA_NO_BLANKING	0
+#define VESA_VSYNC_SUSPEND	1
+#define VESA_HSYNC_SUSPEND	2
+#define VESA_POWERDOWN		3
+
+#ifdef __KERNEL__
+extern int console_getmode(struct vc_mode *);
+extern int console_setmode(struct vc_mode *, int);
+extern int console_setcmap(int, unsigned char *, unsigned char *,
+			   unsigned char *);
+extern int console_powermode(int);
+extern struct vc_mode display_info;
+extern struct fb_info *console_fb_info;
+#endif
+
+#endif /* _LINUX_VC_IOCTL_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/vga.h linuxppc64_2_4/include/asm-ppc64/vga.h
--- ../kernel.org/linux/include/asm-ppc64/vga.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/vga.h	Mon May 7 15:32:23 2001
@@ -0,0 +1,50 @@
+/*
+ * Access to VGA videoram
+ *
+ * (c) 1998 Martin Mares
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+#include
+
+#include
+
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE)
+
+#define VT_BUF_HAVE_RW
+/*
+ * These are only needed for supporting VGA or MDA text mode, which use little
+ * endian byte ordering.
+ * In other cases, we can optimize by using native byte ordering and
+ * has already done the right job for us.
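+ *
+ * As a concrete illustration: the text-mode cell 0x0741, that is,
+ * character 'A' (0x41) with attribute 0x07, must be stored with 0x41
+ * at the lower address because the VGA buffer is little endian. On a
+ * big-endian PowerPC, st_le16() below performs exactly that
+ * byte-reversed store, and ld_le16() the matching load.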
+ */
+
+extern inline void scr_writew(u16 val, volatile u16 *addr)
+{
+	st_le16(addr, val);
+}
+
+extern inline u16 scr_readw(volatile const u16 *addr)
+{
+	return ld_le16(addr);
+}
+
+#define VT_BUF_HAVE_MEMCPYW
+#define scr_memcpyw	memcpy
+
+#endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */
+
+extern unsigned long vgacon_remap_base;
+#define VGA_MAP_MEM(x) ((unsigned long) ioremap((x), 0))
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
+
+#endif
diff -uNr --exclude=CVS ../kernel.org/linux/include/asm-ppc64/xor.h linuxppc64_2_4/include/asm-ppc64/xor.h
--- ../kernel.org/linux/include/asm-ppc64/xor.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/asm-ppc64/xor.h	Thu Sep 13 13:06:08 2001
@@ -0,0 +1 @@
+#include
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/blk.h linuxppc64_2_4/include/linux/blk.h
--- ../kernel.org/linux/include/linux/blk.h	Wed Oct 24 00:01:01 2001
+++ linuxppc64_2_4/include/linux/blk.h	Fri Oct 26 02:52:26 2001
@@ -46,6 +46,8 @@
 extern int bpcd_init(void);
 extern int ps2esdi_init(void);
 extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
 
 #if defined(CONFIG_ARCH_S390)
 extern int mdisk_init(void);
@@ -314,6 +316,21 @@
 #define DEVICE_REQUEST i2ob_request
 #define DEVICE_NR(device) (MINOR(device)>>4)
 
+#elif (MAJOR_NR == VIODASD_MAJOR)
+
+#define DEVICE_NAME "viod"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_viodasd_request
+#define DEVICE_NR(device) (MINOR(device) >> 3)
+
+#elif (MAJOR_NR == VIOCD_MAJOR)
+
+#define DEVICE_NAME "viocd"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_viocd_request
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_ON(device)
+#define DEVICE_OFF(device)
 
 #elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
 
 #define DEVICE_NAME "ida"
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/highmem.h linuxppc64_2_4/include/linux/highmem.h
--- ../kernel.org/linux/include/linux/highmem.h	Tue Oct 23 23:59:06 2001
+++ linuxppc64_2_4/include/linux/highmem.h	Fri Oct 26 02:52:26 2001
@@ -43,20 +43,44 @@
 #endif /* CONFIG_HIGHMEM */
 
 /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
-static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
+static inline void clear_mem_page(struct page *page)
+{
+	clear_page(kmap(page));
+	kunmap(page);
+}
+
+static inline void copy_mem_page(struct page *to, struct page *from)
+{
+	char *vfrom, *vto;
+
+	vfrom = kmap(from);
+	vto = kmap(to);
+	copy_page(vto, vfrom);
+	kunmap(from);
+	kunmap(to);
+}
+
+#ifndef __HAVE_ARCH_USER_PAGE
+static inline void clear_user_page(struct page *page, unsigned long vaddr)
 {
 	void *addr = kmap_atomic(page, KM_USER0);
-	clear_user_page(addr, vaddr);
+	clear_page(addr);	/* default version: no D-cache aliasing to handle */
 	kunmap_atomic(addr, KM_USER0);
 }
 
-static inline void clear_highpage(struct page *page)
+static inline void copy_user_page(struct page *to, struct page *from, unsigned long vaddr)
 {
-	clear_page(kmap(page));
-	kunmap(page);
+	char *vfrom, *vto;
+
+	vfrom = kmap_atomic(from, KM_USER0);
+	vto = kmap_atomic(to, KM_USER1);
+	copy_page(vto, vfrom);	/* plain copy; calling copy_user_page() here would recurse */
+	kunmap_atomic(vfrom, KM_USER0);
+	kunmap_atomic(vto, KM_USER1);
 }
+#endif
 
-static inline void memclear_highpage(struct page *page, unsigned int offset, unsigned int size)
+static inline void memclear_page(struct page *page, unsigned int offset, unsigned int size)
 {
 	char *kaddr;
 
@@ -70,7 +94,7 @@
 
 /*
  * Same but also flushes aliased cache contents to RAM.
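 * (Concretely: the helper kmaps the page, memsets the requested
 * range, and then calls flush_page_to_ram() so that any cache lines
 * aliasing the kernel mapping are written back before the page is
 * reused.)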
 */
-static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
+static inline void memclear_page_flush(struct page *page, unsigned int offset, unsigned int size)
 {
 	char *kaddr;
 
@@ -80,28 +104,6 @@
 	memset(kaddr + offset, 0, size);
 	flush_page_to_ram(page);
 	kunmap(page);
-}
-
-static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr)
-{
-	char *vfrom, *vto;
-
-	vfrom = kmap_atomic(from, KM_USER0);
-	vto = kmap_atomic(to, KM_USER1);
-	copy_user_page(vto, vfrom, vaddr);
-	kunmap_atomic(vfrom, KM_USER0);
-	kunmap_atomic(vto, KM_USER1);
-}
-
-static inline void copy_highpage(struct page *to, struct page *from)
-{
-	char *vfrom, *vto;
-
-	vfrom = kmap(from);
-	vto = kmap(to);
-	copy_page(vto, vfrom);
-	kunmap(from);
-	kunmap(to);
 }
 
 #endif /* _LINUX_HIGHMEM_H */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/endian24.h linuxppc64_2_4/include/linux/jfs/endian24.h
--- ../kernel.org/linux/include/linux/jfs/endian24.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs/endian24.h	Thu Sep 13 14:29:39 2001
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _H_ENDIAN24
+#define _H_ENDIAN24
+
+/*
+ * linux/jfs/endian24.h:
+ *
+ * Endian conversion for 24-bit data
+ *
+ */
+#define __swab24(x) \
+({ \
+	__u32 __x = (x); \
+	((__u32)( \
+		((__x & (__u32)0x000000ffUL) << 16) | \
+		 (__x & (__u32)0x0000ff00UL)        | \
+		((__x & (__u32)0x00ff0000UL) >> 16) )); \
+})
+
+#if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN))
+	#define __cpu_to_le24(x) ((__u32)(x))
+	#define __le24_to_cpu(x) ((__u32)(x))
+#else
+	#define __cpu_to_le24(x) __swab24(x)
+	#define __le24_to_cpu(x) __swab24(x)
+#endif
+
+#ifdef __KERNEL__
+	#define cpu_to_le24 __cpu_to_le24
+	#define le24_to_cpu __le24_to_cpu
+#endif
+
+#endif /* !_H_ENDIAN24 */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_btree.h linuxppc64_2_4/include/linux/jfs/jfs_btree.h
--- ../kernel.org/linux/include/linux/jfs/jfs_btree.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs/jfs_btree.h	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_BTREE +#define _H_JFS_BTREE +/* + * jfs_btree.h: B+-tree + * + * JFS B+-tree (dtree and xtree) common definitions + */ + +/* + * basic btree page - btpage_t + */ +typedef struct { + s64 next; /* 8: right sibling bn */ + s64 prev; /* 8: left sibling bn */ + + u8 flag; /* 1: */ + u8 rsrvd[7]; /* 7: type specific */ + s64 self; /* 8: self address */ + + u8 entry[4064]; /* 4064: */ +} btpage_t; /* (4096) */ + +/* btpaget_t flag */ +#define BT_TYPE 0x07 /* B+-tree index */ +#define BT_ROOT 0x01 /* root page */ +#define BT_LEAF 0x02 /* leaf page */ +#define BT_INTERNAL 0x04 /* internal page */ +#define BT_RIGHTMOST 0x10 /* rightmost page */ +#define BT_LEFTMOST 0x20 /* leftmost page */ + +/* btorder (in inode) */ +#define BT_RANDOM 0x0000 +#define BT_SEQUENTIAL 0x0001 +#define BT_LOOKUP 0x0010 +#define BT_INSERT 0x0020 +#define BT_DELETE 0x0040 + +/* + * btree page buffer cache access + */ +#define BT_IS_ROOT(MP) (((MP)->xflag & COMMIT_PAGE) == 0) + +/* get page from buffer page */ +#define BT_PAGE(IP, MP, TYPE, ROOT)\ + (BT_IS_ROOT(MP) ? (TYPE *)&JFS_IP(IP)->ROOT : (TYPE *)(MP)->data) + +/* get the page buffer and the page for specified block address */ +#define BT_GETPAGE(IP, BN, MP, TYPE, SIZE, P, RC, ROOT)\ +{\ + if ((BN) == 0)\ + {\ + MP = (metapage_t *)&JFS_IP(IP)->bxflag;\ + P = (TYPE *)&JFS_IP(IP)->ROOT;\ + RC = 0;\ + jEVENT(0,("%d BT_GETPAGE returning root\n", __LINE__));\ + }\ + else\ + {\ + jEVENT(0,("%d BT_GETPAGE reading block %d\n", __LINE__,\ + (int)BN));\ + MP = read_metapage((IP), BN, SIZE, 1);\ + if (MP) {\ + RC = 0;\ + P = (MP)->data;\ + } else {\ + P = NULL;\ + jERROR(1,("bread failed!\n"));\ + RC = EIO;\ + }\ + }\ +} + +#define BT_MARK_DIRTY(MP, IP)\ +{\ + if (BT_IS_ROOT(MP))\ + mark_inode_dirty(IP);\ + else\ + mark_metapage_dirty(MP);\ +} + +/* put the page buffer */ +#define BT_PUTPAGE(MP)\ +{\ + if (! BT_IS_ROOT(MP)) \ + release_metapage(MP); \ +} + + +/* + * btree traversal stack + * + * record the path traversed during the search; + * top frame record the leaf page/entry selected. + */ +#define MAXTREEHEIGHT 8 +typedef struct btframe { /* stack frame */ + s64 bn; /* 8: */ + s16 index; /* 2: */ + s16 lastindex; /* 2: */ + struct metapage *mp; /* 4: */ +} btframe_t; /* (16) */ + +typedef struct btstack { + btframe_t *top; /* 4: */ + int nsplit; /* 4: */ + btframe_t stack[MAXTREEHEIGHT]; +} btstack_t; + +#define BT_CLR(btstack)\ + (btstack)->top = (btstack)->stack + +#define BT_PUSH(BTSTACK, BN, INDEX)\ +{\ + (BTSTACK)->top->bn = BN;\ + (BTSTACK)->top->index = INDEX;\ + ++(BTSTACK)->top;\ + assert((BTSTACK)->top != &((BTSTACK)->stack[MAXTREEHEIGHT]));\ +} + +#define BT_POP(btstack)\ + ( (btstack)->top == (btstack)->stack ? NULL : --(btstack)->top ) + +#define BT_STACK(btstack)\ + ( (btstack)->top == (btstack)->stack ? NULL : (btstack)->top ) + +/* retrieve search results */ +#define BT_GETSEARCH(IP, LEAF, BN, MP, TYPE, P, INDEX, ROOT)\ +{\ + BN = (LEAF)->bn;\ + MP = (LEAF)->mp;\ + if (BN)\ + P = (TYPE *)MP->data;\ + else\ + P = (TYPE *)&JFS_IP(IP)->ROOT;\ + INDEX = (LEAF)->index;\ +} + +/* put the page buffer of search */ +#define BT_PUTSEARCH(BTSTACK)\ +{\ + if (! 
BT_IS_ROOT((BTSTACK)->top->mp))\ + release_metapage((BTSTACK)->top->mp);\ +} +#endif /* _H_JFS_BTREE */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_compat.h linuxppc64_2_4/include/linux/jfs/jfs_compat.h --- ../kernel.org/linux/include/linux/jfs/jfs_compat.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_compat.h Wed Nov 14 10:22:29 2001 @@ -0,0 +1,70 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_JFS_COMPAT +#define _H_JFS_COMPAT + +/* + * jfs_compat.h: + * + * Definitions to allow JFS to build on older kernels. + * + * This file should be removed when JFS is merged with linux kernel + * + */ + +#include +#include +#include + +#ifndef MODULE_LICENSE +#define MODULE_LICENSE(x) +#endif + +#ifndef GFP_NOFS +#define GFP_NOFS GFP_BUFFER +#endif + +#if !defined(KERNEL_HAS_O_DIRECT) +#define fsync_inode_data_buffers fsync_inode_buffers +#endif + +/* + * Linux 2.4.9 has broken min/max macros. + * Linux < 2.4.9 doesn't have min/max at all. + */ +#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,9)) +#undef min +#undef max +#endif + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,9)) +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) +#endif + +#endif /* !_H_JFS_COMPAT */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_debug.h linuxppc64_2_4/include/linux/jfs/jfs_debug.h --- ../kernel.org/linux/include/linux/jfs/jfs_debug.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_debug.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,127 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _H_JFS_DEBUG +#define _H_JFS_DEBUG + +/* + * jfs_debug.h + * + * global debug message, data structure/macro definitions + * under control of CONFIG_JFS_DEBUG, _JFS_STATISTICS; + */ + +#ifdef CONFIG_JFS_DEBUG +void dump_mem(char *label, void *data, int length); +#else +static __inline__ void dump_mem(char *label, void *data, int length) {}; +#endif + +/* + * assert with traditional printf/panic + */ +#ifdef CONFIG_KERNEL_ASSERTS +/* kgdb stuff */ +#define assert(p) KERNEL_ASSERT(#p, p) +#else +#define assert(p) {\ +if (!(p))\ + {\ + printk("assert(%s)\n",#p);\ + BUG();\ + }\ +} +#endif + +/* + * chatterbox control + */ +/* temporary until cleanup */ +#define NOISE(button,prspec) + +/* + * debug ON + * -------- + */ +#ifdef CONFIG_JFS_DEBUG +#define ASSERT(p) assert(p) + +/* information message: e.g., configuration, major event */ +extern int jfsFYI; +#define jFYI(button,prspec)\ +{ if (button && jfsFYI) printk prspec; } + +/* + * chatterbox control + */ +/* debug event message: */ +#define jEVENT(button,prspec)\ +{ if (button) printk prspec; } + +/* alert warning message: e.g., critical event */ +extern int jfsALERT; +#define jALERT(button, prspec)\ +{ if (button && jfsALERT) printk prspec; } + +/* error event message: e.g., i/o error */ +extern int jfsERROR; +#define jERROR(button, prspec)\ +{ if (button && jfsERROR) { printk prspec; if (button > 1) BUG(); } } + +/* if dial is set above volume level of given message, print it */ +#define jNOISESET(dial, level) int dial = (level); +#define jNOISEGET(dial) extern dial; +#define jNOISE(dial,level,prspec)\ +{ if ((dial) >= (level)) printk prspec; } + +/* invoke sanity check function */ +#define jSANITY(funct, arg)\ +{ funct(arg); } + +/* + * debug OFF + * --------- + */ +#else /* CONFIG_JFS_DEBUG */ +#define ASSERT(p) +#define jEVENT(button,prspec) +#define jERROR(button,prspec) +#define jALERT(button,prspec) +#define jFYI(button,prspec) +#define jNOISESET(dial, level) +#define jNOISEGET(dial) +#define jNOISE(dial,level,prspec) +#define jSANITY(funct, arg) +#endif /* CONFIG_JFS_DEBUG */ + +/* + * statistics + * ---------- + */ +#ifdef _JFS_STATISTICS +#define INCREMENT(x) ((x)++) +#define DECREMENT(x) ((x)--) +#define HIGHWATERMARK(x,y) x = MAX((x), (y)) +#else +#define INCREMENT(x) +#define DECREMENT(x) +#define HIGHWATERMARK(x,y) +#endif /* _JFS_STATISTICS */ + +#endif /* _H_JFS_DEBUG */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_defragfs.h linuxppc64_2_4/include/linux/jfs/jfs_defragfs.h --- ../kernel.org/linux/include/linux/jfs/jfs_defragfs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_defragfs.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,55 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_DEFRAGFS +#define _H_JFS_DEFRAGFS + +/* + * jfs_defragfs.h + */ +/* + * defragfs parameter list + */ +typedef struct { + uint flag; /* 4: */ + u8 dev; /* 1: */ + u8 pad[3]; /* 3: */ + s32 fileset; /* 4: */ + u32 inostamp; /* 4: */ + u32 ino; /* 4: */ + u32 gen; /* 4: */ + s64 xoff; /* 8: */ + s64 old_xaddr; /* 8: */ + s64 new_xaddr; /* 8: */ + s32 xlen; /* 4: */ +} defragfs_t; /* (52) */ + +/* plist flag */ +#define DEFRAGFS_SYNC 0x80000000 +#define DEFRAGFS_COMMIT 0x40000000 +#define DEFRAGFS_RELOCATE 0x10000000 + +#define INODE_TYPE 0x0000F000 /* IFREG or IFDIR */ + +#define EXTENT_TYPE 0x000000ff +#define DTPAGE 0x00000001 +#define XTPAGE 0x00000002 +#define DATAEXT 0x00000004 +#define EAEXT 0x00000008 + +#endif /* _H_JFS_DEFRAGFS */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_dinode.h linuxppc64_2_4/include/linux/jfs/jfs_dinode.h --- ../kernel.org/linux/include/linux/jfs/jfs_dinode.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_dinode.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,157 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _H_JFS_DINODE +#define _H_JFS_DINODE + +/* + * jfs_dinode.h: on-disk inode manager + * + */ + +#define INODESLOTSIZE 128 +#define L2INODESLOTSIZE 7 +#define log2INODESIZE 9 /* log2(bytes per dinode) */ + + +/* + * on-disk inode (dinode_t): 512 bytes + * + * note: align 64-bit fields on 8-byte boundary. + */ +struct dinode { + /* + * I. base area (128 bytes) + * ------------------------ + * + * define generic/POSIX attributes + */ + u32 di_inostamp; /* 4: stamp to show inode belongs to fileset */ + s32 di_fileset; /* 4: fileset number */ + u32 di_number; /* 4: inode number, aka file serial number */ + u32 di_gen; /* 4: inode generation number */ + + pxd_t di_ixpxd; /* 8: inode extent descriptor */ + + s64 di_size; /* 8: size */ + s64 di_nblocks; /* 8: number of blocks allocated */ + + u32 di_nlink; /* 4: number of links to the object */ + + u32 di_uid; /* 4: user id of owner */ + u32 di_gid; /* 4: group id of owner */ + + u32 di_mode; /* 4: attribute, format and permission */ + + struct timestruc_t di_atime; /* 8: time last data accessed */ + struct timestruc_t di_ctime; /* 8: time last status changed */ + struct timestruc_t di_mtime; /* 8: time last data modified */ + struct timestruc_t di_otime; /* 8: time created */ + + dxd_t di_acl; /* 16: acl descriptor */ + + dxd_t di_ea; /* 16: ea descriptor */ + + u32 di_next_index; /* 4: Next available dir_table index */ + + s32 di_acltype; /* 4: Type of ACL */ + + /* + * Extension Areas. 
+ * + * Historically, the inode was partitioned into 4 128-byte areas, + * the last 3 being defined as unions which could have multiple + * uses. The first 96 bytes had been completely unused until + * an index table was added to the directory. It is now more + * useful to describe the last 3/4 of the inode as a single + * union. We would probably be better off redesigning the + * entire structure from scratch, but we don't want to break + * commonality with OS/2's JFS at this time. + */ + union { + struct { + /* + * This table contains the information needed to + * find a directory entry from a 32-bit index. + * If the index is small enough, the table is inline, + * otherwise, an x-tree root overlays this table + */ + dir_table_slot_t _table[12]; /* 96: inline */ + + dtroot_t _dtroot; /* 288: dtree root */ + } _dir; /* (384) */ +#define di_dirtable u._dir._table +#define di_dtroot u._dir._dtroot +#define di_parent di_dtroot.header.idotdot +#define di_DASD di_dtroot.header.DASD + + struct { + union { + u8 _data[96]; /* 96: unused */ + struct { + void *_imap; /* 4: unused */ + u32 _gengen; /* 4: generator */ + } _imap; + } _u1; /* 96: */ +#define di_gengen u._file._u1._imap._gengen + + union { + xtpage_t _xtroot; + struct { + u8 unused[16]; /* 16: */ + dxd_t _dxd; /* 16: */ + union { + u32 _rdev; /* 4: */ + u8 _fastsymlink[128]; + } _u; + u8 _inlineea[128]; + } _special; + } _u2; + } _file; +#define di_xtroot u._file._u2._xtroot +#define di_dxd u._file._u2._special._dxd +#define di_btroot di_xtroot +#define di_inlinedata u._file._u2._special._u +#define di_rdev u._file._u2._special._u._rdev +#define di_fastsymlink u._file._u2._special._u._fastsymlink +#define di_inlineea u._file._u2._special._inlineea + } u; +}; + +typedef struct dinode dinode_t; + + +/* extended mode bits (on-disk inode di_mode) */ +#define IFJOURNAL 0x00010000 /* journalled file */ +#define ISPARSE 0x00020000 /* sparse file enabled */ +#define INLINEEA 0x00040000 /* inline EA area free */ +#define ISWAPFILE 0x00800000 /* file open for pager swap space */ + +/* more extended mode bits: attributes for OS/2 */ +#define IREADONLY 0x02000000 /* no write access to file */ +#define IARCHIVE 0x40000000 /* file archive bit */ +#define ISYSTEM 0x08000000 /* system file */ +#define IHIDDEN 0x04000000 /* hidden file */ +#define IRASH 0x4E000000 /* mask for changeable attributes */ +#define INEWNAME 0x80000000 /* non-8.3 filename format */ +#define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ +#define ATTRSHIFT 25 /* bits to shift to move attribute + specification to mode position */ + +#endif /*_H_JFS_DINODE */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_dmap.h linuxppc64_2_4/include/linux/jfs/jfs_dmap.h --- ../kernel.org/linux/include/linux/jfs/jfs_dmap.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_dmap.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,301 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * jfs_dmap.h: block allocation map manager + */ + +#ifndef _H_JFS_DMAP +#define _H_JFS_DMAP + +#include + +#define BMAPVERSION 1 /* version number */ +#define TREESIZE (256+64+16+4+1) /* size of a dmap tree */ +#define LEAFIND (64+16+4+1) /* index of 1st leaf of a dmap tree */ +#define LPERDMAP 256 /* num leaves per dmap tree */ +#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ +#define DBWORD 32 /* # of blks covered by a map word */ +#define L2DBWORD 5 /* l2 # of blks covered by a mword */ +#define BUDMIN L2DBWORD /* max free string in a map word */ +#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ +#define L2BPERDMAP 13 /* l2 num of blks per dmap */ +#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ +#define CTLLEAFIND (256+64+16+4+1) /* idx of 1st leaf of a dmapctl tree */ +#define LPERCTL 1024 /* num of leaves per dmapctl tree */ +#define L2LPERCTL 10 /* l2 num of leaves per dmapctl tree */ +#define ROOT 0 /* index of the root of a tree */ +#define NOFREE ((s8) -1) /* no blocks free */ +#define MAXAG 128 /* max number of allocation groups */ +#define L2MAXAG 7 /* l2 max num of AG */ +#define L2MINAGSZ 25 /* l2 of minimum AG size in bytes */ +#define BMAPBLKNO 0 /* lblkno of bmap within the map */ + +/* + * maximum l2 number of disk blocks at the various dmapctl levels. + */ +#define L2MAXL0SIZE (L2BPERDMAP + 1 * L2LPERCTL) +#define L2MAXL1SIZE (L2BPERDMAP + 2 * L2LPERCTL) +#define L2MAXL2SIZE (L2BPERDMAP + 3 * L2LPERCTL) + +/* + * maximum number of disk blocks at the various dmapctl levels. + */ +#define MAXL0SIZE ((s64)1 << L2MAXL0SIZE) +#define MAXL1SIZE ((s64)1 << L2MAXL1SIZE) +#define MAXL2SIZE ((s64)1 << L2MAXL2SIZE) + +#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ + +/* + * determine the maximum free string for four (lower level) nodes + * of the tree. + */ +static __inline signed char TREEMAX(signed char *cp) +{ + signed char tmp1, tmp2; + + tmp1 = max(*(cp+2), *(cp+3)); + tmp2 = max(*(cp), *(cp+1)); + + return max(tmp1, tmp2); +} + +/* + * convert disk block number to the logical block number of the dmap + * describing the disk block. s is the log2(number of logical blocks per page) + * + * The calculation figures out how many logical pages are in front of the dmap. + * - the number of dmaps preceding it + * - the number of L0 pages preceding its L0 page + * - the number of L1 pages preceding its L1 page + * - 3 is added to account for the L2, L1, and L0 page for this dmap + * - 1 is added to account for the control page of the map. + */ +#define BLKTODMAP(b,s) \ + ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the LEVEL 0 + * dmapctl describing the disk block. s is the log2(number of logical blocks + * per page) + * + * The calculation figures out how many logical pages are in front of the L0. + * - the number of dmap pages preceding it + * - the number of L0 pages preceding it + * - the number of L1 pages preceding its L1 page + * - 2 is added to account for the L2, and L1 page for this L0 + * - 1 is added to account for the control page of the map. 
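+ *
+ * As a worked check of these formulas (assuming s == 0, i.e. one
+ * logical block per page): for any b < 2^23 each shift term is 0, so
+ * BLKTOL0(b,0) = 2 + 1 = 3, and likewise BLKTODMAP(b,0) = 3 + 1 = 4
+ * for b < 2^13; this matches the expected front of the map file:
+ * control page 0, L2 page 1, L1 page 2, L0 page 3, first dmap page 4.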
+ */ +#define BLKTOL0(b,s) \ + (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the LEVEL 1 + * dmapctl describing the disk block. s is the log2(number of logical blocks + * per page) + * + * The calculation figures out how many logical pages are in front of the L1. + * - the number of dmap pages preceding it + * - the number of L0 pages preceding it + * - the number of L1 pages preceding it + * - 1 is added to account for the L2 page + * - 1 is added to account for the control page of the map. + */ +#define BLKTOL1(b,s) \ + (((((b) >> 33) << 20) + (((b) >> 33) << 10) + ((b) >> 33) + 1 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the dmapctl + * at the specified level which describes the disk block. + */ +#define BLKTOCTL(b,s,l) \ + (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) + +/* + * convert aggregate map size to the zero origin dmapctl level of the + * top dmapctl. + */ +#define BMAPSZTOLEV(size) \ + (((size) <= MAXL0SIZE) ? 0 : ((size) <= MAXL1SIZE) ? 1 : 2) + +/* convert disk block number to allocation group number. + */ +#define BLKTOAG(b,sbi) ((b) >> ((sbi)->bmap->db_agl2size)) + +/* convert allocation group number to starting disk block + * number. + */ +#define AGTOBLK(a,ip) \ + ((s64)(a) << (JFS_SBI((ip)->i_sb)->bmap->db_agl2size)) + +/* + * dmap summary tree + * + * dmaptree_t must be consistent with dmapctl_t. + */ +typedef struct { + s32 nleafs; /* 4: number of tree leafs */ + s32 l2nleafs; /* 4: l2 number of tree leafs */ + s32 leafidx; /* 4: index of first tree leaf */ + s32 height; /* 4: height of the tree */ + s8 budmin; /* 1: min l2 tree leaf value to combine */ + s8 stree[TREESIZE]; /* TREESIZE: tree */ + u8 pad[2]; /* 2: pad to word boundary */ +} dmaptree_t; /* - 360 - */ + +/* + * dmap page per 8K blocks bitmap + */ +typedef struct { + s32 nblocks; /* 4: num blks covered by this dmap */ + s32 nfree; /* 4: num of free blks in this dmap */ + s64 start; /* 8: starting blkno for this dmap */ + dmaptree_t tree; /* 360: dmap tree */ + u8 pad[1672]; /* 1672: pad to 2048 bytes */ + u32 wmap[LPERDMAP]; /* 1024: bits of the working map */ + u32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ +} dmap_t; /* - 4096 - */ + +/* + * disk map control page per level. + * + * dmapctl_t must be consistent with dmaptree_t. + */ +typedef struct { + s32 nleafs; /* 4: number of tree leafs */ + s32 l2nleafs; /* 4: l2 number of tree leafs */ + s32 leafidx; /* 4: index of the first tree leaf */ + s32 height; /* 4: height of tree */ + s8 budmin; /* 1: minimum l2 tree leaf value */ + s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ + u8 pad[2714]; /* 2714: pad to 4096 */ +} dmapctl_t; /* - 4096 - */ + +/* + * common definition for dmaptree_t within dmap and dmapctl + */ +typedef union { + dmaptree_t t1; + dmapctl_t t2; +} dmtree_t; + +/* macros for accessing fields within dmtree_t */ +#define dmt_nleafs t1.nleafs +#define dmt_l2nleafs t1.l2nleafs +#define dmt_leafidx t1.leafidx +#define dmt_height t1.height +#define dmt_budmin t1.budmin +#define dmt_stree t1.stree + +/* + * on-disk aggregate disk allocation map descriptor. 
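+ *
+ * (For scale, assuming 4K blocks: MAXL0SIZE is 2^23 blocks, so a
+ * single L0 level describes aggregates up to 32GB; MAXL1SIZE, 2^33
+ * blocks, covers up to 32TB; anything larger needs the full L2 tree.)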
+ */ +typedef struct { + s64 dn_mapsize; /* 8: number of blocks in aggregate */ + s64 dn_nfree; /* 8: num free blks in aggregate map */ + s32 dn_l2nbperpage; /* 4: number of blks per page */ + s32 dn_numag; /* 4: total number of ags */ + s32 dn_maxlevel; /* 4: number of active ags */ + s32 dn_maxag; /* 4: max active alloc group number */ + s32 dn_agpref; /* 4: preferred alloc group (hint) */ + s32 dn_aglevel; /* 4: dmapctl level holding the AG */ + s32 dn_agheigth; /* 4: height in dmapctl of the AG */ + s32 dn_agwidth; /* 4: width in dmapctl of the AG */ + s32 dn_agstart; /* 4: start tree index at AG height */ + s32 dn_agl2size; /* 4: l2 num of blks per alloc group */ + s64 dn_agfree[MAXAG]; /* 8*MAXAG: per AG free count */ + s64 dn_agsize; /* 8: num of blks per alloc group */ + s8 dn_maxfreebud; /* 1: max free buddy system */ + u8 pad[3007]; /* 3007: pad to 4096 */ +} dbmap_t; /* - 4096 - */ + +/* + * in-memory aggregate disk allocation map descriptor. + */ +typedef struct bmap { + dbmap_t db_bmap; /* on-disk aggregate map descriptor */ + struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ + struct semaphore db_bmaplock; /* aggregate map lock */ + u32 *db_DBmap; +} bmap_t; + +/* macros for accessing fields within in-memory aggregate map descriptor */ +#define db_mapsize db_bmap.dn_mapsize +#define db_nfree db_bmap.dn_nfree +#define db_agfree db_bmap.dn_agfree +#define db_agsize db_bmap.dn_agsize +#define db_agl2size db_bmap.dn_agl2size +#define db_agwidth db_bmap.dn_agwidth +#define db_agheigth db_bmap.dn_agheigth +#define db_agstart db_bmap.dn_agstart +#define db_numag db_bmap.dn_numag +#define db_maxlevel db_bmap.dn_maxlevel +#define db_aglevel db_bmap.dn_aglevel +#define db_agpref db_bmap.dn_agpref +#define db_maxag db_bmap.dn_maxag +#define db_maxfreebud db_bmap.dn_maxfreebud +#define db_l2nbperpage db_bmap.dn_l2nbperpage + +/* + * macros for various conversions needed by the allocators. + * blkstol2(), cntlz(), and cnttz() are operating system dependent functions. + */ +/* convert number of blocks to log2 number of blocks, rounding up to + * the next log2 value if blocks is not a l2 multiple. + */ +#define BLKSTOL2(d) (blkstol2(d)) + +/* convert number of leafs to log2 leaf value */ +#define NLSTOL2BSZ(n) (31 - cntlz((n)) + BUDMIN) + +/* convert leaf index to log2 leaf value */ +#define LITOL2BSZ(n,m,b) ((((n) == 0) ? (m) : cnttz((n))) + (b)) + +/* convert a block number to a dmap control leaf index */ +#define BLKTOCTLLEAF(b,m) \ + (((b) & (((s64)1 << ((m) + L2LPERCTL)) - 1)) >> (m)) + +/* convert log2 leaf value to buddy size */ +#define BUDSIZE(s,m) (1 << ((s) - (m))) + +/* + * external references. 
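+ *
+ * A typical call sequence, sketched from the prototypes below rather
+ * than taken from the original header: dbMount() reads the map into
+ * the in-memory bmap_t, dbAlloc(ipbmap, hint, nblocks, &blkno) then
+ * hands out extents (returning the start block through its last
+ * argument), and dbFree() returns them; dbUnmount() writes the map
+ * back at unmount time.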
+ */ +extern int dbMount(struct inode *ipbmap); + +extern int dbUnmount(struct inode *ipbmap, int mounterror); + +extern int dbFree(struct inode *ipbmap, s64 blkno, s64 nblocks); + +extern int dbUpdatePMap(struct inode *ipbmap, + int free, s64 blkno, s64 nblocks, tblock_t * tblk); + +extern int dbNextAG(struct inode *ipbmap); + +extern int dbAlloc(struct inode *ipbmap, s64 hint, s64 nblocks, s64 * results); + +extern int dbAllocExact(struct inode *ip, s64 blkno, int nblocks); + +extern int dbReAlloc(struct inode *ipbmap, + s64 blkno, s64 nblocks, s64 addnblocks, s64 * results); + +extern int dbSync(struct inode *ipbmap); +extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); +extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); +extern void dbFinalizeBmap(struct inode *ipbmap); +extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +#endif /* _H_JFS_DMAP */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_dtree.h linuxppc64_2_4/include/linux/jfs/jfs_dtree.h --- ../kernel.org/linux/include/linux/jfs/jfs_dtree.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_dtree.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,284 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +#ifndef _H_JFS_DTREE +#define _H_JFS_DTREE + +/* + * jfs_dtree.h: directory B+-tree manager + */ + +#include + +typedef union { + struct { + int tid; + struct inode *ip; + u32 ino; + } leaf; + pxd_t xd; +} ddata_t; + + +/* + * entry segment/slot + * + * an entry consists of type dependent head/only segment/slot and + * additional segments/slots linked vi next field; + * N.B. 
last/only segment of entry is terminated by next = -1; + */ +/* + * directory page slot + */ +typedef struct { + s8 next; /* 1: */ + s8 cnt; /* 1: */ + wchar_t name[15]; /* 30: */ +} dtslot_t; /* (32) */ + + +#define DATASLOTSIZE 16 +#define L2DATASLOTSIZE 4 +#define DTSLOTSIZE 32 +#define L2DTSLOTSIZE 5 +#define DTSLOTHDRSIZE 2 +#define DTSLOTDATASIZE 30 +#define DTSLOTDATALEN 15 + +/* + * internal node entry head/only segment + */ +typedef struct { + pxd_t xd; /* 8: child extent descriptor */ + + s8 next; /* 1: */ + u8 namlen; /* 1: */ + wchar_t name[11]; /* 22: 2-byte aligned */ +} idtentry_t; /* (32) */ + +#define DTIHDRSIZE 10 +#define DTIHDRDATALEN 11 + +/* compute number of slots for entry */ +#define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 ) + + +/* + * leaf node entry head/only segment + * + * For legacy filesystems, name contains 13 wchars -- no index field + */ +typedef struct { + u32 inumber; /* 4: 4-byte aligned */ + s8 next; /* 1: */ + u8 namlen; /* 1: */ + wchar_t name[11]; /* 22: 2-byte aligned */ + u32 index; /* 4: index into dir_table */ +} ldtentry_t; /* (32) */ + +#define DTLHDRSIZE 6 +#define DTLHDRDATALEN_LEGACY 13 /* Old (OS/2) format */ +#define DTLHDRDATALEN 11 + +/* + * dir_table used for directory traversal during readdir + */ + +/* + * Keep persistent index for directory entries + */ +#define DO_INDEX(INODE) (JFS_SBI((INODE)->i_sb)->mntflag & JFS_DIR_INDEX) + +/* + * Maximum entry in inline directory table + */ +#define MAX_INLINE_DIRTABLE_ENTRY 13 + +typedef struct dir_table_slot { + u8 rsrvd; /* 1: */ + u8 flag; /* 1: 0 if free */ + u8 slot; /* 1: slot within leaf page of entry */ + u8 addr1; /* 1: upper 8 bits of leaf page address */ + u32 addr2; /* 4: lower 32 bits of leaf page address -OR- + index of next entry when this entry was deleted */ +} dir_table_slot_t; /* (8) */ + +/* + * flag values + */ +#define DIR_INDEX_VALID 1 +#define DIR_INDEX_FREE 0 + +#define DTSaddress(dir_table_slot, address64)\ +{\ + (dir_table_slot)->addr1 = ((u64)address64) >> 32;\ + (dir_table_slot)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ +} + +#define addressDTS(dts)\ + ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) + +/* compute number of slots for entry */ +#define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 ) +#define NDTLEAF NDTINTERNAL + + +/* + * directory root page (in-line in on-disk inode): + * + * cf. dtpage_t below. + */ +typedef union { + struct { + dasd_t DASD; /* 16: DASD limit/usage info F226941 */ + + u8 flag; /* 1: */ + s8 nextindex; /* 1: next free entry in stbl */ + s8 freecnt; /* 1: free count */ + s8 freelist; /* 1: freelist header */ + + u32 idotdot; /* 4: parent inode number */ + + s8 stbl[8]; /* 8: sorted entry index table */ + } header; /* (32) */ + + dtslot_t slot[9]; +} dtroot_t; + +#define PARENT(IP) \ + (le32_to_cpu(JFS_IP(IP)->i_dtroot.header.idotdot)) + +#define DTROOTMAXSLOT 9 + +#define dtEmpty(IP) (JFS_IP(IP)->i_dtroot.header.nextindex == 0) + + +/* + * directory regular page: + * + * entry slot array of 32 byte slot + * + * sorted entry slot index table (stbl): + * contiguous slots at slot specified by stblindex, + * 1-byte per entry + * 512 byte block: 16 entry tbl (1 slot) + * 1024 byte block: 32 entry tbl (1 slot) + * 2048 byte block: 64 entry tbl (2 slot) + * 4096 byte block: 128 entry tbl (4 slot) + * + * data area: + * 512 byte block: 16 - 2 = 14 slot + * 1024 byte block: 32 - 2 = 30 slot + * 2048 byte block: 64 - 3 = 61 slot + * 4096 byte block: 128 - 5 = 123 slot + * + * N.B. 
index is 0-based; index fields refer to slot index + * except nextindex which refers to entry index in stbl; + * end of entry stot list or freelist is marked with -1. + */ +typedef union { + struct { + s64 next; /* 8: next sibling */ + s64 prev; /* 8: previous sibling */ + + u8 flag; /* 1: */ + s8 nextindex; /* 1: next entry index in stbl */ + s8 freecnt; /* 1: */ + s8 freelist; /* 1: slot index of head of freelist */ + + u8 maxslot; /* 1: number of slots in page slot[] */ + s8 stblindex; /* 1: slot index of start of stbl */ + u8 rsrvd[2]; /* 2: */ + + pxd_t self; /* 8: self pxd */ + } header; /* (32) */ + + dtslot_t slot[128]; +} dtpage_t; + +#define DTPAGEMAXSLOT 128 + +#define DT8THPGNODEBYTES 512 +#define DT8THPGNODETSLOTS 1 +#define DT8THPGNODESLOTS 16 + +#define DTQTRPGNODEBYTES 1024 +#define DTQTRPGNODETSLOTS 1 +#define DTQTRPGNODESLOTS 32 + +#define DTHALFPGNODEBYTES 2048 +#define DTHALFPGNODETSLOTS 2 +#define DTHALFPGNODESLOTS 64 + +#define DTFULLPGNODEBYTES 4096 +#define DTFULLPGNODETSLOTS 4 +#define DTFULLPGNODESLOTS 128 + +#define DTENTRYSTART 1 + +/* get sorted entry table of the page */ +#define DT_GETSTBL(p) ( ((p)->header.flag & BT_ROOT) ?\ + ((dtroot_t *)(p))->header.stbl : \ + (s8 *)&(p)->slot[(p)->header.stblindex] ) + +/* + * Flags for dtSearch + */ +#define JFS_CREATE 1 +#define JFS_LOOKUP 2 +#define JFS_REMOVE 3 +#define JFS_RENAME 4 + +#define DIRENTSIZ(namlen) \ + ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 ) + + +/* + * external declarations + */ +extern void dtInitRoot(int tid, struct inode *ip, u32 idotdot); + +extern int dtSearch(struct inode *ip, component_t * key, + ino_t * data, btstack_t * btstack, int flag); + +extern int dtInsert(int tid, struct inode *ip, + component_t * key, ino_t * ino, btstack_t * btstack); + +extern int dtDelete(int tid, + struct inode *ip, component_t * key, ino_t * data, int flag); + +extern int dtRelocate(int tid, + struct inode *ip, s64 lmxaddr, pxd_t * opxd, s64 nxaddr); + +extern int dtModify(int tid, struct inode *ip, + component_t * key, ino_t * orig_ino, ino_t new_ino, int flag); + +extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); + +#ifdef _JFS_DEBUG_DTREE +extern int dtDisplayTree(struct inode *ip); + +extern int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p); +#endif /* _JFS_DEBUG_DTREE */ + +#endif /* !_H_JFS_DTREE */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_extendfs.h linuxppc64_2_4/include/linux/jfs/jfs_extendfs.h --- ../kernel.org/linux/include/linux/jfs/jfs_extendfs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_extendfs.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,39 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_EXTENDFS +#define _H_JFS_EXTENDFS + +/* + * jfs_extendfs.h + */ +/* + * extendfs parameter list + */ +typedef struct { + u32 flag; /* 4: */ + u8 dev; /* 1: */ + u8 pad[3]; /* 3: */ + s64 LVSize; /* 8: LV size in LV block */ + s64 FSSize; /* 8: FS size in LV block */ + s32 LogSize; /* 4: inlinelog size in LV block */ +} extendfs_t; /* (28) */ + +/* plist flag */ +#define EXTENDFS_QUERY 0x00000001 + +#endif /* _H_JFS_EXTENDFS */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_extent.h linuxppc64_2_4/include/linux/jfs/jfs_extent.h --- ../kernel.org/linux/include/linux/jfs/jfs_extent.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_extent.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,31 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_EXTENT +#define _H_JFS_EXTENT + +/* get block allocation allocation hint as location of disk inode */ +#define INOHINT(ip) \ + (addressPXD(&(JFS_IP(ip)->ixpxd)) + lengthPXD(&(JFS_IP(ip)->ixpxd)) - 1) + +extern int extAlloc(struct inode *, s64, s64, xad_t *, boolean_t); +extern int extFill(struct inode *, xad_t *); +extern int extHint(struct inode *, s64, xad_t *); +extern int extRealloc(struct inode *, s64, xad_t *, boolean_t); +extern int extRecord(struct inode *, xad_t *); + +#endif /* _H_JFS_EXTENT */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_filsys.h linuxppc64_2_4/include/linux/jfs/jfs_filsys.h --- ../kernel.org/linux/include/linux/jfs/jfs_filsys.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_filsys.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,274 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ + +#ifndef _H_JFS_FILSYS +#define _H_JFS_FILSYS + +/* + * jfs_filsys.h + * + * file system (implementation-dependent) constants + * + * refer to for system wide implementation-dependent constants + */ + +/* + * file system option (superblock flag) + */ +/* platform option (conditional compilation) */ +#define JFS_AIX 0x80000000 /* AIX support */ +/* POSIX name/directory support */ + +#define JFS_OS2 0x40000000 /* OS/2 support */ +/* case-insensitive name/directory support */ + +#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ + +#define JFS_LINUX 0x10000000 /* Linux support */ +/* case-sensitive name/directory support */ + +/* directory option */ +#define JFS_UNICODE 0x00000001 /* unicode name */ + +/* commit option */ +#define JFS_COMMIT 0x00000f00 /* commit option mask */ +#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ +#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ +#define JFS_TMPFS 0x00000400 /* temporary file system - + * do not log/commit: + */ + +/* log logical volume option */ +#define JFS_INLINELOG 0x00000800 /* inline log within file system */ +#define JFS_INLINEMOVE 0x00001000 /* inline log being moved */ + +/* Secondary aggregate inode table */ +#define JFS_BAD_SAIT 0x00010000 /* current secondary ait is bad */ + +/* sparse regular file support */ +#define JFS_SPARSE 0x00020000 /* sparse regular file */ + +/* DASD Limits F226941 */ +#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */ +#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */ + +/* big endian flag */ +#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */ + +/* Directory index */ +#define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ + /* directory entries */ + + +/* + * buffer cache configuration + */ +/* page size */ +#ifdef PSIZE +#undef PSIZE +#endif +#define PSIZE 4096 /* page size (in byte) */ +#define L2PSIZE 12 /* log2(PSIZE) */ +#define POFFSET 4095 /* offset within page */ + +/* buffer page size */ +#define BPSIZE PSIZE + +/* + * fs fundamental size + * + * PSIZE >= file system block size >= PBSIZE >= DISIZE + */ +#define PBSIZE 512 /* physical block size (in byte) */ +#define L2PBSIZE 9 /* log2(PBSIZE) */ + +#define DISIZE 512 /* on-disk inode size (in byte) */ +#define L2DISIZE 9 /* log2(DISIZE) */ + +#define IDATASIZE 256 /* inode inline data size */ +#define IXATTRSIZE 128 /* inode inline extended attribute size */ + +#define XTPAGE_SIZE 4096 +#define log2_PAGESIZE 12 + +#define IAG_SIZE 4096 +#define IAG_EXTENT_SIZE 4096 +#define INOSPERIAG 4096 /* number of disk inodes per iag */ +#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ +#define INOSPEREXT 32 /* number of disk inode per extent */ +#define L2INOSPEREXT 5 /* l2 number of disk inode per extent */ +#define IXSIZE (DISIZE * INOSPEREXT) /* inode extent size */ +#define INOSPERPAGE 8 /* number of disk inodes per 4K page */ +#define L2INOSPERPAGE 3 /* log2(INOSPERPAGE) */ + +#define IAGFREELIST_LWM 64 + +#define INODE_EXTENT_SIZE IXSIZE /* inode extent size */ +#define NUM_INODE_PER_EXTENT INOSPEREXT +#define NUM_INODE_PER_IAG INOSPERIAG + +#define MINBLOCKSIZE 512 +#define MAXBLOCKSIZE 4096 +#define MAXFILESIZE ((s64)1 << 52) + +#define JFS_LINK_MAX 65535 /* nlink_t is unsigned short */ + +/* Minimum number of bytes supported for a JFS partition */ 
+#define MINJFS (0x1000000) +#define MINJFSTEXT "16" + +/* + * file system block size -> physical block size + */ +#define LBOFFSET(x) ((x) & (PBSIZE - 1)) +#define LBNUMBER(x) ((x) >> L2PBSIZE) +#define LBLK2PBLK(sb,b) ((b) << (sb->s_blocksize_bits - L2PBSIZE)) +#define PBLK2LBLK(sb,b) ((b) >> (sb->s_blocksize_bits - L2PBSIZE)) +/* size in byte -> last page number */ +#define SIZE2PN(size) ( ((s64)((size) - 1)) >> (L2PSIZE) ) +/* size in byte -> last file system block number */ +#define SIZE2BN(size, l2bsize) ( ((s64)((size) - 1)) >> (l2bsize) ) + +/* + * fixed physical block address (physical block size = 512 byte) + * + * NOTE: since we can't guarantee a physical block size of 512 bytes the use of + * these macros should be removed and the byte offset macros used instead. + */ +#define SUPER1_B 64 /* primary superblock */ +#define AIMAP_B (SUPER1_B + 8) /* 1st extent of aggregate inode map */ +#define AITBL_B (AIMAP_B + 16) /* + * 1st extent of aggregate inode table + */ +#define SUPER2_B (AITBL_B + 32) /* 2ndary superblock pbn */ +#define BMAP_B (SUPER2_B + 8) /* block allocation map */ + +/* + * SIZE_OF_SUPER defines the total amount of space reserved on disk for the + * superblock. This is not the same as the superblock structure, since all of + * this space is not currently being used. + */ +#define SIZE_OF_SUPER PSIZE + +/* + * SIZE_OF_AG_TABLE defines the amount of space reserved to hold the AG table + */ +#define SIZE_OF_AG_TABLE PSIZE + +/* + * SIZE_OF_MAP_PAGE defines the amount of disk space reserved for each page of + * the inode allocation map (to hold iag) + */ +#define SIZE_OF_MAP_PAGE PSIZE + +/* + * fixed byte offset address + */ +#define SUPER1_OFF 0x8000 /* primary superblock */ +#define AIMAP_OFF (SUPER1_OFF + SIZE_OF_SUPER) + /* + * Control page of aggregate inode map + * followed by 1st extent of map + */ +#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) + /* + * 1st extent of aggregate inode table + */ +#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) + /* + * secondary superblock + */ +#define BMAP_OFF (SUPER2_OFF + SIZE_OF_SUPER) + /* + * block allocation map + */ + +/* + * The following macro is used to indicate the number of reserved disk blocks at + * the front of an aggregate, in terms of physical blocks. This value is + * currently defined to be 32K. This turns out to be the same as the primary + * superblock's address, since it directly follows the reserved blocks. + */ +#define AGGR_RSVD_BLOCKS SUPER1_B + +/* + * The following macro is used to indicate the number of reserved bytes at the + * front of an aggregate. This value is currently defined to be 32K. This + * turns out to be the same as the primary superblock's byte offset, since it + * directly follows the reserved blocks. + */ +#define AGGR_RSVD_BYTES SUPER1_OFF + +/* + * The following macro defines the byte offset for the first inode extent in + * the aggregate inode table. This allows us to find the self inode to find the + * rest of the table. Currently this value is 44K. 
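+ *
+ * (Arithmetic check: AIMAP_OFF = 32K + 4K = 36K, and AITBL_OFF =
+ * AIMAP_OFF + two 4K map pages = 44K, matching the value quoted
+ * above.)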
+ */ +#define AGGR_INODE_TABLE_START AITBL_OFF + +/* + * fixed reserved inode number + */ +/* aggregate inode */ +#define AGGR_RESERVED_I 0 /* aggregate inode (reserved) */ +#define AGGREGATE_I 1 /* aggregate inode map inode */ +#define BMAP_I 2 /* aggregate block allocation map inode */ +#define LOG_I 3 /* aggregate inline log inode */ +#define BADBLOCK_I 4 /* aggregate bad block inode */ +#define FILESYSTEM_I 16 /* 1st/only fileset inode in ait: + * fileset inode map inode + */ + +/* per fileset inode */ +#define FILESET_RSVD_I 0 /* fileset inode (reserved) */ +#define FILESET_EXT_I 1 /* fileset inode extension */ +#define ROOT_I 2 /* fileset root inode */ +#define ACL_I 3 /* fileset ACL inode */ + +#define FILESET_OBJECT_I 4 /* the first fileset inode available for a file + * or directory or link... + */ +#define FIRST_FILESET_INO 16 /* the first aggregate inode which describes + * an inode. (To fsck this is also the first + * inode in part 2 of the agg inode table.) + */ + +/* + * directory configuration + */ +#define JFS_NAME_MAX 255 +#define JFS_PATH_MAX BPSIZE + + +/* + * file system state (superblock state) + */ +#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ +#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ +#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean + * when mounted or + * commit failure occurred while being mounted: + * fsck() must be run to repair + */ +#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: + * fsck() must be run to repair + */ +#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ + +#endif /* _H_JFS_FILSYS */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_imap.h linuxppc64_2_4/include/linux/jfs/jfs_imap.h --- ../kernel.org/linux/include/linux/jfs/jfs_imap.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_imap.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,161 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_IMAP +#define _H_JFS_IMAP + +#include + +/* + * jfs_imap.h: disk inode manager + */ + +#define EXTSPERIAG 128 /* number of disk inode extent per iag */ +#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ +#define SMAPSZ 4 /* number of words per summary map */ +#define EXTSPERSUM 32 /* number of extents per summary map entry */ +#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ +#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ +#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ +#define MAXAG 128 /* maximum number of allocation groups */ + +#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ +#define SMAPSIZE 16 /* bytes in the IAG summary maps */ + +/* convert inode number to iag number */ +#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) + +/* convert iag number to logical block number of the iag page */ +#define IAGTOLBLK(iagno,l2nbperpg) (((iagno) + 1) << (l2nbperpg)) + +/* get the starting block number of the 4K page of an inode extent + * that contains ino. + */ +#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ + ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) + +/* + * inode allocation map: + * + * inode allocation map consists of + * . the inode map control page and + * . inode allocation group pages (per 4096 inodes) + * which are addressed by standard JFS xtree. + */ +/* + * inode allocation group page (per 4096 inodes of an AG) + */ +typedef struct { + s64 agstart; /* 8: starting block of ag */ + s32 iagnum; /* 4: inode allocation group number */ + s32 inofreefwd; /* 4: ag inode free list forward */ + s32 inofreeback; /* 4: ag inode free list back */ + s32 extfreefwd; /* 4: ag inode extent free list forward */ + s32 extfreeback; /* 4: ag inode extent free list back */ + s32 iagfree; /* 4: iag free list */ + + /* summary map: 1 bit per inode extent */ + s32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; + * note: this indicates free and backed + * inodes, if the extent is not backed the + * value will be 1. if the extent is + * backed but all inodes are being used the + * value will be 1. if the extent is + * backed but at least one of the inodes is + * free the value will be 0. 
+ */ + s32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ + s32 nfreeinos; /* 4: number of free inodes */ + s32 nfreeexts; /* 4: number of free extents */ + /* (72) */ + u8 pad[1976]; /* 1976: pad to 2048 bytes */ + /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ + u32 wmap[EXTSPERIAG]; /* 512: working allocation map */ + u32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ + pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ +} iag_t; /* (4096) */ + +/* + * per AG control information (in inode map control page) + */ +typedef struct { + s32 inofree; /* 4: free inode list anchor */ + s32 extfree; /* 4: free extent list anchor */ + s32 numinos; /* 4: number of backed inodes */ + s32 numfree; /* 4: number of free inodes */ +} iagctl_t; /* (16) */ + +/* + * per fileset/aggregate inode map control page + */ +typedef struct { + s32 in_freeiag; /* 4: free iag list anchor */ + s32 in_nextiag; /* 4: next free iag number */ + s32 in_numinos; /* 4: num of backed inodes */ + s32 in_numfree; /* 4: num of free backed inodes */ + s32 in_nbperiext; /* 4: num of blocks per inode extent */ + s32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ + s32 in_diskblock; /* 4: for standalone test driver */ + s32 in_maxag; /* 4: for standalone test driver */ + u8 pad[2016]; /* 2016: pad to 2048 */ + iagctl_t in_agctl[MAXAG]; /* 2048: AG control information */ +} dinomap_t; /* (4096) */ + + +/* + * In-core inode map control page + */ +typedef struct inomap { + dinomap_t im_imap; /* 4096: inode allocation control */ + struct inode *im_ipimap; /* 4: ptr to inode for imap */ + struct semaphore im_freelock; /* 4: iag free list lock */ + struct semaphore im_aglock[MAXAG]; /* 512: per AG locks */ + u32 *im_DBGdimap; + atomic_t im_numinos; /* num of backed inodes */ + atomic_t im_numfree; /* num of free backed inodes */ +} imap_t; + +#define im_freeiag im_imap.in_freeiag +#define im_nextiag im_imap.in_nextiag +#define im_agctl im_imap.in_agctl +#define im_nbperiext im_imap.in_nbperiext +#define im_l2nbperiext im_imap.in_l2nbperiext + +/* for standalone testdriver + */ +#define im_diskblock im_imap.in_diskblock +#define im_maxag im_imap.in_maxag + +extern int diFree(struct inode *); +extern int diAlloc(struct inode *, boolean_t, struct inode *); +extern int diSync(struct inode *); +/* external references */ +extern int diUpdatePMap(struct inode *ipimap, unsigned long inum, + boolean_t is_free, tblock_t * tblk); +#ifdef _STILL_TO_PORT +extern int diExtendFS(inode_t * ipimap, inode_t * ipbmap); +#endif /* _STILL_TO_PORT */ + +extern int diMount(struct inode *); +extern int diUnmount(struct inode *, int); +extern int diRead(struct inode *); +extern void diClearExtension(struct inode *); +extern struct inode *diReadSpecial(struct super_block *, ino_t); +extern void diWriteSpecial(struct inode *); +extern void diFreeSpecial(struct inode *); +extern int diWrite(int tid, struct inode *); +#endif /* _H_JFS_IMAP */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_incore.h linuxppc64_2_4/include/linux/jfs/jfs_incore.h --- ../kernel.org/linux/include/linux/jfs/jfs_incore.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_incore.h Wed Nov 14 10:22:29 2001 @@ -0,0 +1,144 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or 
+ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _H_JFS_INCORE +#define _H_JFS_INCORE + +#include +#include +#include +#include + +/* + * JFS magic number + */ +#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */ + +/* + * Due to header ordering problems this can't be in jfs_lock.h + */ +typedef struct jfs_rwlock { + struct rw_semaphore rw_sem; + atomic_t in_use; /* for hacked implementation of trylock */ +} jfs_rwlock_t; + +/* + * JFS-private inode information + */ +struct jfs_inode_info { + int fileset; /* 4: fileset number (always 16)*/ + uint mode2; /* 4: jfs-specific mode */ + pxd_t ixpxd; /* 8: inode extent descriptor */ + dxd_t acl; /* 16: dxd describing acl */ + dxd_t ea; /* 16: dxd describing ea */ + time_t otime; /* 4: time created */ + uint next_index; /* 4: next available directory entry index */ + int acltype; /* 4: Type of ACL */ + short btorder; /* 2: access order */ + short btindex; /* 2: btpage entry index*/ + struct inode *ipimap; /* 4: inode map */ + long cflag; /* 4: commit flags */ + ushort bxflag; /* 2: xflag of pseudo buffer? */ + short blid; /* 2: lid of pseudo buffer? */ + ushort atlhead; /* 2: anonymous tlock list head */ + ushort atltail; /* 2: anonymous tlock list tail */ + struct inode *atlnext; /* 4: next inode w/anonymous txn's */ + struct inode *atlprev; /* 4: previous inode w/anonymous txn's */ + jfs_rwlock_t rdwrlock; /* 12/20: read/write lock */ + ushort xtlid; /* 2: lid of xtree lock on directory */ + unchar agno; /* 1: ag number */ + unchar pad; /* 1: pad */ + union { + struct { + xtpage_t _xtroot; /* 288: xtree root */ + struct inomap *_imap; /* 4: inode map header */ + } file; + struct { + dir_table_slot_t _table[12]; /* 96: directory index */ + dtroot_t _dtroot; /* 288: dtree root */ + } dir; + struct { + unchar _unused[16]; /* 16: */ + dxd_t _dxd; /* 16: */ + unchar _inline[128]; /* 128: inline symlink */ + } link; + } u; +}; +#define i_xtroot u.file._xtroot +#define i_imap u.file._imap +#define i_dirtable u.dir._table +#define i_dtroot u.dir._dtroot +#define i_inline u.link._inline + +/* + * cflag + */ +enum cflags { + COMMIT_New, /* never committed inode */ + COMMIT_Nolink, /* inode committed with zero link count */ + COMMIT_Inlineea, /* commit inode inline EA */ + COMMIT_Freewmap, /* free WMAP at iClose() */ + COMMIT_Dirty, /* Inode is really dirty */ + COMMIT_Holdlock, /* Hold the IWRITE_LOCK until commit is done */ + COMMIT_Dirtable, /* commit changes to di_dirtable */ + COMMIT_Stale, /* data extent is no longer valid */ + COMMIT_Synclist, /* metadata pages on group commit synclist */ +}; + +#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) +#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) +#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) +#define test_and_clear_cflag(flag, ip) \ + test_and_clear_bit(flag, &(JFS_IP(ip)->cflag)) +/* + * JFS-private superblock information. 
+ */
+struct jfs_sb_info {
+ unsigned long mntflag; /* 4: aggregate attributes */
+ struct inode *ipbmap; /* 4: block map inode */
+ struct inode *ipaimap; /* 4: aggregate inode map inode */
+ struct inode *ipaimap2; /* 4: secondary aimap inode */
+ struct inode *ipimap; /* 4: fileset inode map inode */
+ struct jfs_log *log; /* 4: log */
+ short bsize; /* 2: logical block size */
+ short l2bsize; /* 2: log2 logical block size */
+ short nbperpage; /* 2: blocks per page */
+ short l2nbperpage; /* 2: log2 blocks per page */
+ short l2niperblk; /* 2: log2 inodes per page */
+ short reserved; /* 2: reserved */
+ pxd_t logpxd; /* 8: pxd describing log */
+ pxd_t ait2; /* 8: pxd describing AIT copy */
+ /* Formerly in ipimap */
+ uint gengen; /* 4: inode generation generator */
+ uint inostamp; /* 4: shows inode belongs to fileset */
+
+ /* Formerly in ipbmap */
+ struct bmap *bmap; /* 4: incore bmap descriptor */
+ struct nls_table *nls_tab; /* 4: current codepage */
+ struct inode *direct_inode; /* 4: inode for physical I/O */
+ struct address_space *direct_mapping; /* 4: mapping for physical I/O */
+ uint state; /* 4: mount/recovery state */
+};
+
+#define JFS_IP(ip) ((struct jfs_inode_info *)(ip)->u.generic_ip)
+#define JFS_SBI(sb) ((struct jfs_sb_info *)(sb)->u.generic_sbp)
+
+#define isReadOnly(ip) ((JFS_SBI((ip)->i_sb)->log) ? 0 : 1)
+
+#endif /* _H_JFS_INCORE */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_inode.h linuxppc64_2_4/include/linux/jfs/jfs_inode.h
--- ../kernel.org/linux/include/linux/jfs/jfs_inode.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs/jfs_inode.h Wed Nov 14 10:19:36 2001
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _H_JFS_INODE
+#define _H_JFS_INODE
+
+extern struct inode *ialloc(struct inode *, umode_t);
+
+#endif /* _H_JFS_INODE */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_lock.h linuxppc64_2_4/include/linux/jfs/jfs_lock.h
--- ../kernel.org/linux/include/linux/jfs/jfs_lock.h Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs/jfs_lock.h Wed Nov 14 10:19:36 2001
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_LOCK +#define _H_JFS_LOCK + +#include +#include + +/* + * jfs_lock.h + * + * JFS lock definition for globally referenced locks + */ + +/* readers/writer lock: thread-thread */ + +/* + * RW semaphores do not currently have a trylock function. Since the + * implementation varies by platform, I have implemented a platform-independent + * wrapper around the rw_semaphore routines. If this turns out to be the best + * way of avoiding our locking problems, I will push to get a trylock + * implemented in the kernel, but I'd rather find a way to avoid having to + * use it. + */ +#define RDWRLOCK_T jfs_rwlock_t +static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock) +{ + init_rwsem(&Lock->rw_sem); + atomic_set(&Lock->in_use, 0); +} +static inline void READ_LOCK(jfs_rwlock_t * Lock) +{ + atomic_inc(&Lock->in_use); + down_read(&Lock->rw_sem); +} +static inline void READ_UNLOCK(jfs_rwlock_t * Lock) +{ + up_read(&Lock->rw_sem); + atomic_dec(&Lock->in_use); +} +static inline void WRITE_LOCK(jfs_rwlock_t * Lock) +{ + atomic_inc(&Lock->in_use); + down_write(&Lock->rw_sem); +} + +static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock) +{ + if (atomic_read(&Lock->in_use)) + return 0; + WRITE_LOCK(Lock); + return 1; +} +static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock) +{ + up_write(&Lock->rw_sem); + atomic_dec(&Lock->in_use); +} + +#define IREAD_LOCK(ip) READ_LOCK(&JFS_IP(ip)->rdwrlock) +#define IREAD_UNLOCK(ip) READ_UNLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_LOCK(ip) WRITE_LOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_TRYLOCK(ip) WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_UNLOCK(ip) WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_LOCK_LIST iwritelocklist + +extern void iwritelocklist(int, ...); + +/* + * Conditional sleep where condition is protected by spinlock + * + * lock_cmd and unlock_cmd take and release the spinlock + */ +#define __SLEEP_COND(wq, cond, lock_cmd, unlock_cmd) \ +do { \ + DECLARE_WAITQUEUE(__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE);\ + if (cond) \ + break; \ + unlock_cmd; \ + schedule(); \ + lock_cmd; \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#endif /* _H_JFS_LOCK */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_logmgr.h linuxppc64_2_4/include/linux/jfs/jfs_logmgr.h --- ../kernel.org/linux/include/linux/jfs/jfs_logmgr.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_logmgr.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,501 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_JFS_LOGMGR +#define _H_JFS_LOGMGR + + +#include +#include + +/* + * log manager configuration parameters + */ + +/* log page size */ +#define LOGPSIZE 4096 +#define L2LOGPSIZE 12 + +#define LOGPAGES 16 /* Log pages per mounted file system */ + +/* + * log logical volume + * + * a log is used to make the commit operation on journalled + * files within the same logical volume group atomic. + * a log is implemented with a logical volume. + * there is one log per logical volume group. + * + * block 0 of the log logical volume is not used (ipl etc). + * block 1 contains a log "superblock" and is used by logFormat(), + * lmLogInit(), lmLogShutdown(), and logRedo() to record status + * of the log but is not otherwise used during normal processing. + * blocks 2 - (N-1) are used to contain log records. + * + * when a volume group is varied-on-line, logRedo() must have + * been executed before the file systems (logical volumes) in + * the volume group can be mounted. + */ +/* + * log superblock (block 1 of logical volume) + */ +#define LOGSUPER_B 1 +#define LOGSTART_B 2 + +#define LOGMAGIC 0x87654321 +#define LOGVERSION 1 + +typedef struct { + u32 magic; /* 4: log lv identifier */ + s32 version; /* 4: version number */ + s32 serial; /* 4: log open/mount counter */ + s32 size; /* 4: size in number of LOGPSIZE blocks */ + s32 bsize; /* 4: logical block size in byte */ + s32 l2bsize; /* 4: log2 of bsize */ + + u32 flag; /* 4: option */ + u32 state; /* 4: state - see below */ + + s32 end; /* 4: addr of last log record set by logredo */ + u32 active[8]; /* 32: active file systems bit vector */ + s32 rsrvd[LOGPSIZE / 4 - 17]; +} logsuper_t; + +/* log flag: commit option (see jfs_filsys.h) */ + +/* log state */ +#define LOGMOUNT 0 /* log mounted by lmLogInit() */ +#define LOGREDONE 1 /* log shutdown by lmLogShutdown(). + * log redo completed by logredo(). + */ +#define LOGWRAP 2 /* log wrapped */ +#define LOGREADERR 3 /* log read error detected in logredo() */ + + +/* + * log logical page + * + * (this comment should be rewritten !) + * the header and trailer structures (h,t) will normally have + * the same page and eor value. + * An exception to this occurs when a complete page write is not + * accomplished on a power failure. Since the hardware may "split write" + * sectors in the page, any out of order sequence may occur during powerfail + * and needs to be recognized during log replay. The xor value is + * an "exclusive or" of all log words in the page up to eor. This + * 32 bit eor is stored with the top 16 bits in the header and the + * bottom 16 bits in the trailer. logredo can easily recognize pages + * that were not completed by reconstructing this eor and checking + * the log page. + * + * Previous versions of the operating system did not allow split + * writes and detected partially written records in logredo by + * ordering the updates to the header, trailer, and the move of data + * into the logdata area. The order: (1) data is moved (2) header + * is updated (3) trailer is updated. In logredo, when the header + * differed from the trailer, the header and trailer were reconciled + * as follows: if h.page != t.page they were set to the smaller of + * the two and h.eor and t.eor set to 8 (i.e. empty page). 
if (only)
+ * h.eor != t.eor they were set to the smaller of their two values.
+ */
+typedef struct {
+ struct { /* header */
+ s32 page; /* 4: log sequence page number */
+ s16 rsrvd; /* 2: */
+ s16 eor; /* 2: end-of-log offset of last record write */
+ } h;
+
+ s32 data[LOGPSIZE / 4 - 4]; /* log record area */
+
+ struct { /* trailer */
+ s32 page; /* 4: normally the same as h.page */
+ s16 rsrvd; /* 2: */
+ s16 eor; /* 2: normally the same as h.eor */
+ } t;
+} logpage_t;
+
+#define LOGPHDRSIZE 8 /* log page header size */
+#define LOGPTLRSIZE 8 /* log page trailer size */
+
+
+/*
+ * log record
+ *
+ * (this comment should be rewritten !)
+ * jfs uses only "after" log records (only a single writer is allowed
+ * in a page, pages are written to temporary paging space
+ * if they must be written to disk before commit, and i/o is
+ * scheduled for modified pages to their home location after
+ * the log records containing the after values and the commit
+ * record are written to the log on disk, undo discards the copy
+ * in main-memory.)
+ *
+ * a log record consists of a data area of variable length followed by
+ * a descriptor of fixed size LOGRDSIZE bytes.
+ * the data area is rounded up to an integral number of 4-byte words and
+ * must be no longer than LOGPSIZE.
+ * the descriptor is a multiple of 4 bytes in size and aligned on a
+ * 4-byte boundary.
+ * records are packed one after the other in the data area of log pages.
+ * (sometimes a DUMMY record is inserted so that at least one record ends
+ * on every page or the longest record is placed on at most two pages).
+ * the field eor in page header/trailer points to the byte following
+ * the last record on a page.
+ */
+
+/* log record types */
+#define LOG_COMMIT 0x8000
+#define LOG_SYNCPT 0x4000
+#define LOG_MOUNT 0x2000
+#define LOG_REDOPAGE 0x0800
+#define LOG_NOREDOPAGE 0x0080
+#define LOG_NOREDOINOEXT 0x0040
+#define LOG_UPDATEMAP 0x0008
+#define LOG_NOREDOFILE 0x0001
+
+/* REDOPAGE/NOREDOPAGE log record data type */
+#define LOG_INODE 0x0001
+#define LOG_XTREE 0x0002
+#define LOG_DTREE 0x0004
+#define LOG_BTROOT 0x0010
+#define LOG_EA 0x0020
+#define LOG_ACL 0x0040
+#define LOG_DATA 0x0080
+#define LOG_NEW 0x0100
+#define LOG_EXTEND 0x0200
+#define LOG_RELOCATE 0x0400
+#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
+
+/* UPDATEMAP log record descriptor type */
+#define LOG_ALLOCXADLIST 0x0080
+#define LOG_ALLOCPXDLIST 0x0040
+#define LOG_ALLOCXAD 0x0020
+#define LOG_ALLOCPXD 0x0010
+#define LOG_FREEXADLIST 0x0008
+#define LOG_FREEPXDLIST 0x0004
+#define LOG_FREEXAD 0x0002
+#define LOG_FREEPXD 0x0001
+
+
+typedef struct lrd {
+ /*
+ * type independent area
+ */
+ s32 logtid; /* 4: log transaction identifier */
+ s32 backchain; /* 4: ptr to prev record of same transaction */
+ u16 type; /* 2: record type */
+ s16 length; /* 2: length of data in record (in byte) */
+ s32 aggregate; /* 4: file system lv/aggregate */
+ /* (16) */
+
+ /*
+ * type dependent area (20)
+ */
+ union {
+
+ /*
+ * COMMIT: commit
+ *
+ * transaction commit: no type-dependent information;
+ */
+
+ /*
+ * REDOPAGE: after-image
+ *
+ * apply after-image;
+ *
+ * N.B. 
REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ u32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: REDOPAGE record type */
+ s16 l2linesize; /* 2: log2 of line size */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } redopage; /* (20) */
+
+ /*
+ * NOREDOPAGE: the page is freed
+ *
+ * do not apply after-image records which precede this record
+ * in the log with the same page block number to this page.
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: NOREDOPAGE record type */
+ s16 rsrvd; /* 2: reserved */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredopage; /* (20) */
+
+ /*
+ * UPDATEMAP: update block allocation map
+ *
+ * either in-line PXD,
+ * or out-of-line XADLIST;
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ u32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: UPDATEMAP record type */
+ s16 nxd; /* 2: number of extents */
+ pxd_t pxd; /* 8: pxd */
+ } updatemap; /* (20) */
+
+ /*
+ * NOREDOINOEXT: the inode extent is freed
+ *
+ * do not apply after-image records which precede this
+ * record in the log with any of the 4 page block
+ * numbers in this inode extent.
+ *
+ * NOTE: The fileset and pxd fields MUST remain in
+ * the same fields in the REDOPAGE record format.
+ *
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ s32 iagnum; /* 4: IAG number */
+ s32 inoext_idx; /* 4: inode extent index */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredoinoext; /* (20) */
+
+ /*
+ * SYNCPT: log sync point
+ *
+ * replay log up to syncpt address specified;
+ */
+ struct {
+ s32 sync; /* 4: syncpt address (0 = here) */
+ } syncpt;
+
+ /*
+ * MOUNT: file system mount
+ *
+ * file system mount: no type-dependent information;
+ */
+
+ /*
+ * ? FREEXTENT: free specified extent(s)
+ *
+ * free specified extent(s) from block allocation map
+ * N.B.: nextents should be length of data/sizeof(xad_t)
+ */
+ struct {
+ s32 type; /* 4: FREEXTENT record type */
+ s32 nextent; /* 4: number of extents */
+
+ /* data: PXD or XAD list */
+ } freextent;
+
+ /*
+ * ? NOREDOFILE: this file is freed
+ *
+ * do not apply records which precede this record in the log
+ * with the same inode number.
+ *
+ * NOREDOFILE must be the first to be written at commit
+ * (last to be read in logredo()) - it prevents
+ * replay of preceding updates of all preceding generations
+ * of the inumber esp. the on-disk inode itself,
+ * but does NOT prevent
+ * replay of the
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ } noredofile;
+
+ /*
+ * ? NEWPAGE:
+ *
+ * metadata type dependent
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ s32 type; /* 4: NEWPAGE record type */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } newpage;
+
+ /*
+ * ? DUMMY: filler
+ *
+ * no type-dependent information
+ */
+ } log;
+} lrd_t; /* (36) */
+
+#define LOGRDSIZE (sizeof(struct lrd))
+
+/*
+ * line vector descriptor
+ */
+typedef struct {
+ s16 offset;
+ s16 length;
+} lvd_t;
+
+
+/*
+ * log logical volume
+ */
+typedef struct jfs_log {
+
+ struct super_block *sb; /* 4: This is used to sync metadata
+ * before writing syncpt. 
Will + * need to be a list if we share + * the log between fs's + */ + kdev_t dev; /* 4: log lv number */ + struct file *devfp; /* 4: log device file */ + s32 serial; /* 4: log mount serial number */ + + s64 base; /* @8: log extent address (inline log ) */ + int size; /* 4: log size in log page (in page) */ + int l2bsize; /* 4: log2 of bsize */ + + uint flag; /* 4: flag */ + uint state; /* 4: state */ + + struct lbuf *lbuf_free; /* 4: free lbufs */ + wait_queue_head_t free_wait; /* 4: */ + + /* log write */ + int logtid; /* 4: log tid */ + int page; /* 4: page number of eol page */ + int eor; /* 4: eor of last record in eol page */ + struct lbuf *bp; /* 4: current log page buffer */ + + struct semaphore loglock; /* 4: log write serialization lock */ + + /* syncpt */ + int nextsync; /* 4: bytes to write before next syncpt */ + int active; /* 4: */ + int syncbarrier; /* 4: */ + wait_queue_head_t syncwait; /* 4: */ + + /* commit */ + uint cflag; /* 4: */ + struct { /* 8: FIFO commit queue header */ + struct tblock *head; + struct tblock *tail; + } cqueue; + int gcrtc; /* 4: GC_READY transaction count */ + struct tblock *gclrt; /* 4: latest GC_READY transaction */ + spinlock_t gclock; /* 4: group commit lock */ + int logsize; /* 4: log data area size in byte */ + int lsn; /* 4: end-of-log */ + int clsn; /* 4: clsn */ + int syncpt; /* 4: addr of last syncpt record */ + int sync; /* 4: addr from last logsync() */ + struct list_head synclist; /* 8: logsynclist anchor */ + spinlock_t synclock; /* 4: synclist lock */ + struct lbuf *wqueue; /* 4: log pageout queue */ + struct buffer_head *yah; /* 4: marker */ + int count; /* 4: count */ +} log_t; + +/* + * group commit flag + */ +/* log_t */ +#define logGC_PAGEOUT 0x00000001 + +/* tblock_t/lbuf_t */ +#define tblkGC_QUEUE 0x0001 +#define tblkGC_READY 0x0002 +#define tblkGC_COMMIT 0x0004 +#define tblkGC_COMMITTED 0x0008 +#define tblkGC_EOP 0x0010 +#define tblkGC_FREE 0x0020 +#define tblkGC_LEADER 0x0040 +#define tblkGC_ERROR 0x0080 +#define tblkGC_LAZY 0x0100 // D230860 +#define tblkGC_UNLOCKED 0x0200 // D230860 + +/* + * log cache buffer header + */ +typedef struct lbuf { + struct buffer_head l_bh; /* for doing I/O */ + log_t *l_log; /* 4: log associated with buffer */ + + /* + * data buffer base area + */ + uint l_flag; /* 4: pageout control flags */ + + struct lbuf *l_wqnext; /* 4: write queue link */ + struct lbuf *l_freelist; /* 4: freelistlink */ + + int l_pn; /* 4: log page number */ + int l_eor; /* 4: log record eor */ + int l_ceor; /* 4: committed log record eor */ + + s64 l_blkno; /* 8: log page block number */ + caddr_t l_ldata; /* 4: data page */ + + wait_queue_head_t l_ioevent; /* 4: i/o done event */ + struct page *l_page; /* The page itself */ +} lbuf_t; + +/* Reuse l_freelist for redrive list */ +#define l_redrive_next l_freelist + +/* + * logsynclist block + * + * common logsyncblk prefix for jbuf_t and tblock_t + */ +typedef struct logsyncblk { + u16 xflag; /* 2: flags */ + s16 lid; /* 2: tlock id */ + s32 lsn; /* 4: log sequence number */ + struct list_head synclist; /* 8: log sync list link */ +} logsyncblk_t; /* (16) */ + +/* + * logsynclist serialization (per log) + */ + +#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) +#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) +#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) + +/* compute the difference in bytes of lsn from sync point */ +#define logdiff(diff, lsn, log)\ +{\ + diff = (lsn) - (log)->syncpt;\ + if (diff < 0)\ + diff += (log)->logsize;\ +} + 
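+/*
+ * Illustrative sketch (an editor's example, not from the original JFS
+ * sources): the logdiff() macro above, rewritten as a function to make
+ * the wrap-around arithmetic explicit.  A negative raw difference means
+ * the lsn was written after the circular log wrapped past its end, so
+ * one full log length is added back to obtain the byte distance from
+ * the last sync point.
+ */
+static inline int logdiff_example(int lsn, log_t * log)
+{
+ int diff = lsn - log->syncpt; /* raw distance from sync point */
+ if (diff < 0) /* log has wrapped since syncpt */
+ diff += log->logsize; /* compensate with one log length */
+ return diff;
+}
+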
+extern int lmInit(void); +extern int lmLogOpen(struct super_block *sb, log_t ** log); +extern int lmLogClose(struct super_block *sb, log_t * log); +extern int lmLogSync(log_t * log, int nosyncwait); +extern int lmLogQuiesce(log_t * log); +extern int lmLogResume(log_t * log, struct super_block *sb); +extern int lmLogFormat(struct super_block *sb, s64 logAddress, int logSize); + +#endif /* _H_JFS_LOGMGR */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_metapage.h linuxppc64_2_4/include/linux/jfs/jfs_metapage.h --- ../kernel.org/linux/include/linux/jfs/jfs_metapage.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_metapage.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,113 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_METAPAGE +#define _H_JFS_METAPAGE + +#include + +typedef struct metapage { + /* Common logsyncblk prefix (see jfs_logmgr.h) */ + u16 xflag; + s16 lid; + int lsn; + struct list_head synclist; + /* End of logsyncblk prefix */ + + unsigned long flag; /* See Below */ + unsigned long count; /* Reference count */ + void *data; /* Data pointer */ + + /* list management stuff */ + struct metapage *hash_prev; + struct metapage *hash_next; /* Also used for free list */ + /* + * mapping & index become redundant, but we need these here to + * add the metapage to the hash before we have the real page + */ + struct address_space *mapping; + unsigned long index; + wait_queue_head_t wait; + + /* implementation */ + struct page *page; + unsigned long logical_size; + + /* Journal management */ + int clsn; + atomic_t nohomeok; + struct jfs_log *log; +} metapage_t; + +/* + * Direct-access address space operations + */ +extern struct address_space_operations direct_aops; + +/* metapage flag */ +#define META_locked 0 +#define META_absolute 1 +#define META_free 2 +#define META_dirty 3 +#define META_sync 4 +#define META_discard 5 +#define META_forced 6 + +#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) + +/* function prototypes */ +extern metapage_t *__get_metapage(struct inode *inode, + unsigned long lblock, unsigned int size, + int absolute, unsigned long new); + +#define read_metapage(inode, lblock, size, absolute)\ + __get_metapage(inode, lblock, size, absolute, FALSE) + +#define get_metapage(inode, lblock, size, absolute)\ + __get_metapage(inode, lblock, size, absolute, TRUE) + +extern void release_metapage(metapage_t *); + +#define flush_metapage(mp) \ +{\ + set_bit(META_dirty, &(mp)->flag);\ + set_bit(META_sync, &(mp)->flag);\ + release_metapage(mp);\ +} + +#define __sync_metapage(mp) \ + generic_buffer_fdatasync((struct inode *)mp->mapping->host,\ + mp->page->index, mp->page->index + 1) + +#define write_metapage(mp) \ +{\ + set_bit(META_dirty, &(mp)->flag);\ + release_metapage(mp);\ +} + 
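+/*
+ * Illustrative sketch (an editor's example, not from the original JFS
+ * sources): a typical metadata read-modify-write cycle built from the
+ * helpers above.  read_metapage() brings the page in (or finds it in
+ * the metapage hash), the caller edits mp->data, and write_metapage()
+ * marks it dirty and drops the reference.  FALSE, used inside the
+ * read_metapage() macro, is assumed visible from jfs_types.h.
+ */
+static inline int metapage_rmw_example(struct inode *ip, unsigned long lblock,
+ unsigned int size)
+{
+ metapage_t *mp = read_metapage(ip, lblock, size, 0);
+ if (mp == NULL)
+ return -1; /* could not read the page */
+ /* ... modify mp->data here ... */
+ write_metapage(mp); /* set META_dirty and release */
+ return 0;
+}
+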
+#define discard_metapage(mp) \ +{\ + clear_bit(META_dirty, &(mp)->flag);\ + set_bit(META_discard, &(mp)->flag);\ + release_metapage(mp);\ +} + +extern void hold_metapage(metapage_t *, int); + +extern void invalidate_metapages(struct inode *, unsigned long, unsigned long); + +#endif /* _H_JFS_METAPAGE */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_superblock.h linuxppc64_2_4/include/linux/jfs/jfs_superblock.h --- ../kernel.org/linux/include/linux/jfs/jfs_superblock.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_superblock.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,143 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_SUPERBLOCK +#define _H_JFS_SUPERBLOCK +/* + * jfs_superblock.h + */ + +/* + * make the magic number something a human could read + */ +#define JFS_MAGIC "JFS1" /* Magic word: Version 1 */ + +#define JFS_VERSION 1 /* Version number: Version 1 */ + +#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ + +/* + * aggregate superblock + * + * The name superblock is too close to super_block, so the name has been + * changed to jfs_superblock. The utilities are still using the old name. + */ +struct jfs_superblock { + char s_magic[4]; /* 4: magic number */ + u32 s_version; /* 4: version number */ + + s64 s_size; /* 8: aggregate size in hardware/LVM blocks; + * VFS: number of blocks + */ + s32 s_bsize; /* 4: aggregate block size in bytes; + * VFS: fragment size + */ + s16 s_l2bsize; /* 2: log2 of s_bsize */ + s16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */ + s32 s_pbsize; /* 4: hardware/LVM block size in bytes */ + s16 s_l2pbsize; /* 2: log2 of s_pbsize */ + s16 pad; /* 2: padding necessary for alignment */ + + u32 s_agsize; /* 4: allocation group size in aggr. blocks */ + + u32 s_flag; /* 4: aggregate attributes: + * see jfs_filsys.h + */ + u32 s_state; /* 4: mount/unmount/recovery state: + * see jfs_filsys.h + */ + s32 s_compress; /* 4: > 0 if data compression */ + + pxd_t s_ait2; /* 8: first extent of secondary + * aggregate inode table + */ + + pxd_t s_aim2; /* 8: first extent of secondary + * aggregate inode map + */ + u32 s_logdev; /* 4: device address of log */ + s32 s_logserial; /* 4: log serial number at aggregate mount */ + pxd_t s_logpxd; /* 8: inline log extent */ + + pxd_t s_fsckpxd; /* 8: inline fsck work space extent */ + + struct timestruc_t s_time; /* 8: time last updated */ + + s32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for + * the fsck service log. + * N.B. These blocks are divided among the + * versions kept. This is not a per + * version size. + * N.B. These blocks are included in the + * length field of s_fsckpxd. 
+ */ + s8 s_fscklog; /* 1: which fsck service log is most recent + * 0 => no service log data yet + * 1 => the first one + * 2 => the 2nd one + */ + char s_fpack[11]; /* 11: file system volume name + * N.B. This must be 11 bytes to + * conform with the OS/2 BootSector + * requirements + */ + + /* extendfs() parameter under s_state & FM_EXTENDFS */ + s64 s_xsize; /* 8: extendfs s_size */ + pxd_t s_xfsckpxd; /* 8: extendfs fsckpxd */ + pxd_t s_xlogpxd; /* 8: extendfs logpxd */ + /* - 128 byte boundary - */ + + /* + * DFS VFS support (preliminary) + */ + char s_attach; /* 1: VFS: flag: set when aggregate is attached + */ + u8 rsrvd4[7]; /* 7: reserved - set to 0 */ + + u64 totalUsable; /* 8: VFS: total of 1K blocks which are + * available to "normal" (non-root) users. + */ + u64 minFree; /* 8: VFS: # of 1K blocks held in reserve for + * exclusive use of root. This value can be 0, + * and if it is then totalUsable will be equal + * to # of blocks in aggregate. I believe this + * means that minFree + totalUsable = # blocks. + * In that case, we don't need to store both + * totalUsable and minFree since we can compute + * one from the other. I would guess minFree + * would be the one we should store, and + * totalUsable would be the one we should + * compute. (Just a guess...) + */ + + u64 realFree; /* 8: VFS: # of free 1K blocks can be used by + * "normal" users. It may be this is something + * we should compute when asked for instead of + * storing in the superblock. I don't know how + * often this information is needed. + */ + /* + * graffiti area + */ +}; + +extern int readSuper(struct super_block *, struct metapage **); +extern int updateSuper(struct super_block *, uint); + +#endif /*_H_JFS_SUPERBLOCK */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_txnmgr.h linuxppc64_2_4/include/linux/jfs/jfs_txnmgr.h --- ../kernel.org/linux/include/linux/jfs/jfs_txnmgr.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_txnmgr.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,310 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +#ifndef _H_JFS_TXNMGR +#define _H_JFS_TXNMGR +/* + * jfs_txnmgr.h: transaction manager + */ + +#include + +/* + * transaction block + */ +typedef struct tblock { + /* + * tblock_t and jbuf_t common area: struct logsyncblk + * + * the following 3 fields are the same as struct logsyncblk + * which is common to tblock and jbuf to form logsynclist + */ + u16 xflag; /* 2: tx commit type */ + u16 flag; /* 2: tx commit state */ + s32 lsn; /* 4: recovery lsn */ + struct list_head synclist; /* 8: logsynclist link */ + /* (16) */ + + /* lock management */ + struct super_block *sb; /* 4: super block */ + u16 next; /* 2: index first tlock of tid or + * of next tblock on freelist + */ + u16 locker; /* 2: tid of holder of tlock tid wants */ + wait_queue_head_t waitor; /* 4: tids waiting on this tid */ + + /* log management */ + u32 logtid; /* 4: log transaction id */ + /* (32) */ + + /* commit management */ + struct tblock *cqnext; /* 4: commit queue link */ + s32 clsn; /* 4: commit lsn */ + struct lbuf *bp; /* 4: */ + s32 pn; /* 4: commit record log page number */ + s32 eor; /* 4: commit record eor */ + wait_queue_head_t gcwait; /* 4: group commit event list: + * ready transactions wait on this + * event for group commit completion. + */ + struct inode *ip; /* 4: inode being created or deleted */ + s32 rsrvd; /* 4: */ +} tblock_t; /* (64) */ + +extern struct tblock *TxBlock; /* transaction block table */ + +/* commit flags: tblk->xflag */ +#define COMMIT_SYNC 0x0001 /* synchronous commit */ +#define COMMIT_FORCE 0x0002 /* force pageout at end of commit */ +#define COMMIT_FLUSH 0x0004 /* init flush at end of commit */ +#define COMMIT_MAP 0x00f0 +#define COMMIT_PMAP 0x0010 /* update pmap */ +#define COMMIT_WMAP 0x0020 /* update wmap */ +#define COMMIT_PWMAP 0x0040 /* update pwmap */ +#define COMMIT_FREE 0x0f00 +#define COMMIT_DELETE 0x0100 /* inode delete */ +#define COMMIT_TRUNCATE 0x0200 /* file truncation */ +#define COMMIT_CREATE 0x0400 /* inode create */ +#define COMMIT_LAZY 0x0800 /* lazy commit */ +#define COMMIT_PAGE 0x1000 /* Identifies element as metapage */ +#define COMMIT_INODE 0x2000 /* Identifies element as inode */ + +/* group commit flags tblk->flag: see jfs_logmgr.h */ + +/* + * transaction lock + */ +typedef struct tlock { + u16 next; /* 2: index next lockword on tid locklist + * next lockword on freelist + */ + u16 tid; /* 2: transaction id holding lock */ + + u16 flag; /* 2: lock control */ + u16 type; /* 2: log type */ + + struct metapage *mp; /* 4: object page buffer locked */ + struct inode *ip; /* 4: object */ + /* (16) */ + + s16 lock[24]; /* 48: overlay area */ +} tlock_t; /* (64) */ + +extern struct tlock *TxLock; /* transaction lock table */ + +/* + * tlock flag + */ +/* txLock state */ +#define tlckPAGELOCK 0x8000 +#define tlckINODELOCK 0x4000 +#define tlckLINELOCK 0x2000 +#define tlckINLINELOCK 0x1000 +/* lmLog state */ +#define tlckLOG 0x0800 +/* updateMap state */ +#define tlckUPDATEMAP 0x0080 +/* freeLock state */ +#define tlckFREELOCK 0x0008 +#define tlckWRITEPAGE 0x0004 +#define tlckFREEPAGE 0x0002 + +/* + * tlock type + */ +#define tlckTYPE 0xfe00 +#define tlckINODE 0x8000 +#define tlckXTREE 0x4000 +#define tlckDTREE 0x2000 +#define tlckMAP 0x1000 +#define tlckEA 0x0800 +#define tlckACL 0x0400 +#define tlckDATA 
0x0200
+#define tlckBTROOT 0x0100
+
+#define tlckOPERATION 0x00ff
+#define tlckGROW 0x0001 /* file grow */
+#define tlckREMOVE 0x0002 /* file delete */
+#define tlckTRUNCATE 0x0004 /* file truncate */
+#define tlckRELOCATE 0x0008 /* file/directory relocate */
+#define tlckENTRY 0x0001 /* directory insert/delete */
+#define tlckEXTEND 0x0002 /* directory extend in-line */
+#define tlckSPLIT 0x0010 /* split page */
+#define tlckNEW 0x0020 /* new page from split */
+#define tlckFREE 0x0040 /* free page */
+#define tlckRELINK 0x0080 /* update sibling pointer */
+
+/*
+ * linelock for lmLog()
+ *
+ * note: linelock_t and its variations are overlaid
+ * at tlock.lock: watch for alignment;
+ */
+typedef struct {
+ u8 offset; /* 1: */
+ u8 length; /* 1: */
+} lv_t; /* (2) */
+
+#define TLOCKSHORT 20
+#define TLOCKLONG 28
+
+typedef struct {
+ u16 next; /* 2: next linelock */
+
+ s8 maxcnt; /* 1: */
+ s8 index; /* 1: */
+
+ u16 flag; /* 2: */
+ u8 type; /* 1: */
+ u8 l2linesize; /* 1: log2 of linesize */
+ /* (8) */
+
+ lv_t lv[20]; /* 40: */
+} linelock_t; /* (48) */
+
+#define dtlock_t linelock_t
+#define itlock_t linelock_t
+
+typedef struct {
+ u16 next; /* 2: */
+
+ s8 maxcnt; /* 1: */
+ s8 index; /* 1: */
+
+ u16 flag; /* 2: */
+ u8 type; /* 1: */
+ u8 l2linesize; /* 1: log2 of linesize */
+ /* (8) */
+
+ lv_t header; /* 2: */
+ lv_t lwm; /* 2: low water mark */
+ lv_t hwm; /* 2: high water mark */
+ lv_t twm; /* 2: */
+ /* (16) */
+
+ s32 pxdlock[8]; /* 32: */
+} xtlock_t; /* (48) */
+
+
+/*
+ * maplock for txUpdateMap()
+ *
+ * note: maplock_t and its variations are overlaid
+ * at tlock.lock/linelock: watch for alignment;
+ * N.B. next field may be set by linelock, and should not
+ * be modified by maplock;
+ * N.B. index of the first pxdlock specifies index of next
+ * free maplock (i.e., number of maplock) in the tlock;
+ */
+typedef struct {
+ u16 next; /* 2: */
+
+ u8 maxcnt; /* 1: */
+ u8 index; /* 1: next free maplock index */
+
+ u16 flag; /* 2: */
+ u8 type; /* 1: */
+ u8 count; /* 1: number of pxd/xad */
+ /* (8) */
+
+ pxd_t pxd; /* 8: */
+} maplock_t; /* (16): */
+
+/* maplock flag */
+#define mlckALLOC 0x00f0
+#define mlckALLOCXADLIST 0x0080
+#define mlckALLOCPXDLIST 0x0040
+#define mlckALLOCXAD 0x0020
+#define mlckALLOCPXD 0x0010
+#define mlckFREE 0x000f
+#define mlckFREEXADLIST 0x0008
+#define mlckFREEPXDLIST 0x0004
+#define mlckFREEXAD 0x0002
+#define mlckFREEPXD 0x0001
+
+#define pxdlock_t maplock_t
+
+typedef struct {
+ u16 next; /* 2: */
+
+ u8 maxcnt; /* 1: */
+ u8 index; /* 1: */
+
+ u16 flag; /* 2: */
+ u8 type; /* 1: */
+ u8 count; /* 1: number of pxd/xad */
+ /* (8) */
+
+ void *xdlist; /* 4: pxd/xad list */
+ s32 rsrvd; /* 4: */
+} xdlistlock_t; /* (16): */
+
+
+/*
+ * commit
+ *
+ * parameter to the commit manager routines
+ */
+typedef struct commit {
+ int tid; /* 4: tid = index of tblock */
+ int flag; /* 4: flags */
+ log_t *log; /* 4: log */
+ struct super_block *sb; /* 4: superblock */
+
+ int nip; /* 4: number of entries in iplist */
+ struct inode **iplist; /* 4: list of pointers to inodes */
+ /* (32) */
+
+ /* log record descriptor on 64-bit boundary */
+ lrd_t lrd; /* : log record descriptor */
+} commit_t;
+
+/*
+ * external declarations
+ */
+extern tlock_t *txLock(int tid, struct inode *ip, struct metapage *mp, int flag);
+
+extern tlock_t *txMaplock(int tid, struct inode *ip, int flag);
+
+extern int txCommit(int tid, int nip, struct inode **iplist, int flag);
+
+extern void txBegin(struct super_block *sb, int *tid, int flag);
+
+extern void 
txBeginAnon(struct super_block *sb); + +extern void txEnd(int tid); + +extern void txAbort(int tid, int dirty); + +extern linelock_t *txLinelock(linelock_t * tlock); + +extern void txFreeMap(struct inode *ip, + maplock_t * maplock, tblock_t * tblk, int maptype); + +extern void txEA(int tid, struct inode *ip, dxd_t * oldea, dxd_t * newea); + +extern void txFreelock(struct inode *ip); + +extern int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); + +#endif /* _H_JFS_TXNMGR */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_types.h linuxppc64_2_4/include/linux/jfs/jfs_types.h --- ../kernel.org/linux/include/linux/jfs/jfs_types.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_types.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,184 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_JFS_TYPES +#define _H_JFS_TYPES + +/* + * jfs_types.h: + * + * basic type/utility definitions + * + * note: this header file must be the 1st include file + * of JFS include list in all JFS .c file. 
+ */ + +#include +#include + +#include + +#include + + +/* + * Almost identical to Linux's timespec, but not quite + */ +struct timestruc_t { + u32 tv_sec; + u32 tv_nsec; +}; + +/* + * handy + */ + +#define LEFTMOSTONE 0x80000000 +#define HIGHORDER 0x80000000u /* high order bit on */ +#define ONES 0xffffffffu /* all bit on */ + +typedef int boolean_t; +#define TRUE 1 +#define FALSE 0 + +/* + * logical xd (lxd) + */ +typedef struct { + unsigned len:24; + unsigned off1:8; + u32 off2; +} lxd_t; + +/* lxd_t field construction */ +#define LXDlength(lxd, length32) ( (lxd)->len = length32 ) +#define LXDoffset(lxd, offset64)\ +{\ + (lxd)->off1 = ((s64)offset64) >> 32;\ + (lxd)->off2 = (offset64) & 0xffffffff;\ +} + +/* lxd_t field extraction */ +#define lengthLXD(lxd) ( (lxd)->len ) +#define offsetLXD(lxd)\ + ( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 ) + +/* lxd list */ +typedef struct { + s16 maxnlxd; + s16 nlxd; + lxd_t *lxd; +} lxdlist_t; + +/* + * physical xd (pxd) + */ +typedef struct { + unsigned len:24; + unsigned addr1:8; + u32 addr2; +} pxd_t; + +/* xd_t field construction */ + +#define PXDlength(pxd, length32) ((pxd)->len = __cpu_to_le24(length32)) +#define PXDaddress(pxd, address64)\ +{\ + (pxd)->addr1 = ((s64)address64) >> 32;\ + (pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ +} + +/* xd_t field extraction */ +#define lengthPXD(pxd) __le24_to_cpu((pxd)->len) +#define addressPXD(pxd)\ + ( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2)) + +/* pxd list */ +typedef struct { + s16 maxnpxd; + s16 npxd; + pxd_t pxd[8]; +} pxdlist_t; + + +/* + * data extent descriptor (dxd) + */ +typedef struct { + unsigned flag:8; /* 1: flags */ + unsigned rsrvd:24; /* 3: */ + u32 size; /* 4: size in byte */ + unsigned len:24; /* 3: length in unit of fsblksize */ + unsigned addr1:8; /* 1: address in unit of fsblksize */ + u32 addr2; /* 4: address in unit of fsblksize */ +} dxd_t; /* - 16 - */ + +/* dxd_t flags */ +#define DXD_INDEX 0x80 /* B+-tree index */ +#define DXD_INLINE 0x40 /* in-line data extent */ +#define DXD_EXTENT 0x20 /* out-of-line single extent */ +#define DXD_FILE 0x10 /* out-of-line file (inode) */ +#define DXD_CORRUPT 0x08 /* Inconsistency detected */ + +/* dxd_t field construction + * Conveniently, the PXD macros work for DXD + */ +#define DXDlength PXDlength +#define DXDaddress PXDaddress +#define lengthDXD lengthPXD +#define addressDXD addressPXD + +/* + * directory entry argument + */ +typedef struct component_name { + int namlen; + wchar_t *name; +} component_t; + + +/* + * DASD limit information - stored in directory inode + */ +typedef struct dasd { + u8 thresh; /* Alert Threshold (in percent) */ + u8 delta; /* Alert Threshold delta (in percent) */ + u8 rsrvd1; + u8 limit_hi; /* DASD limit (in logical blocks) */ + u32 limit_lo; /* DASD limit (in logical blocks) */ + u8 rsrvd2[3]; + u8 used_hi; /* DASD usage (in logical blocks) */ + u32 used_lo; /* DASD usage (in logical blocks) */ +} dasd_t; + +#define DASDLIMIT(dasdp) \ + (((u64)((dasdp)->limit_hi) << 32) + __le32_to_cpu((dasdp)->limit_lo)) +#define setDASDLIMIT(dasdp, limit)\ +{\ + (dasdp)->limit_hi = ((u64)limit) >> 32;\ + (dasdp)->limit_lo = __cpu_to_le32(limit);\ +} +#define DASDUSED(dasdp) \ + (((u64)((dasdp)->used_hi) << 32) + __le32_to_cpu((dasdp)->used_lo)) +#define setDASDUSED(dasdp, used)\ +{\ + (dasdp)->used_hi = ((u64)used) >> 32;\ + (dasdp)->used_lo = __cpu_to_le32(used);\ +} + +#endif /* !_H_JFS_TYPES */ diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_unicode.h 
linuxppc64_2_4/include/linux/jfs/jfs_unicode.h --- ../kernel.org/linux/include/linux/jfs/jfs_unicode.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_unicode.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,143 @@ +/* + * unistrk: Unicode kernel case support + * + * Function: + * Convert a unicode character to upper or lower case using + * compressed tables. + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + */ + +#include +#include + +typedef struct { + wchar_t start; + wchar_t end; + signed char *table; +} UNICASERANGE; + +extern signed char UniUpperTable[512]; +extern UNICASERANGE UniUpperRange[]; +extern int get_UCSname(component_t *, struct dentry *, struct nls_table *); +extern int jfs_strfromUCS_le(char *, const wchar_t *, int, struct nls_table *); + +#define free_UCSname(COMP) kfree((COMP)->name) + +/* + * UniStrcpy: Copy a string + */ +static inline wchar_t *UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2) +{ + wchar_t *anchor = ucs1; /* save the start of result string */ + + while ((*ucs1++ = *ucs2++)); + return anchor; +} + + + +/* + * UniStrncpy: Copy length limited string with pad + */ +static inline wchar_t *UniStrncpy(wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + wchar_t *anchor = ucs1; + + while (n-- && *ucs2) /* Copy the strings */ + *ucs1++ = *ucs2++; + + n++; + while (n--) /* Pad with nulls */ + *ucs1++ = 0; + return anchor; +} + +/* + * UniStrncmp_le: Compare length limited string - native to little-endian + */ +static inline int UniStrncmp_le(const wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + if (!n) + return 0; /* Null strings are equal */ + while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { + ucs1++; + ucs2++; + } + return (int) *ucs1 - (int) __le16_to_cpu(*ucs2); +} + +/* + * UniStrncpy_le: Copy length limited string with pad to little-endian + */ +static inline wchar_t *UniStrncpy_le(wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + wchar_t *anchor = ucs1; + + while (n-- && *ucs2) /* Copy the strings */ + *ucs1++ = __le16_to_cpu(*ucs2++); + + n++; + while (n--) /* Pad with nulls */ + *ucs1++ = 0; + return anchor; +} + + +/* + * UniToupper: Convert a unicode character to upper case + */ +static inline wchar_t UniToupper(register wchar_t uc) +{ + register UNICASERANGE *rp; + + if (uc < sizeof(UniUpperTable)) { /* Latin characters */ + return uc + UniUpperTable[uc]; /* Use base tables */ + } else { + rp = UniUpperRange; /* Use range tables */ + while (rp->start) { + if (uc < rp->start) /* Before start of range */ + return uc; /* Uppercase = input */ + if (uc <= rp->end) /* In range */ + return uc + rp->table[uc - rp->start]; + rp++; /* Try next range */ + } + } + return uc; /* Past last range */ +} + + +/* + * UniStrupr: Upper case a unicode string + */ +static inline wchar_t 
*UniStrupr(register wchar_t * upin) +{ + register wchar_t *up; + + up = upin; + while (*up) { /* For all characters */ + *up = UniToupper(*up); + up++; + } + return upin; /* Return input pointer */ +} + diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs/jfs_xtree.h linuxppc64_2_4/include/linux/jfs/jfs_xtree.h --- ../kernel.org/linux/include/linux/jfs/jfs_xtree.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs/jfs_xtree.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,143 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Change History : + * +*/ + +#ifndef _H_JFS_XTREE +#define _H_JFS_XTREE + +/* + * jfs_xtree.h: extent allocation descriptor B+-tree manager + */ + +#include + + +/* + * extent allocation descriptor (xad) + */ +typedef struct xad { + unsigned flag:8; /* 1: flag */ + unsigned rsvrd:16; /* 2: reserved */ + unsigned off1:8; /* 1: offset in unit of fsblksize */ + u32 off2; /* 4: offset in unit of fsblksize */ + unsigned len:24; /* 3: length in unit of fsblksize */ + unsigned addr1:8; /* 1: address in unit of fsblksize */ + u32 addr2; /* 4: address in unit of fsblksize */ +} xad_t; /* (16) */ + +#define MAXXLEN ((1 << 24) - 1) + +#define XTSLOTSIZE 16 +#define L2XTSLOTSIZE 4 + +/* xad_t field construction */ +#define XADoffset(xad, offset64)\ +{\ + (xad)->off1 = ((u64)offset64) >> 32;\ + (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ +} +#define XADaddress(xad, address64)\ +{\ + (xad)->addr1 = ((u64)address64) >> 32;\ + (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ +} +#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) + +/* xad_t field extraction */ +#define offsetXAD(xad)\ + ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) +#define addressXAD(xad)\ + ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) +#define lengthXAD(xad) __le24_to_cpu((xad)->len) + +/* xad list */ +typedef struct { + s16 maxnxad; + s16 nxad; + xad_t *xad; +} xadlist_t; + +/* xad_t flags */ +#define XAD_NEW 0x01 /* new */ +#define XAD_EXTENDED 0x02 /* extended */ +#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ +#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ +#define XAD_COW 0x10 /* copy-on-write */ + + +/* possible values for maxentry */ +#define XTROOTINITSLOT_DIR 6 +#define XTROOTINITSLOT 10 +#define XTROOTMAXSLOT 18 +#define XTPAGEMAXSLOT 256 +#define XTENTRYSTART 2 + +/* + * xtree page: + */ +typedef union { + struct xtheader { + s64 next; /* 8: */ + s64 prev; /* 8: */ + + u8 flag; /* 1: */ + u8 rsrvd1; /* 1: */ + s16 nextindex; /* 2: next index = number of entries */ + s16 maxentry; /* 2: max number of entries */ + s16 rsrvd2; /* 2: */ + + pxd_t self; /* 8: self */ + } header; /* (32) */ + + xad_t xad[XTROOTMAXSLOT]; /* 16 * maxentry: xad array */ +} 
+
+/*
+ * external declaration
+ */
+extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
+		    int *pflag, s64 * paddr, int *plen, int flag);
+extern int xtLookupList(struct inode *ip, lxdlist_t * lxdlist,
+			xadlist_t * xadlist, int flag);
+extern void xtInitRoot(int tid, struct inode *ip);
+extern int xtInsert(int tid, struct inode *ip,
+		    int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag);
+extern int xtExtend(int tid, struct inode *ip, s64 xoff, int xlen,
+		    int flag);
+extern int xtTailgate(int tid, struct inode *ip,
+		      s64 xoff, int xlen, s64 xaddr, int flag);
+extern int xtUpdate(int tid, struct inode *ip, struct xad *nxad);
+extern int xtDelete(int tid, struct inode *ip, s64 xoff, int xlen,
+		    int flag);
+extern s64 xtTruncate(int tid, struct inode *ip, s64 newsize, int type);
+extern s64 xtTruncate_pmap(int tid, struct inode *ip, s64 committed_size);
+extern int xtRelocate(int tid, struct inode *ip,
+		      xad_t * oxad, s64 nxaddr, int xtype);
+extern int xtAppend(int tid,
+		    struct inode *ip, int xflag, s64 xoff, int maxblocks,
+		    int *xlenp, s64 * xaddrp, int flag);
+
+#ifdef _JFS_DEBUG_XTREE
+extern int xtDisplayTree(struct inode *ip);
+extern int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p);
+#endif				/* _JFS_DEBUG_XTREE */
+
+#endif				/* !_H_JFS_XTREE */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs_fs.h linuxppc64_2_4/include/linux/jfs_fs.h
--- ../kernel.org/linux/include/linux/jfs_fs.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs_fs.h	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,34 @@
+/*
+ *
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+*/
+#ifndef _LINUX_JFS_FS_H
+#define _LINUX_JFS_FS_H
+
+#include <linux/version.h>
+
+#include <linux/jfs/jfs_types.h>
+#include <linux/jfs_fs_i.h>
+#include <linux/jfs_fs_sb.h>
+
+
+/* JFS magic number */
+
+#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */
+
+#endif /* _LINUX_JFS_FS_H */
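The magic constant encodes the ASCII string "JFS1": 0x3153464a is ('1' << 24) | ('S' << 16) | ('F' << 8) | 'J', so a little-endian on-disk dump shows the bytes 'J' 'F' 'S' '1' in order. A hypothetical sanity check, not code from this patch:

	/* Hypothetical: s_magic is assumed read from disk in CPU byte order
	 * after the usual little-endian conversion.
	 */
	static int is_jfs_super(u32 s_magic)
	{
		return s_magic == JFS_SUPER_MAGIC;	/* 0x3153464a, "JFS1" */
	}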
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs_fs_i.h linuxppc64_2_4/include/linux/jfs_fs_i.h
--- ../kernel.org/linux/include/linux/jfs_fs_i.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs_fs_i.h	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,80 @@
+/*
+ *
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+*/
+#ifndef _JFS_FS_I
+#define _JFS_FS_I
+
+#include <linux/jfs/jfs_xtree.h>
+#include <linux/jfs/jfs_dtree.h>
+
+typedef struct jfs_rwlock {
+	struct rw_semaphore rw_sem;
+	atomic_t in_use;	/* for hacked implementation of trylock */
+} jfs_rwlock_t;
+
+#define JFS_IP(ip) ((struct jfs_inode_info *)(ip)->u.generic_ip)
+
+struct jfs_inode_info {
+	int fileset;		/* 4: fileset number (always 16) */
+	uint mode2;		/* 4: jfs-specific mode */
+	pxd_t ixpxd;		/* 8: inode extent descriptor */
+	dxd_t acl;		/* 16: dxd describing acl */
+	dxd_t ea;		/* 16: dxd describing ea */
+	time_t otime;		/* 4: time created */
+	uint next_index;	/* 4: next available directory entry index */
+	int acltype;		/* 4: Type of ACL */
+	short btorder;		/* 2: access order */
+	short btindex;		/* 2: btpage entry index */
+	struct inode *ipimap;	/* 4: inode map */
+	ushort flag;		/* 2: JFS in-memory flag */
+	unchar cflag;		/* 1: commit flags */
+	unchar agno;		/* 1: ag number */
+	ushort bxflag;		/* 2: xflag of pseudo buffer? */
+	short blid;		/* 2: lid of pseudo buffer? */
+	ushort atlhead;		/* 2: anonymous tlock list head */
+	ushort atltail;		/* 2: anonymous tlock list tail */
+	struct inode *atlnext;	/* 4: next inode w/anonymous txn's */
+	struct inode *atlprev;	/* 4: previous inode w/anonymous txn's */
+	struct page *extent_page;	/* 4: page containing extent */
+	jfs_rwlock_t rdwrlock;	/* 12/20: read/write lock */
+	ushort xtlid;		/* 2: lid of xtree lock on directory */
+	short pad;		/* 2: pad */
+	union {
+		struct {
+			xtpage_t _xtroot;	/* 288: xtree root */
+			struct inomap *_imap;	/* 4: inode map header */
+		} file;
+		struct {
+			dir_table_slot_t _table[12];	/* 96: directory index */
+			dtroot_t _dtroot;	/* 288: dtree root */
+		} dir;
+		struct {
+			unchar _unused[16];	/* 16: */
+			dxd_t _dxd;	/* 16: */
+			unchar _inline[128];	/* 128: inline symlink */
+		} link;
+	} u;
+};
+#define i_xtroot u.file._xtroot
+#define i_imap u.file._imap
+#define i_dirtable u.dir._table
+#define i_dtroot u.dir._dtroot
+#define i_inline u.link._inline
+
+#endif
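JFS_IP and the i_* macros above follow the 2.4 convention of hanging filesystem-private inode state off inode->u (here via the generic_ip pointer). A minimal, hypothetical usage fragment:

	/* Hypothetical: reach JFS-private state from a VFS inode.  Valid
	 * only for a regular file, since i_xtroot aliases u.file._xtroot.
	 */
	static xtpage_t *example_xtroot(struct inode *ip)
	{
		struct jfs_inode_info *jfs_ip = JFS_IP(ip);

		return &jfs_ip->i_xtroot;	/* u.file._xtroot via the macro */
	}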
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/jfs_fs_sb.h linuxppc64_2_4/include/linux/jfs_fs_sb.h
--- ../kernel.org/linux/include/linux/jfs_fs_sb.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/jfs_fs_sb.h	Wed Nov 14 10:19:36 2001
@@ -0,0 +1,53 @@
+/*
+ *
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+*/
+#ifndef _JFS_FS_SB
+#define _JFS_FS_SB
+
+#define JFS_SBI(sb) ((struct jfs_sb_info *)(sb)->u.generic_sbp)
+
+struct jfs_sb_info {
+	unsigned long mntflag;	/* 4: aggregate attributes */
+	struct inode *ipbmap;	/* 4: block map inode */
+	struct inode *ipaimap;	/* 4: aggregate inode map inode */
+	struct inode *ipaimap2;	/* 4: secondary aimap inode */
+	struct inode *ipimap;	/* 4: fileset inode map inode */
+	struct jfs_log *log;	/* 4: log */
+	short bsize;		/* 2: logical block size */
+	short l2bsize;		/* 2: log2 logical block size */
+	short nbperpage;	/* 2: blocks per page */
+	short l2nbperpage;	/* 2: log2 blocks per page */
+	short l2niperblk;	/* 2: log2 inodes per block */
+	short reserved;		/* 2: reserved */
+	pxd_t logpxd;		/* 8: pxd describing log */
+	pxd_t ait2;		/* 8: pxd describing AIT copy */
+	/* Formerly in ipimap */
+	uint gengen;		/* 4: inode generation generator */
+	uint inostamp;		/* 4: shows inode belongs to fileset */
+
+	/* Formerly in ipbmap */
+	struct bmap *bmap;	/* 4: incore bmap descriptor */
+	struct nls_table *nls_tab;	/* 4: current codepage */
+	struct inode *direct_inode;	/* 4: inode for physical I/O */
+	struct address_space *direct_mapping;	/* 4: mapping for physical I/O */
+};				/* (72) */
+
+#define isReadOnly(ip) ((JFS_SBI((ip)->i_sb)->log) ? 0 : 1)
+
+#endif
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/major.h linuxppc64_2_4/include/linux/major.h
--- ../kernel.org/linux/include/linux/major.h	Tue Sep 18 01:23:40 2001
+++ linuxppc64_2_4/include/linux/major.h	Thu Aug 30 10:54:52 2001
@@ -117,6 +117,9 @@
 #define COMPAQ_CISS_MAJOR6 110
 #define COMPAQ_CISS_MAJOR7 111
 
+#define VIODASD_MAJOR 112
+#define VIOCD_MAJOR 113
+
 #define ATARAID_MAJOR 114
 
 #define DASD_MAJOR 94 /* Official assignations from Peter */
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/pci.h linuxppc64_2_4/include/linux/pci.h
--- ../kernel.org/linux/include/linux/pci.h	Wed Oct 24 00:00:02 2001
+++ linuxppc64_2_4/include/linux/pci.h	Fri Oct 26 02:52:26 2001
@@ -416,10 +416,10 @@
 	void *sysdata;		/* hook for sys-specific extension */
 	struct proc_dir_entry *procdir;	/* directory entry in /proc/bus/pci */
 
-	unsigned char number;		/* bus number */
-	unsigned char primary;		/* number of primary bridge */
-	unsigned char secondary;	/* number of secondary bridge */
-	unsigned char subordinate;	/* max number of subordinate buses */
+	unsigned int number;		/* bus number */
+	unsigned int primary;		/* number of primary bridge */
+	unsigned int secondary;		/* number of secondary bridge */
+	unsigned int subordinate;	/* max number of subordinate buses */
 
 	char name[48];
 
 	unsigned short vendor;
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/proc_fs.h linuxppc64_2_4/include/linux/proc_fs.h
--- ../kernel.org/linux/include/linux/proc_fs.h	Tue Oct 23 23:59:16 2001
+++ linuxppc64_2_4/include/linux/proc_fs.h	Thu Nov 15 00:12:50 2001
@@ -25,7 +25,11 @@
 /* Finally, the dynamically allocatable proc entries are reserved: */
 
 #define PROC_DYNAMIC_FIRST 4096
+#ifdef CONFIG_PPC64
+#define PROC_NDYNAMIC 16384
+#else
 #define PROC_NDYNAMIC 4096
+#endif
 
 #define PROC_SUPER_MAGIC 0x9fa0
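Like JFS_IP, JFS_SBI hangs per-mount state off the 2.4 superblock's u.generic_sbp, and isReadOnly() keys off whether a journal log was attached at mount time. An illustrative fragment (the function is hypothetical):

	/* Hypothetical: a mount is treated as read-only when no log was
	 * opened (sbi->log == NULL); this is the test isReadOnly() inverts.
	 */
	static int example_can_write(struct inode *ip)
	{
		struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);

		return sbi->log != NULL;
	}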
diff -uNr --exclude=CVS ../kernel.org/linux/include/linux/vethdevice.h linuxppc64_2_4/include/linux/vethdevice.h
--- ../kernel.org/linux/include/linux/vethdevice.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/vethdevice.h	Tue Jun 19 11:06:08 2001
@@ -0,0 +1,16 @@
+/* File vethdevice.h created by Kyle A. Lucke on Wed Aug 9 2000. */
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _LINUX_VETHDEVICE_H
+#define _LINUX_VETHDEVICE_H
+
+#include <linux/netdevice.h>
+
+#ifdef __KERNEL__
+extern struct net_device * init_vethdev(struct net_device *, int, int);
+#endif
+
+#endif /* _LINUX_VETHDEVICE_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux/kernel/ptrace.c linuxppc64_2_4/kernel/ptrace.c
--- ../kernel.org/linux/kernel/ptrace.c	Tue Sep 18 18:32:16 2001
+++ linuxppc64_2_4/kernel/ptrace.c	Wed Oct 24 00:47:27 2001
@@ -126,10 +126,11 @@
 	flush_cache_page(vma, addr);
 
 	if (write) {
-		maddr = kmap(page);
-		memcpy(maddr + (addr & ~PAGE_MASK), buf, len);
+		maddr = kmap(page) + (addr & ~PAGE_MASK);
+		memcpy(maddr, buf, len);
 		flush_page_to_ram(page);
-		flush_icache_page(vma, page);
+		flush_icache_range((unsigned long) maddr,
+				   (unsigned long) maddr + len);
 		kunmap(page);
 	} else {
 		maddr = kmap(page);
diff -uNr --exclude=CVS ../kernel.org/linux/kernel/sched.c linuxppc64_2_4/kernel/sched.c
--- ../kernel.org/linux/kernel/sched.c	Wed Oct 17 16:14:37 2001
+++ linuxppc64_2_4/kernel/sched.c	Thu Nov 15 00:11:31 2001
@@ -1176,8 +1176,8 @@
 	else
 		printk(" (NOTLB)\n");
 
-#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_ARM) || defined(CONFIG_ALPHA)
-/* This is very useful, but only works on ARM, x86 and sparc64 right now */
+#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_ARM) || defined(CONFIG_ALPHA) || defined(CONFIG_PPC64)
+/* This is very useful, but doesn't work on all archs yet */
 	{
 		extern void show_trace_task(struct task_struct *tsk);
 		show_trace_task(p);
diff -uNr --exclude=CVS ../kernel.org/linux/lib/brlock.c linuxppc64_2_4/lib/brlock.c
--- ../kernel.org/linux/lib/brlock.c	Fri Jun 29 21:38:26 2001
+++ linuxppc64_2_4/lib/brlock.c	Thu Nov 15 00:10:51 2001
@@ -14,6 +14,7 @@
 
 #include
 #include
+#include <linux/delay.h>
 
 #ifdef __BRLOCK_USE_ATOMICS
@@ -54,6 +55,8 @@
 		if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
 			spin_unlock(&__br_write_locks[idx].lock);
 			barrier();
+			/* We must allow recursive readers to make progress */
+			udelay(1);
 			goto again;
 		}
 	}
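The lib/brlock.c change just above addresses a livelock: the writer spins until every per-CPU reader count in __brlock_array drains to zero, but a reader that re-acquires the lock recursively can keep its count non-zero indefinitely if the writer never backs off. A sketch of the shape of the patched retry loop; declarations and the loop bound are simplified reconstructions, and only the udelay() line is new in this patch:

	/* simplified from the non-atomic writer path in lib/brlock.c */
	again:
		spin_lock(&__br_write_locks[idx].lock);
		for (i = 0; i < NR_CPUS; i++)
			if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
				spin_unlock(&__br_write_locks[idx].lock);
				barrier();
				/* back off so a recursive reader on another
				 * CPU can finish and drop its count */
				udelay(1);
				goto again;
			}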
diff -uNr --exclude=CVS ../kernel.org/linux/mm/filemap.c linuxppc64_2_4/mm/filemap.c
--- ../kernel.org/linux/mm/filemap.c	Tue Oct 23 19:52:48 2001
+++ linuxppc64_2_4/mm/filemap.c	Fri Oct 26 02:56:04 2001
@@ -202,7 +202,7 @@
 
 static inline void truncate_partial_page(struct page *page, unsigned partial)
 {
-	memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
+	memclear_page_flush(page, partial, PAGE_CACHE_SIZE-partial);
 
 	if (page->buffers)
 		block_flushpage(page, partial);
@@ -1689,7 +1689,7 @@
 		struct page *new_page = alloc_page(GFP_HIGHUSER);
 
 		if (new_page) {
-			copy_user_highpage(new_page, old_page, address);
+			copy_user_page(new_page, old_page, address);
 			flush_page_to_ram(new_page);
 		} else
 			new_page = NOPAGE_OOM;
diff -uNr --exclude=CVS ../kernel.org/linux/mm/memory.c linuxppc64_2_4/mm/memory.c
--- ../kernel.org/linux/mm/memory.c	Mon Oct 15 14:09:50 2001
+++ linuxppc64_2_4/mm/memory.c	Fri Oct 26 02:56:04 2001
@@ -62,10 +62,10 @@
 static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address)
 {
 	if (from == ZERO_PAGE(address)) {
-		clear_user_highpage(to, address);
+		clear_user_page(to, address);
 		return;
 	}
-	copy_user_highpage(to, from, address);
+	copy_user_page(to, from, address);
 }
 
 mem_map_t * mem_map;
 
@@ -1202,7 +1202,7 @@
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page)
 		goto no_mem;
-	clear_user_highpage(page, addr);
+	clear_user_page(page, addr);
 
 	spin_lock(&mm->page_table_lock);
 	if (!pte_none(*page_table)) {
diff -uNr --exclude=CVS ../kernel.org/linux/mm/page_alloc.c linuxppc64_2_4/mm/page_alloc.c
--- ../kernel.org/linux/mm/page_alloc.c	Tue Oct 23 23:40:32 2001
+++ linuxppc64_2_4/mm/page_alloc.c	Fri Oct 26 02:56:04 2001
@@ -423,9 +423,8 @@
 	page = alloc_pages(gfp_mask, 0);
 	if (page) {
-		void *address = page_address(page);
-		clear_page(address);
-		return (unsigned long) address;
+		clear_mem_page(page);
+		return (unsigned long) page_address(page);
 	}
 	return 0;
 }
diff -uNr --exclude=CVS ../kernel.org/linux/mm/shmem.c linuxppc64_2_4/mm/shmem.c
--- ../kernel.org/linux/mm/shmem.c	Wed Oct 17 16:19:20 2001
+++ linuxppc64_2_4/mm/shmem.c	Fri Oct 26 02:56:04 2001
@@ -581,7 +581,7 @@
 		page = page_cache_alloc(mapping);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
-		clear_highpage(page);
+		clear_mem_page(page);
 		inode->i_blocks += BLOCKS_PER_PAGE;
 		add_to_page_cache (page, mapping, idx);
 	}
@@ -644,7 +644,7 @@
 		struct page *new_page = page_cache_alloc(inode->i_mapping);
 
 		if (new_page) {
-			copy_user_highpage(new_page, page, address);
+			copy_user_page(new_page, page, address);
 			flush_page_to_ram(new_page);
 		} else
 			new_page = NOPAGE_OOM;
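Taken together, the mm/ hunks above move from address-oriented highmem helpers to struct page based ones, leaving the choice of mapping and any D-cache alias avoidance to the architecture. A summary of the substitutions, as C comments for reference; clear_mem_page and memclear_page_flush are helpers this patch supplies elsewhere in the tree (assumed, not shown in this section):

	/*
	 * Old 2.4 helper                            Replacement used here
	 * ----------------------------------------  --------------------------------
	 * clear_highpage(page)                      clear_mem_page(page)
	 * memclear_highpage_flush(page, off, len)   memclear_page_flush(page, off, len)
	 * clear_user_highpage(page, vaddr)          clear_user_page(page, vaddr)
	 * copy_user_highpage(to, from, vaddr)       copy_user_page(to, from, vaddr)
	 */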