diff -Nru a/Documentation/mmio_barrier.txt b/Documentation/mmio_barrier.txt --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/Documentation/mmio_barrier.txt Tue Sep 17 23:47:52 2002 @@ -0,0 +1,15 @@ +On some platforms, so-called memory-mapped I/O is weakly ordered. For +example, the following might occur: + +CPU A writes 0x1 to Device #1 +CPU B writes 0x2 to Device #1 +Device #1 sees 0x2 +Device #1 sees 0x1 + +On such platforms, driver writers are responsible for ensuring that I/O +writes to memory-mapped addresses on their device arrive in the order +intended. The mmiob() macro is provided for this purpose. A typical use +of this macro might be immediately prior to the exit of a critical +section of code protected by spinlocks. This would ensure that subsequent +writes to I/O space arrive only after all prior writes (much like a +typical memory barrier op, mb(), only with respect to I/O). diff -Nru a/Makefile b/Makefile --- a/Makefile Tue Sep 17 23:47:51 2002 +++ b/Makefile Tue Sep 17 23:47:51 2002 @@ -235,7 +235,7 @@ CPPFLAGS := -D__KERNEL__ -I$(objtree)/include -CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -g -O2 \ -fomit-frame-pointer -fno-strict-aliasing -fno-common AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) diff -Nru a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c --- a/arch/alpha/kernel/traps.c Tue Sep 17 23:47:51 2002 +++ b/arch/alpha/kernel/traps.c Tue Sep 17 23:47:51 2002 @@ -171,6 +171,11 @@ dik_show_trace(sp); } +void dump_stack(void) +{ + show_stack(NULL); +} + void die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) { diff -Nru a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c --- a/arch/cris/kernel/traps.c Tue Sep 17 23:47:52 2002 +++ b/arch/cris/kernel/traps.c Tue Sep 17 23:47:52 2002 @@ -230,8 +230,12 @@ #endif } -/* This is normally the 'Oops' routine */ +void dump_stack(void) +{ + show_stack(NULL); +} +/* This is normally the 'Oops' routine */ void die_if_kernel(const char * str, struct pt_regs * regs, long err) { diff -Nru a/arch/i386/Config.help b/arch/i386/Config.help --- a/arch/i386/Config.help Tue Sep 17 23:47:51 2002 +++ b/arch/i386/Config.help Tue Sep 17 23:47:51 2002 @@ -25,6 +25,15 @@ If you don't know what to do here, say N. +CONFIG_HUGETLB_PAGE + This enables support for huge pages. User space applications + can make use of this support with the sys_alloc_hugepages and + sys_free_hugepages system calls. If your applications are + huge page aware and your processor (Pentium or later for x86) + supports this, then say Y here. + + Otherwise, say N.
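To make the help text above concrete, here is a rough user-space sketch of the new interface. This is illustrative only, not part of the patch: the syscall numbers 250/251 come from the arch/i386/kernel/entry.S hunk below, the 4MB length assumes non-PAE IA-32 huge pages, and (per alloc_private_hugetlb_pages() further down) the caller needs CAP_SYS_ADMIN or membership in group 0.

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/mman.h>

	#define __NR_alloc_hugepages 250	/* i386 numbering from this patch */
	#define __NR_free_hugepages  251

	int main(void)
	{
		unsigned long len = 4UL * 1024 * 1024;	/* must be a multiple of HPAGE_SIZE */
		/* key == 0 requests private huge pages; key > 0 names a shared
		 * segment, created if IPC_CREAT is passed in the flag argument. */
		long addr = syscall(__NR_alloc_hugepages, 0, 0UL, len,
				    PROT_READ | PROT_WRITE, 0);
		if (addr == -1) {
			perror("alloc_hugepages");
			return 1;
		}
		memset((void *) addr, 0, len);		/* touch the huge page */
		syscall(__NR_free_hugepages, addr);
		return 0;
	}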
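Likewise, the ordering rule described in Documentation/mmio_barrier.txt above is easiest to see in code. A minimal sketch, assuming a fictitious device: my_dev, my_dev_kick() and REG_CMD are invented names, and mmiob() is expected to compile to a no-op on strongly ordered platforms.

	#include <linux/spinlock.h>
	#include <asm/io.h>

	struct my_dev {
		unsigned long regs;	/* ioremap()ed MMIO base */
		spinlock_t lock;	/* serializes writers to the device */
	};
	#define REG_CMD 0x10		/* invented register offset */

	static void my_dev_kick(struct my_dev *dev, u32 cmd)
	{
		spin_lock(&dev->lock);
		writel(cmd, dev->regs + REG_CMD);
		/* Force the posted write out to the device before dropping
		 * the lock, so that a write issued by the next lock holder
		 * cannot reach the device first. */
		mmiob();
		spin_unlock(&dev->lock);
	}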
+ CONFIG_PREEMPT This option reduces the latency of the kernel when reacting to real-time or interactive events by allowing a low priority process to diff -Nru a/arch/i386/config.in b/arch/i386/config.in --- a/arch/i386/config.in Tue Sep 17 23:47:51 2002 +++ b/arch/i386/config.in Tue Sep 17 23:47:51 2002 @@ -154,6 +154,8 @@ define_bool CONFIG_X86_OOSTORE y fi +bool 'IA-32 Huge TLB Page Support (if available on processor)' CONFIG_HUGETLB_PAGE + bool 'Symmetric multi-processing support' CONFIG_SMP bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S --- a/arch/i386/kernel/entry.S Tue Sep 17 23:47:51 2002 +++ b/arch/i386/kernel/entry.S Tue Sep 17 23:47:51 2002 @@ -759,8 +759,8 @@ .long sys_io_getevents .long sys_io_submit .long sys_io_cancel - .long sys_ni_syscall /* 250 */ /* sys_alloc_hugepages */ - .long sys_ni_syscall /* sys_free_hugepages */ + .long sys_alloc_hugepages /* 250 */ + .long sys_free_hugepages .long sys_exit_group .rept NR_syscalls-(.-sys_call_table)/4 diff -Nru a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c --- a/arch/i386/kernel/sys_i386.c Tue Sep 17 23:47:51 2002 +++ b/arch/i386/kernel/sys_i386.c Tue Sep 17 23:47:51 2002 @@ -246,3 +246,94 @@ return error; } + +#ifdef CONFIG_HUGETLB_PAGE +#define HPAGE_ALIGN(x) (((unsigned long)x + (HPAGE_SIZE -1)) & HPAGE_MASK) +extern long sys_munmap(unsigned long, size_t); +/* get_addr() finds a currently unused virtual address range in + * the current process's address space. It returns an HPAGE_SIZE + * aligned address (on success). Other generic kernel routines can + * only guarantee that the allocated address is PAGE_SIZE aligned. + */ +static unsigned long +get_addr(unsigned long addr, unsigned long len) +{ + struct vm_area_struct *vma; + if (addr) { + addr = HPAGE_ALIGN(addr); + vma = find_vma(current->mm, addr); + if (((TASK_SIZE - len) >= addr) && + (!vma || addr + len <= vma->vm_start)) + goto found_addr; + } + addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE); + for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { + if (TASK_SIZE - len < addr) + return -ENOMEM; + if (!vma || ((addr + len) < vma->vm_start)) + goto found_addr; + addr = vma->vm_end; + } +found_addr: + addr = HPAGE_ALIGN(addr); + return addr; +} + +asmlinkage unsigned long +sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag) +{ + struct mm_struct *mm = current->mm; + unsigned long raddr; + int retval = 0; + extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int); + if (!(cpu_has_pse)) + return -EINVAL; + if (key < 0) + return -EINVAL; + if (len & (HPAGE_SIZE - 1)) + return -EINVAL; + down_write(&mm->mmap_sem); + raddr = get_addr(addr, len); + if (raddr == -ENOMEM) + goto raddr_out; + retval = alloc_hugetlb_pages(key, raddr, len, prot, flag); + +raddr_out: up_write(&mm->mmap_sem); + if (retval < 0) + return (unsigned long) retval; + return raddr; +} + +asmlinkage int +sys_free_hugepages(unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int retval; + extern int free_hugepages(struct vm_area_struct *); + + vma = find_vma(current->mm, addr); + if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr)) + return -EINVAL; + down_write(&mm->mmap_sem); + spin_lock(&mm->page_table_lock); + retval = free_hugepages(vma); + spin_unlock(&mm->page_table_lock); + up_write(&mm->mmap_sem); + return retval; +} + +#else + +asmlinkage unsigned long +sys_alloc_hugepages(int
key, unsigned long addr, size_t len, int prot, int flag) +{ + return -ENOSYS; +} +asmlinkage int +sys_free_hugepages(unsigned long addr) +{ + return -ENOSYS; +} + +#endif diff -Nru a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c --- a/arch/i386/kernel/traps.c Tue Sep 17 23:47:51 2002 +++ b/arch/i386/kernel/traps.c Tue Sep 17 23:47:51 2002 @@ -189,6 +189,14 @@ show_trace(esp); } +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + show_stack(0); +} + void show_registers(struct pt_regs *regs) { int i; diff -Nru a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile --- a/arch/i386/mm/Makefile Tue Sep 17 23:47:51 2002 +++ b/arch/i386/mm/Makefile Tue Sep 17 23:47:51 2002 @@ -12,5 +12,6 @@ obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o obj-$(CONFIG_DISCONTIGMEM) += discontig.o export-objs := pageattr.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o include $(TOPDIR)/Rules.make diff -Nru a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c --- a/arch/i386/mm/fault.c Tue Sep 17 23:47:51 2002 +++ b/arch/i386/mm/fault.c Tue Sep 17 23:47:51 2002 @@ -28,8 +28,6 @@ extern void die(const char *,struct pt_regs *,long); -extern int console_loglevel; - /* * Ugly, ugly, but the goto's result in better assembly.. */ diff -Nru a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/i386/mm/hugetlbpage.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1,545 @@ +/* + * IA-32 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002, Rohit Seth + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static struct vm_operations_struct hugetlb_vm_ops; +struct list_head htlbpage_freelist; +spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; +extern long htlbpagemem; + +void zap_hugetlb_resources(struct vm_area_struct *); + +#define MAX_ID 32 +struct htlbpagekey { + struct inode *in; + int key; +} htlbpagek[MAX_ID]; + +static struct inode * +find_key_inode(int key) +{ + int i; + + for (i = 0; i < MAX_ID; i++) { + if (htlbpagek[i].key == key) + return (htlbpagek[i].in); + } + return NULL; +} +static struct page * +alloc_hugetlb_page(void) +{ + struct list_head *curr, *head; + struct page *page; + + spin_lock(&htlbpage_lock); + + head = &htlbpage_freelist; + curr = head->next; + + if (curr == head) { + spin_unlock(&htlbpage_lock); + return NULL; + } + page = list_entry(curr, struct page, list); + list_del(curr); + htlbpagemem--; + spin_unlock(&htlbpage_lock); + set_page_count(page, 1); + memset(page_address(page), 0, HPAGE_SIZE); + return page; +} + +static void +free_hugetlb_page(struct page *page) +{ + spin_lock(&htlbpage_lock); + if ((page->mapping != NULL) && (page_count(page) == 2)) { + struct inode *inode = page->mapping->host; + int i; + + ClearPageDirty(page); + remove_from_page_cache(page); + set_page_count(page, 1); + if ((inode->i_size -= HPAGE_SIZE) == 0) { + for (i = 0; i < MAX_ID; i++) + if (htlbpagek[i].key == inode->i_ino) { + htlbpagek[i].key = 0; + htlbpagek[i].in = NULL; + break; + } + kfree(inode); + } + } + if (put_page_testzero(page)) { + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + } + spin_unlock(&htlbpage_lock); +} + +static pte_t * +huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + pmd = pmd_alloc(mm, pgd, addr); + return (pte_t *) pmd; +} + +static pte_t * +huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pmd_t *pmd = NULL; 
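+ /* With PSE, a huge page is mapped by a single pmd entry: huge_pte_alloc() above and huge_pte_offset() here both hand back the pmd cast to a pte, and mk_pte_huge() below marks that entry present with _PAGE_PSE set. */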
+ + pgd = pgd_offset(mm, addr); + pmd = pmd_offset(pgd, addr); + return (pte_t *) pmd; +} + +#define mk_pte_huge(entry) {entry.pte_low |= (_PAGE_PRESENT | _PAGE_PSE);} + +static void +set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, + struct page *page, pte_t * page_table, int write_access) +{ + pte_t entry; + + mm->rss += (HPAGE_SIZE / PAGE_SIZE); + if (write_access) { + entry = + pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + } else + entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = pte_mkyoung(entry); + mk_pte_huge(entry); + set_pte(page_table, entry); + return; +} + +static int +anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + int write_access, pte_t * page_table) +{ + struct page *page; + + page = alloc_hugetlb_page(); + if (page == NULL) + return -1; + set_huge_pte(mm, vma, page, page_table, write_access); + return 1; +} + +int +make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags) +{ + int write; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + pte_t *pte; + + vma = find_vma(mm, addr); + if (!vma) + goto out_error1; + + write = (vma->vm_flags & VM_WRITE) != 0; + if ((vma->vm_end - vma->vm_start) & (HPAGE_SIZE - 1)) + goto out_error1; + spin_lock(&mm->page_table_lock); + do { + pte = huge_pte_alloc(mm, addr); + if ((pte) && (pte_none(*pte))) { + if (anon_get_hugetlb_page(mm, vma, + write ? VM_WRITE : VM_READ, + pte) == -1) + goto out_error; + } else + goto out_error; + addr += HPAGE_SIZE; + } while (addr < end); + spin_unlock(&mm->page_table_lock); + vma->vm_flags |= (VM_HUGETLB | VM_RESERVED); + if (flags & MAP_PRIVATE) + vma->vm_flags |= VM_DONTCOPY; + vma->vm_ops = &hugetlb_vm_ops; + return 0; +out_error: /* Error case, remove the partial lp_resources. 
*/ + if (addr > vma->vm_start) { + vma->vm_end = addr; + zap_hugetlb_resources(vma); + vma->vm_end = end; + } + spin_unlock(&mm->page_table_lock); + out_error1: + return -1; +} + +int +copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pte_t *src_pte, *dst_pte, entry; + struct page *ptepage; + unsigned long addr = vma->vm_start; + unsigned long end = vma->vm_end; + + while (addr < end) { + dst_pte = huge_pte_alloc(dst, addr); + if (!dst_pte) + goto nomem; + src_pte = huge_pte_offset(src, addr); + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + set_pte(dst_pte, entry); + dst->rss += (HPAGE_SIZE / PAGE_SIZE); + addr += HPAGE_SIZE; + } + return 0; + +nomem: + return -ENOMEM; +} + +int +follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *st, int *length, int i) +{ + pte_t *ptep, pte; + unsigned long start = *st; + unsigned long pstart; + int len = *length; + struct page *page; + + do { + pstart = start; + ptep = huge_pte_offset(mm, start); + pte = *ptep; + +back1: + page = pte_page(pte); + if (pages) { + page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + pages[i] = page; + } + if (vmas) + vmas[i] = vma; + i++; + len--; + start += PAGE_SIZE; + if (((start & HPAGE_MASK) == pstart) && len && + (start < vma->vm_end)) + goto back1; + } while (len && start < vma->vm_end); + *length = len; + *st = start; + return i; +} + +void +zap_hugetlb_resources(struct vm_area_struct *mpnt) +{ + struct mm_struct *mm = mpnt->vm_mm; + unsigned long len, addr, end; + pte_t *ptep; + struct page *page; + + addr = mpnt->vm_start; + end = mpnt->vm_end; + len = end - addr; + do { + ptep = huge_pte_offset(mm, addr); + page = pte_page(*ptep); + pte_clear(ptep); + free_hugetlb_page(page); + addr += HPAGE_SIZE; + } while (addr < end); + mm->rss -= (len >> PAGE_SHIFT); + mpnt->vm_ops = NULL; + flush_tlb_range(mpnt, end - len, end); +} + +static void +unlink_vma(struct vm_area_struct *mpnt) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + vma = mm->mmap; + if (vma == mpnt) { + mm->mmap = vma->vm_next; + } else { + while (vma->vm_next != mpnt) { + vma = vma->vm_next; + } + vma->vm_next = mpnt->vm_next; + } + rb_erase(&mpnt->vm_rb, &mm->mm_rb); + mm->mmap_cache = NULL; + mm->map_count--; +} + +int +free_hugepages(struct vm_area_struct *mpnt) +{ + unlink_vma(mpnt); + zap_hugetlb_resources(mpnt); + kmem_cache_free(vm_area_cachep, mpnt); + return 1; +} + +static struct inode * +set_new_inode(unsigned long len, int prot, int flag, int key) +{ + struct inode *inode; + int i; + + for (i = 0; i < MAX_ID; i++) { + if (htlbpagek[i].key == 0) + break; + } + if (i == MAX_ID) + return NULL; + inode = kmalloc(sizeof (struct inode), GFP_KERNEL); + if (inode == NULL) + return NULL; + + inode_init_once(inode); + atomic_inc(&inode->i_writecount); + inode->i_mapping = &inode->i_data; + inode->i_mapping->host = inode; + inode->i_ino = (unsigned long)key; + + htlbpagek[i].key = key; + htlbpagek[i].in = inode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = prot; + inode->i_size = len; + return inode; +} + +static int +check_size_prot(struct inode *inode, unsigned long len, int prot, int flag) +{ + if (inode->i_uid != current->fsuid) + return -1; + if (inode->i_gid != current->fsgid) + return -1; + if (inode->i_size != len) + return -1; + return 0; +} + +static int +alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned 
long len, + int prot, int flag) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct inode *inode; + struct address_space *mapping; + struct page *page; + int idx; + int retval = -ENOMEM; + int newalloc = 0; + +try_again: + spin_lock(&htlbpage_lock); + + inode = find_key_inode(key); + if (inode == NULL) { + if (!capable(CAP_SYS_ADMIN)) { + if (!in_group_p(0)) { + retval = -EPERM; + goto out_err; + } + } + if (!(flag & IPC_CREAT)) { + retval = -ENOENT; + goto out_err; + } + inode = set_new_inode(len, prot, flag, key); + if (inode == NULL) + goto out_err; + newalloc = 1; + } else { + if (check_size_prot(inode, len, prot, flag) < 0) { + retval = -EINVAL; + goto out_err; + } + else if (atomic_read(&inode->i_writecount)) { + spin_unlock(&htlbpage_lock); + goto try_again; + } + } + spin_unlock(&htlbpage_lock); + mapping = inode->i_mapping; + + addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot, + MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0); + if (IS_ERR((void *) addr)) + goto freeinode; + + vma = find_vma(mm, addr); + if (!vma) { + retval = -EINVAL; + goto freeinode; + } + + spin_lock(&mm->page_table_lock); + do { + pte_t *pte = huge_pte_alloc(mm, addr); + if ((pte) && (pte_none(*pte))) { + idx = (addr - vma->vm_start) >> HPAGE_SHIFT; + page = find_get_page(mapping, idx); + if (page == NULL) { + page = alloc_hugetlb_page(); + if (page == NULL) + goto out; + add_to_page_cache(page, mapping, idx); + } + set_huge_pte(mm, vma, page, pte, + (vma->vm_flags & VM_WRITE)); + } else + goto out; + addr += HPAGE_SIZE; + } while (addr < vma->vm_end); + retval = 0; + vma->vm_flags |= (VM_HUGETLB | VM_RESERVED); + vma->vm_ops = &hugetlb_vm_ops; + spin_unlock(&mm->page_table_lock); + spin_lock(&htlbpage_lock); + atomic_set(&inode->i_writecount, 0); + spin_unlock(&htlbpage_lock); + return retval; +out: + if (addr > vma->vm_start) { + unsigned long raddr; + raddr = vma->vm_end; + vma->vm_end = addr; + zap_hugetlb_resources(vma); + vma->vm_end = raddr; + } + spin_unlock(&mm->page_table_lock); + do_munmap(mm, vma->vm_start, len); + if (newalloc) + goto freeinode; + return retval; +out_err: spin_unlock(&htlbpage_lock); +freeinode: + if (newalloc) { + for (idx = 0; idx < MAX_ID; idx++) + if (htlbpagek[idx].key == inode->i_ino) { + htlbpagek[idx].key = 0; + htlbpagek[idx].in = NULL; + break; + } + kfree(inode); + } + return retval; +} + +static int +alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, + int prot, int flag) +{ + if (!capable(CAP_SYS_ADMIN)) { + if (!in_group_p(0)) + return -EPERM; + } + addr = do_mmap_pgoff(NULL, addr, len, prot, + MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0); + if (IS_ERR((void *) addr)) + return -ENOMEM; + if (make_hugetlb_pages_present(addr, (addr + len), flag) < 0) { + do_munmap(current->mm, addr, len); + return -ENOMEM; + } + return 0; +} + +int +alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, + int flag) +{ + if (key > 0) + return alloc_shared_hugetlb_pages(key, addr, len, prot, flag); + return alloc_private_hugetlb_pages(key, addr, len, prot, flag); +} + +int +set_hugetlb_mem_size(int count) +{ + int j, lcount; + struct page *page, *map; + extern long htlbzone_pages; + extern struct list_head htlbpage_freelist; + + if (count < 0) + lcount = count; + else + lcount = count - htlbzone_pages; + + if (lcount > 0) { /* Increase the mem size.
*/ + while (lcount--) { + page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER); + if (page == NULL) + break; + map = page; + for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { + SetPageReserved(map); + map++; + } + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + htlbzone_pages++; + spin_unlock(&htlbpage_lock); + } + return (int) htlbzone_pages; + } + /* Shrink the memory size. */ + while (lcount++) { + page = alloc_hugetlb_page(); + if (page == NULL) + break; + spin_lock(&htlbpage_lock); + htlbzone_pages--; + spin_unlock(&htlbpage_lock); + map = page; + for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { + map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | + 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | + 1 << PG_private | 1<< PG_writeback); + set_page_count(page, 0); + map++; + } + set_page_count(page, 1); + __free_pages(page, HUGETLB_PAGE_ORDER); + } + return (int) htlbzone_pages; +} + +static struct vm_operations_struct hugetlb_vm_ops = { + .close = zap_hugetlb_resources, +}; diff -Nru a/arch/i386/mm/init.c b/arch/i386/mm/init.c --- a/arch/i386/mm/init.c Tue Sep 17 23:47:51 2002 +++ b/arch/i386/mm/init.c Tue Sep 17 23:47:51 2002 @@ -215,19 +215,14 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) { - if (!page_is_ram(pfn)) { + if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } else SetPageReserved(page); - return; - } - if (bad_ppro && page_kills_ppro(pfn)) { - SetPageReserved(page); - return; - } - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - __free_page(page); - totalhigh_pages++; } #ifndef CONFIG_DISCONTIGMEM @@ -431,6 +426,13 @@ extern void set_max_mapnr_init(void); #endif /* !CONFIG_DISCONTIGMEM */ +#ifdef CONFIG_HUGETLB_PAGE +long htlbpagemem = 0; +int htlbpage_max; +long htlbzone_pages; +extern struct list_head htlbpage_freelist; +#endif + void __init mem_init(void) { extern int ppro_with_ram_bug(void); @@ -492,6 +494,30 @@ */ #ifndef CONFIG_SMP zap_low_mappings(); +#endif +#ifdef CONFIG_HUGETLB_PAGE + { + long i, j; + struct page *page, *map; + /*For now reserve quarter for hugetlb_pages.*/ + htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 2)) ; + /*Will make this kernel command line. */ + INIT_LIST_HEAD(&htlbpage_freelist); + for (i = 0; i < htlbzone_pages; i++) { + page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER); + if (page == NULL) + break; + map = page; + for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { + SetPageReserved(map); + map++; + } + list_add(&page->list, &htlbpage_freelist); + } + printk("Total Huge_TLB_Page memory pages allocated %ld\n", i); + htlbzone_pages = htlbpagemem = i; + htlbpage_max = i; + } #endif } diff -Nru a/arch/ia64/Makefile b/arch/ia64/Makefile --- a/arch/ia64/Makefile Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/Makefile Tue Sep 17 23:47:51 2002 @@ -104,30 +104,14 @@ rawboot: @$(MAKEBOOT) rawboot -# -# My boot writes directly to a specific disk partition, I doubt most -# people will want to do that without changes..
-# -msb my-special-boot: - @$(MAKEBOOT) msb - -bootimage: - @$(MAKEBOOT) bootimage - -srmboot: - @$(MAKEBOOT) srmboot - archclean: @$(MAKEBOOT) clean archmrproper: @$(MAKE) -C arch/$(ARCH)/tools mrproper -bootpfile: - @$(MAKEBOOT) bootpfile - prepare: $(TOPDIR)/include/asm-ia64/offsets.h $(TOPDIR)/include/asm-ia64/offsets.h: include/asm include/linux/version.h \ include/config/MARKER - @$(MAKE) -C arch/$(ARCH)/tools $@ \ No newline at end of file + @$(MAKE) -C arch/$(ARCH)/tools $@ diff -Nru a/arch/ia64/config.in b/arch/ia64/config.in --- a/arch/ia64/config.in Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/config.in Tue Sep 17 23:47:52 2002 @@ -86,6 +86,31 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. +define_int CONFIG_FORCE_MAX_ZONEORDER 18 + +bool 'IA-64 Huge TLB Page Support' CONFIG_HUGETLB_PAGE + +if [ "$CONFIG_HUGETLB_PAGE" = "y" ]; then + if [ "$CONFIG_MCKINLEY" = "y" ]; then + choice ' IA-64 Huge TLB Page Size' \ + "4GB CONFIG_HUGETLB_PAGE_SIZE_4GB \ + 256MB CONFIG_HUGETLB_PAGE_SIZE_256MB \ + 64MB CONFIG_HUGETLB_PAGE_SIZE_64MB \ + 16MB CONFIG_HUGETLB_PAGE_SIZE_16MB \ + 4MB CONFIG_HUGETLB_PAGE_SIZE_4MB \ + 1MB CONFIG_HUGETLB_PAGE_SIZE_1MB \ + 256KB CONFIG_HUGETLB_PAGE_SIZE_256KB" 16MB + else + choice ' IA-64 Huge TLB Page Size' \ + "256MB CONFIG_HUGETLB_PAGE_SIZE_256MB \ + 64MB CONFIG_HUGETLB_PAGE_SIZE_64MB \ + 16MB CONFIG_HUGETLB_PAGE_SIZE_16MB \ + 4MB CONFIG_HUGETLB_PAGE_SIZE_4MB \ + 1MB CONFIG_HUGETLB_PAGE_SIZE_1MB \ + 256KB CONFIG_HUGETLB_PAGE_SIZE_256KB" 16MB + fi +fi + bool 'SMP support' CONFIG_SMP bool 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT bool 'Performance monitor support' CONFIG_PERFMON diff -Nru a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c --- a/arch/ia64/hp/common/sba_iommu.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/hp/common/sba_iommu.c Tue Sep 17 23:47:51 2002 @@ -39,9 +39,7 @@ #define DRIVER_NAME "SBA" -#ifndef CONFIG_IA64_HP_PROTO #define ALLOW_IOV_BYPASS -#endif #define ENABLE_MARK_CLEAN /* ** The number of debug flags is a clue - this code is fragile. 
@@ -1252,10 +1250,6 @@ ** Firmware programs the maximum IOV space size into the imask reg */ iova_space_size = ~(READ_REG(ioc->ioc_hpa + IOC_IMASK) & 0xFFFFFFFFUL) + 1; -#ifdef CONFIG_IA64_HP_PROTO - if (!iova_space_size) - iova_space_size = GB(1); -#endif /* ** iov_order is always based on a 1GB IOVA space since we want to @@ -1625,10 +1619,8 @@ device->slot_name, hpa); if ((hw_rev & 0xFF) < 0x20) { - printk(KERN_INFO "%s WARNING rev 2.0 or greater will be required for IO MMU support in the future\n", DRIVER_NAME); -#ifndef CONFIG_IA64_HP_PROTO - panic("%s: CONFIG_IA64_HP_PROTO MUST be enabled to support SBA rev less than 2.0", DRIVER_NAME); -#endif + printk("%s: SBA rev less than 2.0 not supported", DRIVER_NAME); + return; } sba_dev = kmalloc(sizeof(struct sba_device), GFP_KERNEL); diff -Nru a/arch/ia64/hp/zx1/hpzx1_misc.c b/arch/ia64/hp/zx1/hpzx1_misc.c --- a/arch/ia64/hp/zx1/hpzx1_misc.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/hp/zx1/hpzx1_misc.c Tue Sep 17 23:47:51 2002 @@ -33,60 +33,73 @@ static struct pci_ops *orig_pci_ops; -#define HP_CFG_RD(sz, bits, name) \ -static int hp_cfg_read##sz (struct pci_dev *dev, int where, u##bits *value) \ -{ \ - struct fake_pci_dev *fake_dev; \ - if (!(fake_dev = (struct fake_pci_dev *) dev->sysdata)) \ - return orig_pci_ops->name(dev, where, value); \ - \ - if (where == PCI_BASE_ADDRESS_0) { \ - if (fake_dev->sizing) \ - *value = ~(fake_dev->csr_size - 1); \ - else \ - *value = (fake_dev->csr_base & \ - PCI_BASE_ADDRESS_MEM_MASK) | \ - PCI_BASE_ADDRESS_SPACE_MEMORY; \ - fake_dev->sizing = 0; \ - return PCIBIOS_SUCCESSFUL; \ - } \ - *value = read##sz(fake_dev->mapped_csrs + where); \ - if (where == PCI_COMMAND) \ - *value |= PCI_COMMAND_MEMORY; /* SBA omits this */ \ - return PCIBIOS_SUCCESSFUL; \ +struct fake_pci_dev * +lookup_fake_dev (struct pci_bus *bus, unsigned int devfn) +{ + struct pci_dev *dev; + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->devfn == devfn) + return (struct fake_pci_dev *) dev->sysdata; + return NULL; } -#define HP_CFG_WR(sz, bits, name) \ -static int hp_cfg_write##sz (struct pci_dev *dev, int where, u##bits value) \ -{ \ - struct fake_pci_dev *fake_dev; \ - \ - if (!(fake_dev = (struct fake_pci_dev *) dev->sysdata)) \ - return orig_pci_ops->name(dev, where, value); \ - \ - if (where == PCI_BASE_ADDRESS_0) { \ - if (value == (u##bits) ~0) \ - fake_dev->sizing = 1; \ - return PCIBIOS_SUCCESSFUL; \ - } else \ - write##sz(value, fake_dev->mapped_csrs + where); \ - return PCIBIOS_SUCCESSFUL; \ +static int +hp_cfg_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + struct fake_pci_dev *fake_dev = lookup_fake_dev(bus, devfn); + + if (!fake_dev) + return (*orig_pci_ops->read)(bus, devfn, where, size, value); + + if (where == PCI_BASE_ADDRESS_0) { + if (fake_dev->sizing) + *value = ~(fake_dev->csr_size - 1); + else + *value = ((fake_dev->csr_base & PCI_BASE_ADDRESS_MEM_MASK) + | PCI_BASE_ADDRESS_SPACE_MEMORY); + fake_dev->sizing = 0; + return PCIBIOS_SUCCESSFUL; + } + switch (size) { + case 1: *value = readb(fake_dev->mapped_csrs + where); break; + case 2: *value = readw(fake_dev->mapped_csrs + where); break; + case 4: *value = readl(fake_dev->mapped_csrs + where); break; + default: + printk(KERN_WARNING"hp_cfg_read: bad size = %d bytes", size); + break; + } + if (where == PCI_COMMAND) + *value |= PCI_COMMAND_MEMORY; /* SBA omits this */ + return PCIBIOS_SUCCESSFUL; } -HP_CFG_RD(b, 8, read_byte) -HP_CFG_RD(w, 16, read_word) -HP_CFG_RD(l, 32, read_dword) -HP_CFG_WR(b, 8, write_byte) 
-HP_CFG_WR(w, 16, write_word) -HP_CFG_WR(l, 32, write_dword) +static int +hp_cfg_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + struct fake_pci_dev *fake_dev = lookup_fake_dev(bus, devfn); + + if (!fake_dev) + return (*orig_pci_ops->write)(bus, devfn, where, size, value); + + if (where == PCI_BASE_ADDRESS_0) { + if (value == ((1UL << 8*size) - 1)) + fake_dev->sizing = 1; + return PCIBIOS_SUCCESSFUL; + } + switch (size) { + case 1: writeb(value, fake_dev->mapped_csrs + where); break; + case 2: writew(value, fake_dev->mapped_csrs + where); break; + case 4: writel(value, fake_dev->mapped_csrs + where); break; + default: + printk(KERN_WARNING"hp_cfg_write: bad size = %d bytes", size); + break; + } + return PCIBIOS_SUCCESSFUL; +} static struct pci_ops hp_pci_conf = { - hp_cfg_readb, - hp_cfg_readw, - hp_cfg_readl, - hp_cfg_writeb, - hp_cfg_writew, - hp_cfg_writel, + .read = hp_cfg_read, + .write = hp_cfg_write }; static void @@ -309,40 +322,8 @@ * HWP0003: AGP LBA device */ acpi_get_devices("HWP0001", hpzx1_sba_probe, "HWP0001", NULL); -#ifdef CONFIG_IA64_HP_PROTO - if (hpzx1_devices) { -#endif acpi_get_devices("HWP0002", hpzx1_lba_probe, "HWP0002 PCI LBA", NULL); acpi_get_devices("HWP0003", hpzx1_lba_probe, "HWP0003 AGP LBA", NULL); - -#ifdef CONFIG_IA64_HP_PROTO - } - -#define ZX1_FUNC_ID_VALUE (PCI_DEVICE_ID_HP_ZX1_SBA << 16) | PCI_VENDOR_ID_HP - /* - * Early protos don't have bridges in the ACPI namespace, so - * if we didn't find anything, add the things we know are - * there. - */ - if (hpzx1_devices == 0) { - u64 hpa, csr_base; - - csr_base = 0xfed00000UL; - hpa = (u64) ioremap(csr_base, 0x2000); - if (__raw_readl(hpa) == ZX1_FUNC_ID_VALUE) { - hpzx1_fake_pci_dev("HWP0001 SBA", 0, csr_base, 0x1000); - hpzx1_fake_pci_dev("HWP0001 IOC", 0, csr_base + 0x1000, - 0x1000); - - csr_base = 0xfed24000UL; - iounmap(hpa); - hpa = (u64) ioremap(csr_base, 0x1000); - hpzx1_fake_pci_dev("HWP0003 AGP LBA", 0x40, csr_base, - 0x1000); - } - iounmap(hpa); - } -#endif } extern void sba_init(void); diff -Nru a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c --- a/arch/ia64/ia32/sys_ia32.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/ia32/sys_ia32.c Tue Sep 17 23:47:51 2002 @@ -2111,8 +2111,8 @@ }; struct ipc_kludge { - struct msgbuf *msgp; - long msgtyp; + u32 msgp; + s32 msgtyp; }; #define SEMOP 1 diff -Nru a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c --- a/arch/ia64/kernel/efi.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/efi.c Tue Sep 17 23:47:51 2002 @@ -33,6 +33,15 @@ #define EFI_DEBUG 0 +#ifdef CONFIG_HUGETLB_PAGE + +/* By default at total of 512MB is reserved huge pages. 
*/ +#define HTLBZONE_SIZE_DEFAULT 0x20000000 + +unsigned long htlbzone_pages = (HTLBZONE_SIZE_DEFAULT >> HPAGE_SHIFT); + +#endif + extern efi_status_t efi_call_phys (void *, ...); struct efi efi; @@ -399,6 +408,25 @@ ++cp; } } +#ifdef CONFIG_HUGETLB_PAGE + /* Just duplicating the above algo for lpzone start */ + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "lpmem=", 8) == 0) { + cp += 8; + htlbzone_pages = memparse(cp, &end); + htlbzone_pages = (htlbzone_pages >> HPAGE_SHIFT); + if (end != cp) + break; + cp = end; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + printk("Total HugeTLB_Page memory pages requested 0x%lx \n", htlbzone_pages); +#endif if (mem_limit != ~0UL) printk("Ignoring memory above %luMB\n", mem_limit >> 20); diff -Nru a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S --- a/arch/ia64/kernel/entry.S Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/entry.S Tue Sep 17 23:47:51 2002 @@ -90,15 +90,23 @@ br.ret.sptk.many rp END(ia64_execve) +/* + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 user_tid, u64 tls) + */ GLOBAL_ENTRY(sys_clone2) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc r16=ar.pfs,3,2,4,0 + alloc r16=ar.pfs,5,2,5,0 DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp mov loc0=rp mov loc1=r16 // save ar.pfs across do_fork .body mov out1=in1 mov out3=in2 + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out4=in3 // valid only w/CLONE_SETTID and/or CLONE_CLEARTID + ;; +(p6) st8 [r2]=in4 // store TLS in r13 (tp) adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s dep out0=0,in0,CLONE_IDLETASK_BIT,1 // out0 = clone_flags & ~CLONE_IDLETASK br.call.sptk.many rp=do_fork @@ -115,15 +123,24 @@ br.ret.sptk.many rp END(sys_clone2) +/* + * sys_clone(u64 flags, u64 ustack_base, u64 user_tid, u64 tls) + * Deprecated. Use sys_clone2() instead. + */ GLOBAL_ENTRY(sys_clone) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc r16=ar.pfs,2,2,4,0 + alloc r16=ar.pfs,4,2,5,0 DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp mov loc0=rp mov loc1=r16 // save ar.pfs across do_fork .body mov out1=in1 mov out3=16 // stacksize (compensates for 16-byte scratch area) + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out4=in2 // out4 = user_tid (optional) + ;; +(p6) st8 [r2]=in3 // store TLS in r13 (tp) adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s dep out0=0,in0,CLONE_IDLETASK_BIT,1 // out0 = clone_flags & ~CLONE_IDLETASK br.call.sptk.many rp=do_fork @@ -521,7 +538,7 @@ #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) /* * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the + * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the * address of the previously executing task. */ br.call.sptk.many rp=ia64_invoke_schedule_tail @@ -872,7 +889,7 @@ #endif /* __GNUC__ < 3 */ /* - * Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to + * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to * be set up by the caller. We declare 8 input registers so the system call * args get preserved, in case we need to restart a system call. 
*/ @@ -900,7 +917,7 @@ mov ar.unat=r9 mov ar.pfs=loc1 br.ret.sptk.many rp -END(do_notify_resume_user) +END(notify_resume_user) GLOBAL_ENTRY(sys_rt_sigsuspend) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) @@ -1224,10 +1241,10 @@ data8 sys_futex // 1230 data8 sys_sched_setaffinity data8 sys_sched_getaffinity - data8 ia64_ni_syscall - data8 ia64_ni_syscall - data8 ia64_ni_syscall // 1235 - data8 ia64_ni_syscall + data8 sys_security + data8 sys_alloc_hugepages + data8 sys_free_hugepages // 1235 + data8 sys_exit_group data8 ia64_ni_syscall data8 sys_io_setup data8 sys_io_destroy diff -Nru a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S --- a/arch/ia64/kernel/gate.S Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/gate.S Tue Sep 17 23:47:51 2002 @@ -2,7 +2,7 @@ * This file contains the code that gets mapped at the upper end of each task's text * region. For now, it contains the signal trampoline code only. * - * Copyright (C) 1999-2001 Hewlett-Packard Co + * Copyright (C) 1999-2002 Hewlett-Packard Co * David Mosberger-Tang */ @@ -135,7 +135,7 @@ ;; ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0 cmp.ne p8,p0=r14,r15 // do we need to restore the rbs? -(p8) br.cond.spnt restore_rbs // yup -> (clobbers r14 and r16) +(p8) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) ;; back_from_restore_rbs: adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp @@ -189,20 +189,69 @@ .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF .body restore_rbs: + // On input: + // r14 = bsp1 (bsp at the time of return from signal handler) + // r15 = bsp0 (bsp at the time the signal occurred) + // + // Here, we need to calculate bspstore0, the value that ar.bspstore needs + // to be set to, based on bsp0 and the size of the dirty partition on + // the alternate stack (sc_loadrs >> 16). This can be done with the + // following algorithm: + // + // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); + // + // This is what the code below does. + // alloc r2=ar.pfs,0,0,0,0 // alloc null frame adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp + adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp ;; - ld8 r14=[r16] - adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + ld8 r17=[r16] + ld8 r16=[r18] // get new rnat + extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) ;; - mov ar.rsc=r14 // put RSE into enforced lazy mode - ld8 r14=[r16] // get new rnat + mov ar.rsc=r17 // put RSE into enforced lazy mode + shr.u r17=r17,16 ;; - loadrs // restore dirty partition + sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) + shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) + ;; + loadrs // restore dirty partition + extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) + ;; + add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) + ;; + shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 + ;; + sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) + movl r17=0x8208208208208209 + ;; + add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) + setf.sig f7=r17 + cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? 
+ ;; +(p7) adds r18=-62,r18 // delta -= 62 + ;; + setf.sig f6=r18 + ;; + xmpy.h f6=f6,f7 + ;; + getf.sig r17=f6 + ;; + add r17=r17,r18 + shr r18=r18,63 + ;; + shr r17=r17,5 + ;; + sub r17=r17,r18 // r17 = delta/63 + ;; + add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) + ;; + shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) ;; mov ar.bspstore=r15 // switch back to old register backing store area ;; - mov ar.rnat=r14 // restore RNaT + mov ar.rnat=r16 // restore RNaT mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) // invala not necessary as that will happen when returning to user-mode br.cond.sptk back_from_restore_rbs diff -Nru a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S --- a/arch/ia64/kernel/head.S Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/head.S Tue Sep 17 23:47:51 2002 @@ -13,6 +13,8 @@ * Copyright (C) 1999 Intel Corp. * Copyright (C) 1999 Asit Mallick * Copyright (C) 1999 Don Dugger + * Copyright (C) 2002 Fenghua Yu + * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. */ #include @@ -260,302 +262,297 @@ END(ia64_load_debug_regs) GLOBAL_ENTRY(__ia64_save_fpu) - alloc r2=ar.pfs,1,0,0,0 - adds r3=16,in0 + alloc r2=ar.pfs,1,4,0,0 + adds loc0=96*16-16,in0 + adds loc1=96*16-16-128,in0 ;; - stf.spill.nta [in0]=f32,32 - stf.spill.nta [ r3]=f33,32 + stf.spill.nta [loc0]=f127,-256 + stf.spill.nta [loc1]=f119,-256 ;; - stf.spill.nta [in0]=f34,32 - stf.spill.nta [ r3]=f35,32 + stf.spill.nta [loc0]=f111,-256 + stf.spill.nta [loc1]=f103,-256 ;; - stf.spill.nta [in0]=f36,32 - stf.spill.nta [ r3]=f37,32 + stf.spill.nta [loc0]=f95,-256 + stf.spill.nta [loc1]=f87,-256 ;; - stf.spill.nta [in0]=f38,32 - stf.spill.nta [ r3]=f39,32 + stf.spill.nta [loc0]=f79,-256 + stf.spill.nta [loc1]=f71,-256 ;; - stf.spill.nta [in0]=f40,32 - stf.spill.nta [ r3]=f41,32 + stf.spill.nta [loc0]=f63,-256 + stf.spill.nta [loc1]=f55,-256 + adds loc2=96*16-32,in0 ;; - stf.spill.nta [in0]=f42,32 - stf.spill.nta [ r3]=f43,32 + stf.spill.nta [loc0]=f47,-256 + stf.spill.nta [loc1]=f39,-256 + adds loc3=96*16-32-128,in0 ;; - stf.spill.nta [in0]=f44,32 - stf.spill.nta [ r3]=f45,32 + stf.spill.nta [loc2]=f126,-256 + stf.spill.nta [loc3]=f118,-256 ;; - stf.spill.nta [in0]=f46,32 - stf.spill.nta [ r3]=f47,32 + stf.spill.nta [loc2]=f110,-256 + stf.spill.nta [loc3]=f102,-256 ;; - stf.spill.nta [in0]=f48,32 - stf.spill.nta [ r3]=f49,32 + stf.spill.nta [loc2]=f94,-256 + stf.spill.nta [loc3]=f86,-256 ;; - stf.spill.nta [in0]=f50,32 - stf.spill.nta [ r3]=f51,32 + stf.spill.nta [loc2]=f78,-256 + stf.spill.nta [loc3]=f70,-256 ;; - stf.spill.nta [in0]=f52,32 - stf.spill.nta [ r3]=f53,32 + stf.spill.nta [loc2]=f62,-256 + stf.spill.nta [loc3]=f54,-256 + adds loc0=96*16-48,in0 ;; - stf.spill.nta [in0]=f54,32 - stf.spill.nta [ r3]=f55,32 + stf.spill.nta [loc2]=f46,-256 + stf.spill.nta [loc3]=f38,-256 + adds loc1=96*16-48-128,in0 ;; - stf.spill.nta [in0]=f56,32 - stf.spill.nta [ r3]=f57,32 + stf.spill.nta [loc0]=f125,-256 + stf.spill.nta [loc1]=f117,-256 ;; - stf.spill.nta [in0]=f58,32 - stf.spill.nta [ r3]=f59,32 + stf.spill.nta [loc0]=f109,-256 + stf.spill.nta [loc1]=f101,-256 ;; - stf.spill.nta [in0]=f60,32 - stf.spill.nta [ r3]=f61,32 + stf.spill.nta [loc0]=f93,-256 + stf.spill.nta [loc1]=f85,-256 ;; - stf.spill.nta [in0]=f62,32 - stf.spill.nta [ r3]=f63,32 + stf.spill.nta [loc0]=f77,-256 + stf.spill.nta [loc1]=f69,-256 ;; - stf.spill.nta [in0]=f64,32 - stf.spill.nta [ r3]=f65,32 + stf.spill.nta [loc0]=f61,-256 + stf.spill.nta [loc1]=f53,-256 + adds 
loc2=96*16-64,in0 ;; - stf.spill.nta [in0]=f66,32 - stf.spill.nta [ r3]=f67,32 + stf.spill.nta [loc0]=f45,-256 + stf.spill.nta [loc1]=f37,-256 + adds loc3=96*16-64-128,in0 ;; - stf.spill.nta [in0]=f68,32 - stf.spill.nta [ r3]=f69,32 + stf.spill.nta [loc2]=f124,-256 + stf.spill.nta [loc3]=f116,-256 ;; - stf.spill.nta [in0]=f70,32 - stf.spill.nta [ r3]=f71,32 + stf.spill.nta [loc2]=f108,-256 + stf.spill.nta [loc3]=f100,-256 ;; - stf.spill.nta [in0]=f72,32 - stf.spill.nta [ r3]=f73,32 + stf.spill.nta [loc2]=f92,-256 + stf.spill.nta [loc3]=f84,-256 ;; - stf.spill.nta [in0]=f74,32 - stf.spill.nta [ r3]=f75,32 + stf.spill.nta [loc2]=f76,-256 + stf.spill.nta [loc3]=f68,-256 ;; - stf.spill.nta [in0]=f76,32 - stf.spill.nta [ r3]=f77,32 + stf.spill.nta [loc2]=f60,-256 + stf.spill.nta [loc3]=f52,-256 + adds loc0=96*16-80,in0 ;; - stf.spill.nta [in0]=f78,32 - stf.spill.nta [ r3]=f79,32 + stf.spill.nta [loc2]=f44,-256 + stf.spill.nta [loc3]=f36,-256 + adds loc1=96*16-80-128,in0 ;; - stf.spill.nta [in0]=f80,32 - stf.spill.nta [ r3]=f81,32 + stf.spill.nta [loc0]=f123,-256 + stf.spill.nta [loc1]=f115,-256 ;; - stf.spill.nta [in0]=f82,32 - stf.spill.nta [ r3]=f83,32 + stf.spill.nta [loc0]=f107,-256 + stf.spill.nta [loc1]=f99,-256 ;; - stf.spill.nta [in0]=f84,32 - stf.spill.nta [ r3]=f85,32 + stf.spill.nta [loc0]=f91,-256 + stf.spill.nta [loc1]=f83,-256 ;; - stf.spill.nta [in0]=f86,32 - stf.spill.nta [ r3]=f87,32 + stf.spill.nta [loc0]=f75,-256 + stf.spill.nta [loc1]=f67,-256 ;; - stf.spill.nta [in0]=f88,32 - stf.spill.nta [ r3]=f89,32 + stf.spill.nta [loc0]=f59,-256 + stf.spill.nta [loc1]=f51,-256 + adds loc2=96*16-96,in0 ;; - stf.spill.nta [in0]=f90,32 - stf.spill.nta [ r3]=f91,32 + stf.spill.nta [loc0]=f43,-256 + stf.spill.nta [loc1]=f35,-256 + adds loc3=96*16-96-128,in0 ;; - stf.spill.nta [in0]=f92,32 - stf.spill.nta [ r3]=f93,32 + stf.spill.nta [loc2]=f122,-256 + stf.spill.nta [loc3]=f114,-256 ;; - stf.spill.nta [in0]=f94,32 - stf.spill.nta [ r3]=f95,32 + stf.spill.nta [loc2]=f106,-256 + stf.spill.nta [loc3]=f98,-256 ;; - stf.spill.nta [in0]=f96,32 - stf.spill.nta [ r3]=f97,32 + stf.spill.nta [loc2]=f90,-256 + stf.spill.nta [loc3]=f82,-256 ;; - stf.spill.nta [in0]=f98,32 - stf.spill.nta [ r3]=f99,32 + stf.spill.nta [loc2]=f74,-256 + stf.spill.nta [loc3]=f66,-256 ;; - stf.spill.nta [in0]=f100,32 - stf.spill.nta [ r3]=f101,32 + stf.spill.nta [loc2]=f58,-256 + stf.spill.nta [loc3]=f50,-256 + adds loc0=96*16-112,in0 ;; - stf.spill.nta [in0]=f102,32 - stf.spill.nta [ r3]=f103,32 + stf.spill.nta [loc2]=f42,-256 + stf.spill.nta [loc3]=f34,-256 + adds loc1=96*16-112-128,in0 ;; - stf.spill.nta [in0]=f104,32 - stf.spill.nta [ r3]=f105,32 + stf.spill.nta [loc0]=f121,-256 + stf.spill.nta [loc1]=f113,-256 ;; - stf.spill.nta [in0]=f106,32 - stf.spill.nta [ r3]=f107,32 + stf.spill.nta [loc0]=f105,-256 + stf.spill.nta [loc1]=f97,-256 ;; - stf.spill.nta [in0]=f108,32 - stf.spill.nta [ r3]=f109,32 + stf.spill.nta [loc0]=f89,-256 + stf.spill.nta [loc1]=f81,-256 ;; - stf.spill.nta [in0]=f110,32 - stf.spill.nta [ r3]=f111,32 + stf.spill.nta [loc0]=f73,-256 + stf.spill.nta [loc1]=f65,-256 ;; - stf.spill.nta [in0]=f112,32 - stf.spill.nta [ r3]=f113,32 + stf.spill.nta [loc0]=f57,-256 + stf.spill.nta [loc1]=f49,-256 + adds loc2=96*16-128,in0 ;; - stf.spill.nta [in0]=f114,32 - stf.spill.nta [ r3]=f115,32 + stf.spill.nta [loc0]=f41,-256 + stf.spill.nta [loc1]=f33,-256 + adds loc3=96*16-128-128,in0 ;; - stf.spill.nta [in0]=f116,32 - stf.spill.nta [ r3]=f117,32 + stf.spill.nta [loc2]=f120,-256 + stf.spill.nta [loc3]=f112,-256 ;; - 
stf.spill.nta [in0]=f118,32 - stf.spill.nta [ r3]=f119,32 + stf.spill.nta [loc2]=f104,-256 + stf.spill.nta [loc3]=f96,-256 ;; - stf.spill.nta [in0]=f120,32 - stf.spill.nta [ r3]=f121,32 + stf.spill.nta [loc2]=f88,-256 + stf.spill.nta [loc3]=f80,-256 ;; - stf.spill.nta [in0]=f122,32 - stf.spill.nta [ r3]=f123,32 + stf.spill.nta [loc2]=f72,-256 + stf.spill.nta [loc3]=f64,-256 ;; - stf.spill.nta [in0]=f124,32 - stf.spill.nta [ r3]=f125,32 + stf.spill.nta [loc2]=f56,-256 + stf.spill.nta [loc3]=f48,-256 ;; - stf.spill.nta [in0]=f126,32 - stf.spill.nta [ r3]=f127,32 + stf.spill.nta [loc2]=f40 + stf.spill.nta [loc3]=f32 br.ret.sptk.many rp END(__ia64_save_fpu) GLOBAL_ENTRY(__ia64_load_fpu) - alloc r2=ar.pfs,1,0,0,0 - adds r3=16,in0 - ;; - ldf.fill.nta f32=[in0],32 - ldf.fill.nta f33=[ r3],32 - ;; - ldf.fill.nta f34=[in0],32 - ldf.fill.nta f35=[ r3],32 - ;; - ldf.fill.nta f36=[in0],32 - ldf.fill.nta f37=[ r3],32 - ;; - ldf.fill.nta f38=[in0],32 - ldf.fill.nta f39=[ r3],32 - ;; - ldf.fill.nta f40=[in0],32 - ldf.fill.nta f41=[ r3],32 - ;; - ldf.fill.nta f42=[in0],32 - ldf.fill.nta f43=[ r3],32 - ;; - ldf.fill.nta f44=[in0],32 - ldf.fill.nta f45=[ r3],32 - ;; - ldf.fill.nta f46=[in0],32 - ldf.fill.nta f47=[ r3],32 - ;; - ldf.fill.nta f48=[in0],32 - ldf.fill.nta f49=[ r3],32 - ;; - ldf.fill.nta f50=[in0],32 - ldf.fill.nta f51=[ r3],32 - ;; - ldf.fill.nta f52=[in0],32 - ldf.fill.nta f53=[ r3],32 - ;; - ldf.fill.nta f54=[in0],32 - ldf.fill.nta f55=[ r3],32 - ;; - ldf.fill.nta f56=[in0],32 - ldf.fill.nta f57=[ r3],32 - ;; - ldf.fill.nta f58=[in0],32 - ldf.fill.nta f59=[ r3],32 - ;; - ldf.fill.nta f60=[in0],32 - ldf.fill.nta f61=[ r3],32 - ;; - ldf.fill.nta f62=[in0],32 - ldf.fill.nta f63=[ r3],32 - ;; - ldf.fill.nta f64=[in0],32 - ldf.fill.nta f65=[ r3],32 - ;; - ldf.fill.nta f66=[in0],32 - ldf.fill.nta f67=[ r3],32 - ;; - ldf.fill.nta f68=[in0],32 - ldf.fill.nta f69=[ r3],32 - ;; - ldf.fill.nta f70=[in0],32 - ldf.fill.nta f71=[ r3],32 - ;; - ldf.fill.nta f72=[in0],32 - ldf.fill.nta f73=[ r3],32 - ;; - ldf.fill.nta f74=[in0],32 - ldf.fill.nta f75=[ r3],32 - ;; - ldf.fill.nta f76=[in0],32 - ldf.fill.nta f77=[ r3],32 - ;; - ldf.fill.nta f78=[in0],32 - ldf.fill.nta f79=[ r3],32 - ;; - ldf.fill.nta f80=[in0],32 - ldf.fill.nta f81=[ r3],32 - ;; - ldf.fill.nta f82=[in0],32 - ldf.fill.nta f83=[ r3],32 - ;; - ldf.fill.nta f84=[in0],32 - ldf.fill.nta f85=[ r3],32 - ;; - ldf.fill.nta f86=[in0],32 - ldf.fill.nta f87=[ r3],32 - ;; - ldf.fill.nta f88=[in0],32 - ldf.fill.nta f89=[ r3],32 - ;; - ldf.fill.nta f90=[in0],32 - ldf.fill.nta f91=[ r3],32 - ;; - ldf.fill.nta f92=[in0],32 - ldf.fill.nta f93=[ r3],32 - ;; - ldf.fill.nta f94=[in0],32 - ldf.fill.nta f95=[ r3],32 - ;; - ldf.fill.nta f96=[in0],32 - ldf.fill.nta f97=[ r3],32 - ;; - ldf.fill.nta f98=[in0],32 - ldf.fill.nta f99=[ r3],32 - ;; - ldf.fill.nta f100=[in0],32 - ldf.fill.nta f101=[ r3],32 - ;; - ldf.fill.nta f102=[in0],32 - ldf.fill.nta f103=[ r3],32 - ;; - ldf.fill.nta f104=[in0],32 - ldf.fill.nta f105=[ r3],32 - ;; - ldf.fill.nta f106=[in0],32 - ldf.fill.nta f107=[ r3],32 - ;; - ldf.fill.nta f108=[in0],32 - ldf.fill.nta f109=[ r3],32 - ;; - ldf.fill.nta f110=[in0],32 - ldf.fill.nta f111=[ r3],32 - ;; - ldf.fill.nta f112=[in0],32 - ldf.fill.nta f113=[ r3],32 - ;; - ldf.fill.nta f114=[in0],32 - ldf.fill.nta f115=[ r3],32 - ;; - ldf.fill.nta f116=[in0],32 - ldf.fill.nta f117=[ r3],32 - ;; - ldf.fill.nta f118=[in0],32 - ldf.fill.nta f119=[ r3],32 - ;; - ldf.fill.nta f120=[in0],32 - ldf.fill.nta f121=[ r3],32 - ;; - ldf.fill.nta f122=[in0],32 - ldf.fill.nta 
f123=[ r3],32 - ;; - ldf.fill.nta f124=[in0],32 - ldf.fill.nta f125=[ r3],32 - ;; - ldf.fill.nta f126=[in0],32 - ldf.fill.nta f127=[ r3],32 + alloc r2=ar.pfs,1,2,0,0 + adds r3=128,in0 + adds r14=256,in0 + adds r15=384,in0 + mov loc0=512 + mov loc1=-1024+16 + ;; + ldf.fill.nta f32=[in0],loc0 + ldf.fill.nta f40=[ r3],loc0 + ldf.fill.nta f48=[r14],loc0 + ldf.fill.nta f56=[r15],loc0 + ;; + ldf.fill.nta f64=[in0],loc0 + ldf.fill.nta f72=[ r3],loc0 + ldf.fill.nta f80=[r14],loc0 + ldf.fill.nta f88=[r15],loc0 + ;; + ldf.fill.nta f96=[in0],loc1 + ldf.fill.nta f104=[ r3],loc1 + ldf.fill.nta f112=[r14],loc1 + ldf.fill.nta f120=[r15],loc1 + ;; + ldf.fill.nta f33=[in0],loc0 + ldf.fill.nta f41=[ r3],loc0 + ldf.fill.nta f49=[r14],loc0 + ldf.fill.nta f57=[r15],loc0 + ;; + ldf.fill.nta f65=[in0],loc0 + ldf.fill.nta f73=[ r3],loc0 + ldf.fill.nta f81=[r14],loc0 + ldf.fill.nta f89=[r15],loc0 + ;; + ldf.fill.nta f97=[in0],loc1 + ldf.fill.nta f105=[ r3],loc1 + ldf.fill.nta f113=[r14],loc1 + ldf.fill.nta f121=[r15],loc1 + ;; + ldf.fill.nta f34=[in0],loc0 + ldf.fill.nta f42=[ r3],loc0 + ldf.fill.nta f50=[r14],loc0 + ldf.fill.nta f58=[r15],loc0 + ;; + ldf.fill.nta f66=[in0],loc0 + ldf.fill.nta f74=[ r3],loc0 + ldf.fill.nta f82=[r14],loc0 + ldf.fill.nta f90=[r15],loc0 + ;; + ldf.fill.nta f98=[in0],loc1 + ldf.fill.nta f106=[ r3],loc1 + ldf.fill.nta f114=[r14],loc1 + ldf.fill.nta f122=[r15],loc1 + ;; + ldf.fill.nta f35=[in0],loc0 + ldf.fill.nta f43=[ r3],loc0 + ldf.fill.nta f51=[r14],loc0 + ldf.fill.nta f59=[r15],loc0 + ;; + ldf.fill.nta f67=[in0],loc0 + ldf.fill.nta f75=[ r3],loc0 + ldf.fill.nta f83=[r14],loc0 + ldf.fill.nta f91=[r15],loc0 + ;; + ldf.fill.nta f99=[in0],loc1 + ldf.fill.nta f107=[ r3],loc1 + ldf.fill.nta f115=[r14],loc1 + ldf.fill.nta f123=[r15],loc1 + ;; + ldf.fill.nta f36=[in0],loc0 + ldf.fill.nta f44=[ r3],loc0 + ldf.fill.nta f52=[r14],loc0 + ldf.fill.nta f60=[r15],loc0 + ;; + ldf.fill.nta f68=[in0],loc0 + ldf.fill.nta f76=[ r3],loc0 + ldf.fill.nta f84=[r14],loc0 + ldf.fill.nta f92=[r15],loc0 + ;; + ldf.fill.nta f100=[in0],loc1 + ldf.fill.nta f108=[ r3],loc1 + ldf.fill.nta f116=[r14],loc1 + ldf.fill.nta f124=[r15],loc1 + ;; + ldf.fill.nta f37=[in0],loc0 + ldf.fill.nta f45=[ r3],loc0 + ldf.fill.nta f53=[r14],loc0 + ldf.fill.nta f61=[r15],loc0 + ;; + ldf.fill.nta f69=[in0],loc0 + ldf.fill.nta f77=[ r3],loc0 + ldf.fill.nta f85=[r14],loc0 + ldf.fill.nta f93=[r15],loc0 + ;; + ldf.fill.nta f101=[in0],loc1 + ldf.fill.nta f109=[ r3],loc1 + ldf.fill.nta f117=[r14],loc1 + ldf.fill.nta f125=[r15],loc1 + ;; + ldf.fill.nta f38 =[in0],loc0 + ldf.fill.nta f46 =[ r3],loc0 + ldf.fill.nta f54 =[r14],loc0 + ldf.fill.nta f62 =[r15],loc0 + ;; + ldf.fill.nta f70 =[in0],loc0 + ldf.fill.nta f78 =[ r3],loc0 + ldf.fill.nta f86 =[r14],loc0 + ldf.fill.nta f94 =[r15],loc0 + ;; + ldf.fill.nta f102=[in0],loc1 + ldf.fill.nta f110=[ r3],loc1 + ldf.fill.nta f118=[r14],loc1 + ldf.fill.nta f126=[r15],loc1 + ;; + ldf.fill.nta f39 =[in0],loc0 + ldf.fill.nta f47 =[ r3],loc0 + ldf.fill.nta f55 =[r14],loc0 + ldf.fill.nta f63 =[r15],loc0 + ;; + ldf.fill.nta f71 =[in0],loc0 + ldf.fill.nta f79 =[ r3],loc0 + ldf.fill.nta f87 =[r14],loc0 + ldf.fill.nta f95 =[r15],loc0 + ;; + ldf.fill.nta f103=[in0] + ldf.fill.nta f111=[ r3] + ldf.fill.nta f119=[r14] + ldf.fill.nta f127=[r15] br.ret.sptk.many rp END(__ia64_load_fpu) diff -Nru a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c --- a/arch/ia64/kernel/ia64_ksyms.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/ia64_ksyms.c Tue Sep 17 23:47:51 2002 @@ -127,6 +127,8 @@ 
EXPORT_SYMBOL(ia64_pal_call_phys_static); EXPORT_SYMBOL(ia64_pal_call_stacked); EXPORT_SYMBOL(ia64_pal_call_static); +EXPORT_SYMBOL(ia64_load_scratch_fpregs); +EXPORT_SYMBOL(ia64_save_scratch_fpregs); extern struct efi efi; EXPORT_SYMBOL(efi); diff -Nru a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c --- a/arch/ia64/kernel/init_task.c Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/kernel/init_task.c Tue Sep 17 23:47:52 2002 @@ -16,7 +16,7 @@ static struct fs_struct init_fs = INIT_FS; static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); struct mm_struct init_mm = INIT_MM(init_mm); /* diff -Nru a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c --- a/arch/ia64/kernel/irq.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/irq.c Tue Sep 17 23:47:51 2002 @@ -403,8 +403,8 @@ break; desc->status &= ~IRQ_PENDING; } - out: desc->status &= ~IRQ_INPROGRESS; + out: /* * The ->end() handler has to deal with interrupts which got * disabled while the handler was running. @@ -788,7 +788,7 @@ if (!shared) { desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); desc->handler->startup(irq); } spin_unlock_irqrestore(&desc->lock,flags); diff -Nru a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S --- a/arch/ia64/kernel/ivt.S Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/ivt.S Tue Sep 17 23:47:51 2002 @@ -70,24 +70,6 @@ mov r19=n;; /* prepare to save predicates */ \ br.sptk.many dispatch_to_fault_handler -/* - * As we don't (hopefully) use the space available, we need to fill it with - * nops. the parameter may be used for debugging and is representing the entry - * number - */ -#define BREAK_BUNDLE(a) break.m (a); \ - break.i (a); \ - break.i (a) -/* - * 4 breaks bundles all together - */ -#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a) - -/* - * 8 bundles all together (too lazy to use only 4 at a time !) - */ -#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a) - .section .text.ivt,"ax" .align 32768 // align on 32KB boundary @@ -115,6 +97,10 @@ * - the faulting virtual address has no L1, L2, or L3 mapping */ mov r16=cr.ifa // get address that caused the TLB miss +#ifdef CONFIG_HUGETLB_PAGE + movl r18=PAGE_SHIFT + mov r25=cr.itir +#endif ;; rsm psr.dt // use physical addressing for data mov r31=pr // save the predicate registers @@ -122,8 +108,18 @@ shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 ;; + shr r22=r21,3 +#ifdef CONFIG_HUGETLB_PAGE + extr.u r26=r25,2,6 + ;; + cmp.eq p8,p0=HPAGE_SHIFT,r26 + ;; +(p8) dep r25=r18,r25,2,6 +(p8) shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT + ;; +#endif cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address ;; (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d // ensure "rsm psr.dt" has taken effect @@ -134,7 +130,7 @@ (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) cmp.eq p7,p6=0,r21 // unused address bits all zeroes? 
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position + shr.u r18=r22,PMD_SHIFT // shift L2 index into position ;; ld8 r17=[r17] // fetch the L1 entry (may be 0) ;; @@ -142,7 +138,7 @@ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry ;; (p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + shr.u r19=r22,PAGE_SHIFT // shift L3 index into position ;; (p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry @@ -160,6 +156,10 @@ (p11) itc.d r18 // insert the data TLB entry (p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) mov cr.ifa=r22 + +#ifdef CONFIG_HUGETLB_PAGE +(p8) mov cr.itir=r25 // change to default page-size for VHPT +#endif /* * Now compute and insert the TLB entry for the virtual page table. We never diff -Nru a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S --- a/arch/ia64/kernel/pal.S Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/kernel/pal.S Tue Sep 17 23:47:52 2002 @@ -245,3 +245,48 @@ br.ret.sptk.many b0 END(ia64_pal_call_phys_stacked) +/* + * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15). + * + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch + * regs fp-low partition. + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_save_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + stf.spill [in0] = f10,32 + stf.spill [r2] = f11,32 + ;; + stf.spill [in0] = f12,32 + stf.spill [r2] = f13,32 + ;; + stf.spill [in0] = f14,32 + stf.spill [r2] = f15,32 + br.ret.sptk.many rp +END(ia64_save_scratch_fpregs) + +/* + * Load scratch fp scratch regs (fp10-fp15) + * + * Inputs: + * in0 Address of stack storage for fp regs + */ + +GLOBAL_ENTRY(ia64_load_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + ldf.fill f10 = [in0],32 + ldf.fill f11 = [r2],32 + ;; + ldf.fill f12 = [in0],32 + ldf.fill f13 = [r2],32 + ;; + ldf.fill f14 = [in0],32 + ldf.fill f15 = [r2],32 + br.ret.sptk.many rp +END(ia64_load_scratch_fpregs) diff -Nru a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c --- a/arch/ia64/kernel/pci.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/pci.c Tue Sep 17 23:47:51 2002 @@ -2,6 +2,11 @@ * pci.c - Low-Level PCI Access in IA-64 * * Derived from bios32.c of i386 tree. + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + * + * Note: Above list of copyright holders is incomplete... */ #include @@ -85,15 +90,15 @@ static int pci_sal_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) { - return __pci_sal_read(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); + return __pci_sal_read(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn), + where, size, value); } static int pci_sal_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) { - return __pci_sal_write(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); + return __pci_sal_write(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn), + where, size, value); } struct pci_ops pci_sal_ops = { @@ -202,8 +207,8 @@ { } -int -pcibios_enable_device (struct pci_dev *dev) +static inline int +pcibios_enable_resources (struct pci_dev *dev, int mask) { u16 cmd, old_cmd; int idx; @@ -215,6 +220,10 @@ pci_read_config_word(dev, PCI_COMMAND, &cmd); old_cmd = cmd; for (idx=0; idx<6; idx++) { + /* Only set up the desired resources. 
*/ + if (!(mask & (1 << idx))) + continue; + r = &dev->resource[idx]; if (!r->start && r->end) { printk(KERN_ERR @@ -233,9 +242,19 @@ printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } + return 0; +} - printk(KERN_INFO "PCI: Found IRQ %d for device %s\n", dev->irq, dev->slot_name); +int +pcibios_enable_device (struct pci_dev *dev, int mask) +{ + int ret; + ret = pcibios_enable_resources(dev, mask); + if (ret < 0) + return ret; + + printk(KERN_INFO "PCI: Found IRQ %d for device %s\n", dev->irq, dev->slot_name); return 0; } diff -Nru a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c --- a/arch/ia64/kernel/perfmon.c Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/kernel/perfmon.c Tue Sep 17 23:47:52 2002 @@ -76,7 +76,7 @@ /* XXX: these three assume that register i is implemented */ #define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) #define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING) -#define PMC_IS_MONITOR(c) (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR) +#define PMC_IS_MONITOR(i) (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR) /* k assume unsigned */ #define IBR_IS_IMPL(k) (kpid) { - ctx->ctx_notify_task = task = current; - current->thread.pfm_context = ctx; + ctx->ctx_notify_task = current; + task->thread.pfm_context = ctx; } else if (notify_pid!=0) { struct task_struct *notify_task; @@ -1034,7 +1041,10 @@ /* * check if we can send this task a signal */ - if (pfm_bad_permissions(notify_task)) goto buffer_error; + if (pfm_bad_permissions(notify_task)) { + read_unlock(&tasklist_lock); + goto buffer_error; + } /* * make visible @@ -1044,7 +1054,7 @@ * okay because child will do the scan for nothing which * won't hurt. */ - current->thread.pfm_context = ctx; + task->thread.pfm_context = ctx; /* * will cause task to check on exit for monitored @@ -1101,7 +1111,7 @@ sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */ - if (copy_to_user(req, &tmp, sizeof(tmp))) { + if (__copy_to_user(req, &tmp, sizeof(tmp))) { ret = -EFAULT; goto buffer_error; } @@ -1147,16 +1157,38 @@ abort: UNLOCK_PFS(); + /* make sure we don't leave anything behind */ + task->thread.pfm_context = NULL; + return ret; } +static inline unsigned long +pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) +{ + unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; + unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; + extern unsigned long carta_random32 (unsigned long seed); + + if (reg->flags & PFM_REGFL_RANDOM) { + new_seed = carta_random32(old_seed); + val -= (old_seed & mask); /* counter values are negative numbers! */ + if ((mask >> 32) != 0) + /* construct a full 64-bit random value: */ + new_seed |= carta_random32(old_seed >> 32) << 32; + reg->seed = new_seed; + } + reg->lval = val; + return val; +} + static void pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag) { unsigned long mask = ovfl_regs[0]; unsigned long reset_others = 0UL; unsigned long val; - int i; + int i, is_long_reset = (flag & PFM_RELOAD_LONG_RESET); DBprintk(("masks=0x%lx\n", mask)); @@ -1166,15 +1198,11 @@ mask >>= PMU_FIRST_COUNTER; for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { if (mask & 0x1) { - val = flag == PFM_RELOAD_LONG_RESET ? 
- ctx->ctx_soft_pmds[i].long_reset: - ctx->ctx_soft_pmds[i].short_reset; - + val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset); reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0]; - DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n", - current->pid, - flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val)); + DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid, + is_long_reset ? "long" : "short", i, val)); /* upper part is ignored on rval */ pfm_write_soft_counter(ctx, i, val); @@ -1188,19 +1216,15 @@ if ((reset_others & 0x1) == 0) continue; - val = flag == PFM_RELOAD_LONG_RESET ? - ctx->ctx_soft_pmds[i].long_reset: - ctx->ctx_soft_pmds[i].short_reset; + val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset); if (PMD_IS_COUNTING(i)) { pfm_write_soft_counter(ctx, i, val); } else { ia64_set_pmd(i, val); } - - DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n", - current->pid, - flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val)); + DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid, + is_long_reset ? "long" : "short", i, val)); } ia64_srlz_d(); /* just in case ! */ @@ -1212,9 +1236,10 @@ { struct thread_struct *th = &task->thread; pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; - unsigned int cnum; + unsigned long value; + unsigned int cnum, reg_flags, flags; int i; - int ret = 0, reg_retval = 0; + int ret = -EINVAL; /* we don't quite support this right now */ if (task != current) return -EINVAL; @@ -1225,10 +1250,12 @@ for (i = 0; i < count; i++, req++) { + if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; - if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; - - cnum = tmp.reg_num; + cnum = tmp.reg_num; + reg_flags = tmp.reg_flags; + value = tmp.reg_value; + flags = 0; /* * we reject all non implemented PMC as well @@ -1237,8 +1264,7 @@ */ if (!PMC_IS_IMPL(cnum) || cnum < 4) { DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum)); - ret = -EINVAL; - goto abort_mission; + goto error; } /* * A PMC used to configure monitors must be: @@ -1247,73 +1273,79 @@ * any other configuration is rejected. */ if (PMC_IS_MONITOR(cnum) || PMC_IS_COUNTING(cnum)) { - DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, tmp.reg_value))); - - if (ctx->ctx_fl_system ^ PMC_PM(cnum, tmp.reg_value)) { - DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, tmp.reg_value), ctx->ctx_fl_system)); - ret = -EINVAL; - goto abort_mission; + if (ctx->ctx_fl_system ^ PMC_PM(cnum, value)) { + DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, value), ctx->ctx_fl_system)); + goto error; } } if (PMC_IS_COUNTING(cnum)) { - pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value; + pfm_monitor_t *p = (pfm_monitor_t *)&value; /* * enforce generation of overflow interrupt. Necessary on all * CPUs. 
*/ p->pmc_oi = 1; - if (tmp.reg_flags & PFM_REGFL_OVFL_NOTIFY) { + if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { /* - * must have a target for the signal - */ + * must have a target for the signal + */ if (ctx->ctx_notify_task == NULL) { - DBprintk(("no notify_task && PFM_REGFL_OVFL_NOTIFY\n")); - ret = -EINVAL; - goto abort_mission; + DBprintk(("cannot set ovfl_notify: no notify_task\n")); + goto error; } - - ctx->ctx_soft_pmds[cnum].flags |= PFM_REGFL_OVFL_NOTIFY; + flags |= PFM_REGFL_OVFL_NOTIFY; } - /* - * copy reset vector - */ - ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0]; - ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1]; - ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2]; - ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3]; + + if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; + + } else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { + DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum)); + goto error; } + /* * execute write checker, if any */ - if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs); -abort_mission: - if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL; + if (PMC_WR_FUNC(cnum)) { + ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs); + if (ret) goto error; + ret = -EINVAL; + } - PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval); + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(tmp.reg_flags, 0); /* * update register return value, abort all if problem during copy. + * we only modify the reg_flags field. no check mode is fine because + * access has been verified upfront in sys_perfmonctl(). + * + * If this fails, then the software state is not modified */ - if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; + if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT; /* - * if there was something wrong on this register, don't touch - * the hardware at all and abort write request for others. - * - * On error, the user mut sequentially scan the table and the first - * entry which has a return flag set is the one that caused the error. + * Now we commit the changes to the software state */ - if (ret != 0) { - DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", - task->pid, cnum, tmp.reg_value, reg_retval)); - break; - } /* - * We can proceed with this register! 
+ * full flag update each time a register is programmed */ + ctx->ctx_soft_pmds[cnum].flags = flags; + + if (PMC_IS_COUNTING(cnum)) { + /* + * copy reset vector + */ + ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0]; + ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1]; + ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2]; + ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3]; + } /* * Needed in case the user does not initialize the equivalent @@ -1325,16 +1357,26 @@ /* * keep copy the pmc, used for register reload */ - th->pmc[cnum] = tmp.reg_value; + th->pmc[cnum] = value; - ia64_set_pmc(cnum, tmp.reg_value); + ia64_set_pmc(cnum, value); DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n", - task->pid, cnum, tmp.reg_value, + task->pid, cnum, value, ctx->ctx_soft_pmds[cnum].flags, ctx->ctx_used_pmds[0])); } + + return 0; + +error: + PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); + + if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT; + + DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret)); + return ret; } @@ -1342,9 +1384,10 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; + unsigned long value; unsigned int cnum; int i; - int ret = 0, reg_retval = 0; + int ret; /* we don't quite support this right now */ if (task != current) return -EINVAL; @@ -1354,65 +1397,72 @@ */ if (!CTX_IS_ENABLED(ctx)) return -EINVAL; - /* XXX: ctx locking may be required here */ + ret = -EINVAL; + for (i = 0; i < count; i++, req++) { - if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; + if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; + + cnum = tmp.reg_num; + value = tmp.reg_value; - cnum = tmp.reg_num; if (!PMD_IS_IMPL(cnum)) { - ret = -EINVAL; + DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum)); goto abort_mission; } - /* update virtualized (64bits) counter */ - if (PMD_IS_COUNTING(cnum)) { - ctx->ctx_soft_pmds[cnum].ival = tmp.reg_value; - ctx->ctx_soft_pmds[cnum].val = tmp.reg_value & ~pmu_conf.perf_ovfl_val; - ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset; - ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset; - - } /* * execute write checker, if any */ - if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs); -abort_mission: - if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL; + if (PMD_WR_FUNC(cnum)) { + unsigned long v = value; + ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs); + if (ret) goto abort_mission; + value = v; + ret = -EINVAL; + } - PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval); + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(tmp.reg_flags, 0); - if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; + if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT; /* - * if there was something wrong on this register, don't touch - * the hardware at all and abort write request for others. - * - * On error, the user mut sequentially scan the table and the first - * entry which has a return flag set is the one that caused the error. 
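The deleted comment above still describes the user-visible error convention for PFM_WRITE_PMCS/PFM_WRITE_PMDS: the kernel tags the offending entry's reg_flags and aborts, leaving earlier entries flagged clean, so a monitoring tool finds the failing register by scanning its request array. A hypothetical user-side helper (illustrative; assumes the perfmon user-level headers for pfarg_reg_t and PFM_REG_RETFL_EINVAL):

	/* Return the first register the kernel flagged as bad,
	 * or -1 if no per-register error was recorded. */
	static int pfm_first_bad_reg(pfarg_reg_t *req, int count)
	{
		int i;

		for (i = 0; i < count; i++)
			if (req[i].reg_flags & PFM_REG_RETFL_EINVAL)
				return req[i].reg_num;
		return -1;
	}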
+ * now commit changes to software state */ - if (ret != 0) { - DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", - task->pid, cnum, tmp.reg_value, reg_retval)); - break; + + /* update virtualized (64bits) counter */ + if (PMD_IS_COUNTING(cnum)) { + ctx->ctx_soft_pmds[cnum].lval = value; + ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.perf_ovfl_val; + + ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset; + ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset; + + ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed; + ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask; } /* keep track of what we use */ CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]); + /* mark this register as used as well */ CTX_USED_PMD(ctx, RDEP(cnum)); /* writes to unimplemented part is ignored, so this is safe */ - ia64_set_pmd(cnum, tmp.reg_value & pmu_conf.perf_ovfl_val); + ia64_set_pmd(cnum, value); /* to go away */ ia64_srlz_d(); - DBprintk(("[%d] pmd[%u]: soft_pmd=0x%lx short_reset=0x%lx " + DBprintk(("[%d] pmd[%u]: value=0x%lx soft_pmd=0x%lx short_reset=0x%lx " "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx\n", task->pid, cnum, + value, ctx->ctx_soft_pmds[cnum].val, ctx->ctx_soft_pmds[cnum].short_reset, ctx->ctx_soft_pmds[cnum].long_reset, @@ -1421,17 +1471,36 @@ ctx->ctx_used_pmds[0], ctx->ctx_soft_pmds[cnum].reset_pmds[0])); } + + return 0; + +abort_mission: + /* + * for now, we have only one possibility for error + */ + PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); + + /* + * we change the return value to EFAULT in case we cannot write register return code. + * The caller first must correct this error, then a resubmission of the request will + * eventually yield the EINVAL. + */ + if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT; + + DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret)); + return ret; } + static int pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { struct thread_struct *th = &task->thread; - unsigned long val=0; - pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; - unsigned int cnum; - int i, ret = 0; + unsigned long val = 0UL; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned int cnum, reg_flags = 0; + int i, ret = -EINVAL; if (!CTX_IS_ENABLED(ctx)) return -EINVAL; @@ -1447,11 +1516,9 @@ DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid)); for (i = 0; i < count; i++, req++) { - unsigned long ctx_val = ~0UL; - if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; - - cnum = tmp.reg_num; + if (__get_user(cnum, &req->reg_num)) return -EFAULT; + if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT; if (!PMD_IS_IMPL(cnum)) goto abort_mission; /* @@ -1501,34 +1568,40 @@ */ val &= pmu_conf.perf_ovfl_val; - val += ctx_val = ctx->ctx_soft_pmds[cnum].val; + val += ctx->ctx_soft_pmds[cnum].val; } - tmp.reg_value = val; - /* * execute read checker, if any */ if (PMD_RD_FUNC(cnum)) { - ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value, regs); + unsigned long v = val; + ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs); + val = v; } - PFM_REG_RETFLAG_SET(tmp.reg_flags, ret); + PFM_REG_RETFLAG_SET(reg_flags, 0); DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n", - cnum, ret, val, ia64_get_pmc(cnum))); - - if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; + cnum, ret, val, ia64_get_pmc(cnum))); + /* + * update register return value, abort all if problem during copy. + * we only modify the reg_flags field. 
no check mode is fine because + * access has been verified upfront in sys_perfmonctl(). + */ + if (__put_user(cnum, &req->reg_num)) return -EFAULT; + if (__put_user(val, &req->reg_value)) return -EFAULT; + if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT; } + return 0; + abort_mission: - PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); - /* - * XXX: if this fails, we stick with the original failure, flag not updated! - */ - copy_to_user(req, &tmp, sizeof(tmp)); - return -EINVAL; + PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL); + + if (__put_user(reg_flags, &req->reg_flags)) ret = -EFAULT; + return ret; } #ifdef PFM_PMU_USES_DBR @@ -1697,44 +1770,6 @@ return 0; } -#ifndef CONFIG_SMP -/* - * On UP kernels, we do not need to constantly set the psr.pp bit - * when a task is scheduled. The psr.pp bit can only be changed in - * the kernel because of a user request. Given we are on a UP non preeemptive - * kernel we know that no other task is running, so we cna simply update their - * psr.pp from their saved state. There is this no impact on the context switch - * code compared to the SMP case. - */ -static void -pfm_tasklist_toggle_pp(unsigned int val) -{ - struct task_struct *p; - struct pt_regs *regs; - - DBprintk(("invoked by [%d] pp=%u\n", current->pid, val)); - - read_lock(&tasklist_lock); - - for_each_task(p) { - regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET); - - /* - * position on pt_regs saved on stack on 1st entry into the kernel - */ - regs--; - - /* - * update psr.pp - */ - ia64_psr(regs)->pp = val; - } - read_unlock(&tasklist_lock); -} -#endif - - - static int pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) @@ -1763,11 +1798,8 @@ ia64_srlz_i(); -#ifdef CONFIG_SMP __get_cpu_var(pfm_dcr_pp) = 0; -#else - pfm_tasklist_toggle_pp(0); -#endif + ia64_psr(regs)->pp = 0; } else { @@ -2013,7 +2045,7 @@ for (i = 0; i < count; i++, req++) { - if (copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission; + if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission; rnum = tmp.dbreg_num; dbreg.val = tmp.dbreg_value; @@ -2046,7 +2078,7 @@ PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0); - if (copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission; + if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission; /* * Debug registers, just like PMC, can only be modified @@ -2101,7 +2133,7 @@ * XXX: for now we can only come here on EINVAL */ PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL); - copy_to_user(req, &tmp, sizeof(tmp)); + __put_user(tmp.dbreg_flags, &req->dbreg_flags); } return ret; } @@ -2142,7 +2174,7 @@ tmp.ft_version = PFM_VERSION; tmp.ft_smpl_version = PFM_SMPL_VERSION; - if (copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT; + if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT; return 0; } @@ -2171,11 +2203,8 @@ if (ctx->ctx_fl_system) { -#ifdef CONFIG_SMP __get_cpu_var(pfm_dcr_pp) = 1; -#else - pfm_tasklist_toggle_pp(1); -#endif + /* set user level psr.pp */ ia64_psr(regs)->pp = 1; @@ -2226,10 +2255,8 @@ __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); ia64_srlz_i(); -#ifdef CONFIG_SMP - __get_cpu_var(pfm_syst_wide) = 1; __get_cpu_var(pfm_dcr_pp) = 0; -#endif + __get_cpu_var(pfm_syst_wide) = 1; } else { /* * needed in case the task was a passive task during @@ -2270,11 +2297,11 @@ { pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; unsigned int cnum; - int i; + int i, ret = -EINVAL; for (i = 0; i < count; i++, req++) { - if (copy_from_user(&tmp, req, sizeof(tmp))) return 
-EFAULT; + if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; cnum = tmp.reg_num; @@ -2286,16 +2313,13 @@ DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value)); - if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; + if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; } return 0; abort_mission: PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); - /* - * XXX: if this fails, we stick with the original failure, flag not updated! - */ - copy_to_user(req, &tmp, sizeof(tmp)); - return -EINVAL; + if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT; + return ret; } /* @@ -2303,21 +2327,21 @@ */ static pfm_cmd_desc_t pfm_cmd_tab[]={ /* 0 */{ NULL, 0, 0, 0}, /* not used */ -/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, -/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, -/* 3 */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, +/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, +/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, +/* 3 */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, /* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, /* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, /* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, /* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, -/* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, 1, sizeof(pfarg_context_t)}, +/* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)}, /* 9 */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, /* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0}, /* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, -/* 12 */{ pfm_get_features, PFM_CMD_ARG_WRITE, 0, 0}, +/* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0}, /* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)}, /* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, -/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, +/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, /* 16 */{ NULL, 0, 0, 0}, /* not used */ /* 17 */{ NULL, 0, 0, 0}, /* not used */ /* 18 */{ NULL, 0, 0, 0}, /* not used */ @@ -2335,8 +2359,8 @@ /* 30 */{ NULL, 0, 0, 0}, /* not used */ /* 31 */{ NULL, 0, 0, 0}, /* not used */ #ifdef PFM_PMU_USES_DBR -/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}, -/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)} +/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}, +/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)} #endif }; #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) @@ -2392,7 +2416,7 @@ if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT; - if (PFM_CMD_WRITE_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT; + if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT; if (PFM_CMD_USE_PID(cmd)) { /* @@ -2554,9 +2578,16 @@ */ h->pid 
= current->pid; h->cpu = smp_processor_id(); - h->rate = 0; /* XXX: add the sampling rate used here */ - h->ip = regs ? regs->cr_iip : 0x0; /* where did the fault happened */ - h->regs = ovfl_mask; /* which registers overflowed */ + h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL; + /* + * where did the fault happen + */ + h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL; + + /* + * which registers overflowed + */ + h->regs = ovfl_mask; /* guaranteed to monotonically increase on each cpu */ h->stamp = pfm_get_stamp(); @@ -2575,15 +2606,13 @@ if (PMD_IS_COUNTING(j)) { *e = pfm_read_soft_counter(ctx, j); - /* check if this pmd overflowed as well */ - *e += ovfl_mask & (1UL<ctx_soft_pmds[i].val)); /* - * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use - * val to count the number of times we overflowed. Otherwise we would loose the - * current value in the PMD (which can be >0). So to make sure we don't loose - * the residual counts we set val to contain full 64bits value of the counter. + * Note that the pmd is not necessarily 0 at this point as qualified events + * may have happened before the PMU was frozen. The residual count is not + * taken into consideration here but will be with any read of the pmd via + * pfm_read_pmds(). */ old_val = ctx->ctx_soft_pmds[i].val; - ctx->ctx_soft_pmds[i].val = 1 + pmu_conf.perf_ovfl_val + pfm_read_soft_counter(ctx, i); - - DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n", - i, ctx->ctx_soft_pmds[i].val, old_val, - ia64_get_pmd(i) & pmu_conf.perf_ovfl_val)); - - /* - * now that we have extracted the hardware counter, we can clear it to ensure - * that a subsequent PFM_READ_PMDS will not include it again. - */ - ia64_set_pmd(i, 0UL); + ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val; /* * check for overflow condition @@ -2702,12 +2721,15 @@ ovfl_pmds |= 1UL << i; - DBprintk_ovfl(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds)); - if (PMC_OVFL_NOTIFY(ctx, i)) { ovfl_notify |= 1UL << i; } } + DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", + i, ctx->ctx_soft_pmds[i].val, old_val, + ia64_get_pmd(i) & pmu_conf.perf_ovfl_val, ovfl_pmds, ovfl_notify)); + + } /* @@ -2896,7 +2918,7 @@ struct task_struct *task; pfm_context_t *ctx; - pfm_stats.pfm_ovfl_intr_count++; + pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++; /* * srlz.d done before arriving here @@ -2954,10 +2976,7 @@ atomic_set(&ctx->ctx_is_busy, 0); #endif } else { - pfm_stats.pfm_spurious_ovfl_intr_count++; - - printk("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n", - smp_processor_id(), pmc0, (void *)PMU_OWNER()); + pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++; } } @@ -2968,28 +2987,24 @@ char *p = page; int i; - p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes"); - p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No"); - p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val); - p += sprintf(p, "overflow intrs : %lu\n", pfm_stats.pfm_ovfl_intr_count); - p += sprintf(p, "spurious intrs : %lu\n", pfm_stats.pfm_spurious_ovfl_intr_count); - p += sprintf(p, "recorded samples : %lu\n", pfm_stats.pfm_recorded_samples_count); - p += sprintf(p, "smpl buffer full : %lu\n", pfm_stats.pfm_full_smpl_buffer_count); + p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? 
"No": "Yes"); + p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No"); + p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val); -#ifdef CONFIG_SMP - p += sprintf(p, "CPU%d syst_wide : %d\n" - "CPU%d dcr_pp : %d\n", - smp_processor_id(), - __get_cpu_var(pfm_syst_wide), - smp_processor_id(), - __get_cpu_var(pfm_dcr_pp)); -#endif + for(i=0; i < NR_CPUS; i++) { + if (cpu_is_online(i) == 0) continue; + p += sprintf(p, "CPU%-2d overflow intrs : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count); + p += sprintf(p, "CPU%-2d spurious intrs : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count); + p += sprintf(p, "CPU%-2d recorded samples : %lu\n", i, pfm_stats[i].pfm_recorded_samples_count); + p += sprintf(p, "CPU%-2d smpl buffer full : %lu\n", i, pfm_stats[i].pfm_full_smpl_buffer_count); + p += sprintf(p, "CPU%-2d owner : %d\n", i, pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1); + } LOCK_PFS(); - p += sprintf(p, "proc_sessions : %lu\n" - "sys_sessions : %lu\n" - "sys_use_dbregs : %lu\n" - "ptrace_use_dbregs: %lu\n", + p += sprintf(p, "proc_sessions : %lu\n" + "sys_sessions : %lu\n" + "sys_use_dbregs : %lu\n" + "ptrace_use_dbregs : %lu\n", pfm_sessions.pfs_task_sessions, pfm_sessions.pfs_sys_sessions, pfm_sessions.pfs_sys_use_dbregs, @@ -2997,30 +3012,6 @@ UNLOCK_PFS(); - for(i=0; i < NR_CPUS; i++) { - if (cpu_is_online(i)) { - p += sprintf(p, "CPU%d owner : %-6d\n", - i, - pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1); - } - } - - for(i=0; pmd_desc[i].type != PFM_REG_NONE; i++) { - p += sprintf(p, "PMD%-2d: %d 0x%lx 0x%lx\n", - i, - pmd_desc[i].type, - pmd_desc[i].dep_pmd[0], - pmd_desc[i].dep_pmc[0]); - } - - for(i=0; pmc_desc[i].type != PFM_REG_NONE; i++) { - p += sprintf(p, "PMC%-2d: %d 0x%lx 0x%lx\n", - i, - pmc_desc[i].type, - pmc_desc[i].dep_pmd[0], - pmc_desc[i].dep_pmc[0]); - } - return p - page; } @@ -3041,7 +3032,6 @@ return len; } -#ifdef CONFIG_SMP void pfm_syst_wide_update_task(struct task_struct *task, int mode) { @@ -3054,8 +3044,6 @@ */ ia64_psr(regs)->pp = mode ? __get_cpu_var(pfm_dcr_pp) : 0; } -#endif - void pfm_save_regs (struct task_struct *task) @@ -3295,6 +3283,30 @@ owner = PMU_OWNER(); ctx = task->thread.pfm_context; + t = &task->thread; + + /* + * we restore ALL the debug registers to avoid picking up + * stale state. + * + * This must be done even when the task is still the owner + * as the registers may have been modified via ptrace() + * (not perfmon) by the previous task. + * + * XXX: dealing with this in a lazy fashion requires modifications + * to the way the the debug registers are managed. This is will done + * in the next version of perfmon. + */ + if (ctx->ctx_fl_using_dbreg) { + for (i=0; i < pmu_conf.num_ibrs; i++) { + ia64_set_ibr(i, t->ibr[i]); + } + ia64_srlz_i(); + for (i=0; i < pmu_conf.num_dbrs; i++) { + ia64_set_dbr(i, t->dbr[i]); + } + ia64_srlz_d(); + } /* * if we were the last user, then nothing to do except restore psr @@ -3330,7 +3342,6 @@ pfm_fetch_regs(cpu, task, ctx); } #endif - t = &task->thread; /* * To avoid leaking information to the user level when psr.sp=0, @@ -3360,21 +3371,6 @@ if (mask & 0x1) ia64_set_pmc(i, t->pmc[i]); } - /* - * we restore ALL the debug registers to avoid picking up - * stale state. 
- */ - if (ctx->ctx_fl_using_dbreg) { - for (i=0; i < pmu_conf.num_ibrs; i++) { - ia64_set_ibr(i, t->ibr[i]); - } - ia64_srlz_i(); - for (i=0; i < pmu_conf.num_dbrs; i++) { - ia64_set_dbr(i, t->dbr[i]); - } - } - ia64_srlz_d(); - if (t->pmc[0] & ~0x1) { pfm_overflow_handler(task, ctx, t->pmc[0], NULL); } @@ -3545,12 +3541,8 @@ ia64_srlz_i(); -#ifdef CONFIG_SMP __get_cpu_var(pfm_syst_wide) = 0; __get_cpu_var(pfm_dcr_pp) = 0; -#else - pfm_tasklist_toggle_pp(0); -#endif } else { /* stop monitoring */ @@ -3769,18 +3761,12 @@ m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER; for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) { if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) { - nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].ival & ~pmu_conf.perf_ovfl_val; - thread->pmd[i] = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val; + nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.perf_ovfl_val; + thread->pmd[i] = nctx->ctx_soft_pmds[i].lval & pmu_conf.perf_ovfl_val; + } else { + thread->pmd[i] = 0UL; /* reset to initial state */ } - /* what about the other pmds? zero or keep as is */ - } - /* - * clear BTB index register - * XXX: CPU-model specific knowledge! - */ - thread->pmd[16] = 0; - nctx->ctx_fl_frozen = 0; nctx->ctx_ovfl_regs[0] = 0UL; @@ -3950,7 +3936,8 @@ pfm_sessions.pfs_sys_session[ctx->ctx_cpu] = NULL; pfm_sessions.pfs_sys_sessions--; DBprintk(("freeing syswide session on CPU%ld\n", ctx->ctx_cpu)); - /* update perfmon debug register counter */ + + /* update perfmon debug register usage counter */ if (ctx->ctx_fl_using_dbreg) { if (pfm_sessions.pfs_sys_use_dbregs == 0) { printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid); @@ -3993,7 +3980,8 @@ * Walk through the list and free the sampling buffer and psb */ while (psb) { - DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size)); + DBprintk(("[%d] freeing smpl @%p size %ld\n", + current->pid, psb->psb_hdr, psb->psb_size)); pfm_rvfree(psb->psb_hdr, psb->psb_size); tmp = psb->psb_next; @@ -4014,14 +4002,14 @@ void pfm_cleanup_owners(struct task_struct *task) { - struct task_struct *p; + struct task_struct *g, *p; pfm_context_t *ctx; DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid)); read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { /* * It is safe to do the 2-step test here, because thread.ctx * is cleaned up only in release_thread() and at that point @@ -4059,7 +4047,8 @@ DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid)); } - } + } while_each_thread(g, p); + read_unlock(&tasklist_lock); atomic_set(&task->thread.pfm_owners_check, 0); @@ -4073,23 +4062,21 @@ void pfm_cleanup_notifiers(struct task_struct *task) { - struct task_struct *p; + struct task_struct *g, *p; pfm_context_t *ctx; DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid)); read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { /* - * It is safe to do the 2-step test here, because thread.ctx - * is cleaned up only in release_thread() and at that point - * the task has been detached from the tasklist which is an - * operation which uses the write_lock() on the tasklist_lock - * so it cannot run concurrently to this loop. So we have the - * guarantee that if we find p and it has a perfmon ctx then - * it is going to stay like this for the entire execution of this - * loop. 
+ * It is safe to do the 2-step test here, because thread.ctx is cleaned up + * only in release_thread() and at that point the task has been detached + * from the tasklist which is an operation which uses the write_lock() on + * the tasklist_lock so it cannot run concurrently to this loop. So we + * have the guarantee that if we find p and it has a perfmon ctx then it + * is going to stay like this for the entire execution of this loop. */ ctx = p->thread.pfm_context; @@ -4098,16 +4085,16 @@ if (ctx && ctx->ctx_notify_task == task) { DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid)); /* - * the spinlock is required to take care of a race condition - * with the send_sig_info() call. We must make sure that - * either the send_sig_info() completes using a valid task, - * or the notify_task is cleared before the send_sig_info() - * can pick up a stale value. Note that by the time this - * function is executed the 'task' is already detached from the - * tasklist. The problem is that the notifiers have a direct - * pointer to it. It is okay to send a signal to a task in this - * stage, it simply will have no effect. But it is better than sending - * to a completely destroyed task or worse to a new task using the same + * the spinlock is required to take care of a race condition with + * the send_sig_info() call. We must make sure that either the + * send_sig_info() completes using a valid task, or the + * notify_task is cleared before the send_sig_info() can pick up a + * stale value. Note that by the time this function is executed + * the 'task' is already detached from the tasklist. The problem + * is that the notifiers have a direct pointer to it. It is okay + * to send a signal to a task in this stage, it simply will have + * no effect. But it is better than sending to a completely + * destroyed task or worse to a new task using the same * task_struct address. */ LOCK_CTX(ctx); @@ -4118,7 +4105,8 @@ DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid)); } - } + } while_each_thread(g, p); + read_unlock(&tasklist_lock); atomic_set(&task->thread.pfm_notifiers_check, 0); diff -Nru a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c --- a/arch/ia64/kernel/process.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/process.c Tue Sep 17 23:47:51 2002 @@ -81,6 +81,12 @@ } void +dump_stack (void) +{ + show_stack(NULL); +} + +void show_regs (struct pt_regs *regs) { unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; @@ -248,18 +254,15 @@ * | | <-- sp (lowest addr) * +---------------------+ * - * Note: if we get called through kernel_thread() then the memory - * above "(highest addr)" is valid kernel stack memory that needs to - * be copied as well. - * - * Observe that we copy the unat values that are in pt_regs and - * switch_stack. Spilling an integer to address X causes bit N in - * ar.unat to be set to the NaT bit of the register, with N=(X & - * 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY - * if the pt_regs structure in the parent is congruent to that of the - * child, modulo 512. Since the stack is page aligned and the page - * size is at least 4KB, this is always the case, so there is nothing - * to worry about. + * Note: if we get called through kernel_thread() then the memory above "(highest addr)" + * is valid kernel stack memory that needs to be copied as well. + * + * Observe that we copy the unat values that are in pt_regs and switch_stack. 
Spilling an + * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, + * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the + * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since + * the stack is page aligned and the page size is at least 4KB, this is always the case, + * so there is nothing to worry about. */ int copy_thread (int nr, unsigned long clone_flags, @@ -300,6 +303,8 @@ memcpy((void *) child_rbs, (void *) rbs, rbs_size); if (user_mode(child_ptregs)) { + if (clone_flags & CLONE_SETTLS) + child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ if (user_stack_base) { child_ptregs->r12 = user_stack_base + user_stack_size - 16; child_ptregs->ar_bspstore = user_stack_base; diff -Nru a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c --- a/arch/ia64/kernel/ptrace.c Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/kernel/ptrace.c Tue Sep 17 23:47:52 2002 @@ -474,7 +474,7 @@ { struct switch_stack *sw; unsigned long urbs_end; - struct task_struct *p; + struct task_struct *g, *p; struct mm_struct *mm; struct pt_regs *pt; long multi_threaded; @@ -495,7 +495,7 @@ } else { read_lock(&tasklist_lock); { - for_each_task(p) { + do_each_thread(g, p) { if (p->mm == mm && p->state != TASK_RUNNING) { sw = (struct switch_stack *) (p->thread.ksp + 16); pt = ia64_task_regs(p); @@ -504,7 +504,7 @@ if (make_writable) user_flushrs(p, pt); } - } + } while_each_thread(g, p); } read_unlock(&tasklist_lock); } diff -Nru a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c --- a/arch/ia64/kernel/semaphore.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/semaphore.c Tue Sep 17 23:47:51 2002 @@ -15,8 +15,8 @@ * test if they need to do any extra work (up needs to do something * only if count was negative before the increment operation. * - * "sleepers" and the contention routine ordering is protected by the - * semaphore spinlock. + * "sleeping" and the contention routine ordering is protected + * by the spinlock in the semaphore's waitqueue head. * * Note that these functions are only called when there is contention * on the lock, and as such all this is the "non-critical" part of the @@ -44,40 +44,42 @@ wake_up(&sem->wait); } -static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED; - void __down (struct semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + tsk->state = TASK_UNINTERRUPTIBLE; - add_wait_queue_exclusive(&sem->wait, &wait); + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); - spin_lock_irq(&semaphore_lock); sem->sleepers++; for (;;) { int sleepers = sem->sleepers; /* * Add "everybody else" into it. They aren't - * playing, because we own the spinlock. + * playing, because we own the spinlock in + * the wait_queue_head. 
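The sleeper accounting carried over from the old __down() deserves a sketch: every task that blocks has already decremented sem->count once on its way in, and sem->sleepers records how many of those decrements are still outstanding. Adding sleepers - 1 folds all but one of them back into the count, so the atomic test effectively asks: with exactly one pending decrement, is the semaphore still negative? A standalone C11 model of the loop body (illustrative only; the caller is assumed to hold the wait-queue spinlock):

	#include <stdatomic.h>
	#include <stdbool.h>

	static bool down_try(atomic_int *count, int *sleepers)
	{
		int extra = *sleepers - 1;	/* decrements to fold back in */

		/* atomic_add_negative() equivalent: add, then test the result */
		if (atomic_fetch_add(count, extra) + extra >= 0) {
			*sleepers = 0;		/* semaphore acquired */
			return true;
		}
		*sleepers = 1;			/* one decrement stays pending */
		return false;			/* caller unlocks and sleeps */
	}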
*/ if (!atomic_add_negative(sleepers - 1, &sem->count)) { sem->sleepers = 0; break; } sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irq(&semaphore_lock); + spin_unlock_irqrestore(&sem->wait.lock, flags); schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irq(&semaphore_lock); } - spin_unlock_irq(&semaphore_lock); - remove_wait_queue(&sem->wait, &wait); + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); tsk->state = TASK_RUNNING; - wake_up(&sem->wait); } int @@ -86,10 +88,12 @@ int retval = 0; struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + tsk->state = TASK_INTERRUPTIBLE; - add_wait_queue_exclusive(&sem->wait, &wait); + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); - spin_lock_irq(&semaphore_lock); sem->sleepers ++; for (;;) { int sleepers = sem->sleepers; @@ -110,25 +114,27 @@ /* * Add "everybody else" into it. They aren't - * playing, because we own the spinlock. The - * "-1" is because we're still hoping to get - * the lock. + * playing, because we own the spinlock in + * wait_queue_head. The "-1" is because we're + * still hoping to get the semaphore. */ if (!atomic_add_negative(sleepers - 1, &sem->count)) { sem->sleepers = 0; break; } sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irq(&semaphore_lock); + spin_unlock_irqrestore(&sem->wait.lock, flags); schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irq(&semaphore_lock); } - spin_unlock_irq(&semaphore_lock); + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + tsk->state = TASK_RUNNING; - remove_wait_queue(&sem->wait, &wait); - wake_up(&sem->wait); return retval; } @@ -142,17 +148,19 @@ unsigned long flags; int sleepers; - spin_lock_irqsave(&semaphore_lock, flags); + spin_lock_irqsave(&sem->wait.lock, flags); sleepers = sem->sleepers + 1; sem->sleepers = 0; /* * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock. + * playing, because we own the spinlock in the + * wait_queue_head. */ - if (!atomic_add_negative(sleepers, &sem->count)) - wake_up(&sem->wait); + if (!atomic_add_negative(sleepers, &sem->count)) { + wake_up_locked(&sem->wait); + } - spin_unlock_irqrestore(&semaphore_lock, flags); + spin_unlock_irqrestore(&sem->wait.lock, flags); return 1; } diff -Nru a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c --- a/arch/ia64/kernel/signal.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/signal.c Tue Sep 17 23:47:51 2002 @@ -354,6 +354,15 @@ return err; } +/* + * Check whether the register-backing store is already on the signal stack. 
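rbs_on_sig_stack(), completed just below, does a two-sided range test with a single unsigned comparison: because the subtraction wraps, bsp - sas_ss_sp < sas_ss_size holds exactly when sas_ss_sp <= bsp < sas_ss_sp + sas_ss_size. The same idiom in isolation (illustrative):

	#include <stdint.h>

	/* One-comparison range check: if x < base the subtraction wraps
	 * to a huge value and the test fails, as desired. */
	static int in_range(uint64_t x, uint64_t base, uint64_t size)
	{
		return x - base < size;
	}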
+ */ +static inline int +rbs_on_sig_stack (unsigned long bsp) +{ + return (bsp - current->sas_ss_sp < current->sas_ss_size); +} + static long setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct sigscratch *scr) @@ -366,10 +375,17 @@ frame = (void *) scr->pt.r12; tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section); - if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) { - new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); - frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) - & ~(STACK_ALIGN - 1)); + if (ka->sa.sa_flags & SA_ONSTACK) { + /* + * We need to check the memory and register stacks separately, because + * they're switched separately (memory stack is switched in the kernel, + * register stack is switched in the signal trampoline). + */ + if (!on_sig_stack((unsigned long) frame)) + frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) + & ~(STACK_ALIGN - 1)); + if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) + new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); } frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); @@ -460,7 +476,6 @@ long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) { - struct signal_struct *sig; struct k_sigaction *ka; siginfo_t info; long restart = in_syscall; @@ -487,7 +502,7 @@ } } else #endif - if (scr->pt.r10 != -1) { + if (scr->pt.r10 != -1) /* * A system calls has to be restarted only if one of the error codes * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 @@ -495,101 +510,14 @@ * restart the syscall, so we can clear the "restart" flag here. */ restart = 0; - } - - for (;;) { - unsigned long signr; - spin_lock_irq(¤t->sigmask_lock); - signr = dequeue_signal(¤t->blocked, &info); - spin_unlock_irq(¤t->sigmask_lock); + while (1) { + int signr = get_signal_to_deliver(&info, &scr->pt); - if (!signr) + if (signr <= 0) break; - if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { - /* Let the debugger run. */ - current->exit_code = signr; - current->thread.siginfo = &info; - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); - - signr = current->exit_code; - current->thread.siginfo = 0; - - /* We're back. Did the debugger cancel the sig? */ - if (!signr) - continue; - current->exit_code = 0; - - /* The debugger continued. Ignore SIGSTOP. */ - if (signr == SIGSTOP) - continue; - - /* Update the siginfo structure. Is this good? */ - if (signr != info.si_signo) { - info.si_signo = signr; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = current->parent->pid; - info.si_uid = current->parent->uid; - } - - /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(¤t->blocked, signr)) { - send_sig_info(signr, &info, current); - continue; - } - } - ka = ¤t->sig->action[signr - 1]; - if (ka->sa.sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* Check for SIGCHLD: it's special. */ - while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) - /* nothing */; - continue; - } - - if (ka->sa.sa_handler == SIG_DFL) { - int exit_code = signr; - - /* Init gets no signals it doesn't want. 
*/ - if (current->pid == 1) - continue; - - switch (signr) { - case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: - continue; - - case SIGTSTP: case SIGTTIN: case SIGTTOU: - if (is_orphaned_pgrp(current->pgrp)) - continue; - /* FALLTHRU */ - - case SIGSTOP: - current->state = TASK_STOPPED; - current->exit_code = signr; - sig = current->parent->sig; - if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - notify_parent(current, SIGCHLD); - schedule(); - continue; - - case SIGQUIT: case SIGILL: case SIGTRAP: - case SIGABRT: case SIGFPE: case SIGSEGV: - case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: - if (do_coredump(signr, &scr->pt)) - exit_code |= 0x80; - /* FALLTHRU */ - - default: - sig_exit(signr, exit_code, &info); - /* NOTREACHED */ - } - } if (restart) { switch (errno) { @@ -601,7 +529,7 @@ scr->pt.r8 = -EINTR; else #endif - scr->pt.r8 = EINTR; + scr->pt.r8 = EINTR; /* note: scr->pt.r10 is already -1 */ break; } @@ -612,13 +540,14 @@ scr->pt.cr_iip -= 2; } else #endif - ia64_decrement_ip(&scr->pt); + ia64_decrement_ip(&scr->pt); } } - /* Whee! Actually deliver the signal. If the - delivery failed, we need to continue to iterate in - this loop so we can deliver the SIGSEGV... */ + /* + * Whee! Actually deliver the signal. If the delivery failed, we need to + * continue to iterate in this loop so we can deliver the SIGSEGV... + */ if (handle_signal(signr, ka, &info, oldset, scr)) return 1; } @@ -634,9 +563,8 @@ } else #endif /* - * Note: the syscall number is in r15 which is - * saved in pt_regs so all we need to do here - * is adjust ip so that the "break" + * Note: the syscall number is in r15 which is saved in pt_regs so + * all we need to do here is adjust ip so that the "break" * instruction gets re-executed. */ ia64_decrement_ip(&scr->pt); diff -Nru a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c --- a/arch/ia64/kernel/smpboot.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/smpboot.c Tue Sep 17 23:47:51 2002 @@ -257,31 +257,6 @@ local_cpu_data->prof_multiplier = 1; } -/* - * Architecture specific routine called by the kernel just before init is - * fired off. This allows the BP to have everything in order [we hope]. - * At the end of this all the APs will hit the system scheduling and off - * we go. Each AP will jump through the kernel - * init into idle(). At this point the scheduler will one day take over - * and give them jobs to do. smp_callin is a standard routine - * we use to track CPUs as they power up. - */ - -static volatile atomic_t smp_commenced = ATOMIC_INIT(0); - -static void __init -smp_commence (void) -{ - /* - * Lets the callins below out of their loop. - */ - Dprintk("Setting commenced=1, go go go\n"); - - wmb(); - atomic_set(&smp_commenced, 1); -} - - static void __init smp_callin (void) { @@ -361,7 +336,7 @@ * don't care about the eip and regs settings since we'll never reschedule the * forked task. 
*/ - return do_fork(CLONE_VM|CLONE_IDLETASK, 0, 0, 0); + return do_fork(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL); } static int __init diff -Nru a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c --- a/arch/ia64/kernel/sys_ia64.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/sys_ia64.c Tue Sep 17 23:47:51 2002 @@ -19,6 +19,12 @@ #include #include +#ifdef CONFIG_HUGETLB_PAGE +# define SHMLBA_HPAGE HPAGE_SIZE +# define COLOR_HALIGN(addr) (((addr) + SHMLBA_HPAGE - 1) & ~(SHMLBA_HPAGE - 1)) +# define TASK_HPAGE_BASE ((REGION_HPAGE << REGION_SHIFT) | HPAGE_SIZE) +#endif + unsigned long arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -56,16 +62,14 @@ } asmlinkage long -ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, - long arg7, long stack) +ia64_getpriority (int which, int who) { - struct pt_regs *regs = (struct pt_regs *) &stack; extern long sys_getpriority (int, int); long prio; prio = sys_getpriority(which, who); if (prio >= 0) { - regs->r8 = 0; /* ensure negative priority is not mistaken as error code */ + force_successful_syscall_return(); prio = 20 - prio; } return prio; @@ -79,10 +83,8 @@ } asmlinkage unsigned long -ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6, - long arg7, long stack) +ia64_shmat (int shmid, void *shmaddr, int shmflg) { - struct pt_regs *regs = (struct pt_regs *) &stack; unsigned long raddr; int retval; @@ -90,16 +92,14 @@ if (retval < 0) return retval; - regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */ + force_successful_syscall_return(); return raddr; } asmlinkage unsigned long -ia64_brk (unsigned long brk, long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, long arg7, long stack) +ia64_brk (unsigned long brk) { extern int vm_enough_memory (long pages); - struct pt_regs *regs = (struct pt_regs *) &stack; unsigned long rlim, retval, newbrk, oldbrk; struct mm_struct *mm = current->mm; @@ -145,7 +145,7 @@ out: retval = mm->brk; up_write(&mm->mmap_sem); - regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + force_successful_syscall_return(); return retval; } @@ -222,32 +222,98 @@ * of) files that are larger than the address space of the CPU. */ asmlinkage unsigned long -sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff, - long arg6, long arg7, long stack) +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) { - struct pt_regs *regs = (struct pt_regs *) &stack; - addr = do_mmap2(addr, len, prot, flags, fd, pgoff); if (!IS_ERR((void *) addr)) - regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */ + force_successful_syscall_return(); return addr; } asmlinkage unsigned long -sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, - int fd, long off, long arg6, long arg7, long stack) +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) { - struct pt_regs *regs = (struct pt_regs *) &stack; - if ((off & ~PAGE_MASK) != 0) return -EINVAL; addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); if (!IS_ERR((void *) addr)) - regs->r8 = 0; /* ensure large addresses are not mistaken as failures... 
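force_successful_syscall_return(), which replaces the open-coded regs->r8 = 0 stores throughout these sys_ia64.c hunks, exists because a syscall result cannot be classified as an error by value alone: a legitimate mapping address or priority can look like a negative number. A rough sketch of the user-side decoding it protects, based on the usual ia64 convention (error flagged in r10, errno value in r8; illustrative, not libc source):

	#include <errno.h>

	static long decode_syscall_result(long r8, long r10)
	{
		if (r10 == -1) {	/* kernel flagged an error */
			errno = r8;
			return -1;
		}
		return r8;		/* success: any bit pattern is valid */
	}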
*/ + force_successful_syscall_return(); return addr; } +#ifdef CONFIG_HUGETLB_PAGE + +asmlinkage unsigned long +sys_alloc_hugepages (int key, unsigned long addr, size_t len, int prot, int flag) +{ + struct mm_struct *mm = current->mm; + long retval; + extern int alloc_hugetlb_pages (int, unsigned long, unsigned long, int, int); + + if ((key < 0) || (len & (HPAGE_SIZE - 1))) + return -EINVAL; + + if (addr && ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))) + addr = TASK_HPAGE_BASE; + + if (!addr) + addr = TASK_HPAGE_BASE; + down_write(&mm->mmap_sem); + { + retval = arch_get_unmapped_area(NULL, COLOR_HALIGN(addr), len, 0, 0); + if (retval != -ENOMEM) + retval = alloc_hugetlb_pages(key, retval, len, prot, flag); + } + up_write(&mm->mmap_sem); + + if (IS_ERR((void *) retval)) + return retval; + + force_successful_syscall_return(); + return retval; +} + +asmlinkage int +sys_free_hugepages (unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + extern int free_hugepages(struct vm_area_struct *); + int retval; + + vma = find_vma(mm, addr); + if (!vma || !is_vm_hugetlb_page(vma) || (vma->vm_start != addr)) + return -EINVAL; + + down_write(&mm->mmap_sem); + { + spin_lock(&mm->page_table_lock); + { + retval = free_hugepages(vma); + } + spin_unlock(&mm->page_table_lock); + } + up_write(&mm->mmap_sem); + return retval; +} + +#else /* !CONFIG_HUGETLB_PAGE */ + +asmlinkage unsigned long +sys_alloc_hugepages (int key, unsigned long addr, size_t len, int prot, int flag) +{ + return -ENOSYS; +} + +asmlinkage int +sys_free_hugepages (unsigned long addr) +{ + return -ENOSYS; +} + +#endif /* !CONFIG_HUGETLB_PAGE */ + asmlinkage long sys_vm86 (long arg0, long arg1, long arg2, long arg3) { @@ -256,16 +322,14 @@ } asmlinkage unsigned long -ia64_create_module (const char *name_user, size_t size, long arg2, long arg3, - long arg4, long arg5, long arg6, long arg7, long stack) +ia64_create_module (const char *name_user, size_t size) { extern unsigned long sys_create_module (const char *, size_t); - struct pt_regs *regs = (struct pt_regs *) &stack; unsigned long addr; addr = sys_create_module (name_user, size); if (!IS_ERR((void *) addr)) - regs->r8 = 0; /* ensure large addresses are not mistaken as failures...
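The randomized counter reseeding added to perfmon earlier in this patch relies on carta_random32(), introduced in arch/ia64/lib/carta_random.S below. It is the Park-Miller "minimal standard" generator, seed = 16807 * seed mod (2^31 - 1), computed with Carta's division-free folding: since 2^31 is congruent to 1 modulo 2^31 - 1, the high bits of the product can simply be added to the low 31 bits. An equivalent C rendering (illustrative, not a line-for-line translation of the assembly):

	#include <stdint.h>

	uint32_t carta_random32(uint32_t seed)
	{
		uint64_t prod = (uint64_t) 16807 * seed;	/* at most 46 bits */
		uint32_t lo = prod & 0x7fffffff;		/* low 31 bits */
		uint32_t hi = prod >> 31;			/* the rest */
		uint32_t s = lo + hi;				/* fold once */

		if (s & 0x80000000)				/* s >= 2^31 ? */
			s = (s & 0x7fffffff) + 1;		/* subtract 2^31 - 1 */
		return s;
	}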
*/ + force_successful_syscall_return(); return addr; } diff -Nru a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c --- a/arch/ia64/kernel/time.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/kernel/time.c Tue Sep 17 23:47:51 2002 @@ -104,7 +104,8 @@ tv->tv_sec--; } - xtime = *tv; + xtime.tv_sec = tv->tv_sec; + xtime.tv_nsec = 1000 * tv->tv_usec; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; @@ -135,7 +136,7 @@ } while (cmpxchg(&last_time_offset, old, usec) != old); sec = xtime.tv_sec; - usec += xtime.tv_usec; + usec += xtime.tv_nsec / 1000; } read_unlock_irqrestore(&xtime_lock, flags); diff -Nru a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile --- a/arch/ia64/lib/Makefile Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/lib/Makefile Tue Sep 17 23:47:52 2002 @@ -15,6 +15,7 @@ obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o +obj-$(CONFIG_PERFMON) += carta_random.o IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o diff -Nru a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/ia64/lib/carta_random.S Tue Sep 17 23:47:52 2002 @@ -0,0 +1,54 @@ +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include + +#define a r2 +#define m r3 +#define lo r8 +#define hi r9 +#define t0 r16 +#define t1 r17 +#define seed r32 + +GLOBAL_ENTRY(carta_random32) + movl a = (16807 << 16) | 16807 + ;; + pmpyshr2.u t0 = a, seed, 0 + pmpyshr2.u t1 = a, seed, 16 + ;; + unpack2.l t0 = t1, t0 + dep m = -1, r0, 0, 31 + ;; + zxt4 lo = t0 + shr.u hi = t0, 32 + ;; + dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff) + ;; + shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16 + shr t1 = hi, 15 // t1 = (hi >> 15) + ;; + add lo = lo, t0 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + ;; + add lo = lo, t1 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + br.ret.sptk.many rp +END(carta_random32) diff -Nru a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile --- a/arch/ia64/mm/Makefile Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/mm/Makefile Tue Sep 17 23:47:51 2002 @@ -10,5 +10,6 @@ O_TARGET := mm.o obj-y := init.o fault.o tlb.o extable.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o include $(TOPDIR)/Rules.make diff -Nru a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c --- a/arch/ia64/mm/init.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/mm/init.c Tue Sep 17 23:47:51 2002 @@ -78,7 +78,7 @@ vma->vm_mm = current->mm; vma->vm_start = IA64_RBS_BOT; vma->vm_end = vma->vm_start + PAGE_SIZE; - vma->vm_page_prot = PAGE_COPY; + vma->vm_page_prot = protection_map[VM_READ | VM_WRITE]; vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP; vma->vm_ops = NULL; vma->vm_pgoff = 0; @@ -339,6 +339,14 @@ /* * Set up the page tables. 
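The CONFIG_HUGETLB_PAGE block added to paging_init() just below reserves the boot-time huge-page pool, and its first statement clamps the pool so it cannot swallow low memory. The clamp restated on its own (illustrative; same shifts as the patch):

	/* If the requested pool would cover all of low memory, fall back
	 * to at most half of it. */
	static long clamp_htlbzone_pages(long pages, unsigned long max_low_pfn)
	{
		int order = HPAGE_SHIFT - PAGE_SHIFT;	/* base pages per huge page */

		if ((pages << order) >= max_low_pfn)
			pages = max_low_pfn >> (order + 1);
		return pages;
	}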
*/ + +#ifdef CONFIG_HUGETLB_PAGE +long htlbpagemem; +int htlbpage_max; +extern long htlbzone_pages; +extern struct list_head htlbpage_freelist; +#endif + void paging_init (void) { @@ -438,5 +446,30 @@ #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); +#endif +#ifdef CONFIG_HUGETLB_PAGE + { + long i; + int j; + struct page *page, *map; + + if ((htlbzone_pages << (HPAGE_SHIFT - PAGE_SHIFT)) >= max_low_pfn) + htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 1)); + INIT_LIST_HEAD(&htlbpage_freelist); + for (i = 0; i < htlbzone_pages; i++) { + page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER); + if (!page) + break; + map = page; + for (j = 0; j < (HPAGE_SIZE/PAGE_SIZE); j++) { + SetPageReserved(map); + map++; + } + list_add(&page->list, &htlbpage_freelist); + } + printk("Total Huge_TLB_Page memory pages allocated %ld \n", i); + htlbzone_pages = htlbpagemem = i; + htlbpage_max = (int)i; + } #endif } diff -Nru a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c --- a/arch/ia64/mm/tlb.c Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/mm/tlb.c Tue Sep 17 23:47:51 2002 @@ -63,7 +63,7 @@ read_lock(&tasklist_lock); repeat: - for_each_task(tsk) { + for_each_process(tsk) { if (!tsk->mm) continue; tsk_context = tsk->mm->context; diff -Nru a/arch/ia64/tools/print_offsets.awk b/arch/ia64/tools/print_offsets.awk --- a/arch/ia64/tools/print_offsets.awk Tue Sep 17 23:47:51 2002 +++ b/arch/ia64/tools/print_offsets.awk Tue Sep 17 23:47:51 2002 @@ -9,6 +9,7 @@ print " */" print "" print "#define CLONE_IDLETASK_BIT 12" + print "#define CLONE_SETTLS_BIT 19" } # look for .tab: diff -Nru a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c --- a/arch/ia64/tools/print_offsets.c Tue Sep 17 23:47:52 2002 +++ b/arch/ia64/tools/print_offsets.c Tue Sep 17 23:47:52 2002 @@ -202,6 +202,7 @@ } printf ("\n#define CLONE_IDLETASK_BIT %ld\n", ia64_fls (CLONE_IDLETASK)); + printf ("\n#define CLONE_SETTLS_BIT %ld\n", ia64_fls (CLONE_SETTLS)); printf ("\n#endif /* _ASM_IA64_OFFSETS_H */\n"); return 0; diff -Nru a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c --- a/arch/parisc/kernel/traps.c Tue Sep 17 23:47:52 2002 +++ b/arch/parisc/kernel/traps.c Tue Sep 17 23:47:52 2002 @@ -43,7 +43,6 @@ static inline void console_verbose(void) { - extern int console_loglevel; console_loglevel = 15; } diff -Nru a/drivers/acpi/bus.c b/drivers/acpi/bus.c --- a/drivers/acpi/bus.c Tue Sep 17 23:47:51 2002 +++ b/drivers/acpi/bus.c Tue Sep 17 23:47:51 2002 @@ -51,7 +51,9 @@ extern void eisa_set_level_irq(unsigned int irq); +#ifndef CONFIG_IA64 extern int acpi_disabled; +#endif FADT_DESCRIPTOR acpi_fadt; struct acpi_device *acpi_root; @@ -2129,10 +2131,12 @@ /* Initial core debug level excludes drivers, so include them now */ acpi_set_debug(ACPI_DEBUG_LOW); +#ifndef CONFIG_IA64 if (acpi_disabled) { printk(KERN_INFO PREFIX "Disabled via command line (acpi=off)\n"); return -ENODEV; } +#endif #ifdef CONFIG_PM if (PM_IS_ACTIVE()) { @@ -2168,6 +2172,7 @@ } +#ifndef CONFIG_IA64 int __init acpi_setup(char *str) { @@ -2181,6 +2186,7 @@ return 1; } -subsys_initcall(acpi_init); - __setup("acpi=", acpi_setup); +#endif + +arch_initcall(acpi_init); /* XXX fix me: should be subsys_initcall */ diff -Nru a/drivers/acpi/osl.c b/drivers/acpi/osl.c --- a/drivers/acpi/osl.c Tue Sep 17 23:47:51 2002 +++ b/drivers/acpi/osl.c Tue Sep 17 23:47:51 2002 @@ -38,15 +38,10 @@ #include "acpi.h" #ifdef CONFIG_ACPI_EFI -#include +#include u64 efi_mem_attributes (u64 phys_addr); #endif -#ifdef CONFIG_IA64 -#include -#include -#endif - #define _COMPONENT 
ACPI_OS_SERVICES ACPI_MODULE_NAME ("osl") @@ -81,6 +76,7 @@ * it while walking the namespace (bus 0 and root bridges w/ _BBNs). */ #ifdef CONFIG_ACPI_PCI + pcibios_config_init(); if (!pci_root_ops) { printk(KERN_ERR PREFIX "Access to PCI configuration space unavailable\n"); return AE_NULL_ENTRY; @@ -167,10 +163,10 @@ acpi_os_map_memory(ACPI_PHYSICAL_ADDRESS phys, ACPI_SIZE size, void **virt) { #ifdef CONFIG_ACPI_EFI - if (!(EFI_MEMORY_WB & efi_mem_attributes(phys))) { - *virt = ioremap(phys, size); - } else { + if (EFI_MEMORY_WB & efi_mem_attributes(phys)) { *virt = phys_to_virt(phys); + } else { + *virt = ioremap(phys, size); } #else if (phys > ULONG_MAX) { @@ -226,7 +222,14 @@ acpi_os_install_interrupt_handler(u32 irq, OSD_HANDLER handler, void *context) { #ifdef CONFIG_IA64 - irq = gsi_to_vector(irq); + int vector; + + vector = acpi_irq_to_vector(irq); + if (vector < 0) { + printk(KERN_ERR PREFIX "SCI (IRQ%d) not registered\n", irq); + return AE_OK; + } + irq = vector; #endif acpi_irq_irq = irq; acpi_irq_handler = handler; @@ -244,7 +247,7 @@ { if (acpi_irq_handler) { #ifdef CONFIG_IA64 - irq = gsi_to_vector(irq); + irq = acpi_irq_to_vector(irq); #endif free_irq(irq, acpi_irq); acpi_irq_handler = NULL; @@ -342,8 +345,7 @@ if (EFI_MEMORY_WB & efi_mem_attributes(phys_addr)) { virt_addr = phys_to_virt(phys_addr); - } - else { + } else { iomem = 1; virt_addr = ioremap(phys_addr, width); } @@ -388,8 +390,7 @@ if (EFI_MEMORY_WB & efi_mem_attributes(phys_addr)) { virt_addr = phys_to_virt(phys_addr); - } - else { + } else { iomem = 1; virt_addr = ioremap(phys_addr, width); } diff -Nru a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c --- a/drivers/acpi/pci_irq.c Tue Sep 17 23:47:51 2002 +++ b/drivers/acpi/pci_irq.c Tue Sep 17 23:47:51 2002 @@ -33,7 +33,9 @@ #include #include #include +#ifdef CONFIG_X86_IO_APIC #include +#endif #include "acpi_bus.h" #include "acpi_drivers.h" diff -Nru a/drivers/char/agp/agp.c b/drivers/char/agp/agp.c --- a/drivers/char/agp/agp.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/agp/agp.c Tue Sep 17 23:47:51 2002 @@ -25,6 +25,7 @@ * TODO: * - Allocate more than order 0 pages to avoid too much linear map splitting. 
*/ + #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include "agp.h" MODULE_AUTHOR("Jeff Hartmann "); @@ -134,6 +136,9 @@ { int i; + pr_debug("agp_free_memory(curr=%p): type=%u, page_count=%Zu\n", + curr, curr->type, curr->page_count); + if ((agp_bridge.type == NOT_SUPPORTED) || (curr == NULL)) return; @@ -146,7 +151,6 @@ } if (curr->page_count != 0) { for (i = 0; i < curr->page_count; i++) { - curr->memory[i] &= ~(0x00000fff); agp_bridge.agp_destroy_page(phys_to_virt(curr->memory[i])); } } @@ -164,6 +168,8 @@ agp_memory *new; int i; + pr_debug("agp_allocate_memory(count=%Zu, type=%u)\n", page_count, type); + if (agp_bridge.type == NOT_SUPPORTED) return NULL; @@ -199,12 +205,13 @@ agp_free_memory(new); return NULL; } - new->memory[i] = agp_bridge.mask_memory(virt_to_phys(addr), type); + new->memory[i] = virt_to_phys(addr); new->page_count++; } flush_agp_mappings(); + pr_debug("agp_allocate_memory: new=%p\n", new); return new; } @@ -648,7 +655,7 @@ } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) - agp_bridge.gatt_table[j] = mem->memory[i]; + agp_bridge.gatt_table[j] = agp_bridge.mask_memory(mem->memory[i], mem->type); agp_bridge.tlb_flush(mem); return 0; @@ -966,6 +973,17 @@ }, #endif /* CONFIG_AGP_INTEL */ + +#ifdef CONFIG_AGP_I460 + { + .device_id = PCI_DEVICE_ID_INTEL_84460GX, + .vendor_id = PCI_VENDOR_ID_INTEL, + .chipset = INTEL_460GX, + .vendor_name = "Intel", + .chipset_name = "460GX", + .chipset_setup = intel_i460_setup + }, +#endif #ifdef CONFIG_AGP_SIS { diff -Nru a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h --- a/drivers/char/agp/agp.h Tue Sep 17 23:47:51 2002 +++ b/drivers/char/agp/agp.h Tue Sep 17 23:47:51 2002 @@ -82,7 +82,7 @@ flush_agp_cache(); } #else -static void global_cache_flush(void) +static void __attribute__((unused)) global_cache_flush(void) { flush_agp_cache(); } diff -Nru a/drivers/char/agp/amd-agp.c b/drivers/char/agp/amd-agp.c --- a/drivers/char/agp/amd-agp.c Tue Sep 17 23:47:52 2002 +++ b/drivers/char/agp/amd-agp.c Tue Sep 17 23:47:52 2002 @@ -330,7 +330,7 @@ for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { addr = (j * PAGE_SIZE) + agp_bridge.gart_bus_addr; cur_gatt = GET_GATT(addr); - cur_gatt[GET_GATT_OFF(addr)] = mem->memory[i]; + cur_gatt[GET_GATT_OFF(addr)] = agp_bridge.mask_memory(mem->memory[i], mem->type); } agp_bridge.tlb_flush(mem); return 0; diff -Nru a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c --- a/drivers/char/agp/hp-agp.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/agp/hp-agp.c Tue Sep 17 23:47:51 2002 @@ -43,8 +43,7 @@ #define HP_ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL #define HP_ZX1_PDIR_VALID_BIT 0x8000000000000000UL -#define HP_ZX1_IOVA_TO_PDIR(va) ((va - hp_private.iova_base) >> \ - hp_private.io_tlb_shift) +#define HP_ZX1_IOVA_TO_PDIR(va) ((va - hp_private.iova_base) >> hp_private.io_tlb_shift) static struct aper_size_info_fixed hp_zx1_sizes[] = { @@ -357,12 +356,7 @@ return HP_ZX1_PDIR_VALID_BIT | addr; } -static unsigned long hp_zx1_unmask_memory(unsigned long addr) -{ - return addr & ~(HP_ZX1_PDIR_VALID_BIT); -} - -int __init hp_zx1_setup (struct pci_dev *pdev) +int __init hp_zx1_setup (struct pci_dev *pdev __attribute__((unused))) { agp_bridge.masks = hp_zx1_masks; agp_bridge.num_of_masks = 1; @@ -374,7 +368,6 @@ agp_bridge.cleanup = hp_zx1_cleanup; agp_bridge.tlb_flush = hp_zx1_tlbflush; agp_bridge.mask_memory = hp_zx1_mask_memory; - agp_bridge.unmask_memory = hp_zx1_unmask_memory; agp_bridge.agp_enable = agp_generic_agp_enable; agp_bridge.cache_flush = 
global_cache_flush; agp_bridge.create_gatt_table = hp_zx1_create_gatt_table; @@ -388,7 +381,4 @@ agp_bridge.cant_use_aperture = 1; return hp_zx1_ioc_init(); - - (void) pdev; /* unused */ } - diff -Nru a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c --- a/drivers/char/agp/i460-agp.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/agp/i460-agp.c Tue Sep 17 23:47:51 2002 @@ -4,6 +4,9 @@ * Copyright (C) 1999 Precision Insight, Inc. * Copyright (C) 1999 Xi Graphics, Inc. * + * 460GX support by Chris Ahna + * Clean up & simplification by David Mosberger-Tang + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation @@ -17,55 +20,105 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * - * TODO: + * TODO: * - Allocate more than order 0 pages to avoid too much linear map splitting. */ +/* + * For documentation on the i460 AGP interface, see Chapter 7 (AGP Subsystem) of + * the "Intel 460GTX Chipset Software Developer's Manual": + * http://developer.intel.com/design/itanium/downloads/24870401s.htm + */ +#include +#include #include #include #include #include -#include "agp.h" -/* BIOS configures the chipset so that one of two apbase registers are used */ -static u8 intel_i460_dynamic_apbase = 0x10; +#include "agp.h" -/* 460 supports multiple GART page sizes, so GART pageshift is dynamic */ -static u8 intel_i460_pageshift = 12; -static u32 intel_i460_pagesize; - -/* Keep track of which is larger, chipset or kernel page size. */ -static u32 intel_i460_cpk = 1; - -/* Structure for tracking partial use of 4MB GART pages */ -static u32 **i460_pg_detail = NULL; -static u32 *i460_pg_count = NULL; +/* + * The i460 can operate with large (4MB) pages, but there is no sane way to support this + * within the current kernel/DRM environment, so we disable the relevant code for now. + * See also comments in ia64_alloc_page()... 
+ */ +#define I460_LARGE_IO_PAGES 0 -#define I460_CPAGES_PER_KPAGE (PAGE_SIZE >> intel_i460_pageshift) -#define I460_KPAGES_PER_CPAGE ((1 << intel_i460_pageshift) >> PAGE_SHIFT) +#if I460_LARGE_IO_PAGES +# define I460_IO_PAGE_SHIFT i460.io_page_shift +#else +# define I460_IO_PAGE_SHIFT 12 +#endif +#define I460_IOPAGES_PER_KPAGE (PAGE_SIZE >> I460_IO_PAGE_SHIFT) +#define I460_KPAGES_PER_IOPAGE (1 << (I460_IO_PAGE_SHIFT - PAGE_SHIFT)) #define I460_SRAM_IO_DISABLE (1 << 4) #define I460_BAPBASE_ENABLE (1 << 3) #define I460_AGPSIZ_MASK 0x7 #define I460_4M_PS (1 << 1) -#define log2(x) ffz(~(x)) +/* Control bits for Out-Of-GART coherency and Burst Write Combining */ +#define I460_GXBCTL_OOG (1UL << 0) +#define I460_GXBCTL_BWC (1UL << 2) + +/* + * gatt_table entries are 32-bits wide on the i460; the generic code ought to declare the + * gatt_table and gatt_table_real pointers a "void *"... + */ +#define RD_GATT(index) readl((u32 *) i460.gatt + (index)) +#define WR_GATT(index, val) writel((val), (u32 *) i460.gatt + (index)) +/* + * The 460 spec says we have to read the last location written to make sure that all + * writes have taken effect + */ +#define WR_FLUSH_GATT(index) RD_GATT(index) + +#define log2(x) ffz(~(x)) + +static struct { + void *gatt; /* ioremap'd GATT area */ + + /* i460 supports multiple GART page sizes, so GART pageshift is dynamic: */ + u8 io_page_shift; + + /* BIOS configures chipset to one of 2 possible apbase values: */ + u8 dynamic_apbase; -static inline void intel_i460_read_back (volatile u32 *entry) + /* structure for tracking partial use of 4MB GART pages: */ + struct lp_desc { + unsigned long *alloced_map; /* bitmap of kernel-pages in use */ + int refcount; /* number of kernel pages using the large page */ + u64 paddr; /* physical address of large page */ + } *lp_desc; +} i460; + +static const struct aper_size_info_8 i460_sizes[3] = { /* - * The 460 spec says we have to read the last location written to - * make sure that all writes have taken effect + * The 32GB aperture is only available with a 4M GART page size. Due to the + * dynamic GART page size, we can't figure out page_order or num_entries until + * runtime. */ - *entry; -} + {32768, 0, 0, 4}, + {1024, 0, 0, 2}, + {256, 0, 0, 1} +}; -static int intel_i460_fetch_size(void) +static struct gatt_mask i460_masks[] = +{ + { + .mask = INTEL_I460_GATT_VALID | INTEL_I460_GATT_COHERENT, + .type = 0 + } +}; + +static int i460_fetch_size (void) { int i; u8 temp; @@ -73,8 +126,15 @@ /* Determine the GART page size */ pci_read_config_byte(agp_bridge.dev, INTEL_I460_GXBCTL, &temp); - intel_i460_pageshift = (temp & I460_4M_PS) ? 22 : 12; - intel_i460_pagesize = 1UL << intel_i460_pageshift; + i460.io_page_shift = (temp & I460_4M_PS) ? 
22 : 12; + pr_debug("i460_fetch_size: io_page_shift=%d\n", i460.io_page_shift); + + if (i460.io_page_shift != I460_IO_PAGE_SHIFT) { + printk(KERN_ERR PFX + "I/O (GART) page-size %ZuKB doesn't match expected size %ZuKB\n", + 1UL << (i460.io_page_shift - 10), 1UL << (I460_IO_PAGE_SHIFT - 10)); + return 0; + } values = A_SIZE_8(agp_bridge.aperture_sizes); @@ -88,16 +148,16 @@ } /* Make sure we don't try to create an 2 ^ 23 entry GATT */ - if ((intel_i460_pageshift == 0) && ((temp & I460_AGPSIZ_MASK) == 4)) { + if ((i460.io_page_shift == 0) && ((temp & I460_AGPSIZ_MASK) == 4)) { printk(KERN_ERR PFX "We can't have a 32GB aperture with 4KB GART pages\n"); return 0; } /* Determine the proper APBASE register */ if (temp & I460_BAPBASE_ENABLE) - intel_i460_dynamic_apbase = INTEL_I460_BAPBASE; + i460.dynamic_apbase = INTEL_I460_BAPBASE; else - intel_i460_dynamic_apbase = INTEL_I460_APBASE; + i460.dynamic_apbase = INTEL_I460_APBASE; for (i = 0; i < agp_bridge.num_aperture_sizes; i++) { /* @@ -105,7 +165,7 @@ * the define aperture sizes. Take care not to shift off the end of * values[i].size. */ - values[i].num_entries = (values[i].size << 8) >> (intel_i460_pageshift - 12); + values[i].num_entries = (values[i].size << 8) >> (I460_IO_PAGE_SHIFT - 12); values[i].page_order = log2((sizeof(u32)*values[i].num_entries) >> PAGE_SHIFT); } @@ -122,7 +182,7 @@ } /* There isn't anything to do here since 460 has no GART TLB. */ -static void intel_i460_tlb_flush(agp_memory * mem) +static void i460_tlb_flush (agp_memory * mem) { return; } @@ -131,7 +191,7 @@ * This utility function is needed to prevent corruption of the control bits * which are stored along with the aperture size in 460's AGPSIZ register */ -static void intel_i460_write_agpsiz(u8 size_value) +static void i460_write_agpsiz (u8 size_value) { u8 temp; @@ -140,47 +200,39 @@ ((temp & ~I460_AGPSIZ_MASK) | size_value)); } -static void intel_i460_cleanup(void) +static void i460_cleanup (void) { struct aper_size_info_8 *previous_size; previous_size = A_SIZE_8(agp_bridge.previous_size); - intel_i460_write_agpsiz(previous_size->size_value); + i460_write_agpsiz(previous_size->size_value); - if (intel_i460_cpk == 0) { - vfree(i460_pg_detail); - vfree(i460_pg_count); - } + if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) + kfree(i460.lp_desc); } - -/* Control bits for Out-Of-GART coherency and Burst Write Combining */ -#define I460_GXBCTL_OOG (1UL << 0) -#define I460_GXBCTL_BWC (1UL << 2) - -static int intel_i460_configure(void) +static int i460_configure (void) { union { u32 small[2]; u64 large; } temp; + size_t size; u8 scratch; - int i; - struct aper_size_info_8 *current_size; temp.large = 0; current_size = A_SIZE_8(agp_bridge.current_size); - intel_i460_write_agpsiz(current_size->size_value); + i460_write_agpsiz(current_size->size_value); /* * Do the necessary rigmarole to read all eight bytes of APBASE. * This has to be done since the AGP aperture can be above 4GB on * 460 based systems. */
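The num_entries/page_order arithmetic in i460_fetch_size() is easy to sanity-check outside the kernel. A minimal user-space sketch (assuming 4KB kernel pages and the 4KB GART-page case, io_page_shift == 12; log2_of() is a stand-in for the driver's log2() macro):

#include <stdio.h>

#define IO_PAGE_SHIFT	12	/* assumed: the 4KB GART-page case */

static int log2_of(unsigned long x)	/* stand-in for log2(); x a power of two */
{
	int n = 0;
	while (x > 1) {
		x >>= 1;
		n++;
	}
	return n;
}

int main(void)
{
	unsigned long size_mb[2] = { 256, 1024 };	/* the 32GB entry needs 4MB GART pages */
	int i;

	for (i = 0; i < 2; i++) {
		/* aperture size is kept in MB; <<8 yields the number of 4KB GART pages */
		unsigned long num_entries = (size_mb[i] << 8) >> (IO_PAGE_SHIFT - 12);
		/* order of the kernel-page block holding the 32-bit GATT entries */
		int page_order = log2_of((sizeof(unsigned int) * num_entries) >> 12);

		printf("%4luMB aperture: %lu GATT entries, GATT page order %d\n",
		       size_mb[i], num_entries, page_order);
	}
	return 0;
}

This prints 65536 entries/order 6 for the 256MB aperture and 262144 entries/order 8 for the 1GB one; with a 4MB GART page the same formula is evaluated with shift 22 instead, which is why i460_sizes[] cannot carry static num_entries/page_order values.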
- pci_read_config_dword(agp_bridge.dev, intel_i460_dynamic_apbase, &(temp.small[0])); - pci_read_config_dword(agp_bridge.dev, intel_i460_dynamic_apbase + 4, &(temp.small[1])); + pci_read_config_dword(agp_bridge.dev, i460.dynamic_apbase, &(temp.small[0])); + pci_read_config_dword(agp_bridge.dev, i460.dynamic_apbase + 4, &(temp.small[1])); /* Clear BAR control bits */ agp_bridge.gart_bus_addr = temp.large & ~((1UL << 3) - 1); @@ -190,406 +242,349 @@ (scratch & 0x02) | I460_GXBCTL_OOG | I460_GXBCTL_BWC); /* - * Initialize partial allocation trackers if a GART page is bigger than - * a kernel page. + * Initialize partial allocation trackers if a GART page is bigger than a kernel + * page. */ - if (I460_CPAGES_PER_KPAGE >= 1) { - intel_i460_cpk = 1; - } else { - intel_i460_cpk = 0; - - i460_pg_detail = vmalloc(sizeof(*i460_pg_detail) * current_size->num_entries); - i460_pg_count = vmalloc(sizeof(*i460_pg_count) * current_size->num_entries); - - for (i = 0; i < current_size->num_entries; i++) { - i460_pg_count[i] = 0; - i460_pg_detail[i] = NULL; - } + if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) { + size = current_size->num_entries * sizeof(i460.lp_desc[0]); + i460.lp_desc = kmalloc(size, GFP_KERNEL); + if (!i460.lp_desc) + return -ENOMEM; + memset(i460.lp_desc, 0, size); } return 0; } -static int intel_i460_create_gatt_table(void) +static int i460_create_gatt_table (void) { - char *table; - int i; - int page_order; - int num_entries; + int page_order, num_entries, i; void *temp; /* - * Load up the fixed address of the GART SRAMS which hold our - * GATT table. + * Load up the fixed address of the GART SRAMS which hold our GATT table. */ - table = (char *) __va(INTEL_I460_ATTBASE); - temp = agp_bridge.current_size; page_order = A_SIZE_8(temp)->page_order; num_entries = A_SIZE_8(temp)->num_entries; - agp_bridge.gatt_table_real = (u32 *) table; - agp_bridge.gatt_table = ioremap_nocache(virt_to_phys(table), - (PAGE_SIZE * (1 << page_order))); - agp_bridge.gatt_bus_addr = virt_to_phys(agp_bridge.gatt_table_real); - - for (i = 0; i < num_entries; i++) { - agp_bridge.gatt_table[i] = 0; - } + i460.gatt = ioremap(INTEL_I460_ATTBASE, PAGE_SIZE << page_order); - intel_i460_read_back(agp_bridge.gatt_table + i - 1); + /* These are no good, they should be removed from the agp_bridge structure... */ + agp_bridge.gatt_table_real = NULL; + agp_bridge.gatt_table = NULL; + agp_bridge.gatt_bus_addr = 0; + + for (i = 0; i < num_entries; ++i) + WR_GATT(i, 0); + WR_FLUSH_GATT(i - 1); return 0; } -static int intel_i460_free_gatt_table(void) +static int i460_free_gatt_table (void) { - int num_entries; - int i; + int num_entries, i; void *temp; temp = agp_bridge.current_size; num_entries = A_SIZE_8(temp)->num_entries; - for (i = 0; i < num_entries; i++) { - agp_bridge.gatt_table[i] = 0; - } - - intel_i460_read_back(agp_bridge.gatt_table + i - 1); + for (i = 0; i < num_entries; ++i) + WR_GATT(i, 0); + WR_FLUSH_GATT(num_entries - 1); - iounmap(agp_bridge.gatt_table); + iounmap(i460.gatt); return 0; } -/* These functions are called when PAGE_SIZE exceeds the GART page size */ +/* + * The following functions are called when the I/O (GART) page size is smaller than + * PAGE_SIZE. 
+ */ -static int intel_i460_insert_memory_cpk(agp_memory * mem, off_t pg_start, int type) +static int i460_insert_memory_small_io_page (agp_memory *mem, off_t pg_start, int type) { + unsigned long paddr, io_pg_start, io_page_size; int i, j, k, num_entries; void *temp; - unsigned long paddr; - /* - * The rest of the kernel will compute page offsets in terms of - * PAGE_SIZE. - */ - pg_start = I460_CPAGES_PER_KPAGE * pg_start; + pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n", + mem, pg_start, type, mem->memory[0]); + + io_pg_start = I460_IOPAGES_PER_KPAGE * pg_start; temp = agp_bridge.current_size; num_entries = A_SIZE_8(temp)->num_entries; - if ((pg_start + I460_CPAGES_PER_KPAGE * mem->page_count) > num_entries) { + if ((io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count) > num_entries) { printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); return -EINVAL; } - j = pg_start; - while (j < (pg_start + I460_CPAGES_PER_KPAGE * mem->page_count)) { - if (!PGE_EMPTY(agp_bridge.gatt_table[j])) { + j = io_pg_start; + while (j < (io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count)) { + if (!PGE_EMPTY(RD_GATT(j))) { + pr_debug("i460_insert_memory_small_io_page: GATT[%d]=0x%x is busy\n", + j, RD_GATT(j)); return -EBUSY; } j++; } -#if 0 - /* not necessary since 460 GART is operated in coherent mode... */ - if (mem->is_flushed == FALSE) { - CACHE_FLUSH(); - mem->is_flushed = TRUE; - } -#endif - - for (i = 0, j = pg_start; i < mem->page_count; i++) { + io_page_size = 1UL << I460_IO_PAGE_SHIFT; + for (i = 0, j = io_pg_start; i < mem->page_count; i++) { paddr = mem->memory[i]; - for (k = 0; k < I460_CPAGES_PER_KPAGE; k++, j++, paddr += intel_i460_pagesize) - agp_bridge.gatt_table[j] = (u32) agp_bridge.mask_memory(paddr, mem->type); + for (k = 0; k < I460_IOPAGES_PER_KPAGE; k++, j++, paddr += io_page_size) + WR_GATT(j, agp_bridge.mask_memory(paddr, mem->type)); } - - intel_i460_read_back(agp_bridge.gatt_table + j - 1); + WR_FLUSH_GATT(j - 1); return 0; } -static int intel_i460_remove_memory_cpk(agp_memory * mem, off_t pg_start, int type) +static int i460_remove_memory_small_io_page(agp_memory * mem, off_t pg_start, int type) { int i; - pg_start = I460_CPAGES_PER_KPAGE * pg_start; + pr_debug("i460_remove_memory_small_io_page(mem=%p, pg_start=%ld, type=%d)\n", + mem, pg_start, type); - for (i = pg_start; i < (pg_start + I460_CPAGES_PER_KPAGE * mem->page_count); i++) - agp_bridge.gatt_table[i] = 0; + pg_start = I460_IOPAGES_PER_KPAGE * pg_start; - intel_i460_read_back(agp_bridge.gatt_table + i - 1); + for (i = pg_start; i < (pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count); i++) + WR_GATT(i, 0); + WR_FLUSH_GATT(i - 1); return 0; } +#if I460_LARGE_IO_PAGES + /* - * These functions are called when the GART page size exceeds PAGE_SIZE. + * These functions are called when the I/O (GART) page size exceeds PAGE_SIZE. * - * This situation is interesting since AGP memory allocations that are - * smaller than a single GART page are possible. The structures i460_pg_count - * and i460_pg_detail track partial allocation of the large GART pages to - * work around this issue. + * This situation is interesting since AGP memory allocations that are smaller than a + * single GART page are possible. The i460.lp_desc array tracks partial allocation of the + * large GART pages to work around this issue. * - * i460_pg_count[pg_num] tracks the number of kernel pages in use within - * GART page pg_num. 
i460_pg_detail[pg_num] is an array containing a - * psuedo-GART entry for each of the aforementioned kernel pages. The whole - * of i460_pg_detail is equivalent to a giant GATT with page size equal to - * that of the kernel. + * i460.lp_desc[pg_num].refcount tracks the number of kernel pages in use within GART page + * pg_num. i460.lp_desc[pg_num].paddr is the physical address of the large page and + * i460.lp_desc[pg_num].alloced_map is a bitmap of kernel pages that are in use (allocated). */ -static void *intel_i460_alloc_large_page(int pg_num) +static int i460_alloc_large_page (struct lp_desc *lp) { - int i; - void *bp, *bp_end; - struct page *page; - - i460_pg_detail[pg_num] = (void *) vmalloc(sizeof(u32) * I460_KPAGES_PER_CPAGE); - if (i460_pg_detail[pg_num] == NULL) { - printk(KERN_ERR PFX "Out of memory, we're in trouble...\n"); - return NULL; - } - - for (i = 0; i < I460_KPAGES_PER_CPAGE; i++) - i460_pg_detail[pg_num][i] = 0; + unsigned long order = I460_IO_PAGE_SHIFT - PAGE_SHIFT; + size_t map_size; + void *lpage; - bp = (void *) __get_free_pages(GFP_KERNEL, intel_i460_pageshift - PAGE_SHIFT); - if (bp == NULL) { + lpage = (void *) __get_free_pages(GFP_KERNEL, order); + if (!lpage) { printk(KERN_ERR PFX "Couldn't alloc 4M GART page...\n"); - return NULL; + return -ENOMEM; } - bp_end = bp + ((PAGE_SIZE * (1 << (intel_i460_pageshift - PAGE_SHIFT))) - 1); - - for (page = virt_to_page(bp); page <= virt_to_page(bp_end); page++) { - atomic_inc(&agp_bridge.current_memory_agp); + map_size = ((I460_KPAGES_PER_IOPAGE + BITS_PER_LONG - 1) & -BITS_PER_LONG)/8; + lp->alloced_map = kmalloc(map_size, GFP_KERNEL); + if (!lp->alloced_map) { + free_pages((unsigned long) lpage, order); + printk(KERN_ERR PFX "Out of memory, we're in trouble...\n"); + return -ENOMEM; } - return bp; + memset(lp->alloced_map, 0, map_size); + + lp->paddr = virt_to_phys(lpage); + lp->refcount = 0; + atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge.current_memory_agp); + return 0; } -static void intel_i460_free_large_page(int pg_num, unsigned long addr) +static void i460_free_large_page (struct lp_desc *lp) { - struct page *page; - void *bp, *bp_end; - - bp = (void *) __va(addr); - bp_end = bp + (PAGE_SIZE * (1 << (intel_i460_pageshift - PAGE_SHIFT))); + kfree(lp->alloced_map); + lp->alloced_map = NULL; - vfree(i460_pg_detail[pg_num]); - i460_pg_detail[pg_num] = NULL; - - for (page = virt_to_page(bp); page < virt_to_page(bp_end); page++) { - atomic_dec(&agp_bridge.current_memory_agp); - } - - free_pages((unsigned long) bp, intel_i460_pageshift - PAGE_SHIFT); + free_pages((unsigned long) phys_to_virt(lp->paddr), I460_IO_PAGE_SHIFT - PAGE_SHIFT); + atomic_sub(I460_KPAGES_PER_IOPAGE, &agp_bridge.current_memory_agp); } -static int intel_i460_insert_memory_kpc(agp_memory * mem, off_t pg_start, int type) +static int i460_insert_memory_large_io_page (agp_memory * mem, off_t pg_start, int type) { - int i, pg, start_pg, end_pg, start_offset, end_offset, idx; - int num_entries; + int i, start_offset, end_offset, idx, pg, num_entries; + struct lp_desc *start, *end, *lp; void *temp; - unsigned long paddr; temp = agp_bridge.current_size; num_entries = A_SIZE_8(temp)->num_entries; /* Figure out what pg_start means in terms of our large GART pages */ - start_pg = pg_start / I460_KPAGES_PER_CPAGE; - start_offset = pg_start % I460_KPAGES_PER_CPAGE; - end_pg = (pg_start + mem->page_count - 1) / I460_KPAGES_PER_CPAGE; - end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_CPAGE; + start = &i460.lp_desc[pg_start / 
I460_KPAGES_PER_IOPAGE]; + end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; + start_offset = pg_start % I460_KPAGES_PER_IOPAGE; + end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; - if (end_pg > num_entries) { + if (end > i460.lp_desc + num_entries) { printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); return -EINVAL; } /* Check if the requested region of the aperture is free */ - for (pg = start_pg; pg <= end_pg; pg++) { - /* Allocate new GART pages if necessary */ - if (i460_pg_detail[pg] == NULL) { - temp = intel_i460_alloc_large_page(pg); - if (temp == NULL) - return -ENOMEM; - agp_bridge.gatt_table[pg] = agp_bridge.mask_memory((unsigned long) temp, - 0); - intel_i460_read_back(agp_bridge.gatt_table + pg); - } + for (lp = start; lp <= end; ++lp) { + if (!lp->alloced_map) + continue; /* OK, the entire large page is available... */ - for (idx = ((pg == start_pg) ? start_offset : 0); - idx < ((pg == end_pg) ? (end_offset + 1) : I460_KPAGES_PER_CPAGE); + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); idx++) { - if (i460_pg_detail[pg][idx] != 0) + if (test_bit(idx, lp->alloced_map)) return -EBUSY; } } -#if 0 - /* not necessary since 460 GART is operated in coherent mode... */ - if (mem->is_flushed == FALSE) { - CACHE_FLUSH(); - mem->is_flushed = TRUE; - } -#endif + for (lp = start, i = 0; lp <= end; ++lp) { + if (!lp->alloced_map) { + /* Allocate new GART pages... */ + if (i460_alloc_large_page(lp) < 0) + return -ENOMEM; + pg = lp - i460.lp_desc; + WR_GATT(pg, agp_bridge.mask_memory(lp->paddr, 0)); + WR_FLUSH_GATT(pg); + } - for (pg = start_pg, i = 0; pg <= end_pg; pg++) { - paddr = agp_bridge.unmask_memory(agp_bridge.gatt_table[pg]); - for (idx = ((pg == start_pg) ? start_offset : 0); - idx < ((pg == end_pg) ? (end_offset + 1) : I460_KPAGES_PER_CPAGE); + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); idx++, i++) { - mem->memory[i] = paddr + (idx * PAGE_SIZE); - i460_pg_detail[pg][idx] = agp_bridge.mask_memory(mem->memory[i], - mem->type); - i460_pg_count[pg]++; + mem->memory[i] = lp->paddr + idx*PAGE_SIZE; + __set_bit(idx, lp->alloced_map); + ++lp->refcount; } } - return 0; } -static int intel_i460_remove_memory_kpc(agp_memory * mem, off_t pg_start, int type) +static int i460_remove_memory_large_io_page (agp_memory * mem, off_t pg_start, int type) { - int i, pg, start_pg, end_pg, start_offset, end_offset, idx; - int num_entries; + int i, pg, start_offset, end_offset, idx, num_entries; + struct lp_desc *start, *end, *lp; void *temp; - unsigned long paddr; temp = agp_bridge.current_size; num_entries = A_SIZE_8(temp)->num_entries; /* Figure out what pg_start means in terms of our large GART pages */ - start_pg = pg_start / I460_KPAGES_PER_CPAGE; - start_offset = pg_start % I460_KPAGES_PER_CPAGE; - end_pg = (pg_start + mem->page_count - 1) / I460_KPAGES_PER_CPAGE; - end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_CPAGE; - - for (i = 0, pg = start_pg; pg <= end_pg; pg++) { - for (idx = ((pg == start_pg) ? start_offset : 0); - idx < ((pg == end_pg) ? 
(end_offset + 1) : I460_KPAGES_PER_CPAGE); - idx++, i++) + start = &i460.lp_desc[pg_start / I460_KPAGES_PER_IOPAGE]; + end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; + start_offset = pg_start % I460_KPAGES_PER_IOPAGE; + end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; + + for (i = 0, lp = start; lp <= end; ++lp) { + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); + idx++, i++) { mem->memory[i] = 0; - i460_pg_detail[pg][idx] = 0; - i460_pg_count[pg]--; + __clear_bit(idx, lp->alloced_map); + --lp->refcount; } /* Free GART pages if they are unused */ - if (i460_pg_count[pg] == 0) { - paddr = agp_bridge.unmask_memory(agp_bridge.gatt_table[pg]); - agp_bridge.gatt_table[pg] = agp_bridge.scratch_page; - intel_i460_read_back(agp_bridge.gatt_table + pg); - intel_i460_free_large_page(pg, paddr); + if (lp->refcount == 0) { + pg = lp - i460.lp_desc; + WR_GATT(pg, 0); + WR_FLUSH_GATT(pg); + i460_free_large_page(lp); } } return 0; } -/* Dummy routines to call the approriate {cpk,kpc} function */ +/* Wrapper routines to call the appropriate {small_io_page,large_io_page} function */ -static int intel_i460_insert_memory(agp_memory * mem, off_t pg_start, int type) +static int i460_insert_memory (agp_memory * mem, off_t pg_start, int type) { - if (intel_i460_cpk) - return intel_i460_insert_memory_cpk(mem, pg_start, type); + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + return i460_insert_memory_small_io_page(mem, pg_start, type); else - return intel_i460_insert_memory_kpc(mem, pg_start, type); + return i460_insert_memory_large_io_page(mem, pg_start, type); } -static int intel_i460_remove_memory(agp_memory * mem, off_t pg_start, int type) +static int i460_remove_memory (agp_memory * mem, off_t pg_start, int type) { - if (intel_i460_cpk) - return intel_i460_remove_memory_cpk(mem, pg_start, type); + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + return i460_remove_memory_small_io_page(mem, pg_start, type); else - return intel_i460_remove_memory_kpc(mem, pg_start, type); + return i460_remove_memory_large_io_page(mem, pg_start, type); } /* - * If the kernel page size is smaller that the chipset page size, we don't - * want to allocate memory until we know where it is to be bound in the - * aperture (a multi-kernel-page alloc might fit inside of an already - * allocated GART page). Consequently, don't allocate or free anything - * if i460_cpk (meaning chipset pages per kernel page) isn't set. + * If the I/O (GART) page size is bigger than the kernel page size, we don't want to + * allocate memory until we know where it is to be bound in the aperture (a + * multi-kernel-page alloc might fit inside of an already allocated GART page). * - * Let's just hope nobody counts on the allocated AGP memory being there - * before bind time (I don't think current drivers do)... + * Let's just hope nobody counts on the allocated AGP memory being there before bind time + * (I don't think current drivers do)... */ -static void * intel_i460_alloc_page(void) +static void *i460_alloc_page (void) { - if (intel_i460_cpk) - return agp_generic_alloc_page(); + void *page; - /* Returning NULL would cause problems */ - /* AK: really dubious code. 
*/ + page = (void *)~0UL; + return page; } -static void intel_i460_destroy_page(void *page) +static void i460_destroy_page (void *page) { - if (intel_i460_cpk) + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) agp_generic_destroy_page(page); } -static struct gatt_mask intel_i460_masks[] = -{ - { - .mask = INTEL_I460_GATT_VALID | INTEL_I460_GATT_COHERENT, - .type = 0 - } -}; +#endif /* I460_LARGE_IO_PAGES */ -static unsigned long intel_i460_mask_memory(unsigned long addr, int type) +static unsigned long i460_mask_memory (unsigned long addr, int type) { /* Make sure the returned address is a valid GATT entry */ return (agp_bridge.masks[0].mask - | (((addr & ~((1 << intel_i460_pageshift) - 1)) & 0xffffff000) >> 12)); + | (((addr & ~((1 << I460_IO_PAGE_SHIFT) - 1)) & 0xffffff000) >> 12)); } -static unsigned long intel_i460_unmask_memory(unsigned long addr) -{ - /* Turn a GATT entry into a physical address */ - return ((addr & 0xffffff) << 12); -} - -static struct aper_size_info_8 intel_i460_sizes[3] = -{ - /* - * The 32GB aperture is only available with a 4M GART page size. - * Due to the dynamic GART page size, we can't figure out page_order - * or num_entries until runtime. - */ - {32768, 0, 0, 4}, - {1024, 0, 0, 2}, - {256, 0, 0, 1} -}; - int __init intel_i460_setup (struct pci_dev *pdev __attribute__((unused))) { - agp_bridge.masks = intel_i460_masks; - agp_bridge.aperture_sizes = (void *) intel_i460_sizes; + agp_bridge.num_of_masks = 1; + agp_bridge.masks = i460_masks; + agp_bridge.aperture_sizes = (void *) i460_sizes; agp_bridge.size_type = U8_APER_SIZE; agp_bridge.num_aperture_sizes = 3; agp_bridge.dev_private_data = NULL; agp_bridge.needs_scratch_page = FALSE; - agp_bridge.configure = intel_i460_configure; - agp_bridge.fetch_size = intel_i460_fetch_size; - agp_bridge.cleanup = intel_i460_cleanup; - agp_bridge.tlb_flush = intel_i460_tlb_flush; - agp_bridge.mask_memory = intel_i460_mask_memory; - agp_bridge.unmask_memory = intel_i460_unmask_memory; + agp_bridge.configure = i460_configure; + agp_bridge.fetch_size = i460_fetch_size; + agp_bridge.cleanup = i460_cleanup; + agp_bridge.tlb_flush = i460_tlb_flush; + agp_bridge.mask_memory = i460_mask_memory; agp_bridge.agp_enable = agp_generic_agp_enable; agp_bridge.cache_flush = global_cache_flush; - agp_bridge.create_gatt_table = intel_i460_create_gatt_table; - agp_bridge.free_gatt_table = intel_i460_free_gatt_table; - agp_bridge.insert_memory = intel_i460_insert_memory; - agp_bridge.remove_memory = intel_i460_remove_memory; + agp_bridge.create_gatt_table = i460_create_gatt_table; + agp_bridge.free_gatt_table = i460_free_gatt_table; +#if I460_LARGE_IO_PAGES + agp_bridge.insert_memory = i460_insert_memory; + agp_bridge.remove_memory = i460_remove_memory; + agp_bridge.agp_alloc_page = i460_alloc_page; + agp_bridge.agp_destroy_page = i460_destroy_page; +#else + agp_bridge.insert_memory = i460_insert_memory_small_io_page; + agp_bridge.remove_memory = i460_remove_memory_small_io_page; + agp_bridge.agp_alloc_page = agp_generic_alloc_page; + agp_bridge.agp_destroy_page = agp_generic_destroy_page; +#endif agp_bridge.alloc_by_type = agp_generic_alloc_by_type; agp_bridge.free_by_type = agp_generic_free_by_type; - agp_bridge.agp_alloc_page = intel_i460_alloc_page; - agp_bridge.agp_destroy_page = intel_i460_destroy_page; agp_bridge.suspend = agp_generic_suspend; agp_bridge.resume = agp_generic_resume; agp_bridge.cant_use_aperture = 1; return 0; } - diff -Nru a/drivers/char/agp/i810-agp.c b/drivers/char/agp/i810-agp.c --- a/drivers/char/agp/i810-agp.c Tue Sep 17 
23:47:51 2002 +++ b/drivers/char/agp/i810-agp.c Tue Sep 17 23:47:51 2002 @@ -179,7 +179,8 @@ CACHE_FLUSH(); for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { OUTREG32(intel_i810_private.registers, - I810_PTE_BASE + (j * 4), mem->memory[i]); + I810_PTE_BASE + (j * 4), agp_bridge.mask_memory(mem->memory[i], + mem->type)); } CACHE_FLUSH(); @@ -246,11 +247,11 @@ agp_free_memory(new); return NULL; } - new->memory[0] = agp_bridge.mask_memory(virt_to_phys(addr), type); + new->memory[0] = virt_to_phys(addr); new->page_count = 1; new->num_scratch_pages = 1; new->type = AGP_PHYS_MEMORY; - new->physical = virt_to_phys((void *) new->memory[0]); + new->physical = virt_to_phys(addr); return new; } @@ -483,7 +484,8 @@ CACHE_FLUSH(); for (i = 0, j = pg_start; i < mem->page_count; i++, j++) - OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (j * 4),mem->memory[i]); + OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (j * 4), + agp_bridge.mask_memory(mem->memory[i], mem->type)); CACHE_FLUSH(); @@ -543,7 +545,7 @@ return(NULL); } - nw->memory[0] = agp_bridge.mask_memory(virt_to_phys(addr),type); + nw->memory[0] = virt_to_phys(addr); nw->page_count = 1; nw->num_scratch_pages = 1; nw->type = AGP_PHYS_MEMORY; diff -Nru a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c --- a/drivers/char/agp/sworks-agp.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/agp/sworks-agp.c Tue Sep 17 23:47:51 2002 @@ -405,7 +405,7 @@ for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { addr = (j * PAGE_SIZE) + agp_bridge.gart_bus_addr; cur_gatt = SVRWRKS_GET_GATT(addr); - cur_gatt[GET_GATT_OFF(addr)] = mem->memory[i]; + cur_gatt[GET_GATT_OFF(addr)] = agp_bridge.mask_memory(mem->memory[i], mem->type); } agp_bridge.tlb_flush(mem); return 0; diff -Nru a/drivers/char/drm/ati_pcigart.h b/drivers/char/drm/ati_pcigart.h --- a/drivers/char/drm/ati_pcigart.h Tue Sep 17 23:47:52 2002 +++ b/drivers/char/drm/ati_pcigart.h Tue Sep 17 23:47:52 2002 @@ -30,14 +30,20 @@ #define __NO_VERSION__ #include "drmP.h" -#if PAGE_SIZE == 8192 +#if PAGE_SIZE == 65536 +# define ATI_PCIGART_TABLE_ORDER 0 +# define ATI_PCIGART_TABLE_PAGES (1 << 0) +#elif PAGE_SIZE == 16384 +# define ATI_PCIGART_TABLE_ORDER 1 +# define ATI_PCIGART_TABLE_PAGES (1 << 1) +#elif PAGE_SIZE == 8192 # define ATI_PCIGART_TABLE_ORDER 2 # define ATI_PCIGART_TABLE_PAGES (1 << 2) #elif PAGE_SIZE == 4096 # define ATI_PCIGART_TABLE_ORDER 3 # define ATI_PCIGART_TABLE_PAGES (1 << 3) #else -# error - PAGE_SIZE not 8K or 4K +# error - PAGE_SIZE not 64K, 16K, 8K or 4K #endif # define ATI_MAX_PCIGART_PAGES 8192 /* 32 MB aperture, 4K pages */ diff -Nru a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h --- a/drivers/char/drm/drmP.h Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/drmP.h Tue Sep 17 23:47:51 2002 @@ -230,16 +230,16 @@ if (len > DRM_PROC_LIMIT) { ret; *eof = 1; return len - offset; } /* Mapping helper macros */ -#define DRM_IOREMAP(map) \ - (map)->handle = DRM(ioremap)( (map)->offset, (map)->size ) +#define DRM_IOREMAP(map, dev) \ + (map)->handle = DRM(ioremap)( (map)->offset, (map)->size, (dev) ) -#define DRM_IOREMAP_NOCACHE(map) \ - (map)->handle = DRM(ioremap_nocache)((map)->offset, (map)->size) +#define DRM_IOREMAP_NOCACHE(map, dev) \ + (map)->handle = DRM(ioremap_nocache)((map)->offset, (map)->size, (dev)) -#define DRM_IOREMAPFREE(map) \ - do { \ - if ( (map)->handle && (map)->size ) \ - DRM(ioremapfree)( (map)->handle, (map)->size ); \ +#define DRM_IOREMAPFREE(map, dev) \ + do { \ + if ( (map)->handle && (map)->size ) \ + DRM(ioremapfree)( 
(map)->handle, (map)->size, (dev) ); \ } while (0) #define DRM_FIND_MAP(_map, _o) \ @@ -677,9 +677,10 @@ extern unsigned long DRM(alloc_pages)(int order, int area); extern void DRM(free_pages)(unsigned long address, int order, int area); -extern void *DRM(ioremap)(unsigned long offset, unsigned long size); -extern void *DRM(ioremap_nocache)(unsigned long offset, unsigned long size); -extern void DRM(ioremapfree)(void *pt, unsigned long size); +extern void *DRM(ioremap)(unsigned long offset, unsigned long size, drm_device_t *dev); +extern void *DRM(ioremap_nocache)(unsigned long offset, unsigned long size, + drm_device_t *dev); +extern void DRM(ioremapfree)(void *pt, unsigned long size, drm_device_t *dev); #if __REALLY_HAVE_AGP extern agp_memory *DRM(alloc_agp)(int pages, u32 type); diff -Nru a/drivers/char/drm/drm_bufs.h b/drivers/char/drm/drm_bufs.h --- a/drivers/char/drm/drm_bufs.h Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/drm_bufs.h Tue Sep 17 23:47:51 2002 @@ -107,7 +107,7 @@ switch ( map->type ) { case _DRM_REGISTERS: case _DRM_FRAME_BUFFER: -#if !defined(__sparc__) && !defined(__alpha__) +#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) if ( map->offset + map->size < map->offset || map->offset < virt_to_phys(high_memory) ) { DRM(free)( map, sizeof(*map), DRM_MEM_MAPS ); @@ -124,7 +124,7 @@ MTRR_TYPE_WRCOMB, 1 ); } #endif - map->handle = DRM(ioremap)( map->offset, map->size ); + map->handle = DRM(ioremap)( map->offset, map->size, dev ); break; case _DRM_SHM: @@ -245,7 +245,7 @@ DRM_DEBUG("mtrr_del = %d\n", retcode); } #endif - DRM(ioremapfree)(map->handle, map->size); + DRM(ioremapfree)(map->handle, map->size, dev); break; case _DRM_SHM: vfree(map->handle); diff -Nru a/drivers/char/drm/drm_drv.h b/drivers/char/drm/drm_drv.h --- a/drivers/char/drm/drm_drv.h Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/drm_drv.h Tue Sep 17 23:47:51 2002 @@ -439,7 +439,7 @@ DRM_DEBUG( "mtrr_del=%d\n", retcode ); } #endif - DRM(ioremapfree)( map->handle, map->size ); + DRM(ioremapfree)( map->handle, map->size, dev ); break; case _DRM_SHM: vfree(map->handle); diff -Nru a/drivers/char/drm/drm_memory.h b/drivers/char/drm/drm_memory.h --- a/drivers/char/drm/drm_memory.h Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/drm_memory.h Tue Sep 17 23:47:51 2002 @@ -33,6 +33,10 @@ #include #include "drmP.h" #include +#include + +#include +#include typedef struct drm_mem_stats { const char *name; @@ -291,17 +295,108 @@ } } -void *DRM(ioremap)(unsigned long offset, unsigned long size) +#if __REALLY_HAVE_AGP + +/* + * Find the drm_map that covers the range [offset, offset+size). 
+ */ +static inline drm_map_t * +DRM(lookup_map)(unsigned long offset, unsigned long size, drm_device_t *dev) +{ + struct list_head *list; + drm_map_list_t *r_list; + drm_map_t *map; + + list_for_each(list, &dev->maplist->head) { + r_list = (drm_map_list_t *) list; + map = r_list->map; + if (!map) + continue; + if (map->offset <= offset && (offset + size) <= (map->offset + map->size)) + return map; + } + return NULL; +} + +static inline void * +DRM(agp_remap) (unsigned long offset, unsigned long size, drm_device_t *dev) { + unsigned long *phys_addr_map, i, num_pages = PAGE_ALIGN(size) / PAGE_SIZE; + struct page **page_map, **page_map_ptr; + struct drm_agp_mem *agpmem; + struct vm_struct *area; + + + size = PAGE_ALIGN(size); + + for (agpmem = dev->agp->memory; agpmem; agpmem = agpmem->next) + if (agpmem->bound <= offset + && (agpmem->bound + (agpmem->pages << PAGE_SHIFT)) >= (offset + size)) + break; + if (!agpmem) + return NULL; + + /* + * OK, we're mapping AGP space on a chipset/platform on which memory accesses by + * the CPU do not get remapped by the GART. We fix this by using the kernel's + * page-table instead (that's probably faster anyhow...). + */ + area = get_vm_area(size, VM_AGP_REMAP); + if (!area) + return NULL; + + flush_cache_all(); + + /* note: use vmalloc() because num_pages could be large... */ + page_map = vmalloc(num_pages * sizeof(struct page *)); + if (!page_map) + return NULL; + + phys_addr_map = agpmem->memory->memory + (offset - agpmem->bound) / PAGE_SIZE; + for (i = 0; i < num_pages; ++i) + page_map[i] = pfn_to_page(phys_addr_map[i] >> PAGE_SHIFT); + page_map_ptr = page_map; + if (map_vm_area(area, PAGE_AGP, &page_map_ptr) < 0) { + vunmap(area->addr); + vfree(page_map); + return NULL; + } + vfree(page_map); + + flush_tlb_kernel_range(area->addr, area->addr + size); + return area->addr; +} + +#else /* !__REALLY_HAVE_AGP */ + +static inline void * +DRM(agp_remap) (unsigned long offset, unsigned long size, drm_device_t *dev) { return NULL; } + +#endif /* !__REALLY_HAVE_AGP */ + +void *DRM(ioremap)(unsigned long offset, unsigned long size, drm_device_t *dev) +{ + int remap_aperture = 0; void *pt; if (!size) { - DRM_MEM_ERROR(DRM_MEM_MAPPINGS, - "Mapping 0 bytes at 0x%08lx\n", offset); + DRM_MEM_ERROR(DRM_MEM_MAPPINGS, "Mapping 0 bytes at 0x%08lx\n", offset); return NULL; } - if (!(pt = ioremap(offset, size))) { +#if __REALLY_HAVE_AGP + if (dev->agp->cant_use_aperture) { + drm_map_t *map = DRM(lookup_map)(offset, size, dev); + + if (map && map->type == _DRM_AGP) + remap_aperture = 1; + } +#endif + if (remap_aperture) + pt = DRM(agp_remap)(offset, size, dev); + else + pt = ioremap(offset, size); + if (!pt) { spin_lock(&DRM(mem_lock)); ++DRM(mem_stats)[DRM_MEM_MAPPINGS].fail_count; spin_unlock(&DRM(mem_lock)); @@ -314,8 +409,9 @@ return pt; } -void *DRM(ioremap_nocache)(unsigned long offset, unsigned long size) +void *DRM(ioremap_nocache)(unsigned long offset, unsigned long size, drm_device_t *dev) { + int remap_aperture = 0; void *pt; if (!size) { @@ -324,7 +420,19 @@ return NULL; } - if (!(pt = ioremap_nocache(offset, size))) { +#if __REALLY_HAVE_AGP + if (dev->agp->cant_use_aperture) { + drm_map_t *map = DRM(lookup_map)(offset, size, dev); + + if (map && map->type == _DRM_AGP) + remap_aperture = 1; + } +#endif + if (remap_aperture) + pt = DRM(agp_remap)(offset, size, dev); + else + pt = ioremap_nocache(offset, size); + if (!pt) { spin_lock(&DRM(mem_lock)); ++DRM(mem_stats)[DRM_MEM_MAPPINGS].fail_count; spin_unlock(&DRM(mem_lock)); @@ -337,7 +445,7 @@ return pt; } 
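The remap decision above hinges on DRM(lookup_map)() accepting a map only when it covers the whole requested range [offset, offset+size). That containment test, lifted into a self-contained user-space sketch (struct map here is a hypothetical stand-in for drm_map_t, not the DRM type):

#include <stdio.h>

struct map {
	unsigned long offset;	/* start of the mapping (bus address) */
	unsigned long size;	/* length in bytes */
};

/* Return the map that fully covers [offset, offset+size), if any. */
static struct map *lookup_map(struct map *maps, int n,
			      unsigned long offset, unsigned long size)
{
	int i;

	for (i = 0; i < n; i++)
		if (maps[i].offset <= offset
		    && offset + size <= maps[i].offset + maps[i].size)
			return &maps[i];
	return NULL;	/* partial overlap is rejected, as in the DRM code */
}

int main(void)
{
	struct map maps[1] = { { 0xf0000000UL, 0x00100000UL } };	/* made-up AGP map */

	printf("inside: %s\n", lookup_map(maps, 1, 0xf0001000UL, 0x2000UL) ? "hit" : "miss");
	printf("straddling: %s\n", lookup_map(maps, 1, 0xf00ff000UL, 0x2000UL) ? "hit" : "miss");
	return 0;
}

Rejecting partial overlaps is deliberate: a range that is not entirely inside an AGP map cannot be remapped through DRM(agp_remap)(), so the caller falls back to plain ioremap().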
-void DRM(ioremapfree)(void *pt, unsigned long size) +void DRM(ioremapfree)(void *pt, unsigned long size, drm_device_t *dev) { int alloc_count; int free_count; @@ -345,8 +453,14 @@ if (!pt) DRM_MEM_ERROR(DRM_MEM_MAPPINGS, "Attempt to free NULL pointer\n"); - else - iounmap(pt); + else { +#if __REALLY_HAVE_AGP + if (dev->agp->cant_use_aperture && (vgetflags(pt) & VM_AGP_REMAP)) + vunmap(pt); + else +#endif + iounmap(pt); + } spin_lock(&DRM(mem_lock)); DRM(mem_stats)[DRM_MEM_MAPPINGS].bytes_freed += size; diff -Nru a/drivers/char/drm/drm_vm.h b/drivers/char/drm/drm_vm.h --- a/drivers/char/drm/drm_vm.h Tue Sep 17 23:47:52 2002 +++ b/drivers/char/drm/drm_vm.h Tue Sep 17 23:47:52 2002 @@ -108,12 +108,12 @@ * Get the page, inc the use count, and return it */ offset = (baddr - agpmem->bound) >> PAGE_SHIFT; - agpmem->memory->memory[offset] &= dev->agp->page_mask; page = virt_to_page(__va(agpmem->memory->memory[offset])); get_page(page); - DRM_DEBUG("baddr = 0x%lx page = 0x%p, offset = 0x%lx\n", - baddr, __va(agpmem->memory->memory[offset]), offset); + DRM_DEBUG("baddr = 0x%lx page = 0x%p, offset = 0x%lx, count=%d\n", + baddr, __va(agpmem->memory->memory[offset]), offset, + atomic_read(&page->count)); return page; } @@ -207,7 +207,7 @@ DRM_DEBUG("mtrr_del = %d\n", retcode); } #endif - DRM(ioremapfree)(map->handle, map->size); + DRM(ioremapfree)(map->handle, map->size, dev); break; case _DRM_SHM: vfree(map->handle); @@ -421,15 +421,16 @@ switch (map->type) { case _DRM_AGP: -#if defined(__alpha__) +#if __REALLY_HAVE_AGP + if (dev->agp->cant_use_aperture) { /* - * On Alpha we can't talk to bus dma address from the - * CPU, so for memory of type DRM_AGP, we'll deal with - * sorting out the real physical pages and mappings - * in nopage() + * On some platforms we can't talk to bus dma address from the CPU, so for + * memory of type DRM_AGP, we'll deal with sorting out the real physical + * pages and mappings in nopage() */ vma->vm_ops = &DRM(vm_ops); break; + } #endif /* fall through to _DRM_FRAME_BUFFER... 
*/ case _DRM_FRAME_BUFFER: @@ -440,15 +441,15 @@ pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; } -#elif defined(__ia64__) - if (map->type != _DRM_AGP) - vma->vm_page_prot = - pgprot_writecombine(vma->vm_page_prot); #elif defined(__powerpc__) pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE | _PAGE_GUARDED; #endif vma->vm_flags |= VM_IO; /* not in core dump */ } +#if defined(__ia64__) + if (map->type != _DRM_AGP) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +#endif offset = DRIVER_GET_REG_OFS(); #ifdef __sparc__ if (io_remap_page_range(DRM_RPR_ARG(vma) vma->vm_start, diff -Nru a/drivers/char/drm/gamma_dma.c b/drivers/char/drm/gamma_dma.c --- a/drivers/char/drm/gamma_dma.c Tue Sep 17 23:47:52 2002 +++ b/drivers/char/drm/gamma_dma.c Tue Sep 17 23:47:52 2002 @@ -638,7 +638,7 @@ } else { DRM_FIND_MAP( dev_priv->buffers, init->buffers_offset ); - DRM_IOREMAP( dev_priv->buffers ); + DRM_IOREMAP( dev_priv->buffers, dev ); buf = dma->buflist[GLINT_DRI_BUF_COUNT]; pgt = buf->address; @@ -668,7 +668,7 @@ if ( dev->dev_private ) { drm_gamma_private_t *dev_priv = dev->dev_private; - DRM_IOREMAPFREE( dev_priv->buffers ); + DRM_IOREMAPFREE( dev_priv->buffers, dev ); DRM(free)( dev->dev_private, sizeof(drm_gamma_private_t), DRM_MEM_DRIVER ); diff -Nru a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c --- a/drivers/char/drm/i810_dma.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/i810_dma.c Tue Sep 17 23:47:51 2002 @@ -313,7 +313,7 @@ if(dev_priv->ring.virtual_start) { DRM(ioremapfree)((void *) dev_priv->ring.virtual_start, - dev_priv->ring.Size); + dev_priv->ring.Size, dev); } if(dev_priv->hw_status_page != 0UL) { i810_free_page(dev, dev_priv->hw_status_page); @@ -327,7 +327,7 @@ for (i = 0; i < dma->buf_count; i++) { drm_buf_t *buf = dma->buflist[ i ]; drm_i810_buf_priv_t *buf_priv = buf->dev_private; - DRM(ioremapfree)(buf_priv->kernel_virtual, buf->total); + DRM(ioremapfree)(buf_priv->kernel_virtual, buf->total, dev); } } return 0; @@ -397,7 +397,7 @@ *buf_priv->in_use = I810_BUF_FREE; buf_priv->kernel_virtual = DRM(ioremap)(buf->bus_address, - buf->total); + buf->total, dev); } return 0; } @@ -450,7 +450,7 @@ dev_priv->ring.virtual_start = DRM(ioremap)(dev->agp->base + init->ring_start, - init->ring_size); + init->ring_size, dev); if (dev_priv->ring.virtual_start == NULL) { dev->dev_private = (void *) dev_priv; diff -Nru a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c --- a/drivers/char/drm/i830_dma.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/i830_dma.c Tue Sep 17 23:47:51 2002 @@ -344,7 +344,7 @@ if(dev_priv->ring.virtual_start) { DRM(ioremapfree)((void *) dev_priv->ring.virtual_start, - dev_priv->ring.Size); + dev_priv->ring.Size, dev); } if(dev_priv->hw_status_page != 0UL) { i830_free_page(dev, dev_priv->hw_status_page); @@ -358,7 +358,7 @@ for (i = 0; i < dma->buf_count; i++) { drm_buf_t *buf = dma->buflist[ i ]; drm_i830_buf_priv_t *buf_priv = buf->dev_private; - DRM(ioremapfree)(buf_priv->kernel_virtual, buf->total); + DRM(ioremapfree)(buf_priv->kernel_virtual, buf->total, dev); } } return 0; @@ -430,7 +430,7 @@ *buf_priv->in_use = I830_BUF_FREE; buf_priv->kernel_virtual = DRM(ioremap)(buf->bus_address, - buf->total); + buf->total, dev); } return 0; } @@ -487,7 +487,7 @@ dev_priv->ring.virtual_start = DRM(ioremap)(dev->agp->base + init->ring_start, - init->ring_size); + init->ring_size, dev); if (dev_priv->ring.virtual_start == NULL) { dev->dev_private = (void *) dev_priv; diff -Nru 
a/drivers/char/drm/mga_dma.c b/drivers/char/drm/mga_dma.c --- a/drivers/char/drm/mga_dma.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/mga_dma.c Tue Sep 17 23:47:51 2002 @@ -545,9 +545,9 @@ (drm_mga_sarea_t *)((u8 *)dev_priv->sarea->handle + init->sarea_priv_offset); - DRM_IOREMAP( dev_priv->warp ); - DRM_IOREMAP( dev_priv->primary ); - DRM_IOREMAP( dev_priv->buffers ); + DRM_IOREMAP( dev_priv->warp, dev ); + DRM_IOREMAP( dev_priv->primary, dev ); + DRM_IOREMAP( dev_priv->buffers, dev ); if(!dev_priv->warp->handle || !dev_priv->primary->handle || @@ -633,9 +633,9 @@ if ( dev->dev_private ) { drm_mga_private_t *dev_priv = dev->dev_private; - DRM_IOREMAPFREE( dev_priv->warp ); - DRM_IOREMAPFREE( dev_priv->primary ); - DRM_IOREMAPFREE( dev_priv->buffers ); + DRM_IOREMAPFREE( dev_priv->warp, dev ); + DRM_IOREMAPFREE( dev_priv->primary, dev ); + DRM_IOREMAPFREE( dev_priv->buffers, dev ); if ( dev_priv->head != NULL ) { mga_freelist_cleanup( dev ); diff -Nru a/drivers/char/drm/mga_drv.h b/drivers/char/drm/mga_drv.h --- a/drivers/char/drm/mga_drv.h Tue Sep 17 23:47:52 2002 +++ b/drivers/char/drm/mga_drv.h Tue Sep 17 23:47:52 2002 @@ -235,7 +235,7 @@ if ( MGA_VERBOSE ) { \ DRM_INFO( "BEGIN_DMA( %d ) in %s\n", \ (n), __FUNCTION__ ); \ - DRM_INFO( " space=0x%x req=0x%x\n", \ + DRM_INFO( " space=0x%x req=0x%Zx\n", \ dev_priv->prim.space, (n) * DMA_BLOCK_SIZE ); \ } \ prim = dev_priv->prim.start; \ @@ -285,7 +285,7 @@ #define DMA_WRITE( offset, val ) \ do { \ if ( MGA_VERBOSE ) { \ - DRM_INFO( " DMA_WRITE( 0x%08x ) at 0x%04x\n", \ + DRM_INFO( " DMA_WRITE( 0x%08x ) at 0x%04Zx\n", \ (u32)(val), write + (offset) * sizeof(u32) ); \ } \ *(volatile u32 *)(prim + write + (offset) * sizeof(u32)) = val; \ diff -Nru a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c --- a/drivers/char/drm/r128_cce.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/r128_cce.c Tue Sep 17 23:47:51 2002 @@ -350,8 +350,8 @@ R128_WRITE( R128_PM4_BUFFER_DL_RPTR_ADDR, entry->busaddr[page_ofs]); - DRM_DEBUG( "ring rptr: offset=0x%08x handle=0x%08lx\n", - entry->busaddr[page_ofs], + DRM_DEBUG( "ring rptr: offset=0x%08lx handle=0x%08lx\n", + (unsigned long) entry->busaddr[page_ofs], entry->handle + tmp_ofs ); } @@ -540,9 +540,9 @@ init->sarea_priv_offset); if ( !dev_priv->is_pci ) { - DRM_IOREMAP( dev_priv->cce_ring ); - DRM_IOREMAP( dev_priv->ring_rptr ); - DRM_IOREMAP( dev_priv->buffers ); + DRM_IOREMAP( dev_priv->cce_ring, dev ); + DRM_IOREMAP( dev_priv->ring_rptr, dev ); + DRM_IOREMAP( dev_priv->buffers, dev ); if(!dev_priv->cce_ring->handle || !dev_priv->ring_rptr->handle || !dev_priv->buffers->handle) { @@ -618,9 +618,9 @@ #if __REALLY_HAVE_SG if ( !dev_priv->is_pci ) { #endif - DRM_IOREMAPFREE( dev_priv->cce_ring ); - DRM_IOREMAPFREE( dev_priv->ring_rptr ); - DRM_IOREMAPFREE( dev_priv->buffers ); + DRM_IOREMAPFREE( dev_priv->cce_ring, dev ); + DRM_IOREMAPFREE( dev_priv->ring_rptr, dev ); + DRM_IOREMAPFREE( dev_priv->buffers, dev ); #if __REALLY_HAVE_SG } else { if (!DRM(ati_pcigart_cleanup)( dev, diff -Nru a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c --- a/drivers/char/drm/radeon_cp.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/drm/radeon_cp.c Tue Sep 17 23:47:51 2002 @@ -904,8 +904,8 @@ RADEON_WRITE( RADEON_CP_RB_RPTR_ADDR, entry->busaddr[page_ofs]); - DRM_DEBUG( "ring rptr: offset=0x%08x handle=0x%08lx\n", - entry->busaddr[page_ofs], + DRM_DEBUG( "ring rptr: offset=0x%08lx handle=0x%08lx\n", + (unsigned long) entry->busaddr[page_ofs], entry->handle + tmp_ofs ); } @@ -1157,9 +1157,9 @@ 
init->sarea_priv_offset); if ( !dev_priv->is_pci ) { - DRM_IOREMAP( dev_priv->cp_ring ); - DRM_IOREMAP( dev_priv->ring_rptr ); - DRM_IOREMAP( dev_priv->buffers ); + DRM_IOREMAP( dev_priv->cp_ring, dev ); + DRM_IOREMAP( dev_priv->ring_rptr, dev ); + DRM_IOREMAP( dev_priv->buffers, dev ); if(!dev_priv->cp_ring->handle || !dev_priv->ring_rptr->handle || !dev_priv->buffers->handle) { @@ -1278,9 +1278,9 @@ drm_radeon_private_t *dev_priv = dev->dev_private; if ( !dev_priv->is_pci ) { - DRM_IOREMAPFREE( dev_priv->cp_ring ); - DRM_IOREMAPFREE( dev_priv->ring_rptr ); - DRM_IOREMAPFREE( dev_priv->buffers ); + DRM_IOREMAPFREE( dev_priv->cp_ring, dev ); + DRM_IOREMAPFREE( dev_priv->ring_rptr, dev ); + DRM_IOREMAPFREE( dev_priv->buffers, dev ); } else { #if __REALLY_HAVE_SG if (!DRM(ati_pcigart_cleanup)( dev, diff -Nru a/drivers/char/mem.c b/drivers/char/mem.c --- a/drivers/char/mem.c Tue Sep 17 23:47:51 2002 +++ b/drivers/char/mem.c Tue Sep 17 23:47:51 2002 @@ -511,10 +511,12 @@ case 0: file->f_pos = offset; ret = file->f_pos; + force_successful_syscall_return(); break; case 1: file->f_pos += offset; ret = file->f_pos; + force_successful_syscall_return(); break; default: ret = -EINVAL; diff -Nru a/drivers/media/radio/Makefile b/drivers/media/radio/Makefile --- a/drivers/media/radio/Makefile Tue Sep 17 23:47:51 2002 +++ b/drivers/media/radio/Makefile Tue Sep 17 23:47:51 2002 @@ -5,6 +5,8 @@ # All of the (potential) objects that export symbols. # This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'. +obj-y := dummy.o + export-objs := miropcm20-rds-core.o miropcm20-objs := miropcm20-rds-core.o miropcm20-radio.o diff -Nru a/drivers/media/radio/dummy.c b/drivers/media/radio/dummy.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/media/radio/dummy.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1 @@ +/* just so the linker knows what kind of object files it's dealing with... */ diff -Nru a/drivers/media/video/Makefile b/drivers/media/video/Makefile --- a/drivers/media/video/Makefile Tue Sep 17 23:47:51 2002 +++ b/drivers/media/video/Makefile Tue Sep 17 23:47:51 2002 @@ -5,7 +5,8 @@ # All of the (potential) objects that export symbols. # This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'. -export-objs := videodev.o bttv-if.o cpia.o video-buf.o +obj-y := dummy.o + bttv-objs := bttv-driver.o bttv-cards.o bttv-if.o \ bttv-risc.o bttv-vbi.o diff -Nru a/drivers/media/video/dummy.c b/drivers/media/video/dummy.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/media/video/dummy.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1 @@ +/* just so the linker knows what kind of object files it's dealing with... */
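The mptscsih diff that follows replaces the old save_flags()/cli() pair with a driver-local spinlock. The shape of that conversion, sketched in isolation (the lock name matches the patch; the body of the critical section is a placeholder):

#include <linux/spinlock.h>

static spinlock_t detect_lock = SPIN_LOCK_UNLOCKED;

static void init_shared_fields(void)	/* hypothetical critical section */
{
	unsigned long flags;

	/* was: save_flags(flags); cli(); */
	spin_lock_irqsave(&detect_lock, flags);

	/* ... initialize state that an interrupt handler may also touch ... */

	/* was: restore_flags(flags); */
	spin_unlock_irqrestore(&detect_lock, flags);
}

Unlike cli(), which had to shut out interrupts globally on SMP, the spinlock only excludes paths that take the same lock and disables interrupts on the local CPU for the duration of the critical section.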
diff -Nru a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c --- a/drivers/message/fusion/mptscsih.c Tue Sep 17 23:47:51 2002 +++ b/drivers/message/fusion/mptscsih.c Tue Sep 17 23:47:51 2002 @@ -99,6 +99,8 @@ MODULE_PARM(mptscsih, "s"); #endif +static spinlock_t detect_lock = SPIN_LOCK_UNLOCKED; + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ typedef struct _BIG_SENSE_BUF { @@ -1156,8 +1158,7 @@ #endif sh = scsi_register(tpnt, sizeof(MPT_SCSI_HOST)); if (sh != NULL) { - save_flags(flags); - cli(); + spin_lock_irqsave(&detect_lock, flags); sh->io_port = 0; sh->n_io_port = 0; sh->irq = 0; @@ -1221,7 +1222,7 @@ */ scsi_set_pci_device(sh, this->pcidev); - restore_flags(flags); + spin_unlock_irqrestore(&detect_lock, flags); hd = (MPT_SCSI_HOST *) sh->hostdata; hd->ioc = this; diff -Nru a/drivers/net/tulip/media.c b/drivers/net/tulip/media.c --- a/drivers/net/tulip/media.c Tue Sep 17 23:47:51 2002 +++ b/drivers/net/tulip/media.c Tue Sep 17 23:47:51 2002 @@ -278,6 +278,10 @@ for (i = 0; i < init_length; i++) outl(init_sequence[i], ioaddr + CSR12); } + + (void) inl(ioaddr + CSR6); /* flush CSR12 writes */ + udelay(500); /* Give MII time to recover */ + tmp_info = get_u16(&misc_info[1]); if (tmp_info) tp->advertising[phy_num] = tmp_info | 1; diff -Nru a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c --- a/drivers/scsi/megaraid.c Tue Sep 17 23:47:51 2002 +++ b/drivers/scsi/megaraid.c Tue Sep 17 23:47:51 2002 @@ -2047,7 +2047,7 @@ return; mbox = (mega_mailbox *) pScb->mboxData; - printk ("%u cmd:%x id:%x #scts:%x lba:%x addr:%x logdrv:%x #sg:%x\n", + printk ("%lu cmd:%x id:%x #scts:%x lba:%x addr:%x logdrv:%x #sg:%x\n", pScb->SCpnt->pid, mbox->cmd, mbox->cmdid, mbox->numsectors, mbox->lba, mbox->xferaddr, mbox->logdrv, mbox->numsgelements); @@ -3356,9 +3356,13 @@ mbox[0] = IS_BIOS_ENABLED; mbox[2] = GET_BIOS; - mboxpnt->xferaddr = virt_to_bus ((void *) megacfg->mega_buffer); + mboxpnt->xferaddr = pci_map_single(megacfg->dev, + (void *) megacfg->mega_buffer, (2 * 1024L), + PCI_DMA_FROMDEVICE); ret = megaIssueCmd (megacfg, mbox, NULL, 0); + + pci_unmap_single(megacfg->dev, mboxpnt->xferaddr, 2 * 1024L, PCI_DMA_FROMDEVICE); return (*(char *) megacfg->mega_buffer); } diff -Nru a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c --- a/drivers/scsi/scsi_ioctl.c Tue Sep 17 23:47:52 2002 +++ b/drivers/scsi/scsi_ioctl.c Tue Sep 17 23:47:52 2002 @@ -196,6 +196,9 @@ unsigned int needed, buf_needed; int timeout, retries, result; int data_direction, gfp_mask = GFP_KERNEL; +#if __GNUC__ < 3 + int foo; +#endif if (!sic) return -EINVAL; @@ -209,11 +212,21 @@ if (verify_area(VERIFY_READ, sic, sizeof(Scsi_Ioctl_Command))) return -EFAULT; +#if __GNUC__ < 3 + foo = __get_user(inlen, &sic->inlen); + if (foo) + return -EFAULT; + + foo = __get_user(outlen, &sic->outlen); + if (foo) + return -EFAULT; +#else if(__get_user(inlen, &sic->inlen)) return -EFAULT; if(__get_user(outlen, &sic->outlen)) return -EFAULT; +#endif /* * We do not transfer more than MAX_BUF with this interface. */
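The megaraid hunk above trades a bare virt_to_bus() for the PCI DMA-mapping API, bracketing the command with a map/unmap pair. Schematically (pdev, buf and issue_command() are placeholders for the driver's own handles; the 2KB length mirrors the mega_buffer transfer):

#include <linux/pci.h>

#define RESPONSE_LEN	(2 * 1024L)

static int read_device_response(struct pci_dev *pdev, void *buf,
				int (*issue_command)(dma_addr_t))
{
	dma_addr_t handle;
	int ret;

	handle = pci_map_single(pdev, buf, RESPONSE_LEN, PCI_DMA_FROMDEVICE);
	ret = issue_command(handle);	/* device DMAs its answer into buf */
	pci_unmap_single(pdev, handle, RESPONSE_LEN, PCI_DMA_FROMDEVICE);

	return ret;	/* buf is now coherent for CPU reads */
}

On platforms with an IOMMU (ia64 among them) the dma_addr_t returned by pci_map_single() need not equal virt_to_bus(buf), which is why the open-coded conversion had to go.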
diff -Nru a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c --- a/drivers/scsi/sym53c8xx_2/sym_glue.c Tue Sep 17 23:47:51 2002 +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c Tue Sep 17 23:47:51 2002 @@ -295,11 +295,7 @@ #ifndef SYM_LINUX_DYNAMIC_DMA_MAPPING typedef u_long bus_addr_t; #else -#if SYM_CONF_DMA_ADDRESSING_MODE > 0 -typedef dma64_addr_t bus_addr_t; -#else typedef dma_addr_t bus_addr_t; -#endif #endif /* diff -Nru a/drivers/scsi/sym53c8xx_2/sym_malloc.c b/drivers/scsi/sym53c8xx_2/sym_malloc.c --- a/drivers/scsi/sym53c8xx_2/sym_malloc.c Tue Sep 17 23:47:52 2002 +++ b/drivers/scsi/sym53c8xx_2/sym_malloc.c Tue Sep 17 23:47:52 2002 @@ -143,12 +143,14 @@ a = (m_addr_t) ptr; while (1) { -#ifdef SYM_MEM_FREE_UNUSED if (s == SYM_MEM_CLUSTER_SIZE) { +#ifdef SYM_MEM_FREE_UNUSED M_FREE_MEM_CLUSTER(a); - break; - } +#else + ((m_link_p) a)->next = h[i].next; + h[i].next = (m_link_p) a; #endif + } b = a ^ s; q = &h[i]; while (q->next && q->next != (m_link_p) b) { diff -Nru a/drivers/serial/8250.c b/drivers/serial/8250.c --- a/drivers/serial/8250.c Tue Sep 17 23:47:51 2002 +++ b/drivers/serial/8250.c Tue Sep 17 23:47:51 2002 @@ -1909,6 +1909,17 @@ return 0; } +int __init early_register_port (struct uart_port *port) +{ + if (port->line >= ARRAY_SIZE(serial8250_ports)) + return -ENODEV; + + serial8250_isa_init_ports(); /* force ISA defaults */ + serial8250_ports[port->line].port = *port; + serial8250_ports[port->line].port.ops = &serial8250_pops; + return 0; +} + /** * unregister_serial - remove a 16x50 serial port at runtime * @line: serial line number diff -Nru a/drivers/serial/8250_acpi.c b/drivers/serial/8250_acpi.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/serial/8250_acpi.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1,179 @@ +/* + * linux/drivers/char/acpi_serial.c + * + * Copyright (C) 2000, 2002 Hewlett-Packard Co. + * Khalid Aziz + * + * Detect and initialize the headless console serial port defined in SPCR table and debug + * serial port defined in DBGP table. + * + * 2002/08/29 davidm Adjust it to new 2.5 serial driver infrastructure. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#undef SERIAL_DEBUG_ACPI + +/* + * Query ACPI tables for a debug and a headless console serial port. If found, add them to + * rs_table[]. A pointer to either SPCR or DBGP table is passed as parameter. This + * function should be called before serial_console_init() is called to make sure the SPCR + * serial console will be available for use. IA-64 kernel calls this function from within + * acpi.c when it encounters SPCR or DBGP tables as it parses the ACPI 2.0 tables during + * bootup. + */ +void __init +setup_serial_acpi (void *tablep) +{ + acpi_ser_t *acpi_ser_p; + struct uart_port port; + unsigned long iobase; + int gsi; + extern int early_register_port (struct uart_port *); /* XXX fix me */ + +#ifdef SERIAL_DEBUG_ACPI + printk("Entering setup_serial_acpi()\n"); +#endif + + /* Now get the table */ + if (!tablep) + return; + + memset(&port, 0, sizeof(port)); + + acpi_ser_p = (acpi_ser_t *) tablep; + + /* + * Perform a sanity check on the table. Table should have a signature of "SPCR" or + * "DBGP" and it should be at least 52 bytes long.
+ */ + if (strncmp(acpi_ser_p->signature, ACPI_SPCRT_SIGNATURE, ACPI_SIG_LEN) != 0 && + strncmp(acpi_ser_p->signature, ACPI_DBGPT_SIGNATURE, ACPI_SIG_LEN) != 0) + return; + if (acpi_ser_p->length < 52) + return; + + iobase = (((u64) acpi_ser_p->base_addr.addrh) << 32) | acpi_ser_p->base_addr.addrl; + gsi = ( (acpi_ser_p->global_int[3] << 24) | (acpi_ser_p->global_int[2] << 16) + | (acpi_ser_p->global_int[1] << 8) | (acpi_ser_p->global_int[0] << 0)); + +#ifdef SERIAL_DEBUG_ACPI + printk("setup_serial_acpi(): table pointer = 0x%p\n", acpi_ser_p); + printk(" sig = '%c%c%c%c'\n", acpi_ser_p->signature[0], + acpi_ser_p->signature[1], acpi_ser_p->signature[2], acpi_ser_p->signature[3]); + printk(" length = %d\n", acpi_ser_p->length); + printk(" Rev = %d\n", acpi_ser_p->rev); + printk(" Interface type = %d\n", acpi_ser_p->intfc_type); + printk(" Base address = 0x%lX\n", iobase); + printk(" IRQ = %d\n", acpi_ser_p->irq); + printk(" Global System Int = %d\n", gsi); + printk(" Baud rate = "); + switch (acpi_ser_p->baud) { + case ACPI_SERIAL_BAUD_9600: + printk("9600\n"); + break; + + case ACPI_SERIAL_BAUD_19200: + printk("19200\n"); + break; + + case ACPI_SERIAL_BAUD_57600: + printk("57600\n"); + break; + + case ACPI_SERIAL_BAUD_115200: + printk("115200\n"); + break; + + default: + printk("Huh (%d)\n", acpi_ser_p->baud); + break; + } + if (acpi_ser_p->base_addr.space_id == ACPI_SERIAL_PCICONF_SPACE) { + printk(" PCI serial port:\n"); + printk(" Bus %d, Device %d, Vendor ID 0x%x, Dev ID 0x%x\n", + acpi_ser_p->pci_bus, acpi_ser_p->pci_dev, + acpi_ser_p->pci_vendor_id, acpi_ser_p->pci_dev_id); + } +#endif + /* + * Now build a serial_req structure to update the entry in rs_table for the + * headless console port. + */ + switch (acpi_ser_p->intfc_type) { + case ACPI_SERIAL_INTFC_16550: + port.type = PORT_16550; + port.uartclk = BASE_BAUD * 16; + break; + + case ACPI_SERIAL_INTFC_16450: + port.type = PORT_16450; + port.uartclk = BASE_BAUD * 16; + break; + + default: + port.type = PORT_UNKNOWN; + break; + } + if (strncmp(acpi_ser_p->signature, ACPI_SPCRT_SIGNATURE, ACPI_SIG_LEN) == 0) + port.line = ACPI_SERIAL_CONSOLE_PORT; + else if (strncmp(acpi_ser_p->signature, ACPI_DBGPT_SIGNATURE, ACPI_SIG_LEN) == 0) + port.line = ACPI_SERIAL_DEBUG_PORT; + /* + * Check if this is an I/O mapped address or a memory mapped address + */ + if (acpi_ser_p->base_addr.space_id == ACPI_SERIAL_MEM_SPACE) { + port.iobase = 0; + port.mapbase = iobase; + port.membase = ioremap(iobase, 64); + port.iotype = SERIAL_IO_MEM; + } else if (acpi_ser_p->base_addr.space_id == ACPI_SERIAL_IO_SPACE) { + port.iobase = iobase; + port.mapbase = 0; + port.membase = NULL; + port.iotype = SERIAL_IO_PORT; + } else if (acpi_ser_p->base_addr.space_id == ACPI_SERIAL_PCICONF_SPACE) { + printk("WARNING: No support for PCI serial console\n"); + return; + } + + /* + * If the table does not have IRQ information, use 0 for IRQ. This will force + * rs_init() to probe for IRQ. + */ + if (acpi_ser_p->length < 53) + port.irq = 0; + else { + port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ; + if (acpi_ser_p->int_type & (ACPI_SERIAL_INT_APIC | ACPI_SERIAL_INT_SAPIC)) + port.irq = gsi; + else if (acpi_ser_p->int_type & ACPI_SERIAL_INT_PCAT) + port.irq = acpi_ser_p->irq; + else + /* + * IRQ type not being set would mean UART will run in polling + * mode. Do not probe for IRQ in that case. 
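+ *
+ * (UPF_AUTO_IRQ is the bit that makes the 8250 probe code auto-detect
+ * the IRQ, so "do not probe" translates into masking that bit out of
+ * port.flags, as the statement below does.)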
+ */ + port.flags &= ~UPF_AUTO_IRQ; + port.fifosize = 0; + port.hub6 = 0; + port.regshift = 0; + if (early_register_port(&port) < 0) { + printk("early_register_port() for ACPI serial console port failed\n"); + return; + } + +#ifdef SERIAL_DEBUG_ACPI + printk("Leaving setup_serial_acpi()\n"); +#endif +} diff -Nru a/drivers/serial/8250_hcdp.c b/drivers/serial/8250_hcdp.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/serial/8250_hcdp.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1,242 @@ +/* + * linux/drivers/char/hcdp_serial.c + * + * Copyright (C) 2002 Hewlett-Packard Co. + * Khalid Aziz + * + * Parse the EFI HCDP table to locate serial console and debug ports and initialize them. + * + * 2002/08/29 davidm Adjust it to new 2.5 serial driver infrastructure (untested). + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SERIAL_DEBUG_HCDP + +/* + * Parse the HCDP table to find descriptions for headless console and debug serial ports + * and add them to rs_table[]. A pointer to HCDP table is passed as parameter. This + * function should be called before serial_console_init() is called to make sure the HCDP + * serial console will be available for use. IA-64 kernel calls this function from + * setup_arch() after the EFI and ACPI tables have been parsed. + */ +void __init +setup_serial_hcdp (void *tablep) +{ + hcdp_dev_t *hcdp_dev; + struct uart_port port; + unsigned long iobase; + hcdp_t hcdp; + int gsi, i = 0, nr; +#if 0 + static int shift_once = 1; +#endif + extern int early_register_port (struct uart_port *); /* XXX fix me */ + +#ifdef SERIAL_DEBUG_HCDP + printk("Entering setup_serial_hcdp()\n"); +#endif + + /* Verify we have a valid table pointer */ + if (!tablep) + return; + + memset(&port, 0, sizeof(port)); + + /* + * Don't trust firmware to give us a table starting at an aligned address. Make a + * local copy of the HCDP table with aligned structures. + */ + memcpy(&hcdp, tablep, sizeof(hcdp)); + + /* + * Perform a sanity check on the table. Table should have a signature of "HCDP" + * and it should be at least 82 bytes long to have any useful information. + */ + if ((strncmp(hcdp.signature, HCDP_SIGNATURE, HCDP_SIG_LEN) != 0)) + return; + if (hcdp.len < 82) + return; + +#ifdef SERIAL_DEBUG_HCDP + printk("setup_serial_hcdp(): table pointer = 0x%p\n", tablep); + printk(" sig = '%c%c%c%c'\n", + hcdp.signature[0], hcdp.signature[1], hcdp.signature[2], hcdp.signature[3]); + printk(" length = %d\n", hcdp.len); + printk(" Rev = %d\n", hcdp.rev); + printk(" OEM ID = %c%c%c%c%c%c\n", + hcdp.oemid[0], hcdp.oemid[1], hcdp.oemid[2], + hcdp.oemid[3], hcdp.oemid[4], hcdp.oemid[5]); + printk(" Number of entries = %d\n", hcdp.num_entries); +#endif + + /* + * Parse each device entry + */ + for (nr = 0; nr < hcdp.num_entries; nr++) { + hcdp_dev = hcdp.hcdp_dev + nr; + /* + * We will parse only the primary console device which is the first entry + * for these devices. We will ignore the rest of the entries for the same type + * device that has already been parsed and initialized + */ + if (hcdp_dev->type != HCDP_DEV_CONSOLE) + continue; + + iobase = ((u64) hcdp_dev->base_addr.addrhi << 32) | hcdp_dev->base_addr.addrlo; + gsi = hcdp_dev->global_int; +#ifdef SERIAL_DEBUG_HCDP + printk(" type = %s\n", + ((hcdp_dev->type == HCDP_DEV_CONSOLE) + ? "Headless Console" : ((hcdp_dev->type == HCDP_DEV_DEBUG) + ? "Debug port" : "Huh????"))); + printk(" Base address space = %s\n", + ((hcdp_dev->base_addr.space_id == ACPI_MEM_SPACE) + ?
"Memory Space" : ((hcdp_dev->base_addr.space_id == ACPI_IO_SPACE) + ? "I/O space" : "PCI space"))); + printk(" Base address = 0x%lx\n", iobase); + printk(" Global System Int = %d\n", gsi); + printk(" Baud rate = %lu\n", (unsigned long) hcdp_dev->baud); + printk(" Bits = %d\n", hcdp_dev->bits); + printk(" Clock rate = %d\n", hcdp_dev->clock_rate); + if (hcdp_dev->base_addr.space_id == ACPI_PCICONF_SPACE) { + printk(" PCI serial port:\n"); + printk(" Bus %d, Device %d, Vendor ID 0x%x, Dev ID 0x%x\n", + hcdp_dev->pci_bus, hcdp_dev->pci_dev, + hcdp_dev->pci_vendor_id, hcdp_dev->pci_dev_id); + } +#endif + /* + * Now fill in a port structure to update the 8250 port table.. + */ + if (hcdp_dev->clock_rate) + port.uartclk = hcdp_dev->clock_rate; + else + port.uartclk = DEFAULT_UARTCLK; + /* + * Check if this is an I/O mapped address or a memory mapped address + */ + if (hcdp_dev->base_addr.space_id == ACPI_MEM_SPACE) { + port.iobase = 0; + port.mapbase = iobase; + port.membase = ioremap(iobase, 64); + port.iotype = SERIAL_IO_MEM; + } else if (hcdp_dev->base_addr.space_id == ACPI_IO_SPACE) { + port.iobase = iobase; + port.mapbase = 0; + port.membase = NULL; + port.iotype = SERIAL_IO_PORT; + } else if (hcdp_dev->base_addr.space_id == ACPI_PCICONF_SPACE) { + printk("WARNING: No support for PCI serial console\n"); + return; + } + +#if 0 + /* + * Check if HCDP defines a port already in rs_table + */ + for (i = 0; i < serial_nr_ports; i++) { + if ((rs_table[i].port == serial_req.port) && + (rs_table[i].iomem_base==serial_req.iomem_base)) + break; + } + if (i == serial_nr_ports) { + /* + * We have reserved a slot for HCDP defined console port at + * HCDP_SERIAL_CONSOLE_PORT in rs_table which is not 0. This means + * using this slot would put the console at a device other than + * ttyS0. Users expect to see the console at ttyS0. Now that we + * have determined HCDP does describe a serial console and it is + * not one of the compiled in ports, let us move the entries in + * rs_table up by a slot towards HCDP_SERIAL_CONSOLE_PORT to make + * room for the HCDP console at ttyS0. We may go through this loop + * more than once if early_serial_setup() fails. Make sure we + * shift the entries in rs_table only once. + */ + if (shift_once) { + int j; + + for (j = HCDP_SERIAL_CONSOLE_PORT; j > 0; j--) + memcpy(rs_table+j, rs_table+j-1, + sizeof(struct serial_state)); + shift_once = 0; + } + serial_req.line = 0; + } else + serial_req.line = i; +#else + port.line = i++; /* XXX fix me */ +#endif + + port.irq = gsi; + port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; + if (gsi) + port.flags |= ASYNC_AUTO_IRQ; + + port.fifosize = 0; + port.hub6 = 0; + port.regshift = 0; + if (early_register_port(&port) < 0) { + printk("setup_serial_hcdp(): early_register_port() for HCDP serial " + "console port failed. 
Will try any additional consoles in HCDP.\n"); + continue; + } else if (hcdp_dev->type == HCDP_DEV_CONSOLE) + break; +#ifdef SERIAL_DEBUG_HCDP + printk("\n"); +#endif + } + +#ifdef SERIAL_DEBUG_HCDP + printk("Leaving setup_serial_hcdp()\n"); +#endif +} + +#ifdef CONFIG_IA64_EARLY_PRINTK_UART +unsigned long +hcdp_early_uart (void) +{ + efi_system_table_t *systab; + efi_config_table_t *config_tables; + hcdp_t *hcdp = 0; + hcdp_dev_t *dev; + int i; + + systab = (efi_system_table_t *) ia64_boot_param->efi_systab; + if (!systab) + return 0; + systab = __va(systab); + + config_tables = (efi_config_table_t *) systab->tables; + if (!config_tables) + return 0; + config_tables = __va(config_tables); + + for (i = 0; i < systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { + hcdp = (hcdp_t *) config_tables[i].table; + break; + } + } + if (!hcdp) + return 0; + hcdp = __va(hcdp); + + for (i = 0, dev = hcdp->hcdp_dev; i < hcdp->num_entries; i++, dev++) { + if (dev->type == HCDP_DEV_CONSOLE) + return (u64) dev->base_addr.addrhi << 32 | dev->base_addr.addrlo; + } + return 0; +} +#endif /* CONFIG_IA64_EARLY_PRINTK_UART */ diff -Nru a/drivers/serial/Config.in b/drivers/serial/Config.in --- a/drivers/serial/Config.in Tue Sep 17 23:47:51 2002 +++ b/drivers/serial/Config.in Tue Sep 17 23:47:51 2002 @@ -11,6 +11,10 @@ dep_tristate '8250/16550 and compatible serial support (EXPERIMENTAL)' CONFIG_SERIAL_8250 $CONFIG_EXPERIMENTAL dep_bool ' Console on 8250/16550 and compatible serial port (EXPERIMENTAL)' CONFIG_SERIAL_8250_CONSOLE $CONFIG_SERIAL_8250 $CONFIG_EXPERIMENTAL dep_tristate ' 8250/16550 PCMCIA device support' CONFIG_SERIAL_8250_CS $CONFIG_PCMCIA $CONFIG_SERIAL_8250 +if [ "$CONFIG_IA64" = "y" ]; then + dep_tristate ' 8250/16550 device discovery support via ACPI SPCR/DBGP tables' CONFIG_SERIAL_8250_ACPI + dep_tristate ' 8250/16550 device discovery support via EFI HCDP table' CONFIG_SERIAL_8250_HCDP +fi dep_mbool 'Extended 8250/16550 serial driver options' CONFIG_SERIAL_8250_EXTENDED $CONFIG_SERIAL_8250 dep_bool ' Support more than 4 serial ports' CONFIG_SERIAL_8250_MANY_PORTS $CONFIG_SERIAL_8250_EXTENDED diff -Nru a/drivers/serial/Makefile b/drivers/serial/Makefile --- a/drivers/serial/Makefile Tue Sep 17 23:47:51 2002 +++ b/drivers/serial/Makefile Tue Sep 17 23:47:51 2002 @@ -9,6 +9,8 @@ serial-8250-y := serial-8250-$(CONFIG_PCI) += 8250_pci.o serial-8250-$(CONFIG_ISAPNP) += 8250_pnp.o +obj-$(CONFIG_SERIAL_8250_ACPI) += 8250_acpi.o +obj-$(CONFIG_SERIAL_8250_HCDP) += 8250_hcdp.o obj-$(CONFIG_SERIAL_CORE) += core.o obj-$(CONFIG_SERIAL_21285) += 21285.o obj-$(CONFIG_SERIAL_8250) += 8250.o $(serial-8250-y) diff -Nru a/drivers/video/radeonfb.c b/drivers/video/radeonfb.c --- a/drivers/video/radeonfb.c Tue Sep 17 23:47:52 2002 +++ b/drivers/video/radeonfb.c Tue Sep 17 23:47:52 2002 @@ -233,8 +233,8 @@ u32 mmio_base_phys; u32 fb_base_phys; - u32 mmio_base; - u32 fb_base; + void *mmio_base; + void *fb_base; struct pci_dev *pdev; @@ -727,8 +727,7 @@ } /* map the regions */ - rinfo->mmio_base = (u32) ioremap (rinfo->mmio_base_phys, - RADEON_REGSIZE); + rinfo->mmio_base = ioremap (rinfo->mmio_base_phys, RADEON_REGSIZE); if (!rinfo->mmio_base) { printk ("radeonfb: cannot map MMIO\n"); release_mem_region (rinfo->mmio_base_phys, @@ -858,8 +857,7 @@ } } - rinfo->fb_base = (u32) ioremap (rinfo->fb_base_phys, - rinfo->video_ram); + rinfo->fb_base = ioremap (rinfo->fb_base_phys, rinfo->video_ram); if (!rinfo->fb_base) { printk ("radeonfb: cannot map FB\n"); iounmap 
((void*)rinfo->mmio_base); diff -Nru a/fs/binfmt_misc.c b/fs/binfmt_misc.c --- a/fs/binfmt_misc.c Tue Sep 17 23:47:51 2002 +++ b/fs/binfmt_misc.c Tue Sep 17 23:47:51 2002 @@ -36,6 +36,7 @@ static int enabled = 1; enum {Enabled, Magic}; +#define MISC_FMT_PRESERVE_ARGV0 (1<<31) typedef struct { struct list_head list; @@ -124,7 +125,9 @@ bprm->file = NULL; /* Build args for interpreter */ - remove_arg_zero(bprm); + if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { + remove_arg_zero(bprm); + } retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto _ret; bprm->argc++; @@ -289,6 +292,11 @@ *p++ = '\0'; if (!e->interpreter[0]) goto Einval; + + if (*p == 'P') { + p++; + e->flags |= MISC_FMT_PRESERVE_ARGV0; + } if (*p == '\n') p++; diff -Nru a/fs/block_dev.c b/fs/block_dev.c --- a/fs/block_dev.c Tue Sep 17 23:47:52 2002 +++ b/fs/block_dev.c Tue Sep 17 23:47:52 2002 @@ -332,6 +332,29 @@ return bdev; } +long nr_blockdev_pages(void) +{ + long ret = 0; + int i; + + spin_lock(&bdev_lock); + for (i = 0; i < ARRAY_SIZE(bdev_hashtable); i++) { + struct list_head *head = &bdev_hashtable[i]; + struct list_head *lh; + + if (head == NULL) + continue; + list_for_each(lh, head) { + struct block_device *bdev; + + bdev = list_entry(lh, struct block_device, bd_hash); + ret += bdev->bd_inode->i_mapping->nrpages; + } + } + spin_unlock(&bdev_lock); + return ret; +} + static inline void __bd_forget(struct inode *inode) { list_del_init(&inode->i_devices); diff -Nru a/fs/buffer.c b/fs/buffer.c --- a/fs/buffer.c Tue Sep 17 23:47:51 2002 +++ b/fs/buffer.c Tue Sep 17 23:47:51 2002 @@ -61,10 +61,8 @@ return; enough++; printk("buffer layer error at %s:%d\n", file, line); -#ifdef CONFIG_X86 printk("Pass this trace through ksymoops for reporting\n"); - show_stack(0); -#endif + dump_stack(); } EXPORT_SYMBOL(__buffer_error); diff -Nru a/fs/exec.c b/fs/exec.c --- a/fs/exec.c Tue Sep 17 23:47:51 2002 +++ b/fs/exec.c Tue Sep 17 23:47:51 2002 @@ -394,7 +394,7 @@ mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = STACK_TOP; #endif - mpnt->vm_page_prot = PAGE_COPY; + mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7]; mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; diff -Nru a/fs/ext3/balloc.c b/fs/ext3/balloc.c --- a/fs/ext3/balloc.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/balloc.c Tue Sep 17 23:47:51 2002 @@ -46,18 +46,18 @@ unsigned long desc; struct ext3_group_desc * gdp; - if (block_group >= sb->u.ext3_sb.s_groups_count) { + if (block_group >= EXT3_SB(sb)->s_groups_count) { ext3_error (sb, "ext3_get_group_desc", "block_group >= groups_count - " "block_group = %d, groups_count = %lu", - block_group, sb->u.ext3_sb.s_groups_count); + block_group, EXT3_SB(sb)->s_groups_count); return NULL; } group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); desc = block_group % EXT3_DESC_PER_BLOCK(sb); - if (!sb->u.ext3_sb.s_group_desc[group_desc]) { + if (!EXT3_SB(sb)->s_group_desc[group_desc]) { ext3_error (sb, "ext3_get_group_desc", "Group descriptor not loaded - " "block_group = %d, group_desc = %lu, desc = %lu", @@ -66,9 +66,9 @@ } gdp = (struct ext3_group_desc *) - sb->u.ext3_sb.s_group_desc[group_desc]->b_data; + EXT3_SB(sb)->s_group_desc[group_desc]->b_data; if (bh) - *bh = sb->u.ext3_sb.s_group_desc[group_desc]; + *bh = EXT3_SB(sb)->s_group_desc[group_desc]; return gdp + desc; } @@ -119,7 +119,7 @@ return; } lock_super (sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; if (block < le32_to_cpu(es->s_first_data_block) || (block + count) > 
le32_to_cpu(es->s_blocks_count)) { ext3_error (sb, "ext3_free_blocks", @@ -155,9 +155,9 @@ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || in_range (block, le32_to_cpu(gdp->bg_inode_table), - sb->u.ext3_sb.s_itb_per_group) || + EXT3_SB(sb)->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), - sb->u.ext3_sb.s_itb_per_group)) + EXT3_SB(sb)->s_itb_per_group)) ext3_error (sb, "ext3_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", @@ -183,8 +183,8 @@ if (err) goto error_return; - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto error_return; @@ -253,8 +253,8 @@ if (!err) err = ret; /* And the superblock */ - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock"); - ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock"); + ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); if (!err) err = ret; if (overflow && !err) { @@ -408,12 +408,12 @@ } lock_super(sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; if (le32_to_cpu(es->s_free_blocks_count) <= le32_to_cpu(es->s_r_blocks_count) && - ((sb->u.ext3_sb.s_resuid != current->fsuid) && - (sb->u.ext3_sb.s_resgid == 0 || - !in_group_p(sb->u.ext3_sb.s_resgid)) && + ((EXT3_SB(sb)->s_resuid != current->fsuid) && + (EXT3_SB(sb)->s_resgid == 0 || + !in_group_p(EXT3_SB(sb)->s_resgid)) && !capable(CAP_SYS_RESOURCE))) goto out; @@ -464,9 +464,9 @@ * Now search the rest of the groups. We assume that * i and gdp correctly point to the last group visited. 
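 *
 * (The loop below bumps group_no once per iteration and wraps it back
 * to zero at EXT3_SB(sb)->s_groups_count, so each block group is
 * considered at most once per allocation attempt.)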
*/ - for (bit = 0; bit < sb->u.ext3_sb.s_groups_count; bit++) { + for (bit = 0; bit < EXT3_SB(sb)->s_groups_count; bit++) { group_no++; - if (group_no >= sb->u.ext3_sb.s_groups_count) + if (group_no >= EXT3_SB(sb)->s_groups_count) group_no = 0; gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) { @@ -518,8 +518,8 @@ if (fatal) goto out; - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (fatal) goto out; @@ -529,7 +529,7 @@ if (target_block == le32_to_cpu(gdp->bg_block_bitmap) || target_block == le32_to_cpu(gdp->bg_inode_bitmap) || in_range(target_block, le32_to_cpu(gdp->bg_inode_table), - sb->u.ext3_sb.s_itb_per_group)) + EXT3_SB(sb)->s_itb_per_group)) ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " "block = %u", target_block); @@ -594,9 +594,9 @@ if (!fatal) fatal = err; - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "journal_dirty_metadata for superblock"); - err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); if (!fatal) fatal = err; @@ -637,11 +637,11 @@ int i; lock_super(sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { gdp = ext3_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -662,7 +662,7 @@ unlock_super(sb); return bitmap_count; #else - return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); + return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); #endif } @@ -671,7 +671,7 @@ unsigned char * map) { return ext3_test_bit ((block - - le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) % + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % EXT3_BLOCKS_PER_GROUP(sb), map); } @@ -738,11 +738,11 @@ struct ext3_group_desc *gdp; int i; - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { gdp = ext3_get_group_desc (sb, i, NULL); if (!gdp) continue; @@ -776,7 +776,7 @@ "Inode bitmap for group %d is marked free", i); - for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++) + for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++) if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j, sb, bitmap_bh->b_data)) ext3_error (sb, "ext3_check_blocks_bitmap", diff -Nru a/fs/ext3/dir.c b/fs/ext3/dir.c --- a/fs/ext3/dir.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/dir.c Tue Sep 17 23:47:51 2002 @@ -54,7 +54,7 @@ else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) error_msg = "directory entry across blocks"; else if (le32_to_cpu(de->inode) > - le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) + le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) error_msg = "inode out of bounds"; if (error_msg != NULL) diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c --- a/fs/ext3/ialloc.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/ialloc.c Tue Sep 17 23:47:51 2002 @@ -127,7 +127,7 @@ clear_inode (inode); lock_super (sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext3_error (sb, "ext3_free_inode", "reserved or nonexistent inode %lu", ino); @@ -155,8 +155,8 @@ fatal = 
ext3_journal_get_write_access(handle, bh2); if (fatal) goto error_return; - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); - fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access"); + fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (fatal) goto error_return; if (gdp) { @@ -171,9 +171,9 @@ if (!fatal) fatal = err; es->s_free_inodes_count = cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); if (!fatal) fatal = err; } BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata"); @@ -222,16 +222,16 @@ ei = EXT3_I(inode); lock_super (sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; repeat: gdp = NULL; i = 0; if (S_ISDIR(mode)) { avefreei = le32_to_cpu(es->s_free_inodes_count) / - sb->u.ext3_sb.s_groups_count; + EXT3_SB(sb)->s_groups_count; if (!gdp) { - for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { + for (j = 0; j < EXT3_SB(sb)->s_groups_count; j++) { struct buffer_head *temp_buffer; tmp = ext3_get_group_desc (sb, j, &temp_buffer); if (tmp && @@ -261,10 +261,10 @@ * Use a quadratic hash to find a group with a * free inode */ - for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { + for (j = 1; j < EXT3_SB(sb)->s_groups_count; j <<= 1) { i += j; - if (i >= sb->u.ext3_sb.s_groups_count) - i -= sb->u.ext3_sb.s_groups_count; + if (i >= EXT3_SB(sb)->s_groups_count) + i -= EXT3_SB(sb)->s_groups_count; tmp = ext3_get_group_desc (sb, i, &bh2); if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { @@ -278,8 +278,8 @@ * That failed: try linear search for a free inode */ i = EXT3_I(dir)->i_block_group + 1; - for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { - if (++i >= sb->u.ext3_sb.s_groups_count) + for (j = 2; j < EXT3_SB(sb)->s_groups_count; j++) { + if (++i >= EXT3_SB(sb)->s_groups_count) i = 0; tmp = ext3_get_group_desc (sb, i, &bh2); if (tmp && @@ -357,13 +357,13 @@ err = ext3_journal_dirty_metadata(handle, bh2); if (err) goto fail; - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto fail; es->s_free_inodes_count = cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); sb->s_dirt = 1; if (err) goto fail; @@ -417,7 +417,7 @@ if (IS_DIRSYNC(inode)) handle->h_sync = 1; insert_inode_hash(inode); - inode->i_generation = sb->u.ext3_sb.s_next_generation++; + inode->i_generation = EXT3_SB(sb)->s_next_generation++; ei->i_state = EXT3_STATE_NEW; err = ext3_mark_inode_dirty(handle, inode); @@ -512,11 +512,11 @@ int i; lock_super (sb); - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { gdp = ext3_get_group_desc (sb, i, NULL); if (!gdp) continue; @@ -537,7 +537,7 @@ unlock_super(sb); return desc_count; #else - return 
le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); + return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count); #endif } @@ -551,11 +551,11 @@ struct ext3_group_desc * gdp; int i; - es = sb->u.ext3_sb.s_es; + es = EXT3_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { gdp = ext3_get_group_desc (sb, i, NULL); if (!gdp) continue; diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c --- a/fs/ext3/inode.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/inode.c Tue Sep 17 23:47:51 2002 @@ -471,7 +471,7 @@ * the same cylinder group then. */ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block); + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); } /** @@ -2141,20 +2141,20 @@ inode->i_ino != EXT3_JOURNAL_INO && inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || inode->i_ino > le32_to_cpu( - inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { + EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { ext3_error (inode->i_sb, "ext3_get_inode_loc", "bad inode number: %lu", inode->i_ino); goto bad_inode; } block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); - if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { + if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { ext3_error (inode->i_sb, "ext3_get_inode_loc", "group >= groups count"); goto bad_inode; } group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); - bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; + bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; if (!bh) { ext3_error (inode->i_sb, "ext3_get_inode_loc", "Descriptor not loaded"); @@ -2224,7 +2224,7 @@ */ if (inode->i_nlink == 0) { if (inode->i_mode == 0 || - !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); goto bad_inode; @@ -2394,7 +2394,7 @@ * created, add a flag to the superblock. 
*/ err = ext3_journal_get_write_access(handle, - sb->u.ext3_sb.s_sbh); + EXT3_SB(sb)->s_sbh); if (err) goto out_brelse; ext3_update_dynamic_rev(sb); @@ -2403,7 +2403,7 @@ sb->s_dirt = 1; handle->h_sync = 1; err = ext3_journal_dirty_metadata(handle, - sb->u.ext3_sb.s_sbh); + EXT3_SB(sb)->s_sbh); } } } diff -Nru a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c --- a/fs/ext3/ioctl.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/ioctl.c Tue Sep 17 23:47:51 2002 @@ -159,12 +159,12 @@ int ret = 0; set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); - if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { + add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); + if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { schedule(); ret = 1; } - remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); + remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); return ret; } #endif diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c --- a/fs/ext3/namei.c Tue Sep 17 23:47:51 2002 +++ b/fs/ext3/namei.c Tue Sep 17 23:47:51 2002 @@ -729,8 +729,8 @@ J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto out_unlock; @@ -741,7 +741,7 @@ /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); rc = ext3_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c --- a/fs/ext3/super.c Tue Sep 17 23:47:52 2002 +++ b/fs/ext3/super.c Tue Sep 17 23:47:52 2002 @@ -120,7 +120,7 @@ /* If no overrides were specified on the mount, then fall back * to the default behaviour set in the filesystem's superblock * on disk. */ - switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { + switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) { case EXT3_ERRORS_PANIC: return EXT3_ERRORS_PANIC; case EXT3_ERRORS_RO: @@ -268,9 +268,9 @@ return; printk (KERN_CRIT "Remounting filesystem read-only\n"); - sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; sb->s_flags |= MS_RDONLY; - sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; + EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; journal_abort(EXT3_SB(sb)->s_journal, -EIO); } @@ -439,7 +439,8 @@ ext3_blkdev_remove(sbi); } clear_ro_after(sb); - + sb->u.generic_sbp = NULL; + kfree(sbi); return; } @@ -877,7 +878,7 @@ sb->s_flags &= ~MS_RDONLY; } - if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { + if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " "clearing orphan list.\n"); @@ -949,7 +950,7 @@ { struct buffer_head * bh; struct ext3_super_block *es = 0; - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_sb_info *sbi; unsigned long sb_block = 1; unsigned long logic_sb_block = 1; unsigned long offset = 0; @@ -970,7 +971,11 @@ * This is important for devices that have a hardware * sectorsize that is larger than the default. 
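 *
 * (Note that from this patch on the ext3-private state no longer lives
 * in the sb->u union: it is kmalloc'ed just below, hung off
 * sb->u.generic_sbp, and reached through the new EXT3_SB() inline,
 * which is why the hunks above rewrite every sb->u.ext3_sb.X access
 * as EXT3_SB(sb)->X.)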
*/ - + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->u.generic_sbp = sbi; + memset(sbi, 0, sizeof(*sbi)); sbi->s_mount_opt = 0; sbi->s_resuid = EXT3_DEF_RESUID; sbi->s_resgid = EXT3_DEF_RESGID; @@ -1266,6 +1271,8 @@ ext3_blkdev_remove(sbi); brelse(bh); out_fail: + sb->u.generic_sbp = NULL; + kfree(sbi); return -EINVAL; } @@ -1520,11 +1527,11 @@ int sync) { es->s_wtime = cpu_to_le32(CURRENT_TIME); - BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); - mark_buffer_dirty(sb->u.ext3_sb.s_sbh); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "marking dirty"); + mark_buffer_dirty(EXT3_SB(sb)->s_sbh); if (sync) { - ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); - wait_on_buffer(sb->u.ext3_sb.s_sbh); + ll_rw_block(WRITE, 1, &EXT3_SB(sb)->s_sbh); + wait_on_buffer(EXT3_SB(sb)->s_sbh); } } @@ -1575,7 +1582,7 @@ ext3_warning(sb, __FUNCTION__, "Marking fs in need of " "filesystem check."); - sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; es->s_state |= cpu_to_le16(EXT3_ERROR_FS); ext3_commit_super (sb, es, 1); diff -Nru a/fs/fcntl.c b/fs/fcntl.c --- a/fs/fcntl.c Tue Sep 17 23:47:51 2002 +++ b/fs/fcntl.c Tue Sep 17 23:47:51 2002 @@ -329,6 +329,7 @@ * to fix this will be in libc. */ err = filp->f_owner.pid; + force_successful_syscall_return(); break; case F_SETOWN: err = f_setown(filp, arg, 1); diff -Nru a/fs/proc/array.c b/fs/proc/array.c --- a/fs/proc/array.c Tue Sep 17 23:47:51 2002 +++ b/fs/proc/array.c Tue Sep 17 23:47:51 2002 @@ -487,7 +487,18 @@ while (vma) { pgd_t *pgd = pgd_offset(mm, vma->vm_start); int pages = 0, shared = 0, dirty = 0, total = 0; + if (is_vm_hugetlb_page(vma)) { + int num_pages = ((vma->vm_end - vma->vm_start)/PAGE_SIZE); + resident += num_pages; + if (!(vma->vm_flags & VM_DONTCOPY)) + share += num_pages; + if (vma->vm_flags & VM_WRITE) + dt += num_pages; + drs += num_pages; + vma = vma->vm_next; + continue; + } statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total); resident += pages; share += shared; diff -Nru a/fs/proc/base.c b/fs/proc/base.c --- a/fs/proc/base.c Tue Sep 17 23:47:51 2002 +++ b/fs/proc/base.c Tue Sep 17 23:47:51 2002 @@ -508,7 +508,24 @@ } #endif +static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + break; + case 1: + file->f_pos += offset; + break; + default: + return -EINVAL; + } + force_successful_syscall_return(); + return file->f_pos; +} + static struct file_operations proc_mem_operations = { + llseek: mem_lseek, read: mem_read, write: mem_write, open: mem_open, diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c --- a/fs/proc/proc_misc.c Tue Sep 17 23:47:51 2002 +++ b/fs/proc/proc_misc.c Tue Sep 17 23:47:51 2002 @@ -136,16 +136,8 @@ struct sysinfo i; int len, committed; struct page_state ps; - int cpu; unsigned long inactive; unsigned long active; - unsigned long flushes = 0; - unsigned long non_flushes = 0; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - flushes += mmu_gathers[cpu].flushes; - non_flushes += mmu_gathers[cpu].avoided_flushes; - } get_page_state(&ps); get_zone_counts(&active, &inactive); @@ -165,6 +157,7 @@ "MemTotal: %8lu kB\n" "MemFree: %8lu kB\n" "MemShared: %8lu kB\n" + "Buffers: %8lu kB\n" "Cached: %8lu kB\n" "SwapCached: %8lu kB\n" "Active: %8lu kB\n" @@ -177,15 +170,15 @@ "SwapFree: %8lu kB\n" "Dirty: %8lu kB\n" "Writeback: %8lu kB\n" + "Mapped: %8lu kB\n" "Committed_AS: %8u kB\n" "PageTables: %8lu kB\n" - "ReverseMaps: %8lu\n" - "TLB flushes: %8lu\n" - "non flushes: 
%8lu\n", + "ReverseMaps: %8lu\n", K(i.totalram), K(i.freeram), K(i.sharedram), - K(ps.nr_pagecache-swapper_space.nrpages), + K(i.bufferram), + K(ps.nr_pagecache-swapper_space.nrpages-i.bufferram), K(swapper_space.nrpages), K(active), K(inactive), @@ -197,13 +190,25 @@ K(i.freeswap), K(ps.nr_dirty), K(ps.nr_writeback), + K(ps.nr_mapped), K(committed), K(ps.nr_page_table_pages), - ps.nr_reverse_maps, - flushes, - non_flushes + ps.nr_reverse_maps ); +#ifdef CONFIG_HUGETLB_PAGE + { + extern unsigned long htlbpagemem, htlbzone_pages; + len += sprintf(page + len, + "HugePages: %8lu\n" + "Available: %8lu\n" + "Size: %8lu kB\n", + htlbzone_pages, + htlbpagemem, + HPAGE_SIZE/1024); + } + +#endif return proc_calc_metrics(page, start, off, count, eof, len); #undef K } diff -Nru a/include/asm-alpha/agp.h b/include/asm-alpha/agp.h --- a/include/asm-alpha/agp.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-alpha/agp.h Tue Sep 17 23:47:51 2002 @@ -8,4 +8,11 @@ #define flush_agp_mappings() #define flush_agp_cache() mb() +/* + * Page-protection value to be used for AGP memory mapped into kernel space. For + * platforms which use coherent AGP DMA, this can be PAGE_KERNEL. For others, it needs to + * be an uncached mapping (such as write-combining). + */ +#define PAGE_AGP PAGE_KERNEL_NOCACHE /* XXX fix me */ + #endif diff -Nru a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h --- a/include/asm-generic/tlb.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-generic/tlb.h Tue Sep 17 23:47:51 2002 @@ -21,7 +21,7 @@ * and page free order so much.. */ #ifdef CONFIG_SMP - #define FREE_PTE_NR 507 + #define FREE_PTE_NR 506 #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) #else #define FREE_PTE_NR 1 @@ -40,8 +40,6 @@ unsigned int fullmm; /* non-zero means full mm flush */ unsigned long freed; struct page * pages[FREE_PTE_NR]; - unsigned long flushes;/* stats: count avoided flushes */ - unsigned long avoided_flushes; } mmu_gather_t; /* Users of the generic TLB shootdown code must declare this storage space. */ @@ -67,17 +65,10 @@ static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end) { - unsigned long nr; - - if (!tlb->need_flush) { - tlb->avoided_flushes++; + if (!tlb->need_flush) return; - } tlb->need_flush = 0; - tlb->flushes++; - tlb_flush(tlb); - nr = tlb->nr; if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; diff -Nru a/include/asm-i386/agp.h b/include/asm-i386/agp.h --- a/include/asm-i386/agp.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-i386/agp.h Tue Sep 17 23:47:51 2002 @@ -20,4 +20,11 @@ worth it. Would need a page for it. */ #define flush_agp_cache() asm volatile("wbinvd":::"memory") +/* + * Page-protection value to be used for AGP memory mapped into kernel space. For + * platforms which use coherent AGP DMA, this can be PAGE_KERNEL. For others, it needs to + * be an uncached mapping (such as write-combining). 
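+ *
+ * (The definition below takes the conservative choice of a fully
+ * uncached mapping; a true write-combining mapping on IA-32 would have
+ * to come from an MTRR covering the aperture rather than from these
+ * page-protection bits.)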
+ */ +#define PAGE_AGP PAGE_KERNEL_NOCACHE + #endif diff -Nru a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h --- a/include/asm-i386/hw_irq.h Tue Sep 17 23:47:52 2002 +++ b/include/asm-i386/hw_irq.h Tue Sep 17 23:47:52 2002 @@ -107,4 +107,6 @@ static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} #endif +extern irq_desc_t irq_desc [NR_IRQS]; + #endif /* _ASM_HW_IRQ_H */ diff -Nru a/include/asm-i386/page.h b/include/asm-i386/page.h --- a/include/asm-i386/page.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-i386/page.h Tue Sep 17 23:47:51 2002 @@ -44,13 +44,21 @@ typedef struct { unsigned long long pmd; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#define HPAGE_SHIFT 21 #else typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; #define pte_val(x) ((x).pte_low) +#define HPAGE_SHIFT 22 #endif #define PTE_MASK PAGE_MASK + +#ifdef CONFIG_HUGETLB_PAGE +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#endif typedef struct { unsigned long pgprot; } pgprot_t; diff -Nru a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h --- a/include/asm-i386/ptrace.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-i386/ptrace.h Tue Sep 17 23:47:51 2002 @@ -58,6 +58,7 @@ #define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs)) #define instruction_pointer(regs) ((regs)->eip) extern void show_regs(struct pt_regs *); +#define force_successful_syscall_return() do { } while (0) #endif #endif diff -Nru a/include/asm-ia64/hardirq.h b/include/asm-ia64/hardirq.h --- a/include/asm-ia64/hardirq.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/hardirq.h Tue Sep 17 23:47:51 2002 @@ -83,6 +83,7 @@ #define hardirq_trylock() (!in_interrupt()) #define hardirq_endlock() do { } while (0) +#define in_atomic() (preempt_count() != 0) #define irq_enter() (preempt_count() += HARDIRQ_OFFSET) #if CONFIG_PREEMPT diff -Nru a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h --- a/include/asm-ia64/ide.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/ide.h Tue Sep 17 23:47:51 2002 @@ -90,20 +90,6 @@ #endif } -#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) -#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) -#define ide_check_region(from,extent) check_region((from), (extent)) -#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) -#define ide_release_region(from,extent) release_region((from), (extent)) - -/* - * The following are not needed for the non-m68k ports - */ -#define ide_ack_intr(hwif) (1) -#define ide_fix_driveid(id) do {} while (0) -#define ide_release_lock(lock) do {} while (0) -#define ide_get_lock(lock, hdlr, data) do {} while (0) - #endif /* __KERNEL__ */ #endif /* __ASM_IA64_IDE_H */ diff -Nru a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h --- a/include/asm-ia64/mmu_context.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/mmu_context.h Tue Sep 17 23:47:51 2002 @@ -110,6 +110,10 @@ rr2 = rr0 + 2*rid_incr; rr3 = rr0 + 3*rid_incr; rr4 = rr0 + 4*rid_incr; +#ifdef CONFIG_HUGETLB_PAGE + rr4 = (rr4 & (~(0xfcUL))) | (HPAGE_SHIFT << 2); +#endif + ia64_set_rr(0x0000000000000000, rr0); ia64_set_rr(0x2000000000000000, rr1); ia64_set_rr(0x4000000000000000, rr2); diff -Nru a/include/asm-ia64/page.h b/include/asm-ia64/page.h --- 
a/include/asm-ia64/page.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/page.h Tue Sep 17 23:47:51 2002 @@ -30,6 +30,32 @@ #define PAGE_MASK (~(PAGE_SIZE - 1)) #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) +#ifdef CONFIG_HUGETLB_PAGE + +# if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB) +# define HPAGE_SHIFT 32 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB) +# define HPAGE_SHIFT 28 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB) +# define HPAGE_SHIFT 26 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB) +# define HPAGE_SHIFT 24 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) +# define HPAGE_SHIFT 22 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB) +# define HPAGE_SHIFT 20 +# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB) +# define HPAGE_SHIFT 18 +# else +# error Unsupported IA-64 HugeTLB Page Size! +# endif + +# define REGION_HPAGE (4UL) /* note: this is hardcoded in mmu_context.h:reload_context()!*/ +# define REGION_SHIFT 61 +# define HPAGE_SIZE (__IA64_UL_CONST(1) << HPAGE_SHIFT) +# define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#endif /* CONFIG_HUGETLB_PAGE */ + #ifdef __ASSEMBLY__ # define __pa(x) ((x) - PAGE_OFFSET) # define __va(x) ((x) + PAGE_OFFSET) @@ -86,6 +112,12 @@ #define REGION_SIZE REGION_NUMBER(1) #define REGION_KERNEL 7 + +#ifdef CONFIG_HUGETLB_PAGE +# define htlbpage_to_page(x) ((REGION_NUMBER(x) << 61) \ + | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT))) +# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#endif #if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) # define ia64_abort() __builtin_trap() diff -Nru a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h --- a/include/asm-ia64/pal.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/pal.h Tue Sep 17 23:47:51 2002 @@ -78,6 +78,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * Data types needed to pass information into PAL procedures and @@ -649,12 +650,43 @@ extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); +extern void ia64_save_scratch_fpregs (struct ia64_fpreg *); +extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); -#define PAL_CALL(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3, 0) -#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3, 1) -#define PAL_CALL_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_stacked(a0, a1, a2, a3) -#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_static(a0, a1, a2, a3) -#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3) +#define PAL_CALL(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3, 0); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3, 1); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) 
do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) typedef int (*ia64_pal_handler) (u64, ...); extern ia64_pal_handler ia64_pal; diff -Nru a/include/asm-ia64/perfmon.h b/include/asm-ia64/perfmon.h --- a/include/asm-ia64/perfmon.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/perfmon.h Tue Sep 17 23:47:51 2002 @@ -45,6 +45,7 @@ * PMC flags */ #define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ +#define PFM_REGFL_RANDOM 0x2 /* randomize sampling periods */ /* * PMD/PMC/IBR/DBR return flags (ignored on input) @@ -86,8 +87,10 @@ unsigned long reg_short_reset;/* reset after counter overflow (small) */ unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ + unsigned long reg_random_seed; /* seed value when randomization is used */ + unsigned long reg_random_mask; /* bitmask used to limit random value */ - unsigned long reserved[16]; /* for future use */ + unsigned long reserved[14]; /* for future use */ } pfarg_reg_t; typedef struct { @@ -132,28 +135,28 @@ #define PFM_VERSION_MINOR(x) ((x) & 0xffff) /* - * Entry header in the sampling buffer. - * The header is directly followed with the PMDS saved in increasing index - * order: PMD4, PMD5, .... How many PMDs are present is determined by the - * user program during context creation. + * Entry header in the sampling buffer. The header is directly followed + * with the PMDs saved in increasing index order: PMD4, PMD5, .... How + * many PMDs are present is determined by the user program during + * context creation. * - * XXX: in this version of the entry, only up to 64 registers can be recorded - * This should be enough for quite some time. Always check sampling format - * before parsing entries! + * XXX: in this version of the entry, only up to 64 registers can be + * recorded. This should be enough for quite some time. Always check + * sampling format before parsing entries! * - * Inn the case where multiple counters have overflowed at the same time, the - * rate field indicate the initial value of the first PMD, based on the index. - * For instance, if PMD2 and PMD5 have ovewrflowed for this entry, the rate field - * will show the initial value of PMD2. + * In the case where multiple counters overflow at the same time, the + * last_reset_value member indicates the initial value of the PMD with + * the smallest index. For instance, if PMD2 and PMD5 have overflowed, + * the last_reset_value member contains the initial value of PMD2. 
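+ *
+ * (Concretely: if a context was created to record PMD4, PMD5 and PMD7,
+ * each entry is one perfmon_smpl_entry_t immediately followed by three
+ * 8-byte values, those of PMD4, PMD5 and PMD7, in that order.)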
*/ typedef struct { - int pid; /* identification of process */ - int cpu; /* which cpu was used */ - unsigned long rate; /* initial value of overflowed counter */ - unsigned long stamp; /* timestamp */ - unsigned long ip; /* where did the overflow interrupt happened */ - unsigned long regs; /* bitmask of which registers overflowed */ - unsigned long period; /* sampling period used by overflowed counter (smallest pmd index) */ + int pid; /* identification of process */ + int cpu; /* which cpu was used */ + unsigned long last_reset_value; /* initial value of counter that overflowed */ + unsigned long stamp; /* timestamp */ + unsigned long ip; /* where the overflow interrupt happened */ + unsigned long regs; /* bitmask of which registers overflowed */ + unsigned long period; /* unused */ } perfmon_smpl_entry_t; extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg); diff -Nru a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h --- a/include/asm-ia64/processor.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/processor.h Tue Sep 17 23:47:51 2002 @@ -236,7 +236,15 @@ __u64 ssd; /* IA32 stack selector descriptor */ __u64 old_k1; /* old value of ar.k1 */ __u64 old_iob; /* old IOBase value */ -# define INIT_THREAD_IA32 0, 0, 0x17800000037fULL, 0, 0, 0, 0, 0, 0, +# define INIT_THREAD_IA32 .eflag = 0, \ + .fsr = 0, \ + .fcr = 0x17800000037fULL, \ + .fir = 0, \ + .fdr = 0, \ + .csd = 0, \ + .ssd = 0, \ + .old_k1 = 0, \ + .old_iob = 0, #else # define INIT_THREAD_IA32 #endif /* CONFIG_IA32_SUPPORT */ @@ -248,7 +256,13 @@ atomic_t pfm_notifiers_check; /* when >0, will cleanup ctx_notify_task in tasklist */ atomic_t pfm_owners_check; /* when >0, will cleanup ctx_owner in tasklist */ void *pfm_smpl_buf_list; /* list of sampling buffers to vfree */ -# define INIT_THREAD_PM {0, }, {0, }, 0, NULL, {0}, {0}, NULL, +# define INIT_THREAD_PM .pmc = {0, }, \ + .pmd = {0, }, \ + .pfm_ovfl_block_reset = 0, \ + .pfm_context = NULL, \ + .pfm_notifiers_check = { 0 }, \ + .pfm_owners_check = { 0 }, \ + .pfm_smpl_buf_list = NULL, #else # define INIT_THREAD_PM #endif @@ -258,16 +272,17 @@ }; #define INIT_THREAD { \ - flags: 0, \ - ksp: 0, \ - map_base: DEFAULT_MAP_BASE, \ - task_size: DEFAULT_TASK_SIZE, \ - siginfo: 0, \ + .flags = 0, \ + .ksp = 0, \ + .map_base = DEFAULT_MAP_BASE, \ + .task_size = DEFAULT_TASK_SIZE, \ + .siginfo = 0, \ + .last_fph_cpu = 0, \ INIT_THREAD_IA32 \ INIT_THREAD_PM \ - dbr: {0, }, \ - ibr: {0, }, \ - fph: {{{{0}}}, } \ + .dbr = {0, }, \ + .ibr = {0, }, \ + .fph = {{{{0}}}, } \ } #define start_thread(regs,new_ip,new_sp) do { \ diff -Nru a/include/asm-ia64/rse.h b/include/asm-ia64/rse.h --- a/include/asm-ia64/rse.h Tue Sep 17 23:47:52 2002 +++ b/include/asm-ia64/rse.h Tue Sep 17 23:47:52 2002 @@ -37,9 +37,9 @@ } /* - * Calcuate the number of registers in the dirty partition starting at - * BSPSTORE with a size of DIRTY bytes. This isn't simply DIRTY - * divided by eight because the 64th slot is used to store ar.rnat. + * Calculate the number of registers in the dirty partition starting at BSPSTORE and + * ending at BSP. This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores + * ar.rnat. */ static __inline__ unsigned long ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp) diff -Nru a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h --- a/include/asm-ia64/sal.h Tue Sep 17 23:47:52 2002 +++ b/include/asm-ia64/sal.h Tue Sep 17 23:47:52 2002 @@ -38,9 +38,12 @@ # define SAL_CALL(result,args...)
do { \ unsigned long flags; \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ spin_lock_irqsave(&sal_lock, flags); \ __SAL_CALL(result,args); \ spin_unlock_irqrestore(&sal_lock, flags); \ + ia64_load_scratch_fpregs(fr); \ } while (0) #define SAL_SET_VECTORS 0x01000000 diff -Nru a/include/asm-ia64/siginfo.h b/include/asm-ia64/siginfo.h --- a/include/asm-ia64/siginfo.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/siginfo.h Tue Sep 17 23:47:51 2002 @@ -66,6 +66,7 @@ long _band; /* POLL_IN, POLL_OUT, POLL_MSG (XPG requires a "long") */ int _fd; } _sigpoll; + /* SIGPROF */ struct { pid_t _pid; /* which child */ diff -Nru a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h --- a/include/asm-ia64/signal.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/signal.h Tue Sep 17 23:47:51 2002 @@ -166,7 +166,6 @@ # include -#define HAVE_ARCH_GET_SIGNAL_TO_DELIVER #define HAVE_ARCH_SYS_PAUSE #endif /* __KERNEL__ */ diff -Nru a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h --- a/include/asm-ia64/spinlock.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/spinlock.h Tue Sep 17 23:47:51 2002 @@ -108,7 +108,8 @@ } rwlock_t; #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } -#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) +#define rwlock_is_locked(x) (*(volatile int *) (x) != 0) #define _raw_read_lock(rw) \ do { \ diff -Nru a/include/asm-ia64/suspend.h b/include/asm-ia64/suspend.h --- a/include/asm-ia64/suspend.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/suspend.h Tue Sep 17 23:47:51 2002 @@ -0,0 +1 @@ +/* dummy (must be non-empty to prevent prejudicial removal...) */ diff -Nru a/include/asm-ia64/system.h b/include/asm-ia64/system.h --- a/include/asm-ia64/system.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/system.h Tue Sep 17 23:47:51 2002 @@ -148,7 +148,7 @@ "cmp.ne p6,p7=%1,r0;;" \ "(p6) ssm psr.i;" \ "(p7) rsm psr.i;;" \ - "srlz.d" \ + "(p6) srlz.d" \ : "=&r" (old_psr) : "r"((psr) & IA64_PSR_I) \ : "p6", "p7", "memory"); \ if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I)) { \ @@ -174,6 +174,13 @@ #define local_irq_enable() __asm__ __volatile__ (";; ssm psr.i;; srlz.d" ::: "memory") #define local_save_flags(flags) __asm__ __volatile__ ("mov %0=psr" : "=r" (flags) :: "memory") +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + local_save_flags(flags); \ + (flags & IA64_PSR_I) == 0; \ +}) + /* * Force an unresolved reference if someone tries to use * ia64_fetch_and_add() with a bad value. @@ -367,14 +374,14 @@ extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#if defined(CONFIG_SMP) && defined(CONFIG_PERFMON) +#ifdef CONFIG_PERFMON DECLARE_PER_CPU(int, pfm_syst_wide); # define PERFMON_IS_SYSWIDE() (get_cpu_var(pfm_syst_wide) != 0) #else # define PERFMON_IS_SYSWIDE() (0) #endif -#define __switch_to(prev,next,last) do { \ +#define __switch_to(prev,next,last) do { \ if (((prev)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \ || IS_IA32_PROCESS(ia64_task_regs(prev)) || PERFMON_IS_SYSWIDE()) \ ia64_save_extra(prev); \ diff -Nru a/include/asm-ia64/timex.h b/include/asm-ia64/timex.h --- a/include/asm-ia64/timex.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/timex.h Tue Sep 17 23:47:51 2002 @@ -3,14 +3,18 @@ /* * Copyright (C) 1998-2001 Hewlett-Packard Co - * Copyright (C) 1998-2001 David Mosberger-Tang + * David Mosberger-Tang */ /* * 2001/01/18 davidm Removed CLOCK_TICK_RATE. It makes no sense on IA-64. 
* Also removed cacheflush_time as it's entirely unused. */ +#include + typedef unsigned long cycles_t; + +#define CLOCK_TICK_RATE 100000000 static inline cycles_t get_cycles (void) diff -Nru a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h --- a/include/asm-ia64/tlb.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/tlb.h Tue Sep 17 23:47:51 2002 @@ -199,4 +199,8 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_remove_tlb_entry(tlb, ptep, addr) __tlb_remove_tlb_entry(tlb, ptep, addr) +#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep) +#define pmd_free_tlb(tlb, ptep) __pmd_free_tlb(tlb, ptep) + #endif /* _ASM_IA64_TLB_H */ diff -Nru a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h --- a/include/asm-ia64/unistd.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-ia64/unistd.h Tue Sep 17 23:47:51 2002 @@ -225,7 +225,7 @@ #define __NR_security 1233 #define __NR_alloc_hugepages 1234 #define __NR_free_hugepages 1235 -/* 1236 currently unused */ +#define __NR_exit_group 1236 /* 1237 currently unused */ #define __NR_io_setup 1238 #define __NR_io_destroy 1239 diff -Nru a/include/asm-sparc64/agp.h b/include/asm-sparc64/agp.h --- a/include/asm-sparc64/agp.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-sparc64/agp.h Tue Sep 17 23:47:51 2002 @@ -8,4 +8,11 @@ #define flush_agp_mappings() #define flush_agp_cache() mb() +/* + * Page-protection value to be used for AGP memory mapped into kernel space. For + * platforms which use coherent AGP DMA, this can be PAGE_KERNEL. For others, it needs to + * be an uncached mapping (such as write-combining). + */ +#define PAGE_AGP PAGE_KERNEL_NOCACHE + #endif diff -Nru a/include/asm-x86_64/agp.h b/include/asm-x86_64/agp.h --- a/include/asm-x86_64/agp.h Tue Sep 17 23:47:51 2002 +++ b/include/asm-x86_64/agp.h Tue Sep 17 23:47:51 2002 @@ -20,4 +20,11 @@ worth it. Would need a page for it. */ #define flush_agp_cache() asm volatile("wbinvd":::"memory") +/* + * Page-protection value to be used for AGP memory mapped into kernel space. For + * platforms which use coherent AGP DMA, this can be PAGE_KERNEL. For others, it needs to + * be an uncached mapping (such as write-combining). 
+ */ +#define PAGE_AGP PAGE_KERNEL_NOCACHE + #endif diff -Nru a/include/linux/acpi_serial.h b/include/linux/acpi_serial.h --- a/include/linux/acpi_serial.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/acpi_serial.h Tue Sep 17 23:47:51 2002 @@ -9,6 +9,8 @@ * */ +#include + extern void setup_serial_acpi(void *); #define ACPI_SIG_LEN 4 diff -Nru a/include/linux/blkdev.h b/include/linux/blkdev.h --- a/include/linux/blkdev.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/blkdev.h Tue Sep 17 23:47:51 2002 @@ -327,7 +327,7 @@ extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern void generic_unplug_device(void *); - +extern long nr_blockdev_pages(void); /* * tag stuff diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h --- a/include/linux/ext3_fs.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/ext3_fs.h Tue Sep 17 23:47:51 2002 @@ -97,9 +97,9 @@ # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) #endif #ifdef __KERNEL__ -#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) -#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) -#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) +#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) +#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) +#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) #else #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ EXT3_GOOD_OLD_INODE_SIZE : \ @@ -116,8 +116,8 @@ #define EXT3_MAX_FRAG_SIZE 4096 #define EXT3_MIN_FRAG_LOG_SIZE 10 #ifdef __KERNEL__ -# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) -# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) +# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) +# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) #else # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) @@ -164,10 +164,10 @@ * Macro-instructions used to manage group descriptors */ #ifdef __KERNEL__ -# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) -# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) -# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) -# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) +# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) +# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) +# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) +# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) #else # define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) @@ -346,7 +346,7 @@ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt #define set_opt(o, opt) o |= EXT3_MOUNT_##opt -#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ +#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ EXT3_MOUNT_##opt) #else #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD @@ -444,7 +444,10 @@ }; #ifdef __KERNEL__ -#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) +static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb) +{ + return sb->u.generic_sbp; +} static inline struct ext3_inode_info *EXT3_I(struct inode *inode) { return container_of(inode, struct ext3_inode_info, vfs_inode);
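The EXT3_SB() rewrite above is the usual step of moving filesystem-private state out of the union embedded in struct super_block and behind the opaque generic_sbp pointer, so the VFS header no longer has to know ext3's private type. A reduced sketch of the accessor pattern with stand-in types (not the kernel's definitions):

#include <stdlib.h>

struct super_block { void *generic_sbp; };	/* stand-in for the VFS type */
struct ext3_sb_info { unsigned long s_inodes_per_group; };

static inline struct ext3_sb_info *EXT3_SB (struct super_block *sb)
{
	return sb->generic_sbp;		/* private info hangs off one pointer */
}

int main (void)
{
	struct super_block sb;

	sb.generic_sbp = calloc(1, sizeof(struct ext3_sb_info));
	EXT3_SB(&sb)->s_inodes_per_group = 8192;
	free(sb.generic_sbp);
	return 0;
}

diff -Nru a/include/linux/hcdp_serial.h b/include/linux/hcdp_serial.h ---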
/dev/null Wed Dec 31 16:00:00 1969 +++ b/include/linux/hcdp_serial.h Tue Sep 17 23:47:52 2002 @@ -0,0 +1,83 @@ +/* + * linux/include/linux/hcdp_serial.h + * + * Copyright (C) 2002 Hewlett-Packard Co. + * Khalid Aziz + * + * Definitions for HCDP defined serial ports (Serial console and debug + * ports) + */ +#ifndef _LINUX_HCDP_SERIAL_H +#define _LINUX_HCDP_SERIAL_H + +/* ACPI table signatures */ +#define HCDP_SIG_LEN 4 +#define HCDP_SIGNATURE "HCDP" + +/* Space ID as defined in ACPI generic address structure */ +#define ACPI_MEM_SPACE 0 +#define ACPI_IO_SPACE 1 +#define ACPI_PCICONF_SPACE 2 + +/* + * Maximum number of HCDP devices we want to read in + */ +#define MAX_HCDP_DEVICES 6 + +/* + * Default UART clock rate if clock rate is 0 in HCDP table. + */ +#define DEFAULT_UARTCLK 115200 + +/* + * ACPI Generic Address Structure + */ +typedef struct { + u8 space_id; + u8 bit_width; + u8 bit_offset; + u8 resv; + u32 addrlo; + u32 addrhi; +} acpi_gen_addr; + +/* HCDP Device descriptor entry types */ +#define HCDP_DEV_CONSOLE 0 +#define HCDP_DEV_DEBUG 1 + +/* HCDP Device descriptor type */ +typedef struct { + u8 type; + u8 bits; + u8 parity; + u8 stop_bits; + u8 pci_seg; + u8 pci_bus; + u8 pci_dev; + u8 pci_func; + u64 baud; + acpi_gen_addr base_addr; + u16 pci_dev_id; + u16 pci_vendor_id; + u32 global_int; + u32 clock_rate; + u8 pci_prog_intfc; + u8 resv; +} hcdp_dev_t; + +/* HCDP Table format */ +typedef struct { + u8 signature[4]; + u32 len; + u8 rev; + u8 chksum; + u8 oemid[6]; + u8 oem_tabid[8]; + u32 oem_rev; + u8 creator_id[4]; + u32 creator_rev; + u32 num_entries; + hcdp_dev_t hcdp_dev[MAX_HCDP_DEVICES]; +} hcdp_t; + +#endif /* _LINUX_HCDP_SERIAL_H */ diff -Nru a/include/linux/highmem.h b/include/linux/highmem.h --- a/include/linux/highmem.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/highmem.h Tue Sep 17 23:47:51 2002 @@ -3,6 +3,8 @@ #include #include +#include + #include #ifdef CONFIG_HIGHMEM diff -Nru a/include/linux/irq.h b/include/linux/irq.h --- a/include/linux/irq.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/irq.h Tue Sep 17 23:47:51 2002 @@ -56,15 +56,13 @@ * * Pad this out to 32 bytes for cache and indexing reasons. 
*/ -typedef struct { +typedef struct irq_desc { unsigned int status; /* IRQ status */ hw_irq_controller *handler; struct irqaction *action; /* IRQ action list */ unsigned int depth; /* nested irq disables */ spinlock_t lock; } ____cacheline_aligned irq_desc_t; - -extern irq_desc_t irq_desc [NR_IRQS]; #include /* the arch dependent stuff */ diff -Nru a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h --- a/include/linux/irq_cpustat.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/irq_cpustat.h Tue Sep 17 23:47:51 2002 @@ -24,7 +24,7 @@ #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member) #else #define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member) -#endif +#endif #endif /* arch independent irq_stat fields */ @@ -33,5 +33,10 @@ #define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) /* arch dependent irq_stat fields */ #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386, ia64 */ + +#define local_softirq_pending() softirq_pending(smp_processor_id()) +#define local_syscall_count() syscall_count(smp_processor_id()) +#define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) +#define local_nmi_count() nmi_count(smp_processor_id()) #endif /* __irq_cpustat_h */ diff -Nru a/include/linux/kernel.h b/include/linux/kernel.h --- a/include/linux/kernel.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/kernel.h Tue Sep 17 23:47:51 2002 @@ -38,6 +38,13 @@ #define KERN_INFO "<6>" /* informational */ #define KERN_DEBUG "<7>" /* debug-level messages */ +extern int console_printk[]; + +#define console_loglevel (console_printk[0]) +#define default_message_loglevel (console_printk[1]) +#define minimum_console_loglevel (console_printk[2]) +#define default_console_loglevel (console_printk[3]) + struct completion; extern struct notifier_block *panic_notifier_list; @@ -74,8 +81,6 @@ asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))); -extern int console_loglevel; - static inline void console_silent(void) { console_loglevel = 0; @@ -95,6 +100,8 @@ #define TAINT_PROPRIETORY_MODULE (1<<0) #define TAINT_FORCED_MODULE (1<<1) #define TAINT_UNSAFE_SMP (1<<2) + +extern void dump_stack(void); #if DEBUG #define pr_debug(fmt,arg...) \
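The console_printk[] conversion above folds four exported integers into one array while keeping the old names alive as macros, so existing code compiles unchanged and the four values can be handled as a unit. Note that the macros expand to lvalues, which is why assignments such as console_loglevel = 0 in console_silent() still work. A standalone illustration of the aliasing:

#include <stdio.h>

int console_printk[4] = { 7, 4, 1, 7 };

#define console_loglevel		(console_printk[0])
#define default_message_loglevel	(console_printk[1])

int main (void)
{
	console_loglevel = 5;	/* assignment through the macro: writes slot 0 */
	printf("%d %d\n", console_printk[0], default_message_loglevel);
	return 0;
}

diff -Nru a/include/linux/mm.h b/include/linux/mm.h --- a/include/linux/mm.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/mm.h Tue Sep 17 23:47:51 2002 @@ -19,9 +19,6 @@ extern unsigned long num_physpages; extern void * high_memory; extern int page_cluster; -/* The inactive_clean lists are per zone.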
*/ -extern struct list_head active_list; -extern struct list_head inactive_list; #include #include @@ -104,6 +101,7 @@ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ #define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ +#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_STACK_FLAGS (0x00000100 | VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT) @@ -376,6 +374,20 @@ int __set_page_dirty_buffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page); + +#ifdef CONFIG_HUGETLB_PAGE +#define is_vm_hugetlb_page(vma) (vma->vm_flags & VM_HUGETLB) +extern int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); +extern int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); +extern int free_hugepages(struct vm_area_struct *); + +#else +#define is_vm_hugetlb_page(vma) (0) +#define follow_hugetlb_page(mm, vma, pages, vmas, start, len, i) (0) +#define copy_hugetlb_page_range(dst, src, vma) (0) +#define free_hugepages(mpnt) do { } while(0) +#endif + /* * If the mapping doesn't provide a set_page_dirty a_op, then diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h --- a/include/linux/mmzone.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/mmzone.h Tue Sep 17 23:47:51 2002 @@ -16,7 +16,7 @@ */ #ifndef CONFIG_FORCE_MAX_ZONEORDER -#define MAX_ORDER 10 +#define MAX_ORDER 11 #else #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER #endif @@ -118,7 +118,8 @@ * rarely used fields: */ char *name; - unsigned long size; + unsigned long totalsize; /* total size, including holes */ + unsigned long memsize; /* amount of memory (excluding holes) */ } ____cacheline_maxaligned_in_smp; #define ZONE_DMA 0 @@ -151,8 +152,8 @@ * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. * - * XXX: we need to move the global memory statistics (active_list, ...) - * into the pg_data_t to properly support NUMA. + * Memory statistics and page replacement data structures are maintained on a + * per-zone basis. */ struct bootmem_data; typedef struct pglist_data { diff -Nru a/include/linux/page-flags.h b/include/linux/page-flags.h --- a/include/linux/page-flags.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/page-flags.h Tue Sep 17 23:47:51 2002 @@ -42,6 +42,10 @@ * address space... */ +#include + +struct page; + /* * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break * locked- and dirty-page accounting. The top eight bits of page->flags are @@ -78,6 +82,7 @@ unsigned long nr_pagecache; unsigned long nr_page_table_pages; unsigned long nr_reverse_maps; + unsigned long nr_mapped; } ____cacheline_aligned_in_smp page_states[NR_CPUS]; extern void get_page_state(struct page_state *ret); diff -Nru a/include/linux/percpu.h b/include/linux/percpu.h --- a/include/linux/percpu.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/percpu.h Tue Sep 17 23:47:51 2002 @@ -1,8 +1,7 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H -#include /* For preempt_disable() */ +#include /* For preempt_disable() */ #include - #define get_cpu_var(var) ({ preempt_disable(); __get_cpu_var(var); }) #define put_cpu_var(var) preempt_enable() diff -Nru a/include/linux/ptrace.h b/include/linux/ptrace.h --- a/include/linux/ptrace.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/ptrace.h Tue Sep 17 23:47:51 2002 @@ -4,6 +4,7 @@ /* structs and defines to help the user use the ptrace system call. 
*/ #include +#include /* has the defines to get at the registers. */ diff -Nru a/include/linux/sched.h b/include/linux/sched.h --- a/include/linux/sched.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/sched.h Tue Sep 17 23:47:51 2002 @@ -461,14 +461,14 @@ #ifndef INIT_THREAD_SIZE # define INIT_THREAD_SIZE 2048*sizeof(long) -#endif - union thread_union { struct thread_info thread_info; unsigned long stack[INIT_THREAD_SIZE/sizeof(long)]; }; extern union thread_union init_thread_union; +#endif + extern struct task_struct init_task; extern struct mm_struct init_mm; @@ -690,7 +690,11 @@ extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); +#ifdef CONFIG_SMP extern void wait_task_inactive(task_t * p); +#else +#define wait_task_inactive(p) do { } while (0) +#endif extern void kick_if_running(task_t * p); #define __wait_event(wq, condition) \ @@ -955,6 +959,34 @@ if (need_resched()) __cond_resched(); } + +#ifdef CONFIG_PREEMPT + +/* + * cond_resched_lock() - if a reschedule is pending, drop the given lock, + * call schedule, and on return reacquire the lock. + * + * Note: this does not assume the given lock is the _only_ lock held. + * The kernel preemption counter gives us "free" checking that we are + * atomic -- let's use it. + */ +static inline void cond_resched_lock(spinlock_t * lock) +{ + if (need_resched() && preempt_count() == 1) { + _raw_spin_unlock(lock); + preempt_enable_no_resched(); + __cond_resched(); + spin_lock(lock); + } +} + +#else + +static inline void cond_resched_lock(spinlock_t * lock) +{ +} + +#endif /* Reevaluate whether the task has signals pending delivery. This is required every time the blocked sigset_t changes. diff -Nru a/include/linux/serial.h b/include/linux/serial.h --- a/include/linux/serial.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/serial.h Tue Sep 17 23:47:51 2002 @@ -183,9 +183,12 @@ #ifdef CONFIG_ACPI /* tty ports reserved for the ACPI serial console port and debug port */ -#define ACPI_SERIAL_CONSOLE_PORT 4 +#define ACPI_SERIAL_CONSOLE_PORT 0 #define ACPI_SERIAL_DEBUG_PORT 5 #endif + +/* tty port reserved for the HCDP serial console port */ +#define HCDP_SERIAL_CONSOLE_PORT 4 #endif /* __KERNEL__ */ #endif /* _LINUX_SERIAL_H */ diff -Nru a/include/linux/smp.h b/include/linux/smp.h --- a/include/linux/smp.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/smp.h Tue Sep 17 23:47:51 2002 @@ -57,10 +57,6 @@ */ extern int smp_threads_ready; -extern volatile unsigned long smp_msg_data; -extern volatile int smp_src_cpu; -extern volatile int smp_msg_id; - #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -94,7 +90,6 @@ #define cpu_online(cpu) ({ cpu; 1; }) #define num_online_cpus() 1 #define num_booting_cpus() 1 - struct notifier_block; /* Need to know about CPUs going up/down? 
*/ diff -Nru a/include/linux/sysctl.h b/include/linux/sysctl.h --- a/include/linux/sysctl.h Tue Sep 17 23:47:51 2002 +++ b/include/linux/sysctl.h Tue Sep 17 23:47:51 2002 @@ -128,6 +128,7 @@ KERN_TAINTED=53, /* int: various kernel tainted flags */ KERN_CADPID=54, /* int: PID of the process to notify on CAD */ KERN_PIDMAX=55, /* int: PID # limit */ + KERN_HUGETLB_PAGE_NUM=56, /* int: Number of available Huge Pages */ }; diff -Nru a/include/linux/vmalloc.h b/include/linux/vmalloc.h --- a/include/linux/vmalloc.h Tue Sep 17 23:47:52 2002 +++ b/include/linux/vmalloc.h Tue Sep 17 23:47:52 2002 @@ -7,6 +7,7 @@ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ +#define VM_AGP_REMAP 0x00000008 /* remapped AGP-memory */ struct vm_struct { void *addr; @@ -37,6 +38,9 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); extern void unmap_vm_area(struct vm_struct *area); + +/* Get the flags associated with the area starting at ADDR. */ +extern unsigned long vgetflags (void *addr); /* * Internals. Dont't use.. diff -Nru a/kernel/exec_domain.c b/kernel/exec_domain.c --- a/kernel/exec_domain.c Tue Sep 17 23:47:51 2002 +++ b/kernel/exec_domain.c Tue Sep 17 23:47:51 2002 @@ -196,8 +196,10 @@ put_exec_domain(oep); +#if 0 printk(KERN_DEBUG "[%s:%d]: set personality to %lx\n", current->comm, current->pid, personality); +#endif return 0; } diff -Nru a/kernel/exit.c b/kernel/exit.c --- a/kernel/exit.c Tue Sep 17 23:47:51 2002 +++ b/kernel/exit.c Tue Sep 17 23:47:51 2002 @@ -55,10 +55,8 @@ if (p->state != TASK_ZOMBIE) BUG(); -#ifdef CONFIG_SMP if (p != current) wait_task_inactive(p); -#endif atomic_dec(&p->user->processes); security_ops->task_free_security(p); free_uid(p->user); diff -Nru a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c Tue Sep 17 23:47:51 2002 +++ b/kernel/fork.c Tue Sep 17 23:47:51 2002 @@ -60,6 +60,7 @@ rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ +#if 0 /* * A per-CPU task cache - this relies on the fact that * the very last portion of sys_exit() is executed with @@ -83,6 +84,7 @@ task_cache[cpu] = current; } } +#endif /* Protects next_safe and last_pid. */ void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) @@ -135,7 +137,11 @@ init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; } -static struct task_struct *dup_task_struct(struct task_struct *orig) +#if 1 +extern struct task_struct *dup_task_struct (struct task_struct *orig); +#else + +struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; @@ -158,6 +164,8 @@ return tsk; } + +#endif static int get_pid(unsigned long flags) { diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c --- a/kernel/ksyms.c Tue Sep 17 23:47:51 2002 +++ b/kernel/ksyms.c Tue Sep 17 23:47:51 2002 @@ -109,6 +109,7 @@ EXPORT_SYMBOL(kmalloc); EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(vfree); +EXPORT_SYMBOL(vgetflags); EXPORT_SYMBOL(__vmalloc); EXPORT_SYMBOL(vmalloc); EXPORT_SYMBOL(vmalloc_32); @@ -391,7 +392,9 @@ EXPORT_SYMBOL(del_timer); EXPORT_SYMBOL(request_irq); EXPORT_SYMBOL(free_irq); +#if !defined(CONFIG_IA64) EXPORT_SYMBOL(irq_stat); +#endif /* waitqueue handling */ EXPORT_SYMBOL(add_wait_queue); @@ -598,10 +601,15 @@ /* init task, for moving kthread roots - ought to export a function ?? 
*/ EXPORT_SYMBOL(init_task); +#ifndef CONFIG_IA64 EXPORT_SYMBOL(init_thread_union); +#endif EXPORT_SYMBOL(tasklist_lock); EXPORT_SYMBOL(pidhash); #if defined(CONFIG_SMP) && defined(__GENERIC_PER_CPU) EXPORT_SYMBOL(__per_cpu_offset); #endif + +/* debug */ +EXPORT_SYMBOL(dump_stack); diff -Nru a/kernel/printk.c b/kernel/printk.c --- a/kernel/printk.c Tue Sep 17 23:47:52 2002 +++ b/kernel/printk.c Tue Sep 17 23:47:52 2002 @@ -16,6 +16,7 @@ * 01Mar01 Andrew Morton */ +#include #include #include #include @@ -55,11 +56,12 @@ DECLARE_WAIT_QUEUE_HEAD(log_wait); -/* Keep together for sysctl support */ -int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; -int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL; -int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL; -int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; +int console_printk[4] = { + DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ + DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ + MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ + DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ +}; int oops_in_progress; @@ -341,6 +343,12 @@ __call_console_drivers(start, end); } } +#ifdef CONFIG_IA64_EARLY_PRINTK + if (!console_drivers) { + static void early_printk (const char *str, size_t len); + early_printk(&LOG_BUF(start), end - start); + } +#endif } /* @@ -707,3 +715,109 @@ tty->driver.write(tty, 0, msg, strlen(msg)); return; } + +#ifdef CONFIG_IA64_EARLY_PRINTK + +# ifdef CONFIG_IA64_EARLY_PRINTK_VGA + +#include + +#define VGABASE ((char *)0xc0000000000b8000) +#define VGALINES 24 +#define VGACOLS 80 + +static int current_ypos = VGALINES, current_xpos = 0; + +static void +early_printk_vga (const char *str, size_t len) +{ + char c; + int i, k, j; + + while (len-- > 0) { + c = *str++; + if (current_ypos >= VGALINES) { + /* scroll 1 line up */ + for (k = 1, j = 0; k < VGALINES; k++, j++) { + for (i = 0; i < VGACOLS; i++) { + writew(readw(VGABASE + 2*(VGACOLS*k + i)), + VGABASE + 2*(VGACOLS*j + i)); + } + } + for (i = 0; i < VGACOLS; i++) { + writew(0x720, VGABASE + 2*(VGACOLS*j + i)); + } + current_ypos = VGALINES-1; + } + if (c == '\n') { + current_xpos = 0; + current_ypos++; + } else if (c != '\r') { + writew(((0x7 << 8) | (unsigned short) c), + VGABASE + 2*(VGACOLS*current_ypos + current_xpos++)); + if (current_xpos >= VGACOLS) { + current_xpos = 0; + current_ypos++; + } + } + } +} + +# endif /* CONFIG_IA64_EARLY_PRINTK_VGA */ + +# ifdef CONFIG_IA64_EARLY_PRINTK_UART + +#include +#include + +static void early_printk_uart(const char *str, size_t len) +{ + static char *uart = NULL; + unsigned long uart_base; + char c; + + if (!uart) { + uart_base = 0; +# ifdef CONFIG_SERIAL_8250_HCDP + { + extern unsigned long hcdp_early_uart(void); + uart_base = hcdp_early_uart(); + } +# endif +# if CONFIG_IA64_EARLY_PRINTK_UART_BASE + if (!uart_base) + uart_base = CONFIG_IA64_EARLY_PRINTK_UART_BASE; +# endif + if (!uart_base) + return; + + uart = ioremap(uart_base, 64); + if (!uart) + return; + } + + while (len-- > 0) { + c = *str++; + while ((readb(uart + UART_LSR) & UART_LSR_TEMT) == 0) + cpu_relax(); /* spin */ + + writeb(c, uart + UART_TX); + + if (c == '\n') + writeb('\r', uart + UART_TX); + } +} + +# endif /* CONFIG_IA64_EARLY_PRINTK_UART */ + +void early_printk(const char *str, size_t len) +{ +#ifdef CONFIG_IA64_EARLY_PRINTK_UART + early_printk_uart(str, len); +#endif +#ifdef CONFIG_IA64_EARLY_PRINTK_VGA + early_printk_vga(str, len); +#endif +} + +#endif /* CONFIG_IA64_EARLY_PRINTK */ diff -Nru a/kernel/ptrace.c 
b/kernel/ptrace.c --- a/kernel/ptrace.c Tue Sep 17 23:47:52 2002 +++ b/kernel/ptrace.c Tue Sep 17 23:47:52 2002 @@ -69,9 +69,7 @@ if (!kill) { if (child->state != TASK_STOPPED) return -ESRCH; -#ifdef CONFIG_SMP wait_task_inactive(child); -#endif } /* All systems go.. */ diff -Nru a/kernel/softirq.c b/kernel/softirq.c --- a/kernel/softirq.c Tue Sep 17 23:47:51 2002 +++ b/kernel/softirq.c Tue Sep 17 23:47:51 2002 @@ -38,7 +38,10 @@ - Bottom halves: globally serialized, grr... */ +/* No separate irq_stat for ia64, it is part of PSA */ +#if !defined(CONFIG_IA64) irq_cpustat_t irq_stat[NR_CPUS]; +#endif /* CONFIG_IA64 */ static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; @@ -69,7 +72,7 @@ local_irq_save(flags); cpu = smp_processor_id(); - pending = softirq_pending(cpu); + pending = local_softirq_pending(); if (pending) { struct softirq_action *h; @@ -78,7 +81,7 @@ local_bh_disable(); restart: /* Reset the pending bitmask before enabling irqs */ - softirq_pending(cpu) = 0; + local_softirq_pending() = 0; local_irq_enable(); @@ -93,7 +96,7 @@ local_irq_disable(); - pending = softirq_pending(cpu); + pending = local_softirq_pending(); if (pending & mask) { mask &= ~pending; goto restart; @@ -101,7 +104,7 @@ __local_bh_enable(); if (pending) - wakeup_softirqd(cpu); + wakeup_softirqd(smp_processor_id()); } local_irq_restore(flags); @@ -371,15 +374,15 @@ __set_current_state(TASK_INTERRUPTIBLE); mb(); - ksoftirqd_task(cpu) = current; + local_ksoftirqd_task() = current; for (;;) { - if (!softirq_pending(cpu)) + if (!local_softirq_pending()) schedule(); __set_current_state(TASK_RUNNING); - while (softirq_pending(cpu)) { + while (local_softirq_pending()) { do_softirq(); cond_resched(); } diff -Nru a/kernel/sysctl.c b/kernel/sysctl.c --- a/kernel/sysctl.c Tue Sep 17 23:47:51 2002 +++ b/kernel/sysctl.c Tue Sep 17 23:47:51 2002 @@ -98,6 +98,11 @@ extern int acct_parm[]; #endif +#ifdef CONFIG_HUGETLB_PAGE +extern int htlbpage_max; +extern int set_hugetlb_mem_size(int); +#endif + static int parse_table(int *, int, void *, size_t *, void *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, @@ -258,6 +263,10 @@ #endif {KERN_PIDMAX, "pid_max", &pid_max, sizeof (int), 0600, NULL, &proc_dointvec}, +#ifdef CONFIG_HUGETLB_PAGE + {KERN_HUGETLB_PAGE_NUM, "numhugepages", &htlbpage_max, sizeof(int), 0644, NULL, + &proc_dointvec}, +#endif {0} }; @@ -897,6 +906,10 @@ val = -val; buffer += len; left -= len; +#ifdef CONFIG_HUGETLB_PAGE + if (i == &htlbpage_max) + val = set_hugetlb_mem_size(val); +#endif switch(op) { case OP_SET: *i = val; break; case OP_AND: *i &= val; break; diff -Nru a/kernel/timer.c b/kernel/timer.c --- a/kernel/timer.c Tue Sep 17 23:47:51 2002 +++ b/kernel/timer.c Tue Sep 17 23:47:51 2002 @@ -888,7 +888,7 @@ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) return -EINVAL; - +#if !defined(__ia64__) if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && current->policy != SCHED_NORMAL) { @@ -901,6 +901,7 @@ udelay((t.tv_nsec + 999) / 1000); return 0; } +#endif expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); diff -Nru a/lib/Makefile b/lib/Makefile --- a/lib/Makefile Tue Sep 17 23:47:51 2002 +++ b/lib/Makefile Tue Sep 17 23:47:51 2002 @@ -12,7 +12,7 @@ crc32.o rbtree.o radix-tree.o obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \ - bust_spinlocks.o rbtree.o radix-tree.o + bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o diff -Nru a/lib/dump_stack.c b/lib/dump_stack.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/lib/dump_stack.c Tue Sep 17 23:47:52 2002 @@ -0,0 +1,13 @@ +/* + * Provide a default dump_stack() function for architectures + * which don't implement their own. + */ + +#include +#include + +void dump_stack(void) +{ + printk(KERN_NOTICE + "This architecture does not implement dump_stack()\n"); +} diff -Nru a/mm/bootmem.c b/mm/bootmem.c --- a/mm/bootmem.c Tue Sep 17 23:47:51 2002 +++ b/mm/bootmem.c Tue Sep 17 23:47:51 2002 @@ -143,6 +143,7 @@ static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal) { + static unsigned long last_success; unsigned long i, start = 0; void *ret; unsigned long offset, remaining_size; @@ -168,6 +169,9 @@ if (goal && (goal >= bdata->node_boot_start) && ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { preferred = goal - bdata->node_boot_start; + + if (last_success >= preferred) + preferred = last_success; } else preferred = 0; @@ -179,6 +183,8 @@ restart_scan: for (i = preferred; i < eidx; i += incr) { unsigned long j; + i = find_next_zero_bit((char *)bdata->node_bootmem_map, eidx, i); + i = (i + incr - 1) & -incr; if (test_bit(i, bdata->node_bootmem_map)) continue; for (j = i + 1; j < i + areasize; ++j) { @@ -197,6 +203,7 @@ } return NULL; found: + last_success = start << PAGE_SHIFT; if (start >= eidx) BUG(); @@ -256,21 +263,21 @@ map = bdata->node_bootmem_map; for (i = 0; i < idx; ) { unsigned long v = ~map[i / BITS_PER_LONG]; - if (v) { + if (v) { unsigned long m; - for (m = 1; m && i < idx; m<<=1, page++, i++) { + for (m = 1; m && i < idx; m<<=1, page++, i++) { if (v & m) { - count++; - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - } - } + count++; + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + } } else { i+=BITS_PER_LONG; - page+=BITS_PER_LONG; - } - } + page+=BITS_PER_LONG; + } + } total += count; /* diff -Nru a/mm/memory.c b/mm/memory.c --- a/mm/memory.c Tue Sep 17 23:47:51 2002 +++ b/mm/memory.c Tue Sep 17 23:47:51 2002 @@ -110,7 +110,7 @@ pmd = pmd_offset(dir, 0); pgd_clear(dir); for (j = 0; j < PTRS_PER_PMD ; j++) { - prefetchw(pmd+j+(PREFETCH_STRIDE/16)); + prefetchw(pmd + j + PREFETCH_STRIDE/sizeof(*pmd)); free_one_pmd(tlb, pmd+j); } pmd_free_tlb(tlb, pmd); @@ -208,6 +208,9 @@ unsigned long end = vma->vm_end; unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) + return copy_hugetlb_page_range(dst, src, vma); + src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -389,8 +392,8 @@ { pgd_t * dir; - if (address >= end) - BUG(); + BUG_ON(address >= end); + dir = pgd_offset(vma->vm_mm, address); tlb_start_vma(tlb, vma); do { @@ -401,30 +404,56 @@ tlb_end_vma(tlb, vma); } -/* - * remove user pages in a given range. 
+/* Dispose of an entire mmu_gather_t per rescheduling point */ +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) +#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE) +#endif + +/* For UP, 256 pages at a time gives nice low latency */ +#if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) +#define ZAP_BLOCK_SIZE (256 * PAGE_SIZE) +#endif + +/* No preempt: go for the best straight-line efficiency */ +#if !defined(CONFIG_PREEMPT) +#define ZAP_BLOCK_SIZE (~(0UL)) +#endif + +/** + * zap_page_range - remove user pages in a given range + * @vma: vm_area_struct holding the applicable pages + * @address: starting address of pages to zap + * @size: number of bytes to zap */ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size) { struct mm_struct *mm = vma->vm_mm; mmu_gather_t *tlb; - unsigned long start = address, end = address + size; + unsigned long end, block; - /* - * This is a long-lived spinlock. That's fine. - * There's no contention, because the page table - * lock only protects against kswapd anyway, and - * even if kswapd happened to be looking at this - * process we _want_ it to get stuck. - */ - if (address >= end) - BUG(); spin_lock(&mm->page_table_lock); - flush_cache_range(vma, address, end); - tlb = tlb_gather_mmu(mm, 0); - unmap_page_range(tlb, vma, address, end); - tlb_finish_mmu(tlb, start, end); + /* + * This was once a long-held spinlock. Now we break the + * work up into ZAP_BLOCK_SIZE units and relinquish the + * lock after each iteration. This drastically lowers + * lock contention and allows for a preemption point. + */ + while (size) { + block = (size > ZAP_BLOCK_SIZE) ? ZAP_BLOCK_SIZE : size; + end = address + block; + + flush_cache_range(vma, address, end); + tlb = tlb_gather_mmu(mm, 0); + unmap_page_range(tlb, vma, address, end); + tlb_finish_mmu(tlb, address, end); + + cond_resched_lock(&mm->page_table_lock); + + address += block; + size -= block; + } + spin_unlock(&mm->page_table_lock); } @@ -504,6 +533,11 @@ || !(flags & vma->vm_flags)) return i ? : -EFAULT; + if (is_vm_hugetlb_page(vma)) { + i = follow_hugetlb_page(mm, vma, pages, vmas, + &start, &len, i); + continue; + } spin_lock(&mm->page_table_lock); do { struct page *map;
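The zap_page_range() rework above is an instance of a general lock-batching pattern: bound the work done per lock hold, and let cond_resched_lock() (added to sched.h earlier in this patch) supply the preemption point. The shape of the loop, with illustrative names rather than a drop-in implementation:

	spin_lock(&lock);
	while (size) {
		block = (size > BLOCK_SIZE) ? BLOCK_SIZE : size;
		process_range(address, block);	/* hypothetical batch worker */
		cond_resched_lock(&lock);	/* may drop, schedule, relock */
		address += block;
		size -= block;
	}
	spin_unlock(&lock);

diff -Nru a/mm/mempool.c b/mm/mempool.c --- a/mm/mempool.c Tue Sep 17 23:47:51 2002 +++ b/mm/mempool.c Tue Sep 17 23:47:51 2002 @@ -196,10 +196,11 @@ return element; /* - * If the pool is less than 50% full then try harder - * to allocate an element: + * If the pool is less than 50% full and we can perform effective + * page reclaim then try harder to allocate an element.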
*/ - if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { + if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) && + (pool->curr_nr <= pool->min_nr/2)) { element = pool->alloc(gfp_mask, pool->pool_data); if (likely(element != NULL)) return element; diff -Nru a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c Tue Sep 17 23:47:51 2002 +++ b/mm/mmap.c Tue Sep 17 23:47:51 2002 @@ -1031,10 +1031,14 @@ touched = NULL; do { struct vm_area_struct *next = mpnt->vm_next; - mpnt->vm_next = touched; - touched = mpnt; - mm->map_count--; - rb_erase(&mpnt->vm_rb, &mm->mm_rb); + if (!(is_vm_hugetlb_page(mpnt))) { + mpnt->vm_next = touched; + touched = mpnt; + rb_erase(&mpnt->vm_rb, &mm->mm_rb); + mm->map_count--; + } + else + free_hugepages(mpnt); mpnt = next; } while (mpnt && mpnt->vm_start < end); *npp = mpnt; @@ -1273,7 +1277,10 @@ vm_unacct_memory((end - start) >> PAGE_SHIFT); mm->map_count--; - unmap_page_range(tlb, mpnt, start, end); + if (!(is_vm_hugetlb_page(mpnt))) + unmap_page_range(tlb, mpnt, start, end); + else + mpnt->vm_ops->close(mpnt); mpnt = mpnt->vm_next; } diff -Nru a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c Tue Sep 17 23:47:51 2002 +++ b/mm/mprotect.c Tue Sep 17 23:47:51 2002 @@ -321,6 +321,11 @@ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ + if (is_vm_hugetlb_page(vma)) { + error = -EACCES; + goto out; + } + newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC)); if ((newflags & ~(newflags >> 4)) & 0xf) { error = -EACCES; diff -Nru a/mm/mremap.c b/mm/mremap.c --- a/mm/mremap.c Tue Sep 17 23:47:51 2002 +++ b/mm/mremap.c Tue Sep 17 23:47:51 2002 @@ -311,6 +311,10 @@ vma = find_vma(current->mm, addr); if (!vma || vma->vm_start > addr) goto out; + if (is_vm_hugetlb_page(vma)) { + ret = -EINVAL; + goto out; + } /* We can't remap across vm area boundaries */ if (old_len > vma->vm_end - addr) goto out; diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c --- a/mm/page_alloc.c Tue Sep 17 23:47:51 2002 +++ b/mm/page_alloc.c Tue Sep 17 23:47:51 2002 @@ -23,6 +23,7 @@ #include #include #include +#include unsigned long totalram_pages; unsigned long totalhigh_pages; @@ -46,7 +47,7 @@ */ static inline int bad_range(struct zone *zone, struct page *page) { - if (page_to_pfn(page) >= zone->zone_start_pfn + zone->size) + if (page_to_pfn(page) >= zone->zone_start_pfn + zone->totalsize) return 1; if (page_to_pfn(page) < zone->zone_start_pfn) return 1; @@ -497,7 +498,7 @@ struct zone *zone; for (zone = *zonep++; zone; zone = *zonep++) { - unsigned long size = zone->size; + unsigned long size = zone->memsize; unsigned long high = zone->pages_high; if (size > high) sum += size - high; @@ -561,6 +562,7 @@ ret->nr_pagecache += ps->nr_pagecache; ret->nr_page_table_pages += ps->nr_page_table_pages; ret->nr_reverse_maps += ps->nr_reverse_maps; + ret->nr_mapped += ps->nr_mapped; } } @@ -589,7 +591,7 @@ val->totalram = totalram_pages; val->sharedram = 0; val->freeram = nr_free_pages(); - val->bufferram = get_page_cache_size(); + val->bufferram = nr_blockdev_pages(); #ifdef CONFIG_HIGHMEM val->totalhigh = totalhigh_pages; val->freehigh = nr_free_highpages(); @@ -656,7 +658,7 @@ struct zone *zone = &pgdat->node_zones[type]; unsigned long nr, flags, order, total = 0; - if (!zone->size) + if (!zone->memsize) continue; spin_lock_irqsave(&zone->lock, flags); @@ -703,7 +705,7 @@ */ case ZONE_HIGHMEM: zone = pgdat->node_zones + ZONE_HIGHMEM; - if (zone->size) { + if (zone->memsize) { #ifndef CONFIG_HIGHMEM BUG(); #endif @@ -711,11 +713,11 @@ } case ZONE_NORMAL: zone = 
pgdat->node_zones + ZONE_NORMAL; - if (zone->size) + if (zone->memsize) zonelist->zones[j++] = zone; case ZONE_DMA: zone = pgdat->node_zones + ZONE_DMA; - if (zone->size) + if (zone->memsize) zonelist->zones[j++] = zone; } zonelist->zones[j++] = NULL; @@ -823,7 +825,8 @@ realsize -= zholes_size[j]; printk(" %s zone: %lu pages\n", zone_names[j], realsize); - zone->size = size; + zone->totalsize = size; + zone->memsize = realsize; zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); diff -Nru a/mm/readahead.c b/mm/readahead.c --- a/mm/readahead.c Tue Sep 17 23:47:51 2002 +++ b/mm/readahead.c Tue Sep 17 23:47:51 2002 @@ -48,9 +48,9 @@ struct page *page = list_entry(pages->prev, struct page, list); list_del(&page->list); if (!add_to_page_cache(page, mapping, page->index)) { + mapping->a_ops->readpage(file, page); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); - mapping->a_ops->readpage(file, page); } else { page_cache_release(page); } diff -Nru a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c Tue Sep 17 23:47:52 2002 +++ b/mm/rmap.c Tue Sep 17 23:47:52 2002 @@ -214,6 +214,7 @@ if (page->pte.direct == 0) { page->pte.direct = pte_paddr; SetPageDirect(page); + inc_page_state(nr_mapped); goto out; } @@ -336,6 +337,8 @@ out: pte_chain_unlock(page); + if (!page_mapped(page)) + dec_page_state(nr_mapped); return; } @@ -447,6 +450,7 @@ ret = try_to_unmap_one(page, page->pte.direct); if (ret == SWAP_SUCCESS) { page->pte.direct = 0; + dec_page_state(nr_reverse_maps); ClearPageDirect(page); } goto out; @@ -500,6 +504,8 @@ } } out: + if (!page_mapped(page)) + dec_page_state(nr_mapped); return ret; } diff -Nru a/mm/slab.c b/mm/slab.c --- a/mm/slab.c Tue Sep 17 23:47:51 2002 +++ b/mm/slab.c Tue Sep 17 23:47:51 2002 @@ -487,7 +487,7 @@ /* Inc off-slab bufctl limit until the ceiling is hit. */ if (!(OFF_SLAB(sizes->cs_cachep))) { offslab_limit = sizes->cs_size-sizeof(slab_t); - offslab_limit /= 2; + offslab_limit /= sizeof(kmem_bufctl_t); } sizes->cs_dmacachep = kmem_cache_create( cache_names[sizes-cache_sizes].name_dma, diff -Nru a/mm/vmalloc.c b/mm/vmalloc.c --- a/mm/vmalloc.c Tue Sep 17 23:47:51 2002 +++ b/mm/vmalloc.c Tue Sep 17 23:47:51 2002 @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include @@ -309,6 +311,7 @@ */ void vfree(void *addr) { + BUG_ON(in_interrupt()); __vunmap(addr, 1); } @@ -324,6 +327,7 @@ */ void vunmap(void *addr) { + BUG_ON(in_interrupt()); __vunmap(addr, 0); } @@ -511,4 +515,21 @@ finished: read_unlock(&vmlist_lock); return buf - buf_start; +} + +unsigned long vgetflags (void * addr) +{ + struct vm_struct *tmp; + unsigned long flags = 0; + + read_lock(&vmlist_lock); + for (tmp = vmlist ; tmp ; tmp = tmp->next) { + if (tmp->addr == addr) { + flags = tmp->flags; + break; + } + + } + read_unlock(&vmlist_lock); + return flags; } diff -Nru a/mm/vmscan.c b/mm/vmscan.c --- a/mm/vmscan.c Tue Sep 17 23:47:51 2002 +++ b/mm/vmscan.c Tue Sep 17 23:47:51 2002 @@ -536,6 +536,20 @@ /* * This is the main entry point to page reclaim. + * + * If a full scan of the inactive list fails to free enough memory then we + * are "out of memory" and something needs to be killed. + * + * If the caller is !__GFP_FS then the probability of a failure is reasonably + * high - the zone may be full of dirty or under-writeback pages, which this + * caller can't do much about. So for !__GFP_FS callers, we just perform a + * small LRU walk and if that didn't work out, fail the allocation back to the + * caller. 
GFP_NOFS allocators need to know how to deal with it. Kicking + * bdflush, waiting and retrying will work. + * + * This is a fairly lame algorithm - it can result in excessive CPU burning and + * excessive rotation of the inactive list, which is _supposed_ to be an LRU, + * yes? */ int try_to_free_pages(struct zone *classzone, @@ -546,13 +560,16 @@ KERNEL_STAT_INC(pageoutrun); - do { + for (priority = DEF_PRIORITY; priority; priority--) { nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages); if (nr_pages <= 0) return 1; - } while (--priority); - out_of_memory(); + if (!(gfp_mask & __GFP_FS)) + break; + } + if (gfp_mask & __GFP_FS) + out_of_memory(); return 0; } diff -Nru a/sound/oss/cs4281/cs4281m.c b/sound/oss/cs4281/cs4281m.c --- a/sound/oss/cs4281/cs4281m.c Tue Sep 17 23:47:52 2002 +++ b/sound/oss/cs4281/cs4281m.c Tue Sep 17 23:47:52 2002 @@ -1942,8 +1942,8 @@ len -= x; } CS_DBGOUT(CS_WAVE_WRITE, 4, printk(KERN_INFO - "cs4281: clear_advance(): memset %d at 0x%.8x for %d size \n", - (unsigned)c, (unsigned)((char *) buf) + bptr, len)); + "cs4281: clear_advance(): memset %d at %p for %d size \n", + (unsigned)c, ((char *) buf) + bptr, len)); memset(((char *) buf) + bptr, c, len); } @@ -1978,9 +1978,8 @@ wake_up(&s->dma_adc.wait); } CS_DBGOUT(CS_PARMS, 8, printk(KERN_INFO - "cs4281: cs4281_update_ptr(): s=0x%.8x hwptr=%d total_bytes=%d count=%d \n", - (unsigned)s, s->dma_adc.hwptr, - s->dma_adc.total_bytes, s->dma_adc.count)); + "cs4281: cs4281_update_ptr(): s=%p hwptr=%d total_bytes=%d count=%d \n", + s, s->dma_adc.hwptr, s->dma_adc.total_bytes, s->dma_adc.count)); } // update DAC pointer // @@ -2012,11 +2011,10 @@ // Continue to play silence until the _release. // CS_DBGOUT(CS_WAVE_WRITE, 6, printk(KERN_INFO - "cs4281: cs4281_update_ptr(): memset %d at 0x%.8x for %d size \n", + "cs4281: cs4281_update_ptr(): memset %d at %p for %d size \n", (unsigned)(s->prop_dac.fmt & (AFMT_U8 | AFMT_U16_LE)) ? 0x80 : 0, - (unsigned)s->dma_dac.rawbuf, - s->dma_dac.dmasize)); + s->dma_dac.rawbuf, s->dma_dac.dmasize)); memset(s->dma_dac.rawbuf, (s->prop_dac. fmt & (AFMT_U8 | AFMT_U16_LE)) ? @@ -2047,9 +2045,8 @@ } } CS_DBGOUT(CS_PARMS, 8, printk(KERN_INFO - "cs4281: cs4281_update_ptr(): s=0x%.8x hwptr=%d total_bytes=%d count=%d \n", - (unsigned) s, s->dma_dac.hwptr, - s->dma_dac.total_bytes, s->dma_dac.count)); + "cs4281: cs4281_update_ptr(): s=%p hwptr=%d total_bytes=%d count=%d \n", + s, s->dma_dac.hwptr, s->dma_dac.total_bytes, s->dma_dac.count)); } } @@ -2180,8 +2177,7 @@ VALIDATE_STATE(s); CS_DBGOUT(CS_FUNCTION, 4, printk(KERN_INFO - "cs4281: mixer_ioctl(): s=0x%.8x cmd=0x%.8x\n", - (unsigned) s, cmd)); + "cs4281: mixer_ioctl(): s=%p cmd=0x%.8x\n", s, cmd)); #if CSDEBUG cs_printioctl(cmd); #endif @@ -2746,9 +2742,8 @@ CS_DBGOUT(CS_FUNCTION, 2, printk(KERN_INFO "cs4281: CopySamples()+ ")); CS_DBGOUT(CS_WAVE_READ, 8, printk(KERN_INFO - " dst=0x%x src=0x%x count=%d iChannels=%d fmt=0x%x\n", - (unsigned) dst, (unsigned) src, (unsigned) count, - (unsigned) iChannels, (unsigned) fmt)); + " dst=%p src=%p count=%d iChannels=%d fmt=0x%x\n", + dst, src, (unsigned) count, (unsigned) iChannels, (unsigned) fmt)); // Gershwin does format conversion in hardware so normally // we don't do any host based coversion. 
The data formatter @@ -2828,9 +2823,9 @@ void *src = hwsrc; //default to the standard destination buffer addr CS_DBGOUT(CS_FUNCTION, 6, printk(KERN_INFO - "cs_copy_to_user()+ fmt=0x%x fmt_o=0x%x cnt=%d dest=0x%.8x\n", + "cs_copy_to_user()+ fmt=0x%x fmt_o=0x%x cnt=%d dest=%p\n", s->prop_adc.fmt, s->prop_adc.fmt_original, - (unsigned) cnt, (unsigned) dest)); + (unsigned) cnt, dest)); if (cnt > s->dma_adc.dmasize) { cnt = s->dma_adc.dmasize; @@ -2875,7 +2870,7 @@ unsigned copied = 0; CS_DBGOUT(CS_FUNCTION | CS_WAVE_READ, 2, - printk(KERN_INFO "cs4281: cs4281_read()+ %d \n", count)); + printk(KERN_INFO "cs4281: cs4281_read()+ %Zu \n", count)); VALIDATE_STATE(s); if (ppos != &file->f_pos) @@ -2898,7 +2893,7 @@ // while (count > 0) { CS_DBGOUT(CS_WAVE_READ, 8, printk(KERN_INFO - "_read() count>0 count=%d .count=%d .swptr=%d .hwptr=%d \n", + "_read() count>0 count=%Zu .count=%d .swptr=%d .hwptr=%d \n", count, s->dma_adc.count, s->dma_adc.swptr, s->dma_adc.hwptr)); spin_lock_irqsave(&s->lock, flags); @@ -2955,11 +2950,10 @@ // the "cnt" is the number of bytes to read. CS_DBGOUT(CS_WAVE_READ, 2, printk(KERN_INFO - "_read() copy_to cnt=%d count=%d ", cnt, count)); + "_read() copy_to cnt=%d count=%Zu ", cnt, count)); CS_DBGOUT(CS_WAVE_READ, 8, printk(KERN_INFO - " .dmasize=%d .count=%d buffer=0x%.8x ret=%d\n", - s->dma_adc.dmasize, s->dma_adc.count, - (unsigned) buffer, ret)); + " .dmasize=%d .count=%d buffer=%p ret=%Zd\n", + s->dma_adc.dmasize, s->dma_adc.count, buffer, ret)); if (cs_copy_to_user (s, buffer, s->dma_adc.rawbuf + swptr, cnt, &copied)) @@ -2975,7 +2969,7 @@ start_adc(s); } CS_DBGOUT(CS_FUNCTION | CS_WAVE_READ, 2, - printk(KERN_INFO "cs4281: cs4281_read()- %d\n", ret)); + printk(KERN_INFO "cs4281: cs4281_read()- %Zd\n", ret)); return ret; } @@ -2991,7 +2985,7 @@ int cnt; CS_DBGOUT(CS_FUNCTION | CS_WAVE_WRITE, 2, - printk(KERN_INFO "cs4281: cs4281_write()+ count=%d\n", + printk(KERN_INFO "cs4281: cs4281_write()+ count=%Zu\n", count)); VALIDATE_STATE(s); @@ -3047,7 +3041,7 @@ start_dac(s); } CS_DBGOUT(CS_FUNCTION | CS_WAVE_WRITE, 2, - printk(KERN_INFO "cs4281: cs4281_write()- %d\n", ret)); + printk(KERN_INFO "cs4281: cs4281_write()- %Zd\n", ret)); return ret; } @@ -3168,8 +3162,7 @@ int val, mapped, ret; CS_DBGOUT(CS_FUNCTION, 4, printk(KERN_INFO - "cs4281: cs4281_ioctl(): file=0x%.8x cmd=0x%.8x\n", - (unsigned) file, cmd)); + "cs4281: cs4281_ioctl(): file=%p cmd=0x%.8x\n", file, cmd)); #if CSDEBUG cs_printioctl(cmd); #endif @@ -3205,7 +3198,7 @@ "cs4281: cs4281_ioctl(): DSP_RESET\n")); if (file->f_mode & FMODE_WRITE) { stop_dac(s); - synchronize_irq(); + synchronize_irq(s->irq); s->dma_dac.swptr = s->dma_dac.hwptr = s->dma_dac.count = s->dma_dac.total_bytes = s->dma_dac.blocks = s->dma_dac.wakeup = 0; @@ -3213,7 +3206,7 @@ } if (file->f_mode & FMODE_READ) { stop_adc(s); - synchronize_irq(); + synchronize_irq(s->irq); s->dma_adc.swptr = s->dma_adc.hwptr = s->dma_adc.count = s->dma_adc.total_bytes = s->dma_adc.blocks = s->dma_dac.wakeup = 0; @@ -3599,8 +3592,8 @@ (struct cs4281_state *) file->private_data; CS_DBGOUT(CS_FUNCTION | CS_RELEASE, 2, printk(KERN_INFO - "cs4281: cs4281_release(): inode=0x%.8x file=0x%.8x f_mode=%d\n", - (unsigned) inode, (unsigned) file, file->f_mode)); + "cs4281: cs4281_release(): inode=%p file=%p f_mode=%d\n", + inode, file, file->f_mode)); VALIDATE_STATE(s); @@ -3634,8 +3627,8 @@ struct list_head *entry; CS_DBGOUT(CS_FUNCTION | CS_OPEN, 2, printk(KERN_INFO - "cs4281: cs4281_open(): inode=0x%.8x file=0x%.8x f_mode=0x%x\n", - (unsigned) inode, (unsigned) file, 
file->f_mode)); + "cs4281: cs4281_open(): inode=%p file=%p f_mode=0x%x\n", + inode, file, file->f_mode)); list_for_each(entry, &cs4281_devs) { @@ -4344,10 +4337,8 @@ CS_DBGOUT(CS_INIT, 2, printk(KERN_INFO - "cs4281: probe() BA0=0x%.8x BA1=0x%.8x pBA0=0x%.8x pBA1=0x%.8x \n", - (unsigned) temp1, (unsigned) temp2, - (unsigned) s->pBA0, (unsigned) s->pBA1)); - + "cs4281: probe() BA0=0x%.8x BA1=0x%.8x pBA0=%p pBA1=%p \n", + (unsigned) temp1, (unsigned) temp2, s->pBA0, s->pBA1)); CS_DBGOUT(CS_INIT, 2, printk(KERN_INFO "cs4281: probe() pBA0phys=0x%.8x pBA1phys=0x%.8x\n", @@ -4394,15 +4385,13 @@ if (pmdev) { CS_DBGOUT(CS_INIT | CS_PM, 4, printk(KERN_INFO - "cs4281: probe() pm_register() succeeded (0x%x).\n", - (unsigned)pmdev)); + "cs4281: probe() pm_register() succeeded (%p).\n", pmdev)); pmdev->data = s; } else { CS_DBGOUT(CS_INIT | CS_PM | CS_ERROR, 0, printk(KERN_INFO - "cs4281: probe() pm_register() failed (0x%x).\n", - (unsigned)pmdev)); + "cs4281: probe() pm_register() failed (%p).\n", pmdev)); s->pm.flags |= CS4281_PM_NOT_REGISTERED; } #endif @@ -4452,7 +4441,7 @@ { struct cs4281_state *s = pci_get_drvdata(pci_dev); // stop DMA controller - synchronize_irq(); + synchronize_irq(s->irq); free_irq(s->irq, s); unregister_sound_dsp(s->dev_audio); unregister_sound_mixer(s->dev_mixer); diff -Nru a/sound/oss/cs4281/cs4281pm-24.c b/sound/oss/cs4281/cs4281pm-24.c --- a/sound/oss/cs4281/cs4281pm-24.c Tue Sep 17 23:47:51 2002 +++ b/sound/oss/cs4281/cs4281pm-24.c Tue Sep 17 23:47:51 2002 @@ -38,16 +38,16 @@ #define CS4281_SUSPEND_TBL cs4281_suspend_tbl #define CS4281_RESUME_TBL cs4281_resume_tbl */ -#define CS4281_SUSPEND_TBL cs4281_null -#define CS4281_RESUME_TBL cs4281_null +#define CS4281_SUSPEND_TBL (int (*) (struct pci_dev *, u32)) cs4281_null +#define CS4281_RESUME_TBL (int (*) (struct pci_dev *)) cs4281_null int cs4281_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) { struct cs4281_state *state; CS_DBGOUT(CS_PM, 2, printk(KERN_INFO - "cs4281: cs4281_pm_callback dev=0x%x rqst=0x%x state=%d\n", - (unsigned)dev,(unsigned)rqst,(unsigned)data)); + "cs4281: cs4281_pm_callback dev=%p rqst=0x%x state=%p\n", + dev,(unsigned)rqst,data)); state = (struct cs4281_state *) dev->data; if (state) { switch(rqst) { @@ -78,7 +78,7 @@ } #else /* CS4281_PM */ -#define CS4281_SUSPEND_TBL cs4281_null -#define CS4281_RESUME_TBL cs4281_null +#define CS4281_SUSPEND_TBL (int (*) (struct pci_dev *, u32)) cs4281_null +#define CS4281_RESUME_TBL (int (*) (struct pci_dev *)) cs4281_null #endif /* CS4281_PM */
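A closing note on the format-string conversions that run through the cs4281 hunks above: casting a pointer to (unsigned) for 0x%.8x silently truncates it on 64-bit targets, which is what these fixes avoid; %p prints the full pointer portably, and %Zu/%Zd are the kernel's old spellings of the size_t/ssize_t length modifier (standard C uses %zu/%zd). A userspace demonstration:

#include <stdio.h>

int main (void)
{
	void *p = (void *) &p;
	size_t n = sizeof(void *);

	printf("p=%p n=%zu\n", p, n);		/* full pointer, correct size type */
	/* printf("p=0x%.8x\n", (unsigned) p);	   would truncate on 64-bit */
	return 0;
}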