diff -urN 2.2.12/Documentation/Configure.help 2.2.12-bigmem/Documentation/Configure.help --- 2.2.12/Documentation/Configure.help Thu Aug 26 14:20:15 1999 +++ 2.2.12-bigmem/Documentation/Configure.help Mon Sep 13 21:37:30 1999 @@ -168,6 +168,11 @@ on the Alpha. The only time you would ever not say Y is to say M in order to debug the code. Say Y unless you know what you are doing. +Big memory support +CONFIG_BIGMEM + This option is required if you want to utilize physical memory which + is not covered by the kernel virtual address space (> 1GB). + Normal PC floppy disk support CONFIG_BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, diff -urN 2.2.12/arch/i386/config.in 2.2.12-bigmem/arch/i386/config.in --- 2.2.12/arch/i386/config.in Thu Aug 12 02:32:28 1999 +++ 2.2.12-bigmem/arch/i386/config.in Mon Sep 13 21:37:30 1999 @@ -54,6 +54,7 @@ mainmenu_option next_comment comment 'General setup' +bool 'BIGMEM support' CONFIG_BIGMEM bool 'Networking support' CONFIG_NET bool 'PCI support' CONFIG_PCI if [ "$CONFIG_PCI" = "y" ]; then diff -urN 2.2.12/arch/i386/kernel/ptrace.c 2.2.12-bigmem/arch/i386/kernel/ptrace.c --- 2.2.12/arch/i386/kernel/ptrace.c Tue Jul 13 00:32:52 1999 +++ 2.2.12-bigmem/arch/i386/kernel/ptrace.c Mon Sep 13 21:37:30 1999 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ pmd_t * pgmiddle; pte_t * pgtable; unsigned long page; + unsigned long retval; repeat: pgdir = pgd_offset(vma->vm_mm, addr); @@ -112,7 +114,10 @@ if (MAP_NR(page) >= max_mapnr) return 0; page += addr & ~PAGE_MASK; - return *(unsigned long *) page; + page = kmap(page, KM_READ); + retval = *(unsigned long *) page; + kunmap(page, KM_READ); + return retval; } /* @@ -165,7 +170,13 @@ } /* this is a hack for non-kernel-mapped video buffers and similar */ if (MAP_NR(page) < max_mapnr) - *(unsigned long *) (page + (addr & ~PAGE_MASK)) = data; + { + unsigned long vaddr; + + vaddr = kmap(page, KM_WRITE); + *(unsigned long *) (vaddr + (addr & ~PAGE_MASK)) = data; + kunmap(vaddr, KM_WRITE); + } /* we're bypassing pagetables, so we have to set the dirty bit ourselves */ /* this should also re-instate whatever read-only mode there was before */ set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); diff -urN 2.2.12/arch/i386/kernel/setup.c 2.2.12-bigmem/arch/i386/kernel/setup.c --- 2.2.12/arch/i386/kernel/setup.c Thu Aug 26 14:20:16 1999 +++ 2.2.12-bigmem/arch/i386/kernel/setup.c Mon Sep 13 21:37:30 1999 @@ -17,6 +17,8 @@ * * IDT Winchip tweaks, misc clean ups. * Dave Jones , August 1999 + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ /* @@ -44,6 +46,7 @@ #ifdef CONFIG_BLK_DEV_RAM #include #endif +#include #include #include #include @@ -339,12 +342,31 @@ #define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */ #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) +#ifdef CONFIG_BIGMEM + bigmem_start = bigmem_end = memory_end; +#endif if (memory_end > MAXMEM) { +#ifdef CONFIG_BIGMEM +#define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1))) + bigmem_start = MAXMEM; + bigmem_end = (memory_end < MAXBIGMEM) ? memory_end : MAXBIGMEM; +#endif memory_end = MAXMEM; +#ifdef CONFIG_BIGMEM + printk(KERN_NOTICE "%ldMB BIGMEM available.\n", + (bigmem_end-bigmem_start)>>20); +#else printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); +#endif } +#if defined(CONFIG_BIGMEM) && defined(BIGMEM_DEBUG) + else { + memory_end -= memory_end/4; + bigmem_start = memory_end; + } +#endif memory_end += PAGE_OFFSET; *memory_start_p = memory_start; diff -urN 2.2.12/arch/i386/mm/Makefile 2.2.12-bigmem/arch/i386/mm/Makefile --- 2.2.12/arch/i386/mm/Makefile Mon Jan 18 02:28:56 1999 +++ 2.2.12-bigmem/arch/i386/mm/Makefile Mon Sep 13 21:37:30 1999 @@ -10,4 +10,8 @@ O_TARGET := mm.o O_OBJS := init.o fault.o ioremap.o extable.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.12/arch/i386/mm/bigmem.c 2.2.12-bigmem/arch/i386/mm/bigmem.c --- 2.2.12/arch/i386/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.12-bigmem/arch/i386/mm/bigmem.c Mon Sep 13 21:37:30 1999 @@ -0,0 +1,33 @@ +/* + * BIGMEM IA32 code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include + +unsigned long bigmem_start, bigmem_end; + +/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the + physical space so we can cache the place of the first one and move + around without checking the pgd every time. */ +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; + if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) + pgprot_val(kmap_prot) |= _PAGE_GLOBAL; +} diff -urN 2.2.12/arch/i386/mm/init.c 2.2.12-bigmem/arch/i386/mm/init.c --- 2.2.12/arch/i386/mm/init.c Thu Aug 12 02:32:28 1999 +++ 2.2.12-bigmem/arch/i386/mm/init.c Mon Sep 13 21:37:30 1999 @@ -2,6 +2,8 @@ * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -20,6 +22,7 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include @@ -148,6 +151,7 @@ { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; + int bigmem = 0; printk("Mem-info:\n"); show_free_areas(); @@ -155,6 +159,8 @@ i = max_mapnr; while (i-- > 0) { total++; + if (PageBIGMEM(mem_map+i)) + bigmem++; if (PageReserved(mem_map+i)) reserved++; else if (PageSwapCache(mem_map+i)) @@ -165,6 +171,7 @@ shared += atomic_read(&mem_map[i].count) - 1; } printk("%d pages of RAM\n",total); + printk("%d pages of BIGMEM\n",bigmem); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); @@ -344,7 +351,12 @@ #endif local_flush_tlb(); +#ifndef CONFIG_BIGMEM return free_area_init(start_mem, end_mem); +#else + kmap_init(); /* run after fixmap_init */ + return free_area_init(start_mem, bigmem_end + PAGE_OFFSET); +#endif } /* @@ -391,11 +403,22 @@ int reservedpages = 0; int datapages = 0; int initpages = 0; + int bigpages = 0; unsigned long tmp; end_mem &= PAGE_MASK; +#ifdef CONFIG_BIGMEM + bigmem_start = PAGE_ALIGN(bigmem_start); + bigmem_end &= PAGE_MASK; +#endif high_memory = (void *) end_mem; +#ifndef CONFIG_BIGMEM max_mapnr = num_physpages = MAP_NR(end_mem); +#else + max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end); + /* cache the bigmem_mapnr */ + bigmem_mapnr = PHYSMAP_NR(bigmem_start); +#endif /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); @@ -455,13 +478,23 @@ #endif free_page(tmp); } - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", +#ifdef CONFIG_BIGMEM + for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) { + clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags); + set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags); + atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1); + free_page(tmp + PAGE_OFFSET); + bigpages++; + } +#endif + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n", (unsigned long) nr_free_pages << (PAGE_SHIFT-10), max_mapnr << (PAGE_SHIFT-10), codepages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10)); + initpages << (PAGE_SHIFT-10), + bigpages << (PAGE_SHIFT-10)); if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -489,15 +522,20 @@ val->sharedram = 0; val->freeram = nr_free_pages << PAGE_SHIFT; val->bufferram = buffermem; + val->totalbig = 0; + val->freebig = nr_free_bigpages << PAGE_SHIFT; while (i-- > 0) { if (PageReserved(mem_map+i)) continue; val->totalram++; + if (PageBIGMEM(mem_map+i)) + val->totalbig++; if (!atomic_read(&mem_map[i].count)) continue; val->sharedram += atomic_read(&mem_map[i].count) - 1; } val->totalram <<= PAGE_SHIFT; val->sharedram <<= PAGE_SHIFT; + val->totalbig <<= PAGE_SHIFT; return; } diff -urN 2.2.12/fs/proc/array.c 2.2.12-bigmem/fs/proc/array.c --- 2.2.12/fs/proc/array.c Thu Aug 26 14:20:19 1999 +++ 2.2.12-bigmem/fs/proc/array.c Mon Sep 13 21:38:43 1999 @@ -42,6 +42,8 @@ * Alan Cox : security fixes. * * + * Gerhard Wichert : added BIGMEM support + * Siemens AG */ #include @@ -374,6 +376,8 @@ "MemShared: %8lu kB\n" "Buffers: %8lu kB\n" "Cached: %8lu kB\n" + "BigTotal: %8lu kB\n" + "BigFree: %8lu kB\n" "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n", i.totalram >> 10, @@ -381,6 +385,8 @@ i.sharedram >> 10, i.bufferram >> 10, page_cache_size << (PAGE_SHIFT - 10), + i.totalbig >> 10, + i.freebig >> 10, i.totalswap >> 10, i.freeswap >> 10); } @@ -436,6 +442,8 @@ return pte_page(pte) + (ptr & ~PAGE_MASK); } +#include + static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer) { unsigned long addr; @@ -448,6 +456,7 @@ addr = get_phys_addr(p, start); if (!addr) return result; + addr = kmap(addr, KM_READ); do { c = *(char *) addr; if (!c) @@ -455,12 +464,19 @@ if (size < PAGE_SIZE) buffer[size++] = c; else + { + kunmap(addr, KM_READ); return result; + } addr++; start++; if (!c && start >= end) + { + kunmap(addr, KM_READ); return result; + } } while (addr & ~PAGE_MASK); + kunmap(addr-1, KM_READ); } return result; } diff -urN 2.2.12/fs/proc/mem.c 2.2.12-bigmem/fs/proc/mem.c --- 2.2.12/fs/proc/mem.c Mon Jan 18 02:26:49 1999 +++ 2.2.12-bigmem/fs/proc/mem.c Mon Sep 13 21:37:30 1999 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -120,7 +121,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > scount) i = scount; + page = (char *) kmap((unsigned long) page, KM_READ); copy_to_user(tmp, page, i); + kunmap((unsigned long) page, KM_READ); addr += i; tmp += i; scount -= i; @@ -177,7 +180,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > count) i = count; + page = (unsigned long) kmap((unsigned long) page, KM_WRITE); copy_from_user(page, tmp, i); + kunmap((unsigned long) page, KM_WRITE); addr += i; tmp += i; count -= i; diff -urN 2.2.12/include/asm-i386/bigmem.h 2.2.12-bigmem/include/asm-i386/bigmem.h --- 2.2.12/include/asm-i386/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.12-bigmem/include/asm-i386/bigmem.h Mon Sep 13 21:37:30 1999 @@ -0,0 +1,69 @@ +/* + * bigmem.h: virtual kernel memory mappings for big memory + * + * Used in CONFIG_BIGMEM systems for memory pages which are not + * addressable by direct kernel virtual adresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + */ + +#ifndef _ASM_BIGMEM_H +#define _ASM_BIGMEM_H + +#include + +#undef BIGMEM_DEBUG /* undef for production */ + +/* declarations for bigmem.c */ +extern unsigned long bigmem_start, bigmem_end; +extern int nr_free_bigpages; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; + +extern void kmap_init(void) __init; + +/* kmap helper functions necessary to access the bigmem pages in kernel */ +#include +#include + +extern inline unsigned long kmap(unsigned long kaddr, enum km_type type) +{ + if (__pa(kaddr) < bigmem_start) + return kaddr; + { + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx); + +#ifdef BIGMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + { + __label__ here; + here: + printk(KERN_ERR "not null pte on CPU %d from %p\n", + smp_processor_id(), &&here); + } +#endif + set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot)); + __flush_tlb_one(vaddr); + + return vaddr | (kaddr & ~PAGE_MASK); + } +} + +extern inline void kunmap(unsigned long vaddr, enum km_type type) +{ +#ifdef BIGMEM_DEBUG + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + { + /* force other mappings to Oops if they'll try to access + this pte without first remap it */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); + } +#endif +} + +#endif /* _ASM_BIGMEM_H */ diff -urN 2.2.12/include/asm-i386/fixmap.h 2.2.12-bigmem/include/asm-i386/fixmap.h --- 2.2.12/include/asm-i386/fixmap.h Thu Sep 9 01:45:06 1999 +++ 2.2.12-bigmem/include/asm-i386/fixmap.h Mon Sep 13 21:37:30 1999 @@ -6,6 +6,8 @@ * for more details. * * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #ifndef _ASM_FIXMAP_H @@ -14,6 +16,10 @@ #include #include #include +#ifdef CONFIG_BIGMEM +#include +#include +#endif /* * Here we define all the compile-time 'special' virtual @@ -52,6 +58,10 @@ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ FIX_LI_PCIA, /* Lithium PCI Bridge A */ FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_BIGMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif __end_of_fixed_addresses }; diff -urN 2.2.12/include/asm-i386/io.h 2.2.12-bigmem/include/asm-i386/io.h --- 2.2.12/include/asm-i386/io.h Thu Sep 9 01:45:07 1999 +++ 2.2.12-bigmem/include/asm-i386/io.h Mon Sep 13 21:37:30 1999 @@ -27,6 +27,7 @@ /* * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. */ #ifdef SLOW_IO_BY_JUMPING @@ -109,12 +110,20 @@ */ extern inline unsigned long virt_to_phys(volatile void * address) { +#ifdef CONFIG_BIGMEM + return __pa(address); +#else return __io_phys(address); +#endif } extern inline void * phys_to_virt(unsigned long address) { +#ifdef CONFIG_BIGMEM + return __va(address); +#else return __io_virt(address); +#endif } extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -urN 2.2.12/include/asm-i386/kmap_types.h 2.2.12-bigmem/include/asm-i386/kmap_types.h --- 2.2.12/include/asm-i386/kmap_types.h Thu Jan 1 01:00:00 1970 +++ 2.2.12-bigmem/include/asm-i386/kmap_types.h Mon Sep 13 21:37:30 1999 @@ -0,0 +1,10 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +enum km_type { + KM_READ, + KM_WRITE, + KM_TYPE_NR, +}; + +#endif diff -urN 2.2.12/include/asm-i386/page.h 2.2.12-bigmem/include/asm-i386/page.h --- 2.2.12/include/asm-i386/page.h Thu Sep 9 01:45:06 1999 +++ 2.2.12-bigmem/include/asm-i386/page.h Mon Sep 13 21:37:30 1999 @@ -88,6 +88,7 @@ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) +#define PHYSMAP_NR(addr) ((unsigned long)(addr) >> PAGE_SHIFT) #endif /* __KERNEL__ */ diff -urN 2.2.12/include/linux/bigmem.h 2.2.12-bigmem/include/linux/bigmem.h --- 2.2.12/include/linux/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.12-bigmem/include/linux/bigmem.h Mon Sep 13 21:37:30 1999 @@ -0,0 +1,50 @@ +#ifndef _LINUX_BIGMEM_H +#define _LINUX_BIGMEM_H + +#include + +#ifdef CONFIG_BIGMEM + +#include + +/* declarations for linux/mm/bigmem.c */ +extern unsigned long bigmem_mapnr; +extern int nr_free_bigpages; + +extern struct page * prepare_bigmem_swapout(struct page *); +extern struct page * replace_with_bigmem(struct page *); +extern unsigned long prepare_bigmem_shm_swapin(unsigned long); + +#else /* CONFIG_BIGMEM */ + +#define prepare_bigmem_swapout(page) page +#define replace_with_bigmem(page) page +#define prepare_bigmem_shm_swapin(page) page +#define kmap(kaddr, type) kaddr +#define kunmap(vaddr, type) do { } while (0) +#define nr_free_bigpages 0 + +#endif /* CONFIG_BIGMEM */ + +/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */ +extern inline void clear_bigpage(unsigned long kaddr) +{ + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + clear_page(vaddr); + kunmap(vaddr, KM_WRITE); +} + +extern inline void copy_bigpage(unsigned long to, unsigned long from) +{ + unsigned long vfrom, vto; + + vfrom = kmap(from, KM_READ); + vto = kmap(to, KM_WRITE); + copy_page(vto, vfrom); + kunmap(vfrom, KM_READ); + kunmap(vto, KM_WRITE); +} + +#endif /* _LINUX_BIGMEM_H */ diff -urN 2.2.12/include/linux/kernel.h 2.2.12-bigmem/include/linux/kernel.h --- 2.2.12/include/linux/kernel.h Thu Aug 12 02:32:33 1999 +++ 2.2.12-bigmem/include/linux/kernel.h Mon Sep 13 21:38:29 1999 @@ -90,7 +90,9 @@ unsigned long totalswap; /* Total swap space size */ unsigned long freeswap; /* swap space still available */ unsigned short procs; /* Number of current processes */ - char _f[22]; /* Pads structure to 64 bytes */ + unsigned long totalbig; /* Total big memory size */ + unsigned long freebig; /* Available big memory size */ + char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. */ }; #endif diff -urN 2.2.12/include/linux/mm.h 2.2.12-bigmem/include/linux/mm.h --- 2.2.12/include/linux/mm.h Wed Sep 8 19:13:57 1999 +++ 2.2.12-bigmem/include/linux/mm.h Mon Sep 13 21:37:30 1999 @@ -144,6 +144,7 @@ #define PG_Slab 9 #define PG_swap_cache 10 #define PG_skip 11 +#define PG_BIGMEM 12 #define PG_reserved 31 /* Make it prettier to test the above... */ @@ -175,6 +176,11 @@ (test_and_clear_bit(PG_dirty, &(page)->flags)) #define PageTestandClearSwapCache(page) \ (test_and_clear_bit(PG_swap_cache, &(page)->flags)) +#ifdef CONFIG_BIGMEM +#define PageBIGMEM(page) (test_bit(PG_BIGMEM, &(page)->flags)) +#else +#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */ +#endif /* * Various page->flags bits: @@ -332,11 +338,17 @@ #define __GFP_HIGH 0x08 #define __GFP_IO 0x10 #define __GFP_SWAP 0x20 +#ifdef CONFIG_BIGMEM +#define __GFP_BIGMEM 0x40 +#else +#define __GFP_BIGMEM 0x0 /* noop */ +#endif #define __GFP_DMA 0x80 #define GFP_BUFFER (__GFP_LOW | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) +#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM) #define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO) @@ -346,6 +358,11 @@ platforms, used as appropriate on others */ #define GFP_DMA __GFP_DMA + +/* Flag - indicates that the buffer can be taken from big memory which is not + directly addressable by the kernel */ + +#define GFP_BIGMEM __GFP_BIGMEM /* vma is the first one with address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. */ diff -urN 2.2.12/ipc/shm.c 2.2.12-bigmem/ipc/shm.c --- 2.2.12/ipc/shm.c Thu Aug 26 14:20:20 1999 +++ 2.2.12-bigmem/ipc/shm.c Mon Sep 13 21:37:31 1999 @@ -4,6 +4,7 @@ * Many improvements/fixes by Bruno Haible. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. + * BIGMEM support, Andrea Arcangeli */ #include @@ -12,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -635,23 +638,32 @@ pte = __pte(shp->shm_pages[idx]); if (!pte_present(pte)) { - unsigned long page = get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) { + oom: oom(current); return 0; } + clear_bigpage(page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } if (!pte_none(pte)) { + struct page * page_map; + + page = prepare_bigmem_shm_swapin(page); + if (!page) + goto oom; rw_swap_page_nocache(READ, pte_val(pte), (char *)page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } + page_map = replace_with_bigmem(&mem_map[MAP_NR(page)]); + page = page_address(page_map); swap_free(pte_val(pte)); shm_swp--; } @@ -681,6 +693,7 @@ unsigned long id, idx; int loop = 0; int counter; + struct page * page_map; counter = shm_rss >> prio; if (!counter || !(swap_nr = get_swap_page())) @@ -709,7 +722,10 @@ page = __pte(shp->shm_pages[idx]); if (!pte_present(page)) goto check_table; - if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))])) + page_map = &mem_map[MAP_NR(pte_page(page))]; + if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) + goto check_table; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map)) goto check_table; swap_attempts++; @@ -718,11 +734,13 @@ swap_free (swap_nr); return 0; } - if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1) + if (atomic_read(&page_map->count) != 1) + goto check_table; + if (!(page_map = prepare_bigmem_swapout(page_map))) goto check_table; shp->shm_pages[idx] = swap_nr; - rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page)); - free_page(pte_page(page)); + rw_swap_page_nocache (WRITE, swap_nr, (char *) page_address(page_map)); + __free_page(page_map); swap_successes++; shm_swp++; shm_rss--; diff -urN 2.2.12/mm/Makefile 2.2.12-bigmem/mm/Makefile --- 2.2.12/mm/Makefile Mon Jan 18 02:27:01 1999 +++ 2.2.12-bigmem/mm/Makefile Mon Sep 13 21:37:31 1999 @@ -12,4 +12,8 @@ vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.12/mm/bigmem.c 2.2.12-bigmem/mm/bigmem.c --- 2.2.12/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.12-bigmem/mm/bigmem.c Mon Sep 13 21:37:31 1999 @@ -0,0 +1,87 @@ +/* + * BIGMEM common code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include +#include + +unsigned long bigmem_mapnr; +int nr_free_bigpages = 0; + +struct page * prepare_bigmem_swapout(struct page * page) +{ + /* if this is a bigmem page so it can't be swapped out directly + otherwise the b_data buffer addresses will break + the lowlevel device drivers. */ + if (PageBIGMEM(page)) + { + unsigned long regular_page; + unsigned long vaddr; + + regular_page = __get_free_page(GFP_ATOMIC); + if (!regular_page) + return NULL; + + vaddr = kmap(page_address(page), KM_READ); + copy_page(regular_page, vaddr); + kunmap(vaddr, KM_READ); + + /* ok, we can just forget about our bigmem page since + we stored its data into the new regular_page. */ + __free_page(page); + + page = MAP_NR(regular_page) + mem_map; + } + return page; +} + +struct page * replace_with_bigmem(struct page * page) +{ + if (!PageBIGMEM(page) && nr_free_bigpages) + { + unsigned long kaddr; + + kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM); + if (kaddr) + { + struct page * bigmem_page; + + bigmem_page = MAP_NR(kaddr) + mem_map; + if (PageBIGMEM(bigmem_page)) + { + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + copy_page(vaddr, page_address(page)); + kunmap(vaddr, KM_WRITE); + + /* Preserve the caching of the swap_entry. */ + bigmem_page->offset = page->offset; + + /* We can just forget the old page since + we stored its data into the new + bigmem_page. */ + __free_page(page); + + page = bigmem_page; + } + } + } + return page; +} + +unsigned long prepare_bigmem_shm_swapin(unsigned long page) +{ + if (!PageBIGMEM(&mem_map[MAP_NR(page)])) + return page; + + free_page(page); + + /* no need to clear the page since it will be rewrited by the + swapin. */ + return __get_free_page(GFP_ATOMIC); +} diff -urN 2.2.12/mm/filemap.c 2.2.12-bigmem/mm/filemap.c --- 2.2.12/mm/filemap.c Tue Jul 13 00:33:10 1999 +++ 2.2.12-bigmem/mm/filemap.c Mon Sep 13 21:37:31 1999 @@ -174,6 +174,9 @@ if (atomic_read(&page->count) != 1) continue; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)) + continue; + count--; /* diff -urN 2.2.12/mm/memory.c 2.2.12-bigmem/mm/memory.c --- 2.2.12/mm/memory.c Thu Aug 12 02:32:33 1999 +++ 2.2.12-bigmem/mm/memory.c Mon Sep 13 21:37:31 1999 @@ -31,12 +31,16 @@ /* * 05.04.94 - Multi-page memory management added for v1.1. * Idea by Alex Bligh (alex@cconcepts.co.uk) + * + * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG + * (Gerhard.Wichert@pdb.siemens.de) */ #include #include #include #include +#include #include #include @@ -53,10 +57,10 @@ static inline void copy_cow_page(unsigned long from, unsigned long to) { if (from == ZERO_PAGE(to)) { - clear_page(to); + clear_bigpage(to); return; } - copy_page(to, from); + copy_bigpage(to, from); } mem_map_t * mem_map = NULL; @@ -623,7 +627,7 @@ struct page * page_map; pte = *page_table; - new_page = __get_free_page(GFP_USER); + new_page = __get_free_page(GFP_BIGUSER); /* Did swap_out() unmapped the protected page while we slept? */ if (pte_val(*page_table) != pte_val(pte)) goto end_wp_page; @@ -817,10 +821,10 @@ { pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); if (write_access) { - unsigned long page = __get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) return 0; - clear_page(page); + clear_bigpage(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; diff -urN 2.2.12/mm/page_alloc.c 2.2.12-bigmem/mm/page_alloc.c --- 2.2.12/mm/page_alloc.c Thu Aug 12 02:32:33 1999 +++ 2.2.12-bigmem/mm/page_alloc.c Mon Sep 13 21:37:31 1999 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -13,6 +14,7 @@ #include #include #include +#include /* export bigmem vars */ #include #include /* for copy_to/from_user */ @@ -45,7 +47,12 @@ #define memory_head(x) ((struct page *)(x)) +#ifdef CONFIG_BIGMEM +#define BIGMEM_LISTS_OFFSET NR_MEM_LISTS +static struct free_area_struct free_area[NR_MEM_LISTS*2]; +#else static struct free_area_struct free_area[NR_MEM_LISTS]; +#endif static inline void init_mem_queue(struct free_area_struct * head) { @@ -101,6 +108,12 @@ #define list(x) (mem_map+(x)) +#ifdef CONFIG_BIGMEM + if (map_nr >= bigmem_mapnr) { + area += BIGMEM_LISTS_OFFSET; + nr_free_bigpages -= mask; + } +#endif map_nr &= mask; nr_free_pages -= mask; while (mask + (1 << (NR_MEM_LISTS-1))) { @@ -215,6 +228,26 @@ new_order++; area++; \ } while (new_order < NR_MEM_LISTS); \ } while (0) +#ifdef CONFIG_BIGMEM +#define RMQUEUEBIG(order) \ +do { struct free_area_struct * area = free_area+order+BIGMEM_LISTS_OFFSET; \ + unsigned long new_order = order; \ + do { struct page *prev = memory_head(area), *ret = prev->next; \ + if (memory_head(area) != ret) { \ + unsigned long map_nr; \ + (prev->next = ret->next)->prev = prev; \ + map_nr = ret - mem_map; \ + MARK_USED(map_nr, new_order, area); \ + nr_free_pages -= 1 << order; \ + nr_free_bigpages -= 1 << order; \ + EXPAND(ret, map_nr, order, new_order, area); \ + spin_unlock_irqrestore(&page_alloc_lock, flags); \ + return ADDRESS(map_nr); \ + } \ + new_order++; area++; \ + } while (new_order < NR_MEM_LISTS); \ +} while (0) +#endif #define EXPAND(map,index,low,high,area) \ do { unsigned long size = 1 << high; \ @@ -256,6 +289,7 @@ if (!(current->flags & PF_MEMALLOC)) { int freed; +#ifndef CONFIG_BIGMEM if (nr_free_pages > freepages.min) { if (!low_on_memory) goto ok_to_allocate; @@ -266,6 +300,32 @@ } low_on_memory = 1; +#else + static int low_on_bigmemory = 0; + + if (gfp_mask & __GFP_BIGMEM) + { + if (nr_free_pages > freepages.min) { + if (!low_on_bigmemory) + goto ok_to_allocate; + if (nr_free_pages >= freepages.high) { + low_on_bigmemory = 0; + goto ok_to_allocate; + } + } + low_on_bigmemory = 1; + } else { + if (nr_free_pages-nr_free_bigpages > freepages.min) { + if (!low_on_memory) + goto ok_to_allocate; + if (nr_free_pages-nr_free_bigpages >= freepages.high) { + low_on_memory = 0; + goto ok_to_allocate; + } + } + low_on_memory = 1; + } +#endif current->flags |= PF_MEMALLOC; freed = try_to_free_pages(gfp_mask); current->flags &= ~PF_MEMALLOC; @@ -278,6 +338,10 @@ if (gfp_mask & __GFP_DMA) RMQUEUE_DMA(order); else { +#ifdef CONFIG_BIGMEM + if (gfp_mask & __GFP_BIGMEM) + RMQUEUEBIG(order); +#endif RMQUEUE_NODMA(order); RMQUEUE_ANY(order); } @@ -307,7 +371,9 @@ unsigned long order, flags; unsigned long total = 0; - printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10)); + printk("Free pages: %6dkB (%6dkB BigMem)\n ( ", + nr_free_pages<<(PAGE_SHIFT-10), + nr_free_bigpages<<(PAGE_SHIFT-10)); printk("Free: %d (%d %d %d)\n", nr_free_pages, freepages.min, @@ -320,6 +386,13 @@ for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) { nr ++; } +#ifdef CONFIG_BIGMEM + for (tmp = free_area[BIGMEM_LISTS_OFFSET+order].next; + tmp != memory_head(free_area+BIGMEM_LISTS_OFFSET+order); + tmp = tmp->next) { + nr ++; + } +#endif total += nr * ((PAGE_SIZE>>10) << order); printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order)); } @@ -372,6 +445,9 @@ for (i = 0 ; i < NR_MEM_LISTS ; i++) { unsigned long bitmap_size; init_mem_queue(free_area+i); +#ifdef CONFIG_BIGMEM + init_mem_queue(free_area+BIGMEM_LISTS_OFFSET+i); +#endif mask += mask; end_mem = (end_mem + ~mask) & mask; bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i); @@ -380,6 +456,11 @@ free_area[i].map = (unsigned int *) start_mem; memset((void *) start_mem, 0, bitmap_size); start_mem += bitmap_size; +#ifdef CONFIG_BIGMEM + free_area[BIGMEM_LISTS_OFFSET+i].map = (unsigned int *) start_mem; + memset((void *) start_mem, 0, bitmap_size); + start_mem += bitmap_size; +#endif } return start_mem; } @@ -469,6 +550,8 @@ * this process. */ delete_from_swap_cache(page_map); + page_map = replace_with_bigmem(page_map); + page = page_address(page_map); set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)))); return; } diff -urN 2.2.12/mm/vmalloc.c 2.2.12-bigmem/mm/vmalloc.c --- 2.2.12/mm/vmalloc.c Tue Jul 13 00:33:04 1999 +++ 2.2.12-bigmem/mm/vmalloc.c Mon Sep 13 21:37:31 1999 @@ -2,6 +2,7 @@ * linux/mm/vmalloc.c * * Copyright (C) 1993 Linus Torvalds + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -94,7 +95,7 @@ unsigned long page; if (!pte_none(*pte)) printk("alloc_area_pte: page already exists\n"); - page = __get_free_page(GFP_KERNEL); + page = __get_free_page(GFP_KERNEL|GFP_BIGMEM); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, PAGE_KERNEL)); diff -urN 2.2.12/mm/vmscan.c 2.2.12-bigmem/mm/vmscan.c --- 2.2.12/mm/vmscan.c Tue Jul 13 00:33:04 1999 +++ 2.2.12-bigmem/mm/vmscan.c Mon Sep 13 21:37:31 1999 @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -59,7 +60,8 @@ if (PageReserved(page_map) || PageLocked(page_map) - || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))) + || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) + || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))) return 0; /* @@ -149,6 +151,9 @@ if (!entry) return 0; /* No swap space left */ + if (!(page_map = prepare_bigmem_swapout(page_map))) + goto out_swap_free; + vma->vm_mm->rss--; tsk->nswap++; set_pte(page_table, __pte(entry)); @@ -160,10 +165,14 @@ set_bit(PG_locked, &page_map->flags); /* OK, do a physical asynchronous write to swap. */ - rw_swap_page(WRITE, entry, (char *) page, 0); + rw_swap_page(WRITE, entry, (char *) page_address(page_map), 0); __free_page(page_map); return 1; + + out_swap_free: + swap_free(entry); + return 0; } /* @@ -485,7 +494,9 @@ * up on a more timely basis. */ do { - if (nr_free_pages >= freepages.high) + /* kswapd is critical to provide GFP_ATOMIC + allocations (not GFP_BIGMEM ones). */ + if (nr_free_pages - nr_free_bigpages >= freepages.high) break; if (!do_try_to_free_pages(GFP_KSWAPD))