diff -urN 2.2.17pre4/Documentation/Configure.help 2.2.17pre4-bigmem/Documentation/Configure.help
--- 2.2.17pre4/Documentation/Configure.help	Mon Jun 19 01:51:11 2000
+++ 2.2.17pre4-bigmem/Documentation/Configure.help	Mon Jun 19 16:31:26 2000
@@ -168,6 +168,11 @@
   on the Alpha. The only time you would ever not say Y is to say M in
   order to debug the code. Say Y unless you know what you are doing.
 
+Big memory support
+CONFIG_BIGMEM
+  This option is required if you want to utilize physical memory which
+  is not covered by the kernel virtual address space (> 1GB).
+
 Normal PC floppy disk support
 CONFIG_BLK_DEV_FD
   If you want to use the floppy disk drive(s) of your PC under Linux,
diff -urN 2.2.17pre4/arch/alpha/config.in 2.2.17pre4-bigmem/arch/alpha/config.in
--- 2.2.17pre4/arch/alpha/config.in	Tue Jun 13 03:48:12 2000
+++ 2.2.17pre4-bigmem/arch/alpha/config.in	Mon Jun 19 16:31:26 2000
@@ -21,6 +21,7 @@
 mainmenu_option next_comment
 comment 'General setup'
 
+bool 'BIGMEM support' CONFIG_BIGMEM
 choice 'Alpha system type' \
     "Generic        CONFIG_ALPHA_GENERIC \
      Alcor/Alpha-XLT    CONFIG_ALPHA_ALCOR \
diff -urN 2.2.17pre4/arch/alpha/kernel/setup.c 2.2.17pre4-bigmem/arch/alpha/kernel/setup.c
--- 2.2.17pre4/arch/alpha/kernel/setup.c	Tue Jun 13 03:48:12 2000
+++ 2.2.17pre4-bigmem/arch/alpha/kernel/setup.c	Mon Jun 19 16:31:26 2000
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #ifdef CONFIG_RTC
 #include
@@ -372,16 +373,42 @@
         high = tmp;
     }
 
-    /* Round it up to an even number of pages. */
-    high = (high + PAGE_SIZE) & (PAGE_MASK*2);
+#ifndef CONFIG_BIGMEM
+#define MAX_MEMORY 0x80000000UL
+#else
+#define LOW_MEMORY 0x80000000UL
+#define MAX_MEMORY (VMALLOC_START-PAGE_OFFSET)
+#endif
 
     /* Enforce maximum of 2GB even if there is more,
      * but only if the platform (support) cannot handle it.
      */
-    if (high > 0x80000000UL) {
-        printk("Cropping memory from %luMB to 2048MB\n", high >> 20);
-        high = 0x80000000UL;
+    if (high > MAX_MEMORY) {
+        printk("Cropping memory from %luMB to %luMB\n",
+               high>>20, MAX_MEMORY>>20);
+        high = MAX_MEMORY;
+    }
+
+#ifdef CONFIG_BIGMEM
+    bigmem_start = bigmem_end = high;
+    if (high > LOW_MEMORY)
+    {
+        high = bigmem_start = LOW_MEMORY;
+        printk(KERN_NOTICE "%luMB BIGMEM available\n",
+               (bigmem_end-bigmem_start)>>20);
     }
+#ifdef BIGMEM_DEBUG
+    else
+    {
+        high -= high/4;
+        bigmem_start = high;
+        printk(KERN_NOTICE "emulating %luMB BIGMEM\n",
+               (bigmem_end-bigmem_start)>>20);
+    }
+#endif
+    bigmem_start += PAGE_OFFSET;
+    bigmem_end += PAGE_OFFSET;
+#endif
 
     return (unsigned long) __va(high);
 }
diff -urN 2.2.17pre4/arch/alpha/mm/init.c 2.2.17pre4-bigmem/arch/alpha/mm/init.c
--- 2.2.17pre4/arch/alpha/mm/init.c	Tue Jun 13 03:48:12 2000
+++ 2.2.17pre4-bigmem/arch/alpha/mm/init.c	Mon Jun 19 16:31:26 2000
@@ -18,6 +18,7 @@
 #ifdef CONFIG_BLK_DEV_INITRD
 #include
 #endif
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -30,6 +31,11 @@
 extern void die_if_kernel(char *,struct pt_regs *,long);
 extern void show_net_buffers(void);
 
+static unsigned long totalram_pages, totalbig_pages;
+
+#ifdef CONFIG_BIGMEM
+unsigned long bigmem_start, bigmem_end;
+#endif
 
 struct thread_struct original_pcb;
 
 #ifndef __SMP__
@@ -232,7 +238,11 @@
     struct memdesc_struct * memdesc;
 
     /* initialize mem_map[] */
+#ifndef CONFIG_BIGMEM
     start_mem = free_area_init(start_mem, end_mem);
+#else
+    start_mem = free_area_init(start_mem, bigmem_end);
+#endif
 
     /* find free clusters, update mem_map[] accordingly */
     memdesc = (struct memdesc_struct *)
@@ -306,9 +316,20 @@
 mem_init(unsigned long start_mem, unsigned long end_mem)
 {
     unsigned long tmp;
+    unsigned long reservedpages = 0;
 
+#ifdef CONFIG_BIGMEM
+    bigmem_start = PAGE_ALIGN(bigmem_start);
+    bigmem_end &= PAGE_MASK;
+#endif
     end_mem &= PAGE_MASK;
+#ifndef CONFIG_BIGMEM
     max_mapnr = num_physpages = MAP_NR(end_mem);
+#else
+    max_mapnr = num_physpages = MAP_NR(bigmem_end);
+    /* cache the bigmem_mapnr */
+    bigmem_mapnr = MAP_NR(bigmem_start);
+#endif
     high_memory = (void *) end_mem;
     start_mem = PAGE_ALIGN(start_mem);
@@ -325,7 +346,10 @@
         if (tmp >= MAX_DMA_ADDRESS)
             clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
         if (PageReserved(mem_map+MAP_NR(tmp)))
+        {
+            reservedpages++;
             continue;
+        }
         atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
 #ifdef CONFIG_BLK_DEV_INITRD
         if (initrd_start && tmp >= initrd_start && tmp < initrd_end)
@@ -334,8 +358,21 @@
         kill_page(tmp);
         free_page(tmp);
     }
-    tmp = nr_free_pages << (PAGE_SHIFT - 10);
+#ifdef CONFIG_BIGMEM
+    for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE)
+    {
+        clear_bit(PG_reserved, &mem_map[MAP_NR(tmp)].flags);
+        set_bit(PG_BIGMEM, &mem_map[MAP_NR(tmp)].flags);
+        atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
+        kill_page(tmp);
+        free_page(tmp);
+        totalbig_pages++;
+    }
+#endif
+    tmp = (unsigned long) nr_free_pages << (PAGE_SHIFT - 10);
     printk("Memory: %luk available\n", tmp);
+
+    totalram_pages = max_mapnr - reservedpages;
     return;
 }
@@ -359,22 +396,11 @@
 void
 si_meminfo(struct sysinfo *val)
 {
-    int i;
-
-    i = max_mapnr;
-    val->totalram = 0;
+    val->totalram = totalram_pages << PAGE_SHIFT;
     val->sharedram = 0;
     val->freeram = ((unsigned long)nr_free_pages) << PAGE_SHIFT;
     val->bufferram = buffermem;
-    while (i-- > 0) {
-        if (PageReserved(mem_map+i))
-            continue;
-        val->totalram++;
-        if (!atomic_read(&mem_map[i].count))
-            continue;
-        val->sharedram += atomic_read(&mem_map[i].count) - 1;
-    }
-    val->totalram <<= PAGE_SHIFT;
-    val->sharedram <<= PAGE_SHIFT;
+    val->totalbig = totalbig_pages << PAGE_SHIFT;
+    val->freebig = (unsigned long) nr_free_bigpages << PAGE_SHIFT;
     return;
 }
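
Note on the alpha half of the patch: kmap()/kunmap() are no-ops there (see
include/asm-alpha/bigmem.h further down), so BIGMEM on alpha only marks the
memory that falls outside the 2GB PCI DMA window. A standalone sketch of the
sizing logic in the hunk above; the 3GB "detected memory" value is an assumed
example, not part of the patch:

    #include <stdio.h>

    #define LOW_MEMORY 0x80000000UL     /* the 2GB PCI DMA window limit */

    int main(void)
    {
        unsigned long high = 0xc0000000UL;  /* assume 3GB were found */
        unsigned long bigmem_start, bigmem_end;

        bigmem_start = bigmem_end = high;
        if (high > LOW_MEMORY) {
            high = bigmem_start = LOW_MEMORY;
            printf("%luMB BIGMEM available\n",
                   (bigmem_end - bigmem_start) >> 20);    /* 1024MB */
        }
        printf("%luMB direct-mapped\n", high >> 20);      /* 2048MB */
        return 0;
    }
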
diff -urN 2.2.17pre4/arch/i386/config.in 2.2.17pre4-bigmem/arch/i386/config.in
--- 2.2.17pre4/arch/i386/config.in	Tue Jun 13 03:48:12 2000
+++ 2.2.17pre4-bigmem/arch/i386/config.in	Mon Jun 19 16:31:26 2000
@@ -54,6 +54,7 @@
 mainmenu_option next_comment
 comment 'General setup'
 
+bool 'BIGMEM support' CONFIG_BIGMEM
 bool 'Networking support' CONFIG_NET
 bool 'PCI support' CONFIG_PCI
 if [ "$CONFIG_PCI" = "y" ]; then
diff -urN 2.2.17pre4/arch/i386/kernel/mtrr.c 2.2.17pre4-bigmem/arch/i386/kernel/mtrr.c
--- 2.2.17pre4/arch/i386/kernel/mtrr.c	Thu May  4 13:00:36 2000
+++ 2.2.17pre4-bigmem/arch/i386/kernel/mtrr.c	Mon Jun 19 16:31:27 2000
@@ -467,9 +467,9 @@
 static void intel_get_mtrr (unsigned int reg, unsigned long *base,
                             unsigned long *size, mtrr_type *type)
 {
-    unsigned long dummy, mask_lo, base_lo;
+    unsigned long mask_lo, mask_hi, base_lo, base_hi;
 
-    rdmsr (MTRRphysMask_MSR(reg), mask_lo, dummy);
+    rdmsr (MTRRphysMask_MSR(reg), mask_lo, mask_hi);
     if ((mask_lo & 0x800) == 0)
     {
         /* Invalid (i.e. free) range. */
         *base = 0;
@@ -478,20 +478,17 @@
         return;
     }
 
-    rdmsr(MTRRphysBase_MSR(reg), base_lo, dummy);
+    rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
 
-    /* We ignore the extra address bits (32-35). If someone wants to
-       run x86 Linux on a machine with >4GB memory, this will be the
-       least of their problems. */
+    /* Work out the shifted address mask. */
+    mask_lo = 0xff000000 | mask_hi << (32 - PAGE_SHIFT)
+              | mask_lo >> PAGE_SHIFT;
 
-    /* Clean up mask_lo so it gives the real address mask. */
-    mask_lo = (mask_lo & 0xfffff000UL);
     /* This works correctly if size is a power of two, i.e. a
        contiguous range. */
-    *size = ~(mask_lo - 1);
-
-    *base = (base_lo & 0xfffff000UL);
-    *type = (base_lo & 0xff);
+    *size = -mask_lo;
+    *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+    *type = base_lo & 0xff;
 }   /*  End Function intel_get_mtrr  */
 
 static void cyrix_get_arr (unsigned int reg, unsigned long *base,
@@ -516,13 +513,13 @@
     /* Enable interrupts if it was enabled previously */
     __restore_flags (flags);
     shift = ((unsigned char *) base)[1] & 0x0f;
-    *base &= 0xfffff000UL;
+    *base >>= PAGE_SHIFT;
 
     /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
      * Note: shift==0xf means 4G, this is unsupported.
      */
     if (shift)
-      *size = (reg < 7 ? 0x800UL : 0x20000UL) << shift;
+      *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
     else
       *size = 0;
@@ -555,7 +552,7 @@
     /* Upper dword is region 1, lower is region 0 */
     if (reg == 1) low = high;
     /* The base masks off on the right alignment */
-    *base = low & 0xFFFE0000;
+    *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
     *type = 0;
     if (low & 1) *type = MTRR_TYPE_UNCACHABLE;
     if (low & 2) *type = MTRR_TYPE_WRCOMB;
@@ -580,7 +577,7 @@
      *  *128K   ...
      */
     low = (~low) & 0x1FFFC;
-    *size = (low + 4) << 15;
+    *size = (low + 4) << (15 - PAGE_SHIFT);
     return;
 }   /*  End Function amd_get_mtrr  */
@@ -599,8 +596,8 @@
     unsigned i;
     u32 tb;
     tb = centaur_ctx->mcr[reg].low & 0xfff;
-    *base = centaur_ctx->mcr[reg].high & 0xfffff000;
-    *size = (~(centaur_ctx->mcr[reg].low & 0xfffff000))+1;
+    *base = centaur_ctx->mcr[reg].high >> PAGE_SHIFT;
+    *size = -(centaur_ctx->mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
     if (*size) {
         for( i=0; i<8; i++) {
             if(centaur_ctx->type_bits[i]==tb) {
@@ -637,8 +634,10 @@
     }
     else
     {
-        wrmsr (MTRRphysBase_MSR (reg), base | type, 0);
-        wrmsr (MTRRphysMask_MSR (reg), ~(size - 1) | 0x800, 0);
+        wrmsr (MTRRphysBase_MSR (reg), base << PAGE_SHIFT | type,
+               (base & 0xf00000) >> (32 - PAGE_SHIFT));
+        wrmsr (MTRRphysMask_MSR (reg), -size << PAGE_SHIFT | 0x800,
+               (-size & 0xf00000) >> (32 - PAGE_SHIFT));
     }
     if (do_safe) set_mtrr_done (&ctxt);
 }   /*  End Function intel_set_mtrr_up  */
@@ -652,7 +651,9 @@
     arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
 
     /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
-    size >>= (reg < 7 ? 12 : 18);
+    if (reg >= 7)
+        size >>= 6;
+    size &= 0x7fff; /* make sure arr_size <= 14 */
     for(arr_size = 0; size; arr_size++, size >>= 1);
@@ -673,6 +674,7 @@
     }
 
     if (do_safe) set_mtrr_prepare (&ctxt);
+    base <<= PAGE_SHIFT;
     setCx86(arr,   ((unsigned char *) &base)[3]);
     setCx86(arr+1, ((unsigned char *) &base)[2]);
     setCx86(arr+2, (((unsigned char *) &base)[1]) | arr_size);
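
The get/set functions above now keep MTRR base and size in 4 kiB page units
rather than in bytes, which is what lets 36-bit physical addresses fit into a
32-bit unsigned long. A standalone sketch of the wrmsr operand split performed
by intel_set_mtrr_up; the base value is an assumed example, and the demo
assumes a host with 64-bit long:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long base = 0x100000;  /* page number of the 4GB boundary */
        unsigned int lo = (unsigned int)(base << PAGE_SHIFT);
        unsigned int hi = (base & 0xf00000) >> (32 - PAGE_SHIFT);

        /* bits 32-35 of the physical base land in the MSR high word */
        printf("MTRRphysBase: lo=0x%08x hi=0x%x\n", lo, hi);  /* lo=0 hi=1 */
        return 0;
    }
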
@@ -692,34 +694,36 @@
     [RETURNS] Nothing.
 */
 {
-    u32 low, high;
+    u32 regs[2];
     struct set_mtrr_context ctxt;
 
     if (do_safe) set_mtrr_prepare (&ctxt);
     /*
      *  Low is MTRR0 , High MTRR 1
      */
-    rdmsr (0xC0000085, low, high);
+    rdmsr (0xC0000085, regs[0], regs[1]);
     /*
      *  Blank to disable
      */
     if (size == 0)
-        *(reg ? &high : &low) = 0;
+        regs[reg] = 0;
     else
-        /* Set the register to the base (already shifted for us), the
-           type (off by one) and an inverted bitmask of the size
-           The size is the only odd bit. We are fed say 512K
-           We invert this and we get 111 1111 1111 1011 but
-           if you subtract one and invert you get the desired
-           111 1111 1111 1100 mask
-        */
-        *(reg ? &high : &low)=(((~(size-1))>>15)&0x0001FFFC)|base|(type+1);
+        /* Set the register to the base, the type (off by one) and an
+           inverted bitmask of the size The size is the only odd
+           bit. We are fed say 512K We invert this and we get 111 1111
+           1111 1011 but if you subtract one and invert you get the
+           desired 111 1111 1111 1100 mask
+
+           But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
+        */
+        regs[reg] = (-size>>(15-PAGE_SHIFT) & 0x0001FFFC)
+                    | (base<<PAGE_SHIFT) | (type+1);
@@ ... @@
-        low |= centaur_ctx->type_bits[type];
+        high = base << PAGE_SHIFT;
+        low = -size << PAGE_SHIFT | centaur_ctx->type_bits[type];
     }
     centaur_ctx->mcr[reg].high = high;
     centaur_ctx->mcr[reg].low = low;
@@ -1041,7 +1045,7 @@
     for (i = 0; i < max; ++i)
     {
         (*get_mtrr) (i, &lbase, &lsize, &ltype);
-        if (lsize < 1) return i;
+        if (lsize == 0) return i;
     }
     return -ENOSPC;
 }   /*  End Function generic_get_free_region  */
@@ -1058,7 +1062,7 @@
     unsigned long lbase, lsize;
 
     /* If we are to set up a region >32M then look at ARR7 immediately */
-    if (size > 0x2000000UL)
+    if (size > 0x2000UL)
     {
         cyrix_get_arr (7, &lbase, &lsize, &ltype);
         if (lsize < 1) return 7;
         /* else try ARR0-ARR6 first */
@@ -1066,11 +1070,11 @@
         for (i = 0; i < 7; i++)
         {
             cyrix_get_arr (i, &lbase, &lsize, &ltype);
-            if (lsize < 1) return i;
+            if (lsize == 0) return i;
         }
         /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
         cyrix_get_arr (i, &lbase, &lsize, &ltype);
-        if ((lsize < 1) && (size >= 0x40000)) return i;
+        if ((lsize == 0) && (size >= 0x40)) return i;
     }
     return -ENOSPC;
 }   /*  End Function cyrix_get_free_region  */
@@ -1129,7 +1133,7 @@
         /* Fall through */
       case X86_VENDOR_CYRIX:
       case X86_VENDOR_CENTAUR:
-        if ( (base & 0xfff) || (size & 0xfff) )
+        if ( (base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)) )
         {
             printk ("mtrr: size and base must be multiples of 4 kiB\n");
             printk ("mtrr: size: %lx  base: %lx\n", size, base);
@@ -1142,7 +1146,7 @@
             return -EINVAL;
         }
     }
-    else if (base + size < 0x100000) /* Cyrix */
+    else if (base + size < 0x100000) /* Not Centaur */
     {
         printk ("mtrr: cannot set region below 1 MiB (0x%lx,0x%lx)\n",
                 base, size);
@@ -1164,6 +1168,12 @@
         return -EINVAL;
         /*break;*/
     }
+
+    /* For all CPU types, the checks above should have ensured that
+       base and size are page aligned */
+    base >>= PAGE_SHIFT;
+    size >>= PAGE_SHIFT;
+
     /* If the type is WC, check that this processor supports it */
     if ( (type == MTRR_TYPE_WRCOMB) && !have_wrcomb () )
     {
@@ -1183,7 +1193,8 @@
         if ( (base < lbase) || (base + size > lbase + lsize) )
         {
             spin_unlock (&main_lock);
-            printk ("mtrr: 0x%lx,0x%lx overlaps existing 0x%lx,0x%lx\n",
+            printk ("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+                    " 0x%lx000,0x%lx000\n",
                     base, size, lbase, lsize);
             return -EINVAL;
         }
@@ -1193,7 +1204,7 @@
         if ((boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) &&
             (type == MTRR_TYPE_UNCACHABLE)) continue;
         spin_unlock (&main_lock);
-        printk ( "mtrr: type mismatch for %lx,%lx old: %s new: %s\n",
+        printk ( "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
                  base, size, attrib_to_str (ltype), attrib_to_str (type) );
         return -EINVAL;
 }
@@ -1241,7 +1252,8 @@
         for (i = 0; i < max; ++i)
         {
             (*get_mtrr) (i, &lbase, &lsize, &ltype);
-            if ( (lbase == base) && (lsize == size) )
+            if (lbase < 0x100000 && lbase << PAGE_SHIFT == base
+                && lsize < 0x100000 && lsize << PAGE_SHIFT == size)
             {
                 reg = i;
                 break;
@@ -1250,7 +1262,7 @@
         if (reg < 0)
         {
             spin_unlock (&main_lock);
-            printk ("mtrr: no MTRR for %lx,%lx found\n", base, size);
+            printk ("mtrr: no MTRR for %lx000,%lx000 found\n", base, size);
             return -EINVAL;
         }
     }
@@ -1431,7 +1443,16 @@
             return -EFAULT;
         if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL;
         (*get_mtrr) (gentry.regnum, &gentry.base, &gentry.size, &type);
-        gentry.type = type;
+
+        /* Hide entries that go above 4GB */
+        if (gentry.base + gentry.size > 0x100000 || gentry.size == 0x100000)
+            gentry.base = gentry.size = gentry.type = 0;
+        else {
+            gentry.base <<= PAGE_SHIFT;
+            gentry.size <<= PAGE_SHIFT;
+            gentry.type = type;
+        }
+
         if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) )
             return -EFAULT;
         break;
@@ -1523,24 +1544,24 @@
     for (i = 0; i < max; i++)
     {
         (*get_mtrr) (i, &base, &size, &type);
-        if (size < 1) usage_table[i] = 0;
+        if (size == 0) usage_table[i] = 0;
         else
         {
-            if (size < 0x100000)
+            if (size < 0x100000 >> PAGE_SHIFT)
             {
-                /* 1MB */
+                /* less than 1MB */
                 factor = 'k';
-                size >>= 10;
+                size <<= PAGE_SHIFT - 10;
             }
             else
             {
                 factor = 'M';
-                size >>= 20;
+                size >>= 20 - PAGE_SHIFT;
             }
             sprintf (ascii_buffer + ascii_buf_bytes,
-                "reg%02i: base=0x%08lx (%4liMB), size=%4li%cB: %s, count=%d\n",
-                i, base, base>>20, size, factor,
+                "reg%02i: base=0x%05lx000 (%4liMB), size=%4li%cB: %s, count=%d\n",
+                i, base, base >> (20 - PAGE_SHIFT), size, factor,
                 attrib_to_str (type), usage_table[i]);
             ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
         }
diff -urN 2.2.17pre4/arch/i386/kernel/ptrace.c 2.2.17pre4-bigmem/arch/i386/kernel/ptrace.c
--- 2.2.17pre4/arch/i386/kernel/ptrace.c	Thu May  4 13:00:36 2000
+++ 2.2.17pre4-bigmem/arch/i386/kernel/ptrace.c	Mon Jun 19 16:31:27 2000
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -81,6 +82,7 @@
     pmd_t * pgmiddle;
     pte_t * pgtable;
     unsigned long page;
+    unsigned long retval;
     int fault;
 
 repeat:
@@ -126,7 +128,10 @@
     if (MAP_NR(page) >= max_mapnr)
         return 0;
     page += addr & ~PAGE_MASK;
-    return *(unsigned long *) page;
+    page = kmap(page, KM_READ);
+    retval = *(unsigned long *) page;
+    kunmap(page, KM_READ);
+    return retval;
 }
 
 /*
@@ -196,7 +201,13 @@
     }
     /* this is a hack for non-kernel-mapped video buffers and similar */
     if (MAP_NR(page) < max_mapnr)
-        *(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
+    {
+        unsigned long vaddr;
+
+        vaddr = kmap(page, KM_WRITE);
+        *(unsigned long *) (vaddr + (addr & ~PAGE_MASK)) = data;
+        kunmap(vaddr, KM_WRITE);
+    }
     /* we're bypassing pagetables, so we have to set the dirty bit ourselves */
     /* this should also re-instate whatever read-only mode there was before */
     set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
diff -urN 2.2.17pre4/arch/i386/kernel/setup.c 2.2.17pre4-bigmem/arch/i386/kernel/setup.c
--- 2.2.17pre4/arch/i386/kernel/setup.c	Mon Jun 19 01:51:11 2000
+++ 2.2.17pre4-bigmem/arch/i386/kernel/setup.c	Mon Jun 19 16:31:27 2000
@@ -29,6 +29,8 @@
  *  Dragan Stancevic, May 2000
  *
  *  Transmeta CPU detection.  H. Peter Anvin, May 2000
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
 /*
@@ -54,6 +56,7 @@
 #ifdef CONFIG_BLK_DEV_RAM
 #include
 #endif
+#include <linux/bigmem.h>
 #include
 #include
 #include
@@ -383,12 +386,31 @@
 #define VMALLOC_RESERVE (64 << 20)  /* 64MB for vmalloc */
 #define MAXMEM  ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
 
+#ifdef CONFIG_BIGMEM
+    bigmem_start = bigmem_end = memory_end;
+#endif
     if (memory_end > MAXMEM)
     {
+#ifdef CONFIG_BIGMEM
+#define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1)))
+        bigmem_start = MAXMEM;
+        bigmem_end = (memory_end < MAXBIGMEM) ? memory_end : MAXBIGMEM;
+#endif
         memory_end = MAXMEM;
+#ifdef CONFIG_BIGMEM
+        printk(KERN_NOTICE "%ldMB BIGMEM available.\n",
+               (bigmem_end-bigmem_start)>>20);
+#else
         printk(KERN_WARNING "Warning only %ldMB will be used.\n",
                MAXMEM>>20);
+#endif
     }
+#if defined(CONFIG_BIGMEM) && defined(BIGMEM_DEBUG)
+    else {
+        memory_end -= memory_end/4;
+        bigmem_start = memory_end;
+    }
+#endif
 
     memory_end += PAGE_OFFSET;
     *memory_start_p = memory_start;
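
For reference, with the stock 3GB/1GB split (PAGE_OFFSET 0xC0000000 - an
assumption here; it is a configurable constant) the MAXMEM/MAXBIGMEM values
above work out as follows. The arithmetic is done in 32-bit unsigned math,
as the kernel does:

    #include <stdio.h>

    #define PAGE_OFFSET     0xC0000000u
    #define VMALLOC_RESERVE (64u << 20)
    #define MAXMEM          (0u - PAGE_OFFSET - VMALLOC_RESERVE)
    #define MAXBIGMEM       (~(VMALLOC_RESERVE - 1))

    int main(void)
    {
        printf("direct map ends at %uMB\n", MAXMEM >> 20);    /* 960 */
        printf("BIGMEM ends at %uMB\n", MAXBIGMEM >> 20);     /* 4032 */
        return 0;
    }

So memory between 960MB and roughly 4GB-64MB is reachable only as BIGMEM.
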
diff -urN 2.2.17pre4/arch/i386/mm/Makefile 2.2.17pre4-bigmem/arch/i386/mm/Makefile
--- 2.2.17pre4/arch/i386/mm/Makefile	Mon Jan 18 02:28:56 1999
+++ 2.2.17pre4-bigmem/arch/i386/mm/Makefile	Mon Jun 19 16:31:27 2000
@@ -10,4 +10,8 @@
 O_TARGET := mm.o
 O_OBJS   := init.o fault.o ioremap.o extable.o
 
+ifeq ($(CONFIG_BIGMEM),y)
+O_OBJS += bigmem.o
+endif
+
 include $(TOPDIR)/Rules.make
diff -urN 2.2.17pre4/arch/i386/mm/bigmem.c 2.2.17pre4-bigmem/arch/i386/mm/bigmem.c
--- 2.2.17pre4/arch/i386/mm/bigmem.c	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/arch/i386/mm/bigmem.c	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,35 @@
+/*
+ * BIGMEM IA32 code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ */
+
+#include <linux/mm.h>
+#include <asm/bigmem.h>
+
+unsigned long bigmem_start, bigmem_end;
+
+/* NOTE: fixmap_init allocates all the fixmap pagetables contiguous on the
+   physical space so we can cache the place of the first one and move
+   around without checking the pgd every time. */
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr) \
+    pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
+{
+    unsigned long kmap_vstart;
+
+    /* cache the first kmap pte */
+    kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+    kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+    kmap_prot = PAGE_KERNEL;
+#if 0
+    if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
+        pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
+#endif
+}
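
The point of caching kmap_pte above: since the fixmap page tables are
physically contiguous, the pte backing any kmap slot can be reached by plain
pointer arithmetic instead of a full pgd/pmd walk on every kmap(). A
kernel-context sketch (not standalone code; the names are the ones defined
in this file and in include/asm-i386/bigmem.h below):

    /* sketch: how kmap() locates its pte without walking the page tables */
    enum fixed_addresses idx = type + KM_TYPE_NR * smp_processor_id();
    pte_t *pte = kmap_pte - idx;    /* fixmap addresses grow downwards */
    unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
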
diff -urN 2.2.17pre4/arch/i386/mm/init.c 2.2.17pre4-bigmem/arch/i386/mm/init.c
--- 2.2.17pre4/arch/i386/mm/init.c	Sat Oct 23 15:31:08 1999
+++ 2.2.17pre4-bigmem/arch/i386/mm/init.c	Mon Jun 19 16:31:27 2000
@@ -2,6 +2,8 @@
  * linux/arch/i386/mm/init.c
  *
  * Copyright (C) 1995  Linus Torvalds
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
 #include
@@ -20,6 +22,7 @@
 #ifdef CONFIG_BLK_DEV_INITRD
 #include
 #endif
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -28,6 +31,8 @@
 #include
 #include
 
+static int totalram_pages, totalbig_pages;
+
 extern void show_net_buffers(void);
 extern unsigned long init_smp_mappings(unsigned long);
@@ -148,6 +153,7 @@
 {
     int i,free = 0,total = 0,reserved = 0;
     int shared = 0, cached = 0;
+    int bigmem = 0;
 
     printk("Mem-info:\n");
     show_free_areas();
@@ -155,6 +161,8 @@
     i = max_mapnr;
     while (i-- > 0) {
         total++;
+        if (PageBIGMEM(mem_map+i))
+            bigmem++;
         if (PageReserved(mem_map+i))
             reserved++;
         else if (PageSwapCache(mem_map+i))
@@ -165,6 +173,7 @@
             shared += atomic_read(&mem_map[i].count) - 1;
     }
     printk("%d pages of RAM\n",total);
+    printk("%d pages of BIGMEM\n",bigmem);
     printk("%d reserved pages\n",reserved);
     printk("%d pages shared\n",shared);
     printk("%d pages swap cached\n",cached);
@@ -344,7 +353,12 @@
 #endif
     local_flush_tlb();
 
+#ifndef CONFIG_BIGMEM
     return free_area_init(start_mem, end_mem);
+#else
+    kmap_init(); /* run after fixmap_init */
+    return free_area_init(start_mem, bigmem_end + PAGE_OFFSET);
+#endif
 }
 
 /*
@@ -396,8 +410,18 @@
     unsigned long tmp;
 
     end_mem &= PAGE_MASK;
+#ifdef CONFIG_BIGMEM
+    bigmem_start = PAGE_ALIGN(bigmem_start);
+    bigmem_end &= PAGE_MASK;
+#endif
     high_memory = (void *) end_mem;
+#ifndef CONFIG_BIGMEM
     max_mapnr = num_physpages = MAP_NR(end_mem);
+#else
+    max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end);
+    /* cache the bigmem_mapnr */
+    bigmem_mapnr = PHYSMAP_NR(bigmem_start);
+#endif
 
     /* clear the zero-page */
     memset(empty_zero_page, 0, PAGE_SIZE);
@@ -452,16 +476,39 @@
 #endif
         free_page(tmp);
     }
-    printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+#ifdef CONFIG_BIGMEM
+    for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) {
+        /*
+          RMQUEUE_ORDER in page_alloc.c returns PAGE_OFFSET + tmp
+          which cannot be allowed to be 0 since the callers of
+          __get_free_pages treat 0 as an allocation failure. To
+          avoid this possibility, do not allow allocation of the
+          BIGMEM page which would map to 0.
+
+          Leonard N. Zubkoff, 30 October 1999
+        */
+        if (tmp + PAGE_OFFSET != 0) {
+            clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags);
+            set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags);
+            atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1);
+            free_page(tmp + PAGE_OFFSET);
+            totalbig_pages++;
+        }
+    }
+#endif
+    printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n",
         (unsigned long) nr_free_pages << (PAGE_SHIFT-10),
         max_mapnr << (PAGE_SHIFT-10),
         codepages << (PAGE_SHIFT-10),
         reservedpages << (PAGE_SHIFT-10),
         datapages << (PAGE_SHIFT-10),
-        initpages << (PAGE_SHIFT-10));
+        initpages << (PAGE_SHIFT-10),
+        totalbig_pages << (PAGE_SHIFT-10));
 
     if (boot_cpu_data.wp_works_ok < 0)
         test_wp_bit();
+
+    totalram_pages = max_mapnr - reservedpages;
 }
 
 void free_initmem(void)
@@ -479,22 +526,11 @@
 void si_meminfo(struct sysinfo *val)
 {
-    int i;
-
-    i = max_mapnr;
-    val->totalram = 0;
+    val->totalram = totalram_pages << PAGE_SHIFT;
     val->sharedram = 0;
     val->freeram = nr_free_pages << PAGE_SHIFT;
     val->bufferram = buffermem;
-    while (i-- > 0) {
-        if (PageReserved(mem_map+i))
-            continue;
-        val->totalram++;
-        if (!atomic_read(&mem_map[i].count))
-            continue;
-        val->sharedram += atomic_read(&mem_map[i].count) - 1;
-    }
-    val->totalram <<= PAGE_SHIFT;
-    val->sharedram <<= PAGE_SHIFT;
+    val->totalbig = totalbig_pages << PAGE_SHIFT;
+    val->freebig = nr_free_bigpages << PAGE_SHIFT;
     return;
 }
diff -urN 2.2.17pre4/fs/dcache.c 2.2.17pre4-bigmem/fs/dcache.c
--- 2.2.17pre4/fs/dcache.c	Tue Jun 13 03:48:14 2000
+++ 2.2.17pre4-bigmem/fs/dcache.c	Mon Jun 19 16:31:56 2000
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -927,7 +928,11 @@
     if (!dentry_cache)
         panic("Cannot create dentry cache");
 
+#ifndef CONFIG_BIGMEM
     memory_size = num_physpages << PAGE_SHIFT;
+#else
+    memory_size = bigmem_mapnr << PAGE_SHIFT;
+#endif
     memory_size >>= 13;
     memory_size *= 2 * sizeof(void *);
     for (order = 0; ((1UL << order) << PAGE_SHIFT) < memory_size; order++);
diff -urN 2.2.17pre4/fs/proc/array.c 2.2.17pre4-bigmem/fs/proc/array.c
--- 2.2.17pre4/fs/proc/array.c	Mon Jun 19 01:51:19 2000
+++ 2.2.17pre4-bigmem/fs/proc/array.c	Mon Jun 19 16:31:27 2000
@@ -42,6 +42,8 @@
  * Alan Cox          :  security fixes.
 *
 *
+ * Gerhard Wichert   :  added BIGMEM support
+ *                      Siemens AG <Gerhard.Wichert@pdb.siemens.de>
 */
 
 #include
@@ -389,6 +391,8 @@
         "MemShared: %8lu kB\n"
         "Buffers:   %8lu kB\n"
         "Cached:    %8lu kB\n"
+        "BigTotal:  %8lu kB\n"
+        "BigFree:   %8lu kB\n"
         "SwapTotal: %8lu kB\n"
         "SwapFree:  %8lu kB\n",
         i.totalram >> 10,
@@ -396,6 +400,8 @@
         i.sharedram >> 10,
         i.bufferram >> 10,
         page_cache_size << (PAGE_SHIFT - 10),
+        i.totalbig >> 10,
+        i.freebig >> 10,
         i.totalswap >> 10,
         i.freeswap >> 10);
 }
@@ -451,6 +457,8 @@
     return pte_page(pte) + (ptr & ~PAGE_MASK);
 }
 
+#include <linux/bigmem.h>
+
 static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer)
 {
     unsigned long addr;
@@ -463,6 +471,7 @@
         addr = get_phys_addr(p, start);
         if (!addr)
             return result;
+        addr = kmap(addr, KM_READ);
         do {
             c = *(char *) addr;
             if (!c)
@@ -470,12 +479,19 @@
             if (size < PAGE_SIZE)
                 buffer[size++] = c;
             else
+            {
+                kunmap(addr, KM_READ);
                 return result;
+            }
             addr++;
             start++;
             if (!c && start >= end)
+            {
+                kunmap(addr, KM_READ);
                 return result;
+            }
         } while (addr & ~PAGE_MASK);
+        kunmap(addr-1, KM_READ);
     }
     return result;
 }
diff -urN 2.2.17pre4/fs/proc/mem.c 2.2.17pre4-bigmem/fs/proc/mem.c
--- 2.2.17pre4/fs/proc/mem.c	Sun Apr  2 21:07:49 2000
+++ 2.2.17pre4-bigmem/fs/proc/mem.c	Mon Jun 19 16:31:27 2000
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -120,7 +121,9 @@
         i = PAGE_SIZE-(addr & ~PAGE_MASK);
         if (i > scount)
             i = scount;
+        page = (char *) kmap((unsigned long) page, KM_READ);
         copy_to_user(tmp, page, i);
+        kunmap((unsigned long) page, KM_READ);
         addr += i;
         tmp += i;
         scount -= i;
@@ -177,7 +180,9 @@
         i = PAGE_SIZE-(addr & ~PAGE_MASK);
         if (i > count)
             i = count;
+        page = (unsigned long) kmap((unsigned long) page, KM_WRITE);
         copy_from_user(page, tmp, i);
+        kunmap((unsigned long) page, KM_WRITE);
         addr += i;
         tmp += i;
         count -= i;
diff -urN 2.2.17pre4/include/asm-alpha/bigmem.h 2.2.17pre4-bigmem/include/asm-alpha/bigmem.h
--- 2.2.17pre4/include/asm-alpha/bigmem.h	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/include/asm-alpha/bigmem.h	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,27 @@
+/*
+ * linux/include/asm-alpha/bigmem.h
+ *
+ * On alpha we can address all the VM with a flat mapping. We need
+ * to differentiate BIGMEM memory only because the default PCI DMA window
+ * is currently limited to 2g. Thus kmap/kunmap are noops here.
+ *
+ * With bigmem support the alpha now is capable of allocating up to
+ * 2048Giga of memory.
+ *
+ * Copyright (C) 2000 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ */
+
+#ifndef _ASM_BIGMEM_H
+#define _ASM_BIGMEM_H
+
+#include
+
+#undef BIGMEM_DEBUG /* undef for production */
+
+/* declarations for bigmem.c */
+extern unsigned long bigmem_start, bigmem_end;
+
+#define kmap(kaddr, type) kaddr
+#define kunmap(vaddr, type) do { } while (0)
+
+#endif /* _ASM_BIGMEM_H */
diff -urN 2.2.17pre4/include/asm-i386/bigmem.h 2.2.17pre4-bigmem/include/asm-i386/bigmem.h
--- 2.2.17pre4/include/asm-i386/bigmem.h	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/include/asm-i386/bigmem.h	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,69 @@
+/*
+ * bigmem.h: virtual kernel memory mappings for big memory
+ *
+ * Used in CONFIG_BIGMEM systems for memory pages which are not
+ * addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                    Gerhard.Wichert@pdb.siemens.de
+ */
+
+#ifndef _ASM_BIGMEM_H
+#define _ASM_BIGMEM_H
+
+#include <linux/init.h>
+
+#undef BIGMEM_DEBUG /* undef for production */
+
+/* declarations for bigmem.c */
+extern unsigned long bigmem_start, bigmem_end;
+extern int nr_free_bigpages;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+
+extern void kmap_init(void) __init;
+
+/* kmap helper functions necessary to access the bigmem pages in kernel */
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+
+extern inline unsigned long kmap(unsigned long kaddr, enum km_type type)
+{
+    if (__pa(kaddr) < bigmem_start)
+        return kaddr;
+    {
+        enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+        unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx);
+
+#ifdef BIGMEM_DEBUG
+        if (!pte_none(*(kmap_pte-idx)))
+        {
+            __label__ here;
+        here:
+            printk(KERN_ERR "not null pte on CPU %d from %p\n",
+                   smp_processor_id(), &&here);
+        }
+#endif
+        set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot));
+        __flush_tlb_one(vaddr);
+
+        return vaddr | (kaddr & ~PAGE_MASK);
+    }
+}
+
+extern inline void kunmap(unsigned long vaddr, enum km_type type)
+{
+#ifdef BIGMEM_DEBUG
+    enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+    if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+    {
+        /* force other mappings to Oops if they'll try to access
+           this pte without first remapping it */
+        pte_clear(kmap_pte-idx);
+        __flush_tlb_one(vaddr);
+    }
+#endif
+}
+
+#endif /* _ASM_BIGMEM_H */
diff -urN 2.2.17pre4/include/asm-i386/fixmap.h 2.2.17pre4-bigmem/include/asm-i386/fixmap.h
--- 2.2.17pre4/include/asm-i386/fixmap.h	Fri May 19 20:24:28 2000
+++ 2.2.17pre4-bigmem/include/asm-i386/fixmap.h	Mon Jun 19 16:31:27 2000
@@ -6,6 +6,8 @@
  * for more details.
  *
  * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
 #ifndef _ASM_FIXMAP_H
@@ -14,6 +16,10 @@
 #include
 #include
 #include
+#ifdef CONFIG_BIGMEM
+#include <linux/tasks.h>
+#include <asm/kmap_types.h>
+#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -55,6 +61,10 @@
     FIX_CO_APIC,    /* Cobalt APIC Redirection Table */
     FIX_LI_PCIA,    /* Lithium PCI Bridge A */
     FIX_LI_PCIB,    /* Lithium PCI Bridge B */
+#endif
+#ifdef CONFIG_BIGMEM
+    FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+    FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
     __end_of_fixed_addresses
 };
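
FIX_KMAP_BEGIN..FIX_KMAP_END above reserve one fixmap pte per (CPU, kmap type)
pair, which is why kmap()/kunmap() need no locking: each CPU owns its own
slots. A standalone illustration of the slot arithmetic (the NR_CPUS value is
an assumed example):

    #include <stdio.h>

    enum km_type { KM_READ, KM_WRITE, KM_TYPE_NR }; /* as in kmap_types.h */
    #define NR_CPUS 4                               /* assumed example */

    int main(void)
    {
        int cpu, type;

        /* every CPU gets a private pair of kmap slots */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
            for (type = KM_READ; type < KM_TYPE_NR; type++)
                printf("cpu %d type %d -> fixmap slot %d\n",
                       cpu, type, type + KM_TYPE_NR * cpu);
        return 0;
    }
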
diff -urN 2.2.17pre4/include/asm-i386/io.h 2.2.17pre4-bigmem/include/asm-i386/io.h
--- 2.2.17pre4/include/asm-i386/io.h	Fri May 19 20:24:29 2000
+++ 2.2.17pre4-bigmem/include/asm-i386/io.h	Mon Jun 19 16:31:27 2000
@@ -27,6 +27,7 @@
 
 /*
  * Bit simplified and optimized by Jan Hubicka
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
  */
 
 #ifdef SLOW_IO_BY_JUMPING
@@ -109,12 +110,20 @@
  */
 extern inline unsigned long virt_to_phys(volatile void * address)
 {
+#ifdef CONFIG_BIGMEM
    return __pa(address);
+#else
    return __io_phys(address);
+#endif
 }
 
 extern inline void * phys_to_virt(unsigned long address)
 {
+#ifdef CONFIG_BIGMEM
    return __va(address);
+#else
    return __io_virt(address);
+#endif
 }
 
 extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
diff -urN 2.2.17pre4/include/asm-i386/kmap_types.h 2.2.17pre4-bigmem/include/asm-i386/kmap_types.h
--- 2.2.17pre4/include/asm-i386/kmap_types.h	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/include/asm-i386/kmap_types.h	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,10 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+enum km_type {
+    KM_READ,
+    KM_WRITE,
+    KM_TYPE_NR,
+};
+
+#endif
diff -urN 2.2.17pre4/include/asm-i386/page.h 2.2.17pre4-bigmem/include/asm-i386/page.h
--- 2.2.17pre4/include/asm-i386/page.h	Fri May 19 20:24:28 2000
+++ 2.2.17pre4-bigmem/include/asm-i386/page.h	Mon Jun 19 16:31:27 2000
@@ -88,6 +88,7 @@
 #define __pa(x)         ((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)         ((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define MAP_NR(addr)        (__pa(addr) >> PAGE_SHIFT)
+#define PHYSMAP_NR(addr)    ((unsigned long)(addr) >> PAGE_SHIFT)
 
 #endif /* __KERNEL__ */
diff -urN 2.2.17pre4/include/linux/bigmem.h 2.2.17pre4-bigmem/include/linux/bigmem.h
--- 2.2.17pre4/include/linux/bigmem.h	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/include/linux/bigmem.h	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,50 @@
+#ifndef _LINUX_BIGMEM_H
+#define _LINUX_BIGMEM_H
+
+#include <linux/config.h>
+
+#ifdef CONFIG_BIGMEM
+
+#include <asm/bigmem.h>
+
+/* declarations for linux/mm/bigmem.c */
+extern unsigned long bigmem_mapnr;
+extern int nr_free_bigpages;
+
+extern struct page * prepare_bigmem_swapout(struct page *);
+extern struct page * replace_with_bigmem(struct page *);
+extern unsigned long prepare_bigmem_shm_swapin(unsigned long);
+
+#else /* CONFIG_BIGMEM */
+
+#define prepare_bigmem_swapout(page) page
+#define replace_with_bigmem(page) page
+#define prepare_bigmem_shm_swapin(page) page
+#define kmap(kaddr, type) kaddr
+#define kunmap(vaddr, type) do { } while (0)
+#define nr_free_bigpages 0
+
+#endif /* CONFIG_BIGMEM */
+
+/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */
+extern inline void clear_bigpage(unsigned long kaddr)
+{
+    unsigned long vaddr;
+
+    vaddr = kmap(kaddr, KM_WRITE);
+    clear_page(vaddr);
+    kunmap(vaddr, KM_WRITE);
+}
+
+extern inline void copy_bigpage(unsigned long to, unsigned long from)
+{
+    unsigned long vfrom, vto;
+
+    vfrom = kmap(from, KM_READ);
+    vto = kmap(to, KM_WRITE);
+    copy_page(vto, vfrom);
+    kunmap(vfrom, KM_READ);
+    kunmap(vto, KM_WRITE);
+}
+
+#endif /* _LINUX_BIGMEM_H */
diff -urN 2.2.17pre4/include/linux/kernel.h 2.2.17pre4-bigmem/include/linux/kernel.h
--- 2.2.17pre4/include/linux/kernel.h	Thu Feb  3 17:56:05 2000
+++ 2.2.17pre4-bigmem/include/linux/kernel.h	Mon Jun 19 16:31:27 2000
@@ -90,7 +90,9 @@
     unsigned long totalswap;    /* Total swap space size */
     unsigned long freeswap;     /* swap space still available */
     unsigned short procs;       /* Number of current processes */
-    char _f[22];                /* Pads structure to 64 bytes */
+    unsigned long totalbig;     /* Total big memory size */
+    unsigned long freebig;      /* Available big memory size */
+    char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. */
 };
 
 #endif
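
The _f resizing above keeps struct sysinfo the same size for the libc5 ABI.
A standalone check of the arithmetic, assuming i386 (4-byte longs) and 2
bytes of alignment padding after the unsigned short that the two new longs
now consume - the layout assumption is mine, not stated in the patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned long l = 4;    /* sizeof(long) on i386 */
        unsigned long old_tail = 22;                    /* char _f[22] */
        unsigned long new_tail = 2 + 2*l + (20 - 2*l);  /* align + 2 longs + _f */

        printf("old %lu == new %lu\n", old_tail, new_tail);  /* 22 == 22 */
        return 0;
    }
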
diff -urN 2.2.17pre4/include/linux/mm.h 2.2.17pre4-bigmem/include/linux/mm.h
--- 2.2.17pre4/include/linux/mm.h	Wed May 10 22:26:45 2000
+++ 2.2.17pre4-bigmem/include/linux/mm.h	Mon Jun 19 16:31:27 2000
@@ -144,6 +144,7 @@
 #define PG_Slab          9
 #define PG_swap_cache       10
 #define PG_skip         11
+#define PG_BIGMEM       12
 #define PG_reserved     31
 
 /* Make it prettier to test the above... */
@@ -175,6 +176,11 @@
                     (test_and_clear_bit(PG_dirty, &(page)->flags))
 #define PageTestandClearSwapCache(page) \
                     (test_and_clear_bit(PG_swap_cache, &(page)->flags))
+#ifdef CONFIG_BIGMEM
+#define PageBIGMEM(page)    (test_bit(PG_BIGMEM, &(page)->flags))
+#else
+#define PageBIGMEM(page)    0 /* needed to optimize away at compile time */
+#endif
 
 /*
  * Various page->flags bits:
@@ -332,11 +338,17 @@
 #define __GFP_HIGH  0x08
 #define __GFP_IO    0x10
 #define __GFP_SWAP  0x20
+#ifdef CONFIG_BIGMEM
+#define __GFP_BIGMEM    0x40
+#else
+#define __GFP_BIGMEM    0x0 /* noop */
+#endif
 #define __GFP_DMA   0x80
 
 #define GFP_BUFFER  (__GFP_MED | __GFP_WAIT)
 #define GFP_ATOMIC  (__GFP_HIGH)
+#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM)
 #define GFP_USER    (__GFP_LOW | __GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL  (__GFP_MED | __GFP_WAIT | __GFP_IO)
 #define GFP_NFS     (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
@@ -346,6 +358,11 @@
    platforms, used as appropriate on others */
 
 #define GFP_DMA     __GFP_DMA
+
+/* Flag - indicates that the buffer can be taken from big memory which is not
+   directly addressable by the kernel */
+
+#define GFP_BIGMEM  __GFP_BIGMEM
 
 /* vma is the first one with  address < vma->vm_end,
  * and even  address < vma->vm_start. Have to extend vma. */
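
__GFP_BIGMEM is just another request bit, so existing GFP masks pass through
unchanged and GFP_BIGUSER degrades to GFP_USER wherever BIGMEM is not
permitted. Standalone illustration; the __GFP_WAIT/__GFP_LOW values are
assumptions (only HIGH/IO/SWAP/BIGMEM/DMA are visible in the hunk above):

    #include <stdio.h>

    #define __GFP_WAIT   0x01   /* assumed */
    #define __GFP_LOW    0x02   /* assumed */
    #define __GFP_IO     0x10
    #define __GFP_BIGMEM 0x40

    #define GFP_USER    (__GFP_LOW | __GFP_WAIT | __GFP_IO)
    #define GFP_BIGUSER (GFP_USER | __GFP_BIGMEM)

    int main(void)
    {
        /* GFP_BIGUSER is exactly GFP_USER plus permission to use BIGMEM */
        printf("%d\n", (GFP_BIGUSER & ~__GFP_BIGMEM) == GFP_USER);  /* 1 */
        return 0;
    }
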
diff -urN 2.2.17pre4/ipc/shm.c 2.2.17pre4-bigmem/ipc/shm.c
--- 2.2.17pre4/ipc/shm.c	Tue Jun 13 03:48:15 2000
+++ 2.2.17pre4-bigmem/ipc/shm.c	Mon Jun 19 16:31:27 2000
@@ -4,6 +4,7 @@
  * Many improvements/fixes by Bruno Haible.
  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
+ * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  */
 
 #include
@@ -13,6 +14,8 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
+#include <linux/pagemap.h>
 
 #include
 #include
@@ -648,21 +651,29 @@
     pte = __pte(shp->shm_pages[idx]);
     if (!pte_present(pte)) {
-        unsigned long page = get_free_page(GFP_USER);
+        unsigned long page = __get_free_page(GFP_BIGUSER);
         if (!page) return -1;
+        clear_bigpage(page);
         pte = __pte(shp->shm_pages[idx]);
         if (pte_present(pte)) {
             free_page (page); /* doesn't sleep */
             goto done;
         }
         if (!pte_none(pte)) {
+            struct page * page_map;
+
+            page = prepare_bigmem_shm_swapin(page);
+            if (!page)
+                return -1;
             rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
             pte = __pte(shp->shm_pages[idx]);
             if (pte_present(pte))  {
                 free_page (page); /* doesn't sleep */
                 goto done;
             }
+            page_map = replace_with_bigmem(&mem_map[MAP_NR(page)]);
+            page = page_address(page_map);
             swap_free(pte_val(pte));
             shm_swp--;
         }
@@ -692,6 +703,7 @@
     unsigned long id, idx;
     int loop = 0;
     int counter;
+    struct page * page_map;
 
     counter = shm_rss >> prio;
     if (!counter || !(swap_nr = get_swap_page()))
@@ -720,7 +732,10 @@
     page = __pte(shp->shm_pages[idx]);
     if (!pte_present(page))
         goto check_table;
-    if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
+    page_map = &mem_map[MAP_NR(pte_page(page))];
+    if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
+        goto check_table;
+    if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))
         goto check_table;
     swap_attempts++;
@@ -729,11 +744,13 @@
         swap_free (swap_nr);
         return 0;
     }
-    if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
+    if (atomic_read(&page_map->count) != 1)
+        goto check_table;
+    if (!(page_map = prepare_bigmem_swapout(page_map)))
         goto check_table;
     shp->shm_pages[idx] = swap_nr;
-    rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
-    free_page(pte_page(page));
+    rw_swap_page_nocache (WRITE, swap_nr, (char *) page_address(page_map));
+    __free_page(page_map);
     swap_successes++;
     shm_swp++;
     shm_rss--;
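
The swap-in path above bounces through low memory because
rw_swap_page_nocache() drives the block layer with a kernel virtual address,
while the resident page may live in BIGMEM afterwards. Condensed flow of the
hunk, with error handling dropped (kernel-context sketch, not standalone
code):

    /* sketch: the shm swap-in dance */
    page = prepare_bigmem_shm_swapin(page);          /* low page for the I/O */
    rw_swap_page_nocache(READ, pte_val(pte), (char *) page);
    page_map = replace_with_bigmem(&mem_map[MAP_NR(page)]);
    page = page_address(page_map);                   /* possibly BIGMEM again */
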
diff -urN 2.2.17pre4/mm/Makefile 2.2.17pre4-bigmem/mm/Makefile
--- 2.2.17pre4/mm/Makefile	Mon Jan 18 02:27:01 1999
+++ 2.2.17pre4-bigmem/mm/Makefile	Mon Jun 19 16:31:27 2000
@@ -12,4 +12,8 @@
         vmalloc.o slab.o \
         swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o
 
+ifeq ($(CONFIG_BIGMEM),y)
+O_OBJS += bigmem.o
+endif
+
 include $(TOPDIR)/Rules.make
diff -urN 2.2.17pre4/mm/bigmem.c 2.2.17pre4-bigmem/mm/bigmem.c
--- 2.2.17pre4/mm/bigmem.c	Thu Jan  1 01:00:00 1970
+++ 2.2.17pre4-bigmem/mm/bigmem.c	Mon Jun 19 16:31:27 2000
@@ -0,0 +1,87 @@
+/*
+ * BIGMEM common code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ */
+
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/bigmem.h>
+
+unsigned long bigmem_mapnr;
+int nr_free_bigpages = 0;
+
+struct page * prepare_bigmem_swapout(struct page * page)
+{
+    /* a bigmem page can't be swapped out directly, otherwise the
+       b_data buffer addresses will break the lowlevel device
+       drivers. */
+    if (PageBIGMEM(page))
+    {
+        unsigned long regular_page;
+        unsigned long vaddr;
+
+        regular_page = __get_free_page(GFP_ATOMIC);
+        if (!regular_page)
+            return NULL;
+
+        vaddr = kmap(page_address(page), KM_READ);
+        copy_page(regular_page, vaddr);
+        kunmap(vaddr, KM_READ);
+
+        /* ok, we can just forget about our bigmem page since
+           we stored its data into the new regular_page. */
+        __free_page(page);
+
+        page = MAP_NR(regular_page) + mem_map;
+    }
+    return page;
+}
+
+struct page * replace_with_bigmem(struct page * page)
+{
+    if (!PageBIGMEM(page) && nr_free_bigpages)
+    {
+        unsigned long kaddr;
+
+        kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM);
+        if (kaddr)
+        {
+            struct page * bigmem_page;
+
+            bigmem_page = MAP_NR(kaddr) + mem_map;
+            if (PageBIGMEM(bigmem_page))
+            {
+                unsigned long vaddr;
+
+                vaddr = kmap(kaddr, KM_WRITE);
+                copy_page(vaddr, page_address(page));
+                kunmap(vaddr, KM_WRITE);
+
+                /* Preserve the caching of the swap_entry. */
+                bigmem_page->offset = page->offset;
+
+                /* We can just forget the old page since
+                   we stored its data into the new
+                   bigmem_page. */
+                __free_page(page);
+
+                page = bigmem_page;
+            }
+        }
+    }
+    return page;
+}
+
+unsigned long prepare_bigmem_shm_swapin(unsigned long page)
+{
+    if (!PageBIGMEM(&mem_map[MAP_NR(page)]))
+        return page;
+
+    free_page(page);
+
+    /* no need to clear the page since it will be rewritten by the
+       swapin. */
+    return __get_free_page(GFP_ATOMIC);
+}
diff -urN 2.2.17pre4/mm/filemap.c 2.2.17pre4-bigmem/mm/filemap.c
--- 2.2.17pre4/mm/filemap.c	Mon Jun 19 01:51:19 2000
+++ 2.2.17pre4-bigmem/mm/filemap.c	Mon Jun 19 16:31:27 2000
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -138,7 +139,11 @@
 int shrink_mmap(int priority, int gfp_mask)
 {
     static unsigned long clock = 0;
+#ifndef CONFIG_BIGMEM
     unsigned long limit = num_physpages;
+#else
+    unsigned long limit = bigmem_mapnr;
+#endif
     struct page * page;
     int count;
@@ -156,7 +161,11 @@
          */
         page++;
         clock++;
+#ifndef CONFIG_BIGMEM
         if (clock >= max_mapnr) {
+#else
+        if (clock >= bigmem_mapnr) {
+#endif
             clock = 0;
             page = mem_map;
         }
@@ -173,6 +182,9 @@
         referenced = test_and_clear_bit(PG_referenced, &page->flags);
 
         if (PageLocked(page))
+            continue;
+
+        if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page))
             continue;
 
         if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
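
prepare_bigmem_swapout() above establishes the swap-out convention used by
the callers later in this patch (the mm/vmscan.c and ipc/shm.c hunks): trade
a BIGMEM page for a low-memory copy before handing anything to the block
layer. Condensed caller-side sketch (kernel context, not standalone code):

    /* sketch: swap-out of a possibly-BIGMEM page */
    if (!(page_map = prepare_bigmem_swapout(page_map)))
        goto out_swap_free;     /* no low page free: back out, swap_free() */
    rw_swap_page(WRITE, entry, (char *) page_address(page_map), 0);
    __free_page(page_map);
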
diff -urN 2.2.17pre4/mm/memory.c 2.2.17pre4-bigmem/mm/memory.c
--- 2.2.17pre4/mm/memory.c	Sun Apr  2 21:07:50 2000
+++ 2.2.17pre4-bigmem/mm/memory.c	Mon Jun 19 16:31:27 2000
@@ -31,12 +31,16 @@
 /*
  * 05.04.94  -  Multi-page memory management added for v1.1.
  *      Idea by Alex Bligh (alex@cconcepts.co.uk)
+ *
+ * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
+ *      (Gerhard.Wichert@pdb.siemens.de)
  */
 
 #include
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 #include
@@ -53,10 +57,10 @@
 static inline void copy_cow_page(unsigned long from, unsigned long to)
 {
     if (from == ZERO_PAGE(to)) {
-        clear_page(to);
+        clear_bigpage(to);
         return;
     }
-    copy_page(to, from);
+    copy_bigpage(to, from);
 }
 
 mem_map_t * mem_map = NULL;
@@ -613,7 +617,7 @@
     struct page * page_map;
 
     pte = *page_table;
-    new_page = __get_free_page(GFP_USER);
+    new_page = __get_free_page(GFP_BIGUSER);
     /* Did swap_out() unmap the protected page while we slept? */
     if (pte_val(*page_table) != pte_val(pte))
         goto end_wp_page;
@@ -807,10 +811,10 @@
 {
     pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
     if (write_access) {
-        unsigned long page = __get_free_page(GFP_USER);
+        unsigned long page = __get_free_page(GFP_BIGUSER);
         if (!page)
             return -1;
-        clear_page(page);
+        clear_bigpage(page);
         entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
         vma->vm_mm->rss++;
         tsk->min_flt++;
diff -urN 2.2.17pre4/mm/page_alloc.c 2.2.17pre4-bigmem/mm/page_alloc.c
--- 2.2.17pre4/mm/page_alloc.c	Mon Jun 19 01:51:19 2000
+++ 2.2.17pre4-bigmem/mm/page_alloc.c	Mon Jun 19 16:31:27 2000
@@ -3,6 +3,7 @@
  *
  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  *  Swap reorganised 29.12.95, Stephen Tweedie
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
 #include
@@ -13,6 +14,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h> /* export bigmem vars */
 
 #include
 #include /* for copy_to/from_user */
@@ -35,7 +37,11 @@
 #else
 #define NR_MEM_LISTS 10
 #endif
+#ifndef CONFIG_BIGMEM
 #define NR_MEM_TYPES 2      /* GFP_DMA vs not for now. */
+#else
+#define NR_MEM_TYPES 3
+#endif
 
 /* The start of this MUST match the start of "struct page" */
 struct free_area_struct {
@@ -104,6 +110,13 @@
 
 #define list(x) (mem_map+(x))
 
+#ifdef CONFIG_BIGMEM
+    if (map_nr >= bigmem_mapnr)
+    {
+        area = free_area[2] + order;
+        nr_free_bigpages -= mask;
+    }
+#endif
     map_nr &= mask;
     nr_free_pages -= mask;
     while (mask + (1 << (NR_MEM_LISTS-1))) {
@@ -148,6 +161,17 @@
 #define MARK_USED(index, order, area) \
     change_bit((index) >> (1+(order)), (area)->map)
 #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+#ifdef CONFIG_BIGMEM
+#define UPDATE_NR_FREE_BIGPAGES(map_nr, order)          \
+    do                                                  \
+    {                                                   \
+        if ((map_nr) >= bigmem_mapnr)                   \
+            nr_free_bigpages -= 1 << (order);           \
+    }                                                   \
+    while (0)
+#else
+#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) do { } while (0)
+#endif
 
 #define RMQUEUE_TYPE(order, type) \
 do { struct free_area_struct * area = free_area[type]+order; \
@@ -158,6 +182,7 @@
             map_nr = ret - mem_map; \
             MARK_USED(map_nr, new_order, area); \
             nr_free_pages -= 1 << order; \
+            UPDATE_NR_FREE_BIGPAGES(map_nr, order); \
             area->count--; \
             EXPAND(ret, map_nr, order, new_order, area); \
             spin_unlock_irqrestore(&page_alloc_lock, flags); \
@@ -214,6 +239,7 @@
         current->flags &= ~PF_MEMALLOC;
     }
 
+#ifndef CONFIG_BIGMEM
     if (nr_free_pages > freepages.low)
         goto ok_to_allocate;
 
@@ -223,6 +249,35 @@
     /* Do we have to block or can we proceed? */
     if (nr_free_pages > freepages.min)
         goto ok_to_allocate;
+#else
+    if (gfp_mask & __GFP_BIGMEM) {
+        if (nr_free_pages > freepages.low)
+            goto ok_to_allocate;
+
+        /*
+         * Wake kswapd only if the normal classzone
+         * is low on memory otherwise waking up kswapd would
+         * be useless.
+         */
+        if (nr_free_pages-nr_free_bigpages <= freepages.low &&
+            waitqueue_active(&kswapd_wait))
+            wake_up_interruptible(&kswapd_wait);
+
+        /* Do we have to block or can we proceed? */
+        if (nr_free_pages > freepages.min)
+            goto ok_to_allocate;
+    } else {
+        if (nr_free_pages-nr_free_bigpages > freepages.low)
+            goto ok_to_allocate;
+
+        if (waitqueue_active(&kswapd_wait))
+            wake_up_interruptible(&kswapd_wait);
+
+        /* Do we have to block or can we proceed? */
+        if (nr_free_pages-nr_free_bigpages > freepages.min)
+            goto ok_to_allocate;
+    }
+#endif
 
     current->flags |= PF_MEMALLOC;
     atomic_inc(&free_before_allocate);
@@ -237,8 +292,18 @@
      * the pages. We check against pages_high to be sure
      * to succeed only if lots of memory is been released.
     */
+#ifndef CONFIG_BIGMEM
     if (nr_free_pages > freepages.high)
         goto ok_to_allocate;
+#else
+    if (gfp_mask & __GFP_BIGMEM) {
+        if (nr_free_pages > freepages.high)
+            goto ok_to_allocate;
+    } else {
+        if (nr_free_pages-nr_free_bigpages > freepages.high)
+            goto ok_to_allocate;
+    }
+#endif
 
     if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
         goto nopage;
@@ -246,8 +311,13 @@
 ok_to_allocate:
     spin_lock_irqsave(&page_alloc_lock, flags);
     /* if it's not a dma request, try non-dma first */
-    if (!(gfp_mask & __GFP_DMA))
+    if (!(gfp_mask & __GFP_DMA)) {
+#ifdef CONFIG_BIGMEM
+        if (gfp_mask & __GFP_BIGMEM)
+            RMQUEUE_TYPE(order, 2);
+#endif
         RMQUEUE_TYPE(order, 0);
+    }
     RMQUEUE_TYPE(order, 1);
     spin_unlock_irqrestore(&page_alloc_lock, flags);
@@ -266,7 +336,9 @@
     unsigned type;
 
     spin_lock_irqsave(&page_alloc_lock, flags);
-    printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
+    printk("Free pages:      %6dkB (%6dkB BigMem)\n ( ",
+           nr_free_pages<<(PAGE_SHIFT-10),
+           nr_free_bigpages<<(PAGE_SHIFT-10));
     printk("Free: %d (%d %d %d)\n",
         nr_free_pages,
         freepages.min,
@@ -274,7 +346,19 @@
         freepages.high);
     for (type = 0; type < NR_MEM_TYPES; type++) {
         unsigned long total = 0;
+#ifdef CONFIG_BIGMEM
+        switch (type)
+        {
+            case 0:
+            case 1:
+#endif
         printk("%sDMA: ", type ? "" : "Non");
+#ifdef CONFIG_BIGMEM
+                break;
+            case 2:
+                printk("BIGMEM: ");
+        }
+#endif
         for (order=0 ; order < NR_MEM_LISTS; order++) {
             unsigned long nr = free_area[type][order].count;
@@ -426,6 +510,8 @@
      * this process.
      */
     delete_from_swap_cache(page_map);
+    page_map = replace_with_bigmem(page_map);
+    page = page_address(page_map);
     set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
     return 1;
 }
diff -urN 2.2.17pre4/mm/vmalloc.c 2.2.17pre4-bigmem/mm/vmalloc.c
--- 2.2.17pre4/mm/vmalloc.c	Tue Jul 13 00:33:04 1999
+++ 2.2.17pre4-bigmem/mm/vmalloc.c	Mon Jun 19 16:31:27 2000
@@ -2,6 +2,7 @@
  *  linux/mm/vmalloc.c
  *
  *  Copyright (C) 1993  Linus Torvalds
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
 #include
@@ -94,7 +95,7 @@
         unsigned long page;
         if (!pte_none(*pte))
             printk("alloc_area_pte: page already exists\n");
-        page = __get_free_page(GFP_KERNEL);
+        page = __get_free_page(GFP_KERNEL|GFP_BIGMEM);
         if (!page)
             return -ENOMEM;
         set_pte(pte, mk_pte(page, PAGE_KERNEL));
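
The watermark split in __get_free_pages() above is the heart of the balancing
change: a non-__GFP_BIGMEM allocation can only be satisfied from low memory,
so only nr_free_pages - nr_free_bigpages counts for it. Standalone
illustration with assumed example numbers:

    #include <stdio.h>

    int main(void)
    {
        /* assumed snapshot: plenty free overall, but almost all BIGMEM */
        int nr_free_pages = 5000, nr_free_bigpages = 4800;
        int low = 300;

        printf("BIGMEM-capable alloc ok: %d\n", nr_free_pages > low);  /* 1 */
        printf("normal alloc ok:         %d\n",
               nr_free_pages - nr_free_bigpages > low);  /* 0: must reclaim */
        return 0;
    }

The free-list order follows the same idea: a __GFP_BIGMEM request tries the
BIGMEM list (type 2) first, falls back to normal pages (type 0), and the
DMA-capable list (type 1) is always the last resort.
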
diff -urN 2.2.17pre4/mm/vmscan.c 2.2.17pre4-bigmem/mm/vmscan.c
--- 2.2.17pre4/mm/vmscan.c	Mon Jun 19 01:51:19 2000
+++ 2.2.17pre4-bigmem/mm/vmscan.c	Mon Jun 19 16:31:27 2000
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <linux/bigmem.h>
 
 #include
 
@@ -60,7 +61,8 @@
 
     if (PageReserved(page_map)
         || PageLocked(page_map)
-        || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)))
+        || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
+        || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map)))
         return 0;
 
     /*
@@ -151,6 +153,9 @@
     if (!entry)
         return 0; /* No swap space left */
 
+    if (!(page_map = prepare_bigmem_swapout(page_map)))
+        goto out_swap_free;
+
     vma->vm_mm->rss--;
     tsk->nswap++;
     set_pte(page_table, __pte(entry));
@@ -162,10 +167,14 @@
     set_bit(PG_locked, &page_map->flags);
 
     /* OK, do a physical asynchronous write to swap.  */
-    rw_swap_page(WRITE, entry, (char *) page, 0);
+    rw_swap_page(WRITE, entry, (char *) page_address(page_map), 0);
 
     __free_page(page_map);
     return 1;
+
+ out_swap_free:
+    swap_free(entry);
+    return 0;
 }
 
 /*
@@ -491,7 +500,11 @@
      */
     interruptible_sleep_on(&kswapd_wait);
 
-    while (nr_free_pages < freepages.high)
+    /*
+     * In 2.2.x-bigmem kswapd is critical to provide GFP_ATOMIC
+     * allocations (not GFP_BIGMEM ones).
+     */
+    while (nr_free_pages - nr_free_bigpages < freepages.high)
     {
         if (do_try_to_free_pages(GFP_KSWAPD))
         {
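
The kswapd condition above mirrors the allocator change: free BIGMEM pages no
longer count, so kswapd keeps reclaiming until *low* memory is replenished.
Standalone illustration with assumed example numbers:

    #include <stdio.h>

    int main(void)
    {
        /* assumed snapshot: lots free overall, but nearly all of it BIGMEM */
        int nr_free_pages = 5000, nr_free_bigpages = 4900;
        int high = 500;

        /* old test would let kswapd go back to sleep */
        printf("old: keep scanning? %d\n", nr_free_pages < high);        /* 0 */
        /* new test keeps it working for GFP_ATOMIC's sake */
        printf("new: keep scanning? %d\n",
               nr_free_pages - nr_free_bigpages < high);                 /* 1 */
        return 0;
    }
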