kernel_optimize_test/arch/powerpc/mm/init_32.c

180 lines
4.4 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
* Copyright (C) 1996 Paul Mackerras
* PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/memblock.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/btext.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/hugetlb.h>
#include <asm/kup.h>
#include <asm/kasan.h>
#include <mm/mmu_decl.h>
#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET))
#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_KERNEL_START"
#endif
#endif
#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
phys_addr_t total_memory;
phys_addr_t total_lowmem;
#ifdef CONFIG_RELOCATABLE
powerpc: Define virtual-physical translations for RELOCATABLE We find the runtime address of _stext and relocate ourselves based on the following calculation. virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) + MODULO(_stext.run,KERNEL_TLB_PIN_SIZE) relocate() is called with the Effective Virtual Base Address (as shown below) | Phys. Addr| Virt. Addr | Page |------------------------| Boundary | | | | | | | | | Kernel Load |___________|_ __ _ _ _ _|<- Effective Addr(_stext)| | ^ |Virt. Base Addr | | | | | | | | | |reloc_offset| | | | | | | | | | |______v_____|<-(KERNELBASE)%TLB_SIZE | | | | | | | | | Page |-----------|------------| Boundary | | | On BookE, we need __va() & __pa() early in the boot process to access the device tree. Currently this has been defined as : #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + KERNELBASE) where: PHYSICAL_START is kernstart_addr - a variable updated at runtime. KERNELBASE is the compile time Virtual base address of kernel. This won't work for us, as kernstart_addr is dynamic and will yield different results for __va()/__pa() for same mapping. e.g., Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET). In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000 = 0xbc100000 , which is wrong. it should be : 0xc0000000 + 0x100000 = 0xc0100000 On platforms which support AMP, like PPC_47x (based on 44x), the kernel could be loaded at highmem. Hence we cannot always depend on the compile time constants for mapping. Here are the possible solutions: 1) Update kernstart_addr(PHSYICAL_START) to match the Physical address of compile time KERNELBASE value, instead of the actual Physical_Address(_stext). The disadvantage is that we may break other users of PHYSICAL_START. They could be replaced with __pa(_stext). 2) Redefine __va() & __pa() with relocation offset #ifdef CONFIG_RELOCATABLE_PPC32 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET))) #define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET)) #endif where, RELOC_OFFSET could be a) A variable, say relocation_offset (like kernstart_addr), updated at boot time. This impacts performance, as we have to load an additional variable from memory. OR b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \ (KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK)) This introduces more calculations for doing the translation. 3) Redefine __va() & __pa() with a new variable i.e, #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) where VIRT_PHYS_OFFSET : #ifdef CONFIG_RELOCATABLE_PPC32 #define VIRT_PHYS_OFFSET virt_phys_offset #else #define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START) #endif /* CONFIG_RELOCATABLE_PPC32 */ where virt_phy_offset is updated at runtime to : Effective KERNELBASE - kernstart_addr. Taking our example, above: virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr = 0xc0400000 - 0x400000 = 0xc0000000 and __va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000 which is what we want. I have implemented (3) in the following patch which has same cost of operation as the existing one. I have tested the patches on 440x platforms only. However this should work fine for PPC_47x also, as we only depend on the runtime address and the current TLB XLAT entry for the startup code, which is available in r25. I don't have access to a 47x board yet. So, it would be great if somebody could test this on 47x. Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Kumar Gala <galak@kernel.crashing.org> Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org> Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
/* Used in __va()/__pa() */
long long virt_phys_offset;
EXPORT_SYMBOL(virt_phys_offset);
#endif
phys_addr_t lowmem_end_addr;
int boot_mapsize;
#ifdef CONFIG_PPC_PMAC
unsigned long agp_special_page;
EXPORT_SYMBOL(agp_special_page);
#endif
void MMU_init(void);
/*
* this tells the system to map all of ram with the segregs
* (i.e. page tables) instead of the bats.
* -- Cort
*/
int __map_without_bats;
int __map_without_ltlbs;
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
* Check for command-line options that affect what MMU_init will do.
*/
static void __init MMU_setup(void)
{
/* Check for nobats option (used in mapin_ram). */
if (strstr(boot_command_line, "nobats")) {
__map_without_bats = 1;
}
if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
if (debug_pagealloc_enabled()) {
__map_without_bats = 1;
__map_without_ltlbs = 1;
}
if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx))
__map_without_ltlbs = 1;
}
/*
* MMU_init sets up the basic memory mappings for the kernel,
* including both RAM and possibly some I/O regions,
* and sets up the page tables and the MMU hardware ready to go.
*/
void __init MMU_init(void)
{
if (ppc_md.progress)
ppc_md.progress("MMU:enter", 0x111);
/* parse args from command line */
MMU_setup();
/*
* Reserve gigantic pages for hugetlb. This MUST occur before
* lowmem_end_addr is initialized below.
*/
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
pr_warn("Only using first contiguous memory region\n");
#else
wii_memory_fixups();
#endif
}
total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
lowmem_end_addr = memstart_addr + total_lowmem;
#ifdef CONFIG_FSL_BOOKE
/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
* entries, so we need to adjust lowmem to match the amount we can map
* in the fixed entries */
adjust_total_lowmem();
#endif /* CONFIG_FSL_BOOKE */
if (total_lowmem > __max_low_memory) {
total_lowmem = __max_low_memory;
lowmem_end_addr = memstart_addr + total_lowmem;
#ifndef CONFIG_HIGHMEM
total_memory = total_lowmem;
memblock_enforce_memory_limit(total_lowmem);
#endif /* CONFIG_HIGHMEM */
}
/* Initialize the MMU hardware */
if (ppc_md.progress)
ppc_md.progress("MMU:hw init", 0x300);
MMU_init_hw();
/* Map in all of RAM starting at KERNELBASE */
if (ppc_md.progress)
ppc_md.progress("MMU:mapin", 0x301);
mapin_ram();
/* Initialize early top-down ioremap allocator */
ioremap_bot = IOREMAP_TOP;
if (ppc_md.progress)
ppc_md.progress("MMU:exit", 0x211);
/* From now on, btext is no longer BAT mapped if it was at all */
#ifdef CONFIG_BOOTX_TEXT
btext_unmap();
#endif
kasan_mmu_init();
setup_kup();
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}