forked from luck/tmp_suning_uos_patched
s390/mm: change swap pte encoding and pgtable cleanup
After the file ptes have been removed the bit combination used to encode non-linear mappings can be reused for the swap ptes. This frees up a precious pte software bit. Reflect the change in the swap encoding in the comments and do some cleanup while we are at it. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
b7d14f3a92
commit
a1c843b825
|
@ -12,12 +12,9 @@
|
|||
#define _ASM_S390_PGTABLE_H
|
||||
|
||||
/*
|
||||
* The Linux memory management assumes a three-level page table setup. For
|
||||
* s390 31 bit we "fold" the mid level into the top-level page table, so
|
||||
* that we physically have the same two-level page table as the s390 mmu
|
||||
* expects in 31 bit mode. For s390 64 bit we use three of the five levels
|
||||
* the hardware provides (region first and region second tables are not
|
||||
* used).
|
||||
* The Linux memory management assumes a three-level page table setup.
|
||||
* For s390 64 bit we use up to four of the five levels the hardware
|
||||
* provides (region first tables are not used).
|
||||
*
|
||||
* The "pgd_xxx()" functions are trivial for a folded two-level
|
||||
* setup: the pgd is never bad, and a pmd always exists (as it's folded
|
||||
|
@ -101,8 +98,8 @@ extern unsigned long zero_page_mask;
|
|||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
|
||||
* The vmalloc and module area will always be on the topmost area of the kernel
|
||||
* mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
|
||||
* The vmalloc and module area will always be on the topmost area of the
|
||||
* kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
|
||||
* On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
|
||||
* modules will reside. That makes sure that inter module branches always
|
||||
* happen without trampolines and in addition the placement within a 2GB frame
|
||||
|
@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr)
|
|||
}
|
||||
|
||||
/*
|
||||
* A 31 bit pagetable entry of S390 has following format:
|
||||
* | PFRA | | OS |
|
||||
* 0 0IP0
|
||||
* 00000000001111111111222222222233
|
||||
* 01234567890123456789012345678901
|
||||
*
|
||||
* I Page-Invalid Bit: Page is not available for address-translation
|
||||
* P Page-Protection Bit: Store access not possible for page
|
||||
*
|
||||
* A 31 bit segmenttable entry of S390 has following format:
|
||||
* | P-table origin | |PTL
|
||||
* 0 IC
|
||||
* 00000000001111111111222222222233
|
||||
* 01234567890123456789012345678901
|
||||
*
|
||||
* I Segment-Invalid Bit: Segment is not available for address-translation
|
||||
* C Common-Segment Bit: Segment is not private (PoP 3-30)
|
||||
* PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256)
|
||||
*
|
||||
* The 31 bit segmenttable origin of S390 has following format:
|
||||
*
|
||||
* |S-table origin | | STL |
|
||||
* X **GPS
|
||||
* 00000000001111111111222222222233
|
||||
* 01234567890123456789012345678901
|
||||
*
|
||||
* X Space-Switch event:
|
||||
* G Segment-Invalid Bit: *
|
||||
* P Private-Space Bit: Segment is not private (PoP 3-30)
|
||||
* S Storage-Alteration:
|
||||
* STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048)
|
||||
*
|
||||
* A 64 bit pagetable entry of S390 has following format:
|
||||
* | PFRA |0IPC| OS |
|
||||
* 0000000000111111111122222222223333333333444444444455555555556666
|
||||
|
@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr)
|
|||
|
||||
/* Software bits in the page table entry */
|
||||
#define _PAGE_PRESENT 0x001 /* SW pte present bit */
|
||||
#define _PAGE_TYPE 0x002 /* SW pte type bit */
|
||||
#define _PAGE_YOUNG 0x004 /* SW pte young bit */
|
||||
#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
|
||||
#define _PAGE_READ 0x010 /* SW pte read bit */
|
||||
|
@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr)
|
|||
* table lock held.
|
||||
*
|
||||
* The following table gives the different possible bit combinations for
|
||||
* the pte hardware and software bits in the last 12 bits of a pte:
|
||||
* the pte hardware and software bits in the last 12 bits of a pte
|
||||
* (. unassigned bit, x don't care, t swap type):
|
||||
*
|
||||
* 842100000000
|
||||
* 000084210000
|
||||
* 000000008421
|
||||
* .IR...wrdytp
|
||||
* empty .10...000000
|
||||
* swap .10...xxxx10
|
||||
* file .11...xxxxx0
|
||||
* prot-none, clean, old .11...000001
|
||||
* prot-none, clean, young .11...000101
|
||||
* prot-none, dirty, old .10...001001
|
||||
* prot-none, dirty, young .10...001101
|
||||
* read-only, clean, old .11...010001
|
||||
* read-only, clean, young .01...010101
|
||||
* read-only, dirty, old .11...011001
|
||||
* read-only, dirty, young .01...011101
|
||||
* read-write, clean, old .11...110001
|
||||
* read-write, clean, young .01...110101
|
||||
* read-write, dirty, old .10...111001
|
||||
* read-write, dirty, young .00...111101
|
||||
* .IR.uswrdy.p
|
||||
* empty .10.00000000
|
||||
* swap .11..ttttt.0
|
||||
* prot-none, clean, old .11.xx0000.1
|
||||
* prot-none, clean, young .11.xx0001.1
|
||||
* prot-none, dirty, old .10.xx0010.1
|
||||
* prot-none, dirty, young .10.xx0011.1
|
||||
* read-only, clean, old .11.xx0100.1
|
||||
* read-only, clean, young .01.xx0101.1
|
||||
* read-only, dirty, old .11.xx0110.1
|
||||
* read-only, dirty, young .01.xx0111.1
|
||||
* read-write, clean, old .11.xx1100.1
|
||||
* read-write, clean, young .01.xx1101.1
|
||||
* read-write, dirty, old .10.xx1110.1
|
||||
* read-write, dirty, young .00.xx1111.1
|
||||
* HW-bits: R read-only, I invalid
|
||||
* SW-bits: p present, y young, d dirty, r read, w write, s special,
|
||||
* u unused, l large
|
||||
*
|
||||
* pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
|
||||
* pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
|
||||
* pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
|
||||
* pte_none is true for the bit pattern .10.00000000, pte == 0x400
|
||||
* pte_swap is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
|
||||
* pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
|
||||
*/
|
||||
|
||||
/* Bits in the segment/region table address-space-control-element */
|
||||
|
@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr)
|
|||
* read-write, dirty, young 11..0...0...11
|
||||
* The segment table origin is used to distinguish empty (origin==0) from
|
||||
* read-write, old segment table entries (origin!=0)
|
||||
* HW-bits: R read-only, I invalid
|
||||
* SW-bits: y young, d dirty, r read, w write
|
||||
*/
|
||||
|
||||
#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
|
||||
|
@ -591,10 +560,9 @@ static inline int pte_none(pte_t pte)
|
|||
|
||||
static inline int pte_swap(pte_t pte)
|
||||
{
|
||||
/* Bit pattern: (pte & 0x603) == 0x402 */
|
||||
return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
|
||||
_PAGE_TYPE | _PAGE_PRESENT))
|
||||
== (_PAGE_INVALID | _PAGE_TYPE);
|
||||
/* Bit pattern: (pte & 0x201) == 0x200 */
|
||||
return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
|
||||
== _PAGE_PROTECT;
|
||||
}
|
||||
|
||||
static inline int pte_special(pte_t pte)
|
||||
|
@ -1595,51 +1563,51 @@ static inline int has_transparent_hugepage(void)
|
|||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
/*
|
||||
* 31 bit swap entry format:
|
||||
* A page-table entry has some bits we have to treat in a special way.
|
||||
* Bits 0, 20 and bit 23 have to be zero, otherwise a specification
|
||||
* exception will occur instead of a page translation exception. The
|
||||
* specification exception has the bad habit not to store necessary
|
||||
* information in the lowcore.
|
||||
* Bits 21, 22, 30 and 31 are used to indicate the page type.
|
||||
* A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
|
||||
* This leaves the bits 1-19 and bits 24-29 to store type and offset.
|
||||
* We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
|
||||
* plus 24 for the offset.
|
||||
* 0| offset |0110|o|type |00|
|
||||
* 0 0000000001111111111 2222 2 22222 33
|
||||
* 0 1234567890123456789 0123 4 56789 01
|
||||
*
|
||||
* 64 bit swap entry format:
|
||||
* A page-table entry has some bits we have to treat in a special way.
|
||||
* Bits 52 and bit 55 have to be zero, otherwise a specification
|
||||
* exception will occur instead of a page translation exception. The
|
||||
* specification exception has the bad habit not to store necessary
|
||||
* information in the lowcore.
|
||||
* Bits 53, 54, 62 and 63 are used to indicate the page type.
|
||||
* A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
|
||||
* This leaves the bits 0-51 and bits 56-61 to store type and offset.
|
||||
* We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
|
||||
* plus 56 for the offset.
|
||||
* | offset |0110|o|type |00|
|
||||
* 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
|
||||
* 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
|
||||
* Bits 54 and 63 are used to indicate the page type.
|
||||
* A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
|
||||
* This leaves the bits 0-51 and bits 56-62 to store type and offset.
|
||||
* We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
|
||||
* for the offset.
|
||||
* | offset |01100|type |00|
|
||||
* |0000000000111111111122222222223333333333444444444455|55555|55566|66|
|
||||
* |0123456789012345678901234567890123456789012345678901|23456|78901|23|
|
||||
*/
|
||||
|
||||
#define __SWP_OFFSET_MASK (~0UL >> 11)
|
||||
#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
|
||||
#define __SWP_OFFSET_SHIFT 12
|
||||
#define __SWP_TYPE_MASK ((1UL << 5) - 1)
|
||||
#define __SWP_TYPE_SHIFT 2
|
||||
|
||||
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
|
||||
{
|
||||
pte_t pte;
|
||||
offset &= __SWP_OFFSET_MASK;
|
||||
pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
|
||||
((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
|
||||
|
||||
pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
|
||||
pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
|
||||
pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
|
||||
return pte;
|
||||
}
|
||||
|
||||
#define __swp_type(entry) (((entry).val >> 2) & 0x1f)
|
||||
#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
|
||||
#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
|
||||
static inline unsigned long __swp_type(swp_entry_t entry)
|
||||
{
|
||||
return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
|
||||
}
|
||||
|
||||
static inline unsigned long __swp_offset(swp_entry_t entry)
|
||||
{
|
||||
return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
|
||||
}
|
||||
|
||||
static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
|
||||
{
|
||||
return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
|
||||
}
|
||||
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
|
||||
|
|
|
@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
|
|||
|
||||
/*
|
||||
* Convert encoding pte bits pmd bits
|
||||
* .IR...wrdytp dy..R...I...wr
|
||||
* empty .10...000000 -> 00..0...1...00
|
||||
* prot-none, clean, old .11...000001 -> 00..1...1...00
|
||||
* prot-none, clean, young .11...000101 -> 01..1...1...00
|
||||
* prot-none, dirty, old .10...001001 -> 10..1...1...00
|
||||
* prot-none, dirty, young .10...001101 -> 11..1...1...00
|
||||
* read-only, clean, old .11...010001 -> 00..1...1...01
|
||||
* read-only, clean, young .01...010101 -> 01..1...0...01
|
||||
* read-only, dirty, old .11...011001 -> 10..1...1...01
|
||||
* read-only, dirty, young .01...011101 -> 11..1...0...01
|
||||
* read-write, clean, old .11...110001 -> 00..0...1...11
|
||||
* read-write, clean, young .01...110101 -> 01..0...0...11
|
||||
* read-write, dirty, old .10...111001 -> 10..0...1...11
|
||||
* read-write, dirty, young .00...111101 -> 11..0...0...11
|
||||
* lIR.uswrdy.p dy..R...I...wr
|
||||
* empty 010.000000.0 -> 00..0...1...00
|
||||
* prot-none, clean, old 111.000000.1 -> 00..1...1...00
|
||||
* prot-none, clean, young 111.000001.1 -> 01..1...1...00
|
||||
* prot-none, dirty, old 111.000010.1 -> 10..1...1...00
|
||||
* prot-none, dirty, young 111.000011.1 -> 11..1...1...00
|
||||
* read-only, clean, old 111.000100.1 -> 00..1...1...01
|
||||
* read-only, clean, young 101.000101.1 -> 01..1...0...01
|
||||
* read-only, dirty, old 111.000110.1 -> 10..1...1...01
|
||||
* read-only, dirty, young 101.000111.1 -> 11..1...0...01
|
||||
* read-write, clean, old 111.001100.1 -> 00..1...1...11
|
||||
* read-write, clean, young 101.001101.1 -> 01..1...0...11
|
||||
* read-write, dirty, old 110.001110.1 -> 10..0...1...11
|
||||
* read-write, dirty, young 100.001111.1 -> 11..0...0...11
|
||||
* HW-bits: R read-only, I invalid
|
||||
* SW-bits: p present, y young, d dirty, r read, w write, s special,
|
||||
* u unused, l large
|
||||
*/
|
||||
if (pte_present(pte)) {
|
||||
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
|
||||
|
@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
|
|||
|
||||
/*
|
||||
* Convert encoding pmd bits pte bits
|
||||
* dy..R...I...wr .IR...wrdytp
|
||||
* empty 00..0...1...00 -> .10...001100
|
||||
* prot-none, clean, old 00..0...1...00 -> .10...000001
|
||||
* prot-none, clean, young 01..0...1...00 -> .10...000101
|
||||
* prot-none, dirty, old 10..0...1...00 -> .10...001001
|
||||
* prot-none, dirty, young 11..0...1...00 -> .10...001101
|
||||
* read-only, clean, old 00..1...1...01 -> .11...010001
|
||||
* read-only, clean, young 01..1...1...01 -> .11...010101
|
||||
* read-only, dirty, old 10..1...1...01 -> .11...011001
|
||||
* read-only, dirty, young 11..1...1...01 -> .11...011101
|
||||
* read-write, clean, old 00..0...1...11 -> .10...110001
|
||||
* read-write, clean, young 01..0...1...11 -> .10...110101
|
||||
* read-write, dirty, old 10..0...1...11 -> .10...111001
|
||||
* read-write, dirty, young 11..0...1...11 -> .10...111101
|
||||
* dy..R...I...wr lIR.uswrdy.p
|
||||
* empty 00..0...1...00 -> 010.000000.0
|
||||
* prot-none, clean, old 00..1...1...00 -> 111.000000.1
|
||||
* prot-none, clean, young 01..1...1...00 -> 111.000001.1
|
||||
* prot-none, dirty, old 10..1...1...00 -> 111.000010.1
|
||||
* prot-none, dirty, young 11..1...1...00 -> 111.000011.1
|
||||
* read-only, clean, old 00..1...1...01 -> 111.000100.1
|
||||
* read-only, clean, young 01..1...0...01 -> 101.000101.1
|
||||
* read-only, dirty, old 10..1...1...01 -> 111.000110.1
|
||||
* read-only, dirty, young 11..1...0...01 -> 101.000111.1
|
||||
* read-write, clean, old 00..1...1...11 -> 111.001100.1
|
||||
* read-write, clean, young 01..1...0...11 -> 101.001101.1
|
||||
* read-write, dirty, old 10..0...1...11 -> 110.001110.1
|
||||
* read-write, dirty, young 11..0...0...11 -> 100.001111.1
|
||||
* HW-bits: R read-only, I invalid
|
||||
* SW-bits: p present, y young, d dirty, r read, w write, s special,
|
||||
* u unused, l large
|
||||
*/
|
||||
if (pmd_present(pmd)) {
|
||||
pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
|
||||
|
|
Loading…
Reference in New Issue
Block a user