[lld-macho] Fix alignment & layout to match ld64 and satisfy kernel & codesign

The Mach kernel & codesign on arm64 macOS has strict requirements for alignment and sequence of segments and sections. Dyld probably is just as picky, though kernel & codesign reject malformed Mach-O files before dyld ever has a chance.

I developed this diff by incrementally changing alignments & sequences to match the output of ld64. I stopped when my hello-world test program started working: `codesign --verify` succeded, and `execve(2)` didn't immediately fail with `errno == EBADMACHO` = `"Malformed Mach-O file"`.

Differential Revision: https://reviews.llvm.org/D94935
This commit is contained in:
Greg McGary 2021-01-02 10:31:55 -08:00
parent a872ee2f36
commit c3e4f3b231
12 changed files with 139 additions and 54 deletions

View File

@ -188,7 +188,7 @@ void RebaseSection::writeTo(uint8_t *buf) const {
NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
const char *name)
: SyntheticSection(segname, name) {
align = 8;
align = WordSize; // vector of pointers / mimic ld64
flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
}
@ -389,7 +389,9 @@ void macho::addNonLazyBindingEntries(const Symbol *sym,
StubsSection::StubsSection()
: SyntheticSection(segment_names::text, "__stubs") {
flags = MachO::S_SYMBOL_STUBS;
flags = MachO::S_SYMBOL_STUBS | MachO::S_ATTR_SOME_INSTRUCTIONS |
MachO::S_ATTR_PURE_INSTRUCTIONS;
align = 4; // machine instructions / mimic ld64
reserved2 = target->stubSize;
}
@ -413,7 +415,10 @@ bool StubsSection::addEntry(Symbol *sym) {
}
StubHelperSection::StubHelperSection()
: SyntheticSection(segment_names::text, "__stub_helper") {}
: SyntheticSection(segment_names::text, "__stub_helper") {
flags = MachO::S_ATTR_SOME_INSTRUCTIONS | MachO::S_ATTR_PURE_INSTRUCTIONS;
align = 4; // machine instructions / mimic ld64
}
uint64_t StubHelperSection::getSize() const {
return target->stubHelperHeaderSize +
@ -453,11 +458,12 @@ ImageLoaderCacheSection::ImageLoaderCacheSection() {
uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize);
memset(arr, 0, WordSize);
data = {arr, WordSize};
align = WordSize; // pointer / mimic ld64
}
LazyPointerSection::LazyPointerSection()
: SyntheticSection(segment_names::data, "__la_symbol_ptr") {
align = 8;
align = WordSize; // vector of pointers / mimic ld64
flags = MachO::S_LAZY_SYMBOL_POINTERS;
}

View File

@ -46,6 +46,12 @@ constexpr const char unwindInfo[] = "__unwind_info";
// these are not synthetic, but in service of synthetic __unwind_info
constexpr const char compactUnwind[] = "__compact_unwind";
constexpr const char ehFrame[] = "__eh_frame";
// these are not synthetic, but need to be sorted
constexpr const char text[] = "__text";
constexpr const char stubs[] = "__stubs";
constexpr const char stubHelper[] = "__stub_helper";
constexpr const char laSymbolPtr[] = "__la_symbol_ptr";
constexpr const char data[] = "__data";
} // namespace section_names
@ -71,7 +77,7 @@ class LinkEditSection : public SyntheticSection {
public:
LinkEditSection(const char *segname, const char *name)
: SyntheticSection(segname, name) {
align = WordSize;
align = WordSize; // mimic ld64
}
// Sections in __LINKEDIT are special: their offsets are recorded in the

View File

@ -87,7 +87,7 @@ using namespace lld::macho;
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = WordSize; // TODO(gkm): make this 4 KiB ?
align = 4; // mimic ld64
}
bool UnwindInfoSection::isNeeded() const {

View File

@ -463,6 +463,12 @@ void Writer::scanSymbols() {
}
void Writer::createLoadCommands() {
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
seg->index = segIndex++;
}
in.header->addLoadCommand(make<LCDyldInfo>(
in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
@ -473,8 +479,8 @@ void Writer::createLoadCommands() {
switch (config->outputType) {
case MH_EXECUTE:
in.header->addLoadCommand(make<LCMain>());
in.header->addLoadCommand(make<LCLoadDylinker>());
in.header->addLoadCommand(make<LCMain>());
break;
case MH_DYLIB:
in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName,
@ -487,16 +493,10 @@ void Writer::createLoadCommands() {
llvm_unreachable("unhandled output file type");
}
in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
uuidCommand = make<LCUuid>();
in.header->addLoadCommand(uuidCommand);
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
seg->index = segIndex++;
}
in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
uint64_t dylibOrdinal = 1;
for (InputFile *file : inputFiles) {
@ -566,8 +566,10 @@ static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
static int segmentOrder(OutputSegment *seg) {
return StringSwitch<int>(seg->name)
.Case(segment_names::pageZero, -2)
.Case(segment_names::text, -1)
.Case(segment_names::pageZero, -4)
.Case(segment_names::text, -3)
.Case(segment_names::dataConst, -2)
.Case(segment_names::data, -1)
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
// sections must be ordered after other sections).
.Case(segment_names::linkEdit, std::numeric_limits<int>::max())
@ -579,12 +581,14 @@ static int sectionOrder(OutputSection *osec) {
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
return StringSwitch<int>(osec->name)
.Case(section_names::header, -1)
.Case(section_names::header, -4)
.Case(section_names::text, -3)
.Case(section_names::stubs, -2)
.Case(section_names::stubHelper, -1)
.Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
.Case(section_names::ehFrame, std::numeric_limits<int>::max())
.Default(0);
}
if (segname == segment_names::data) {
} else if (segname == segment_names::data) {
// For each thread spawned, dyld will initialize its TLVs by copying the
// address range from the start of the first thread-local data section to
// the end of the last one. We therefore arrange these sections contiguously
@ -600,10 +604,12 @@ static int sectionOrder(OutputSection *osec) {
case S_ZEROFILL:
return std::numeric_limits<int>::max();
default:
return 0;
return StringSwitch<int>(osec->name)
.Case(section_names::laSymbolPtr, -2)
.Case(section_names::data, -1)
.Default(0);
}
}
if (segname == segment_names::linkEdit) {
} else if (segname == segment_names::linkEdit) {
return StringSwitch<int>(osec->name)
.Case(section_names::rebase, -8)
.Case(section_names::binding, -7)

View File

@ -14,7 +14,7 @@
# RUN: yaml2obj %t/fat.dylib.yaml > %t/fat.dylib
# RUN: %lld -lSystem %t/test.o %t/fat.dylib -o %t/test
# RUN: llvm-objdump --macho --lazy-bind %t/test | FileCheck %s
# CHECK: __DATA __la_symbol_ptr 0x100001008 foo _foo
# CHECK: __DATA __la_symbol_ptr 0x100002000 foo _foo
#--- test.s
.globl _main

View File

@ -8,16 +8,16 @@
# RUN: llvm-objdump --macho --all-headers --syms %t/not-main | FileCheck %s
# CHECK-LABEL: SYMBOL TABLE
# CHECK-NEXT: {{0*}}[[#%x, ENTRY_ADDR:]] {{.*}} __TEXT,__text _not_main
# CHECK: cmd LC_MAIN
# CHECK-NEXT: cmdsize 24
# CHECK-NEXT: entryoff [[#ENTRYOFF:]]
# CHECK: sectname __text
# CHECK-NEXT: segname __TEXT
## Note: the following checks assume that the entry symbol is right at the
## beginning of __text.
# CHECK-NEXT: addr 0x{{0*}}[[#ENTRY_ADDR]]
# CHECK-NEXT: size
# CHECK-NEXT: offset [[#ENTRYOFF]]
# CHECK-NEXT: offset [[#ENTRYOFF:]]
# CHECK: cmd LC_MAIN
# CHECK-NEXT: cmdsize 24
# CHECK-NEXT: entryoff [[#ENTRYOFF]]
# RUN: %lld -lSystem -o %t/dysym-main %t/not-main.o %t/libfoo.dylib -e _dysym_main
# RUN: llvm-objdump --macho --all-headers --indirect-symbols --lazy-bind %t/dysym-main | FileCheck %s --check-prefix=DYSYM

View File

@ -20,19 +20,19 @@
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_fn
# CHECK-NEXT: _foo_fn
# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_tlv
# CHECK-NEXT: _foo_tlv
# CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_fn
# CHECK-NEXT: _foo_fn
# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar
# CHECK-NEXT: _foo
# CHECK-NEXT: _stub_binder
# CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_fn
# CHECK-NEXT: _foo_fn
# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_tlv
# CHECK-NEXT: _foo_tlv
#--- libfoo.s

View File

@ -11,7 +11,6 @@
# CHECK-NEXT: ntools 1
# CHECK-NEXT: tool ld
# CHECK-NEXT: version {{[0-9\.]+}}
# CHECK-NEXT: Load command [[#]]
.text
.global _main

View File

@ -0,0 +1,71 @@
# REQUIRES: x86
# RUN: rm -rf %t && mkdir -p %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
# RUN: %lld -execute -o %t/exec %t/test.o -lSystem
# RUN: %lld -dylib -o %t/dylib %t/test.o -lSystem
# RUN: %lld -bundle -o %t/bundle %t/test.o -lSystem
# RUN: llvm-objdump --macho --all-headers %t/exec | \
# RUN: FileCheck %s --check-prefixes=EXEC,COMMON
# RUN: llvm-objdump --macho --all-headers %t/dylib | \
# RUN: FileCheck %s --check-prefixes=DYLIB,COMMON
# RUN: llvm-objdump --macho --all-headers %t/bundle | \
# RUN: FileCheck %s --check-prefixes=BUNDLE,COMMON
## Check that load commands and sections within segments occur in the proper
## sequence. On ARM64 kernel is especially picky about layout, and will
## barf with errno=EBADMACHO when the sequence is wrong.
# EXEC: cmd LC_SEGMENT_64
# EXEC: segname __PAGEZERO
# COMMON: cmd LC_SEGMENT_64
# COMMON: segname __TEXT
# COMMON: sectname __text
# COMMON: segname __TEXT
# COMMON: sectname __cstring
# COMMON: segname __TEXT
# COMMON: cmd LC_SEGMENT_64
# COMMON: segname __DATA_CONST
# COMMON: sectname __got
# COMMON: segname __DATA_CONST
# COMMON: cmd LC_SEGMENT_64
# COMMON: segname __DATA
# COMMON: sectname __data
# COMMON: segname __DATA
# COMMON: sectname __const
# COMMON: segname __DATA
# COMMON: cmd LC_SEGMENT_64
# COMMON: segname __LINKEDIT
# COMMON: cmd LC_DYLD_INFO_ONLY
# COMMON: cmd LC_SYMTAB
# COMMON: cmd LC_DYSYMTAB
# EXEC: cmd LC_LOAD_DYLINKER
# EXEC: cmd LC_MAIN
# DYLIB: cmd LC_ID_DYLIB
# COMMON: cmd LC_UUID
# COMMON: cmd LC_BUILD_VERSION
# COMMON: cmd LC_LOAD_DYLIB
.section __TEXT,__cstring
_str:
.asciz "Help me! I'm trapped in a test!\n"
.globl _mutable
.section __DATA,__data
mutable:
.long 0x1234
.globl _constant
.section __DATA,__const
constant:
.long 0x4567
.text
.global _main
_main:
mov ___nan@GOTPCREL(%rip), %rax
ret

View File

@ -10,7 +10,7 @@
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: _local
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __data (0x4)
# CHECK-NEXT: Section: __data
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]
@ -19,7 +19,7 @@
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: __dyld_private
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __data (0x4)
# CHECK-NEXT: Section: __data
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]
@ -39,7 +39,7 @@
# CHECK-NEXT: Name: _external
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __data (0x4)
# CHECK-NEXT: Section: __data
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]

View File

@ -7,17 +7,17 @@
# RUN: llvm-objdump -d --no-show-raw-insn --bind --lazy-bind --weak-bind --full-contents %t/test | \
# RUN: FileCheck %s
# CHECK: Contents of section __DATA_CONST,__got:
## Check that this section contains a nonzero pointer. It should point to
## _weak_external_for_gotpcrel.
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}}
# CHECK: Contents of section __DATA,__la_symbol_ptr:
## Check that this section contains a nonzero pointer. It should point to
## _weak_external_fn, but we don't have a good way of testing the exact value as
## the bytes here are in little-endian order.
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}}
# CHECK: Contents of section __DATA_CONST,__got:
## Check that this section contains a nonzero pointer. It should point to
## _weak_external_for_gotpcrel.
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}}
# CHECK: <_main>:
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_GOT_ADDR:]]
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_EXT_GOT_ADDR:]]
@ -30,11 +30,11 @@
# CHECK-NEXT: callq 0x{{[0-9a-f]*}}
# CHECK-LABEL: Bind table:
# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel
# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,WEAK_DY_FN:]] pointer 0 libfoo _weak_dysym_fn
# CHECK-DAG: __DATA __data 0x[[#%x,WEAK_DY:]] pointer 0 libfoo _weak_dysym
# CHECK-DAG: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap
# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 libfoo _weak_dysym_tlv
# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel
# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,WEAK_DY_FN:]] pointer 0 libfoo _weak_dysym_fn
## Check that we don't have any other bindings
# CHECK-NOT: pointer

View File

@ -34,10 +34,10 @@
# YAML-LABEL: WeakBindOpcodes:
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 0
# YAML-NEXT: Symbol: _weak_foo
# YAML-NEXT: Symbol: _weak_foo_fn
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 0
# YAML-NEXT: Symbol: _weak_foo_fn
# YAML-NEXT: Symbol: _weak_foo
# YAML-LABEL: LazyBindOpcodes:
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 1
@ -79,13 +79,10 @@
# STRONG-YAML-LABEL: WeakBindOpcodes:
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo_fn
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo
# STRONG-YAML-LABEL: LazyBindOpcodes:
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0