Extend obj2yaml to optionally preserve raw __LINKEDIT/__DATA segments.

I am planning to upstream MachOObjectFile code to support Darwin
chained fixups. In order to test the new parser features we need a way
to produce correct (and incorrect) chained fixups. Right now the only
tool that can produce them is the Darwin linker. To avoid having to
check in binary files, this patch allows obj2yaml to print a hexdump
of the raw __LINKEDIT and __DATA segments, which both allows us to
bootstrap the parser and enables us to easily create malformed inputs
to test error handling in the parser.

This patch adds a new option to obj2yaml that accepts two values and
may be given more than once:

  --raw-segment=data
  --raw-segment=linkedit

Differential Revision: https://reviews.llvm.org/D113234
This commit is contained in:
Adrian Prantl 2021-11-08 11:29:04 -08:00
parent d398e8f170
commit 8bd8dd16e2
9 changed files with 280 additions and 17 deletions

View File

@ -311,6 +311,9 @@ public:
bool isSectionBitcode(DataRefImpl Sec) const override;
bool isDebugSection(DataRefImpl Sec) const override;
/// Return the raw contents of an entire segment.
///
/// \p SegmentName is matched as a prefix of each LC_SEGMENT / LC_SEGMENT_64
/// load command's segname field, so a longer segment name that begins with
/// \p SegmentName also matches. The first matching segment whose file
/// contents are non-empty is returned; if there is none, or the load command
/// cannot be read, the result is an empty ArrayRef.
ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
/// When dsymutil generates the companion file, it strips all unnecessary
/// sections (e.g. everything in the _TEXT segment) by omitting their body
/// and setting the offset in their corresponding load command to zero.

View File

@ -131,6 +131,7 @@ struct Object {
std::vector<LoadCommand> LoadCommands;
std::vector<Section> Sections;
LinkEditData LinkEdit;
Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
DWARFYAML::Data DWARF;
};

View File

@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
SectionName == "__swift_ast";
}
namespace {
/// Helper for MachOObjectFile::getSegmentContents.
///
/// Decodes \p LoadCmd as a \p LoadCommandType segment command and, if its
/// 16-byte segname field starts with \p SegmentName, returns the bytes of
/// \p Obj's buffer in the range [fileoff, fileoff + filesize).
///
/// An empty ArrayRef is returned when the name does not match, and also when
/// the load command cannot be decoded (that error is deliberately consumed).
template <typename LoadCommandType>
ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
                                     MachOObjectFile::LoadCommandInfo LoadCmd,
                                     StringRef SegmentName) {
  auto CommandOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
  if (!CommandOrErr) {
    // Best effort: swallow the decode error and report "no contents".
    consumeError(CommandOrErr.takeError());
    return {};
  }

  auto &Command = CommandOrErr.get();

  // segname is a fixed-width field, so compare by prefix rather than equality.
  const StringRef Name(Command.segname, 16);
  if (!Name.startswith(SegmentName))
    return {};

  const StringRef Bytes =
      Obj.getData().slice(Command.fileoff, Command.fileoff + Command.filesize);
  return arrayRefFromStringRef(Bytes);
}
} // namespace
/// Walk the load commands in order and return the file contents of the first
/// LC_SEGMENT / LC_SEGMENT_64 whose name matches \p SegmentName and whose
/// contents are non-empty. Returns an empty ArrayRef if no such segment
/// exists.
ArrayRef<uint8_t>
MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
  for (auto LoadCmd : load_commands()) {
    // Stays empty for every load command that is not a segment command.
    ArrayRef<uint8_t> Bytes;
    if (LoadCmd.C.cmd == MachO::LC_SEGMENT)
      Bytes = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
                                                           SegmentName);
    else if (LoadCmd.C.cmd == MachO::LC_SEGMENT_64)
      Bytes = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
                                                              SegmentName);

    // A non-matching or empty segment yields an empty ref; keep looking.
    if (!Bytes.empty())
      return Bytes;
  }
  return {};
}
/// Return the value stored in the `d.a` field of \p Sec's raw DataRefImpl,
/// which this class uses as the section's ID.
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
  const DataRefImpl Raw = Sec.getRawDataRefImpl();
  return Raw.d.a;
}

View File

@ -288,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
}
Error MachOWriter::writeSectionData(raw_ostream &OS) {
uint64_t LinkEditOff = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SEGMENT:
@ -297,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
if (0 ==
strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
FoundLinkEditSeg = true;
LinkEditOff = segOff;
if (Obj.RawLinkEditSegment)
continue;
writeLinkEditData(OS);
}
for (auto &Sec : LC.Sections) {
@ -344,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
}
}
if (Obj.RawLinkEditSegment) {
ZeroToOffset(OS, LinkEditOff);
if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
return createStringError(errc::invalid_argument,
"section offsets don't line up");
Obj.RawLinkEditSegment->writeAsBinary(OS);
}
return Error::success();
}

View File

@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
Object.Header.magic == MachO::MH_CIGAM_64;
IO.mapOptional("LoadCommands", Object.LoadCommands);
if (Object.RawLinkEditSegment || !IO.outputting())
IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
if(!Object.LinkEdit.isEmpty() || !IO.outputting())
IO.mapOptional("LinkEditData", Object.LinkEdit);

View File

@ -0,0 +1,184 @@
# Test that obj2yaml + yaml2obj can round-trip mach-o executables with
# raw __LINKEDIT segments.
#
# RUN: yaml2obj %s | obj2yaml --raw-segment=data --raw-segment=linkedit | FileCheck %s
#
# This file was produced using:
# echo "int ext;" > a.c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib
# echo "extern int ext;" > b.c
# echo "int padding;" >> b.c
# echo "int *p = &ext + 4;" >> b.c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la
#
# CHECK: - sectname: __data
# CHECK: segname: __DATA
# CHECK: content: '0000001000000080'
# CHECK: __LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x100000C
cpusubtype: 0x0
filetype: 0x6
ncmds: 16
sizeofcmds: 816
flags: 0x100085
reserved: 0x0
LoadCommands:
- cmd: LC_SEGMENT_64
cmdsize: 152
segname: __TEXT
vmaddr: 0
vmsize: 16384
fileoff: 0
filesize: 16384
maxprot: 5
initprot: 5
nsects: 1
flags: 0
Sections:
- sectname: __text
segname: __TEXT
addr: 0x4000
size: 0
offset: 0x4000
align: 0
reloff: 0x0
nreloc: 0
flags: 0x80000400
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: ''
- cmd: LC_SEGMENT_64
cmdsize: 152
segname: __DATA
vmaddr: 16384
vmsize: 16384
fileoff: 16384
filesize: 16384
maxprot: 3
initprot: 3
nsects: 1
flags: 0
Sections:
- sectname: __data
segname: __DATA
addr: 0x4000
size: 8
offset: 0x4000
align: 3
reloff: 0x0
nreloc: 0
flags: 0x0
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: '0000001000000080'
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __LINKEDIT
vmaddr: 32768
vmsize: 16384
fileoff: 32768
filesize: 160
maxprot: 1
initprot: 1
nsects: 0
flags: 0
- cmd: LC_ID_DYLIB
cmdsize: 64
dylib:
name: 24
timestamp: 1
current_version: 0
compatibility_version: 0
Content: '@executable_path/libfixups.dylib'
ZeroPadBytes: 8
- cmd: LC_DYLD_CHAINED_FIXUPS
cmdsize: 16
dataoff: 32768
datasize: 88
- cmd: LC_DYLD_EXPORTS_TRIE
cmdsize: 16
dataoff: 32856
datasize: 16
- cmd: LC_SYMTAB
cmdsize: 24
symoff: 32880
nsyms: 2
stroff: 32912
strsize: 16
- cmd: LC_DYSYMTAB
cmdsize: 80
ilocalsym: 0
nlocalsym: 0
iextdefsym: 0
nextdefsym: 1
iundefsym: 1
nundefsym: 1
tocoff: 0
ntoc: 0
modtaboff: 0
nmodtab: 0
extrefsymoff: 0
nextrefsyms: 0
indirectsymoff: 0
nindirectsyms: 0
extreloff: 0
nextrel: 0
locreloff: 0
nlocrel: 0
- cmd: LC_UUID
cmdsize: 24
uuid: 56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9
- cmd: LC_BUILD_VERSION
cmdsize: 32
platform: 2
minos: 983296
sdk: 983552
ntools: 1
Tools:
- tool: 3
version: 46596096
- cmd: LC_SOURCE_VERSION
cmdsize: 16
version: 0
- cmd: LC_ENCRYPTION_INFO_64
cmdsize: 24
cryptoff: 16384
cryptsize: 0
cryptid: 0
pad: 0
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 0
compatibility_version: 0
Content: '@executable_path/liba.dylib'
ZeroPadBytes: 5
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 85917696
compatibility_version: 65536
Content: '/usr/lib/libSystem.B.dylib'
ZeroPadBytes: 6
- cmd: LC_FUNCTION_STARTS
cmdsize: 16
dataoff: 32872
datasize: 8
- cmd: LC_DATA_IN_CODE
cmdsize: 16
dataoff: 32880
datasize: 0
__LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
...

View File

@ -29,6 +29,7 @@ class MachODumper {
const object::MachOObjectFile &Obj;
std::unique_ptr<DWARFContext> DWARFCtx;
unsigned RawSegments;
void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
@ -52,8 +53,8 @@ class MachODumper {
public:
MachODumper(const object::MachOObjectFile &O,
std::unique_ptr<DWARFContext> DCtx)
: Obj(O), DWARFCtx(std::move(DCtx)) {}
std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
: Obj(O), DWARFCtx(std::move(DCtx)), RawSegments(RawSegments) {}
Expected<std::unique_ptr<MachOYAML::Object>> dump();
};
@ -176,6 +177,13 @@ Expected<const char *> MachODumper::extractSections(
if (Expected<MachOYAML::Section> S =
constructSection(Sec, Sections.size() + 1)) {
StringRef SecName(S->sectname);
// Copy data sections if requested.
if ((RawSegments & RawSegments::data) &&
StringRef(S->segname).startswith("__DATA"))
S->content =
yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
if (SecName.startswith("__debug_")) {
// If the DWARF section cannot be successfully parsed, emit raw content
// instead of an entry in the DWARF section of the YAML.
@ -282,7 +290,11 @@ Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
dumpHeader(Y);
if (Error Err = dumpLoadCommands(Y))
return std::move(Err);
dumpLinkEdit(Y);
if (RawSegments & RawSegments::linkedit)
Y->RawLinkEditSegment =
yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
else
dumpLinkEdit(Y);
return std::move(Y);
}
@ -587,9 +599,10 @@ void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
}
}
Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
unsigned RawSegments) {
std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
MachODumper Dumper(Obj, std::move(DCtx));
MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
if (!YAML)
return YAML.takeError();
@ -602,7 +615,8 @@ Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
return Error::success();
}
Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
unsigned RawSegments) {
yaml::YamlObjectFile YAMLFile;
YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
@ -624,7 +638,7 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
return SliceObj.takeError();
std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
if (!YAMLObj)
return YAMLObj.takeError();
@ -636,12 +650,13 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
return Error::success();
}
Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
unsigned RawSegments) {
if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
return macho2yaml(Out, *MachOObj);
return macho2yaml(Out, *MachOObj, RawSegments);
if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
return macho2yaml(Out, *MachOObj);
return macho2yaml(Out, *MachOObj, RawSegments);
llvm_unreachable("unexpected Mach-O file format");
}

View File

@ -1,4 +1,4 @@
//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -18,6 +18,14 @@
using namespace llvm;
using namespace llvm::object;
// Positional input path; initialised to "-" when none is given.
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
// Mach-O only: --raw-segment=<data|linkedit>. The accumulated bits are passed
// to macho2yaml() through RawSegment.getBits().
// NOTE(review): per the LLVM CommandLine manual, cl::bits records each value
// as (1 << enumerator). The RawSegments enumerators are already masks
// (data = 1, linkedit = 1 << 1) and macho2yaml tests them with `&`, so the
// two encodings look inconsistent: `data` would set bit 1 (the linkedit mask)
// and `linkedit` would set bit 2 (no mask). Confirm, and if so translate with
// RawSegment.isSet(...) at the call site instead of forwarding getBits().
static cl::bits<RawSegments> RawSegment(
"raw-segment",
cl::desc("Mach-O: dump the raw contents of the listed segments instead of "
"parsing them:"),
cl::values(clEnumVal(data, "__DATA"), clEnumVal(linkedit, "__LINKEDIT")));
static Error dumpObject(const ObjectFile &Obj) {
if (Obj.isCOFF())
return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
@ -54,7 +62,7 @@ static Error dumpInput(StringRef File) {
// Universal MachO is not a subclass of ObjectFile, so it needs to be handled
// here with the other binary types.
if (Binary.isMachO() || Binary.isMachOUniversalBinary())
return macho2yaml(outs(), Binary);
return macho2yaml(outs(), Binary, RawSegment.getBits());
if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
return dumpObject(*Obj);
if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
@ -74,9 +82,6 @@ static void reportError(StringRef Input, Error Err) {
errs().flush();
}
cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
cl::init("-"));
int main(int argc, char *argv[]) {
InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);

View File

@ -20,12 +20,13 @@
#include "llvm/Support/MemoryBufferRef.h"
#include <system_error>
// Flags selecting which Mach-O segments are emitted as raw bytes instead of
// being parsed. The enumerators are masks (not bit positions): they are
// combined into the `unsigned RawSegments` argument of macho2yaml() and tested
// there with bitwise `&`.
enum RawSegments : unsigned { none = 0, data = 1, linkedit = 1 << 1 };
std::error_code coff2yaml(llvm::raw_ostream &Out,
const llvm::object::COFFObjectFile &Obj);
llvm::Error elf2yaml(llvm::raw_ostream &Out,
const llvm::object::ObjectFile &Obj);
llvm::Error macho2yaml(llvm::raw_ostream &Out,
const llvm::object::Binary &Obj);
llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj,
unsigned RawSegments);
llvm::Error minidump2yaml(llvm::raw_ostream &Out,
const llvm::object::MinidumpFile &Obj);
llvm::Error xcoff2yaml(llvm::raw_ostream &Out,