2022-06-13 09:56:45 +08:00
|
|
|
//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_MACHO_EH_FRAME_H
|
|
|
|
#define LLD_MACHO_EH_FRAME_H
|
|
|
|
|
|
|
|
#include "InputSection.h"
|
|
|
|
#include "Relocations.h"
|
|
|
|
|
|
|
|
#include "lld/Common/LLVM.h"
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/PointerUnion.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
|
|
|
|
* is closely coupled with other file parsing logic; EhFrame.h just contains a
|
|
|
|
* few helpers.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* === The EH frame format ===
|
|
|
|
*
|
|
|
|
* EH frames can either be Common Information Entries (CIEs) or Frame
|
|
|
|
* Description Entries (FDEs). CIEs contain information that is common amongst
|
|
|
|
* several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
|
|
|
|
* entries together form a forest of two-level trees, with CIEs as the roots
|
|
|
|
* and FDEs as the leaves. Note that a CIE must precede the FDEs which point
|
|
|
|
* to it.
|
|
|
|
*
|
|
|
|
* A CIE comprises the following fields in order:
|
|
|
|
* 1. Length of the entry (4 or 12 bytes)
|
|
|
|
* 2. CIE offset (4 bytes; always 0 for CIEs)
|
|
|
|
* 3. CIE version (byte)
|
|
|
|
* 4. Null-terminated augmentation string
|
|
|
|
* 5-8. LEB128 values that we don't care about
|
|
|
|
* 9. Augmentation data, to be interpreted using the aug string
|
|
|
|
* 10. DWARF instructions (ignored by LLD)
|
|
|
|
*
|
|
|
|
* An FDE comprises the following fields in order:
|
|
|
|
* 1. Length of the entry (4 or 12 bytes)
|
|
|
|
* 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
|
|
|
|
* 3. Function address (pointer-sized pcrel offset)
|
2022-11-28 08:54:07 +08:00
|
|
|
* 4. (Optional) Augmentation data length
|
|
|
|
* 5. (Optional) LSDA address (pointer-sized pcrel offset)
|
2022-06-13 09:56:45 +08:00
|
|
|
* 6. DWARF instructions (ignored by LLD)
|
|
|
|
*/
|
2022-08-07 22:37:49 +08:00
|
|
|
namespace lld::macho {
|
2022-06-13 09:56:45 +08:00
|
|
|
|
|
|
|
class EhReader {
|
|
|
|
public:
|
2022-08-01 08:16:08 +08:00
|
|
|
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
|
|
|
|
: file(file), data(data), dataOff(dataOff) {}
|
2022-06-13 09:56:45 +08:00
|
|
|
size_t size() const { return data.size(); }
|
|
|
|
// Read and validate the length field.
|
|
|
|
uint64_t readLength(size_t *off) const;
|
|
|
|
// Skip the length field without doing validation.
|
|
|
|
void skipValidLength(size_t *off) const;
|
|
|
|
uint8_t readByte(size_t *off) const;
|
|
|
|
uint32_t readU32(size_t *off) const;
|
2022-08-01 08:16:08 +08:00
|
|
|
uint64_t readPointer(size_t *off, uint8_t size) const;
|
2022-06-13 09:56:45 +08:00
|
|
|
StringRef readString(size_t *off) const;
|
|
|
|
void skipLeb128(size_t *off) const;
|
|
|
|
void failOn(size_t errOff, const Twine &msg) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
const ObjFile *file;
|
|
|
|
ArrayRef<uint8_t> data;
|
|
|
|
// The offset of the data array within its section. Used only for error
|
|
|
|
// reporting.
|
|
|
|
const size_t dataOff;
|
|
|
|
};
|
|
|
|
|
|
|
|
// The EH frame format, when emitted by llvm-mc, consists of a number of
|
|
|
|
// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
|
|
|
|
// pcrel offsets in the section data. The offsets refer to the locations of
|
|
|
|
// symbols in the input object file. When we ingest these EH frames, we convert
|
|
|
|
// these implicit relocations into explicit Relocs.
|
|
|
|
//
|
|
|
|
// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
|
|
|
|
// However, we need this operation to be cross-platform, and ARM does not have a
|
|
|
|
// similar relocation that is applicable. We therefore use the more verbose (but
|
|
|
|
// more generic) subtractor relocation to encode these pcrel values. ld64
|
|
|
|
// appears to do something similar -- its `-r` output contains these explicit
|
|
|
|
// subtractor relocations.
|
|
|
|
class EhRelocator {
|
|
|
|
public:
|
|
|
|
EhRelocator(InputSection *isec) : isec(isec) {}
|
|
|
|
|
|
|
|
// For the next two methods, let `PC` denote `isec address + off`.
|
|
|
|
// Create relocs writing the value of target - PC to PC.
|
|
|
|
void makePcRel(uint64_t off,
|
|
|
|
llvm::PointerUnion<Symbol *, InputSection *> target,
|
|
|
|
uint8_t length);
|
|
|
|
// Create relocs writing the value of PC - target to PC.
|
|
|
|
void makeNegativePcRel(uint64_t off,
|
|
|
|
llvm::PointerUnion<Symbol *, InputSection *> target,
|
|
|
|
uint8_t length);
|
|
|
|
// Insert the new relocations into isec->relocs.
|
|
|
|
void commit();
|
|
|
|
|
|
|
|
private:
|
|
|
|
InputSection *isec;
|
|
|
|
// Insert new relocs here so that we don't invalidate iterators into the
|
|
|
|
// existing relocs vector.
|
|
|
|
SmallVector<Reloc, 6> newRelocs;
|
|
|
|
};
|
|
|
|
|
2022-08-07 22:37:49 +08:00
|
|
|
} // namespace lld::macho
|
2022-06-13 09:56:45 +08:00
|
|
|
|
|
|
|
#endif
|