[memprof] Record BuildIDs in the raw profile.

This patch adds support for recording BuildIds using the sanitizer
ListOfModules API. We add another entry to the SegmentEntry struct and
change the memprof raw version.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D145190
This commit is contained in:
Snehasish Kumar 2023-03-13 20:11:58 +00:00
parent 55f38495e3
commit a1bbf5ac3c
27 changed files with 109 additions and 107 deletions

View File

@ -19,6 +19,7 @@
* synced up.
*
\*===----------------------------------------------------------------------===*/
#include <string.h>
#ifdef _MSC_VER
#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
@ -32,7 +33,9 @@
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
// The version number of the raw binary format.
#define MEMPROF_RAW_VERSION 2ULL
#define MEMPROF_RAW_VERSION 3ULL
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
namespace llvm {
namespace memprof {
@ -46,37 +49,40 @@ PACKED(struct Header {
uint64_t StackOffset;
});
// A struct describing the information necessary to describe a /proc/maps
// segment entry for a particular binary/library identified by its build id.
PACKED(struct SegmentEntry {
uint64_t Start;
uint64_t End;
uint64_t Offset;
// This field is unused until sanitizer procmaps support for build ids for
// Linux-Elf is implemented.
uint8_t BuildId[32] = {0};
uint64_t BuildIdSize;
uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
Start(S), End(E), Offset(O) {}
// This constructor leaves the BuildId empty (BuildIdSize == 0); callers that
// have a build id set BuildIdSize and BuildId after construction.
SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
: Start(S), End(E), Offset(O), BuildIdSize(0) {}
SegmentEntry(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
BuildIdSize = S.BuildIdSize;
memcpy(BuildId, S.BuildId, S.BuildIdSize);
}
SegmentEntry& operator=(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
BuildIdSize = S.BuildIdSize;
memcpy(BuildId, S.BuildId, S.BuildIdSize);
return *this;
}
bool operator==(const SegmentEntry& S) const {
return Start == S.Start &&
End == S.End &&
Offset == S.Offset;
return Start == S.Start && End == S.End && Offset == S.Offset &&
BuildIdSize == S.BuildIdSize &&
memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
}
});

View File

@ -23,11 +23,11 @@
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include <sched.h>
@ -295,8 +295,10 @@ struct Allocator {
// memprof_rawprofile.h.
char *Buffer = nullptr;
MemoryMappingLayout Layout(/*cache_enabled=*/true);
u64 BytesSerialized = SerializeToRawProfile(MIBMap, Layout, Buffer);
__sanitizer::ListOfModules List;
List.init();
ArrayRef<LoadedModule> Modules(List.begin(), List.end());
u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
report_file.Write(Buffer, BytesSerialized);
}

View File

@ -33,12 +33,14 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock *const &MIB,
}
} // namespace
u64 SegmentSizeBytes(MemoryMappingLayoutBase &Layout) {
u64 SegmentSizeBytes(ArrayRef<LoadedModule> Modules) {
u64 NumSegmentsToRecord = 0;
MemoryMappedSegment segment;
for (Layout.Reset(); Layout.Next(&segment);)
if (segment.IsReadable() && segment.IsExecutable())
NumSegmentsToRecord++;
for (const auto &Module : Modules) {
for (const auto &Segment : Module.ranges()) {
if (Segment.executable)
NumSegmentsToRecord++;
}
}
return sizeof(u64) // A header which stores the number of records.
+ sizeof(SegmentEntry) * NumSegmentsToRecord;
@ -51,28 +53,31 @@ u64 SegmentSizeBytes(MemoryMappingLayoutBase &Layout) {
// Start
// End
// Offset
// BuildID 32B
// UuidSize
// Uuid 32B
// ----------
// ...
void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout,
void SerializeSegmentsToBuffer(ArrayRef<LoadedModule> Modules,
const u64 ExpectedNumBytes, char *&Buffer) {
char *Ptr = Buffer;
// Reserve space for the final count.
Ptr += sizeof(u64);
u64 NumSegmentsRecorded = 0;
MemoryMappedSegment segment;
for (Layout.Reset(); Layout.Next(&segment);) {
if (segment.IsReadable() && segment.IsExecutable()) {
// TODO: Record segment.uuid when it is implemented for Linux-Elf.
SegmentEntry Entry(segment.start, segment.end, segment.offset);
memcpy(Ptr, &Entry, sizeof(SegmentEntry));
Ptr += sizeof(SegmentEntry);
NumSegmentsRecorded++;
for (const auto &Module : Modules) {
for (const auto &Segment : Module.ranges()) {
if (Segment.executable) {
SegmentEntry Entry(Segment.beg, Segment.end, Module.base_address());
CHECK(Module.uuid_size() <= MEMPROF_BUILDID_MAX_SIZE);
Entry.BuildIdSize = Module.uuid_size();
memcpy(Entry.BuildId, Module.uuid(), Module.uuid_size());
memcpy(Ptr, &Entry, sizeof(SegmentEntry));
Ptr += sizeof(SegmentEntry);
NumSegmentsRecorded++;
}
}
}
// Store the number of segments we recorded in the space we reserved.
*((u64 *)Buffer) = NumSegmentsRecorded;
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
@ -198,11 +203,11 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
// ----------
// Optional Padding Bytes
// ...
u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
char *&Buffer) {
// Each section size is rounded up to 8b since the first entry in each section
// is a u64 which holds the number of entries in the section by convention.
const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Layout), 8);
const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Modules), 8);
Vector<u64> StackIds;
MIBMap.ForEach(RecordStackId, reinterpret_cast<void *>(&StackIds));
@ -232,7 +237,7 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
Ptr = WriteBytes(header, Ptr);
SerializeSegmentsToBuffer(Layout, NumSegmentBytes, Ptr);
SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
Ptr += NumSegmentBytes;
SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);

View File

@ -2,12 +2,12 @@
#define MEMPROF_RAWPROFILE_H_
#include "memprof_mibmap.h"
#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_common.h"
namespace __memprof {
// Serialize the in-memory representation of the memprof profile to the raw
// binary format. The format itself is documented in memprof_rawprofile.cpp.
u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout,
u64 SerializeToRawProfile(MIBMapTy &BlockCache, ArrayRef<LoadedModule> Modules,
char *&Buffer);
} // namespace __memprof

View File

@ -15,22 +15,9 @@ namespace {
using ::__memprof::MIBMapTy;
using ::__memprof::SerializeToRawProfile;
using ::__sanitizer::MemoryMappedSegment;
using ::__sanitizer::MemoryMappingLayoutBase;
using ::__sanitizer::StackDepotPut;
using ::__sanitizer::StackTrace;
using ::llvm::memprof::MemInfoBlock;
using ::testing::_;
using ::testing::Action;
using ::testing::DoAll;
using ::testing::Return;
using ::testing::SetArgPointee;
class MockMemoryMappingLayout final : public MemoryMappingLayoutBase {
public:
MOCK_METHOD(bool, Next, (MemoryMappedSegment *), (override));
MOCK_METHOD(void, Reset, (), (override));
};
uint64_t PopulateFakeMap(const MemInfoBlock &FakeMIB, uint64_t StackPCBegin,
MIBMapTy &FakeMap) {
@ -56,26 +43,13 @@ template <class T = uint64_t> T Read(char *&Buffer) {
}
TEST(MemProf, Basic) {
MockMemoryMappingLayout Layout;
MemoryMappedSegment FakeSegment;
memset(&FakeSegment, 0, sizeof(FakeSegment));
FakeSegment.start = 0x10;
FakeSegment.end = 0x20;
FakeSegment.offset = 0x10;
uint8_t uuid[__sanitizer::kModuleUUIDSize] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE};
memcpy(FakeSegment.uuid, uuid, __sanitizer::kModuleUUIDSize);
FakeSegment.protection =
__sanitizer::kProtectionExecute | __sanitizer::kProtectionRead;
const Action<bool(MemoryMappedSegment *)> SetSegment =
DoAll(SetArgPointee<0>(FakeSegment), Return(true));
EXPECT_CALL(Layout, Next(_))
.WillOnce(SetSegment)
.WillOnce(Return(false))
.WillOnce(SetSegment)
.WillRepeatedly(Return(false));
EXPECT_CALL(Layout, Reset).Times(2);
__sanitizer::LoadedModule FakeModule;
FakeModule.addAddressRange(/*begin=*/0x10, /*end=*/0x20, /*executable=*/true,
/*writable=*/false, /*name=*/"");
const char uuid[MEMPROF_BUILDID_MAX_SIZE] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE};
FakeModule.setUuid(uuid, MEMPROF_BUILDID_MAX_SIZE);
__sanitizer::ArrayRef<__sanitizer::LoadedModule> Modules(&FakeModule,
(&FakeModule) + 1);
MIBMapTy FakeMap;
MemInfoBlock FakeMIB;
@ -90,7 +64,7 @@ TEST(MemProf, Basic) {
FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/3, FakeMap);
char *Ptr = nullptr;
uint64_t NumBytes = SerializeToRawProfile(FakeMap, Layout, Ptr);
uint64_t NumBytes = SerializeToRawProfile(FakeMap, Modules, Ptr);
const char *Buffer = Ptr;
ASSERT_GT(NumBytes, 0ULL);
@ -111,16 +85,16 @@ TEST(MemProf, Basic) {
// Should be equal to the size of the raw profile header.
EXPECT_EQ(SegmentOffset, 48ULL);
// We expect only 1 segment entry, 8b for the count and 56b for SegmentEntry
// We expect only 1 segment entry, 8b for the count and 64b for SegmentEntry
// in memprof_rawprofile.cpp.
EXPECT_EQ(MIBOffset - SegmentOffset, 64ULL);
EXPECT_EQ(MIBOffset - SegmentOffset, 72ULL);
EXPECT_EQ(MIBOffset, 112ULL);
EXPECT_EQ(MIBOffset, 120ULL);
// We expect 2 MIB entries: 8b for the count, then for each entry
// sizeof(uint64_t) + sizeof(MemInfoBlock) bytes (stack id + MemInfoBlock).
EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock)));
EXPECT_EQ(StackOffset, 336ULL);
EXPECT_EQ(StackOffset, 408ULL);
// We expect 2 stack entries, with 5 frames - 8b for total count,
// 2 * (8b for id, 8b for frame count and 5*8b for fake frames).
// Since this is the last section, there may be additional padding at the end
@ -128,16 +102,15 @@ TEST(MemProf, Basic) {
EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
// ============= Check contents.
// The Uuid field is not yet populated on Linux-Elf by the sanitizer procmaps
// library, so we expect it to be filled with 0 for now.
unsigned char ExpectedSegmentBytes[64] = {
0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries
0x10, 0, 0, 0, 0, 0, 0, 0, // Start
0x20, 0, 0, 0, 0, 0, 0, 0, // End
0x10, 0, 0, 0, 0, 0, 0, 0, // Offset
0x0, // Uuid
unsigned char ExpectedSegmentBytes[72] = {
0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries
0x10, 0, 0, 0, 0, 0, 0, 0, // Start
0x20, 0, 0, 0, 0, 0, 0, 0, // End
0x0, 0, 0, 0, 0, 0, 0, 0, // Offset
0x20, 0, 0, 0, 0, 0, 0, 0, // UuidSize
0xC, 0x0, 0xF, 0xF, 0xE, 0xE // Uuid
};
EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 64), 0);
EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 72), 0);
// Check that the number of entries is 2.
EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + MIBOffset), 2ULL);
@ -193,5 +166,4 @@ TEST(MemProf, Basic) {
sizeof(ExpectedStackBytes[1])),
0);
}
} // namespace

View File

@ -19,6 +19,7 @@
* synced up.
*
\*===----------------------------------------------------------------------===*/
#include <string.h>
#ifdef _MSC_VER
#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
@ -32,7 +33,9 @@
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
// The version number of the raw binary format.
#define MEMPROF_RAW_VERSION 2ULL
#define MEMPROF_RAW_VERSION 3ULL
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
namespace llvm {
namespace memprof {
@ -46,37 +49,40 @@ PACKED(struct Header {
uint64_t StackOffset;
});
// A struct describing the information necessary to describe a /proc/maps
// segment entry for a particular binary/library identified by its build id.
PACKED(struct SegmentEntry {
uint64_t Start;
uint64_t End;
uint64_t Offset;
// This field is unused until sanitizer procmaps support for build ids for
// Linux-Elf is implemented.
uint8_t BuildId[32] = {0};
uint64_t BuildIdSize;
uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
Start(S), End(E), Offset(O) {}
// This constructor leaves the BuildId empty (BuildIdSize == 0); callers that
// have a build id set BuildIdSize and BuildId after construction.
SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
: Start(S), End(E), Offset(O), BuildIdSize(0) {}
SegmentEntry(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
BuildIdSize = S.BuildIdSize;
memcpy(BuildId, S.BuildId, S.BuildIdSize);
}
SegmentEntry& operator=(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
BuildIdSize = S.BuildIdSize;
memcpy(BuildId, S.BuildId, S.BuildIdSize);
return *this;
}
bool operator==(const SegmentEntry& S) const {
return Start == S.Start &&
End == S.End &&
Offset == S.Offset;
return Start == S.Start && End == S.End && Offset == S.Offset &&
BuildIdSize == S.BuildIdSize &&
memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
}
});

View File

@ -158,15 +158,13 @@ bool isRuntimePath(const StringRef Path) {
}
std::string getBuildIdString(const SegmentEntry &Entry) {
constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
constexpr uint8_t Zeros[Size] = {0};
// If the build id is unset print a helpful string instead of all zeros.
if (memcmp(Entry.BuildId, Zeros, Size) == 0)
if (Entry.BuildIdSize == 0)
return "<None>";
std::string Str;
raw_string_ostream OS(Str);
for (size_t I = 0; I < Size; I++) {
for (size_t I = 0; I < Entry.BuildIdSize; I++) {
OS << format_hex_no_prefix(Entry.BuildId[I], 2);
}
return OS.str();

Binary file not shown.

View File

@ -72,6 +72,7 @@ INPUTS["basic"]="BASIC"
INPUTS["inline"]="INLINE"
INPUTS["multi"]="MULTI"
INPUTS["pic"]="BASIC;-pie"
INPUTS["buildid"]="BASIC;-Wl,-build-id=sha1"
for name in "${!INPUTS[@]}"; do
IFS=";" read -r src flags <<< "${INPUTS[$name]}"

View File

@ -8,17 +8,17 @@ additional allocations which do not originate from the main binary are pruned.
CHECK: MemprofProfile:
CHECK-NEXT: Summary:
CHECK-NEXT: Version: 2
CHECK-NEXT: Version: 3
CHECK-NEXT: NumSegments: {{[0-9]+}}
CHECK-NEXT: NumMibInfo: 2
CHECK-NEXT: NumAllocFunctions: 1
CHECK-NEXT: NumStackOffsets: 2
CHECK-NEXT: Segments:
CHECK-NEXT: -
CHECK-NEXT: BuildId: <None>
CHECK-NEXT: Start: 0x{{[0-9]+}}
CHECK-NEXT: End: 0x{{[0-9]+}}
CHECK-NEXT: Offset: 0x{{[0-9]+}}
CHECK-NEXT: BuildId: {{[[:xdigit:]]+}}
CHECK-NEXT: Start: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: End: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:

View File

@ -0,0 +1,12 @@
REQUIRES: x86_64-linux
To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang
RUN: llvm-readelf --notes %p/Inputs/buildid.memprofexe > %t1.txt
RUN: llvm-profdata show --memory %p/Inputs/buildid.memprofraw --profiled-binary %p/Inputs/buildid.memprofexe -o - > %t2.txt
RUN: cat %t1.txt %t2.txt | FileCheck %s
COM: First extract the id from the llvm-readelf output.
CHECK: Build ID: [[ID:[[:xdigit:]]+]]
COM: Then match it with the profdata output.
CHECK: BuildId: {{.*}}[[ID]]

View File

@ -5,17 +5,17 @@ RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %
CHECK: MemprofProfile:
CHECK-NEXT: Summary:
CHECK-NEXT: Version: 2
CHECK-NEXT: Version: 3
CHECK-NEXT: NumSegments: {{[0-9]+}}
CHECK-NEXT: NumMibInfo: 2
CHECK-NEXT: NumAllocFunctions: 2
CHECK-NEXT: NumStackOffsets: 1
CHECK-NEXT: Segments:
CHECK-NEXT: -
CHECK-NEXT: BuildId: <None>
CHECK-NEXT: Start: 0x{{[0-9]+}}
CHECK-NEXT: End: 0x{{[0-9]+}}
CHECK-NEXT: Offset: 0x{{[0-9]+}}
CHECK-NEXT: BuildId: {{[[:xdigit:]]+}}
CHECK-NEXT: Start: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: End: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:

View File

@ -7,7 +7,7 @@ We expect 2 MIB entries, 1 each for the malloc calls in the program.
CHECK: MemprofProfile:
CHECK-NEXT: Summary:
CHECK-NEXT: Version: 2
CHECK-NEXT: Version: 3
CHECK-NEXT: NumSegments: {{[0-9]+}}
CHECK-NEXT: NumMibInfo: 2
CHECK-NEXT: NumAllocFunctions: 1