From 54ab9541492d808ae4cf9130dd052d602b78ee32 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 21 Mar 2023 13:58:36 +0400 Subject: [PATCH] [BOLT] Reject symbols pointing to section end Sometimes, symbols are present that point to the end of a section (i.e., one past the highest valid address). Currently, BOLT either rejects those symbols when they don't point to another existing section, or errs when they do and the other section is not executable. I suppose BOLT would accept the symbol when it points to an executable section. In any case, these symbols should not be considered while discovering functions and should not result in an error. This patch implements that. Note that this patch checks explicitly for symbols whose value equals the end of their section. It might make more sense to verify that the symbol's value is within [section start, section end). However, I'm not sure whether a symbol's value could ever fall outside that range *and* not equal the section end. Another way to implement this is to verify that the BinarySection we find at the symbol's address actually corresponds to the symbol's section. I'm not sure what the best approach is, so feedback is welcome. 
Reviewed By: yota9, rafauler Differential Revision: https://reviews.llvm.org/D146215 --- bolt/lib/Rewrite/RewriteInstance.cpp | 10 ++++++++++ bolt/test/X86/section-end-sym.s | 29 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 bolt/test/X86/section-end-sym.s diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 04ccbcf20de1..57e41d5b5724 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1050,6 +1050,16 @@ void RewriteInstance::discoverFileObjects() { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName << " for function\n"); + if (Address == Section->getAddress() + Section->getSize()) { + assert(SymbolSize == 0 && + "unexpect non-zero sized symbol at end of section"); + LLVM_DEBUG( + dbgs() + << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); + registerName(SymbolSize); + continue; + } + if (!Section->isText()) { assert(SymbolType != SymbolRef::ST_Function && "unexpected function inside non-code section"); diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s new file mode 100644 index 000000000000..a9bca5604ec1 --- /dev/null +++ b/bolt/test/X86/section-end-sym.s @@ -0,0 +1,29 @@ +## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as +## functions. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t.exe -q +# RUN: llvm-bolt %t.exe -o /dev/null --print-cfg --debug-only=bolt 2>&1 \ +# RUN: | FileCheck %s + +# CHECK: considering symbol etext for function +# CHECK-NEXT: rejecting as symbol points to end of its section +# CHECK-NOT: Binary Function "etext{{.*}}" after building cfg + + + .text + .globl _start + .type _start,@function +_start: + retq + .size _start, .-_start + + .align 0x1000 + .globl etext +etext: + + .data +.Lfoo: + .word 0