[BOLT] Check no-LBR samples in mayHaveProfileData

No-LBR mode wasn't tested and slipped when mayHaveProfileData was added for Lite mode. This enables processing of profiles collected without LBR and converted with `perf2bolt -nl` option. Test Plan: bin/llvm-lit -a tools/bolt/test/X86/nolbr.s https://github.com/rafaelauler/bolt-tests/pull/20 Reviewed By: #bolt, rafauler Differential Revision: https://reviews.llvm.org/D140256
2022-12-21 16:31:26 -08:00 · 2022-12-21 16:31:26 -08:00 · 6b05a62a6b
commit 6b05a62a6b
parent b5a1de9c98
4 changed files with 61 additions and 2 deletions
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@ -646,7 +646,7 @@ void DataAggregator::processProfile(BinaryContext &BC) {
  // Mark all functions with registered events as having a valid profile.
  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &BF = BFI.second;
-    if (getBranchData(BF)) {
+    if (getBranchData(BF) || getFuncSampleData(BF.getNames())) {
      const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
                                                : BinaryFunction::PF_LBR;
      BF.markProfiled(Flags);
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@ -1331,7 +1331,8 @@ bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
  if (getBranchData(Function) || getMemData(Function))
    return true;

-  if (getBranchDataForNames(Function.getNames()) ||
+  if (getFuncSampleData(Function.getNames()) ||
+      getBranchDataForNames(Function.getNames()) ||
      getMemDataForNames(Function.getNames()))
    return true;

--- a/bolt/test/X86/nolbr.s
+++ b/bolt/test/X86/nolbr.s
@ -0,0 +1,39 @@
+# This reproduces a bug where profile collected from perf without LBRs and
+# converted into fdata-no-lbr format is reported to not contain profile for any
+# functions.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: link_fdata --no-lbr %s %t.o %t.fdata
+# RUN: FileCheck %s --input-file %t.fdata --check-prefix=CHECK-FDATA
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --dyno-stats -nl \
+# RUN:    --print-only=_start 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
+
+# CHECK-FDATA:      no_lbr
+# CHECK-FDATA-NEXT: 1 _start [[#]] 1
+
+# CHECK-BOLT: BOLT-INFO: pre-processing profile using branch profile reader
+# CHECK-BOLT: BOLT-INFO: operating with basic samples profiling data (no LBR).
+# CHECK-BOLT: BOLT-INFO: 1 out of 1 functions in the binary (100.0%) have non-empty execution profile
+
+  .globl _start
+  .type _start, %function
+_start:
+  pushq	%rbp
+  movq	%rsp, %rbp
+  cmpl  $0x0, %eax
+a:
+# FDATA: 1 _start #a# 1
+  je b
+  movl	$0x0, %eax
+  jmp c
+b:
+  movl  $0x1, %eax
+c:
+  popq	%rbp
+  retq
+.size _start, .-_start
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@ -18,6 +18,7 @@ parser.add_argument("objfile", help="Object file to extract symbol values from")
 parser.add_argument("output")
 parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
 parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
+parser.add_argument("--no-lbr", action='store_true')

 args = parser.parse_args()

@ -36,6 +37,10 @@ fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
 # [<mispred_count>]
 preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

+# No-LBR profile:
+# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
+nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")
+
 # Replacement symbol: #symname#
 replace_pat = re.compile(r"#(?P<symname>[^#]+)#")

@ -51,6 +56,7 @@ with open(args.input, 'r') as f:
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
+        nolbr_match = nolbr_pat.match(profile_line)
        if fdata_match:
            src_dst, execnt, mispred = fdata_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
@ -59,6 +65,14 @@ with open(args.input, 'r') as f:
            # exactly matches the format.
            assert len(chunks) == 6, f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
            exprs.append(('FDATA', (*chunks, execnt, mispred)))
+        elif nolbr_match:
+            loc, count = nolbr_match.groups()
+            # Split by whitespaces not preceded by a backslash (negative lookbehind)
+            chunks = re.split(r'(?<!\\) +', loc)
+            # Check if the number of records separated by non-escaped whitespace
+            # exactly matches the format.
+            assert len(chunks) == 3, f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
+            exprs.append(('NOLBR', (*chunks, count)))
        elif preagg_match:
            exprs.append(('PREAGG', preagg_match.groups()))
        else:
@ -99,12 +113,17 @@ def replace_symbol(matchobj):
    return symbols[symname]

 with open(args.output, 'w', newline='\n') as f:
+    if args.no_lbr:
+        print('no_lbr', file = f)
    for etype, expr in exprs:
        if etype == 'FDATA':
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr
            print(evaluate_symbol(issym1, anchor1, offsym1),
                  evaluate_symbol(issym2, anchor2, offsym2),
                  execnt, mispred, file = f)
+        elif etype == 'NOLBR':
+            issym, anchor, offsym, count = expr
+            print(evaluate_symbol(issym, anchor, offsym), count, file = f)
        elif etype == 'PREAGG':
            # Replace all symbols enclosed in ##
            print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]),