[LLD] Add --lto-CGO[0-3] option

Allow controlling the CodeGenOpt::Level independently of the LTO
optimization level in LLD via new options for the COFF, ELF, MachO, and
wasm frontends to lld. Most are spelled --lto-CGO[0-3], but the COFF
option is spelled -opt:lldltocgo=[0-3].

See D57422 for discussion surrounding the issue of how to set the CG opt
level. The ultimate goal is to let each function control its CG opt
level, but until then the current default means it is impossible to
specify a CG opt level lower than 2 while using LTO. This option gives
the user a means to control it for as long as it is not handled on a
per-function basis.

Reviewed By: MaskRay, #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D141970
This commit is contained in:
Scott Linder 2023-02-15 17:12:47 +00:00
parent 3cf7f22498
commit 45ee0a9afc
21 changed files with 174 additions and 17 deletions

View File

@ -162,6 +162,8 @@ struct Configuration {
// Used for /opt:lldlto=N
unsigned ltoo = 2;
// Used for /opt:lldltocgo=N
std::optional<unsigned> ltoCgo;
// Used for /opt:lldltojobs=N
std::string thinLTOJobs;

View File

@ -1768,6 +1768,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
} else if (s.consume_front("lldlto=")) {
if (s.getAsInteger(10, config->ltoo) || config->ltoo > 3)
error("/opt:lldlto: invalid optimization level: " + s);
} else if (s.consume_front("lldltocgo=")) {
config->ltoCgo.emplace();
if (s.getAsInteger(10, *config->ltoCgo) || *config->ltoCgo > 3)
error("/opt:lldltocgo: invalid codegen optimization level: " + s);
} else if (s.consume_front("lldltojobs=")) {
if (!get_threadpool_strategy(s))
error("/opt:lldltojobs: invalid job count: " + s);

View File

@ -88,7 +88,10 @@ lto::Config BitcodeCompiler::createConfig() {
c.OptLevel = ctx.config.ltoo;
c.CPU = getCPUStr();
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(ctx.config.ltoo);
std::optional<CodeGenOpt::Level> optLevelOrNone = CodeGenOpt::getLevel(
ctx.config.ltoCgo.value_or(args::getCGOptLevel(ctx.config.ltoo)));
assert(optLevelOrNone && "Invalid optimization level!");
c.CGOptLevel = *optLevelOrNone;
c.AlwaysEmitRegularLTOObj = !ctx.config.ltoObjPath.empty();
c.DebugPassManager = ctx.config.ltoDebugPassManager;
c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile);

View File

@ -19,11 +19,8 @@ using namespace lld;
// TODO(sbc): Remove this once CGOptLevel can be set completely based on bitcode
// function metadata.
CodeGenOpt::Level lld::args::getCGOptLevel(int optLevelLTO) {
if (optLevelLTO == 3)
return CodeGenOpt::Aggressive;
assert(optLevelLTO < 3);
return CodeGenOpt::Default;
// Maps the LTO optimization level to the default codegen optimization level
// by clamping it to the range [2, 3]: inputs below 2 yield 2 and inputs above
// 3 yield 3.
int lld::args::getCGOptLevel(int optLevelLTO) {
return std::clamp(optLevelLTO, 2, 3);
}
static int64_t getInteger(opt::InputArgList &args, unsigned key,

View File

@ -317,6 +317,7 @@ struct Config {
uint64_t zStackSize;
unsigned ltoPartitions;
unsigned ltoo;
llvm::CodeGenOpt::Level ltoCgo;
unsigned optimize;
StringRef thinLTOJobs;
unsigned timeTraceGranularity;

View File

@ -1139,6 +1139,14 @@ static void readConfigs(opt::InputArgList &args) {
args.hasFlag(OPT_lto_whole_program_visibility,
OPT_no_lto_whole_program_visibility, false);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
unsigned ltoCgo =
args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo));
if (auto level = CodeGenOpt::getLevel(ltoCgo))
config->ltoCgo = *level;
else
error("invalid codegen optimization level for LTO: " + Twine(ltoCgo));
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
@ -1395,8 +1403,6 @@ static void readConfigs(opt::InputArgList &args) {
config->thinLTOJobs = arg->getValue();
config->threadCount = parallel::strategy.compute_thread_count();
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
if (config->ltoPartitions == 0)
error("--lto-partitions: number of threads must be > 0");
if (!get_threadpool_strategy(config->thinLTOJobs))

View File

@ -127,7 +127,7 @@ static lto::Config createConfig() {
c.OptLevel = config->ltoo;
c.CPU = getCPUStr();
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
c.CGOptLevel = config->ltoCgo;
c.PTO.LoopVectorization = c.OptLevel > 1;
c.PTO.SLPVectorization = c.OptLevel > 1;

View File

@ -558,6 +558,8 @@ def lto_newpm_passes: JJ<"lto-newpm-passes=">,
HelpText<"Passes to run during LTO">;
def lto_O: JJ<"lto-O">, MetaVarName<"<opt-level>">,
HelpText<"Optimization level for LTO">;
// Codegen optimization level for LTO. When the option is absent, the driver
// derives the level from --lto-O via args::getCGOptLevel.
def lto_CGO: JJ<"lto-CGO">, MetaVarName<"<cgopt-level>">,
HelpText<"Codegen optimization level for LTO">;
def lto_partitions: JJ<"lto-partitions=">,
HelpText<"Number of LTO codegen partitions">;
def lto_cs_profile_generate: FF<"lto-cs-profile-generate">,

View File

@ -26,6 +26,10 @@
#include <vector>
// Forward declaration of the enum type used for Configuration::ltoCgo.
namespace llvm::CodeGenOpt {
enum Level : int;
} // namespace llvm::CodeGenOpt
namespace lld {
namespace macho {
@ -165,6 +169,7 @@ struct Configuration {
llvm::StringRef thinLTOJobs;
llvm::StringRef umbrella;
uint32_t ltoo = 2;
llvm::CodeGenOpt::Level ltoCgo;
llvm::CachePruningPolicy thinLTOCachePolicy;
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;

View File

@ -1421,6 +1421,17 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
target = createTargetInfo(args);
depTracker = std::make_unique<DependencyTracker>(
args.getLastArgValue(OPT_dependency_info));
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
if (config->ltoo > 3)
error("--lto-O: invalid optimization level: " + Twine(config->ltoo));
unsigned ltoCgo =
args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo));
if (auto level = CodeGenOpt::getLevel(ltoCgo))
config->ltoCgo = *level;
else
error("--lto-CGO: invalid codegen optimization level: " + Twine(ltoCgo));
if (errorCount())
return false;
@ -1558,9 +1569,6 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->umbrella = arg->getValue();
}
config->ltoObjPath = args.getLastArgValue(OPT_object_path_lto);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
if (config->ltoo > 3)
error("--lto-O: invalid optimization level: " + Twine(config->ltoo));
config->thinLTOCacheDir = args.getLastArgValue(OPT_cache_path_lto);
config->thinLTOCachePolicy = getLTOCachePolicy(args);
config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);

View File

@ -70,7 +70,7 @@ static lto::Config createConfig() {
c.TimeTraceEnabled = config->timeTraceEnabled;
c.TimeTraceGranularity = config->timeTraceGranularity;
c.OptLevel = config->ltoo;
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
c.CGOptLevel = config->ltoCgo;
if (config->saveTemps)
checkError(c.addSaveTemps(config->outputFile.str() + ".",
/*UseInputModulePath=*/true));

View File

@ -84,6 +84,10 @@ def lto_O: Joined<["--"], "lto-O">,
HelpText<"Set optimization level for LTO (default: 2)">,
MetaVarName<"<opt-level>">,
Group<grp_lld>;
// Codegen optimization level for LTO. When the option is absent, the driver
// falls back to args::getCGOptLevel applied to the --lto-O level, which is 2
// unless --lto-O3 is given (then 3); the help text states that default.
def lto_CGO: Joined<["--"], "lto-CGO">,
HelpText<"Set codegen optimization level for LTO (default: 2, or 3 with --lto-O3)">,
MetaVarName<"<cgopt-level>">,
Group<grp_lld>;
def thinlto_cache_policy_eq: Joined<["--"], "thinlto-cache-policy=">,
HelpText<"Pruning policy for the ThinLTO cache">,
Group<grp_lld>;

View File

@ -23,7 +23,7 @@ class InputArgList;
namespace lld {
namespace args {
llvm::CodeGenOpt::Level getCGOptLevel(int optLevelLTO);
int getCGOptLevel(int optLevelLTO);
int64_t getInteger(llvm::opt::InputArgList &args, unsigned key,
int64_t Default);

28
lld/test/COFF/lto-cgo.ll Normal file
View File

@ -0,0 +1,28 @@
;; Check that /opt:lldltocgo=N sets the codegen optimization level separately
;; from /opt:lldlto=N. The level is observed through the register allocator
;; listed by -debug-pass=Structure: the fast allocator is expected only when
;; the last lldltocgo value is 0, while lldlto alone (0 or 3) still gets the
;; greedy allocator. A level of 4 must be rejected for either option.
; REQUIRES: x86
; RUN: llvm-as %s -o %t.obj
; RUN: lld-link -opt:lldlto=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: lld-link -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: lld-link -opt:lldlto=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: lld-link -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=0 -opt:lldltocgo=2 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: not lld-link -opt:lldlto=4 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-O4 %s
; RUN: not lld-link -opt:lldltocgo=4 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-CGO4 %s
; RUN: not lld-link -opt:lldlto=4 -opt:lldltocgo=4 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefixes=ERROR-O4,ERROR-CGO4 %s
; NOOPT: Fast Register Allocator
; OPT: Greedy Register Allocator
; ERROR-O4: lld-link: error: /opt:lldlto: invalid optimization level: 4
; ERROR-CGO4: lld-link: error: /opt:lldltocgo: invalid codegen optimization level: 4
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
define void @_start() {
entry:
ret void
}

28
lld/test/ELF/lto/cgo.ll Normal file
View File

@ -0,0 +1,28 @@
;; Check that --lto-CGO<N> sets the codegen optimization level separately from
;; --lto-O<N>. The level is observed through the register allocator listed by
;; -debug-pass=Structure: the fast allocator is expected only when the last
;; --lto-CGO value is 0, while --lto-O alone (0 or 3) still gets the greedy
;; allocator. A level of 4 must be rejected for either option.
; REQUIRES: x86
; RUN: llvm-as %s -o %t.o
; RUN: ld.lld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: ld.lld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: ld.lld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: ld.lld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: not ld.lld --lto-O4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-O4 %s
; RUN: not ld.lld --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-CGO4 %s
; RUN: not ld.lld --lto-O4 --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefixes=ERROR-O4,ERROR-CGO4 %s
; NOOPT: Fast Register Allocator
; OPT: Greedy Register Allocator
; ERROR-O4: ld.lld: error: invalid optimization level for LTO: 4
; ERROR-CGO4: ld.lld: error: invalid codegen optimization level for LTO: 4
target triple = "x86_64-unknown-linux-gnu"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @_start() {
entry:
ret void
}

28
lld/test/MachO/lto-cgo.ll Normal file
View File

@ -0,0 +1,28 @@
;; Check that --lto-CGO<N> sets the codegen optimization level separately from
;; --lto-O<N>. The level is observed through the register allocator listed by
;; -debug-pass=Structure: the fast allocator is expected only when the last
;; --lto-CGO value is 0, while --lto-O alone (0 or 3) still gets the greedy
;; allocator. A level of 4 must be rejected for either option.
; REQUIRES: x86
; RUN: llvm-as %s -o %t.o
; RUN: %lld -dylib --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: %lld -dylib --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: %lld -dylib --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: %lld -dylib --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: %lld -dylib --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: %lld -dylib --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: %lld -dylib --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: %lld -dylib --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: %lld -dylib --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: not %lld -dylib --lto-O4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-O4 %s
; RUN: not %lld -dylib --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-CGO4 %s
; RUN: not %lld -dylib --lto-O4 --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefixes=ERROR-O4,ERROR-CGO4 %s
; NOOPT: Fast Register Allocator
; OPT: Greedy Register Allocator
; ERROR-O4: ld64.lld: error: --lto-O: invalid optimization level: 4
; ERROR-CGO4: ld64.lld: error: --lto-CGO: invalid codegen optimization level: 4
target triple = "x86_64-apple-darwin"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @_start() {
entry:
ret void
}

28
lld/test/wasm/lto/cgo.ll Normal file
View File

@ -0,0 +1,28 @@
;; Check that --lto-CGO<N> sets the codegen optimization level separately from
;; --lto-O<N>. The level is observed through -debug-pass=Structure: the
;; "WebAssembly Optimize Returned" pass must be absent only when the last
;; --lto-CGO value is 0, while --lto-O alone (0 or 3) still runs it. A level
;; of 4 must be rejected for either option.
;;
;; The module targets wasm32 and is linked with wasm-ld, so the test is gated
;; on the WebAssembly backend rather than on x86.
; REQUIRES: wasm
; RUN: llvm-as %s -o %t.o
; RUN: wasm-ld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: wasm-ld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: wasm-ld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: wasm-ld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: wasm-ld --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: wasm-ld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s
; RUN: wasm-ld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: wasm-ld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: wasm-ld --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s
; RUN: not wasm-ld --lto-O4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-O4 %s
; RUN: not wasm-ld --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=ERROR-CGO4 %s
; RUN: not wasm-ld --lto-O4 --lto-CGO4 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefixes=ERROR-O4,ERROR-CGO4 %s
; NOOPT-NOT: WebAssembly Optimize Returned
; OPT: WebAssembly Optimize Returned
; ERROR-O4: wasm-ld: error: invalid optimization level for LTO: 4
; ERROR-CGO4: wasm-ld: error: invalid codegen optimization level for LTO: 4
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"
define void @_start() {
entry:
ret void
}

View File

@ -16,6 +16,10 @@
#include "llvm/Support/CachePruning.h"
#include <optional>
// Forward declaration of the enum type used for Configuration::ltoCgo.
namespace llvm::CodeGenOpt {
enum Level : int;
} // namespace llvm::CodeGenOpt
namespace lld {
namespace wasm {
@ -63,6 +67,7 @@ struct Configuration {
uint64_t zStackSize;
unsigned ltoPartitions;
unsigned ltoo;
llvm::CodeGenOpt::Level ltoCgo;
unsigned optimize;
llvm::StringRef thinLTOJobs;
bool ltoDebugPassManager;

View File

@ -424,6 +424,14 @@ static void readConfigs(opt::InputArgList &args) {
config->importTable = args.hasArg(OPT_import_table);
config->importUndefined = args.hasArg(OPT_import_undefined);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
unsigned ltoCgo =
args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo));
if (auto level = CodeGenOpt::getLevel(ltoCgo))
config->ltoCgo = *level;
else
error("invalid codegen optimization level for LTO: " + Twine(ltoCgo));
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->mapFile = args.getLastArgValue(OPT_Map);
@ -560,8 +568,6 @@ static void checkOptions(opt::InputArgList &args) {
error("--compress-relocations is incompatible with output debug"
" information. Please pass --strip-debug or --strip-all");
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
if (config->ltoPartitions == 0)
error("--lto-partitions: number of threads must be > 0");
if (!get_threadpool_strategy(config->thinLTOJobs))

View File

@ -51,7 +51,7 @@ static std::unique_ptr<lto::LTO> createLTO() {
c.DiagHandler = diagnosticHandler;
c.OptLevel = config->ltoo;
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
c.CGOptLevel = config->ltoCgo;
c.DebugPassManager = config->ltoDebugPassManager;
if (config->relocatable)

View File

@ -256,6 +256,8 @@ def: JoinedOrSeparate<["-"], "u">, Alias<undefined>;
// LTO-related options.
def lto_O: JJ<"lto-O">, MetaVarName<"<opt-level>">,
HelpText<"Optimization level for LTO">;
// Codegen optimization level for LTO. When the option is absent, the driver
// derives the level from --lto-O via args::getCGOptLevel.
def lto_CGO: JJ<"lto-CGO">, MetaVarName<"<cgopt-level>">,
HelpText<"Codegen optimization level for LTO">;
def lto_partitions: JJ<"lto-partitions=">,
HelpText<"Number of LTO codegen partitions">;
def disable_verify: F<"disable-verify">;