[ELF][PPC64] Actually implement --no-power10-stubs

When a caller that does not use TOC calls a function, a call stub is needed if
the function may use TOC. --no-power10-stubs avoids PC-relative instructions in
the code sequence.

The --no-power10-stubs implementation added in D94627 is wrong.
First, the first instruction incorrectly uses `mflr 0` (instead of `mflr 12`).
Second, for the PLT case, it uses addis+addi with getVA instead of addis+ld with
getGotPltVA.
This commit is contained in:
Fangrui Song 2023-02-27 16:19:13 -08:00
parent d77f96a9e0
commit 7198c87f42
4 changed files with 36 additions and 34 deletions

View File

@ -1089,13 +1089,17 @@ void PPC64R12SetupStub::writeTo(uint8_t *buf) {
buf, (gotPlt ? PLD_R12_NO_DISP : PADDI_R12_NO_DISP) | imm);
nextInstOffset = 8;
} else {
uint32_t off = destination.getVA(addend) - getThunkTargetSym()->getVA() - 8;
write32(buf + 0, 0x7c0802a6); // mflr r12
write32(buf + 4, 0x429f0005); // bcl 20,31,.+4
write32(buf + 8, 0x7d6802a6); // mflr r11
write32(buf + 12, 0x7d8803a6); // mtlr r12
write32(buf + 16, 0x3d8b0000 | computeHiBits(off));// addis r12,r11,off@ha
write32(buf + 20, 0x398c0000 | (off & 0xffff)); // addi r12,r12,off@l
uint32_t off = offset - 8;
write32(buf + 0, 0x7d8802a6); // mflr 12
write32(buf + 4, 0x429f0005); // bcl 20,31,.+4
write32(buf + 8, 0x7d6802a6); // mflr 11
write32(buf + 12, 0x7d8803a6); // mtlr 12
write32(buf + 16,
0x3d8b0000 | ((off + 0x8000) >> 16)); // addis 12,11,off@ha
if (gotPlt)
write32(buf + 20, 0xe98c0000 | (off & 0xffff)); // ld 12, off@l(12)
else
write32(buf + 20, 0x398c0000 | (off & 0xffff)); // addi 12,12,off@l
nextInstOffset = 24;
}
write32(buf + nextInstOffset, MTCTR_R12); // mtctr r12

View File

@ -73,10 +73,6 @@ void writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA,
const InputFile *file, int64_t addend);
void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset);
static inline uint16_t computeHiBits(uint32_t toCompute) {
return (toCompute + 0x8000) >> 16;
}
} // namespace lld::elf
#endif

View File

@ -115,14 +115,14 @@
## No P10; branch to next inst to get addr
# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_stother0>:
# CHECK-NOP10: 10010010: mflr 0
# CHECK-NOP10-NEXT: 10010014: bcl 20, 31, 0x10010018
# CHECK-NOP10: 10010018: mflr 11
# CHECK-NOP10: 1001001c: mtlr 12
# CHECK-NOP10: 10010020: addis 12, 11, -4097
# CHECK-NOP10: 10010024: addi 12, 12, -24
# CHECK-NOP10-NEXT: 10010028: mtctr 12
# CHECK-NOP10-NEXT: 1001002c: bctr
# CHECK-NOP10: 10010010: mflr 12
# CHECK-NOP10-NEXT: bcl 20, 31, 0x10010018
# CHECK-NOP10-NEXT: 10010018: mflr 11
# CHECK-NOP10-NEXT: mtlr 12
# CHECK-NOP10-NEXT: addis 12, 11, 2
# CHECK-NOP10-NEXT: ld 12, 336(12)
# CHECK-NOP10-NEXT: mtctr 12
# CHECK-NOP10-NEXT: bctr
# CHECK-LABEL: <caller2>:
# CHECK: 10020000: bl 0x10020010
@ -139,15 +139,16 @@
# CHECK-NEXT: 1002001c: bctr
## no P10; branch to next inst to get addr
## .plt[3]-r11 = 0x10030170-0x10020018 = 65536*1+344
# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_stother1>:
# CHECK-NOP10: 10020010: mflr 0
# CHECK-NOP10-NEXT: 10020014: bcl 20, 31, 0x10020018
# CHECK-NOP10: 10020010: mflr 12
# CHECK-NOP10-NEXT: bcl 20, 31, 0x10020018
# CHECK-NOP10-NEXT: 10020018: mflr 11
# CHECK-NOP10-NEXT: 1002001c: mtlr 12
# CHECK-NOP10-NEXT: 10020020: addis 12, 11, -4098
# CHECK-NOP10-NEXT: 10020024: addi 12, 12, -24
# CHECK-NOP10-NEXT: 10020028: mtctr 12
# CHECK-NOP10-NEXT: 1002002c: bctr
# CHECK-NOP10-NEXT: mtlr 12
# CHECK-NOP10-NEXT: addis 12, 11, 1
# CHECK-NOP10-NEXT: ld 12, 344(12)
# CHECK-NOP10-NEXT: mtctr 12
# CHECK-NOP10-NEXT: bctr
# CHECK-LABEL: <caller3>:
# CHECK: 10030000: bl 0x10030010
@ -164,15 +165,16 @@
# CHECK-NEXT: 1003001c: bctr
## no P10; branch to next inst to get addr
## .plt[4]-r11 = 0x10030178-0x10030018 = 65536*0+352
# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_TOC>:
# CHECK-NOP10-NEXT: 10030010: mflr 0
# CHECK-NOP10-NEXT: 10030014: bcl 20, 31, 0x10030018
# CHECK-NOP10-NEXT: 10030010: mflr 12
# CHECK-NOP10-NEXT: bcl 20, 31, 0x10030018
# CHECK-NOP10-NEXT: 10030018: mflr 11
# CHECK-NOP10-NEXT: 1003001c: mtlr 12
# CHECK-NOP10-NEXT: 10030020: addis 12, 11, -4099
# CHECK-NOP10-NEXT: 10030024: addi 12, 12, -24
# CHECK-NOP10-NEXT: 10030028: mtctr 12
# CHECK-NOP10-NEXT: 1003002c: bctr
# CHECK-NOP10-NEXT: mtlr 12
# CHECK-NOP10-NEXT: addis 12, 11, 0
# CHECK-NOP10-NEXT: ld 12, 352(12)
# CHECK-NOP10-NEXT: mtctr 12
# CHECK-NOP10-NEXT: bctr
.ifdef AUX
.section .text_caller1, "ax", %progbits

View File

@ -48,7 +48,7 @@
# CHECK-NEXT: bctr
# CHECK-NOP10-LABEL: <__gep_setup_callee>:
# CHECK-NOP10-NEXT: mflr 0
# CHECK-NOP10-NEXT: mflr 12
# CHECK-NOP10-NEXT: bcl 20, 31, 0x10030018
# CHECK-NOP10-NEXT: mflr 11
# CHECK-NOP10-NEXT: mtlr 12