# Test performs a BB reordering with unsupported # instruction jrcxz. Reordering works correctly with the # follow options: None, Normal or Reverse. Other strategies # are completed with Assertion `isIntN(Size * 8 + 1, Value). # The cause is the distance between BB where one contains # jrcxz instruction. # Example: OpenSSL # https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319 # REQUIRES: system-linux # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ # RUN: %s -o %t.o # RUN: link_fdata %s %t.o %t.fdata # RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q # RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \ # RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \ # RUN: --split-functions --split-all-cold --split-eh --dyno-stats \ # RUN: --print-finalized 2>&1 | FileCheck %s # CHECK-NOT: value of -2105 is too large for field of 1 byte. .text .section .text.startup,"ax",@progbits .p2align 5,,31 .globl main .type main, @function main: jmp bn_sqrx8x_internal .globl bn_sqrx8x_internal .hidden bn_sqrx8x_internal .type bn_sqrx8x_internal,@function .align 32 bn_sqrx8x_internal: __bn_sqrx8x_internal: # FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56 # FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972 # FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972 # FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972 # FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012 # FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008 # FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008 # FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908 # FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908 # FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020 # FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020 # FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048 # FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048 # FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032 # FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032 # FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020 # FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020 .cfi_startproc leaq 48+8(%rsp),%rdi leaq (%rsi,%r9,1),%rbp movq %r9,0+8(%rsp) movq %rbp,8+8(%rsp) jmp .Lsqr8x_zero_start .align 32 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 .Lsqrx8x_zero: .byte 0x3e movdqa %xmm0,0(%rdi) movdqa %xmm0,16(%rdi) movdqa %xmm0,32(%rdi) movdqa %xmm0,48(%rdi) .Lsqr8x_zero_start: movdqa %xmm0,64(%rdi) movdqa %xmm0,80(%rdi) movdqa %xmm0,96(%rdi) movdqa %xmm0,112(%rdi) leaq 128(%rdi),%rdi subq $64,%r9 jnz .Lsqrx8x_zero movq 0(%rsi),%rdx xorq %r10,%r10 xorq %r11,%r11 xorq %r12,%r12 xorq %r13,%r13 xorq %r14,%r14 xorq %r15,%r15 leaq 48+8(%rsp),%rdi xorq %rbp,%rbp jmp .Lsqrx8x_outer_loop .align 32 .Lsqrx8x_outer_loop: mulxq 8(%rsi),%r8,%rax adcxq %r9,%r8 adoxq %rax,%r10 mulxq 16(%rsi),%r9,%rax adcxq %r10,%r9 adoxq %rax,%r11 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 adcxq %r11,%r10 adoxq %rax,%r12 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 adcxq %r12,%r11 adoxq %rax,%r13 mulxq 40(%rsi),%r12,%rax adcxq %r13,%r12 adoxq %rax,%r14 mulxq 48(%rsi),%r13,%rax adcxq %r14,%r13 adoxq %r15,%rax mulxq 56(%rsi),%r14,%r15 movq 8(%rsi),%rdx adcxq %rax,%r14 adoxq %rbp,%r15 adcq 64(%rdi),%r15 movq %r8,8(%rdi) movq %r9,16(%rdi) sbbq %rcx,%rcx xorq %rbp,%rbp mulxq 16(%rsi),%r8,%rbx mulxq 24(%rsi),%r9,%rax adcxq %r10,%r8 adoxq %rbx,%r9 mulxq 32(%rsi),%r10,%rbx adcxq %r11,%r9 adoxq %rax,%r10 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 adcxq %r12,%r10 adoxq %rbx,%r11 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 adcxq %r13,%r11 adoxq %r14,%r12 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 movq 16(%rsi),%rdx adcxq %rax,%r12 adoxq %rbx,%r13 adcxq %r15,%r13 adoxq %rbp,%r14 adcxq %rbp,%r14 movq %r8,24(%rdi) movq %r9,32(%rdi) mulxq 24(%rsi),%r8,%rbx mulxq 32(%rsi),%r9,%rax adcxq %r10,%r8 adoxq %rbx,%r9 mulxq 40(%rsi),%r10,%rbx adcxq %r11,%r9 adoxq %rax,%r10 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 adcxq %r12,%r10 adoxq %r13,%r11 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 .byte 0x3e movq 24(%rsi),%rdx adcxq %rbx,%r11 adoxq %rax,%r12 adcxq %r14,%r12 movq %r8,40(%rdi) movq %r9,48(%rdi) mulxq 32(%rsi),%r8,%rax adoxq %rbp,%r13 adcxq %rbp,%r13 mulxq 40(%rsi),%r9,%rbx adcxq %r10,%r8 adoxq %rax,%r9 mulxq 48(%rsi),%r10,%rax adcxq %r11,%r9 adoxq %r12,%r10 mulxq 56(%rsi),%r11,%r12 movq 32(%rsi),%rdx movq 40(%rsi),%r14 adcxq %rbx,%r10 adoxq %rax,%r11 movq 48(%rsi),%r15 adcxq %r13,%r11 adoxq %rbp,%r12 adcxq %rbp,%r12 movq %r8,56(%rdi) movq %r9,64(%rdi) mulxq %r14,%r9,%rax movq 56(%rsi),%r8 adcxq %r10,%r9 mulxq %r15,%r10,%rbx adoxq %rax,%r10 adcxq %r11,%r10 mulxq %r8,%r11,%rax movq %r14,%rdx adoxq %rbx,%r11 adcxq %r12,%r11 adcxq %rbp,%rax mulxq %r15,%r14,%rbx mulxq %r8,%r12,%r13 movq %r15,%rdx leaq 64(%rsi),%rsi adcxq %r14,%r11 adoxq %rbx,%r12 adcxq %rax,%r12 adoxq %rbp,%r13 .byte 0x67,0x67 mulxq %r8,%r8,%r14 adcxq %r8,%r13 adcxq %rbp,%r14 cmpq 8+8(%rsp),%rsi je .Lsqrx8x_outer_break negq %rcx movq $-8,%rcx movq %rbp,%r15 movq 64(%rdi),%r8 adcxq 72(%rdi),%r9 adcxq 80(%rdi),%r10 adcxq 88(%rdi),%r11 adcq 96(%rdi),%r12 adcq 104(%rdi),%r13 adcq 112(%rdi),%r14 adcq 120(%rdi),%r15 leaq (%rsi),%rbp leaq 128(%rdi),%rdi sbbq %rax,%rax movq -64(%rsi),%rdx movq %rax,16+8(%rsp) movq %rdi,24+8(%rsp) xorl %eax,%eax jmp .Lsqrx8x_loop .align 32 .Lsqrx8x_loop: movq %r8,%rbx mulxq 0(%rbp),%rax,%r8 adcxq %rax,%rbx adoxq %r9,%r8 mulxq 8(%rbp),%rax,%r9 adcxq %rax,%r8 adoxq %r10,%r9 mulxq 16(%rbp),%rax,%r10 adcxq %rax,%r9 adoxq %r11,%r10 mulxq 24(%rbp),%rax,%r11 adcxq %rax,%r10 adoxq %r12,%r11 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 adcxq %rax,%r11 adoxq %r13,%r12 mulxq 40(%rbp),%rax,%r13 adcxq %rax,%r12 adoxq %r14,%r13 mulxq 48(%rbp),%rax,%r14 movq %rbx,(%rdi,%rcx,8) movl $0,%ebx adcxq %rax,%r13 adoxq %r15,%r14 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 movq 8(%rsi,%rcx,8),%rdx adcxq %rax,%r14 adoxq %rbx,%r15 adcxq %rbx,%r15 .byte 0x67 incq %rcx jnz .Lsqrx8x_loop leaq 64(%rbp),%rbp movq $-8,%rcx cmpq 8+8(%rsp),%rbp je .Lsqrx8x_break subq 16+8(%rsp),%rbx .byte 0x66 movq -64(%rsi),%rdx adcxq 0(%rdi),%r8 adcxq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 leaq 64(%rdi),%rdi .byte 0x67 sbbq %rax,%rax xorl %ebx,%ebx movq %rax,16+8(%rsp) jmp .Lsqrx8x_loop .align 32 .Lsqrx8x_break: xorq %rbp,%rbp subq 16+8(%rsp),%rbx adcxq %rbp,%r8 movq 24+8(%rsp),%rcx adcxq %rbp,%r9 movq 0(%rsi),%rdx adcq $0,%r10 movq %r8,0(%rdi) adcq $0,%r11 adcq $0,%r12 adcq $0,%r13 adcq $0,%r14 adcq $0,%r15 cmpq %rcx,%rdi je .Lsqrx8x_outer_loop movq %r9,8(%rdi) movq 8(%rcx),%r9 movq %r10,16(%rdi) movq 16(%rcx),%r10 movq %r11,24(%rdi) movq 24(%rcx),%r11 movq %r12,32(%rdi) movq 32(%rcx),%r12 movq %r13,40(%rdi) movq 40(%rcx),%r13 movq %r14,48(%rdi) movq 48(%rcx),%r14 movq %r15,56(%rdi) movq 56(%rcx),%r15 movq %rcx,%rdi jmp .Lsqrx8x_outer_loop .align 32 .Lsqrx8x_outer_break: movq %r9,72(%rdi) .byte 102,72,15,126,217 movq %r10,80(%rdi) movq %r11,88(%rdi) movq %r12,96(%rdi) movq %r13,104(%rdi) movq %r14,112(%rdi) leaq 48+8(%rsp),%rdi movq (%rsi,%rcx,1),%rdx movq 8(%rdi),%r11 xorq %r10,%r10 movq 0+8(%rsp),%r9 adoxq %r11,%r11 movq 16(%rdi),%r12 movq 24(%rdi),%r13 .align 32 .Lsqrx4x_shift_n_add: mulxq %rdx,%rax,%rbx adoxq %r12,%r12 adcxq %r10,%rax .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 adoxq %r13,%r13 adcxq %r11,%rbx movq 40(%rdi),%r11 movq %rax,0(%rdi) movq %rbx,8(%rdi) mulxq %rdx,%rax,%rbx adoxq %r10,%r10 adcxq %r12,%rax movq 16(%rsi,%rcx,1),%rdx movq 48(%rdi),%r12 adoxq %r11,%r11 adcxq %r13,%rbx movq 56(%rdi),%r13 movq %rax,16(%rdi) movq %rbx,24(%rdi) mulxq %rdx,%rax,%rbx adoxq %r12,%r12 adcxq %r10,%rax movq 24(%rsi,%rcx,1),%rdx leaq 32(%rcx),%rcx movq 64(%rdi),%r10 adoxq %r13,%r13 adcxq %r11,%rbx movq 72(%rdi),%r11 movq %rax,32(%rdi) movq %rbx,40(%rdi) mulxq %rdx,%rax,%rbx adoxq %r10,%r10 adcxq %r12,%rax jrcxz .Lsqrx4x_shift_n_add_break .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 adoxq %r11,%r11 adcxq %r13,%rbx movq 80(%rdi),%r12 movq 88(%rdi),%r13 movq %rax,48(%rdi) movq %rbx,56(%rdi) leaq 64(%rdi),%rdi nop jmp .Lsqrx4x_shift_n_add .align 32 .Lsqrx4x_shift_n_add_break: adcxq %r13,%rbx movq %rax,48(%rdi) movq %rbx,56(%rdi) leaq 64(%rdi),%rdi .byte 102,72,15,126,213 __bn_sqrx8x_reduction: xorl %eax,%eax movq 32+8(%rsp),%rbx movq 48+8(%rsp),%rdx leaq -64(%rbp,%r9,1),%rcx movq %rcx,0+8(%rsp) movq %rdi,8+8(%rsp) leaq 48+8(%rsp),%rdi jmp .Lsqrx8x_reduction_loop .align 32 .Lsqrx8x_reduction_loop: movq 8(%rdi),%r9 movq 16(%rdi),%r10 movq 24(%rdi),%r11 movq 32(%rdi),%r12 movq %rdx,%r8 imulq %rbx,%rdx movq 40(%rdi),%r13 movq 48(%rdi),%r14 movq 56(%rdi),%r15 movq %rax,24+8(%rsp) leaq 64(%rdi),%rdi xorq %rsi,%rsi movq $-8,%rcx jmp .Lsqrx8x_reduce .align 32 .Lsqrx8x_reduce: movq %r8,%rbx mulxq 0(%rbp),%rax,%r8 adcxq %rbx,%rax adoxq %r9,%r8 mulxq 8(%rbp),%rbx,%r9 adcxq %rbx,%r8 adoxq %r10,%r9 mulxq 16(%rbp),%rbx,%r10 adcxq %rbx,%r9 adoxq %r11,%r10 mulxq 24(%rbp),%rbx,%r11 adcxq %rbx,%r10 adoxq %r12,%r11 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 movq %rdx,%rax movq %r8,%rdx adcxq %rbx,%r11 adoxq %r13,%r12 mulxq 32+8(%rsp),%rbx,%rdx movq %rax,%rdx movq %rax,64+48+8(%rsp,%rcx,8) mulxq 40(%rbp),%rax,%r13 adcxq %rax,%r12 adoxq %r14,%r13 mulxq 48(%rbp),%rax,%r14 adcxq %rax,%r13 adoxq %r15,%r14 mulxq 56(%rbp),%rax,%r15 movq %rbx,%rdx adcxq %rax,%r14 adoxq %rsi,%r15 adcxq %rsi,%r15 .byte 0x67,0x67,0x67 incq %rcx jnz .Lsqrx8x_reduce movq %rsi,%rax cmpq 0+8(%rsp),%rbp jae .Lsqrx8x_no_tail movq 48+8(%rsp),%rdx addq 0(%rdi),%r8 leaq 64(%rbp),%rbp movq $-8,%rcx adcxq 8(%rdi),%r9 adcxq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 leaq 64(%rdi),%rdi sbbq %rax,%rax xorq %rsi,%rsi movq %rax,16+8(%rsp) jmp .Lsqrx8x_tail .align 32 .Lsqrx8x_tail: movq %r8,%rbx mulxq 0(%rbp),%rax,%r8 adcxq %rax,%rbx adoxq %r9,%r8 mulxq 8(%rbp),%rax,%r9 adcxq %rax,%r8 adoxq %r10,%r9 mulxq 16(%rbp),%rax,%r10 adcxq %rax,%r9 adoxq %r11,%r10 mulxq 24(%rbp),%rax,%r11 adcxq %rax,%r10 adoxq %r12,%r11 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 adcxq %rax,%r11 adoxq %r13,%r12 mulxq 40(%rbp),%rax,%r13 adcxq %rax,%r12 adoxq %r14,%r13 mulxq 48(%rbp),%rax,%r14 adcxq %rax,%r13 adoxq %r15,%r14 mulxq 56(%rbp),%rax,%r15 movq 72+48+8(%rsp,%rcx,8),%rdx adcxq %rax,%r14 adoxq %rsi,%r15 movq %rbx,(%rdi,%rcx,8) movq %r8,%rbx adcxq %rsi,%r15 incq %rcx jnz .Lsqrx8x_tail cmpq 0+8(%rsp),%rbp jae .Lsqrx8x_tail_done subq 16+8(%rsp),%rsi movq 48+8(%rsp),%rdx leaq 64(%rbp),%rbp adcq 0(%rdi),%r8 adcq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 leaq 64(%rdi),%rdi sbbq %rax,%rax subq $8,%rcx xorq %rsi,%rsi movq %rax,16+8(%rsp) jmp .Lsqrx8x_tail .align 32 .Lsqrx8x_tail_done: xorq %rax,%rax addq 24+8(%rsp),%r8 adcq $0,%r9 adcq $0,%r10 adcq $0,%r11 adcq $0,%r12 adcq $0,%r13 adcq $0,%r14 adcq $0,%r15 adcq $0,%rax subq 16+8(%rsp),%rsi .Lsqrx8x_no_tail: adcq 0(%rdi),%r8 .byte 102,72,15,126,217 adcq 8(%rdi),%r9 movq 56(%rbp),%rsi .byte 102,72,15,126,213 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 adcq $0,%rax movq 32+8(%rsp),%rbx movq 64(%rdi,%rcx,1),%rdx movq %r8,0(%rdi) leaq 64(%rdi),%r8 movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) leaq 64(%rdi,%rcx,1),%rdi cmpq 8+8(%rsp),%r8 jb .Lsqrx8x_reduction_loop .byte 0xf3,0xc3 .cfi_endproc .size bn_sqrx8x_internal,.-bn_sqrx8x_internal