kernel_optimize_test/lib/atomic64_test.c
Linus Torvalds 5cb52b5e16 Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Intel Knights Landing support.  (Harish Chegondi)

   - Intel Broadwell-EP uncore PMU support.  (Kan Liang)

   - Core code improvements.  (Peter Zijlstra.)

   - Event filter, LBR and PEBS fixes.  (Stephane Eranian)

   - Enable cycles:pp on Intel Atom.  (Stephane Eranian)

   - Add cycles:ppp support for Skylake.  (Andi Kleen)

   - Various x86 NMI overhead optimizations.  (Andi Kleen)

   - Intel PT enhancements.  (Takao Indoh)

   - AMD cache events fix.  (Vince Weaver)

  Tons of tooling changes:

   - Show random perf tool tips in the 'perf report' bottom line
     (Namhyung Kim)

   - perf report now defaults to --group if the perf.data file has
     grouped events, try it with:

      # perf record -e '{cycles,instructions}' -a sleep 1
      [ perf record: Woken up 1 times to write data ]
      [ perf record: Captured and wrote 1.093 MB perf.data (1247 samples) ]
      # perf report
      # Samples: 1K of event 'anon group { cycles, instructions }'
      # Event count (approx.): 1955219195
      #
      #       Overhead  Command     Shared Object      Symbol

         2.86%   0.22%  swapper     [kernel.kallsyms]  [k] intel_idle
         1.05%   0.33%  firefox     libxul.so          [.] js::SetObjectElement
         1.05%   0.00%  kworker/0:3 [kernel.kallsyms]  [k] gen6_ring_get_seqno
         0.88%   0.17%  chrome      chrome             [.] 0x0000000000ee27ab
         0.65%   0.86%  firefox     libxul.so          [.] js::ValueToId<(js::AllowGC)1>
         0.64%   0.23%  JS Helper   libxul.so          [.] js::SplayTree<js::jit::LiveRange*, js::jit::LiveRange>::splay
         0.62%   1.27%  firefox     libxul.so          [.] js::GetIterator
         0.61%   1.74%  firefox     libxul.so          [.] js::NativeSetProperty
         0.61%   0.31%  firefox     libxul.so          [.] js::SetPropertyByDefining

   - Introduce the 'perf stat record/report' workflow:

     Generate perf.data files from 'perf stat', to tap into the
     scripting capabilities perf has instead of defining a 'perf stat'
     specific scripting support to calculate event ratios, etc.

     Simple example:

        $ perf stat record -e cycles usleep 1

         Performance counter stats for 'usleep 1':

               1,134,996      cycles

             0.000670644 seconds time elapsed

        $ perf stat report

         Performance counter stats for '/home/acme/bin/perf stat record -e cycles usleep 1':

               1,134,996      cycles

             0.000670644 seconds time elapsed

        $

     It generates PERF_RECORD_ userspace records to store the details:

        $ perf report -D | grep PERF_RECORD
        0xf0 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27637
        0x118 [0x12]: PERF_RECORD_CPU_MAP nr: 1 cpu: 65535
        0x12a [0x40]: PERF_RECORD_STAT_CONFIG
        0x16a [0x30]: PERF_RECORD_STAT
        -1 -1 0x19a [0x40]: PERF_RECORD_MMAP -1/0: [0xffffffff81000000(0x1f000000) @ 0xffffffff81000000]: x [kernel.kallsyms]_text
        0x1da [0x18]: PERF_RECORD_STAT_ROUND
        [acme@ssdandy linux]$

     An effort was made to make perf.data files generated like this to
     not generate cryptic messages when processed by older tools.

     The 'perf script' bits need rebasing, will go up later.

   - Make command line options always available, even when they depend
     on some feature being enabled, warning the user about use of such
     options (Wang Nan)

   - Support hw breakpoint events (mem:0xAddress) in the default output
     mode in 'perf script' (Wang Nan)

   - Fixes and improvements for supporting annotating ARM binaries,
     support ARM call and jump instructions, more work needed to have
     arch specific stuff separated into tools/perf/arch/*/annotate/
     (Russell King)

   - Add initial 'perf config' command, for now just with a --list
     command to the contents of the configuration file in use and a
     basic man page describing its format, commands for doing edits and
     detailed documentation are being reviewed and proof-read.  (Taeung
     Song)

   - Allows BPF scriptlets specify arguments to be fetched using DWARF
     info, using a prologue generated at compile/build time (He Kuang,
     Wang Nan)

   - Allow attaching BPF scriptlets to module symbols (Wang Nan)

   - Allow attaching BPF scriptlets to userspace code using uprobe (Wang
     Nan)

   - BPF programs now can specify 'perf probe' tunables via its section
     name, separating key=val values using semicolons (Wang Nan)

     Testing some of these new BPF features:

        Use case: get callchains when receiving SSL packets, filter then in the
                  kernel, at arbitrary place.

        # cat ssl.bpf.c
        #define SEC(NAME) __attribute__((section(NAME), used))

        struct pt_regs;

        SEC("func=__inet_lookup_established hnum")
        int func(struct pt_regs *ctx, int err, unsigned short port)
        {
                return err == 0 && port == 443;
        }

        char _license[] SEC("license") = "GPL";
        int  _version   SEC("version") = LINUX_VERSION_CODE;
        #
        # perf record -a -g -e ssl.bpf.c
        ^C[ perf record: Woken up 1 times to write data ]
        [ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ]
        # perf script | head -30
        swapper     0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
           8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
           896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
           8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
           855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
           8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
           856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
           2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
           2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
           96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
           969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
           2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
           95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
          1163ffa start_kernel ([kernel.vmlinux].init.text)
          11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
          1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)

        qemu-system-x86  9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
           8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
           896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
           8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
           855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
           856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux)
           8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux)
             430a br_handle_frame_finish ([bridge])
             48bc br_handle_frame ([bridge])
           855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
        #

   - Use 'perf probe' various options to list functions, see what
     variables can be collected at any given point, experiment first
     collecting without a filter, then filter, use it together with
     'perf trace', 'perf top', with or without callchains, if it
     explodes, please tell us!

   - Introduce a new callchain mode: "folded", that will list per line
     representations of all callchains for a give histogram entry,
     facilitating 'perf report' output processing by other tools, such
     as Brendan Gregg's flamegraph tools (Namhyung Kim)

     E.g:

        # perf report | grep -v ^# | head
           18.37%     0.00%  swapper  [kernel.kallsyms]   [k] cpu_startup_entry
                           |
                           ---cpu_startup_entry
                              |
                              |--12.07%--start_secondary
                              |
                               --6.30%--rest_init
                                         start_kernel
                                         x86_64_start_reservations
                                         x86_64_start_kernel
         #

     Becomes, in "folded" mode:

        # perf report -g folded | grep -v ^# | head -5
            18.37%     0.00%  swapper [kernel.kallsyms]   [k] cpu_startup_entry
          12.07% cpu_startup_entry;start_secondary
           6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            16.90%     0.00%  swapper [kernel.kallsyms]   [k] call_cpuidle
          11.23% call_cpuidle;cpu_startup_entry;start_secondary
           5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            16.90%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter
          11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary
           5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            15.12%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter_state
         #

     The user can also select one of "count", "period" or "percent" as
     the first column.

  ... and lots of infrastructure enhancements, plus fixes and other
  changes, features I failed to list - see the shortlog and the git log
  for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (271 commits)
  perf evlist: Add --trace-fields option to show trace fields
  perf record: Store data mmaps for dwarf unwind
  perf libdw: Check for mmaps also in MAP__VARIABLE tree
  perf unwind: Check for mmaps also in MAP__VARIABLE tree
  perf unwind: Use find_map function in access_dso_mem
  perf evlist: Remove perf_evlist__(enable|disable)_event functions
  perf evlist: Make perf_evlist__open() open evsels with their cpus and threads (like perf record does)
  perf report: Show random usage tip on the help line
  perf hists: Export a couple of hist functions
  perf diff: Use perf_hpp__register_sort_field interface
  perf tools: Add overhead/overhead_children keys defaults via string
  perf tools: Remove list entry from struct sort_entry
  perf tools: Include all tools/lib directory for tags/cscope/TAGS targets
  perf script: Align event name properly
  perf tools: Add missing headers in perf's MANIFEST
  perf tools: Do not show trace command if it's not compiled in
  perf report: Change default to use event group view
  perf top: Decay periods in callchains
  tools lib: Move bitmap.[ch] from tools/perf/ to tools/{lib,include}/
  tools lib: Sync tools/lib/find_bit.c with the kernel
  ...
2016-01-11 14:39:17 -08:00

238 lines
5.4 KiB
C

/*
* Testsuite for atomic64_t functions
*
* Copyright © 2010 Luca Barbieri
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/atomic.h>
#ifdef CONFIG_X86
#include <asm/processor.h> /* for boot_cpu_has below */
#endif
#define TEST(bit, op, c_op, val) \
do { \
atomic##bit##_set(&v, v0); \
r = v0; \
atomic##bit##_##op(val, &v); \
r c_op val; \
WARN(atomic##bit##_read(&v) != r, "%Lx != %Lx\n", \
(unsigned long long)atomic##bit##_read(&v), \
(unsigned long long)r); \
} while (0)
/*
* Test for a atomic operation family,
* @test should be a macro accepting parameters (bit, op, ...)
*/
#define FAMILY_TEST(test, bit, op, args...) \
do { \
test(bit, op, ##args); \
test(bit, op##_acquire, ##args); \
test(bit, op##_release, ##args); \
test(bit, op##_relaxed, ##args); \
} while (0)
#define TEST_RETURN(bit, op, c_op, val) \
do { \
atomic##bit##_set(&v, v0); \
r = v0; \
r c_op val; \
BUG_ON(atomic##bit##_##op(val, &v) != r); \
BUG_ON(atomic##bit##_read(&v) != r); \
} while (0)
#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
do { \
FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
} while (0)
#define TEST_ARGS(bit, op, init, ret, expect, args...) \
do { \
atomic##bit##_set(&v, init); \
BUG_ON(atomic##bit##_##op(&v, ##args) != ret); \
BUG_ON(atomic##bit##_read(&v) != expect); \
} while (0)
#define XCHG_FAMILY_TEST(bit, init, new) \
do { \
FAMILY_TEST(TEST_ARGS, bit, xchg, init, init, new, new); \
} while (0)
#define CMPXCHG_FAMILY_TEST(bit, init, new, wrong) \
do { \
FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
init, init, new, init, new); \
FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
init, init, init, wrong, new); \
} while (0)
#define INC_RETURN_FAMILY_TEST(bit, i) \
do { \
FAMILY_TEST(TEST_ARGS, bit, inc_return, \
i, (i) + one, (i) + one); \
} while (0)
#define DEC_RETURN_FAMILY_TEST(bit, i) \
do { \
FAMILY_TEST(TEST_ARGS, bit, dec_return, \
i, (i) - one, (i) - one); \
} while (0)
static __init void test_atomic(void)
{
int v0 = 0xaaa31337;
int v1 = 0xdeadbeef;
int onestwos = 0x11112222;
int one = 1;
atomic_t v;
int r;
TEST(, add, +=, onestwos);
TEST(, add, +=, -one);
TEST(, sub, -=, onestwos);
TEST(, sub, -=, -one);
TEST(, or, |=, v1);
TEST(, and, &=, v1);
TEST(, xor, ^=, v1);
TEST(, andnot, &= ~, v1);
RETURN_FAMILY_TEST(, add_return, +=, onestwos);
RETURN_FAMILY_TEST(, add_return, +=, -one);
RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(, sub_return, -=, -one);
INC_RETURN_FAMILY_TEST(, v0);
DEC_RETURN_FAMILY_TEST(, v0);
XCHG_FAMILY_TEST(, v0, v1);
CMPXCHG_FAMILY_TEST(, v0, v1, onestwos);
}
#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
static __init void test_atomic64(void)
{
long long v0 = 0xaaa31337c001d00dLL;
long long v1 = 0xdeadbeefdeafcafeLL;
long long v2 = 0xfaceabadf00df001LL;
long long onestwos = 0x1111111122222222LL;
long long one = 1LL;
atomic64_t v = ATOMIC64_INIT(v0);
long long r = v0;
BUG_ON(v.counter != r);
atomic64_set(&v, v1);
r = v1;
BUG_ON(v.counter != r);
BUG_ON(atomic64_read(&v) != r);
TEST(64, add, +=, onestwos);
TEST(64, add, +=, -one);
TEST(64, sub, -=, onestwos);
TEST(64, sub, -=, -one);
TEST(64, or, |=, v1);
TEST(64, and, &=, v1);
TEST(64, xor, ^=, v1);
TEST(64, andnot, &= ~, v1);
RETURN_FAMILY_TEST(64, add_return, +=, onestwos);
RETURN_FAMILY_TEST(64, add_return, +=, -one);
RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(64, sub_return, -=, -one);
INIT(v0);
atomic64_inc(&v);
r += one;
BUG_ON(v.counter != r);
INIT(v0);
atomic64_dec(&v);
r -= one;
BUG_ON(v.counter != r);
INC_RETURN_FAMILY_TEST(64, v0);
DEC_RETURN_FAMILY_TEST(64, v0);
XCHG_FAMILY_TEST(64, v0, v1);
CMPXCHG_FAMILY_TEST(64, v0, v1, v2);
INIT(v0);
BUG_ON(atomic64_add_unless(&v, one, v0));
BUG_ON(v.counter != r);
INIT(v0);
BUG_ON(!atomic64_add_unless(&v, one, v1));
r += one;
BUG_ON(v.counter != r);
#ifdef CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
INIT(onestwos);
BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
r -= one;
BUG_ON(v.counter != r);
INIT(0);
BUG_ON(atomic64_dec_if_positive(&v) != -one);
BUG_ON(v.counter != r);
INIT(-one);
BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
BUG_ON(v.counter != r);
#else
#warning Please implement atomic64_dec_if_positive for your architecture and select the above Kconfig symbol
#endif
INIT(onestwos);
BUG_ON(!atomic64_inc_not_zero(&v));
r += one;
BUG_ON(v.counter != r);
INIT(0);
BUG_ON(atomic64_inc_not_zero(&v));
BUG_ON(v.counter != r);
INIT(-one);
BUG_ON(!atomic64_inc_not_zero(&v));
r += one;
BUG_ON(v.counter != r);
}
static __init int test_atomics(void)
{
test_atomic();
test_atomic64();
#ifdef CONFIG_X86
pr_info("passed for %s platform %s CX8 and %s SSE\n",
#ifdef CONFIG_X86_64
"x86-64",
#elif defined(CONFIG_X86_CMPXCHG64)
"i586+",
#else
"i386+",
#endif
boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
#else
pr_info("passed\n");
#endif
return 0;
}
core_initcall(test_atomics);