forked from luck/tmp_suning_uos_patched
perf bench futex: Cache align the worker struct
It popped up in perf testing that the worker consumes a noticeable amount of CPU. It boils down to the increment of `ops`, which causes cache line bouncing between the individual threads. This patch aligns the struct to 256 bytes to ensure that no cache line is shared among CPUs. 128 bytes is the x86 worst case, and grep says that L1_CACHE_SHIFT is set to 8 on s390 (i.e. 1 << 8 = 256-byte cache lines), hence 256. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Davidlohr Bueso <dbueso@suse.de> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20161016190803.3392-1-bigeasy@linutronix.de Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
899735066a
commit
34b753007d
|
@ -39,12 +39,15 @@ static unsigned int threads_starting;
|
|||
static struct stats throughput_stats;
|
||||
static pthread_cond_t thread_parent, thread_worker;
|
||||
|
||||
#define SMP_CACHE_BYTES 256
|
||||
#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES)))
|
||||
|
||||
struct worker {
|
||||
int tid;
|
||||
u_int32_t *futex;
|
||||
pthread_t thread;
|
||||
unsigned long ops;
|
||||
};
|
||||
} __cacheline_aligned;
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
|
||||
|
|
Loading…
Reference in New Issue
Block a user