kernel_optimize_test/arch/tile/lib/spinlock_64.c

/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/processor.h>

#include "spinlock_common.h"

/*
 * Read the spinlock value without allocating in our cache and without
 * causing an invalidation to another cpu with a copy of the cacheline.
 * This is important when we are spinning waiting for the lock.
 */
static inline u32 arch_spin_read_noalloc(void *lock)
{
	return atomic_cmpxchg((atomic_t *)lock, -1, -1);
}

/*
 * Contended path of the ticket lock: spin until the "current" field
 * (the high bits of the lock word) equals my_ticket.
 *
 * If my_ticket arrives with the overflow bit set, that bit is cleared
 * both in the lock word and in our copy of the ticket before we start
 * waiting.  The pause between two polls grows with the distance
 * between our ticket and the ticket currently being served.
 */
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
{
	u32 ahead;

	if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
		__insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
		my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
	}

	/* Poll without disturbing the cacheline; leave once it is our turn. */
	while ((ahead = my_ticket -
		arch_spin_current(arch_spin_read_noalloc(lock))) != 0)
		relax((128 / CYCLES_PER_RELAX_LOOP) * ahead);
}
EXPORT_SYMBOL(arch_spin_lock_slow);

/*
 * Single attempt at taking the lock.
 *
 * The lock word is sampled first; unless its "next" and "current"
 * fields agree, we give up and return 0 without writing anything.
 * Otherwise cmpxchg() tries to install the sampled word plus one (with
 * the overflow bit masked off).  Returns nonzero only if cmpxchg()
 * found the sampled value still in place.
 */
int arch_spin_trylock(arch_spinlock_t *lock)
{
	u32 seen = arch_spin_read_noalloc(lock);
	u32 taken;

	if (unlikely(arch_spin_next(seen) != arch_spin_current(seen)))
		return 0;

	taken = (seen + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW;
	return cmpxchg(&lock->lock, seen, taken) == seen;
}
EXPORT_SYMBOL(arch_spin_trylock);

/*
 * Wait for whoever holds the lock at the time of the call (if anyone)
 * to release it.
 *
 * One snapshot of the lock word is taken on entry.  If its "next" and
 * "current" fields are equal we return at once.  Otherwise the lock
 * word is re-read, with delay_backoff() between reads, until its
 * "current" field differs from the one in the snapshot.  The lock word
 * is only ever read here, never written.
 */
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	/* Number of polls so far; passed to delay_backoff() on each pass. */
	u32 iterations = 0;
	u32 val = READ_ONCE(lock->lock);
	u32 curr = arch_spin_current(val);

	/* Return immediately if unlocked. */
	if (arch_spin_next(val) == curr)
		return;

	/* Wait until the current locker has released the lock. */
	do {
		delay_backoff(iterations++);
	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);

	/*
	 * The TILE architecture doesn't do read speculation; therefore
	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
	 */
	barrier();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);

/*
 * Contended path for taking the rwlock as a reader.
 *
 * A writer got in the way of the first attempt, so keep retrying: back
 * off, then reissue the conditional add of one reader.  We are done as
 * soon as the value handed back by the add no longer shows the lock as
 * write-locked, i.e. our incremented reader count has been written.
 */
void __read_lock_failed(arch_rwlock_t *rw)
{
	int attempt;

	for (attempt = 0; ; attempt++) {
		u32 seen;

		delay_backoff(attempt);
		seen = __insn_fetchaddgez4(&rw->lock, 1);
		if (likely(!arch_write_val_locked(seen)))
			return;
	}
}
EXPORT_SYMBOL(__read_lock_failed);

/*
 * Contended path for taking the rwlock as a writer.
 *
 * "val" is the lock word seen by the most recent attempt to set the
 * writer bit.  If it does not show a writer, the attempt failed because
 * of readers, so the writer bit we just set is cleared again to avoid
 * blocking additional readers.  If it does show a writer, our
 * "fetchor" changed nothing and there is nothing to undo.  Either way
 * we back off and reissue the fetchor, until it reports that the word
 * was zero beforehand, at which point the lock is ours.
 */
void __write_lock_failed(arch_rwlock_t *rw, u32 val)
{
	int attempt = 0;

	for (;;) {
		/* Only readers were in the way: withdraw our writer bit. */
		if (!arch_write_val_locked(val))
			__insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);

		delay_backoff(attempt++);

		val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
		if (val == 0)
			return;
	}
}
EXPORT_SYMBOL(__write_lock_failed);
arch/tile: finish enabling support for TILE-Gx 64-bit chip This support was partially present in the existing code (look for "__tilegx__" ifdefs) but with this change you can build a working kernel using the TILE-Gx toolchain and ARCH=tilegx. Most of these files are new, generally adding a foo_64.c file where previously there was just a foo_32.c file. The ARCH=tilegx directive redirects to arch/tile, not arch/tilegx, using the existing SRCARCH mechanism in the top-level Makefile. Changes to existing files: - <asm/bitops.h> and <asm/bitops_32.h> changed to factor the include of <asm-generic/bitops/non-atomic.h> in the common header. - <asm/compat.h> and arch/tile/kernel/compat.c changed to remove the "const" markers I had put on compat_sys_execve() when trying to match some recent similar changes to the non-compat execve. It turns out the compat version wasn't "upgraded" to use const. - <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were previously included accidentally, with the 32-bit contents. Now they have the proper 64-bit contents. Finally, I had to hack the existing hacky drivers/input/input-compat.h to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64). Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input] 2011-05-05 02:38:26 +08:00			`/*`
			`* Copyright 2011 Tilera Corporation. All Rights Reserved.`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public License`
			`* as published by the Free Software Foundation, version 2.`
			`*`
			`* This program is distributed in the hope that it will be useful, but`
			`* WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or`
			`* NON INFRINGEMENT. See the GNU General Public License for`
			`* more details.`
			`*/`

			`#include <linux/spinlock.h>`
			`#include <linux/module.h>`
			`#include <asm/processor.h>`

			`#include "spinlock_common.h"`

			`/*`
			`* Read the spinlock value without allocating in our cache and without`
			`* causing an invalidation to another cpu with a copy of the cacheline.`
			`* This is important when we are spinning waiting for the lock.`
			`*/`
			`static inline u32 arch_spin_read_noalloc(void *lock)`
			`{`
			`return atomic_cmpxchg((atomic_t *)lock, -1, -1);`
			`}`

			`/*`
			`* Wait until the high bits (current) match my ticket.`
			`* If we notice the overflow bit set on entry, we clear it.`
			`*/`
			`void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)`
			`{`
			`if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {`
			`__insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);`
			`my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;`
			`}`

			`for (;;) {`
			`u32 val = arch_spin_read_noalloc(lock);`
			`u32 delta = my_ticket - arch_spin_current(val);`
			`if (delta == 0)`
			`return;`
			`relax((128 / CYCLES_PER_RELAX_LOOP) * delta);`
			`}`
			`}`
			`EXPORT_SYMBOL(arch_spin_lock_slow);`

			`/*`
			`* Check the lock to see if it is plausible, and try to get it with cmpxchg().`
			`*/`
			`int arch_spin_trylock(arch_spinlock_t *lock)`
			`{`
			`u32 val = arch_spin_read_noalloc(lock);`
			`if (unlikely(arch_spin_current(val) != arch_spin_next(val)))`
			`return 0;`
			`return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)`
			`== val;`
			`}`
			`EXPORT_SYMBOL(arch_spin_trylock);`

			`void arch_spin_unlock_wait(arch_spinlock_t *lock)`
			`{`
			`u32 iterations = 0;`
tile: modify arch_spin_unlock_wait() semantics Rather than trying to wait until all possible lockers have unlocked the lock, we now only wait until the current locker (if any) has released the lock. The old code was correct, but the new code works more like the x86 code and thus hopefully is more appropriate under contention. See commit 78bff1c8684f ("x86/ticketlock: Fix spin_unlock_wait() livelock") for x86. Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com> 2015-04-29 01:02:26 +08:00			`u32 val = READ_ONCE(lock->lock);`
			`u32 curr = arch_spin_current(val);`

			`/* Return immediately if unlocked. */`
			`if (arch_spin_next(val) == curr)`
			`return;`

			`/* Wait until the current locker has released the lock. */`
			`do {`
arch/tile: finish enabling support for TILE-Gx 64-bit chip This support was partially present in the existing code (look for "__tilegx__" ifdefs) but with this change you can build a working kernel using the TILE-Gx toolchain and ARCH=tilegx. Most of these files are new, generally adding a foo_64.c file where previously there was just a foo_32.c file. The ARCH=tilegx directive redirects to arch/tile, not arch/tilegx, using the existing SRCARCH mechanism in the top-level Makefile. Changes to existing files: - <asm/bitops.h> and <asm/bitops_32.h> changed to factor the include of <asm-generic/bitops/non-atomic.h> in the common header. - <asm/compat.h> and arch/tile/kernel/compat.c changed to remove the "const" markers I had put on compat_sys_execve() when trying to match some recent similar changes to the non-compat execve. It turns out the compat version wasn't "upgraded" to use const. - <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were previously included accidentally, with the 32-bit contents. Now they have the proper 64-bit contents. Finally, I had to hack the existing hacky drivers/input/input-compat.h to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64). Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input] 2011-05-05 02:38:26 +08:00			`delay_backoff(iterations++);`
tile: modify arch_spin_unlock_wait() semantics Rather than trying to wait until all possible lockers have unlocked the lock, we now only wait until the current locker (if any) has released the lock. The old code was correct, but the new code works more like the x86 code and thus hopefully is more appropriate under contention. See commit 78bff1c8684f ("x86/ticketlock: Fix spin_unlock_wait() livelock") for x86. Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com> 2015-04-29 01:02:26 +08:00			`} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);`
locking/spinlock, arch: Update and fix spin_unlock_wait() implementations This patch updates/fixes all spin_unlock_wait() implementations. The update is in semantics; where it previously was only a control dependency, we now upgrade to a full load-acquire to match the store-release from the spin_unlock() we waited on. This ensures that when spin_unlock_wait() returns, we're guaranteed to observe the full critical section we waited on. This fixes a number of spin_unlock_wait() users that (not unreasonably) rely on this. I also fixed a number of ticket lock versions to only wait on the current lock holder, instead of for a full unlock, as this is sufficient. Furthermore; again for ticket locks; I added an smp_rmb() in between the initial ticket load and the spin loop testing the current value because I could not convince myself the address dependency is sufficient, esp. if the loads are of different sizes. I'm more than happy to remove this smp_rmb() again if people are certain the address dependency does indeed work as expected. Note: PPC32 will be fixed independently Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: chris@zankel.net Cc: cmetcalf@mellanox.com Cc: davem@davemloft.net Cc: dhowells@redhat.com Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: linux@armlinux.org.uk Cc: mpe@ellerman.id.au Cc: ralf@linux-mips.org Cc: realmz6@gmail.com Cc: rkuo@codeaurora.org Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Cc: ysato@users.sourceforge.jp Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> 2016-05-26 16:35:03 +08:00
			`/*`
			`* The TILE architecture doesn't do read speculation; therefore`
			`* a control dependency guarantees a LOAD->{LOAD,STORE} order.`
			`*/`
			`barrier();`
arch/tile: finish enabling support for TILE-Gx 64-bit chip This support was partially present in the existing code (look for "__tilegx__" ifdefs) but with this change you can build a working kernel using the TILE-Gx toolchain and ARCH=tilegx. Most of these files are new, generally adding a foo_64.c file where previously there was just a foo_32.c file. The ARCH=tilegx directive redirects to arch/tile, not arch/tilegx, using the existing SRCARCH mechanism in the top-level Makefile. Changes to existing files: - <asm/bitops.h> and <asm/bitops_32.h> changed to factor the include of <asm-generic/bitops/non-atomic.h> in the common header. - <asm/compat.h> and arch/tile/kernel/compat.c changed to remove the "const" markers I had put on compat_sys_execve() when trying to match some recent similar changes to the non-compat execve. It turns out the compat version wasn't "upgraded" to use const. - <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were previously included accidentally, with the 32-bit contents. Now they have the proper 64-bit contents. Finally, I had to hack the existing hacky drivers/input/input-compat.h to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64). Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input] 2011-05-05 02:38:26 +08:00			`}`
			`EXPORT_SYMBOL(arch_spin_unlock_wait);`

			`/*`
			`* If the read lock fails due to a writer, we retry periodically`
			`* until the value is positive and we write our incremented reader count.`
			`*/`
			`void __read_lock_failed(arch_rwlock_t *rw)`
			`{`
			`u32 val;`
			`int iterations = 0;`
			`do {`
			`delay_backoff(iterations++);`
			`val = __insn_fetchaddgez4(&rw->lock, 1);`
			`} while (unlikely(arch_write_val_locked(val)));`
			`}`
			`EXPORT_SYMBOL(__read_lock_failed);`

			`/*`
			`* If we failed because there were readers, clear the "writer" bit`
			`* so we don't block additional readers. Otherwise, there was another`
			`* writer anyway, so our "fetchor" made no difference. Then wait,`
			`* issuing periodic fetchor instructions, till we get the lock.`
			`*/`
			`void __write_lock_failed(arch_rwlock_t *rw, u32 val)`
			`{`
			`int iterations = 0;`
			`do {`
			`if (!arch_write_val_locked(val))`
			`val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);`
			`delay_backoff(iterations++);`
			`val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);`
			`} while (val != 0);`
			`}`
			`EXPORT_SYMBOL(__write_lock_failed);`