kernel_optimize_test/arch/arm/mach-omap2/sleep44xx.S
Russell King 6ebbf2ce43 ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+
ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and this sequence is strongly recommended to be used by the ARM
architecture manual (section A.4.1.1).

We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.

Rather than doing this piecemeal, and miss some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.

Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-07-18 12:29:04 +01:00

388 lines
10 KiB
ArmAsm

/*
* OMAP44xx sleep code.
*
* Copyright (C) 2011 Texas Instruments, Inc.
* Santosh Shilimkar <santosh.shilimkar@ti.com>
*
* This program is free software,you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/smp_scu.h>
#include <asm/memory.h>
#include <asm/hardware/cache-l2x0.h>
#include "omap-secure.h"
#include "common.h"
#include "omap44xx.h"
#include "omap4-sar-layout.h"
#if defined(CONFIG_SMP) && defined(CONFIG_PM)
.macro DO_SMC
dsb
smc #0
dsb
.endm
ppa_zero_params:
.word 0x0
ppa_por_params:
.word 1, 0
#ifdef CONFIG_ARCH_OMAP4
/*
* =============================
* == CPU suspend finisher ==
* =============================
*
* void omap4_finish_suspend(unsigned long cpu_state)
*
* This function code saves the CPU context and performs the CPU
* power down sequence. Calling WFI effectively changes the CPU
* power domains states to the desired target power state.
*
* @cpu_state : contains context save state (r0)
* 0 - No context lost
* 1 - CPUx L1 and logic lost: MPUSS CSWR
* 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR
* 3 - CPUx L1 and logic lost + GIC + L2 lost: MPUSS OFF
* @return: This function never returns for CPU OFF and DORMANT power states.
* Post WFI, CPU transitions to DORMANT or OFF power state and on wake-up
* from this follows a full CPU reset path via ROM code to CPU restore code.
* The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET.
* It returns to the caller for CPU INACTIVE and ON power states or in case
* CPU failed to transition to targeted OFF/DORMANT state.
*
* omap4_finish_suspend() calls v7_flush_dcache_all() which doesn't save
* stack frame and it expects the caller to take care of it. Hence the entire
* stack frame is saved to avoid possible stack corruption.
*/
ENTRY(omap4_finish_suspend)
stmfd sp!, {r4-r12, lr}
cmp r0, #0x0
beq do_WFI @ No lowpower state, jump to WFI
/*
* Flush all data from the L1 data cache before disabling
* SCTLR.C bit.
*/
bl omap4_get_sar_ram_base
ldr r9, [r0, #OMAP_TYPE_OFFSET]
cmp r9, #0x1 @ Check for HS device
bne skip_secure_l1_clean
mov r0, #SCU_PM_NORMAL
mov r1, #0xFF @ clean seucre L1
stmfd r13!, {r4-r12, r14}
ldr r12, =OMAP4_MON_SCU_PWR_INDEX
DO_SMC
ldmfd r13!, {r4-r12, r14}
skip_secure_l1_clean:
bl v7_flush_dcache_all
/*
* Clear the SCTLR.C bit to prevent further data cache
* allocation. Clearing SCTLR.C would make all the data accesses
* strongly ordered and would not hit the cache.
*/
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #(1 << 2) @ Disable the C bit
mcr p15, 0, r0, c1, c0, 0
isb
/*
* Invalidate L1 data cache. Even though only invalidate is
* necessary exported flush API is used here. Doing clean
* on already clean cache would be almost NOP.
*/
bl v7_flush_dcache_all
/*
* Switch the CPU from Symmetric Multiprocessing (SMP) mode
* to AsymmetricMultiprocessing (AMP) mode by programming
* the SCU power status to DORMANT or OFF mode.
* This enables the CPU to be taken out of coherency by
* preventing the CPU from receiving cache, TLB, or BTB
* maintenance operations broadcast by other CPUs in the cluster.
*/
bl omap4_get_sar_ram_base
mov r8, r0
ldr r9, [r8, #OMAP_TYPE_OFFSET]
cmp r9, #0x1 @ Check for HS device
bne scu_gp_set
mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR
ands r0, r0, #0x0f
ldreq r0, [r8, #SCU_OFFSET0]
ldrne r0, [r8, #SCU_OFFSET1]
mov r1, #0x00
stmfd r13!, {r4-r12, r14}
ldr r12, =OMAP4_MON_SCU_PWR_INDEX
DO_SMC
ldmfd r13!, {r4-r12, r14}
b skip_scu_gp_set
scu_gp_set:
mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR
ands r0, r0, #0x0f
ldreq r1, [r8, #SCU_OFFSET0]
ldrne r1, [r8, #SCU_OFFSET1]
bl omap4_get_scu_base
bl scu_power_mode
skip_scu_gp_set:
mrc p15, 0, r0, c1, c1, 2 @ Read NSACR data
tst r0, #(1 << 18)
mrcne p15, 0, r0, c1, c0, 1
bicne r0, r0, #(1 << 6) @ Disable SMP bit
mcrne p15, 0, r0, c1, c0, 1
isb
dsb
#ifdef CONFIG_CACHE_L2X0
/*
* Clean and invalidate the L2 cache.
* Common cache-l2x0.c functions can't be used here since it
* uses spinlocks. We are out of coherency here with data cache
* disabled. The spinlock implementation uses exclusive load/store
* instruction which can fail without data cache being enabled.
* OMAP4 hardware doesn't support exclusive monitor which can
* overcome exclusive access issue. Because of this, CPU can
* lead to deadlock.
*/
bl omap4_get_sar_ram_base
mov r8, r0
mrc p15, 0, r5, c0, c0, 5 @ Read MPIDR
ands r5, r5, #0x0f
ldreq r0, [r8, #L2X0_SAVE_OFFSET0] @ Retrieve L2 state from SAR
ldrne r0, [r8, #L2X0_SAVE_OFFSET1] @ memory.
cmp r0, #3
bne do_WFI
#ifdef CONFIG_PL310_ERRATA_727915
mov r0, #0x03
mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX
DO_SMC
#endif
bl omap4_get_l2cache_base
mov r2, r0
ldr r0, =0xffff
str r0, [r2, #L2X0_CLEAN_INV_WAY]
wait:
ldr r0, [r2, #L2X0_CLEAN_INV_WAY]
ldr r1, =0xffff
ands r0, r0, r1
bne wait
#ifdef CONFIG_PL310_ERRATA_727915
mov r0, #0x00
mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX
DO_SMC
#endif
l2x_sync:
bl omap4_get_l2cache_base
mov r2, r0
mov r0, #0x0
str r0, [r2, #L2X0_CACHE_SYNC]
sync:
ldr r0, [r2, #L2X0_CACHE_SYNC]
ands r0, r0, #0x1
bne sync
#endif
do_WFI:
bl omap_do_wfi
/*
* CPU is here when it failed to enter OFF/DORMANT or
* no low power state was attempted.
*/
mrc p15, 0, r0, c1, c0, 0
tst r0, #(1 << 2) @ Check C bit enabled?
orreq r0, r0, #(1 << 2) @ Enable the C bit
mcreq p15, 0, r0, c1, c0, 0
isb
/*
* Ensure the CPU power state is set to NORMAL in
* SCU power state so that CPU is back in coherency.
* In non-coherent mode CPU can lock-up and lead to
* system deadlock.
*/
mrc p15, 0, r0, c1, c0, 1
tst r0, #(1 << 6) @ Check SMP bit enabled?
orreq r0, r0, #(1 << 6)
mcreq p15, 0, r0, c1, c0, 1
isb
bl omap4_get_sar_ram_base
mov r8, r0
ldr r9, [r8, #OMAP_TYPE_OFFSET]
cmp r9, #0x1 @ Check for HS device
bne scu_gp_clear
mov r0, #SCU_PM_NORMAL
mov r1, #0x00
stmfd r13!, {r4-r12, r14}
ldr r12, =OMAP4_MON_SCU_PWR_INDEX
DO_SMC
ldmfd r13!, {r4-r12, r14}
b skip_scu_gp_clear
scu_gp_clear:
bl omap4_get_scu_base
mov r1, #SCU_PM_NORMAL
bl scu_power_mode
skip_scu_gp_clear:
isb
dsb
ldmfd sp!, {r4-r12, pc}
ENDPROC(omap4_finish_suspend)
/*
* ============================
* == CPU resume entry point ==
* ============================
*
* void omap4_cpu_resume(void)
*
* ROM code jumps to this function while waking up from CPU
* OFF or DORMANT state. Physical address of the function is
* stored in the SAR RAM while entering to OFF or DORMANT mode.
* The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET.
*/
ENTRY(omap4_cpu_resume)
/*
* Configure ACTRL and enable NS SMP bit access on CPU1 on HS device.
* OMAP44XX EMU/HS devices - CPU0 SMP bit access is enabled in PPA
* init and for CPU1, a secure PPA API provided. CPU0 must be ON
* while executing NS_SMP API on CPU1 and PPA version must be 1.4.0+.
* OMAP443X GP devices- SMP bit isn't accessible.
* OMAP446X GP devices - SMP bit access is enabled on both CPUs.
*/
ldr r8, =OMAP44XX_SAR_RAM_BASE
ldr r9, [r8, #OMAP_TYPE_OFFSET]
cmp r9, #0x1 @ Skip if GP device
bne skip_ns_smp_enable
mrc p15, 0, r0, c0, c0, 5
ands r0, r0, #0x0f
beq skip_ns_smp_enable
ppa_actrl_retry:
mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
adr r3, ppa_zero_params @ Pointer to parameters
mov r1, #0x0 @ Process ID
mov r2, #0x4 @ Flag
mov r6, #0xff
mov r12, #0x00 @ Secure Service ID
DO_SMC
cmp r0, #0x0 @ API returns 0 on success.
beq enable_smp_bit
b ppa_actrl_retry
enable_smp_bit:
mrc p15, 0, r0, c1, c0, 1
tst r0, #(1 << 6) @ Check SMP bit enabled?
orreq r0, r0, #(1 << 6)
mcreq p15, 0, r0, c1, c0, 1
isb
skip_ns_smp_enable:
#ifdef CONFIG_CACHE_L2X0
/*
* Restore the L2 AUXCTRL and enable the L2 cache.
* OMAP4_MON_L2X0_AUXCTRL_INDEX = Program the L2X0 AUXCTRL
* OMAP4_MON_L2X0_CTRL_INDEX = Enable the L2 using L2X0 CTRL
* register r0 contains value to be programmed.
* L2 cache is already invalidate by ROM code as part
* of MPUSS OFF wakeup path.
*/
ldr r2, =OMAP44XX_L2CACHE_BASE
ldr r0, [r2, #L2X0_CTRL]
and r0, #0x0f
cmp r0, #1
beq skip_l2en @ Skip if already enabled
ldr r3, =OMAP44XX_SAR_RAM_BASE
ldr r1, [r3, #OMAP_TYPE_OFFSET]
cmp r1, #0x1 @ Check for HS device
bne set_gp_por
ldr r0, =OMAP4_PPA_L2_POR_INDEX
ldr r1, =OMAP44XX_SAR_RAM_BASE
ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
adr r3, ppa_por_params
str r4, [r3, #0x04]
mov r1, #0x0 @ Process ID
mov r2, #0x4 @ Flag
mov r6, #0xff
mov r12, #0x00 @ Secure Service ID
DO_SMC
b set_aux_ctrl
set_gp_por:
ldr r1, =OMAP44XX_SAR_RAM_BASE
ldr r0, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
ldr r12, =OMAP4_MON_L2X0_PREFETCH_INDEX @ Setup L2 PREFETCH
DO_SMC
set_aux_ctrl:
ldr r1, =OMAP44XX_SAR_RAM_BASE
ldr r0, [r1, #L2X0_AUXCTRL_OFFSET]
ldr r12, =OMAP4_MON_L2X0_AUXCTRL_INDEX @ Setup L2 AUXCTRL
DO_SMC
mov r0, #0x1
ldr r12, =OMAP4_MON_L2X0_CTRL_INDEX @ Enable L2 cache
DO_SMC
skip_l2en:
#endif
b cpu_resume @ Jump to generic resume
ENDPROC(omap4_cpu_resume)
#endif /* CONFIG_ARCH_OMAP4 */
#endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */
#ifndef CONFIG_OMAP4_ERRATA_I688
ENTRY(omap_bus_sync)
ret lr
ENDPROC(omap_bus_sync)
#endif
ENTRY(omap_do_wfi)
stmfd sp!, {lr}
/* Drain interconnect write buffers. */
bl omap_bus_sync
/*
* Execute an ISB instruction to ensure that all of the
* CP15 register changes have been committed.
*/
isb
/*
* Execute a barrier instruction to ensure that all cache,
* TLB and branch predictor maintenance operations issued
* by any CPU in the cluster have completed.
*/
dsb
dmb
/*
* Execute a WFI instruction and wait until the
* STANDBYWFI output is asserted to indicate that the
* CPU is in idle and low power state. CPU can specualatively
* prefetch the instructions so add NOPs after WFI. Sixteen
* NOPs as per Cortex-A9 pipeline.
*/
wfi @ Wait For Interrupt
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
ldmfd sp!, {pc}
ENDPROC(omap_do_wfi)