diff --git a/arch/arm/src/armv7-a/arm_scu.c b/arch/arm/src/armv7-a/arm_scu.c new file mode 100644 index 00000000000..24fa156e3c7 --- /dev/null +++ b/arch/arm/src/armv7-a/arm_scu.c @@ -0,0 +1,66 @@ +/**************************************************************************** + * arch/arm/src/armv7-a/arm_scu.c + * + * Copyright (C) 2013, 2016 Gregory Nutt. All rights reserved. + * Author: Gregory Nutt + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name NuttX nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include + +#include "up_arch.h" +#include "scu.h" + +#ifdef CONFIG_SMP + +/**************************************************************************** + * Public Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: arm_enable_smp + * + * Description: + * Enable the SCU and make certain that current CPU is participating in + * the SMP cache coherency. + * + * Input Parameters: + * cpu - Index of the CPU whose SMP bit, (1 << (cpu + 4)), is requested in + * SCU_CONFIG. + * + * Returned Value: + * None + * + * NOTE(review): SCU_CONFIG is believed to be a read-only register on the + * Cortex-A9 MPCore SCU (its SMP bits may only mirror each CPU's ACTLR.SMP + * bit), in which case the first write below has no effect -- confirm + * against the TRM. + * + ****************************************************************************/ + +void arm_enable_smp(int cpu) +{ + /* Request this CPU's SMP bit in SCU_CONFIG, then SCU_CTRL_ENABLE in + * SCU_CTRL. Assumes modifyreg32(addr, clearbits, setbits) -- TODO confirm. + */ + + modifyreg32(SCU_CONFIG, 0, SCU_CONFIG_CPU_SMP(cpu)); + modifyreg32(SCU_CTRL, 0, SCU_CTRL_ENABLE); +} + +#endif diff --git a/arch/arm/src/armv7-a/scu.h b/arch/arm/src/armv7-a/scu.h index cd44da392d8..a51f09cec48 100644 --- a/arch/arm/src/armv7-a/scu.h +++ b/arch/arm/src/armv7-a/scu.h @@ -92,6 +92,7 @@ # define SCU_CONFIG_NCPUS(r) ((((uint32_t)(r) & SCU_CONFIG_NCPUS_MASK) >> SCU_CONFIG_NCPUS_SHIFT) + 1) #define SCU_CONFIG_SMPCPUS_SHIFT 4 /* Processors that are in SMP or AMP mode */ #define SCU_CONFIG_SMPCPUS_MASK (15 << SCU_CONFIG_SMPCPUS_SHIFT) +# define SCU_CONFIG_CPU_SMP(n) (1 << ((n)+4)) # define SCU_CONFIG_CPU0_SMP (1 << 4) # define SCU_CONFIG_CPU1_SMP (1 << 5) # define SCU_CONFIG_CPU2_SMP (1 << 6) @@ -156,4 +157,19 @@ #define SCU_SNSAC_PTIM_CPU(n) (1 << ((n)+4)) /* CPUn has non-secure access to private timers */ #define SCU_SNSAC_GTIM_CPU(n) (1 << ((n)+8)) /* CPUn has non-secure access to global timer */ +/**************************************************************************** + * Public Functions + ****************************************************************************/ + 
+/**************************************************************************** + * Name: arm_enable_smp + * + * Description: + * Enable the SCU and make certain that current CPU is participating in + * the SMP cache coherency. + * + * Input Parameters: + * cpu - Index of the CPU to be marked as participating in SMP + * + * Returned Value: + * None + * + ****************************************************************************/ + +void arm_enable_smp(int cpu); + #endif /* __ARCH_ARM_SRC_ARMV7_A_SCU_H */ diff --git a/arch/arm/src/imx6/Make.defs b/arch/arm/src/imx6/Make.defs index 4870aa1d1de..9986ac23d9d 100644 --- a/arch/arm/src/imx6/Make.defs +++ b/arch/arm/src/imx6/Make.defs @@ -81,6 +81,7 @@ CMN_CSRCS += arm_unblocktask.c arm_undefinedinsn.c ifeq ($(CONFIG_SMP),y) CMN_CSRCS += arm_cpuindex.c arm_cpustart.c arm_cpupause.c arm_cpuidlestack.c +CMN_CSRCS += arm_scu.c endif ifeq ($(CONFIG_DEBUG_IRQ_INFO),y) diff --git a/arch/arm/src/imx6/imx_cpuboot.c b/arch/arm/src/imx6/imx_cpuboot.c index 50b23b5c1d4..db087575797 100644 --- a/arch/arm/src/imx6/imx_cpuboot.c +++ b/arch/arm/src/imx6/imx_cpuboot.c @@ -52,6 +52,7 @@ #include "sctlr.h" #include "smp.h" #include "fpu.h" +#include "scu.h" #include "gic.h" #include "cp15_cacheops.h" @@ -266,6 +267,12 @@ void arm_cpu_boot(int cpu) arm_fpuconfig(); #endif +#ifdef CONFIG_SMP + /* Enable SMP cache coherency for this CPU (CPUn, as passed in 'cpu') */ + + arm_enable_smp(cpu); +#endif + /* Initialize the Generic Interrupt Controller (GIC) for CPUn (n != 0) */ arm_gic_initialize(); diff --git a/arch/arm/src/imx6/imx_irq.c b/arch/arm/src/imx6/imx_irq.c index e00a8e9527d..ecf2f197ac8 100644 --- a/arch/arm/src/imx6/imx_irq.c +++ b/arch/arm/src/imx6/imx_irq.c @@ -45,6 +45,7 @@ #include "up_internal.h" #include "sctlr.h" +#include "scu.h" #include "gic.h" /**************************************************************************** @@ -108,6 +109,12 @@ void up_irqinitialize(void) arm_gic0_initialize(); /* Initialization unique to CPU0 */ arm_gic_initialize(); /* Initialization common to all CPUs */ +#ifdef CONFIG_SMP + /* Enable SMP cache coherency for CPU0 */ + + arm_enable_smp(0); 
+#endif + #ifdef CONFIG_ARCH_LOWVECTORS /* If CONFIG_ARCH_LOWVECTORS is defined, then the vectors located at the * beginning of the .text region must appear at address at the address diff --git a/configs/sabre-6quad/README.txt b/configs/sabre-6quad/README.txt index afd83b14a9d..3a7988d0296 100644 --- a/configs/sabre-6quad/README.txt +++ b/configs/sabre-6quad/README.txt @@ -107,7 +107,10 @@ Status still hangs. These, I have determined are to other kinds of cache coherency problems. Semaphores, message queues, etc. basically all shared data must be made coherent. I am not sure how to do that. See - the SMP sectin below for more information. + the SMP section below for more information. + + I also added some SCU controls that should enable cache consistency for SMP + CPUs, but I don't think I have that working right yet. Platform Features ================= @@ -508,70 +511,21 @@ Open Issues: This will cause the interrupt handlers on other CPUs to spin until leave_critical_section() is called. More verification is needed. -2. Cache Concurency. This is a complex problem. There is logic in place now to - clean CPU0 D-cache before starting a new CPU and for invalidating the D-Cache - when the new CPU is started. REVISIT: Seems that this should not be necessary. - If the Shareable bit set in the MMU mappings and my understanding is that this - should keep cache coherency at least within a cluster. I need to study more - how the inner and outer shareable attribute works to control cacheing +2. Cache Concurrency. Cache coherency in SMP configurations is managed by the + CPU. I don't think I have that set up correctly yet. - But there may are many, many more such cache coherency issues if I cannot find - a systematic way to manage cache coherency. 
- - http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dht0008a/CJABEHDA.html - http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/CEGDBEJE.html - - Try: - - --- mmu.h.orig 2016-05-20 13:09:34.773462000 -0600 - +++ mmu.h 2016-05-20 13:03:13.261978100 -0600 - @@ -572,8 +572,14 @@ - - #define MMU_ROMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_R1 | PMD_CACHEABLE | \ - PMD_SECT_DOM(0)) - -#define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \ - +#ifdef CONFIG_SMP - + - +# define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \ - + PMD_SECT_S | PMD_SECT_DOM(0)) - +#else - +# define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \ - PMD_SECT_DOM(0)) - +#endif - #define MMU_IOFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_DEVICE | \ - PMD_SECT_DOM(0) | PMD_SECT_XN) - #define MMU_STRONGLY_ORDERED (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | \ - - Another alternative would be to place all spinlocks in a non-cachable memory - region. That is problem what will have to be done. - - This is a VERIFIED PROBLEM: Cache inconsistencies appear to be the root - cause of all current SMP issues. - - I have seen cases where CPU0 sets a spinlock=1 then - tries to lock the spinlock. CPU0 will wait in this case until CPU1 unlocks the - spinlock. Most of this happens correctly; I can see that CPU1 does set the - spinlock=0, but CPU0 never sees the change and spins forever. That is surely - a consequence of cache issues. - - This was observed between up_cpu_pause() and arm_pause_handler() with the - spinlock "g_cpu_paused[cpu]". CPU1 correctly sets g_cpu_paused[cpu] to zero - but CPU0 never sees the change. - - Caching probably interferes with spinlocks as they are currently implemented. - Waiting on a cached copy of the spinlock may result in a hang or a failure to - wait. - - Should all spinlocks go into a special "strongly ordered" memory region? - - No... 
 that is not sufficient: + Cache inconsistencies currently appear to be the root cause of all SMP + issues. 2016-11-26: With regard to SMP, the major issue is cache coherency. I added some special build logic to move spinlock data into the separate, non- cached section. That gives an improvement in performance but there are still hangs. These, I have determined are to other kinds of cache coherency problems. Semaphores, message queues, etc. basically all - shared data must be made coherent. I am not sure how to do that. + shared data must be made coherent. + + I also added some SCU controls that should enable cache consistency for SMP + CPUs, but I don't think I have that working right yet. Configurations ==============