i.MX6: Add some controls to enable SMP cache coherency in SMP mode

This commit is contained in:
Gregory Nutt
2016-11-26 17:46:20 -06:00
parent 3353d9280f
commit 546e352830
6 changed files with 109 additions and 58 deletions
+66
View File
@@ -0,0 +1,66 @@
/****************************************************************************
* arch/arm/src/armv7-a/arm_scu.c
*
* Copyright (C) 2013, 2016 Gregory Nutt. All rights reserved.
* Author: Gregory Nutt <gnutt@nuttx.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name NuttX nor the names of its contributors may be
* used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#include "up_arch.h"
#include "scu.h"
#ifdef CONFIG_SMP
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: arm_enable_smp
 *
 * Description:
 *   Set the SMP bit that corresponds to the given CPU in the SCU
 *   configuration register and then set the enable bit in the SCU control
 *   register.  The intent is that the CPU then participates in SMP cache
 *   coherency.
 *
 * Input Parameters:
 *   cpu - Index of the CPU whose SMP bit is to be set.  It is passed
 *         unchecked to SCU_CONFIG_CPU_SMP().
 *
 * Returned Value:
 *   None
 *
 ****************************************************************************/

void arm_enable_smp(int cpu)
{
  int smpbit = SCU_CONFIG_CPU_SMP(cpu);

  /* Flag this CPU as an SMP participant.  The zero argument is presumably
   * the "bits to clear" mask, so nothing is cleared here (confirm against
   * the modifyreg32() prototype).
   *
   * NOTE(review): The Cortex-A9 MPCore TRM describes the SCU Configuration
   * Register as read-only, with the SMP bits mirroring each CPU's ACTLR.SMP
   * bit.  If that applies to this part, this write has no effect and the
   * ACTLR must be programmed instead -- TODO confirm.
   */

  modifyreg32(SCU_CONFIG, 0, smpbit);

  /* Then set the SCU enable bit in the SCU control register */

  modifyreg32(SCU_CTRL, 0, SCU_CTRL_ENABLE);
}
#endif
+16
View File
@@ -92,6 +92,7 @@
# define SCU_CONFIG_NCPUS(r) ((((uint32_t)(r) & SCU_CONFIG_NCPUS_MASK) >> SCU_CONFIG_NCPUS_SHIFT) + 1)
#define SCU_CONFIG_SMPCPUS_SHIFT 4 /* Processors that are in SMP or AMP mode */
#define SCU_CONFIG_SMPCPUS_MASK (15 << SCU_CONFIG_SMPCPUS_SHIFT)
# define SCU_CONFIG_CPU_SMP(n) (1 << ((n)+4)) /* Bit for CPUn within the SMPCPUS field */
# define SCU_CONFIG_CPU0_SMP (1 << 4)
# define SCU_CONFIG_CPU1_SMP (1 << 5)
# define SCU_CONFIG_CPU2_SMP (1 << 6)
@@ -156,4 +157,19 @@
#define SCU_SNSAC_PTIM_CPU(n) (1 << ((n)+4)) /* CPUn has non-secure access to private timers */
#define SCU_SNSAC_GTIM_CPU(n) (1 << ((n)+8)) /* CPUn has non-secure access to global timer */
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: arm_enable_smp
 *
 * Description:
 *   Enable the SCU and make certain that current CPU is participating in
 *   the SMP cache coherency.
 *
 * Input Parameters:
 *   cpu - Index of the CPU that is to participate in SMP cache coherency.
 *
 * Returned Value:
 *   None
 *
 ****************************************************************************/
void arm_enable_smp(int cpu);
#endif /* __ARCH_ARM_SRC_ARMV7_A_SCU_H */
+1
View File
@@ -81,6 +81,7 @@ CMN_CSRCS += arm_unblocktask.c arm_undefinedinsn.c
ifeq ($(CONFIG_SMP),y)
CMN_CSRCS += arm_cpuindex.c arm_cpustart.c arm_cpupause.c arm_cpuidlestack.c
CMN_CSRCS += arm_scu.c
endif
ifeq ($(CONFIG_DEBUG_IRQ_INFO),y)
+7
View File
@@ -52,6 +52,7 @@
#include "sctlr.h"
#include "smp.h"
#include "fpu.h"
#include "scu.h"
#include "gic.h"
#include "cp15_cacheops.h"
@@ -266,6 +267,12 @@ void arm_cpu_boot(int cpu)
arm_fpuconfig();
#endif
#ifdef CONFIG_SMP
/* Enable SMP cache coherency for CPUn (n != 0) */
arm_enable_smp(cpu);
#endif
/* Initialize the Generic Interrupt Controller (GIC) for CPUn (n != 0) */
arm_gic_initialize();
+7
View File
@@ -45,6 +45,7 @@
#include "up_internal.h"
#include "sctlr.h"
#include "scu.h"
#include "gic.h"
/****************************************************************************
@@ -108,6 +109,12 @@ void up_irqinitialize(void)
arm_gic0_initialize(); /* Initialization unique to CPU0 */
arm_gic_initialize(); /* Initialization common to all CPUs */
#ifdef CONFIG_SMP
/* Enable SMP cache coherency for CPU0 */
arm_enable_smp(0);
#endif
#ifdef CONFIG_ARCH_LOWVECTORS
/* If CONFIG_ARCH_LOWVECTORS is defined, then the vectors located at the
* beginning of the .text region must appear at address at the address
+12 -58
View File
@@ -107,7 +107,10 @@ Status
still hangs. These, I have determined, are due to other kinds of cache
coherency problems. Semaphores, message queues, etc. basically all
shared data must be made coherent. I am not sure how to do that. See
the SMP sectin below for more information.
the SMP section below for more information.
I also added some SCU controls that should enable cache consistency for SMP
CPUs, but I don't think I have that working right yet.
Platform Features
=================
@@ -508,70 +511,21 @@ Open Issues:
This will cause the interrupt handlers on other CPUs to spin until
leave_critical_section() is called. More verification is needed.
2. Cache Concurency. This is a complex problem. There is logic in place now to
clean CPU0 D-cache before starting a new CPU and for invalidating the D-Cache
when the new CPU is started. REVISIT: Seems that this should not be necessary.
If the Shareable bit set in the MMU mappings and my understanding is that this
should keep cache coherency at least within a cluster. I need to study more
how the inner and outer shareable attribute works to control cacheing
2. Cache Concurrency. Cache coherency in SMP configurations is managed by the
CPU. I don't think I have that set up correctly yet.
But there may are many, many more such cache coherency issues if I cannot find
a systematic way to manage cache coherency.
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dht0008a/CJABEHDA.html
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/CEGDBEJE.html
Try:
--- mmu.h.orig 2016-05-20 13:09:34.773462000 -0600
+++ mmu.h 2016-05-20 13:03:13.261978100 -0600
@@ -572,8 +572,14 @@
#define MMU_ROMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_R1 | PMD_CACHEABLE | \
PMD_SECT_DOM(0))
-#define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \
+#ifdef CONFIG_SMP
+
+# define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \
+ PMD_SECT_S | PMD_SECT_DOM(0))
+#else
+# define MMU_MEMFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_CACHEABLE | \
PMD_SECT_DOM(0))
+#endif
#define MMU_IOFLAGS (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | PMD_DEVICE | \
PMD_SECT_DOM(0) | PMD_SECT_XN)
#define MMU_STRONGLY_ORDERED (PMD_TYPE_SECT | PMD_SECT_AP_RW1 | \
Another alternative would be to place all spinlocks in a non-cachable memory
region. That is problem what will have to be done.
This is a VERIFIED PROBLEM: Cache inconsistencies appear to be the root
cause of all current SMP issues.
I have seen cases where CPU0 sets a spinlock=1 then
tries to lock the spinlock. CPU0 will wait in this case until CPU1 unlocks the
spinlock. Most of this happens correctly; I can see that CPU1 does set the
spinlock=0, but CPU0 never sees the change and spins forever. That is surely
a consequence of cache issues.
This was observed between up_cpu_pause() and arm_pause_handler() with the
spinlock "g_cpu_paused[cpu]". CPU1 correctly sets g_cpu_paused[cpu] to zero
but CPU0 never sees the change.
Caching probably interferes with spinlocks as they are currently implemented.
Waiting on a cached copy of the spinlock may result in a hang or a failure to
wait.
Should all spinlocks go into a special "strongly ordered" memory region?
No... that is not sufficient:
Currently cache inconsistencies appear to be the root cause of all current SMP
issues.
2016-11-26: With regard to SMP, the major issue is cache coherency. I added
some special build logic to move spinlock data into the separate, non-
cached section. That gives an improvement in performance but there are
still hangs. These, I have determined, are due to other kinds of cache
coherency problems. Semaphores, message queues, etc. basically all
shared data must be made coherent. I am not sure how to do that.
shared data must be made coherent.
I also added some SCU controls that should enable cache consistency for SMP
CPUs, but I don't think I have that working right yet.
Configurations
==============