From d295f11a3a2ce9e1299ff454c1f630598a9f473c Mon Sep 17 00:00:00 2001
From: Masayuki Ishikawa
Date: Wed, 31 Jan 2018 11:23:22 +0900
Subject: [PATCH] SMP: Introduce a new global IRQ clearing logic and tasklist
 protection.

The previous implementation cleared the global IRQ control in
sched_addreadytorun() and sched_removereadytorun(), which was too early.
As a result, nxsem_post() could enter the critical section even though
nxsem_wait() had not yet reached the blocked state.

This patch moves the clearing of the global IRQ control from
sched_addreadytorun() and sched_removereadytorun() into
sched_resume_scheduler() to ensure that nxsem_post() enters the critical
section at the correct point.  As a consequence, sched_resumescheduler.c
is now always required in SMP configurations.

In addition, task_exit() had to be modified to call
sched_resume_scheduler(), because it calls sched_removereadytorun()
internally; without this call a deadlock would occur.

However, I encountered another DEBUGASSERT() in sched_cpu_select()
during an HTTP streaming aging test on lc823450-xgevk.
sched_cpu_select() accesses g_assignedtasks, which may be modified by
another CPU.  Similarly, the other tasklists may be modified
concurrently if both CPUs are executing scheduling logic.  To avoid
this, I introduced tasklist protection APIs.

With these changes, SMP kernel stability is much improved.

Signed-off-by: Masayuki Ishikawa
---
 include/nuttx/sched.h                |   2 +-
 sched/sched/Make.defs                |   6 ++
 sched/sched/sched.h                  |   8 ++
 sched/sched/sched_addblocked.c       |  12 +++
 sched/sched/sched_addreadytorun.c    |  14 ++-
 sched/sched/sched_mergepending.c     |  15 ++-
 sched/sched/sched_mergeprioritized.c |  19 +++-
 sched/sched/sched_removereadytorun.c |  16 +++-
 sched/sched/sched_resumescheduler.c  |  42 ++++++++-
 sched/sched/sched_tasklistlock.c     | 133 +++++++++++++++++++++++++++
 sched/task/task_exit.c               |   8 ++
 11 files changed, 259 insertions(+), 16 deletions(-)
 create mode 100644 sched/sched/sched_tasklistlock.c

diff --git a/include/nuttx/sched.h b/include/nuttx/sched.h
index 10cbcbefc88..3810962c8b3 100644
--- a/include/nuttx/sched.h
+++ b/include/nuttx/sched.h
@@ -883,7 +883,7 @@ void task_vforkabort(FAR struct task_tcb_s *child, int errcode);
 ********************************************************************************/

 #if CONFIG_RR_INTERVAL > 0 || defined(CONFIG_SCHED_SPORADIC) || \
-    defined(CONFIG_SCHED_INSTRUMENTATION)
+    defined(CONFIG_SCHED_INSTRUMENTATION) || defined(CONFIG_SMP)
 void sched_resume_scheduler(FAR struct tcb_s *tcb);
 #else
 #  define sched_resume_scheduler(tcb)
diff --git a/sched/sched/Make.defs b/sched/sched/Make.defs
index 0626d9e39b0..c9088453c07 100644
--- a/sched/sched/Make.defs
+++ b/sched/sched/Make.defs
@@ -77,6 +77,8 @@ else ifeq ($(CONFIG_SCHED_SPORADIC),y)
 CSRCS += sched_resumescheduler.c
 else ifeq ($(CONFIG_SCHED_INSTRUMENTATION),y)
 CSRCS += sched_resumescheduler.c
+else ifeq ($(CONFIG_SMP),y)
+CSRCS += sched_resumescheduler.c
 endif

 ifeq ($(CONFIG_SCHED_CPULOAD),y)
@@ -96,6 +98,10 @@ ifeq ($(CONFIG_SCHED_INSTRUMENTATION_BUFFER),y)
 CSRCS += sched_note.c
 endif

+ifeq ($(CONFIG_SMP),y)
+CSRCS += sched_tasklistlock.c
+endif
+
 # Include sched build support

 DEPPATH += --dep-path sched
diff --git a/sched/sched/sched.h b/sched/sched/sched.h
index e05b88692c6..63c081de856 100644
--- a/sched/sched/sched.h
+++ b/sched/sched/sched.h
@@ -364,6 +364,10 @@ extern volatile spinlock_t g_cpu_schedlock SP_SECTION;
 extern volatile spinlock_t g_cpu_locksetlock SP_SECTION;
 extern volatile cpu_set_t g_cpu_lockset SP_SECTION;

+/* Used to
lock tasklist to prevent from concurrent access */ + +extern volatile spinlock_t g_cpu_tasklistlock SP_SECTION; + #endif /* CONFIG_SMP */ /**************************************************************************** @@ -425,6 +429,10 @@ void sched_sporadic_lowpriority(FAR struct tcb_s *tcb); #ifdef CONFIG_SMP int sched_cpu_select(cpu_set_t affinity); int sched_cpu_pause(FAR struct tcb_s *tcb); + +irqstate_t sched_tasklist_lock(void); +void sched_tasklist_unlock(irqstate_t lock); + # define sched_islocked(tcb) spin_islocked(&g_cpu_schedlock) #else # define sched_cpu_select(a) (0) diff --git a/sched/sched/sched_addblocked.c b/sched/sched/sched_addblocked.c index 34303681c5c..151faedb692 100644 --- a/sched/sched/sched_addblocked.c +++ b/sched/sched/sched_addblocked.c @@ -77,6 +77,12 @@ void sched_addblocked(FAR struct tcb_s *btcb, tstate_t task_state) DEBUGASSERT(task_state >= FIRST_BLOCKED_STATE && task_state <= LAST_BLOCKED_STATE); +#ifdef CONFIG_SMP + /* Lock the tasklists before accessing */ + + irqstate_t lock = sched_tasklist_lock(); +#endif + /* Add the TCB to the blocked task list associated with this state. */ tasklist = TLIST_BLOCKED(task_state); @@ -96,6 +102,12 @@ void sched_addblocked(FAR struct tcb_s *btcb, tstate_t task_state) dq_addlast((FAR dq_entry_t *)btcb, tasklist); } +#ifdef CONFIG_SMP + /* Unlock the tasklists */ + + sched_tasklist_unlock(lock); +#endif + /* Make sure the TCB's state corresponds to the list */ btcb->task_state = task_state; diff --git a/sched/sched/sched_addreadytorun.c b/sched/sched/sched_addreadytorun.c index dcd07ac9a6b..880f4b85bb0 100644 --- a/sched/sched/sched_addreadytorun.c +++ b/sched/sched/sched_addreadytorun.c @@ -173,6 +173,10 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb) int cpu; int me; + /* Lock the tasklists before accessing */ + + irqstate_t lock = sched_tasklist_lock(); + /* Check if the blocked TCB is locked to this CPU */ if ((btcb->flags & TCB_FLAG_CPU_LOCKED) != 0) @@ -339,10 +343,9 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb) else if (g_cpu_nestcount[me] <= 0) { - /* Release our hold on the IRQ lock. */ - - spin_clrbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock, - &g_cpu_irqlock); + /* Do nothing here + * NOTE: spin_clrbit() will be done in sched_resumescheduler() + */ } /* Sanity check. g_cpu_netcount should be greater than zero @@ -426,6 +429,9 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb) } } + /* Unlock the tasklists */ + + sched_tasklist_unlock(lock); return doswitch; } diff --git a/sched/sched/sched_mergepending.c b/sched/sched/sched_mergepending.c index 39968e6e116..6e258c7554a 100644 --- a/sched/sched/sched_mergepending.c +++ b/sched/sched/sched_mergepending.c @@ -199,6 +199,10 @@ bool sched_mergepending(void) int cpu; int me; + /* Lock the tasklist before accessing */ + + irqstate_t lock = sched_tasklist_lock(); + /* Remove and process every TCB in the g_pendingtasks list. * * Do nothing if (1) pre-emption is still disabled (by any CPU), or (2) if @@ -215,7 +219,7 @@ bool sched_mergepending(void) { /* The pending task list is empty */ - return ret; + goto errout_with_lock; } cpu = sched_cpu_select(ALL_CPUS /* ptcb->affinity */); @@ -259,7 +263,7 @@ bool sched_mergepending(void) * pending task list. 
*/ - return ret; + goto errout_with_lock; } /* Set up for the next time through the loop */ @@ -269,7 +273,7 @@ bool sched_mergepending(void) { /* The pending task list is empty */ - return ret; + goto errout_with_lock; } cpu = sched_cpu_select(ALL_CPUS /* ptcb->affinity */); @@ -285,6 +289,11 @@ bool sched_mergepending(void) TSTATE_TASK_READYTORUN); } +errout_with_lock: + + /* Unlock the tasklist */ + + sched_tasklist_unlock(lock); return ret; } #endif /* CONFIG_SMP */ diff --git a/sched/sched/sched_mergeprioritized.c b/sched/sched/sched_mergeprioritized.c index 90ef9dfbad9..7ff70b14df8 100644 --- a/sched/sched/sched_mergeprioritized.c +++ b/sched/sched/sched_mergeprioritized.c @@ -83,6 +83,12 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2, FAR struct tcb_s *tcb2; FAR struct tcb_s *tmp; +#ifdef CONFIG_SMP + /* Lock the tasklists before accessing */ + + irqstate_t lock = sched_tasklist_lock(); +#endif + DEBUGASSERT(list1 != NULL && list2 != NULL); /* Get a private copy of list1, clearing list1. We do this early so that @@ -99,7 +105,7 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2, { /* Special case.. list1 is empty. There is nothing to be done. */ - return; + goto ret_with_lock; } /* Now the TCBs are no longer accessible and we can change the state on @@ -122,7 +128,7 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2, /* Special case.. list2 is empty. Move list1 to list2. */ dq_move(&clone, list2); - return; + goto ret_with_lock; } /* Now loop until all entries from list1 have been merged into list2. tcb1 @@ -171,4 +177,13 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2, } } while (tcb1 != NULL); + +ret_with_lock: + +#ifdef CONFIG_SMP + /* Unlock the tasklists */ + + sched_tasklist_unlock(lock); +#endif + return; } diff --git a/sched/sched/sched_removereadytorun.c b/sched/sched/sched_removereadytorun.c index 231310f62db..f5e93b881c8 100644 --- a/sched/sched/sched_removereadytorun.c +++ b/sched/sched/sched_removereadytorun.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "irq/irq.h" #include "sched/sched.h" @@ -138,6 +139,10 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb) bool doswitch = false; int cpu; + /* Lock the tasklists before accessing */ + + irqstate_t lock = sched_tasklist_lock(); + /* Which CPU (if any) is the task running on? Which task list holds the * TCB? */ @@ -283,10 +288,9 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb) else if (g_cpu_nestcount[me] <= 0) { - /* Release our hold on the IRQ lock. */ - - spin_clrbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock, - &g_cpu_irqlock); + /* Do nothing here + * NOTE: spin_clrbit() will be done in sched_resumescheduler() + */ } /* Sanity check. 
g_cpu_netcount should be greater than zero
@@ -330,6 +334,10 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb)
   /* Since the TCB is no longer in any list, it is now invalid */

   rtcb->task_state = TSTATE_TASK_INVALID;
+
+  /* Unlock the tasklists */
+
+  sched_tasklist_unlock(lock);
   return doswitch;
 }
 #endif /* CONFIG_SMP */
diff --git a/sched/sched/sched_resumescheduler.c b/sched/sched/sched_resumescheduler.c
index 96097d4fcd6..08f104a5d83 100644
--- a/sched/sched/sched_resumescheduler.c
+++ b/sched/sched/sched_resumescheduler.c
@@ -45,10 +45,11 @@
 #include
 #include

+#include "irq/irq.h"
 #include "sched/sched.h"

 #if CONFIG_RR_INTERVAL > 0 || defined(CONFIG_SCHED_SPORADIC) || \
-    defined(CONFIG_SCHED_INSTRUMENTATION)
+    defined(CONFIG_SCHED_INSTRUMENTATION) || defined(CONFIG_SMP)

 /****************************************************************************
  * Public Functions
 ****************************************************************************/
@@ -101,7 +102,44 @@ void sched_resume_scheduler(FAR struct tcb_s *tcb)
   sched_note_resume(tcb);
 #endif

+#ifdef CONFIG_SMP
+  /* NOTE: The following logic for adjusting the global IRQ controls was
+   * derived from sched_addreadytorun() and sched_removereadytorun().
+   * Here we handle only the clearing logic in order to defer releasing
+   * the IRQ lock until after the context switch.
+   */
+
+  int me = this_cpu();
+
+  /* Adjust global IRQ controls.  If irqcount is greater than zero,
+   * then this task/this CPU holds the IRQ lock
+   */
+
+  if (tcb->irqcount > 0)
+    {
+      /* Do nothing here
+       * NOTE: spin_setbit() is done in sched_addreadytorun()
+       * and sched_removereadytorun()
+       */
+    }
+
+  /* No.. This CPU will be relinquishing the lock.  But this works
+   * differently if we are performing a context switch from an
+   * interrupt handler and the interrupt handler has established
+   * a critical section.  We can detect this case when
+   * g_cpu_nestcount[me] > 0.
+   */
+
+  else if (g_cpu_nestcount[me] <= 0)
+    {
+      /* Release our hold on the IRQ lock. */
+
+      spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock,
+                  &g_cpu_irqlock);
+    }
+#endif /* CONFIG_SMP */
+
 }

 #endif /* CONFIG_RR_INTERVAL > 0 || CONFIG_SCHED_SPORADIC || \
- *        CONFIG_SCHED_INSTRUMENTATION */
+ *        CONFIG_SCHED_INSTRUMENTATION || CONFIG_SMP */
diff --git a/sched/sched/sched_tasklistlock.c b/sched/sched/sched_tasklistlock.c
new file mode 100644
index 00000000000..d8946d4b740
--- /dev/null
+++ b/sched/sched/sched_tasklistlock.c
@@ -0,0 +1,133 @@
+/****************************************************************************
+ * sched/sched/sched_tasklistlock.c
+ *
+ * Copyright (C) 2018 Sony Corporation. All rights reserved.
+ * Author: Masayuki Ishikawa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name NuttX nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include
+#include
+
+#include
+#include
+
+#include "sched/sched.h"
+
+/****************************************************************************
+ * Public Data
+ ****************************************************************************/
+
+/* Spinlock to protect the tasklists */
+
+static volatile spinlock_t g_tasklist_lock SP_SECTION = SP_UNLOCKED;
+
+/* Handles nested calls */
+
+static volatile uint8_t g_tasklist_lock_count[CONFIG_SMP_NCPUS];
+
+/****************************************************************************
+ * Public Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Name: sched_tasklist_lock()
+ *
+ * Description:
+ *   Disable local interrupts and take the global spinlock (g_tasklist_lock)
+ *   if the call counter (g_tasklist_lock_count[cpu]) equals 0.  Then the
+ *   counter on the CPU is incremented to allow nested calls.
+ *
+ *   NOTE: This API is used to protect tasklists in the scheduler, so do
+ *   not use it for other purposes.
+ *
+ * Returned Value:
+ *   An opaque, architecture-specific value that represents the state of
+ *   the interrupts prior to the call to sched_tasklist_lock().
+ ****************************************************************************/
+
+irqstate_t sched_tasklist_lock(void)
+{
+  int me;
+  irqstate_t ret;
+
+  ret = up_irq_save();
+  me = this_cpu();
+
+  if (0 == g_tasklist_lock_count[me])
+    {
+      spin_lock(&g_tasklist_lock);
+    }
+
+  g_tasklist_lock_count[me]++;
+  ASSERT(0 != g_tasklist_lock_count[me]);
+  return ret;
+}
+
+/****************************************************************************
+ * Name: sched_tasklist_unlock()
+ *
+ * Description:
+ *   Decrement the call counter (g_tasklist_lock_count[cpu]) and, if it
+ *   decrements to zero, release the spinlock (g_tasklist_lock) and
+ *   restore the interrupt state to what it was prior to the previous call
+ *   to sched_tasklist_lock().
+ *
+ *   NOTE: This API is used to protect tasklists in the scheduler, so do
+ *   not use it for other purposes.
+ *
+ * Input Parameters:
+ *   lock - The architecture-specific value that represents the state of
+ *          the interrupts prior to the call to sched_tasklist_lock().
+ *
+ * Returned Value:
+ *   None
+ ****************************************************************************/
+
+void sched_tasklist_unlock(irqstate_t lock)
+{
+  int me;
+
+  me = this_cpu();
+
+  ASSERT(0 < g_tasklist_lock_count[me]);
+  g_tasklist_lock_count[me]--;
+
+  if (0 == g_tasklist_lock_count[me])
+    {
+      spin_unlock(&g_tasklist_lock);
+    }
+
+  up_irq_restore(lock);
+}
diff --git a/sched/task/task_exit.c b/sched/task/task_exit.c
index 6a04073f294..f42ef1421ee 100644
--- a/sched/task/task_exit.c
+++ b/sched/task/task_exit.c
@@ -97,6 +97,14 @@ int task_exit(void)
   (void)sched_removereadytorun(dtcb);
   rtcb = this_task();

+#ifdef CONFIG_SMP
+  /* Clearing the global IRQ control was moved from sched_removereadytorun()
+   * to sched_resume_scheduler(), so that API must be called here.
+   */
+
+  sched_resume_scheduler(rtcb);
+#endif
+
   /* We are now in a bad state -- the head of the ready to run task list
    * does not correspond to the thread that is running.  Disabling pre-
    * emption on this TCB and marking the new ready-to-run task as not
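
Usage sketch (illustration only, not part of the patch): the fragment below
shows how the new tasklist protection APIs are intended to be used around
scheduler task list manipulation.  The helper name example_move_to_list and
its parameters are placeholders, and the include list simply assumes the
usual scheduler headers; only sched_tasklist_lock(), sched_tasklist_unlock()
and dq_addlast() come from the patch itself.

#include <nuttx/config.h>
#include <nuttx/irq.h>
#include <nuttx/sched.h>

#include <queue.h>

#include "sched/sched.h"

/* Hypothetical example (not in the patch): move a TCB to the tail of a
 * scheduler task list without racing against the scheduling logic that may
 * be running on another CPU.
 */

static void example_move_to_list(FAR dq_queue_t *list, FAR struct tcb_s *tcb)
{
  /* Disable local interrupts and take g_tasklist_lock.  Nested calls on the
   * same CPU only increment the per-CPU counter, so this is safe even if
   * the caller already holds the tasklist lock.
   */

  irqstate_t lock = sched_tasklist_lock();

  /* The task lists may now be modified safely, e.g. with the queue
   * primitives used throughout the scheduler.
   */

  dq_addlast((FAR dq_entry_t *)tcb, list);

  /* Release the spinlock once the nesting count returns to zero and
   * restore the saved interrupt state.
   */

  sched_tasklist_unlock(lock);
}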