From e2df52390ace192e8fe632ecb83c2fa44373259a Mon Sep 17 00:00:00 2001 From: ligd Date: Wed, 10 Aug 2022 17:22:48 +0800 Subject: [PATCH] SMP: fix crash when switch to new task which is still running Situation: Assume we have 2 cpus, and busy run task0. CPU0 CPU1 task0 -> task1 task2 -> task0 1. remove task0 form runninglist 2. take task1 as new tcb 3. add task0 to blocklist 4. clear spinlock 4.1 remove task2 form runninglist 4.2 take task0 as new tcb 4.3 add task2 to blocklist 4.4 use svc ISR swith to task0 4.5 crash 5. use svc ISR swith to task1 Fix: Move clear spinlock to the end of svc ISR Signed-off-by: ligd --- arch/arm/src/arm/arm_syscall.c | 9 ++- arch/arm/src/armv6-m/arm_doirq.c | 11 +++- arch/arm/src/armv7-a/arm_doirq.c | 9 ++- arch/arm/src/armv7-a/arm_syscall.c | 8 ++- arch/arm/src/armv7-m/arm_doirq.c | 11 +++- arch/arm/src/armv7-r/arm_doirq.c | 9 ++- arch/arm/src/armv7-r/arm_syscall.c | 8 ++- arch/arm/src/armv8-m/arm_doirq.c | 11 +++- arch/arm64/src/common/arm64_doirq.c | 6 +- arch/arm64/src/common/arm64_syscall.c | 7 +++ arch/ceva/src/common/up_doirq.c | 6 +- arch/risc-v/src/common/riscv_doirq.c | 19 ++++-- .../common/supervisor/riscv_perform_syscall.c | 16 ++++- arch/sim/src/sim/up_blocktask.c | 4 ++ arch/sim/src/sim/up_exit.c | 4 ++ arch/sim/src/sim/up_releasepending.c | 4 ++ arch/sim/src/sim/up_reprioritizertr.c | 4 ++ arch/sim/src/sim/up_smpsignal.c | 4 ++ arch/sim/src/sim/up_unblocktask.c | 8 +++ arch/xtensa/src/common/xtensa_irqdispatch.c | 9 ++- include/nuttx/irq.h | 20 +++++++ sched/irq/irq_csection.c | 60 ++++++++++++++++++- sched/sched/sched_resumescheduler.c | 40 ------------- 23 files changed, 224 insertions(+), 63 deletions(-) diff --git a/arch/arm/src/arm/arm_syscall.c b/arch/arm/src/arm/arm_syscall.c index d12cf751fc4..81828e0bfc6 100644 --- a/arch/arm/src/arm/arm_syscall.c +++ b/arch/arm/src/arm/arm_syscall.c @@ -149,11 +149,18 @@ uint32_t *arm_syscall(uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; /* Return the last value of curent_regs. This supports context switches diff --git a/arch/arm/src/armv6-m/arm_doirq.c b/arch/arm/src/armv6-m/arm_doirq.c index ce1967c62f5..977976f0434 100644 --- a/arch/arm/src/armv6-m/arm_doirq.c +++ b/arch/arm/src/armv6-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm/src/armv7-a/arm_doirq.c b/arch/arm/src/armv7-a/arm_doirq.c index e8c7e1c66c9..17b5759e22a 100644 --- a/arch/arm/src/armv7-a/arm_doirq.c +++ b/arch/arm/src/armv7-a/arm_doirq.c @@ -89,11 +89,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; #endif diff --git a/arch/arm/src/armv7-a/arm_syscall.c b/arch/arm/src/armv7-a/arm_syscall.c index b621d6044fc..454f2301318 100644 --- a/arch/arm/src/armv7-a/arm_syscall.c +++ b/arch/arm/src/armv7-a/arm_syscall.c @@ -555,7 +555,13 @@ uint32_t *arm_syscall(uint32_t *regs) } #endif - regs = (uint32_t *)CURRENT_REGS; + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } /* Report what happened */ diff --git a/arch/arm/src/armv7-m/arm_doirq.c b/arch/arm/src/armv7-m/arm_doirq.c index 1aeda39faaa..358aefad65e 100644 --- a/arch/arm/src/armv7-m/arm_doirq.c +++ b/arch/arm/src/armv7-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm/src/armv7-r/arm_doirq.c b/arch/arm/src/armv7-r/arm_doirq.c index 6d59fc035da..d601badb67f 100644 --- a/arch/arm/src/armv7-r/arm_doirq.c +++ b/arch/arm/src/armv7-r/arm_doirq.c @@ -60,11 +60,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) irq_dispatch(irq, regs); + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; board_autoled_off(LED_INIRQ); diff --git a/arch/arm/src/armv7-r/arm_syscall.c b/arch/arm/src/armv7-r/arm_syscall.c index 8283583ca86..720c3e83778 100644 --- a/arch/arm/src/armv7-r/arm_syscall.c +++ b/arch/arm/src/armv7-r/arm_syscall.c @@ -533,7 +533,13 @@ uint32_t *arm_syscall(uint32_t *regs) break; } - regs = (uint32_t *)CURRENT_REGS; + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } /* Report what happened */ diff --git a/arch/arm/src/armv8-m/arm_doirq.c b/arch/arm/src/armv8-m/arm_doirq.c index 5cc87802c95..0670c76aef9 100644 --- a/arch/arm/src/armv8-m/arm_doirq.c +++ b/arch/arm/src/armv8-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm64/src/common/arm64_doirq.c b/arch/arm64/src/common/arm64_doirq.c index 15bc7fd84d7..0bf46af4a45 100644 --- a/arch/arm64/src/common/arm64_doirq.c +++ b/arch/arm64/src/common/arm64_doirq.c @@ -106,13 +106,17 @@ uint64_t *arm64_doirq(int irq, uint64_t * regs) group_addrenv(NULL); #endif + + /* Restore the cpu lock */ + + restore_critical_section(); + regs = (uint64_t *)CURRENT_REGS; } /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint64_t *)CURRENT_REGS; CURRENT_REGS = NULL; return regs; diff --git a/arch/arm64/src/common/arm64_syscall.c b/arch/arm64/src/common/arm64_syscall.c index 15c7eef632f..29944e23bf6 100644 --- a/arch/arm64/src/common/arm64_syscall.c +++ b/arch/arm64/src/common/arm64_syscall.c @@ -202,6 +202,13 @@ uint64_t *arm64_syscall_switch(uint64_t * regs) break; } + /* Restore the cpu lock */ + + if ((uint64_t *)f_regs != ret_regs) + { + restore_critical_section(); + } + return ret_regs; } diff --git a/arch/ceva/src/common/up_doirq.c b/arch/ceva/src/common/up_doirq.c index 1bab68b323d..f2ff7562546 100644 --- a/arch/ceva/src/common/up_doirq.c +++ b/arch/ceva/src/common/up_doirq.c @@ -76,7 +76,11 @@ uint32_t *up_doirq(int irq, uint32_t *regs) * a context switch occurred during interrupt processing. */ - regs = CURRENT_REGS; + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = CURRENT_REGS; + } /* Restore the previous value of CURRENT_REGS. NULL would indicate * that we are no longer in an interrupt handler. diff --git a/arch/risc-v/src/common/riscv_doirq.c b/arch/risc-v/src/common/riscv_doirq.c index 2792ef46d3a..4dad4c6887b 100644 --- a/arch/risc-v/src/common/riscv_doirq.c +++ b/arch/risc-v/src/common/riscv_doirq.c @@ -102,13 +102,20 @@ uintptr_t *riscv_doirq(int irq, uintptr_t *regs) } #endif - /* If a context switch occurred while processing the interrupt then - * CURRENT_REGS may have change value. If we return any value different - * from the input regs, then the lower level will know that a context - * switch occurred during interrupt processing. - */ + if (regs != CURRENT_REGS) + { + /* Restore the cpu lock */ - regs = (uintptr_t *)CURRENT_REGS; + restore_critical_section(); + + /* If a context switch occurred while processing the interrupt then + * CURRENT_REGS may have change value. If we return any value + * different from the input regs, then the lower level will know + * that a context switch occurred during interrupt processing. + */ + + regs = (uintptr_t *)CURRENT_REGS; + } /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. diff --git a/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c b/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c index af32ec2cef8..89dfd6ca5e3 100644 --- a/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c +++ b/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c @@ -56,9 +56,21 @@ void *riscv_perform_syscall(uintptr_t *regs) } #endif - /* Set new context */ + if (regs != CURRENT_REGS) + { + /* Restore the cpu lock */ + + restore_critical_section(); + + /* If a context switch occurred while processing the interrupt then + * CURRENT_REGS may have change value. If we return any value + * different from the input regs, then the lower level will know + * that a context switch occurred during interrupt processing. + */ + + regs = (uintptr_t *)CURRENT_REGS; + } - regs = (uintptr_t *)CURRENT_REGS; CURRENT_REGS = NULL; return regs; diff --git a/arch/sim/src/sim/up_blocktask.c b/arch/sim/src/sim/up_blocktask.c index 5463099c7a5..779d283cce5 100644 --- a/arch/sim/src/sim/up_blocktask.c +++ b/arch/sim/src/sim/up_blocktask.c @@ -124,6 +124,10 @@ void up_block_task(struct tcb_s *tcb, tstate_t task_state) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_exit.c b/arch/sim/src/sim/up_exit.c index 3ea743a151e..d3e6f4fec5a 100644 --- a/arch/sim/src/sim/up_exit.c +++ b/arch/sim/src/sim/up_exit.c @@ -76,6 +76,10 @@ void up_exit(int status) nxsched_resume_scheduler(tcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(tcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_releasepending.c b/arch/sim/src/sim/up_releasepending.c index 1e1352d5141..1b664e62f03 100644 --- a/arch/sim/src/sim/up_releasepending.c +++ b/arch/sim/src/sim/up_releasepending.c @@ -93,6 +93,10 @@ void up_release_pending(void) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_reprioritizertr.c b/arch/sim/src/sim/up_reprioritizertr.c index ca8858a0be6..db2f143688f 100644 --- a/arch/sim/src/sim/up_reprioritizertr.c +++ b/arch/sim/src/sim/up_reprioritizertr.c @@ -146,6 +146,10 @@ void up_reprioritize_rtr(struct tcb_s *tcb, uint8_t priority) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_smpsignal.c b/arch/sim/src/sim/up_smpsignal.c index facb1ae690f..54ba4aa8a35 100644 --- a/arch/sim/src/sim/up_smpsignal.c +++ b/arch/sim/src/sim/up_smpsignal.c @@ -199,6 +199,10 @@ int up_cpu_paused(int cpu) nxsched_resume_scheduler(tcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts. Any necessary address environment changes * will be made when the interrupt returns. */ diff --git a/arch/sim/src/sim/up_unblocktask.c b/arch/sim/src/sim/up_unblocktask.c index 7f5474e23c4..3f1bd1929e8 100644 --- a/arch/sim/src/sim/up_unblocktask.c +++ b/arch/sim/src/sim/up_unblocktask.c @@ -101,6 +101,10 @@ void up_unblock_task(struct tcb_s *tcb) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ up_restorestate(rtcb->xcp.regs); @@ -125,6 +129,10 @@ void up_unblock_task(struct tcb_s *tcb) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/xtensa/src/common/xtensa_irqdispatch.c b/arch/xtensa/src/common/xtensa_irqdispatch.c index 7c5e26b43b5..4b4b4b90db5 100644 --- a/arch/xtensa/src/common/xtensa_irqdispatch.c +++ b/arch/xtensa/src/common/xtensa_irqdispatch.c @@ -85,11 +85,18 @@ uint32_t *xtensa_irq_dispatch(int irq, uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; #endif diff --git a/include/nuttx/irq.h b/include/nuttx/irq.h index ba43f423850..da445182396 100644 --- a/include/nuttx/irq.h +++ b/include/nuttx/irq.h @@ -219,6 +219,26 @@ void leave_critical_section(irqstate_t flags); # define leave_critical_section(f) up_irq_restore(f) #endif +/**************************************************************************** + * Name: restore_critical_section + * + * Description: + * Restore the critical_section + * + * Input Parameters: + * None + * + * Returned Value: + * None + * + ****************************************************************************/ + +#ifdef CONFIG_SMP +void restore_critical_section(void); +#else +# define restore_critical_section() +#endif + #undef EXTERN #ifdef __cplusplus } diff --git a/sched/irq/irq_csection.c b/sched/irq/irq_csection.c index e33f441cfef..f0b6fa001cb 100644 --- a/sched/irq/irq_csection.c +++ b/sched/irq/irq_csection.c @@ -634,7 +634,7 @@ void leave_critical_section(irqstate_t flags) * holder of the IRQ lock. * * Input Parameters: - * rtcb - Points to the blocked TCB that is ready-to-run + * cpu - Points to which cpu * * Returned Value: * true - IRQs are locked by a different CPU. @@ -697,4 +697,62 @@ bool irq_cpu_locked(int cpu) } #endif +/**************************************************************************** + * Name: restore_critical_section + * + * Description: + * Restore the critical_section + * + * Input Parameters: + * None + * + * Returned Value: + * None + * + ****************************************************************************/ + +#ifdef CONFIG_SMP +void restore_critical_section(void) +{ + /* NOTE: The following logic for adjusting global IRQ controls were + * derived from nxsched_add_readytorun() and sched_removedreadytorun() + * Here, we only handles clearing logic to defer unlocking IRQ lock + * followed by context switching. + */ + + FAR struct tcb_s *tcb = this_task(); + int me = this_cpu(); + + /* Adjust global IRQ controls. If irqcount is greater than zero, + * then this task/this CPU holds the IRQ lock + */ + + if (tcb->irqcount > 0) + { + /* Do notihing here + * NOTE: spin_setbit() is done in nxsched_add_readytorun() + * and nxsched_remove_readytorun() + */ + } + + /* No.. This CPU will be relinquishing the lock. But this works + * differently if we are performing a context switch from an + * interrupt handler and the interrupt handler has established + * a critical section. We can detect this case when + * g_cpu_nestcount[me] > 0. + */ + + else if (g_cpu_nestcount[me] <= 0) + { + /* Release our hold on the IRQ lock. */ + + if ((g_cpu_irqset & (1 << me)) != 0) + { + spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock, + &g_cpu_irqlock); + } + } +} +#endif /* CONFIG_SMP */ + #endif /* CONFIG_IRQCOUNT */ diff --git a/sched/sched/sched_resumescheduler.c b/sched/sched/sched_resumescheduler.c index 4ff4e96613b..77d6bc92d2f 100644 --- a/sched/sched/sched_resumescheduler.c +++ b/sched/sched/sched_resumescheduler.c @@ -74,46 +74,6 @@ void nxsched_resume_scheduler(FAR struct tcb_s *tcb) #ifdef CONFIG_SCHED_INSTRUMENTATION sched_note_resume(tcb); #endif - -#ifdef CONFIG_SMP - /* NOTE: The following logic for adjusting global IRQ controls were - * derived from nxsched_add_readytorun() and sched_removedreadytorun() - * Here, we only handles clearing logic to defer unlocking IRQ lock - * followed by context switching. - */ - - int me = this_cpu(); - - /* Adjust global IRQ controls. If irqcount is greater than zero, - * then this task/this CPU holds the IRQ lock - */ - - if (tcb->irqcount > 0) - { - /* Do nothing here - * NOTE: spin_setbit() is done in nxsched_add_readytorun() - * and nxsched_remove_readytorun() - */ - } - - /* No.. This CPU will be relinquishing the lock. But this works - * differently if we are performing a context switch from an - * interrupt handler and the interrupt handler has established - * a critical section. We can detect this case when - * g_cpu_nestcount[me] > 0. - */ - - else if (g_cpu_nestcount[me] <= 0) - { - /* Release our hold on the IRQ lock. */ - - if ((g_cpu_irqset & (1 << me)) != 0) - { - spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock, - &g_cpu_irqlock); - } - } -#endif /* CONFIG_SMP */ } #endif /* CONFIG_RR_INTERVAL > 0 || CONFIG_SCHED_RESUMESCHEDULER */