sigdeliver: fix system block when kill signal to idle in SMP

Bug description:

CONFIG_SMP=y

Suppose we have 2 cores in SMP, here is the ps return:

PID GROUP CPU PRI POLICY TYPE    NPX STATE     STACK   USED  FILLED COMMAND
  0     0   0   0 FIFO   Kthread N-- Assigned 004076 000748  18.3%  CPU0 IDLE
  1     0   1   0 FIFO   Kthread N-- Running  004096 000540  13.1%  CPU1 IDLE

nsh> kill -4 0
or:
nsh> kill -4 1

system blocked.

Reason:

In func xx_sigdeliver() restore stage, when saved_irqcount == 0, that means
rtcb NOT in critical_section before switch to xx_sigdeliver(), then we need
reset the critical_section state before swith back.

Fix:

Add condition to cover saved_irqcount == 0.

Change-Id: I4af7f95e47f6d78a4094c3757d39b01ac9d533b3
Signed-off-by: ligd <liguiding1@xiaomi.com>
This commit is contained in:
ligd
2021-01-20 21:26:28 +08:00
parent 96d0764bbc
commit 0fe5ec8a48
5 changed files with 55 additions and 140 deletions
+11 -28
View File
@@ -124,20 +124,19 @@ void arm_sigdeliver(void)
sinfo("Resuming\n");
/* Call enter_critical_section() to disable local interrupts before
* restoring local context.
*
* Here, we should not use up_irq_save() in SMP mode.
* For example, if we call up_irq_save() here and another CPU might
* have called up_cpu_pause() to this cpu, hence g_cpu_irqlock has
* been locked by the cpu, in this case, we would see a deadlock in
* later call of enter_critical_section() to restore irqcount.
* To avoid this situation, we need to call enter_critical_section().
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*/
#ifdef CONFIG_SMP
enter_critical_section();
#else
DEBUGASSERT(rtcb->irqcount == 0);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
#ifndef CONFIG_SUPPRESS_INTERRUPTS
up_irq_save();
#endif
@@ -159,22 +158,6 @@ void arm_sigdeliver(void)
regs[REG_CPSR] = rtcb->xcp.saved_cpsr;
rtcb->xcp.sigdeliver = NULL; /* Allows next handler to be scheduled */
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*
* REVISIT: irqcount should be one from the above call to
* enter_critical_section(). Could the saved_irqcount be zero? That
* would be a problem.
*/
DEBUGASSERT(rtcb->irqcount == 1);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
/* Then restore the correct state for this thread of execution. */
board_autoled_off(LED_SIGNAL);
+11 -28
View File
@@ -128,20 +128,19 @@ void arm_sigdeliver(void)
sinfo("Resuming\n");
/* Call enter_critical_section() to disable local interrupts before
* restoring local context.
*
* Here, we should not use up_irq_save() in SMP mode.
* For example, if we call up_irq_save() here and another CPU might
* have called up_cpu_pause() to this cpu, hence g_cpu_irqlock has
* been locked by the cpu, in this case, we would see a deadlock in
* later call of enter_critical_section() to restore irqcount.
* To avoid this situation, we need to call enter_critical_section().
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*/
#ifdef CONFIG_SMP
enter_critical_section();
#else
DEBUGASSERT(rtcb->irqcount == 0);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
#ifndef CONFIG_SUPPRESS_INTERRUPTS
up_irq_save();
#endif
@@ -171,22 +170,6 @@ void arm_sigdeliver(void)
#endif
rtcb->xcp.sigdeliver = NULL; /* Allows next handler to be scheduled */
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*
* REVISIT: irqcount should be one from the above call to
* enter_critical_section(). Could the saved_irqcount be zero? That
* would be a problem.
*/
DEBUGASSERT(rtcb->irqcount == 1);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
/* Then restore the correct state for this thread of
* execution.
*/
+11 -28
View File
@@ -128,20 +128,19 @@ void arm_sigdeliver(void)
sinfo("Resuming\n");
/* Call enter_critical_section() to disable local interrupts before
* restoring local context.
*
* Here, we should not use up_irq_save() in SMP mode.
* For example, if we call up_irq_save() here and another CPU might
* have called up_cpu_pause() to this cpu, hence g_cpu_irqlock has
* been locked by the cpu, in this case, we would see a deadlock in
* later call of enter_critical_section() to restore irqcount.
* To avoid this situation, we need to call enter_critical_section().
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*/
#ifdef CONFIG_SMP
enter_critical_section();
#else
DEBUGASSERT(rtcb->irqcount == 0);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
#ifndef CONFIG_SUPPRESS_INTERRUPTS
up_irq_save();
#endif
@@ -171,22 +170,6 @@ void arm_sigdeliver(void)
#endif
rtcb->xcp.sigdeliver = NULL; /* Allows next handler to be scheduled */
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*
* REVISIT: irqcount should be one from the above call to
* enter_critical_section(). Could the saved_irqcount be zero? That
* would be a problem.
*/
DEBUGASSERT(rtcb->irqcount == 1);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
/* Then restore the correct state for this thread of
* execution.
*/
+11 -28
View File
@@ -144,20 +144,19 @@ void up_sigdeliver(void)
sinfo("Resuming EPC: %016" PRIx64 " INT_CTX: %016" PRIx64 "\n",
regs[REG_EPC], regs[REG_INT_CTX]);
/* Call enter_critical_section() to disable local interrupts before
* restoring local context.
*
* Here, we should not use up_irq_save() in SMP mode.
* For example, if we call up_irq_save() here and another CPU might
* have called up_cpu_pause() to this cpu, hence g_cpu_irqlock has
* been locked by the cpu, in this case, we would see a deadlock in
* later call of enter_critical_section() to restore irqcount.
* To avoid this situation, we need to call enter_critical_section().
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*/
#ifdef CONFIG_SMP
enter_critical_section();
#else
DEBUGASSERT(rtcb->irqcount == 0);
while (rtcb->irqcount < saved_irqcount)
{
(void)enter_critical_section();
}
#endif
#ifndef CONFIG_SUPPRESS_INTERRUPTS
up_irq_save();
#endif
@@ -179,22 +178,6 @@ void up_sigdeliver(void)
regs[REG_INT_CTX] = rtcb->xcp.saved_int_ctx;
rtcb->xcp.sigdeliver = NULL; /* Allows next handler to be scheduled */
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*
* REVISIT: irqcount should be one from the above call to
* enter_critical_section(). Could the saved_irqcount be zero? That
* would be a problem.
*/
DEBUGASSERT(rtcb->irqcount == 1);
while (rtcb->irqcount < saved_irqcount)
{
(void)enter_critical_section();
}
#endif
/* Then restore the correct state for this thread of
* execution.
*/
+11 -28
View File
@@ -121,20 +121,19 @@ void xtensa_sig_deliver(void)
sinfo("Resuming\n");
/* Call enter_critical_section() to disable local interrupts before
* restoring local context.
*
* Here, we should not use up_irq_save() in SMP mode.
* For example, if we call up_irq_save() here and another CPU might
* have called up_cpu_pause() to this cpu, hence g_cpu_irqlock has
* been locked by the cpu, in this case, we would see a deadlock in
* later call of enter_critical_section() to restore irqcount.
* To avoid this situation, we need to call enter_critical_section().
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*/
#ifdef CONFIG_SMP
enter_critical_section();
#else
DEBUGASSERT(rtcb->irqcount == 0);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
#ifndef CONFIG_SUPPRESS_INTERRUPTS
up_irq_save();
#endif
@@ -190,22 +189,6 @@ void xtensa_sig_deliver(void)
rtcb->xcp.regs[REG_A0] = regs[REG_A0];
#ifdef CONFIG_SMP
/* Restore the saved 'irqcount' and recover the critical section
* spinlocks.
*
* REVISIT: irqcount should be one from the above call to
* enter_critical_section(). Could the saved_irqcount be zero? That
* would be a problem.
*/
DEBUGASSERT(rtcb->irqcount == 1);
while (rtcb->irqcount < saved_irqcount)
{
enter_critical_section();
}
#endif
/* Then restore the correct state for this thread of execution.
* NOTE: The co-processor state should already be correct.
*/