logger watchdog: also check main thread

There was a time window where if a task with higher priority than the main
logger thread would busy-loop, it would block both logging threads and the
watchdog would not trigger if the writer was in idle state.
This can happen for fast SD card writes.
This commit is contained in:
Beat Küng
2022-04-20 14:58:17 +02:00
committed by Daniel Agar
parent 9e0a8050a9
commit a9129ea003
3 changed files with 23 additions and 10 deletions
+11 -6
View File
@@ -97,19 +97,24 @@ static void timer_callback(void *arg)
timer_callback_data_s *data = (timer_callback_data_s *)arg;
if (watchdog_update(data->watchdog_data)) {
data->watchdog_triggered = true;
}
int semaphore_value = 0;
px4_sem_getvalue(&data->semaphore, &semaphore_value);
/* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast
* as the timer_callback here increases the semaphore count, the counter would increase unbounded,
* leading to an overflow at some point. This case we want to avoid here, so we check the current
* value against a (somewhat arbitrary) threshold, and avoid calling sem_post() if it's exceeded.
* (it's not a problem if the threshold is a bit too large, it just means the logger will do
* multiple iterations at once, the next time it's scheduled). */
int semaphore_value;
* multiple iterations at once, the next time it's scheduled).
* As the watchdog also uses the counter we use a conservatively high value */
bool semaphore_value_saturated = semaphore_value > 100;
if (px4_sem_getvalue(&data->semaphore, &semaphore_value) == 0 && semaphore_value > 1) {
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
data->watchdog_triggered = true;
}
if (semaphore_value_saturated) {
return;
}
+9 -3
View File
@@ -46,7 +46,7 @@ namespace px4
namespace logger
{
bool watchdog_update(watchdog_data_t &watchdog_data)
bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated)
{
#if defined(__PX4_NUTTX) && defined(CONFIG_BUILD_FLAT)
@@ -62,7 +62,9 @@ bool watchdog_update(watchdog_data_t &watchdog_data)
// When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout
// will not trigger it. The longest period in ready state I measured was around 70ms,
// after a param change.
// We only check the log writer because it runs at lower priority than the main thread.
// Additionally we need to check the main thread as well, because if the main thread gets stalled as well
// while the writer is idle (no active write), it would not trigger.
// We do that by checking if the scheduling semaphore counter is saturated for a certain duration.
// No need to lock the tcb access, since we are in IRQ context
// update the timestamp if it has been scheduled recently
@@ -100,7 +102,11 @@ bool watchdog_update(watchdog_data_t &watchdog_data)
#endif
if (now - watchdog_data.ready_to_run_timestamp > 1_s) {
if (!semaphore_value_saturated) {
watchdog_data.sem_counter_saturated_start = now;
}
if (now - watchdog_data.sem_counter_saturated_start > 3_s || now - watchdog_data.ready_to_run_timestamp > 1_s) {
// boost the priority to make sure the logger continues to write to the log.
// Note that we never restore the priority, to keep the logic simple and because it is
// an event that must not occur under normal circumstances (if it does, there's a bug
+3 -1
View File
@@ -50,6 +50,7 @@ struct watchdog_data_t {
int logger_main_task_index = -1;
int logger_writer_task_index = -1;
hrt_abstime ready_to_run_timestamp = hrt_absolute_time();
hrt_abstime sem_counter_saturated_start = hrt_absolute_time();
uint8_t last_state = TSTATE_TASK_INVALID;
#endif /* __PX4_NUTTX */
};
@@ -70,9 +71,10 @@ void watchdog_initialize(const pid_t pid_logger_main, const pthread_t writer_thr
* Expected to be called from IRQ context.
*
* @param watchdog_data
* @param semaphore_value_saturated if the scheduling semaphore counter is saturated
* @return true if watchdog is triggered, false otherwise
*/
bool watchdog_update(watchdog_data_t &watchdog_data);
bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated);
} //namespace logger
} //namespace px4