mirror of
https://github.com/PX4/PX4-Autopilot.git
synced 2026-06-02 03:49:12 +08:00
logger: update watchdog
- reduce boost priority to PX4_WQ_HP_BASE - 6 - add cli command 'trigger_watchdog' to manually trigger watchdog - add perf counters when triggering watchdog - reduce top measurement to 300ms - restore priorities after 1.5s There are precautions in case the SD card code itself has a busy-loop.
This commit is contained in:
@@ -124,6 +124,13 @@ typedef struct {
|
|||||||
// wait for the sensor hub if its data is coming from it.
|
// wait for the sensor hub if its data is coming from it.
|
||||||
#define SCHED_PRIORITY_ESTIMATOR (PX4_WQ_HP_BASE - 5)
|
#define SCHED_PRIORITY_ESTIMATOR (PX4_WQ_HP_BASE - 5)
|
||||||
|
|
||||||
|
// Logger watchdog priority, triggered when a task busy-loops (and
|
||||||
|
// restored after a short time).
|
||||||
|
// The priority is a trade-off between:
|
||||||
|
// - ability to capture any busy-looping task below this priority
|
||||||
|
// - not having a negative impact on the system itself
|
||||||
|
#define SCHED_PRIORITY_LOG_WATCHDOG (PX4_WQ_HP_BASE - 6)
|
||||||
|
|
||||||
// Position controllers typically are in a blocking wait on estimator data
|
// Position controllers typically are in a blocking wait on estimator data
|
||||||
// so when new sensor data is available they will run last. Keeping them
|
// so when new sensor data is available they will run last. Keeping them
|
||||||
// on a high priority ensures that they are the first process to be run
|
// on a high priority ensures that they are the first process to be run
|
||||||
|
|||||||
@@ -36,7 +36,6 @@
|
|||||||
#include "logged_topics.h"
|
#include "logged_topics.h"
|
||||||
#include "logger.h"
|
#include "logger.h"
|
||||||
#include "messages.h"
|
#include "messages.h"
|
||||||
#include "watchdog.h"
|
|
||||||
|
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@@ -84,19 +83,12 @@ using namespace px4::logger;
|
|||||||
using namespace time_literals;
|
using namespace time_literals;
|
||||||
|
|
||||||
|
|
||||||
struct timer_callback_data_s {
|
|
||||||
px4_sem_t semaphore;
|
|
||||||
|
|
||||||
watchdog_data_t watchdog_data;
|
|
||||||
volatile bool watchdog_triggered = false;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* This is used to schedule work for the logger (periodic scan for updated topics) */
|
/* This is used to schedule work for the logger (periodic scan for updated topics) */
|
||||||
static void timer_callback(void *arg)
|
static void timer_callback(void *arg)
|
||||||
{
|
{
|
||||||
/* Note: we are in IRQ context here (on NuttX) */
|
/* Note: we are in IRQ context here (on NuttX) */
|
||||||
|
|
||||||
timer_callback_data_s *data = (timer_callback_data_s *)arg;
|
Logger::timer_callback_data_s *data = (Logger::timer_callback_data_s *)arg;
|
||||||
|
|
||||||
int semaphore_value = 0;
|
int semaphore_value = 0;
|
||||||
|
|
||||||
@@ -112,7 +104,7 @@ static void timer_callback(void *arg)
|
|||||||
bool semaphore_value_saturated = semaphore_value > 100;
|
bool semaphore_value_saturated = semaphore_value > 100;
|
||||||
|
|
||||||
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
|
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
|
||||||
data->watchdog_triggered = true;
|
data->watchdog_triggered.store(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (semaphore_value_saturated) {
|
if (semaphore_value_saturated) {
|
||||||
@@ -151,6 +143,15 @@ int Logger::custom_command(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __PX4_NUTTX
|
||||||
|
|
||||||
|
if (!strcmp(argv[0], "trigger_watchdog")) {
|
||||||
|
get_instance()->trigger_watchdog_now();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!strcmp(argv[0], "on")) {
|
if (!strcmp(argv[0], "on")) {
|
||||||
get_instance()->set_arm_override(true);
|
get_instance()->set_arm_override(true);
|
||||||
return 0;
|
return 0;
|
||||||
@@ -650,10 +651,9 @@ void Logger::run()
|
|||||||
|
|
||||||
/* init the update timer */
|
/* init the update timer */
|
||||||
struct hrt_call timer_call {};
|
struct hrt_call timer_call {};
|
||||||
timer_callback_data_s timer_callback_data;
|
px4_sem_init(&_timer_callback_data.semaphore, 0, 0);
|
||||||
px4_sem_init(&timer_callback_data.semaphore, 0, 0);
|
|
||||||
/* timer_semaphore use case is a signal */
|
/* timer_semaphore use case is a signal */
|
||||||
px4_sem_setprotocol(&timer_callback_data.semaphore, SEM_PRIO_NONE);
|
px4_sem_setprotocol(&_timer_callback_data.semaphore, SEM_PRIO_NONE);
|
||||||
|
|
||||||
int polling_topic_sub = -1;
|
int polling_topic_sub = -1;
|
||||||
|
|
||||||
@@ -673,10 +673,10 @@ void Logger::run()
|
|||||||
|
|
||||||
// sched_note_start is already called from pthread_create and task_create,
|
// sched_note_start is already called from pthread_create and task_create,
|
||||||
// which means we can expect to find the tasks in system_load.tasks, as required in watchdog_initialize
|
// which means we can expect to find the tasks in system_load.tasks, as required in watchdog_initialize
|
||||||
watchdog_initialize(pid_self, writer_thread, timer_callback_data.watchdog_data);
|
watchdog_initialize(pid_self, writer_thread, _timer_callback_data.watchdog_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &timer_callback_data);
|
hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &_timer_callback_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for new subscription data
|
// check for new subscription data
|
||||||
@@ -703,8 +703,8 @@ void Logger::run()
|
|||||||
/* check for logging command from MAVLink (start/stop streaming) */
|
/* check for logging command from MAVLink (start/stop streaming) */
|
||||||
handle_vehicle_command_update();
|
handle_vehicle_command_update();
|
||||||
|
|
||||||
if (timer_callback_data.watchdog_triggered) {
|
if (_timer_callback_data.watchdog_triggered.load()) {
|
||||||
timer_callback_data.watchdog_triggered = false;
|
_timer_callback_data.watchdog_triggered.store(false);
|
||||||
initialize_load_output(PrintLoadReason::Watchdog);
|
initialize_load_output(PrintLoadReason::Watchdog);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -916,7 +916,7 @@ void Logger::run()
|
|||||||
* And on linux this is quite accurate as well, but under NuttX it is not accurate,
|
* And on linux this is quite accurate as well, but under NuttX it is not accurate,
|
||||||
* because usleep() has only a granularity of CONFIG_MSEC_PER_TICK (=1ms).
|
* because usleep() has only a granularity of CONFIG_MSEC_PER_TICK (=1ms).
|
||||||
*/
|
*/
|
||||||
while (px4_sem_wait(&timer_callback_data.semaphore) != 0) {}
|
while (px4_sem_wait(&_timer_callback_data.semaphore) != 0) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -926,7 +926,7 @@ void Logger::run()
|
|||||||
stop_log_file(LogType::Mission);
|
stop_log_file(LogType::Mission);
|
||||||
|
|
||||||
hrt_cancel(&timer_call);
|
hrt_cancel(&timer_call);
|
||||||
px4_sem_destroy(&timer_callback_data.semaphore);
|
px4_sem_destroy(&_timer_callback_data.semaphore);
|
||||||
|
|
||||||
// stop the writer thread
|
// stop the writer thread
|
||||||
_writer.thread_stop();
|
_writer.thread_stop();
|
||||||
@@ -1420,7 +1420,7 @@ void Logger::start_log_file(LogType type)
|
|||||||
if (type == LogType::Full) {
|
if (type == LogType::Full) {
|
||||||
write_parameters(type);
|
write_parameters(type);
|
||||||
write_parameter_defaults(type);
|
write_parameter_defaults(type);
|
||||||
write_perf_data(true);
|
write_perf_data(PrintLoadReason::Preflight);
|
||||||
write_console_output();
|
write_console_output();
|
||||||
write_events_file(LogType::Full);
|
write_events_file(LogType::Full);
|
||||||
write_excluded_optional_topics(type);
|
write_excluded_optional_topics(type);
|
||||||
@@ -1448,7 +1448,7 @@ void Logger::stop_log_file(LogType type)
|
|||||||
|
|
||||||
if (type == LogType::Full) {
|
if (type == LogType::Full) {
|
||||||
_writer.set_need_reliable_transfer(true);
|
_writer.set_need_reliable_transfer(true);
|
||||||
write_perf_data(false);
|
write_perf_data(PrintLoadReason::Postflight);
|
||||||
_writer.set_need_reliable_transfer(false);
|
_writer.set_need_reliable_transfer(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1478,7 +1478,7 @@ void Logger::start_log_mavlink()
|
|||||||
write_formats(LogType::Full);
|
write_formats(LogType::Full);
|
||||||
write_parameters(LogType::Full);
|
write_parameters(LogType::Full);
|
||||||
write_parameter_defaults(LogType::Full);
|
write_parameter_defaults(LogType::Full);
|
||||||
write_perf_data(true);
|
write_perf_data(PrintLoadReason::Preflight);
|
||||||
write_console_output();
|
write_console_output();
|
||||||
write_events_file(LogType::Full);
|
write_events_file(LogType::Full);
|
||||||
write_excluded_optional_topics(LogType::Full);
|
write_excluded_optional_topics(LogType::Full);
|
||||||
@@ -1498,7 +1498,7 @@ void Logger::stop_log_mavlink()
|
|||||||
if (_writer.is_started(LogType::Full, LogWriter::BackendMavlink)) {
|
if (_writer.is_started(LogType::Full, LogWriter::BackendMavlink)) {
|
||||||
_writer.select_write_backend(LogWriter::BackendMavlink);
|
_writer.select_write_backend(LogWriter::BackendMavlink);
|
||||||
_writer.set_need_reliable_transfer(true);
|
_writer.set_need_reliable_transfer(true);
|
||||||
write_perf_data(false);
|
write_perf_data(PrintLoadReason::Postflight);
|
||||||
_writer.set_need_reliable_transfer(false);
|
_writer.set_need_reliable_transfer(false);
|
||||||
_writer.unselect_write_backend();
|
_writer.unselect_write_backend();
|
||||||
_writer.notify();
|
_writer.notify();
|
||||||
@@ -1509,7 +1509,7 @@ void Logger::stop_log_mavlink()
|
|||||||
struct perf_callback_data_t {
|
struct perf_callback_data_t {
|
||||||
Logger *logger;
|
Logger *logger;
|
||||||
int counter;
|
int counter;
|
||||||
bool preflight;
|
Logger::PrintLoadReason reason;
|
||||||
char *buffer;
|
char *buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1522,23 +1522,31 @@ void Logger::perf_iterate_callback(perf_counter_t handle, void *user)
|
|||||||
|
|
||||||
perf_print_counter_buffer(buffer, buffer_length, handle);
|
perf_print_counter_buffer(buffer, buffer_length, handle);
|
||||||
|
|
||||||
if (callback_data->preflight) {
|
switch (callback_data->reason) {
|
||||||
|
case PrintLoadReason::Preflight:
|
||||||
|
default:
|
||||||
perf_name = "perf_counter_preflight";
|
perf_name = "perf_counter_preflight";
|
||||||
|
break;
|
||||||
|
|
||||||
} else {
|
case PrintLoadReason::Postflight:
|
||||||
perf_name = "perf_counter_postflight";
|
perf_name = "perf_counter_postflight";
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PrintLoadReason::Watchdog:
|
||||||
|
perf_name = "perf_counter_watchdog";
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
callback_data->logger->write_info_multiple(LogType::Full, perf_name, buffer, callback_data->counter != 0);
|
callback_data->logger->write_info_multiple(LogType::Full, perf_name, buffer, callback_data->counter != 0);
|
||||||
++callback_data->counter;
|
++callback_data->counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Logger::write_perf_data(bool preflight)
|
void Logger::write_perf_data(PrintLoadReason reason)
|
||||||
{
|
{
|
||||||
perf_callback_data_t callback_data = {};
|
perf_callback_data_t callback_data = {};
|
||||||
callback_data.logger = this;
|
callback_data.logger = this;
|
||||||
callback_data.counter = 0;
|
callback_data.counter = 0;
|
||||||
callback_data.preflight = preflight;
|
callback_data.reason = reason;
|
||||||
|
|
||||||
// write the perf counters
|
// write the perf counters
|
||||||
perf_iterate_all(perf_iterate_callback, &callback_data);
|
perf_iterate_all(perf_iterate_callback, &callback_data);
|
||||||
@@ -1580,7 +1588,14 @@ void Logger::print_load_callback(void *user)
|
|||||||
void Logger::initialize_load_output(PrintLoadReason reason)
|
void Logger::initialize_load_output(PrintLoadReason reason)
|
||||||
{
|
{
|
||||||
init_print_load(&_load);
|
init_print_load(&_load);
|
||||||
_next_load_print = hrt_absolute_time() + 1_s;
|
|
||||||
|
if (reason == PrintLoadReason::Watchdog) {
|
||||||
|
_next_load_print = hrt_absolute_time() + 300_ms;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
_next_load_print = hrt_absolute_time() + 1_s;
|
||||||
|
}
|
||||||
|
|
||||||
_print_load_reason = reason;
|
_print_load_reason = reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1588,6 +1603,7 @@ void Logger::write_load_output()
|
|||||||
{
|
{
|
||||||
if (_print_load_reason == PrintLoadReason::Watchdog) {
|
if (_print_load_reason == PrintLoadReason::Watchdog) {
|
||||||
PX4_ERR("Writing watchdog data"); // this is just that we see it easily in the log
|
PX4_ERR("Writing watchdog data"); // this is just that we see it easily in the log
|
||||||
|
write_perf_data(PrintLoadReason::Watchdog);
|
||||||
}
|
}
|
||||||
|
|
||||||
perf_callback_data_t callback_data = {};
|
perf_callback_data_t callback_data = {};
|
||||||
@@ -2421,6 +2437,9 @@ $ logger on
|
|||||||
PRINT_MODULE_USAGE_PARAM_FLOAT('c', 1.0, 0.2, 2.0, "Log rate factor (higher is faster)", true);
|
PRINT_MODULE_USAGE_PARAM_FLOAT('c', 1.0, 0.2, 2.0, "Log rate factor (higher is faster)", true);
|
||||||
PRINT_MODULE_USAGE_COMMAND_DESCR("on", "start logging now, override arming (logger must be running)");
|
PRINT_MODULE_USAGE_COMMAND_DESCR("on", "start logging now, override arming (logger must be running)");
|
||||||
PRINT_MODULE_USAGE_COMMAND_DESCR("off", "stop logging now, override arming (logger must be running)");
|
PRINT_MODULE_USAGE_COMMAND_DESCR("off", "stop logging now, override arming (logger must be running)");
|
||||||
|
#ifdef __PX4_NUTTX
|
||||||
|
PRINT_MODULE_USAGE_COMMAND_DESCR("trigger_watchdog", "manually trigger the watchdog now");
|
||||||
|
#endif
|
||||||
PRINT_MODULE_USAGE_DEFAULT_COMMANDS();
|
PRINT_MODULE_USAGE_DEFAULT_COMMANDS();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -36,6 +36,7 @@
|
|||||||
#include "log_writer.h"
|
#include "log_writer.h"
|
||||||
#include "logged_topics.h"
|
#include "logged_topics.h"
|
||||||
#include "messages.h"
|
#include "messages.h"
|
||||||
|
#include "watchdog.h"
|
||||||
#include <containers/Array.hpp>
|
#include <containers/Array.hpp>
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include <px4_platform_common/defines.h>
|
#include <px4_platform_common/defines.h>
|
||||||
@@ -91,6 +92,20 @@ public:
|
|||||||
arm_until_shutdown,
|
arm_until_shutdown,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class PrintLoadReason {
|
||||||
|
Preflight,
|
||||||
|
Postflight,
|
||||||
|
Watchdog
|
||||||
|
};
|
||||||
|
|
||||||
|
struct timer_callback_data_s {
|
||||||
|
px4_sem_t semaphore;
|
||||||
|
|
||||||
|
watchdog_data_t watchdog_data;
|
||||||
|
px4::atomic_bool watchdog_triggered{false};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
Logger(LogWriter::Backend backend, size_t buffer_size, uint32_t log_interval, const char *poll_topic_name,
|
Logger(LogWriter::Backend backend, size_t buffer_size, uint32_t log_interval, const char *poll_topic_name,
|
||||||
LogMode log_mode, bool log_name_timestamp, float rate_factor);
|
LogMode log_mode, bool log_name_timestamp, float rate_factor);
|
||||||
|
|
||||||
@@ -131,13 +146,14 @@ public:
|
|||||||
|
|
||||||
void set_arm_override(bool override) { _manually_logging_override = override; }
|
void set_arm_override(bool override) { _manually_logging_override = override; }
|
||||||
|
|
||||||
private:
|
void trigger_watchdog_now()
|
||||||
|
{
|
||||||
|
#ifdef __PX4_NUTTX
|
||||||
|
_timer_callback_data.watchdog_data.manual_watchdog_trigger = true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
enum class PrintLoadReason {
|
private:
|
||||||
Preflight,
|
|
||||||
Postflight,
|
|
||||||
Watchdog
|
|
||||||
};
|
|
||||||
|
|
||||||
static constexpr int MAX_MISSION_TOPICS_NUM = 5; /**< Maximum number of mission topics */
|
static constexpr int MAX_MISSION_TOPICS_NUM = 5; /**< Maximum number of mission topics */
|
||||||
static constexpr unsigned MAX_NO_LOGFILE = 999; /**< Maximum number of log files */
|
static constexpr unsigned MAX_NO_LOGFILE = 999; /**< Maximum number of log files */
|
||||||
@@ -229,9 +245,8 @@ private:
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* write performance counters
|
* write performance counters
|
||||||
* @param preflight preflight if true, postflight otherwise
|
|
||||||
*/
|
*/
|
||||||
void write_perf_data(bool preflight);
|
void write_perf_data(PrintLoadReason reason);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* write bootup console output
|
* write bootup console output
|
||||||
@@ -371,6 +386,8 @@ private:
|
|||||||
|
|
||||||
uint32_t _message_gaps{0};
|
uint32_t _message_gaps{0};
|
||||||
|
|
||||||
|
timer_callback_data_s _timer_callback_data{};
|
||||||
|
|
||||||
uORB::Subscription _manual_control_setpoint_sub{ORB_ID(manual_control_setpoint)};
|
uORB::Subscription _manual_control_setpoint_sub{ORB_ID(manual_control_setpoint)};
|
||||||
uORB::Subscription _vehicle_command_sub{ORB_ID(vehicle_command)};
|
uORB::Subscription _vehicle_command_sub{ORB_ID(vehicle_command)};
|
||||||
uORB::Subscription _vehicle_status_sub{ORB_ID(vehicle_status)};
|
uORB::Subscription _vehicle_status_sub{ORB_ID(vehicle_status)};
|
||||||
|
|||||||
@@ -34,6 +34,7 @@
|
|||||||
#include "watchdog.h"
|
#include "watchdog.h"
|
||||||
|
|
||||||
#include <px4_platform_common/log.h>
|
#include <px4_platform_common/log.h>
|
||||||
|
#include <px4_platform_common/tasks.h>
|
||||||
|
|
||||||
#if defined(__PX4_NUTTX) && !defined(CONFIG_SCHED_INSTRUMENTATION)
|
#if defined(__PX4_NUTTX) && !defined(CONFIG_SCHED_INSTRUMENTATION)
|
||||||
# error watchdog support requires CONFIG_SCHED_INSTRUMENTATION
|
# error watchdog support requires CONFIG_SCHED_INSTRUMENTATION
|
||||||
@@ -57,72 +58,104 @@ bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_satura
|
|||||||
const system_load_taskinfo_s &log_writer_task = system_load.tasks[watchdog_data.logger_writer_task_index];
|
const system_load_taskinfo_s &log_writer_task = system_load.tasks[watchdog_data.logger_writer_task_index];
|
||||||
|
|
||||||
if (log_writer_task.valid) {
|
if (log_writer_task.valid) {
|
||||||
// Trigger the watchdog if the log writer task has been ready to run for a
|
|
||||||
// minimum duration and it has not been scheduled during that time.
|
|
||||||
// When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout
|
|
||||||
// will not trigger it. The longest period in ready state I measured was around 70ms,
|
|
||||||
// after a param change.
|
|
||||||
// Additionally we need to check the main thread as well, because if the main thread gets stalled as well
|
|
||||||
// while the writer is idle (no active write), it would not trigger.
|
|
||||||
// We do that by checking if the scheduling semaphore counter is saturated for a certain duration.
|
|
||||||
// No need to lock the tcb access, since we are in IRQ context
|
|
||||||
|
|
||||||
// update the timestamp if it has been scheduled recently
|
// Was it already triggered?
|
||||||
if (log_writer_task.curr_start_time > watchdog_data.ready_to_run_timestamp) {
|
if (watchdog_data.trigger_time != 0) {
|
||||||
watchdog_data.ready_to_run_timestamp = log_writer_task.curr_start_time;
|
// If so, restore the priority after 1.5s (enough time to flush the buffer and write the perf data)
|
||||||
}
|
if (now > watchdog_data.trigger_time + 1500_ms) {
|
||||||
|
// Restore priorities to ensure the logger threads cannot adversely affect the system
|
||||||
|
sched_param param{};
|
||||||
|
param.sched_priority = watchdog_data.logger_main_priority;
|
||||||
|
|
||||||
// update the timestamp if not ready to run or if transitioned into ready to run
|
if (system_load.tasks[watchdog_data.logger_main_task_index].valid) {
|
||||||
uint8_t current_state = log_writer_task.tcb->task_state;
|
sched_setparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, ¶m);
|
||||||
|
}
|
||||||
|
|
||||||
if (current_state != TSTATE_TASK_READYTORUN
|
param.sched_priority = watchdog_data.log_writer_priority;
|
||||||
|| (watchdog_data.last_state != TSTATE_TASK_READYTORUN && current_state == TSTATE_TASK_READYTORUN)) {
|
sched_setparam(log_writer_task.tcb->pid, ¶m);
|
||||||
watchdog_data.ready_to_run_timestamp = now;
|
|
||||||
}
|
|
||||||
|
|
||||||
watchdog_data.last_state = current_state;
|
// Make sure we won't trigger again
|
||||||
|
watchdog_data.logger_main_task_index = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// Trigger the watchdog if the log writer task has been ready to run for a
|
||||||
|
// minimum duration and it has not been scheduled during that time.
|
||||||
|
// When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout
|
||||||
|
// will not trigger it. The longest period in ready state I measured was around 70ms,
|
||||||
|
// after a param change.
|
||||||
|
// Additionally we need to check the main thread as well, because if the main thread gets stalled as well
|
||||||
|
// while the writer is idle (no active write), it would not trigger.
|
||||||
|
// We do that by checking if the scheduling semaphore counter is saturated for a certain duration.
|
||||||
|
// No need to lock the tcb access, since we are in IRQ context
|
||||||
|
|
||||||
|
// update the timestamp if it has been scheduled recently
|
||||||
|
if (log_writer_task.curr_start_time > watchdog_data.ready_to_run_timestamp) {
|
||||||
|
watchdog_data.ready_to_run_timestamp = log_writer_task.curr_start_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the timestamp if not ready to run or if transitioned into ready to run
|
||||||
|
uint8_t current_state = log_writer_task.tcb->task_state;
|
||||||
|
|
||||||
|
if (current_state != TSTATE_TASK_READYTORUN ||
|
||||||
|
(watchdog_data.last_state != TSTATE_TASK_READYTORUN && current_state == TSTATE_TASK_READYTORUN)) {
|
||||||
|
watchdog_data.ready_to_run_timestamp = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
watchdog_data.last_state = current_state;
|
||||||
|
|
||||||
#if 0 // for debugging
|
#if 0 // for debugging
|
||||||
// test code that prints the maximum time in ready state.
|
// test code that prints the maximum time in ready state.
|
||||||
// Note: we are in IRQ context, and thus are strictly speaking not allowed to use PX4_ERR -
|
// Note: we are in IRQ context, and thus are strictly speaking not allowed to use PX4_ERR -
|
||||||
// we do it anyway since it's only used for debugging.
|
// we do it anyway since it's only used for debugging.
|
||||||
static uint64_t max_time = 0;
|
static uint64_t max_time = 0;
|
||||||
|
|
||||||
if (now - watchdog_data.ready_to_run_timestamp > max_time) {
|
if (now - watchdog_data.ready_to_run_timestamp > max_time) {
|
||||||
max_time = now - watchdog_data.ready_to_run_timestamp;
|
max_time = now - watchdog_data.ready_to_run_timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int counter = 0;
|
static int counter = 0;
|
||||||
|
|
||||||
if (++counter > 300) {
|
if (++counter > 300) {
|
||||||
PX4_ERR("max time in ready: %i ms", (int)max_time / 1000);
|
PX4_ERR("max time in ready: %i ms", (int)max_time / 1000);
|
||||||
counter = 0;
|
counter = 0;
|
||||||
max_time = 0;
|
max_time = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!semaphore_value_saturated) {
|
if (!semaphore_value_saturated) {
|
||||||
watchdog_data.sem_counter_saturated_start = now;
|
watchdog_data.sem_counter_saturated_start = now;
|
||||||
}
|
|
||||||
|
|
||||||
if (now - watchdog_data.sem_counter_saturated_start > 3_s || now - watchdog_data.ready_to_run_timestamp > 1_s) {
|
|
||||||
// boost the priority to make sure the logger continues to write to the log.
|
|
||||||
// Note that we never restore the priority, to keep the logic simple and because it is
|
|
||||||
// an event that must not occur under normal circumstances (if it does, there's a bug
|
|
||||||
// somewhere)
|
|
||||||
sched_param param{};
|
|
||||||
param.sched_priority = SCHED_PRIORITY_MAX;
|
|
||||||
|
|
||||||
if (system_load.tasks[watchdog_data.logger_main_task_index].valid) {
|
|
||||||
sched_setparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, ¶m);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sched_setparam(log_writer_task.tcb->pid, ¶m);
|
if (watchdog_data.manual_watchdog_trigger
|
||||||
|
|| now > watchdog_data.sem_counter_saturated_start + 3_s
|
||||||
|
|| now > watchdog_data.ready_to_run_timestamp + 1_s) {
|
||||||
|
|
||||||
// make sure we won't trigger again
|
sched_param param{};
|
||||||
watchdog_data.logger_main_task_index = -1;
|
|
||||||
return true;
|
// Get the current priorities
|
||||||
|
if (system_load.tasks[watchdog_data.logger_main_task_index].valid) {
|
||||||
|
sched_getparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, ¶m);
|
||||||
|
watchdog_data.logger_main_priority = param.sched_priority;
|
||||||
|
}
|
||||||
|
|
||||||
|
sched_getparam(log_writer_task.tcb->pid, ¶m);
|
||||||
|
watchdog_data.log_writer_priority = param.sched_priority;
|
||||||
|
|
||||||
|
// Boost the priority to make sure the logger continues to write to the log.
|
||||||
|
param.sched_priority = SCHED_PRIORITY_LOG_WATCHDOG;
|
||||||
|
|
||||||
|
if (system_load.tasks[watchdog_data.logger_main_task_index].valid) {
|
||||||
|
sched_setparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, ¶m);
|
||||||
|
}
|
||||||
|
|
||||||
|
sched_setparam(log_writer_task.tcb->pid, ¶m);
|
||||||
|
|
||||||
|
watchdog_data.trigger_time = now;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -52,6 +52,10 @@ struct watchdog_data_t {
|
|||||||
hrt_abstime ready_to_run_timestamp = hrt_absolute_time();
|
hrt_abstime ready_to_run_timestamp = hrt_absolute_time();
|
||||||
hrt_abstime sem_counter_saturated_start = hrt_absolute_time();
|
hrt_abstime sem_counter_saturated_start = hrt_absolute_time();
|
||||||
uint8_t last_state = TSTATE_TASK_INVALID;
|
uint8_t last_state = TSTATE_TASK_INVALID;
|
||||||
|
int log_writer_priority = 0;
|
||||||
|
int logger_main_priority = 0;
|
||||||
|
hrt_abstime trigger_time = 0; ///< timestamp when it was triggered
|
||||||
|
bool manual_watchdog_trigger = false;
|
||||||
#endif /* __PX4_NUTTX */
|
#endif /* __PX4_NUTTX */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user