Add check for high RAM usage

We had a case where someone took off with an experimental
system with 100% RAM usage on the embedded system
without noticing. This led to problems during flight.

Since we already have a CPU load check it seems natural
to also check the reported RAM usage.
This commit is contained in:
Matthias Grob
2024-06-26 18:44:14 +02:00
parent 30b854da35
commit e4446adba1
4 changed files with 48 additions and 7 deletions
+1
View File
@@ -157,6 +157,7 @@ param set-default CBRK_SUPPLY_CHK 894281
# disable check, no CPU load reported on posix yet # disable check, no CPU load reported on posix yet
param set-default COM_CPU_MAX -1 param set-default COM_CPU_MAX -1
param set-default COM_RAM_MAX -1
# Don't require RC calibration and configuration # Don't require RC calibration and configuration
param set-default COM_RC_IN_MODE 1 param set-default COM_RC_IN_MODE 1
@@ -43,7 +43,10 @@ CpuResourceChecks::CpuResourceChecks()
void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter) void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
{ {
if (_param_com_cpu_max.get() < FLT_EPSILON) { const bool cpu_load_check_enabled = _param_com_cpu_max.get() > FLT_EPSILON;
const bool ram_usage_check_enabled = _param_com_ram_max.get() > FLT_EPSILON;
if (!cpu_load_check_enabled && !ram_usage_check_enabled) {
return; return;
} }
@@ -54,15 +57,15 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
/* EVENT /* EVENT
* @description * @description
* <profile name="dev"> * <profile name="dev">
* If the system does not provide any CPU load information, use the parameter <param>COM_CPU_MAX</param> * If the system does not provide any CPU and RAM load information, use the parameters <param>COM_CPU_MAX</param>
* to disable the check. * and <param>COM_RAM_MAX</param> to disable the checks.
* </profile> * </profile>
*/ */
reporter.healthFailure(NavModes::All, health_component_t::system, events::ID("check_missing_cpuload"), reporter.healthFailure(NavModes::All, health_component_t::system, events::ID("check_missing_cpuload"),
events::Log::Error, "No CPU load information"); events::Log::Error, "No CPU and RAM load information");
if (reporter.mavlink_log_pub()) { if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU load information"); mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU and RAM load information");
} }
} else { } else {
@@ -71,7 +74,7 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
_high_cpu_load_hysteresis.set_state_and_update(high_cpu_load, hrt_absolute_time()); _high_cpu_load_hysteresis.set_state_and_update(high_cpu_load, hrt_absolute_time());
// fail check if CPU load is above the threshold for 2 seconds // fail check if CPU load is above the threshold for 2 seconds
if (_high_cpu_load_hysteresis.get_state()) { if (cpu_load_check_enabled && _high_cpu_load_hysteresis.get_state()) {
/* EVENT /* EVENT
* @description * @description
* The CPU load can be reduced for example by disabling unused modules (e.g. mavlink instances) or reducing the gyro update * The CPU load can be reduced for example by disabling unused modules (e.g. mavlink instances) or reducing the gyro update
@@ -88,5 +91,26 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: CPU load too high: %3.1f%%", (double)cpuload_percent); mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: CPU load too high: %3.1f%%", (double)cpuload_percent);
} }
} }
const float ram_usage_percent = cpuload.ram_usage * 100.f;
const bool high_ram_usage = ram_usage_percent > _param_com_ram_max.get();
if (ram_usage_check_enabled && high_ram_usage) {
/* EVENT
* @description
* The RAM usage can be reduced for example by disabling unused modules (e.g. mavlink instances).
*
* <profile name="dev">
* The threshold can be adjusted via <param>COM_RAM_MAX</param> parameter.
* </profile>
*/
reporter.healthFailure<float>(NavModes::All, health_component_t::system, events::ID("check_ram_usage_too_high"),
events::Log::Error, "RAM usage too high: {1:.1}%", ram_usage_percent);
if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: RAM usage too high: %3.1f%%",
(double)ram_usage_percent);
}
}
} }
} }
@@ -54,6 +54,7 @@ private:
systemlib::Hysteresis _high_cpu_load_hysteresis{false}; systemlib::Hysteresis _high_cpu_load_hysteresis{false};
DEFINE_PARAMETERS_CUSTOM_PARENT(HealthAndArmingCheckBase, DEFINE_PARAMETERS_CUSTOM_PARENT(HealthAndArmingCheckBase,
(ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max (ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max,
(ParamFloat<px4::params::COM_RAM_MAX>) _param_com_ram_max
) )
}; };
+15
View File
@@ -802,6 +802,21 @@ PARAM_DEFINE_FLOAT(COM_KILL_DISARM, 5.0f);
*/ */
PARAM_DEFINE_FLOAT(COM_CPU_MAX, 95.0f); PARAM_DEFINE_FLOAT(COM_CPU_MAX, 95.0f);
/**
* Maximum allowed RAM usage to pass checks
*
* The check fails if the reported RAM usage (in percent) is above this threshold.
*
* A negative value or zero disables the check.
*
* @group Commander
* @unit %
* @min -1
* @max 100
* @increment 1
*/
PARAM_DEFINE_FLOAT(COM_RAM_MAX, 95.0f);
/** /**
* Required number of redundant power modules * Required number of redundant power modules
* *