[wifi] Fix roaming counter reset from delayed disconnect and successful retry (#15126)

This commit is contained in:
J. Nick Koston
2026-03-24 14:04:17 -10:00
committed by Jesse Hills
parent cb15e98765
commit f5f99071fb
2 changed files with 58 additions and 14 deletions
+52 -14
View File
@@ -287,18 +287,25 @@ bool CompactString::operator==(const StringRef &other) const {
/// │ │ (counter reset to 0) │ │ (retry_connect called) │ /// │ │ (counter reset to 0) │ │ (retry_connect called) │
/// │ └──────────────────────────────────┘ └───────────┬─────────────┘ /// │ └──────────────────────────────────┘ └───────────┬─────────────┘
/// │ │ │ /// │ │ │
/// │ /// │ ┌─────────┴─────────┐
/// │ ┌───────────────────────┐ /// │ ↓ ↓
/// │ │ → IDLE │ /// │ on target BSSID on other AP
/// │ │ (counter preserved!) │ /// │
/// │ └───────────────────────┘ /// │ ↓ ↓
/// │ ┌──────────────────┐ ┌────────────┐│
/// │ │ → IDLE │ │ → IDLE ││
/// │ │ (counter reset) │ │ (counter ││
/// │ │ (roam worked!) │ │ preserved)││
/// │ └──────────────────┘ └────────────┘│
/// │ │ /// │ │
/// │ Key behaviors: │ /// │ Key behaviors: │
/// │ - After 3 checks: attempts >= 3, stop checking │ /// │ - After 3 checks: attempts >= 3, stop checking │
/// │ - Non-roaming disconnect: clear_roaming_state_() resets counter │ /// │ - Non-roaming disconnect: clear_roaming_state_() resets counter │
/// │ - Disconnect during scan (SCANNING→RECONNECTING): counter preserved /// │ - Disconnect during scan (SCANNING→RECONNECTING): counter preserved │
/// │ - Disconnect after scan (within grace period): counter preserved │
/// │ - Roaming success (CONNECTING→IDLE): counter reset (can roam again) │ /// │ - Roaming success (CONNECTING→IDLE): counter reset (can roam again) │
/// │ - Roaming fail (RECONNECTING→IDLE): counter preserved (ping-pong) /// │ - Roaming success via retry (on target BSSID): counter reset
/// │ - Roaming fail (RECONNECTING on other AP): counter preserved │
/// └──────────────────────────────────────────────────────────────────────┘ /// └──────────────────────────────────────────────────────────────────────┘
// Use if-chain instead of switch to avoid jump table in RODATA (wastes RAM on ESP8266) // Use if-chain instead of switch to avoid jump table in RODATA (wastes RAM on ESP8266)
@@ -1583,17 +1590,33 @@ void WiFiComponent::check_connecting_finished(uint32_t now) {
// Only preserve attempts if reconnecting after a failed roam attempt // Only preserve attempts if reconnecting after a failed roam attempt
// This prevents ping-pong between APs when a roam target is unreachable // This prevents ping-pong between APs when a roam target is unreachable
if (this->roaming_state_ == RoamingState::CONNECTING) { if (this->roaming_state_ == RoamingState::CONNECTING) {
// Successful roam to better AP - reset attempts so we can roam again later // Successful roam to better AP on first try - reset attempts so we can roam again later
ESP_LOGD(TAG, "Roam successful"); ESP_LOGD(TAG, "Roam successful");
this->roaming_attempts_ = 0; this->roaming_attempts_ = 0;
} else if (this->roaming_state_ == RoamingState::RECONNECTING) { } else if (this->roaming_state_ == RoamingState::RECONNECTING) {
// Failed roam, reconnected via normal recovery - keep attempts to prevent ping-pong // Check if we ended up on the roam target despite needing a retry
ESP_LOGD(TAG, "Reconnected after failed roam (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS); // (e.g., first connect failed but scan-based retry found and connected to the same better AP)
bssid_t current_bssid = this->wifi_bssid();
if (this->roaming_target_bssid_ != bssid_t{} && current_bssid == this->roaming_target_bssid_) {
char bssid_buf[MAC_ADDRESS_PRETTY_BUFFER_SIZE];
format_mac_addr_upper(current_bssid.data(), bssid_buf);
ESP_LOGD(TAG, "Roam successful (via retry, attempt %u/%u) to %s", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS,
bssid_buf);
this->roaming_attempts_ = 0;
} else if (this->roaming_target_bssid_ != bssid_t{}) {
// Failed roam to specific target, reconnected to different AP - keep attempts to prevent ping-pong
ESP_LOGD(TAG, "Reconnected after failed roam (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
} else {
// Reconnected after scan-induced disconnect (no roam target) - keep attempts
ESP_LOGD(TAG, "Reconnected after roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
}
} else { } else {
// Normal connection (boot, credentials changed, etc.) // Normal connection (boot, credentials changed, etc.)
this->roaming_attempts_ = 0; this->roaming_attempts_ = 0;
} }
this->roaming_state_ = RoamingState::IDLE; this->roaming_state_ = RoamingState::IDLE;
this->roaming_target_bssid_ = {};
this->roaming_scan_end_ = 0;
// Clear all priority penalties - the next reconnect will happen when an AP disconnects, // Clear all priority penalties - the next reconnect will happen when an AP disconnects,
// which means the landscape has likely changed and previous tracked failures are stale // which means the landscape has likely changed and previous tracked failures are stale
@@ -2080,8 +2103,16 @@ void WiFiComponent::retry_connect() {
ESP_LOGD(TAG, "Disconnected during roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS); ESP_LOGD(TAG, "Disconnected during roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
this->roaming_state_ = RoamingState::RECONNECTING; this->roaming_state_ = RoamingState::RECONNECTING;
} else if (this->roaming_state_ == RoamingState::IDLE) { } else if (this->roaming_state_ == RoamingState::IDLE) {
// Not a roaming-triggered reconnect, reset state // Check if a roaming scan recently completed - on ESP8266, going off-channel
this->clear_roaming_state_(); // during scan can cause a delayed Beacon Timeout 8-20 seconds after scan finishes.
// Transition to RECONNECTING so the attempts counter is preserved on reconnect.
if (this->roaming_scan_end_ != 0 && millis() - this->roaming_scan_end_ < ROAMING_SCAN_GRACE_PERIOD) {
ESP_LOGD(TAG, "Disconnect after roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
this->roaming_state_ = RoamingState::RECONNECTING;
} else {
// Not a roaming-triggered reconnect, reset state
this->clear_roaming_state_();
}
} }
// RECONNECTING: keep state and counter, still trying to reconnect // RECONNECTING: keep state and counter, still trying to reconnect
@@ -2316,6 +2347,8 @@ bool WiFiScanResult::operator==(const WiFiScanResult &rhs) const { return this->
void WiFiComponent::clear_roaming_state_() { void WiFiComponent::clear_roaming_state_() {
this->roaming_attempts_ = 0; this->roaming_attempts_ = 0;
this->roaming_last_check_ = 0; this->roaming_last_check_ = 0;
this->roaming_scan_end_ = 0;
this->roaming_target_bssid_ = {};
this->roaming_state_ = RoamingState::IDLE; this->roaming_state_ = RoamingState::IDLE;
} }
@@ -2383,7 +2416,7 @@ void WiFiComponent::check_roaming_(uint32_t now) {
// Guard: skip scan if signal is already good (no meaningful improvement possible) // Guard: skip scan if signal is already good (no meaningful improvement possible)
int8_t rssi = this->wifi_rssi(); int8_t rssi = this->wifi_rssi();
if (rssi > ROAMING_GOOD_RSSI) { if (rssi > ROAMING_GOOD_RSSI) {
ESP_LOGV(TAG, "Roam check skipped, signal good (%d dBm, attempt %u/%u)", rssi, this->roaming_attempts_, ESP_LOGD(TAG, "Roam check skipped, signal good (%d dBm, attempt %u/%u)", rssi, this->roaming_attempts_,
ROAMING_MAX_ATTEMPTS); ROAMING_MAX_ATTEMPTS);
return; return;
} }
@@ -2397,6 +2430,9 @@ void WiFiComponent::process_roaming_scan_() {
this->scan_done_ = false; this->scan_done_ = false;
// Default to IDLE - will be set to CONNECTING if we find a better AP // Default to IDLE - will be set to CONNECTING if we find a better AP
this->roaming_state_ = RoamingState::IDLE; this->roaming_state_ = RoamingState::IDLE;
// Record when scan completed so delayed disconnects (e.g., ESP8266 Beacon Timeout)
// can be attributed to the scan and avoid resetting the attempts counter
this->roaming_scan_end_ = millis();
// Get current connection info // Get current connection info
int8_t current_rssi = this->wifi_rssi(); int8_t current_rssi = this->wifi_rssi();
@@ -2445,10 +2481,12 @@ void WiFiComponent::process_roaming_scan_() {
WiFiAP roam_params = *selected; WiFiAP roam_params = *selected;
apply_scan_result_to_params(roam_params, *best); apply_scan_result_to_params(roam_params, *best);
this->release_scan_results_();
// Mark as roaming attempt - affects retry behavior if connection fails // Mark as roaming attempt - affects retry behavior if connection fails
this->roaming_state_ = RoamingState::CONNECTING; this->roaming_state_ = RoamingState::CONNECTING;
this->roaming_target_bssid_ = best->get_bssid(); // Must read before releasing scan results
this->release_scan_results_();
// Connect directly - wifi_sta_connect_ handles disconnect internally // Connect directly - wifi_sta_connect_ handles disconnect internally
this->start_connecting(roam_params); this->start_connecting(roam_params);
+6
View File
@@ -779,6 +779,10 @@ class WiFiComponent : public Component {
static constexpr int8_t ROAMING_MIN_IMPROVEMENT = 10; // dB static constexpr int8_t ROAMING_MIN_IMPROVEMENT = 10; // dB
static constexpr int8_t ROAMING_GOOD_RSSI = -49; // Skip scan if signal is excellent static constexpr int8_t ROAMING_GOOD_RSSI = -49; // Skip scan if signal is excellent
static constexpr uint8_t ROAMING_MAX_ATTEMPTS = 3; static constexpr uint8_t ROAMING_MAX_ATTEMPTS = 3;
// Grace period after roaming scan completes. If WiFi disconnects within this
// window (e.g., ESP8266 Beacon Timeout caused by going off-channel during scan),
// the disconnect is treated as roaming-related and the attempts counter is preserved.
static constexpr uint32_t ROAMING_SCAN_GRACE_PERIOD = 30 * 1000; // 30 seconds
// 4-byte members // 4-byte members
float output_power_{NAN}; float output_power_{NAN};
@@ -786,6 +790,7 @@ class WiFiComponent : public Component {
uint32_t last_connected_{0}; uint32_t last_connected_{0};
uint32_t reboot_timeout_{}; uint32_t reboot_timeout_{};
uint32_t roaming_last_check_{0}; uint32_t roaming_last_check_{0};
uint32_t roaming_scan_end_{0}; // Timestamp when last roaming scan completed
#ifdef USE_WIFI_AP #ifdef USE_WIFI_AP
uint32_t ap_timeout_{}; uint32_t ap_timeout_{};
#endif #endif
@@ -810,6 +815,7 @@ class WiFiComponent : public Component {
bool error_from_callback_{false}; bool error_from_callback_{false};
RetryHiddenMode retry_hidden_mode_{RetryHiddenMode::BLIND_RETRY}; RetryHiddenMode retry_hidden_mode_{RetryHiddenMode::BLIND_RETRY};
RoamingState roaming_state_{RoamingState::IDLE}; RoamingState roaming_state_{RoamingState::IDLE};
bssid_t roaming_target_bssid_{}; // BSSID of the AP we're trying to roam to
#if defined(USE_ESP32) && defined(USE_WIFI_RUNTIME_POWER_SAVE) #if defined(USE_ESP32) && defined(USE_WIFI_RUNTIME_POWER_SAVE)
WiFiPowerSaveMode configured_power_save_{WIFI_POWER_SAVE_NONE}; WiFiPowerSaveMode configured_power_save_{WIFI_POWER_SAVE_NONE};
#endif #endif