[wifi] Fix roaming counter reset from delayed disconnect and successful retry (#15126)

This commit is contained in:
J. Nick Koston
2026-03-24 14:04:17 -10:00
committed by Jesse Hills
parent cb15e98765
commit f5f99071fb
2 changed files with 58 additions and 14 deletions
+52 -14
View File
@@ -287,18 +287,25 @@ bool CompactString::operator==(const StringRef &other) const {
/// │ │ (counter reset to 0) │ │ (retry_connect called) │
/// │ └──────────────────────────────────┘ └───────────┬─────────────┘
/// │ │ │
/// │
/// │ ┌───────────────────────┐
/// │ │ → IDLE │
/// │ │ (counter preserved!) │
/// │ └───────────────────────┘
/// │ ┌─────────┴─────────┐
/// │ ↓ ↓
/// │ on target BSSID on other AP
/// │
/// │ ↓ ↓
/// │ ┌──────────────────┐ ┌────────────┐│
/// │ │ → IDLE │ │ → IDLE ││
/// │ │ (counter reset) │ │ (counter ││
/// │ │ (roam worked!) │ │ preserved)││
/// │ └──────────────────┘ └────────────┘│
/// │ │
/// │ Key behaviors: │
/// │ - After 3 checks: attempts >= 3, stop checking │
/// │ - Non-roaming disconnect: clear_roaming_state_() resets counter │
/// │ - Disconnect during scan (SCANNING→RECONNECTING): counter preserved
/// │ - Disconnect during scan (SCANNING→RECONNECTING): counter preserved │
/// │ - Disconnect after scan (within grace period): counter preserved │
/// │ - Roaming success (CONNECTING→IDLE): counter reset (can roam again) │
/// │ - Roaming fail (RECONNECTING→IDLE): counter preserved (ping-pong)
/// │ - Roaming success via retry (on target BSSID): counter reset
/// │ - Roaming fail (RECONNECTING on other AP): counter preserved │
/// └──────────────────────────────────────────────────────────────────────┘
// Use if-chain instead of switch to avoid jump table in RODATA (wastes RAM on ESP8266)
@@ -1583,17 +1590,33 @@ void WiFiComponent::check_connecting_finished(uint32_t now) {
// Only preserve attempts if reconnecting after a failed roam attempt
// This prevents ping-pong between APs when a roam target is unreachable
if (this->roaming_state_ == RoamingState::CONNECTING) {
// Successful roam to better AP - reset attempts so we can roam again later
// Successful roam to better AP on first try - reset attempts so we can roam again later
ESP_LOGD(TAG, "Roam successful");
this->roaming_attempts_ = 0;
} else if (this->roaming_state_ == RoamingState::RECONNECTING) {
// Failed roam, reconnected via normal recovery - keep attempts to prevent ping-pong
ESP_LOGD(TAG, "Reconnected after failed roam (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
// Check if we ended up on the roam target despite needing a retry
// (e.g., first connect failed but scan-based retry found and connected to the same better AP)
bssid_t current_bssid = this->wifi_bssid();
if (this->roaming_target_bssid_ != bssid_t{} && current_bssid == this->roaming_target_bssid_) {
char bssid_buf[MAC_ADDRESS_PRETTY_BUFFER_SIZE];
format_mac_addr_upper(current_bssid.data(), bssid_buf);
ESP_LOGD(TAG, "Roam successful (via retry, attempt %u/%u) to %s", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS,
bssid_buf);
this->roaming_attempts_ = 0;
} else if (this->roaming_target_bssid_ != bssid_t{}) {
// Failed roam to specific target, reconnected to different AP - keep attempts to prevent ping-pong
ESP_LOGD(TAG, "Reconnected after failed roam (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
} else {
// Reconnected after scan-induced disconnect (no roam target) - keep attempts
ESP_LOGD(TAG, "Reconnected after roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
}
} else {
// Normal connection (boot, credentials changed, etc.)
this->roaming_attempts_ = 0;
}
this->roaming_state_ = RoamingState::IDLE;
this->roaming_target_bssid_ = {};
this->roaming_scan_end_ = 0;
// Clear all priority penalties - the next reconnect will happen when an AP disconnects,
// which means the landscape has likely changed and previous tracked failures are stale
@@ -2080,8 +2103,16 @@ void WiFiComponent::retry_connect() {
ESP_LOGD(TAG, "Disconnected during roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
this->roaming_state_ = RoamingState::RECONNECTING;
} else if (this->roaming_state_ == RoamingState::IDLE) {
// Not a roaming-triggered reconnect, reset state
this->clear_roaming_state_();
// Check if a roaming scan recently completed - on ESP8266, going off-channel
// during scan can cause a delayed Beacon Timeout 8-20 seconds after scan finishes.
// Transition to RECONNECTING so the attempts counter is preserved on reconnect.
if (this->roaming_scan_end_ != 0 && millis() - this->roaming_scan_end_ < ROAMING_SCAN_GRACE_PERIOD) {
ESP_LOGD(TAG, "Disconnect after roam scan (attempt %u/%u)", this->roaming_attempts_, ROAMING_MAX_ATTEMPTS);
this->roaming_state_ = RoamingState::RECONNECTING;
} else {
// Not a roaming-triggered reconnect, reset state
this->clear_roaming_state_();
}
}
// RECONNECTING: keep state and counter, still trying to reconnect
@@ -2316,6 +2347,8 @@ bool WiFiScanResult::operator==(const WiFiScanResult &rhs) const { return this->
/// Wipe all roaming bookkeeping and park the roaming state machine in IDLE.
///
/// Intended for reconnects that are not attributed to roaming: the attempts counter,
/// both roaming timestamps and the remembered roam target are all forgotten, so the
/// next roaming cycle starts from scratch.
void WiFiComponent::clear_roaming_state_() {
  // State machine back to idle with no roam target remembered
  this->roaming_state_ = RoamingState::IDLE;
  this->roaming_target_bssid_ = {};

  // Zero the timestamps; a roaming_scan_end_ of 0 is read elsewhere as "no recent scan",
  // which keeps the post-scan grace period from applying after a reset
  this->roaming_scan_end_ = 0;
  this->roaming_last_check_ = 0;

  // Start counting roaming attempts from zero again
  this->roaming_attempts_ = 0;
}
@@ -2383,7 +2416,7 @@ void WiFiComponent::check_roaming_(uint32_t now) {
// Guard: skip scan if signal is already good (no meaningful improvement possible)
int8_t rssi = this->wifi_rssi();
if (rssi > ROAMING_GOOD_RSSI) {
ESP_LOGV(TAG, "Roam check skipped, signal good (%d dBm, attempt %u/%u)", rssi, this->roaming_attempts_,
ESP_LOGD(TAG, "Roam check skipped, signal good (%d dBm, attempt %u/%u)", rssi, this->roaming_attempts_,
ROAMING_MAX_ATTEMPTS);
return;
}
@@ -2397,6 +2430,9 @@ void WiFiComponent::process_roaming_scan_() {
this->scan_done_ = false;
// Default to IDLE - will be set to CONNECTING if we find a better AP
this->roaming_state_ = RoamingState::IDLE;
// Record when scan completed so delayed disconnects (e.g., ESP8266 Beacon Timeout)
// can be attributed to the scan and avoid resetting the attempts counter
this->roaming_scan_end_ = millis();
// Get current connection info
int8_t current_rssi = this->wifi_rssi();
@@ -2445,10 +2481,12 @@ void WiFiComponent::process_roaming_scan_() {
WiFiAP roam_params = *selected;
apply_scan_result_to_params(roam_params, *best);
this->release_scan_results_();
// Mark as roaming attempt - affects retry behavior if connection fails
this->roaming_state_ = RoamingState::CONNECTING;
this->roaming_target_bssid_ = best->get_bssid(); // Must read before releasing scan results
this->release_scan_results_();
// Connect directly - wifi_sta_connect_ handles disconnect internally
this->start_connecting(roam_params);
+6
View File
@@ -779,6 +779,10 @@ class WiFiComponent : public Component {
static constexpr int8_t ROAMING_MIN_IMPROVEMENT = 10; // dB
static constexpr int8_t ROAMING_GOOD_RSSI = -49; // Skip scan if signal is excellent
static constexpr uint8_t ROAMING_MAX_ATTEMPTS = 3;
// Grace period after roaming scan completes. If WiFi disconnects within this
// window (e.g., ESP8266 Beacon Timeout caused by going off-channel during scan),
// the disconnect is treated as roaming-related and the attempts counter is preserved.
static constexpr uint32_t ROAMING_SCAN_GRACE_PERIOD = 30 * 1000; // 30 seconds
// 4-byte members
float output_power_{NAN};
@@ -786,6 +790,7 @@ class WiFiComponent : public Component {
uint32_t last_connected_{0};
uint32_t reboot_timeout_{};
uint32_t roaming_last_check_{0};
uint32_t roaming_scan_end_{0}; // Timestamp when last roaming scan completed
#ifdef USE_WIFI_AP
uint32_t ap_timeout_{};
#endif
@@ -810,6 +815,7 @@ class WiFiComponent : public Component {
bool error_from_callback_{false};
RetryHiddenMode retry_hidden_mode_{RetryHiddenMode::BLIND_RETRY};
RoamingState roaming_state_{RoamingState::IDLE};
bssid_t roaming_target_bssid_{}; // BSSID of the AP we're trying to roam to
#if defined(USE_ESP32) && defined(USE_WIFI_RUNTIME_POWER_SAVE)
WiFiPowerSaveMode configured_power_save_{WIFI_POWER_SAVE_NONE};
#endif