[core] Eliminate __udivdi3 in millis() on ESP32 and RP2040 (#14409)

This commit is contained in:
J. Nick Koston
2026-03-02 11:42:25 -10:00
committed by GitHub
parent d1de50c0e5
commit 3615a7b90c
5 changed files with 149 additions and 4 deletions
+2 -2
View File
@@ -22,8 +22,8 @@ extern "C" __attribute__((weak)) void initArduino() {}
namespace esphome {
// NOTE(review): millis() and millis_64() each appear twice in this span. The text looks like a
// rendered diff with its +/- markers stripped, so the first pair is presumably the removed
// implementation and the second pair its replacement — confirm against the real file.

/// Yield by calling vPortYield().
void HOT yield() { vPortYield(); }
/// millis(): divides esp_timer_get_time() by 1000 with a 64-bit division, then truncates to uint32_t.
uint32_t IRAM_ATTR HOT millis() { return (uint32_t) (esp_timer_get_time() / 1000ULL); }
/// millis_64(): casts esp_timer_get_time() to uint64_t and divides by 1000, keeping all 64 bits.
uint64_t HOT millis_64() { return static_cast<uint64_t>(esp_timer_get_time()) / 1000ULL; }
/// millis(): passes esp_timer_get_time() (as uint64_t) to micros_to_millis() with its default return type.
uint32_t IRAM_ATTR HOT millis() { return micros_to_millis(static_cast<uint64_t>(esp_timer_get_time())); }
/// millis_64(): same conversion, but requests the uint64_t instantiation of micros_to_millis().
uint64_t HOT millis_64() { return micros_to_millis<uint64_t>(static_cast<uint64_t>(esp_timer_get_time())); }
/// Calls vTaskDelay() with ms / portTICK_PERIOD_MS.
void HOT delay(uint32_t ms) { vTaskDelay(ms / portTICK_PERIOD_MS); }
/// Returns esp_timer_get_time() truncated to uint32_t.
uint32_t IRAM_ATTR HOT micros() { return (uint32_t) esp_timer_get_time(); }
/// Forwards to delay_microseconds_safe(us).
void IRAM_ATTR HOT delayMicroseconds(uint32_t us) { delay_microseconds_safe(us); }
+2 -2
View File
@@ -11,8 +11,8 @@
namespace esphome {
// NOTE(review): millis_64() and millis() each appear twice in this span. The text looks like a
// rendered diff with its +/- markers stripped, so the first pair is presumably the removed
// implementation and the second pair its replacement — confirm against the real file.

/// Forwards to the global ::yield().
void HOT yield() { ::yield(); }
/// millis_64(): divides time_us_64() by 1000 with a 64-bit division.
uint64_t millis_64() { return time_us_64() / 1000ULL; }
/// millis(): truncates millis_64() to uint32_t.
uint32_t HOT millis() { return static_cast<uint32_t>(millis_64()); }
/// millis_64(): converts time_us_64() through the uint64_t instantiation of micros_to_millis().
uint64_t millis_64() { return micros_to_millis<uint64_t>(time_us_64()); }
/// millis(): converts time_us_64() through micros_to_millis() with its default return type.
uint32_t HOT millis() { return micros_to_millis(time_us_64()); }
/// Forwards to the global ::delay(ms).
void HOT delay(uint32_t ms) { ::delay(ms); }
/// Returns the global ::micros().
uint32_t HOT micros() { return ::micros(); }
/// Forwards to delay_microseconds_safe(us).
void HOT delayMicroseconds(uint32_t us) { delay_microseconds_safe(us); }
+38
View File
@@ -599,6 +599,44 @@ template<std::integral T> constexpr uint32_t fnv1a_hash_extend(uint32_t hash, T
/// Hash a C string by extending FNV1_OFFSET_BASIS with it via fnv1a_hash_extend().
constexpr uint32_t fnv1a_hash(const char *str) {
  return fnv1a_hash_extend(FNV1_OFFSET_BASIS, str);
}
/// Hash a std::string by delegating to the const char * overload with its c_str().
inline uint32_t fnv1a_hash(const std::string &str) {
  const char *raw = str.c_str();
  return fnv1a_hash(raw);
}
/// Convert a 64-bit microsecond count to milliseconds without calling
/// __udivdi3 (software 64-bit divide, ~1200 ns on Xtensa @ 240 MHz).
///
/// Returns uint32_t by default (for millis()), or uint64_t when requested
/// (for millis_64()). The only difference is whether hi * Q is truncated
/// to 32 bits or widened to 64.
///
/// On 32-bit targets, GCC does not optimize 64-bit constant division into a
/// multiply-by-reciprocal. Since 1000 = 8 * 125, we first right-shift by 3
/// (free divide-by-8), then use the Euclidean division identity to decompose
/// the remaining 64-bit divide-by-125 into a single 32-bit division:
///
/// floor(us / 1000) = floor(floor(us / 8) / 125) [exact for integers]
/// 2^32 = Q * 125 + R (34359738 * 125 + 46)
/// (hi * 2^32 + lo) / 125 = hi * Q + (hi * R + lo) / 125
///
/// GCC optimizes the remaining 32-bit "/ 125U" into a multiply-by-reciprocal
/// (mulhu + shift), so no division instruction is emitted.
///
/// Safe for us up to ~3.2e18 (~101,700 years of microseconds).
///
/// See: https://en.wikipedia.org/wiki/Euclidean_division
/// See: https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
template<typename ReturnT = uint32_t> inline constexpr ESPHOME_ALWAYS_INLINE ReturnT micros_to_millis(uint64_t us) {
  // Decomposition constants: 2^32 == quot * divisor + rem.
  constexpr uint64_t two_pow_32 = 1ULL << 32;
  constexpr uint32_t divisor = 125U;
  constexpr uint32_t quot = static_cast<uint32_t>(two_pow_32 / divisor);  // 34359738
  constexpr uint32_t rem = static_cast<uint32_t>(two_pow_32 % divisor);   // 46

  // us / 1000 == (us / 8) / 125, and the divide-by-8 is just a shift.
  const uint64_t eighths = us >> 3;
  const uint32_t high = static_cast<uint32_t>(eighths >> 32);
  const uint32_t low = static_cast<uint32_t>(eighths);

  // Remainder term of the identity: high * (2^32 % 125) + low, computed modulo 2^32.
  const uint32_t sum = high * rem + low;
  // A wrapped sum is smaller than the addend; the real value is then 2^32 + sum.
  const bool wrapped = sum < low;
  // On wrap-around, peel off one more 2^32 with the same identity: it contributes
  // quot to the quotient and rem to the dividend. All arithmetic here stays 32-bit.
  const uint32_t tail = wrapped ? (sum + rem) / divisor + quot : sum / divisor;

  // Casting high to ReturnT first means high * quot keeps its upper bits when
  // ReturnT is uint64_t and is truncated when ReturnT is uint32_t.
  return static_cast<ReturnT>(high) * quot + tail;
}
/// Return a random 32-bit unsigned integer.
uint32_t random_uint32();
/// Return a random float between 0 and 1.
@@ -0,0 +1,61 @@
# Host-platform fixture for esphome::micros_to_millis().
# The on_boot lambda compares the helper against plain 64-bit division and logs
# either "ALL_PASSED" or one "FAILED" line per mismatch under the "MTM" tag.
esphome:
  name: micros-to-millis-test
  platformio_options:
    build_flags:
      - "-DDEBUG"
  on_boot:
    - lambda: |-
        using esphome::micros_to_millis;
        const char *TAG = "MTM";
        int pass = 0, fail = 0;
        // Every input is verified twice: once through the default uint32_t
        // instantiation and once through the uint64_t instantiation, so the
        // widened hi * q path is covered as well as the truncated one.
        auto check = [&](const char *name, uint64_t us) {
          uint32_t got = micros_to_millis(us);
          uint32_t want = (uint32_t)(us / 1000ULL);
          if (got == want) { pass++; }
          else { ESP_LOGE(TAG, "%s FAILED: got=%u want=%u", name, got, want); fail++; }
          uint64_t got64 = micros_to_millis<uint64_t>(us);
          uint64_t want64 = us / 1000ULL;
          if (got64 == want64) { pass++; }
          else {
            ESP_LOGE(TAG, "%s (u64) FAILED: got=%llu want=%llu", name, (unsigned long long) got64,
                     (unsigned long long) want64);
            fail++;
          }
        };
        // Basic values
        check("zero", 0);
        check("below_1ms", 999);
        check("exactly_1ms", 1000);
        check("above_1ms", 1001);
        // Shift boundary (1000 = 8 * 125, exercises the >>3 shift)
        check("shift_7999", 7999);
        check("shift_8000", 8000);
        check("shift_8001", 8001);
        // 32-bit boundary
        check("u32max_minus1", 0xFFFFFFFEULL);
        check("u32max", 0xFFFFFFFFULL);
        check("u32max_plus1", 0x100000000ULL);
        // Realistic uptimes (1 year exceeds 2^32 ms, so the 32- and 64-bit results differ)
        check("30_days", 2592000000000ULL);
        check("1_year", 31536000000000ULL);
        // Carry path: construct x = us>>3 with specific hi/lo that trigger adj overflow
        { uint64_t x = (603ULL << 32) | 0xFFFFFFFFU; check("carry_603", x << 3); }
        { uint64_t x = (5000ULL << 32) | 0xFFFFFFFFU; check("carry_5000", x << 3); }
        // Carry boundary: exact transition where adj overflows (hi=1000, R=46)
        {
          uint32_t hi = 1000;
          uint32_t thr = 0xFFFFFFFFU - hi * 46U;
          uint64_t h = (uint64_t)hi << 32;
          check("carry_before", (h | (thr - 1)) << 3);
          check("carry_at", (h | thr) << 3);
          check("carry_after", (h | (thr + 1)) << 3);
        }
        // Mod-8 variations (exercises the >>3 truncation)
        for (int i = 0; i < 8; i++) { check("mod8", 2592000000000ULL + i); }
        if (fail == 0) { ESP_LOGI(TAG, "ALL_PASSED %d tests", pass); }
        else { ESP_LOGE(TAG, "%d FAILED out of %d", fail, pass + fail); }

host:
api:
logger:
@@ -0,0 +1,46 @@
"""Integration test for micros_to_millis Euclidean decomposition."""
from __future__ import annotations
import asyncio
import re
import pytest
from .types import APIClientConnectedFactory, RunCompiledFunction
@pytest.mark.asyncio
async def test_micros_to_millis(
    yaml_config: str,
    run_compiled: RunCompiledFunction,
    api_client_connected: APIClientConnectedFactory,
) -> None:
    """Run the compiled fixture and require its ALL_PASSED log marker.

    Log lines are scanned with ANSI colour codes removed. A line containing
    "ALL_PASSED" marks success; lines containing both "FAILED" and "[MTM"
    are collected and reported as failures.
    """
    success_seen = asyncio.Event()
    failures: list[str] = []

    def handle_log_line(line: str) -> None:
        # Strip terminal colour escapes so substring matching sees plain text.
        plain = re.sub(r"\x1b\[[0-9;]*m", "", line)
        if "ALL_PASSED" in plain:
            success_seen.set()
            return
        if "FAILED" in plain and "[MTM" in plain:
            failures.append(plain)

    async with (
        run_compiled(yaml_config, line_callback=handle_log_line),
        api_client_connected() as client,
    ):
        info = await client.device_info()
        assert info is not None
        assert info.name == "micros-to-millis-test"

        try:
            await asyncio.wait_for(success_seen.wait(), timeout=2.0)
        except TimeoutError:
            # Prefer the collected failure lines over the generic timeout message.
            reason = (
                f"micros_to_millis failures: {failures}"
                if failures
                else "micros_to_millis test timed out"
            )
            pytest.fail(reason)

        assert not failures, f"micros_to_millis failures: {failures}"