[core] Use ETag in external_files cache to fix re-downloads from raw.githubusercontent.com (#16020)

This commit is contained in:
J. Nick Koston
2026-04-28 20:52:09 -05:00
committed by GitHub
parent d7b21a84a3
commit eec770d622
2 changed files with 379 additions and 42 deletions
+72 -4
View File
@@ -1,14 +1,17 @@
from __future__ import annotations
import contextlib
from datetime import UTC, datetime
import logging
import os
from pathlib import Path
import requests
import esphome.config_validation as cv
from esphome.const import __version__
from esphome.core import CORE, TimePeriodSeconds
from esphome.core import CORE, EsphomeError, TimePeriodSeconds
from esphome.helpers import write_file
_LOGGER = logging.getLogger(__name__)
CODEOWNERS = ["@landonr"]
@@ -16,12 +19,72 @@ CODEOWNERS = ["@landonr"]
NETWORK_TIMEOUT = 30
IF_MODIFIED_SINCE = "If-Modified-Since"
IF_NONE_MATCH = "If-None-Match"
ETAG = "ETag"
CACHE_CONTROL = "Cache-Control"
CACHE_CONTROL_MAX_AGE = "max-age="
CONTENT_DISPOSITION = "content-disposition"
TEMP_DIR = "temp"
def _etag_sidecar_path(local_file_path: Path) -> Path:
return local_file_path.parent / f".{local_file_path.name}.etag"
def _mtime_seconds(path: Path) -> int:
"""Return `path`'s mtime as integer seconds.
Whole seconds is the common-denominator resolution across all
filesystems we run on (FAT/exFAT 2s, NTFS 100ns, APFS/ext4 ns), so
comparisons survive setting+reading round-trips that would lose
sub-second precision on lower-resolution filesystems.
"""
return int(path.stat().st_mtime)
def _read_etag(local_file_path: Path) -> str | None:
"""Return the cached ETag if its sidecar's mtime still matches the cache
file's. A mismatch means the cache file was modified out-of-band, so the
ETag no longer describes its contents -- delete the stale sidecar and
return None.
"""
etag_path = _etag_sidecar_path(local_file_path)
try:
if _mtime_seconds(etag_path) != _mtime_seconds(local_file_path):
_LOGGER.debug(
"ETag sidecar mtime mismatch at %s; treating as stale",
local_file_path,
)
etag_path.unlink()
return None
return etag_path.read_text().strip() or None
except OSError:
return None
def _write_etag(local_file_path: Path, etag: str | None) -> None:
etag_path = _etag_sidecar_path(local_file_path)
if not etag:
# ETag persistence is best-effort; matches `_read_etag`'s tolerance.
with contextlib.suppress(OSError):
etag_path.unlink()
return
try:
write_file(etag_path, etag)
except EsphomeError as e:
_LOGGER.debug("Could not save ETag for %s: %s", local_file_path, e)
return
# Pin the sidecar's mtime to the cache file's mtime. _read_etag relies on
# this match to detect out-of-band edits to the cache file.
try:
file_mtime = _mtime_seconds(local_file_path)
os.utime(etag_path, (file_mtime, file_mtime))
except OSError as e:
_LOGGER.debug(
"Could not sync ETag sidecar mtime for %s: %s", local_file_path, e
)
def has_remote_file_changed(url: str, local_file_path: Path) -> bool:
if local_file_path.exists():
_LOGGER.debug("has_remote_file_changed: File exists at %s", local_file_path)
@@ -35,14 +98,17 @@ def has_remote_file_changed(url: str, local_file_path: Path) -> bool:
IF_MODIFIED_SINCE: local_modification_time_str,
CACHE_CONTROL: CACHE_CONTROL_MAX_AGE + "3600",
}
if etag := _read_etag(local_file_path):
headers[IF_NONE_MATCH] = etag
response = requests.head(
url, headers=headers, timeout=NETWORK_TIMEOUT, allow_redirects=True
)
_LOGGER.debug(
"has_remote_file_changed: File %s, Local modified %s, response code %d",
"has_remote_file_changed: File %s, Local modified %s, ETag %s, response code %d",
local_file_path,
local_modification_time_str,
etag or "<none>",
response.status_code,
)
@@ -51,6 +117,8 @@ def has_remote_file_changed(url: str, local_file_path: Path) -> bool:
"has_remote_file_changed: File not modified since %s",
local_modification_time_str,
)
if (new_etag := response.headers.get(ETAG)) and new_etag != etag:
_write_etag(local_file_path, new_etag)
return False
_LOGGER.debug("has_remote_file_changed: File modified")
return True
@@ -112,7 +180,7 @@ def download_content(url: str, path: Path, timeout: int = NETWORK_TIMEOUT) -> by
return path.read_bytes()
raise cv.Invalid(f"Could not download from {url}: {e}") from e
path.parent.mkdir(parents=True, exist_ok=True)
data = req.content
path.write_bytes(data)
write_file(path, data)
_write_etag(path, req.headers.get(ETAG))
return data
+307 -38
View File
@@ -1,5 +1,6 @@
"""Tests for external_files.py functions."""
import os
from pathlib import Path
import time
from unittest.mock import MagicMock, patch
@@ -9,7 +10,54 @@ import requests
from esphome import external_files
from esphome.config_validation import Invalid
from esphome.core import CORE, TimePeriod
from esphome.core import CORE, EsphomeError, TimePeriod
def _seed_etag(cache_file: Path, etag: str) -> Path:
"""Write an ETag sidecar with its mtime synced to the cache file's mtime,
matching the invariant that `_write_etag` enforces in production.
"""
sidecar = external_files._etag_sidecar_path(cache_file)
sidecar.write_text(etag)
file_mtime = int(cache_file.stat().st_mtime)
os.utime(sidecar, (file_mtime, file_mtime))
return sidecar
@pytest.fixture
def mock_requests_head() -> MagicMock:
"""Patch `external_files.requests.head` so the conditional HEAD-request
validator can be tested without doing real HTTP.
"""
with patch("esphome.external_files.requests.head") as m:
yield m
@pytest.fixture
def mock_requests_get() -> MagicMock:
"""Patch `external_files.requests.get` so the download path can be
tested without doing real HTTP.
"""
with patch("esphome.external_files.requests.get") as m:
yield m
@pytest.fixture
def mock_has_remote_file_changed() -> MagicMock:
"""Patch `external_files.has_remote_file_changed` so download tests can
control the conditional check independently from the GET path.
"""
with patch("esphome.external_files.has_remote_file_changed") as m:
yield m
@pytest.fixture
def mock_write_file() -> MagicMock:
"""Patch `external_files.write_file` so atomic-write failures can be
injected without involving the real filesystem helper.
"""
with patch("esphome.external_files.write_file") as m:
yield m
def test_compute_local_file_dir(setup_core: Path) -> None:
@@ -88,9 +136,8 @@ def test_is_file_recent_with_zero_refresh(setup_core: Path) -> None:
assert result is False
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_not_modified(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns False when file not modified."""
test_file = setup_core / "cached.txt"
@@ -98,23 +145,23 @@ def test_has_remote_file_changed_not_modified(
mock_response = MagicMock()
mock_response.status_code = 304
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
assert result is False
mock_head.assert_called_once()
mock_requests_head.assert_called_once()
call_args = mock_head.call_args
call_args = mock_requests_head.call_args
headers = call_args[1]["headers"]
assert external_files.IF_MODIFIED_SINCE in headers
assert external_files.CACHE_CONTROL in headers
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_modified(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns True when file modified."""
test_file = setup_core / "cached.txt"
@@ -122,7 +169,8 @@ def test_has_remote_file_changed_modified(
mock_response = MagicMock()
mock_response.status_code = 200
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
@@ -140,15 +188,16 @@ def test_has_remote_file_changed_no_local_file(setup_core: Path) -> None:
assert result is True
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_network_error(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns False on network error when file is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
mock_head.side_effect = requests.exceptions.RequestException("Network error")
mock_requests_head.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
@@ -156,9 +205,8 @@ def test_has_remote_file_changed_network_error(
assert result is False
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_timeout(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed respects timeout."""
test_file = setup_core / "cached.txt"
@@ -166,15 +214,176 @@ def test_has_remote_file_changed_timeout(
mock_response = MagicMock()
mock_response.status_code = 304
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
call_args = mock_head.call_args
call_args = mock_requests_head.call_args
assert call_args[1]["timeout"] == external_files.NETWORK_TIMEOUT
def test_has_remote_file_changed_uses_etag(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed sends If-None-Match when ETag is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
_seed_etag(test_file, '"abc123"')
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
assert result is False
headers = mock_requests_head.call_args[1]["headers"]
assert headers[external_files.IF_NONE_MATCH] == '"abc123"'
def test_has_remote_file_changed_no_etag_no_if_none_match(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed omits If-None-Match when no ETag is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
headers = mock_requests_head.call_args[1]["headers"]
assert external_files.IF_NONE_MATCH not in headers
def test_has_remote_file_changed_refreshes_etag_on_304(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed updates the cached ETag when the 304 sends a new one."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
_seed_etag(test_file, '"old"')
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {external_files.ETAG: '"new"'}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
assert external_files._etag_sidecar_path(test_file).read_text() == '"new"'
def test_has_remote_file_changed_ignores_etag_when_mtime_diverges(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""If the cache file was edited out-of-band (mtime no longer matches the
sidecar's), the cached ETag must not be used -- it no longer describes the
bytes on disk.
"""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
sidecar = _seed_etag(test_file, '"abc123"')
# Simulate an out-of-band edit to the cache file -- mtime advances by a
# full second (so it diverges at whole-second resolution) but the sidecar
# is left untouched, so the recorded ETag is now stale.
file_stat = test_file.stat()
os.utime(test_file, (file_stat.st_atime, file_stat.st_mtime + 1))
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
external_files.has_remote_file_changed("https://example.com/file.txt", test_file)
headers = mock_requests_head.call_args[1]["headers"]
assert external_files.IF_NONE_MATCH not in headers
# Stale sidecar should be removed so future calls don't keep paying the
# mtime-comparison cost on a known-bad sidecar.
assert not sidecar.exists()
def test_download_content_pins_etag_mtime_to_file_mtime(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""After a successful download, the sidecar's mtime must equal the cache
file's mtime so `_read_etag` accepts it on the next call.
"""
test_file = setup_core / "fresh.txt"
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = b"fresh content"
mock_response.headers = {external_files.ETAG: '"deadbeef"'}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
external_files.download_content("https://example.com/file.txt", test_file)
sidecar = external_files._etag_sidecar_path(test_file)
assert int(sidecar.stat().st_mtime) == int(test_file.stat().st_mtime)
def test_write_etag_swallows_write_file_failure(
mock_write_file: MagicMock, setup_core: Path, caplog: pytest.LogCaptureFixture
) -> None:
"""If `write_file` raises, _write_etag must not propagate -- ETag
persistence is best-effort and a failure here must not abort the
surrounding download.
"""
cache_file = setup_core / "cached.txt"
cache_file.write_text("cached content")
mock_write_file.side_effect = EsphomeError("disk full")
with caplog.at_level("DEBUG", logger="esphome.external_files"):
external_files._write_etag(cache_file, '"abc123"')
assert "Could not save ETag" in caplog.text
# Sidecar wasn't created, since write_file was mocked to fail before
# reaching the os.utime step.
assert not external_files._etag_sidecar_path(cache_file).exists()
def test_write_etag_swallows_utime_failure(
setup_core: Path, caplog: pytest.LogCaptureFixture
) -> None:
"""If `os.utime` raises while pinning the sidecar's mtime, _write_etag
must not propagate. The sidecar is still written; if its mtime later
fails to match the cache file, `_read_etag` will discard it on next
read.
"""
cache_file = setup_core / "cached.txt"
cache_file.write_text("cached content")
with (
patch(
"esphome.external_files.os.utime",
side_effect=PermissionError("nope"),
),
caplog.at_level("DEBUG", logger="esphome.external_files"),
):
external_files._write_etag(cache_file, '"abc123"')
assert "Could not sync ETag sidecar mtime" in caplog.text
# write_file succeeded, so the sidecar exists with the new value even
# though we couldn't pin its mtime.
sidecar = external_files._etag_sidecar_path(cache_file)
assert sidecar.exists()
assert sidecar.read_text() == '"abc123"'
def test_compute_local_file_dir_creates_parent_dirs(setup_core: Path) -> None:
"""Test compute_local_file_dir creates parent directories."""
domain = "level1/level2/level3/level4"
@@ -200,10 +409,10 @@ def test_is_file_recent_handles_float_seconds(setup_core: Path) -> None:
assert result is True
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_with_network_error_uses_cache(
mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content uses cached file when network fails."""
test_file = setup_core / "cached.txt"
@@ -211,8 +420,10 @@ def test_download_content_with_network_error_uses_cache(
test_file.write_bytes(cached_content)
# Simulate file has changed, so it tries to download
mock_has_changed.return_value = True
mock_get.side_effect = requests.exceptions.RequestException("Network error")
mock_has_remote_file_changed.return_value = True
mock_requests_get.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
result = external_files.download_content(url, test_file)
@@ -220,17 +431,19 @@ def test_download_content_with_network_error_uses_cache(
assert result == cached_content
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_with_network_error_no_cache_fails(
mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content raises error when network fails and no cache exists."""
test_file = setup_core / "nonexistent.txt"
# Simulate file has changed (doesn't exist), so it tries to download
mock_has_changed.return_value = True
mock_get.side_effect = requests.exceptions.RequestException("Network error")
mock_has_remote_file_changed.return_value = True
mock_requests_get.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
@@ -238,11 +451,9 @@ def test_download_content_with_network_error_no_cache_fails(
external_files.download_content(url, test_file)
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_skip_external_update_uses_cache(
mock_has_changed: MagicMock,
mock_get: MagicMock,
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content skips network checks when CORE.skip_external_update is set."""
@@ -255,26 +466,25 @@ def test_download_content_skip_external_update_uses_cache(
result = external_files.download_content(url, test_file)
assert result == cached_content
mock_has_changed.assert_not_called()
mock_get.assert_not_called()
mock_has_remote_file_changed.assert_not_called()
mock_requests_get.assert_not_called()
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_skip_external_update_downloads_when_missing(
mock_has_changed: MagicMock,
mock_get: MagicMock,
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content still downloads when file is missing, even with skip_external_update."""
test_file = setup_core / "missing.txt"
new_content = b"fresh content"
mock_has_changed.return_value = True
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = new_content
mock_response.headers = {}
mock_response.raise_for_status = MagicMock()
mock_get.return_value = mock_response
mock_requests_get.return_value = mock_response
CORE.skip_external_update = True
url = "https://example.com/file.txt"
@@ -282,3 +492,62 @@ def test_download_content_skip_external_update_downloads_when_missing(
assert result == new_content
assert test_file.read_bytes() == new_content
def test_download_content_saves_etag(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content writes the ETag sidecar after a successful download."""
test_file = setup_core / "fresh.txt"
new_content = b"fresh content"
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = new_content
mock_response.headers = {external_files.ETAG: '"deadbeef"'}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
url = "https://example.com/file.txt"
external_files.download_content(url, test_file)
assert external_files._etag_sidecar_path(test_file).read_text() == '"deadbeef"'
def test_download_content_atomic_write_no_partial_on_failure(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
mock_write_file: MagicMock,
setup_core: Path,
) -> None:
"""If `write_file` (the atomic-write helper) fails, the existing cache
file must remain untouched and no temp files may be left behind. Patching
`write_file` directly exercises the atomic-rename path -- a failure inside
`write_file` is the only reason the rename wouldn't have happened.
"""
from esphome.core import EsphomeError
test_file = setup_core / "cached.txt"
original_content = b"original content"
test_file.write_bytes(original_content)
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = b"new content"
mock_response.headers = {}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
mock_write_file.side_effect = EsphomeError("disk full")
with pytest.raises(EsphomeError, match="disk full"):
external_files.download_content("https://example.com/file.txt", test_file)
# Original file is untouched -- write_file aborted before its rename step.
assert test_file.read_bytes() == original_content
# write_file is responsible for cleaning its own temp files; nothing leaks
# into the cache directory either way.
leftover_tmps = list(setup_core.glob("tmp*"))
assert leftover_tmps == []