[core] Use ETag in external_files cache to fix re-downloads from raw.githubusercontent.com (#16020)

This commit is contained in:
J. Nick Koston
2026-04-28 20:52:09 -05:00
committed by GitHub
parent d7b21a84a3
commit eec770d622
2 changed files with 379 additions and 42 deletions
+307 -38
View File
@@ -1,5 +1,6 @@
"""Tests for external_files.py functions."""
import os
from pathlib import Path
import time
from unittest.mock import MagicMock, patch
@@ -9,7 +10,54 @@ import requests
from esphome import external_files
from esphome.config_validation import Invalid
from esphome.core import CORE, TimePeriod
from esphome.core import CORE, EsphomeError, TimePeriod
def _seed_etag(cache_file: Path, etag: str) -> Path:
"""Write an ETag sidecar with its mtime synced to the cache file's mtime,
matching the invariant that `_write_etag` enforces in production.
"""
sidecar = external_files._etag_sidecar_path(cache_file)
sidecar.write_text(etag)
file_mtime = int(cache_file.stat().st_mtime)
os.utime(sidecar, (file_mtime, file_mtime))
return sidecar
@pytest.fixture
def mock_requests_head() -> MagicMock:
"""Patch `external_files.requests.head` so the conditional HEAD-request
validator can be tested without doing real HTTP.
"""
with patch("esphome.external_files.requests.head") as m:
yield m
@pytest.fixture
def mock_requests_get() -> MagicMock:
"""Patch `external_files.requests.get` so the download path can be
tested without doing real HTTP.
"""
with patch("esphome.external_files.requests.get") as m:
yield m
@pytest.fixture
def mock_has_remote_file_changed() -> MagicMock:
"""Patch `external_files.has_remote_file_changed` so download tests can
control the conditional check independently from the GET path.
"""
with patch("esphome.external_files.has_remote_file_changed") as m:
yield m
@pytest.fixture
def mock_write_file() -> MagicMock:
"""Patch `external_files.write_file` so atomic-write failures can be
injected without involving the real filesystem helper.
"""
with patch("esphome.external_files.write_file") as m:
yield m
def test_compute_local_file_dir(setup_core: Path) -> None:
@@ -88,9 +136,8 @@ def test_is_file_recent_with_zero_refresh(setup_core: Path) -> None:
assert result is False
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_not_modified(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns False when file not modified."""
test_file = setup_core / "cached.txt"
@@ -98,23 +145,23 @@ def test_has_remote_file_changed_not_modified(
mock_response = MagicMock()
mock_response.status_code = 304
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
assert result is False
mock_head.assert_called_once()
mock_requests_head.assert_called_once()
call_args = mock_head.call_args
call_args = mock_requests_head.call_args
headers = call_args[1]["headers"]
assert external_files.IF_MODIFIED_SINCE in headers
assert external_files.CACHE_CONTROL in headers
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_modified(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns True when file modified."""
test_file = setup_core / "cached.txt"
@@ -122,7 +169,8 @@ def test_has_remote_file_changed_modified(
mock_response = MagicMock()
mock_response.status_code = 200
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
@@ -140,15 +188,16 @@ def test_has_remote_file_changed_no_local_file(setup_core: Path) -> None:
assert result is True
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_network_error(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed returns False on network error when file is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
mock_head.side_effect = requests.exceptions.RequestException("Network error")
mock_requests_head.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
@@ -156,9 +205,8 @@ def test_has_remote_file_changed_network_error(
assert result is False
@patch("esphome.external_files.requests.head")
def test_has_remote_file_changed_timeout(
mock_head: MagicMock, setup_core: Path
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed respects timeout."""
test_file = setup_core / "cached.txt"
@@ -166,15 +214,176 @@ def test_has_remote_file_changed_timeout(
mock_response = MagicMock()
mock_response.status_code = 304
mock_head.return_value = mock_response
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
call_args = mock_head.call_args
call_args = mock_requests_head.call_args
assert call_args[1]["timeout"] == external_files.NETWORK_TIMEOUT
def test_has_remote_file_changed_uses_etag(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed sends If-None-Match when ETag is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
_seed_etag(test_file, '"abc123"')
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
result = external_files.has_remote_file_changed(url, test_file)
assert result is False
headers = mock_requests_head.call_args[1]["headers"]
assert headers[external_files.IF_NONE_MATCH] == '"abc123"'
def test_has_remote_file_changed_no_etag_no_if_none_match(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed omits If-None-Match when no ETag is cached."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
headers = mock_requests_head.call_args[1]["headers"]
assert external_files.IF_NONE_MATCH not in headers
def test_has_remote_file_changed_refreshes_etag_on_304(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""Test has_remote_file_changed updates the cached ETag when the 304 sends a new one."""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
_seed_etag(test_file, '"old"')
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {external_files.ETAG: '"new"'}
mock_requests_head.return_value = mock_response
url = "https://example.com/file.txt"
external_files.has_remote_file_changed(url, test_file)
assert external_files._etag_sidecar_path(test_file).read_text() == '"new"'
def test_has_remote_file_changed_ignores_etag_when_mtime_diverges(
mock_requests_head: MagicMock, setup_core: Path
) -> None:
"""If the cache file was edited out-of-band (mtime no longer matches the
sidecar's), the cached ETag must not be used -- it no longer describes the
bytes on disk.
"""
test_file = setup_core / "cached.txt"
test_file.write_text("cached content")
sidecar = _seed_etag(test_file, '"abc123"')
# Simulate an out-of-band edit to the cache file -- mtime advances by a
# full second (so it diverges at whole-second resolution) but the sidecar
# is left untouched, so the recorded ETag is now stale.
file_stat = test_file.stat()
os.utime(test_file, (file_stat.st_atime, file_stat.st_mtime + 1))
mock_response = MagicMock()
mock_response.status_code = 304
mock_response.headers = {}
mock_requests_head.return_value = mock_response
external_files.has_remote_file_changed("https://example.com/file.txt", test_file)
headers = mock_requests_head.call_args[1]["headers"]
assert external_files.IF_NONE_MATCH not in headers
# Stale sidecar should be removed so future calls don't keep paying the
# mtime-comparison cost on a known-bad sidecar.
assert not sidecar.exists()
def test_download_content_pins_etag_mtime_to_file_mtime(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""After a successful download, the sidecar's mtime must equal the cache
file's mtime so `_read_etag` accepts it on the next call.
"""
test_file = setup_core / "fresh.txt"
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = b"fresh content"
mock_response.headers = {external_files.ETAG: '"deadbeef"'}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
external_files.download_content("https://example.com/file.txt", test_file)
sidecar = external_files._etag_sidecar_path(test_file)
assert int(sidecar.stat().st_mtime) == int(test_file.stat().st_mtime)
def test_write_etag_swallows_write_file_failure(
mock_write_file: MagicMock, setup_core: Path, caplog: pytest.LogCaptureFixture
) -> None:
"""If `write_file` raises, _write_etag must not propagate -- ETag
persistence is best-effort and a failure here must not abort the
surrounding download.
"""
cache_file = setup_core / "cached.txt"
cache_file.write_text("cached content")
mock_write_file.side_effect = EsphomeError("disk full")
with caplog.at_level("DEBUG", logger="esphome.external_files"):
external_files._write_etag(cache_file, '"abc123"')
assert "Could not save ETag" in caplog.text
# Sidecar wasn't created, since write_file was mocked to fail before
# reaching the os.utime step.
assert not external_files._etag_sidecar_path(cache_file).exists()
def test_write_etag_swallows_utime_failure(
setup_core: Path, caplog: pytest.LogCaptureFixture
) -> None:
"""If `os.utime` raises while pinning the sidecar's mtime, _write_etag
must not propagate. The sidecar is still written; if its mtime later
fails to match the cache file, `_read_etag` will discard it on next
read.
"""
cache_file = setup_core / "cached.txt"
cache_file.write_text("cached content")
with (
patch(
"esphome.external_files.os.utime",
side_effect=PermissionError("nope"),
),
caplog.at_level("DEBUG", logger="esphome.external_files"),
):
external_files._write_etag(cache_file, '"abc123"')
assert "Could not sync ETag sidecar mtime" in caplog.text
# write_file succeeded, so the sidecar exists with the new value even
# though we couldn't pin its mtime.
sidecar = external_files._etag_sidecar_path(cache_file)
assert sidecar.exists()
assert sidecar.read_text() == '"abc123"'
def test_compute_local_file_dir_creates_parent_dirs(setup_core: Path) -> None:
"""Test compute_local_file_dir creates parent directories."""
domain = "level1/level2/level3/level4"
@@ -200,10 +409,10 @@ def test_is_file_recent_handles_float_seconds(setup_core: Path) -> None:
assert result is True
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_with_network_error_uses_cache(
mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content uses cached file when network fails."""
test_file = setup_core / "cached.txt"
@@ -211,8 +420,10 @@ def test_download_content_with_network_error_uses_cache(
test_file.write_bytes(cached_content)
# Simulate file has changed, so it tries to download
mock_has_changed.return_value = True
mock_get.side_effect = requests.exceptions.RequestException("Network error")
mock_has_remote_file_changed.return_value = True
mock_requests_get.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
result = external_files.download_content(url, test_file)
@@ -220,17 +431,19 @@ def test_download_content_with_network_error_uses_cache(
assert result == cached_content
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_with_network_error_no_cache_fails(
mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content raises error when network fails and no cache exists."""
test_file = setup_core / "nonexistent.txt"
# Simulate file has changed (doesn't exist), so it tries to download
mock_has_changed.return_value = True
mock_get.side_effect = requests.exceptions.RequestException("Network error")
mock_has_remote_file_changed.return_value = True
mock_requests_get.side_effect = requests.exceptions.RequestException(
"Network error"
)
url = "https://example.com/file.txt"
@@ -238,11 +451,9 @@ def test_download_content_with_network_error_no_cache_fails(
external_files.download_content(url, test_file)
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_skip_external_update_uses_cache(
mock_has_changed: MagicMock,
mock_get: MagicMock,
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content skips network checks when CORE.skip_external_update is set."""
@@ -255,26 +466,25 @@ def test_download_content_skip_external_update_uses_cache(
result = external_files.download_content(url, test_file)
assert result == cached_content
mock_has_changed.assert_not_called()
mock_get.assert_not_called()
mock_has_remote_file_changed.assert_not_called()
mock_requests_get.assert_not_called()
@patch("esphome.external_files.requests.get")
@patch("esphome.external_files.has_remote_file_changed")
def test_download_content_skip_external_update_downloads_when_missing(
mock_has_changed: MagicMock,
mock_get: MagicMock,
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content still downloads when file is missing, even with skip_external_update."""
test_file = setup_core / "missing.txt"
new_content = b"fresh content"
mock_has_changed.return_value = True
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = new_content
mock_response.headers = {}
mock_response.raise_for_status = MagicMock()
mock_get.return_value = mock_response
mock_requests_get.return_value = mock_response
CORE.skip_external_update = True
url = "https://example.com/file.txt"
@@ -282,3 +492,62 @@ def test_download_content_skip_external_update_downloads_when_missing(
assert result == new_content
assert test_file.read_bytes() == new_content
def test_download_content_saves_etag(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
setup_core: Path,
) -> None:
"""Test download_content writes the ETag sidecar after a successful download."""
test_file = setup_core / "fresh.txt"
new_content = b"fresh content"
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = new_content
mock_response.headers = {external_files.ETAG: '"deadbeef"'}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
url = "https://example.com/file.txt"
external_files.download_content(url, test_file)
assert external_files._etag_sidecar_path(test_file).read_text() == '"deadbeef"'
def test_download_content_atomic_write_no_partial_on_failure(
mock_has_remote_file_changed: MagicMock,
mock_requests_get: MagicMock,
mock_write_file: MagicMock,
setup_core: Path,
) -> None:
"""If `write_file` (the atomic-write helper) fails, the existing cache
file must remain untouched and no temp files may be left behind. Patching
`write_file` directly exercises the atomic-rename path -- a failure inside
`write_file` is the only reason the rename wouldn't have happened.
"""
from esphome.core import EsphomeError
test_file = setup_core / "cached.txt"
original_content = b"original content"
test_file.write_bytes(original_content)
mock_has_remote_file_changed.return_value = True
mock_response = MagicMock()
mock_response.content = b"new content"
mock_response.headers = {}
mock_response.raise_for_status = MagicMock()
mock_requests_get.return_value = mock_response
mock_write_file.side_effect = EsphomeError("disk full")
with pytest.raises(EsphomeError, match="disk full"):
external_files.download_content("https://example.com/file.txt", test_file)
# Original file is untouched -- write_file aborted before its rename step.
assert test_file.read_bytes() == original_content
# write_file is responsible for cleaning its own temp files; nothing leaks
# into the cache directory either way.
leftover_tmps = list(setup_core.glob("tmp*"))
assert leftover_tmps == []