diff --git a/esphome/external_files.py b/esphome/external_files.py index b6f6149ebbb..bd29dc93b1d 100644 --- a/esphome/external_files.py +++ b/esphome/external_files.py @@ -1,14 +1,17 @@ from __future__ import annotations +import contextlib from datetime import UTC, datetime import logging +import os from pathlib import Path import requests import esphome.config_validation as cv from esphome.const import __version__ -from esphome.core import CORE, TimePeriodSeconds +from esphome.core import CORE, EsphomeError, TimePeriodSeconds +from esphome.helpers import write_file _LOGGER = logging.getLogger(__name__) CODEOWNERS = ["@landonr"] @@ -16,12 +19,72 @@ CODEOWNERS = ["@landonr"] NETWORK_TIMEOUT = 30 IF_MODIFIED_SINCE = "If-Modified-Since" +IF_NONE_MATCH = "If-None-Match" +ETAG = "ETag" CACHE_CONTROL = "Cache-Control" CACHE_CONTROL_MAX_AGE = "max-age=" CONTENT_DISPOSITION = "content-disposition" TEMP_DIR = "temp" +def _etag_sidecar_path(local_file_path: Path) -> Path: + return local_file_path.parent / f".{local_file_path.name}.etag" + + +def _mtime_seconds(path: Path) -> int: + """Return `path`'s mtime as integer seconds. + + Whole seconds is the common-denominator resolution across all + filesystems we run on (FAT/exFAT 2s, NTFS 100ns, APFS/ext4 ns), so + comparisons survive setting+reading round-trips that would lose + sub-second precision on lower-resolution filesystems. + """ + return int(path.stat().st_mtime) + + +def _read_etag(local_file_path: Path) -> str | None: + """Return the cached ETag if its sidecar's mtime still matches the cache + file's. A mismatch means the cache file was modified out-of-band, so the + ETag no longer describes its contents -- delete the stale sidecar and + return None. + """ + etag_path = _etag_sidecar_path(local_file_path) + try: + if _mtime_seconds(etag_path) != _mtime_seconds(local_file_path): + _LOGGER.debug( + "ETag sidecar mtime mismatch at %s; treating as stale", + local_file_path, + ) + etag_path.unlink() + return None + return etag_path.read_text().strip() or None + except OSError: + return None + + +def _write_etag(local_file_path: Path, etag: str | None) -> None: + etag_path = _etag_sidecar_path(local_file_path) + if not etag: + # ETag persistence is best-effort; matches `_read_etag`'s tolerance. + with contextlib.suppress(OSError): + etag_path.unlink() + return + try: + write_file(etag_path, etag) + except EsphomeError as e: + _LOGGER.debug("Could not save ETag for %s: %s", local_file_path, e) + return + # Pin the sidecar's mtime to the cache file's mtime. _read_etag relies on + # this match to detect out-of-band edits to the cache file. + try: + file_mtime = _mtime_seconds(local_file_path) + os.utime(etag_path, (file_mtime, file_mtime)) + except OSError as e: + _LOGGER.debug( + "Could not sync ETag sidecar mtime for %s: %s", local_file_path, e + ) + + def has_remote_file_changed(url: str, local_file_path: Path) -> bool: if local_file_path.exists(): _LOGGER.debug("has_remote_file_changed: File exists at %s", local_file_path) @@ -35,14 +98,17 @@ def has_remote_file_changed(url: str, local_file_path: Path) -> bool: IF_MODIFIED_SINCE: local_modification_time_str, CACHE_CONTROL: CACHE_CONTROL_MAX_AGE + "3600", } + if etag := _read_etag(local_file_path): + headers[IF_NONE_MATCH] = etag response = requests.head( url, headers=headers, timeout=NETWORK_TIMEOUT, allow_redirects=True ) _LOGGER.debug( - "has_remote_file_changed: File %s, Local modified %s, response code %d", + "has_remote_file_changed: File %s, Local modified %s, ETag %s, response code %d", local_file_path, local_modification_time_str, + etag or "", response.status_code, ) @@ -51,6 +117,8 @@ def has_remote_file_changed(url: str, local_file_path: Path) -> bool: "has_remote_file_changed: File not modified since %s", local_modification_time_str, ) + if (new_etag := response.headers.get(ETAG)) and new_etag != etag: + _write_etag(local_file_path, new_etag) return False _LOGGER.debug("has_remote_file_changed: File modified") return True @@ -112,7 +180,7 @@ def download_content(url: str, path: Path, timeout: int = NETWORK_TIMEOUT) -> by return path.read_bytes() raise cv.Invalid(f"Could not download from {url}: {e}") from e - path.parent.mkdir(parents=True, exist_ok=True) data = req.content - path.write_bytes(data) + write_file(path, data) + _write_etag(path, req.headers.get(ETAG)) return data diff --git a/tests/unit_tests/test_external_files.py b/tests/unit_tests/test_external_files.py index 4b0826db044..f4d268abe03 100644 --- a/tests/unit_tests/test_external_files.py +++ b/tests/unit_tests/test_external_files.py @@ -1,5 +1,6 @@ """Tests for external_files.py functions.""" +import os from pathlib import Path import time from unittest.mock import MagicMock, patch @@ -9,7 +10,54 @@ import requests from esphome import external_files from esphome.config_validation import Invalid -from esphome.core import CORE, TimePeriod +from esphome.core import CORE, EsphomeError, TimePeriod + + +def _seed_etag(cache_file: Path, etag: str) -> Path: + """Write an ETag sidecar with its mtime synced to the cache file's mtime, + matching the invariant that `_write_etag` enforces in production. + """ + sidecar = external_files._etag_sidecar_path(cache_file) + sidecar.write_text(etag) + file_mtime = int(cache_file.stat().st_mtime) + os.utime(sidecar, (file_mtime, file_mtime)) + return sidecar + + +@pytest.fixture +def mock_requests_head() -> MagicMock: + """Patch `external_files.requests.head` so the conditional HEAD-request + validator can be tested without doing real HTTP. + """ + with patch("esphome.external_files.requests.head") as m: + yield m + + +@pytest.fixture +def mock_requests_get() -> MagicMock: + """Patch `external_files.requests.get` so the download path can be + tested without doing real HTTP. + """ + with patch("esphome.external_files.requests.get") as m: + yield m + + +@pytest.fixture +def mock_has_remote_file_changed() -> MagicMock: + """Patch `external_files.has_remote_file_changed` so download tests can + control the conditional check independently from the GET path. + """ + with patch("esphome.external_files.has_remote_file_changed") as m: + yield m + + +@pytest.fixture +def mock_write_file() -> MagicMock: + """Patch `external_files.write_file` so atomic-write failures can be + injected without involving the real filesystem helper. + """ + with patch("esphome.external_files.write_file") as m: + yield m def test_compute_local_file_dir(setup_core: Path) -> None: @@ -88,9 +136,8 @@ def test_is_file_recent_with_zero_refresh(setup_core: Path) -> None: assert result is False -@patch("esphome.external_files.requests.head") def test_has_remote_file_changed_not_modified( - mock_head: MagicMock, setup_core: Path + mock_requests_head: MagicMock, setup_core: Path ) -> None: """Test has_remote_file_changed returns False when file not modified.""" test_file = setup_core / "cached.txt" @@ -98,23 +145,23 @@ def test_has_remote_file_changed_not_modified( mock_response = MagicMock() mock_response.status_code = 304 - mock_head.return_value = mock_response + mock_response.headers = {} + mock_requests_head.return_value = mock_response url = "https://example.com/file.txt" result = external_files.has_remote_file_changed(url, test_file) assert result is False - mock_head.assert_called_once() + mock_requests_head.assert_called_once() - call_args = mock_head.call_args + call_args = mock_requests_head.call_args headers = call_args[1]["headers"] assert external_files.IF_MODIFIED_SINCE in headers assert external_files.CACHE_CONTROL in headers -@patch("esphome.external_files.requests.head") def test_has_remote_file_changed_modified( - mock_head: MagicMock, setup_core: Path + mock_requests_head: MagicMock, setup_core: Path ) -> None: """Test has_remote_file_changed returns True when file modified.""" test_file = setup_core / "cached.txt" @@ -122,7 +169,8 @@ def test_has_remote_file_changed_modified( mock_response = MagicMock() mock_response.status_code = 200 - mock_head.return_value = mock_response + mock_response.headers = {} + mock_requests_head.return_value = mock_response url = "https://example.com/file.txt" result = external_files.has_remote_file_changed(url, test_file) @@ -140,15 +188,16 @@ def test_has_remote_file_changed_no_local_file(setup_core: Path) -> None: assert result is True -@patch("esphome.external_files.requests.head") def test_has_remote_file_changed_network_error( - mock_head: MagicMock, setup_core: Path + mock_requests_head: MagicMock, setup_core: Path ) -> None: """Test has_remote_file_changed returns False on network error when file is cached.""" test_file = setup_core / "cached.txt" test_file.write_text("cached content") - mock_head.side_effect = requests.exceptions.RequestException("Network error") + mock_requests_head.side_effect = requests.exceptions.RequestException( + "Network error" + ) url = "https://example.com/file.txt" result = external_files.has_remote_file_changed(url, test_file) @@ -156,9 +205,8 @@ def test_has_remote_file_changed_network_error( assert result is False -@patch("esphome.external_files.requests.head") def test_has_remote_file_changed_timeout( - mock_head: MagicMock, setup_core: Path + mock_requests_head: MagicMock, setup_core: Path ) -> None: """Test has_remote_file_changed respects timeout.""" test_file = setup_core / "cached.txt" @@ -166,15 +214,176 @@ def test_has_remote_file_changed_timeout( mock_response = MagicMock() mock_response.status_code = 304 - mock_head.return_value = mock_response + mock_response.headers = {} + mock_requests_head.return_value = mock_response url = "https://example.com/file.txt" external_files.has_remote_file_changed(url, test_file) - call_args = mock_head.call_args + call_args = mock_requests_head.call_args assert call_args[1]["timeout"] == external_files.NETWORK_TIMEOUT +def test_has_remote_file_changed_uses_etag( + mock_requests_head: MagicMock, setup_core: Path +) -> None: + """Test has_remote_file_changed sends If-None-Match when ETag is cached.""" + test_file = setup_core / "cached.txt" + test_file.write_text("cached content") + _seed_etag(test_file, '"abc123"') + + mock_response = MagicMock() + mock_response.status_code = 304 + mock_response.headers = {} + mock_requests_head.return_value = mock_response + + url = "https://example.com/file.txt" + result = external_files.has_remote_file_changed(url, test_file) + + assert result is False + headers = mock_requests_head.call_args[1]["headers"] + assert headers[external_files.IF_NONE_MATCH] == '"abc123"' + + +def test_has_remote_file_changed_no_etag_no_if_none_match( + mock_requests_head: MagicMock, setup_core: Path +) -> None: + """Test has_remote_file_changed omits If-None-Match when no ETag is cached.""" + test_file = setup_core / "cached.txt" + test_file.write_text("cached content") + + mock_response = MagicMock() + mock_response.status_code = 304 + mock_response.headers = {} + mock_requests_head.return_value = mock_response + + url = "https://example.com/file.txt" + external_files.has_remote_file_changed(url, test_file) + + headers = mock_requests_head.call_args[1]["headers"] + assert external_files.IF_NONE_MATCH not in headers + + +def test_has_remote_file_changed_refreshes_etag_on_304( + mock_requests_head: MagicMock, setup_core: Path +) -> None: + """Test has_remote_file_changed updates the cached ETag when the 304 sends a new one.""" + test_file = setup_core / "cached.txt" + test_file.write_text("cached content") + _seed_etag(test_file, '"old"') + + mock_response = MagicMock() + mock_response.status_code = 304 + mock_response.headers = {external_files.ETAG: '"new"'} + mock_requests_head.return_value = mock_response + + url = "https://example.com/file.txt" + external_files.has_remote_file_changed(url, test_file) + + assert external_files._etag_sidecar_path(test_file).read_text() == '"new"' + + +def test_has_remote_file_changed_ignores_etag_when_mtime_diverges( + mock_requests_head: MagicMock, setup_core: Path +) -> None: + """If the cache file was edited out-of-band (mtime no longer matches the + sidecar's), the cached ETag must not be used -- it no longer describes the + bytes on disk. + """ + test_file = setup_core / "cached.txt" + test_file.write_text("cached content") + sidecar = _seed_etag(test_file, '"abc123"') + + # Simulate an out-of-band edit to the cache file -- mtime advances by a + # full second (so it diverges at whole-second resolution) but the sidecar + # is left untouched, so the recorded ETag is now stale. + file_stat = test_file.stat() + os.utime(test_file, (file_stat.st_atime, file_stat.st_mtime + 1)) + + mock_response = MagicMock() + mock_response.status_code = 304 + mock_response.headers = {} + mock_requests_head.return_value = mock_response + + external_files.has_remote_file_changed("https://example.com/file.txt", test_file) + + headers = mock_requests_head.call_args[1]["headers"] + assert external_files.IF_NONE_MATCH not in headers + # Stale sidecar should be removed so future calls don't keep paying the + # mtime-comparison cost on a known-bad sidecar. + assert not sidecar.exists() + + +def test_download_content_pins_etag_mtime_to_file_mtime( + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, + setup_core: Path, +) -> None: + """After a successful download, the sidecar's mtime must equal the cache + file's mtime so `_read_etag` accepts it on the next call. + """ + test_file = setup_core / "fresh.txt" + mock_has_remote_file_changed.return_value = True + mock_response = MagicMock() + mock_response.content = b"fresh content" + mock_response.headers = {external_files.ETAG: '"deadbeef"'} + mock_response.raise_for_status = MagicMock() + mock_requests_get.return_value = mock_response + + external_files.download_content("https://example.com/file.txt", test_file) + + sidecar = external_files._etag_sidecar_path(test_file) + assert int(sidecar.stat().st_mtime) == int(test_file.stat().st_mtime) + + +def test_write_etag_swallows_write_file_failure( + mock_write_file: MagicMock, setup_core: Path, caplog: pytest.LogCaptureFixture +) -> None: + """If `write_file` raises, _write_etag must not propagate -- ETag + persistence is best-effort and a failure here must not abort the + surrounding download. + """ + cache_file = setup_core / "cached.txt" + cache_file.write_text("cached content") + mock_write_file.side_effect = EsphomeError("disk full") + + with caplog.at_level("DEBUG", logger="esphome.external_files"): + external_files._write_etag(cache_file, '"abc123"') + + assert "Could not save ETag" in caplog.text + # Sidecar wasn't created, since write_file was mocked to fail before + # reaching the os.utime step. + assert not external_files._etag_sidecar_path(cache_file).exists() + + +def test_write_etag_swallows_utime_failure( + setup_core: Path, caplog: pytest.LogCaptureFixture +) -> None: + """If `os.utime` raises while pinning the sidecar's mtime, _write_etag + must not propagate. The sidecar is still written; if its mtime later + fails to match the cache file, `_read_etag` will discard it on next + read. + """ + cache_file = setup_core / "cached.txt" + cache_file.write_text("cached content") + + with ( + patch( + "esphome.external_files.os.utime", + side_effect=PermissionError("nope"), + ), + caplog.at_level("DEBUG", logger="esphome.external_files"), + ): + external_files._write_etag(cache_file, '"abc123"') + + assert "Could not sync ETag sidecar mtime" in caplog.text + # write_file succeeded, so the sidecar exists with the new value even + # though we couldn't pin its mtime. + sidecar = external_files._etag_sidecar_path(cache_file) + assert sidecar.exists() + assert sidecar.read_text() == '"abc123"' + + def test_compute_local_file_dir_creates_parent_dirs(setup_core: Path) -> None: """Test compute_local_file_dir creates parent directories.""" domain = "level1/level2/level3/level4" @@ -200,10 +409,10 @@ def test_is_file_recent_handles_float_seconds(setup_core: Path) -> None: assert result is True -@patch("esphome.external_files.requests.get") -@patch("esphome.external_files.has_remote_file_changed") def test_download_content_with_network_error_uses_cache( - mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, + setup_core: Path, ) -> None: """Test download_content uses cached file when network fails.""" test_file = setup_core / "cached.txt" @@ -211,8 +420,10 @@ def test_download_content_with_network_error_uses_cache( test_file.write_bytes(cached_content) # Simulate file has changed, so it tries to download - mock_has_changed.return_value = True - mock_get.side_effect = requests.exceptions.RequestException("Network error") + mock_has_remote_file_changed.return_value = True + mock_requests_get.side_effect = requests.exceptions.RequestException( + "Network error" + ) url = "https://example.com/file.txt" result = external_files.download_content(url, test_file) @@ -220,17 +431,19 @@ def test_download_content_with_network_error_uses_cache( assert result == cached_content -@patch("esphome.external_files.requests.get") -@patch("esphome.external_files.has_remote_file_changed") def test_download_content_with_network_error_no_cache_fails( - mock_has_changed: MagicMock, mock_get: MagicMock, setup_core: Path + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, + setup_core: Path, ) -> None: """Test download_content raises error when network fails and no cache exists.""" test_file = setup_core / "nonexistent.txt" # Simulate file has changed (doesn't exist), so it tries to download - mock_has_changed.return_value = True - mock_get.side_effect = requests.exceptions.RequestException("Network error") + mock_has_remote_file_changed.return_value = True + mock_requests_get.side_effect = requests.exceptions.RequestException( + "Network error" + ) url = "https://example.com/file.txt" @@ -238,11 +451,9 @@ def test_download_content_with_network_error_no_cache_fails( external_files.download_content(url, test_file) -@patch("esphome.external_files.requests.get") -@patch("esphome.external_files.has_remote_file_changed") def test_download_content_skip_external_update_uses_cache( - mock_has_changed: MagicMock, - mock_get: MagicMock, + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, setup_core: Path, ) -> None: """Test download_content skips network checks when CORE.skip_external_update is set.""" @@ -255,26 +466,25 @@ def test_download_content_skip_external_update_uses_cache( result = external_files.download_content(url, test_file) assert result == cached_content - mock_has_changed.assert_not_called() - mock_get.assert_not_called() + mock_has_remote_file_changed.assert_not_called() + mock_requests_get.assert_not_called() -@patch("esphome.external_files.requests.get") -@patch("esphome.external_files.has_remote_file_changed") def test_download_content_skip_external_update_downloads_when_missing( - mock_has_changed: MagicMock, - mock_get: MagicMock, + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, setup_core: Path, ) -> None: """Test download_content still downloads when file is missing, even with skip_external_update.""" test_file = setup_core / "missing.txt" new_content = b"fresh content" - mock_has_changed.return_value = True + mock_has_remote_file_changed.return_value = True mock_response = MagicMock() mock_response.content = new_content + mock_response.headers = {} mock_response.raise_for_status = MagicMock() - mock_get.return_value = mock_response + mock_requests_get.return_value = mock_response CORE.skip_external_update = True url = "https://example.com/file.txt" @@ -282,3 +492,62 @@ def test_download_content_skip_external_update_downloads_when_missing( assert result == new_content assert test_file.read_bytes() == new_content + + +def test_download_content_saves_etag( + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, + setup_core: Path, +) -> None: + """Test download_content writes the ETag sidecar after a successful download.""" + test_file = setup_core / "fresh.txt" + new_content = b"fresh content" + + mock_has_remote_file_changed.return_value = True + mock_response = MagicMock() + mock_response.content = new_content + mock_response.headers = {external_files.ETAG: '"deadbeef"'} + mock_response.raise_for_status = MagicMock() + mock_requests_get.return_value = mock_response + + url = "https://example.com/file.txt" + external_files.download_content(url, test_file) + + assert external_files._etag_sidecar_path(test_file).read_text() == '"deadbeef"' + + +def test_download_content_atomic_write_no_partial_on_failure( + mock_has_remote_file_changed: MagicMock, + mock_requests_get: MagicMock, + mock_write_file: MagicMock, + setup_core: Path, +) -> None: + """If `write_file` (the atomic-write helper) fails, the existing cache + file must remain untouched and no temp files may be left behind. Patching + `write_file` directly exercises the atomic-rename path -- a failure inside + `write_file` is the only reason the rename wouldn't have happened. + """ + from esphome.core import EsphomeError + + test_file = setup_core / "cached.txt" + original_content = b"original content" + test_file.write_bytes(original_content) + + mock_has_remote_file_changed.return_value = True + mock_response = MagicMock() + mock_response.content = b"new content" + mock_response.headers = {} + mock_response.raise_for_status = MagicMock() + mock_requests_get.return_value = mock_response + + mock_write_file.side_effect = EsphomeError("disk full") + + with pytest.raises(EsphomeError, match="disk full"): + external_files.download_content("https://example.com/file.txt", test_file) + + # Original file is untouched -- write_file aborted before its rename step. + assert test_file.read_bytes() == original_content + # write_file is responsible for cleaning its own temp files; nothing leaks + # into the cache directory either way. + leftover_tmps = list(setup_core.glob("tmp*")) + assert leftover_tmps == []