refactor: consolidate symlink-safe atomic replace into shared helper

Extract the islink/realpath guard from the 16743 fix into a single
atomic_replace() helper in utils.py, then migrate every os.replace()
call site in the codebase to use it.

The original PR #16777 correctly identified and fixed the bug, but
only patched 9 of ~24 call sites. The same bug class (managed
deployments that symlink state files silently losing the link on
every write) still existed at auth.json, sessions file, gateway
config, env_loader, webhook subscriptions, debug store, model
catalog, pairing, google OAuth, nous rate guard, and more.

Rather than add another 10+ copies of the same three-line guard,
consolidate into atomic_replace(tmp, target) which:
- resolves symlinks via os.path.realpath before os.replace
- returns the resolved real path so callers can re-apply permissions
- is a drop-in replacement for os.replace at the use sites

Changes:
- utils.py: new atomic_replace() helper + atomic_json_write /
  atomic_yaml_write now call it instead of inlining the guard
- 16 files: all os.replace() call sites migrated to atomic_replace()
  - agent/{google_oauth, nous_rate_guard, shell_hooks}.py
  - cron/jobs.py
  - gateway/{pairing, session, platforms/telegram}.py
  - hermes_cli/{auth, config, debug, env_loader, model_catalog, webhook}.py
  - tools/{memory_tool, skill_manager_tool, skills_sync}.py

Tests: tests/test_atomic_replace_symlinks.py pins the invariant for
atomic_replace + atomic_json_write + atomic_yaml_write, covers plain
files, first-time creates, broken symlinks, and permission preservation.

Refs #16743
Builds on #16777 by @vominh1919.
This commit is contained in:
Teknium 2026-04-28 04:51:38 -07:00 committed by Teknium
parent 3ab97a32d1
commit b61d9b297a
18 changed files with 225 additions and 46 deletions

View File

@ -98,6 +98,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"
# Regex patterns for fallback scraping from an installed gemini-cli.
import re as _re
from utils import atomic_replace
_CLIENT_ID_PATTERN = _re.compile(
r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
)
@ -499,7 +500,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
fh.flush()
os.fsync(fh.fileno())
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
os.replace(tmp_path, path)
atomic_replace(tmp_path, path)
finally:
try:
if tmp_path.exists():

View File

@ -18,6 +18,7 @@ import os
import tempfile
import time
from typing import Any, Mapping, Optional
from utils import atomic_replace
logger = logging.getLogger(__name__)
@ -118,7 +119,7 @@ def record_nous_rate_limit(
try:
with os.fdopen(fd, "w") as f:
json.dump(state, f)
os.replace(tmp_path, path)
atomic_replace(tmp_path, path)
except Exception:
# Clean up temp file on failure
try:

View File

@ -76,6 +76,7 @@ except ImportError: # pragma: no cover
fcntl = None # type: ignore[assignment]
from hermes_constants import get_hermes_home
from utils import atomic_replace
logger = logging.getLogger(__name__)
@ -568,9 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
try:
with os.fdopen(fd, "w") as fh:
fh.write(json.dumps(data, indent=2, sort_keys=True))
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(p) if os.path.islink(p) else p
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, p)
except Exception:
try:
os.unlink(tmp_path)

View File

@ -21,6 +21,7 @@ from typing import Optional, Dict, List, Any, Union
logger = logging.getLogger(__name__)
from hermes_time import now as _hermes_now
from utils import atomic_replace
try:
from croniter import croniter
@ -367,9 +368,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(JOBS_FILE) if os.path.islink(JOBS_FILE) else JOBS_FILE
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, JOBS_FILE)
_secure_file(JOBS_FILE)
except BaseException:
try:
@ -865,9 +864,7 @@ def save_job_output(job_id: str, output: str):
f.write(output)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(output_file) if os.path.islink(output_file) else output_file
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, output_file)
_secure_file(output_file)
except BaseException:
try:

View File

@ -28,6 +28,7 @@ from pathlib import Path
from typing import Optional
from hermes_constants import get_hermes_dir
from utils import atomic_replace
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
f.write(data)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, str(path))
atomic_replace(tmp_path, path)
try:
os.chmod(path, 0o600)
except OSError:

View File

@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
discover_fallback_ips,
parse_fallback_ip_env,
)
from utils import atomic_replace
def check_telegram_requirements() -> bool:
@ -554,7 +555,7 @@ class TelegramAdapter(BasePlatformAdapter):
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, config_path)
atomic_replace(tmp_path, config_path)
except BaseException:
try:
os.unlink(tmp_path)

View File

@ -64,6 +64,7 @@ from .whatsapp_identity import (
canonical_whatsapp_identifier,
normalize_whatsapp_identifier,
)
from utils import atomic_replace
@dataclass
@ -705,7 +706,7 @@ class SessionStore:
json.dump(data, f, indent=2)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, sessions_file)
atomic_replace(tmp_path, sessions_file)
except BaseException:
try:
os.unlink(tmp_path)

View File

@ -43,6 +43,7 @@ import yaml
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
from hermes_constants import OPENROUTER_BASE_URL
from utils import atomic_replace
logger = logging.getLogger(__name__)
@ -828,7 +829,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
handle.write(payload)
handle.flush()
os.fsync(handle.fileno())
os.replace(tmp_path, auth_file)
atomic_replace(tmp_path, auth_file)
try:
dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
except OSError:

View File

@ -227,6 +227,7 @@ def get_container_exec_info() -> Optional[dict]:
# Re-export from hermes_constants — canonical definition lives there.
from hermes_constants import get_hermes_home # noqa: F811,E402
from utils import atomic_replace
def get_config_path() -> Path:
"""Get the main config file path."""
@ -3666,9 +3667,7 @@ def sanitize_env_file() -> int:
f.writelines(sanitized)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(env_path) if os.path.islink(env_path) else env_path
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, env_path)
except BaseException:
try:
os.unlink(tmp_path)
@ -3771,9 +3770,7 @@ def save_env_value(key: str, value: str):
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(env_path) if os.path.islink(env_path) else env_path
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
@ -3829,9 +3826,7 @@ def remove_env_value(key: str) -> bool:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(env_path) if os.path.islink(env_path) else env_path
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)

View File

@ -18,6 +18,7 @@ from pathlib import Path
from typing import Optional
from hermes_constants import get_hermes_home
from utils import atomic_replace
# ---------------------------------------------------------------------------
@ -79,7 +80,7 @@ def _save_pending(entries: list[dict]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
os.replace(tmp, path)
atomic_replace(tmp, path)
except OSError:
# Non-fatal — worst case the user has to run ``hermes debug delete``
# manually.

View File

@ -7,6 +7,7 @@ import sys
from pathlib import Path
from dotenv import load_dotenv
from utils import atomic_replace
# Env var name suffixes that indicate credential values. These are the
@ -127,7 +128,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
f.writelines(sanitized)
f.flush()
os.fsync(f.fileno())
os.replace(tmp, path)
atomic_replace(tmp, path)
except BaseException:
try:
os.unlink(tmp)

View File

@ -54,6 +54,7 @@ from pathlib import Path
from typing import Any
from hermes_cli import __version__ as _HERMES_VERSION
from utils import atomic_replace
logger = logging.getLogger(__name__)
@ -190,7 +191,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
with open(tmp, "w") as fh:
json.dump(data, fh, indent=2)
fh.write("\n")
os.replace(tmp, path)
atomic_replace(tmp, path)
except OSError as exc:
logger.info("model catalog cache write failed: %s", exc)

View File

@ -19,6 +19,7 @@ from pathlib import Path
from typing import Dict
from hermes_constants import display_hermes_home
from utils import atomic_replace
_SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
@ -52,7 +53,7 @@ def _save_subscriptions(subs: Dict[str, dict]) -> None:
json.dumps(subs, indent=2, ensure_ascii=False),
encoding="utf-8",
)
os.replace(str(tmp_path), str(path))
atomic_replace(tmp_path, path)
def _get_webhook_config() -> dict:

View File

@ -0,0 +1,160 @@
"""Regression tests for GitHub #16743 — atomic writes must preserve symlinks.
``os.replace(tmp, target)`` replaces whatever exists at ``target`` including
symlinks, which it swaps for a regular file. Managed deployments that
symlink ``~/.hermes/config.yaml`` (and other state files) to a git-tracked
profile package were silently detached on every config write.
The fix: a shared ``atomic_replace`` helper in ``utils.py`` that resolves the
target through ``os.path.realpath`` when it is a symlink, so the real file is
overwritten in-place while the symlink survives. All atomic-write sites in
the codebase were migrated to the helper; these tests pin that invariant.
"""
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
import pytest
import yaml
# Ensure the repo root is importable when running via `pytest tests/...`.
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from utils import atomic_json_write, atomic_replace, atomic_yaml_write
# ─── Direct helper ────────────────────────────────────────────────────────────
def _write_tmp(dir_: Path, content: str) -> Path:
tmp = dir_ / ".src.tmp"
tmp.write_text(content, encoding="utf-8")
return tmp
def test_atomic_replace_preserves_symlink(tmp_path: Path) -> None:
real = tmp_path / "real.yaml"
link = tmp_path / "link.yaml"
real.write_text("original\n", encoding="utf-8")
link.symlink_to(real)
tmp = _write_tmp(tmp_path, "updated\n")
returned = atomic_replace(tmp, link)
assert link.is_symlink(), "symlink must not be replaced with a regular file"
assert real.read_text(encoding="utf-8") == "updated\n"
assert Path(returned) == real
# Follow the symlink — same content.
assert link.read_text(encoding="utf-8") == "updated\n"
def test_atomic_replace_regular_file(tmp_path: Path) -> None:
target = tmp_path / "plain.yaml"
target.write_text("old\n", encoding="utf-8")
tmp = _write_tmp(tmp_path, "fresh\n")
returned = atomic_replace(tmp, target)
assert Path(returned) == target
assert target.read_text(encoding="utf-8") == "fresh\n"
assert not target.is_symlink()
def test_atomic_replace_first_time_create(tmp_path: Path) -> None:
target = tmp_path / "new.yaml"
assert not target.exists()
tmp = _write_tmp(tmp_path, "brand new\n")
returned = atomic_replace(tmp, target)
assert Path(returned) == target
assert target.read_text(encoding="utf-8") == "brand new\n"
def test_atomic_replace_accepts_pathlike_and_str(tmp_path: Path) -> None:
target = tmp_path / "dual.json"
target.write_text("{}", encoding="utf-8")
# str inputs
tmp1 = _write_tmp(tmp_path, "1")
atomic_replace(str(tmp1), str(target))
assert target.read_text(encoding="utf-8") == "1"
# Path inputs
tmp2 = _write_tmp(tmp_path, "2")
atomic_replace(tmp2, target)
assert target.read_text(encoding="utf-8") == "2"
# ─── atomic_json_write / atomic_yaml_write wiring ──────────────────────────
def test_atomic_json_write_preserves_symlink(tmp_path: Path) -> None:
real = tmp_path / "real.json"
link = tmp_path / "link.json"
real.write_text("{}", encoding="utf-8")
link.symlink_to(real)
atomic_json_write(link, {"hello": "world"})
assert link.is_symlink()
loaded = json.loads(real.read_text(encoding="utf-8"))
assert loaded == {"hello": "world"}
def test_atomic_yaml_write_preserves_symlink(tmp_path: Path) -> None:
real = tmp_path / "real.yaml"
link = tmp_path / "link.yaml"
real.write_text("placeholder: true\n", encoding="utf-8")
link.symlink_to(real)
atomic_yaml_write(link, {"model": {"provider": "openrouter"}})
assert link.is_symlink()
data = yaml.safe_load(real.read_text(encoding="utf-8"))
assert data == {"model": {"provider": "openrouter"}}
def test_atomic_json_write_preserves_symlink_permissions(tmp_path: Path) -> None:
"""Symlinked targets keep the real file's permission bits."""
if os.name != "posix":
pytest.skip("POSIX-only")
real = tmp_path / "real.json"
link = tmp_path / "link.json"
real.write_text("{}", encoding="utf-8")
os.chmod(real, 0o644)
link.symlink_to(real)
atomic_json_write(link, {"x": 1})
import stat as _stat
mode = _stat.S_IMODE(real.stat().st_mode)
assert mode == 0o644, f"permissions drifted after symlinked write: {oct(mode)}"
# ─── Broken-symlink edge case ─────────────────────────────────────────────
def test_atomic_replace_broken_symlink_creates_target(tmp_path: Path) -> None:
"""A symlink pointing at a missing file: the write should create the
real target (resolving via realpath) rather than leaving the dangling
link in place as a regular file.
"""
missing = tmp_path / "does_not_exist_yet.yaml"
link = tmp_path / "link.yaml"
link.symlink_to(missing)
assert link.is_symlink()
assert not missing.exists()
tmp = _write_tmp(tmp_path, "created-through-link\n")
atomic_replace(tmp, link)
assert link.is_symlink(), "symlink must be preserved"
assert missing.exists(), "real target should now exist"
assert missing.read_text(encoding="utf-8") == "created-through-link\n"

View File

@ -33,6 +33,8 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Dict, Any, List, Optional
from utils import atomic_replace
# fcntl is Unix-only; on Windows use msvcrt for file locking
msvcrt = None
try:
@ -448,10 +450,7 @@ class MemoryStore:
f.write(content)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
path_str = str(path)
real_path = os.path.realpath(path_str) if os.path.islink(path_str) else path_str
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, path)
except BaseException:
# Clean up temp file on any failure
try:

View File

@ -42,6 +42,8 @@ from pathlib import Path
from hermes_constants import get_hermes_home, display_hermes_home
from typing import Dict, Any, Optional, Tuple
from utils import atomic_replace
logger = logging.getLogger(__name__)
# Import security scanner — external hub installs always get scanned;
@ -309,9 +311,7 @@ def _atomic_write_text(file_path: Path, content: str, encoding: str = "utf-8") -
try:
with os.fdopen(fd, "w", encoding=encoding) as f:
f.write(content)
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(file_path) if os.path.islink(file_path) else file_path
os.replace(temp_path, real_path)
atomic_replace(temp_path, file_path)
except Exception:
# Clean up temp file on error
try:

View File

@ -28,6 +28,7 @@ import shutil
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Dict, List, Tuple
from utils import atomic_replace
logger = logging.getLogger(__name__)
@ -98,9 +99,7 @@ def _write_manifest(entries: Dict[str, str]):
f.write(data)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file (GitHub #16743).
real_path = os.path.realpath(MANIFEST_FILE) if os.path.islink(MANIFEST_FILE) else MANIFEST_FILE
os.replace(tmp_path, real_path)
atomic_replace(tmp_path, MANIFEST_FILE)
except BaseException:
try:
os.unlink(tmp_path)

View File

@ -58,6 +58,30 @@ def _restore_file_mode(path: Path, mode: "int | None") -> None:
pass
def atomic_replace(tmp_path: Union[str, Path], target: Union[str, Path]) -> str:
"""Atomically move *tmp_path* onto *target*, preserving symlinks.
``os.replace(tmp, target)`` atomically swaps ``tmp`` into place at
``target``. When ``target`` is a symlink, the symlink itself is
replaced with a regular file silently detaching managed deployments
that symlink ``config.yaml`` / ``SOUL.md`` / ``auth.json`` etc. from
``~/.hermes/`` to a git-tracked profile package or dotfiles repo
(GitHub #16743).
This helper resolves the symlink first so ``os.replace`` writes to
the real file in-place while the symlink survives. For non-symlink
and non-existent paths the behavior is identical to a plain
``os.replace`` call.
Returns the resolved real path used for the replace, so callers that
need to re-apply permissions can target it instead of the symlink.
"""
target_str = str(target)
real_path = os.path.realpath(target_str) if os.path.islink(target_str) else target_str
os.replace(str(tmp_path), real_path)
return real_path
def atomic_json_write(
path: Union[str, Path],
data: Any,
@ -99,10 +123,8 @@ def atomic_json_write(
)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file instead of
# replacing the symlink with a regular file (GitHub #16743).
real_path = os.path.realpath(path) if os.path.islink(path) else path
os.replace(tmp_path, real_path)
# Preserve symlinks — swap in-place on the real file (GitHub #16743).
real_path = atomic_replace(tmp_path, path)
_restore_file_mode(real_path, original_mode)
except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for
@ -153,10 +175,8 @@ def atomic_yaml_write(
f.write(extra_content)
f.flush()
os.fsync(f.fileno())
# Resolve symlinks so os.replace writes to the real file instead of
# replacing the symlink with a regular file (GitHub #16743).
real_path = os.path.realpath(path) if os.path.islink(path) else path
os.replace(tmp_path, real_path)
# Preserve symlinks — swap in-place on the real file (GitHub #16743).
real_path = atomic_replace(tmp_path, path)
_restore_file_mode(real_path, original_mode)
except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level