fix(cli): decode .env as UTF-8 to avoid GBK crash on Windows

Path.read_text() uses the system locale by default. On Windows CN/JP/KR
locales (GBK/CP932/CP949), reading a UTF-8 .env raises UnicodeDecodeError
as soon as it contains any non-ASCII byte (e.g. an em dash).

Pin encoding="utf-8" on every .env read in hermes_cli to match how the
rest of the codebase (load_dotenv at doctor.py:26) already decodes it.

Adds a regression test that monkeypatches Path.read_text to simulate a
GBK locale and asserts 'hermes doctor' no longer raises.

Refs #18637
This commit is contained in:
CoreyNoDream 2026-05-02 15:14:03 +08:00 committed by Teknium
parent e2cea6eeba
commit c5e3a6fb5b
4 changed files with 58 additions and 4 deletions

View File

@ -263,8 +263,11 @@ def run_doctor(args):
if env_path.exists(): if env_path.exists():
check_ok(f"{_DHH}/.env file exists") check_ok(f"{_DHH}/.env file exists")
# Check for common issues # Check for common issues. Pin encoding to UTF-8 because .env files are
content = env_path.read_text() # written as UTF-8 everywhere in the codebase, while Path.read_text()
# defaults to the system locale — which crashes on non-UTF-8 Windows
# locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
content = env_path.read_text(encoding="utf-8")
if _has_provider_env_config(content): if _has_provider_env_config(content):
check_ok("API key or custom endpoint configured") check_ok("API key or custom endpoint configured")
else: else:

View File

@ -289,7 +289,7 @@ def _has_any_provider_configured() -> bool:
env_file = get_env_path() env_file = get_env_path()
if env_file.exists(): if env_file.exists():
try: try:
for line in env_file.read_text().splitlines(): for line in env_file.read_text(encoding="utf-8").splitlines():
line = line.strip() line = line.strip()
if line.startswith("#") or "=" not in line: if line.startswith("#") or "=" not in line:
continue continue

View File

@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
existing_lines = [] existing_lines = []
if env_path.exists(): if env_path.exists():
existing_lines = env_path.read_text().splitlines() existing_lines = env_path.read_text(encoding="utf-8").splitlines()
updated_keys = set() updated_keys = set()
new_lines = [] new_lines = []

View File

@ -51,6 +51,57 @@ class TestProviderEnvDetection:
assert not _has_provider_env_config(content) assert not _has_provider_env_config(content)
class TestDoctorEnvFileEncoding:
    """Regression coverage for #18637 (bug 3).

    `hermes doctor` crashed on Windows with a Chinese (GBK) locale because
    `.env` was read via Path.read_text() without an explicit encoding, so the
    system locale encoding was used instead of UTF-8.
    """

    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
        self, monkeypatch, tmp_path
    ):
        """Run doctor on a non-ASCII UTF-8 .env under a simulated GBK locale."""
        import pathlib

        home_dir = tmp_path / ".hermes"
        home_dir.mkdir()
        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home_dir)

        # The em dash (U+2014) is e2 80 94 in UTF-8. A GBK decoder pairs
        # e2 80 into one two-byte character, which leaves 0x94 as a lead
        # byte followed by an ASCII space -- not a valid trail byte. That is
        # the byte the issue reporter hit: locale-default reads raise
        # UnicodeDecodeError on Chinese Windows.
        env_file = home_dir / ".env"
        env_file.write_text(
            "OPENAI_API_KEY=sk-test # em-dash here — should not crash\n",
            encoding="utf-8",
        )

        real_read_text = pathlib.Path.read_text

        def locale_bound_read_text(path, encoding=None, errors=None, **kwargs):
            # Stand-in for a GBK locale: this particular UTF-8 .env can only
            # be decoded when the caller explicitly asks for encoding="utf-8".
            pinned_to_utf8 = encoding == "utf-8"
            if path == env_file and not pinned_to_utf8:
                raise UnicodeDecodeError(
                    "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
                )
            return real_read_text(path, encoding=encoding, errors=errors, **kwargs)

        monkeypatch.setattr(pathlib.Path, "read_text", locale_bound_read_text)

        def abort_tool_probe(*args, **kwargs):
            # Bail out of doctor as soon as the tool-availability probe is
            # reached; the test only cares that the .env read came first
            # without crashing.
            raise SystemExit(0)

        stub_model_tools = types.SimpleNamespace(
            check_tool_availability=abort_tool_probe,
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)

        # A locale-encoded .env read would surface as UnicodeDecodeError
        # here rather than the expected SystemExit, failing the test.
        with pytest.raises(SystemExit):
            doctor_mod.run_doctor(Namespace(fix=False))
class TestDoctorToolAvailabilityOverrides: class TestDoctorToolAvailabilityOverrides:
def test_marks_honcho_available_when_configured(self, monkeypatch): def test_marks_honcho_available_when_configured(self, monkeypatch):
monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True) monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)