fix(cli): decode .env as UTF-8 to avoid GBK crash on Windows
Path.read_text() uses the system locale by default. On Windows CN/JP/KR locales (GBK/CP932/CP949), reading a UTF-8 .env raises UnicodeDecodeError as soon as it contains any non-ASCII byte (e.g. an em dash). Pin encoding="utf-8" on every .env read in hermes_cli to match how the rest of the codebase (load_dotenv at doctor.py:26) already decodes it. Adds a regression test that monkeypatches Path.read_text to simulate a GBK locale and asserts 'hermes doctor' no longer raises. Refs #18637
This commit is contained in:
parent
e2cea6eeba
commit
c5e3a6fb5b
@ -263,8 +263,11 @@ def run_doctor(args):
|
||||
if env_path.exists():
|
||||
check_ok(f"{_DHH}/.env file exists")
|
||||
|
||||
# Check for common issues
|
||||
content = env_path.read_text()
|
||||
# Check for common issues. Pin encoding to UTF-8 because .env files are
|
||||
# written as UTF-8 everywhere in the codebase, while Path.read_text()
|
||||
# defaults to the system locale — which crashes on non-UTF-8 Windows
|
||||
# locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
|
||||
content = env_path.read_text(encoding="utf-8")
|
||||
if _has_provider_env_config(content):
|
||||
check_ok("API key or custom endpoint configured")
|
||||
else:
|
||||
|
||||
@ -289,7 +289,7 @@ def _has_any_provider_configured() -> bool:
|
||||
env_file = get_env_path()
|
||||
if env_file.exists():
|
||||
try:
|
||||
for line in env_file.read_text().splitlines():
|
||||
for line in env_file.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
|
||||
@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
|
||||
|
||||
existing_lines = []
|
||||
if env_path.exists():
|
||||
existing_lines = env_path.read_text().splitlines()
|
||||
existing_lines = env_path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
updated_keys = set()
|
||||
new_lines = []
|
||||
|
||||
@ -51,6 +51,57 @@ class TestProviderEnvDetection:
|
||||
assert not _has_provider_env_config(content)
|
||||
|
||||
|
||||
class TestDoctorEnvFileEncoding:
|
||||
"""Regression for #18637 (bug 3): `hermes doctor` crashed on Windows
|
||||
Chinese locale (GBK) because `.env` was read with Path.read_text() which
|
||||
defaults to the system locale encoding, not UTF-8."""
|
||||
|
||||
def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
|
||||
self, monkeypatch, tmp_path
|
||||
):
|
||||
import pathlib
|
||||
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
# Write a UTF-8 .env containing an em dash (U+2014 = e2 80 94). The
|
||||
# 0x94 byte is exactly the one the issue reporter hit: GBK pairs e2 80,
|
||||
# then reads 0x94 as a lead byte with no valid trail byte after it, so
|
||||
# locale-default reads raise UnicodeDecodeError on Chinese Windows.
|
||||
env_path = hermes_home / ".env"
|
||||
env_path.write_text(
|
||||
"OPENAI_API_KEY=sk-test # em-dash here — should not crash\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
|
||||
|
||||
orig_read_text = pathlib.Path.read_text
|
||||
|
||||
def gbk_like_read_text(self, encoding=None, errors=None, **kwargs):
|
||||
# Simulate a GBK locale: refuse to decode this specific UTF-8
|
||||
# .env unless the caller pins encoding="utf-8".
|
||||
if self == env_path and encoding != "utf-8":
|
||||
raise UnicodeDecodeError(
|
||||
"gbk", b"\x94", 0, 1, "illegal multibyte sequence"
|
||||
)
|
||||
return orig_read_text(self, encoding=encoding, errors=errors, **kwargs)
|
||||
|
||||
monkeypatch.setattr(pathlib.Path, "read_text", gbk_like_read_text)
|
||||
|
||||
# Short-circuit the expensive tool-availability probe — we only
|
||||
# need doctor to get past the .env read without crashing.
|
||||
fake_model_tools = types.SimpleNamespace(
|
||||
check_tool_availability=lambda *a, **kw: (_ for _ in ()).throw(SystemExit(0)),
|
||||
TOOLSET_REQUIREMENTS={},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
|
||||
|
||||
# Run doctor. If the .env read still uses locale encoding, this
|
||||
# raises UnicodeDecodeError and the test fails.
|
||||
with pytest.raises(SystemExit):
|
||||
doctor_mod.run_doctor(Namespace(fix=False))
|
||||
|
||||
|
||||
class TestDoctorToolAvailabilityOverrides:
|
||||
def test_marks_honcho_available_when_configured(self, monkeypatch):
|
||||
monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user