diff --git a/cli.py b/cli.py index 4a4ce15b..37aa8a7c 100644 --- a/cli.py +++ b/cli.py @@ -1008,7 +1008,7 @@ def _cprint(text: str): # --------------------------------------------------------------------------- -# File-drop detection — extracted as a pure function for testability. +# File-drop / local attachment detection — extracted as pure helpers for tests. # --------------------------------------------------------------------------- _IMAGE_EXTENSIONS = frozenset({ @@ -1017,12 +1017,91 @@ _IMAGE_EXTENSIONS = frozenset({ }) -def _detect_file_drop(user_input: str) -> "dict | None": - """Detect if *user_input* is a dragged/pasted file path, not a slash command. +def _is_termux_environment() -> bool: + prefix = os.getenv("PREFIX", "") + return bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix) - When a user drags a file into the terminal, macOS pastes the absolute path - (e.g. ``/Users/roland/Desktop/file.png``) which starts with ``/`` and would - otherwise be mistaken for a slash command. + +def _split_path_input(raw: str) -> tuple[str, str]: + """Split a leading file path token from trailing free-form text. + + Supports quoted paths and backslash-escaped spaces so callers can accept + inputs like: + /tmp/pic.png describe this + ~/storage/shared/My\ Photos/cat.png what is this? 
+ "/storage/emulated/0/DCIM/Camera/cat 1.png" summarize + """ + raw = str(raw or "").strip() + if not raw: + return "", "" + + if raw[0] in {'"', "'"}: + quote = raw[0] + pos = 1 + while pos < len(raw): + ch = raw[pos] + if ch == '\\' and pos + 1 < len(raw): + pos += 2 + continue + if ch == quote: + token = raw[1:pos] + remainder = raw[pos + 1 :].strip() + return token, remainder + pos += 1 + return raw[1:], "" + + pos = 0 + while pos < len(raw): + ch = raw[pos] + if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ': + pos += 2 + elif ch == ' ': + break + else: + pos += 1 + + token = raw[:pos].replace('\\ ', ' ') + remainder = raw[pos:].strip() + return token, remainder + + +def _resolve_attachment_path(raw_path: str) -> Path | None: + """Resolve a user-supplied local attachment path. + + Accepts quoted or unquoted paths, expands ``~`` and env vars, and resolves + relative paths from ``TERMINAL_CWD`` when set (matching terminal tool cwd). + Returns ``None`` when the path does not resolve to an existing file. + """ + token = str(raw_path or "").strip() + if not token: + return None + + if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")): + token = token[1:-1].strip() + if not token: + return None + + expanded = os.path.expandvars(os.path.expanduser(token)) + path = Path(expanded) + if not path.is_absolute(): + base_dir = Path(os.getenv("TERMINAL_CWD", os.getcwd())) + path = base_dir / path + + try: + resolved = path.resolve() + except Exception: + resolved = path + + if not resolved.exists() or not resolved.is_file(): + return None + return resolved + + +def _detect_file_drop(user_input: str) -> "dict | None": + """Detect if *user_input* starts with a real local file path. + + This catches dragged/pasted paths before they are mistaken for slash + commands, and also supports Termux-friendly paths like ``~/storage/...``. 
Returns a dict on match:: @@ -1034,29 +1113,31 @@ def _detect_file_drop(user_input: str) -> "dict | None": Returns ``None`` when the input is not a real file path. """ - if not isinstance(user_input, str) or not user_input.startswith("/"): + if not isinstance(user_input, str): return None - # Walk the string absorbing backslash-escaped spaces ("\ "). - raw = user_input - pos = 0 - while pos < len(raw): - ch = raw[pos] - if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ': - pos += 2 # skip escaped space - elif ch == ' ': - break - else: - pos += 1 - - first_token_raw = raw[:pos] - first_token = first_token_raw.replace('\\ ', ' ') - drop_path = Path(first_token) - - if not drop_path.exists() or not drop_path.is_file(): + stripped = user_input.strip() + if not stripped: + return None + + starts_like_path = ( + stripped.startswith("/") + or stripped.startswith("~") + or stripped.startswith("./") + or stripped.startswith("../") + or stripped.startswith('"/') + or stripped.startswith('"~') + or stripped.startswith("'/") + or stripped.startswith("'~") + ) + if not starts_like_path: + return None + + first_token, remainder = _split_path_input(stripped) + drop_path = _resolve_attachment_path(first_token) + if drop_path is None: return None - remainder = raw[pos:].strip() return { "path": drop_path, "is_image": drop_path.suffix.lower() in _IMAGE_EXTENSIONS, @@ -1064,6 +1145,69 @@ def _detect_file_drop(user_input: str) -> "dict | None": } +def _format_image_attachment_badges(attached_images: list[Path], image_counter: int, width: int | None = None) -> str: + """Format the attached-image badge row for the interactive CLI. + + Narrow terminals such as Termux should get a compact summary that fits on a + single row, while wider terminals can show the classic per-image badges. 
+ """ + if not attached_images: + return "" + + width = width or shutil.get_terminal_size((80, 24)).columns + + def _trunc(name: str, limit: int) -> str: + return name if len(name) <= limit else name[: max(1, limit - 3)] + "..." + + if width < 52: + if len(attached_images) == 1: + return f"[📎 {_trunc(attached_images[0].name, 20)}]" + return f"[📎 {len(attached_images)} images attached]" + + if width < 80: + if len(attached_images) == 1: + return f"[📎 {_trunc(attached_images[0].name, 32)}]" + first = _trunc(attached_images[0].name, 20) + extra = len(attached_images) - 1 + return f"[📎 {first}] [+{extra}]" + + base = image_counter - len(attached_images) + 1 + return " ".join( + f"[📎 Image #{base + i}]" + for i in range(len(attached_images)) + ) + + +def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]: + """Collect local image attachments for single-query CLI flows.""" + message = query or "" + images: list[Path] = [] + + if isinstance(message, str): + dropped = _detect_file_drop(message) + if dropped and dropped.get("is_image"): + images.append(dropped["path"]) + message = dropped["remainder"] or f"[User attached image: {dropped['path'].name}]" + + if image_arg: + explicit_path = _resolve_attachment_path(image_arg) + if explicit_path is None: + raise ValueError(f"Image file not found: {image_arg}") + if explicit_path.suffix.lower() not in _IMAGE_EXTENSIONS: + raise ValueError(f"Not a supported image file: {explicit_path}") + images.append(explicit_path) + + deduped: list[Path] = [] + seen: set[str] = set() + for img in images: + key = str(img) + if key in seen: + continue + seen.add(key) + deduped.append(img) + return message, deduped + + class ChatConsole: """Rich Console adapter for prompt_toolkit's patch_stdout context. @@ -2946,6 +3090,14 @@ class HermesCLI: doesn't fire for image-only clipboard content (e.g., VSCode terminal, Windows Terminal with WSL2). 
""" + if _is_termux_environment(): + _cprint( + f" {_DIM}Clipboard image paste is not available on Termux — " + f"use /image or paste a local image path like " + f"~/storage/shared/Pictures/cat.png{_RST}" + ) + return + from hermes_cli.clipboard import has_clipboard_image if has_clipboard_image(): if self._try_attach_clipboard_image(): @@ -2956,7 +3108,31 @@ class HermesCLI: else: _cprint(f" {_DIM}(._.) No image found in clipboard{_RST}") - def _preprocess_images_with_vision(self, text: str, images: list) -> str: + def _handle_image_command(self, cmd_original: str): + """Handle /image — attach a local image file for the next prompt.""" + raw_args = (cmd_original.split(None, 1)[1].strip() if " " in cmd_original else "") + if not raw_args: + hint = "~/storage/shared/Pictures/cat.png" if _is_termux_environment() else "/path/to/image.png" + _cprint(f" {_DIM}Usage: /image e.g. /image {hint}{_RST}") + return + + path_token, _remainder = _split_path_input(raw_args) + image_path = _resolve_attachment_path(path_token) + if image_path is None: + _cprint(f" {_DIM}(>_<) File not found: {path_token}{_RST}") + return + if image_path.suffix.lower() not in _IMAGE_EXTENSIONS: + _cprint(f" {_DIM}(._.) Not a supported image file: {image_path.name}{_RST}") + return + + self._attached_images.append(image_path) + _cprint(f" 📎 Attached image: {image_path.name}") + if _remainder: + _cprint(f" {_DIM}Now type your prompt (or use --image in single-query mode): {_remainder}{_RST}") + elif _is_termux_environment(): + _cprint(f" {_DIM}Tip: type your next message, or run hermes chat -q --image {image_path} \"What do you see?\"{_RST}") + + def _preprocess_images_with_vision(self, text: str, images: list, *, announce: bool = True) -> str: """Analyze attached images via the vision tool and return enriched text. 
Instead of embedding raw base64 ``image_url`` content parts in the @@ -2983,7 +3159,8 @@ class HermesCLI: if not img_path.exists(): continue size_kb = img_path.stat().st_size // 1024 - _cprint(f" {_DIM}👁️ analyzing {img_path.name} ({size_kb}KB)...{_RST}") + if announce: + _cprint(f" {_DIM}👁️ analyzing {img_path.name} ({size_kb}KB)...{_RST}") try: result_json = _asyncio.run( vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) @@ -2996,21 +3173,24 @@ class HermesCLI: f"[If you need a closer look, use vision_analyze with " f"image_url: {img_path}]" ) - _cprint(f" {_DIM}✓ image analyzed{_RST}") + if announce: + _cprint(f" {_DIM}✓ image analyzed{_RST}") else: enriched_parts.append( f"[The user attached an image but it couldn't be analyzed. " f"You can try examining it with vision_analyze using " f"image_url: {img_path}]" ) - _cprint(f" {_DIM}⚠ vision analysis failed — path included for retry{_RST}") + if announce: + _cprint(f" {_DIM}⚠ vision analysis failed — path included for retry{_RST}") except Exception as e: enriched_parts.append( f"[The user attached an image but analysis failed ({e}). 
" f"You can try examining it with vision_analyze using " f"image_url: {img_path}]" ) - _cprint(f" {_DIM}⚠ vision analysis error — path included for retry{_RST}") + if announce: + _cprint(f" {_DIM}⚠ vision analysis error — path included for retry{_RST}") # Combine: vision descriptions first, then the user's original text user_text = text if isinstance(text, str) and text else "" @@ -3104,7 +3284,10 @@ class HermesCLI: _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}") - _cprint(f" {_DIM}Paste image: Alt+V (or /paste){_RST}\n") + if _is_termux_environment(): + _cprint(f" {_DIM}Attach image: /image ~/storage/shared/Pictures/cat.png or start your prompt with a local image path{_RST}\n") + else: + _cprint(f" {_DIM}Paste image: Alt+V (or /paste){_RST}\n") def show_tools(self): """Display available tools with kawaii ASCII art.""" @@ -4550,6 +4733,8 @@ class HermesCLI: self._show_insights(cmd_original) elif canonical == "paste": self._handle_paste_command() + elif canonical == "image": + self._handle_image_command(cmd_original) elif canonical == "reload-mcp": with self._busy_command(self._slow_command_status(cmd_original)): self._reload_mcp() @@ -8066,10 +8251,9 @@ class HermesCLI: def _get_image_bar(): if not cli_ref._attached_images: return [] - base = cli_ref._image_counter - len(cli_ref._attached_images) + 1 - badges = " ".join( - f"[📎 Image #{base + i}]" - for i in range(len(cli_ref._attached_images)) + badges = _format_image_attachment_badges( + cli_ref._attached_images, + cli_ref._image_counter, ) return [("class:image-badge", f" {badges} ")] @@ -8542,6 +8726,7 @@ class HermesCLI: def main( query: str = None, q: str = None, + image: str = None, toolsets: str = None, skills: str | list[str] | tuple[str, ...] = None, model: str = None, @@ -8567,6 +8752,7 @@ def main( Args: query: Single query to execute (then exit). 
Alias: -q q: Shorthand for --query + image: Optional local image path to attach to a single query toolsets: Comma-separated list of toolsets to enable (e.g., "web,terminal") skills: Comma-separated or repeated list of skills to preload for the session model: Model to use (default: anthropic/claude-opus-4-20250514) @@ -8587,6 +8773,7 @@ def main( python cli.py --toolsets web,terminal # Use specific toolsets python cli.py --skills hermes-agent-dev,github-auth python cli.py -q "What is Python?" # Single query mode + python cli.py -q "Describe this" --image ~/storage/shared/Pictures/cat.png python cli.py --list-tools # List tools and exit python cli.py --resume 20260225_143052_a1b2c3 # Resume session python cli.py -w # Start in isolated git worktree @@ -8709,13 +8896,21 @@ def main( atexit.register(_run_cleanup) # Handle single query mode - if query: + if query or image: + query, single_query_images = _collect_query_images(query, image) if quiet: # Quiet mode: suppress banner, spinner, tool previews. # Only print the final response and parseable session info. 
cli.tool_progress_mode = "off" if cli._ensure_runtime_credentials(): - turn_route = cli._resolve_turn_agent_config(query) + effective_query = query + if single_query_images: + effective_query = cli._preprocess_images_with_vision( + query, + single_query_images, + announce=False, + ) + turn_route = cli._resolve_turn_agent_config(effective_query) if turn_route["signature"] != cli._active_agent_route_signature: cli.agent = None if cli._init_agent( @@ -8726,7 +8921,7 @@ def main( cli.agent.quiet_mode = True cli.agent.suppress_status_output = True result = cli.agent.run_conversation( - user_message=query, + user_message=effective_query, conversation_history=cli.conversation_history, ) response = result.get("final_response", "") if isinstance(result, dict) else str(result) @@ -8741,8 +8936,10 @@ def main( sys.exit(1) else: cli.show_banner() - cli.console.print(f"[bold blue]Query:[/] {query}") - cli.chat(query) + _query_label = query or ("[image attached]" if single_query_images else "") + if _query_label: + cli.console.print(f"[bold blue]Query:[/] {_query_label}") + cli.chat(query, images=single_query_images or None) cli._print_exit_summary() return diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 5231dccb..9f26b4bb 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -135,6 +135,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ cli_only=True, aliases=("gateway",)), CommandDef("paste", "Check clipboard for an image and attach it", "Info", cli_only=True), + CommandDef("image", "Attach a local image file for your next prompt", "Info", + cli_only=True, args_hint=""), CommandDef("update", "Update Hermes Agent to the latest version", "Info", gateway_only=True), diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 5a6e5867..7d4a4a92 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -646,6 +646,7 @@ def cmd_chat(args): "verbose": args.verbose, "quiet": getattr(args, "quiet", False), "query": args.query, + "image": getattr(args, 
"image", None), "resume": getattr(args, "resume", None), "worktree": getattr(args, "worktree", False), "checkpoints": getattr(args, "checkpoints", False), @@ -4291,6 +4292,10 @@ For more help on a command: "-q", "--query", help="Single query (non-interactive mode)" ) + chat_parser.add_argument( + "--image", + help="Optional local image path to attach to a single query" + ) chat_parser.add_argument( "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)" diff --git a/tests/cli/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py index 386aba5d..78503de8 100644 --- a/tests/cli/test_cli_file_drop.py +++ b/tests/cli/test_cli_file_drop.py @@ -147,6 +147,20 @@ class TestEscapedSpaces: assert result["path"] == tmp_image_with_spaces assert result["remainder"] == "what is this?" + def test_tilde_prefixed_path(self, tmp_path, monkeypatch): + home = tmp_path / "home" + img = home / "storage" / "shared" / "Pictures" / "cat.png" + img.parent.mkdir(parents=True, exist_ok=True) + img.write_bytes(b"\x89PNG\r\n\x1a\n") + monkeypatch.setenv("HOME", str(home)) + + result = _detect_file_drop("~/storage/shared/Pictures/cat.png what is this?") + + assert result is not None + assert result["path"] == img + assert result["is_image"] is True + assert result["remainder"] == "what is this?" 
import sys
from pathlib import Path
from unittest.mock import patch

from cli import (
    HermesCLI,
    _collect_query_images,
    _format_image_attachment_badges,
)

# Eight-byte PNG signature; enough for the helpers, which only check the suffix.
_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"


def _make_cli():
    """Build a ``HermesCLI`` without running ``__init__``, with no attachments."""
    instance = HermesCLI.__new__(HermesCLI)
    instance._attached_images = []
    return instance


def _make_image(path: Path) -> Path:
    """Create *path* (and its parents) holding a PNG signature; return *path*."""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(_PNG_SIGNATURE)
    return path


class TestImageCommand:
    """Behaviour of the interactive ``/image`` command handler."""

    def test_handle_image_command_attaches_local_image(self, tmp_path):
        photo = _make_image(tmp_path / "photo.png")
        shell = _make_cli()

        with patch("cli._cprint"):
            shell._handle_image_command(f"/image {photo}")

        assert shell._attached_images == [photo]

    def test_handle_image_command_supports_quoted_path_with_spaces(self, tmp_path):
        photo = _make_image(tmp_path / "my photo.png")
        shell = _make_cli()

        with patch("cli._cprint"):
            shell._handle_image_command(f'/image "{photo}"')

        assert shell._attached_images == [photo]

    def test_handle_image_command_rejects_non_image_file(self, tmp_path):
        notes = tmp_path / "notes.txt"
        notes.write_text("hello\n", encoding="utf-8")
        shell = _make_cli()

        with patch("cli._cprint") as printed:
            shell._handle_image_command(f"/image {notes}")

        assert shell._attached_images == []
        # Flatten every positional argument of every _cprint call into one string.
        fragments = []
        for call in printed.call_args_list:
            fragments.extend(str(arg) for arg in call.args)
        assert "Not a supported image file" in " ".join(fragments)


class TestCollectQueryImages:
    """Image collection for single-query (non-interactive) runs."""

    def test_collect_query_images_accepts_explicit_image_arg(self, tmp_path):
        diagram = _make_image(tmp_path / "diagram.png")

        text, found = _collect_query_images("describe this", str(diagram))

        assert text == "describe this"
        assert found == [diagram]

    def test_collect_query_images_extracts_leading_path(self, tmp_path):
        camera = _make_image(tmp_path / "camera.png")

        text, found = _collect_query_images(f"{camera} what do you see?")

        assert text == "what do you see?"
        assert found == [camera]

    def test_collect_query_images_supports_tilde_paths(self, tmp_path, monkeypatch):
        fake_home = tmp_path / "home"
        cat = _make_image(fake_home / "storage" / "shared" / "Pictures" / "cat.png")
        monkeypatch.setenv("HOME", str(fake_home))

        text, found = _collect_query_images("describe this", "~/storage/shared/Pictures/cat.png")

        assert text == "describe this"
        assert found == [cat]


class TestImageBadgeFormatting:
    """Compact badge rendering on narrow terminals."""

    def test_compact_badges_use_filename_on_narrow_terminals(self, tmp_path):
        shot = _make_image(tmp_path / "Screenshot 2026-04-09 at 11.22.33 AM.png")

        rendered = _format_image_attachment_badges([shot], image_counter=1, width=40)

        assert rendered.startswith("[📎 ")
        assert "Image #1" not in rendered

    def test_compact_badges_summarize_multiple_images(self, tmp_path):
        attachments = [
            _make_image(tmp_path / "one.png"),
            _make_image(tmp_path / "two.png"),
        ]

        rendered = _format_image_attachment_badges(attachments, image_counter=2, width=45)

        assert rendered == "[📎 2 images attached]"


def test_chat_subcommand_accepts_image_flag(monkeypatch):
    """``hermes chat -q ... --image PATH`` hands both values to ``cmd_chat``."""
    import hermes_cli.main as main_mod

    seen = {}

    def fake_cmd_chat(args):
        seen.update(query=args.query, image=args.image)

    argv = ["hermes", "chat", "-q", "hello", "--image", "~/storage/shared/Pictures/cat.png"]
    monkeypatch.setattr(main_mod, "cmd_chat", fake_cmd_chat)
    monkeypatch.setattr(sys, "argv", argv)

    main_mod.main()

    assert seen == {
        "query": "hello",
        "image": "~/storage/shared/Pictures/cat.png",
    }