Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.
This commit is contained in:
teknium 2026-01-29 06:10:24 +00:00
parent 54ca0997ee
commit 248acf715e
12 changed files with 2626 additions and 134 deletions

View File

@ -2,14 +2,15 @@
# Copy this file to .env and fill in your API keys
# =============================================================================
# LLM PROVIDER (OpenRouter - Primary)
# LLM PROVIDER (OpenRouter)
# =============================================================================
# OpenRouter provides access to many models through one API
# Get at: https://openrouter.ai/keys
# All LLM calls go through OpenRouter - no direct provider keys needed
# Get your key at: https://openrouter.ai/keys
OPENROUTER_API_KEY=
# Default model to use (OpenRouter format: provider/model)
# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash
# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
LLM_MODEL=anthropic/claude-sonnet-4
# =============================================================================
@ -31,14 +32,17 @@ FAL_KEY=
# =============================================================================
# TERMINAL TOOL CONFIGURATION (mini-swe-agent backend)
# =============================================================================
# Backend type: "local", "docker", or "modal"
# Backend type: "local", "singularity", "docker", or "modal"
# - local: Runs directly on your machine (fastest, no isolation)
# - docker: Runs in Docker containers (isolated, requires Docker installed)
# - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed)
# - docker: Runs in Docker containers (isolated, requires Docker + docker group)
# - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
TERMINAL_ENV=docker
TERMINAL_ENV=singularity
# Docker image to use (for docker and modal backends)
TERMINAL_DOCKER_IMAGE=python:3.11-slim
# Container images (for singularity/docker/modal backends)
TERMINAL_DOCKER_IMAGE=python:3.11
TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
TERMINAL_MODAL_IMAGE=python:3.11
# Working directory inside the container
TERMINAL_CWD=/tmp
@ -57,6 +61,73 @@ TERMINAL_LIFETIME_SECONDS=300
# This will authenticate via browser and store credentials locally.
# No API key needed in .env - Modal handles auth automatically.
# =============================================================================
# BROWSER TOOL CONFIGURATION (agent-browser + Browserbase)
# =============================================================================
# Browser automation requires Browserbase cloud service for remote browser execution.
# This allows the agent to navigate websites, fill forms, and extract information.
#
# STEALTH MODES:
# - Basic Stealth: ALWAYS active (random fingerprints, auto CAPTCHA solving)
# - Advanced Stealth: Requires BROWSERBASE_ADVANCED_STEALTH=true (Scale Plan only)
# Browserbase API Key - Cloud browser execution
# Get at: https://browserbase.com/
BROWSERBASE_API_KEY=
# Browserbase Project ID - From your Browserbase dashboard
BROWSERBASE_PROJECT_ID=
# Enable residential proxies for better CAPTCHA solving (default: true)
# Routes traffic through residential IPs, significantly improves success rate
BROWSERBASE_PROXIES=true
# Enable advanced stealth mode (default: false, requires Scale Plan)
# Uses custom Chromium build to avoid bot detection altogether
BROWSERBASE_ADVANCED_STEALTH=false
# Browser session timeout in seconds (optional, default: 300)
# Sessions are cleaned up after this duration of inactivity
BROWSER_SESSION_TIMEOUT=300
# =============================================================================
# LEGACY/OPTIONAL API KEYS
# =============================================================================
@ -69,10 +140,6 @@ MORPH_API_KEY=
HECATE_VM_LIFETIME_SECONDS=300
HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt
# Direct provider keys (optional - OpenRouter is preferred)
ANTHROPIC_API_KEY=
OPENAI_API_KEY=
# =============================================================================
# DEBUG OPTIONS
# =============================================================================
@ -80,3 +147,12 @@ WEB_TOOLS_DEBUG=false
VISION_TOOLS_DEBUG=false
MOA_TOOLS_DEBUG=false
IMAGE_TOOLS_DEBUG=false
# Scratch directory for Singularity sandboxes (optional)
# If not set, uses /scratch (if available) or /tmp
# Set this to a directory with lots of space for large pip installs
# TERMINAL_SCRATCH_DIR=/scratch/myuser
# Disk usage warning threshold in GB (default: 500)
# Warning is printed when total sandbox disk usage exceeds this
TERMINAL_DISK_WARNING_GB=500

View File

@ -6,6 +6,7 @@ An AI agent with advanced tool-calling capabilities, featuring a flexible toolse
- **Web Tools**: Search, extract content, and crawl websites
- **Terminal Tools**: Execute commands via mini-swe-agent (local, Docker, or Modal backends)
- **Browser Tools**: Automate web browsers to navigate, click, type, and extract content
- **Vision Tools**: Analyze images from URLs
- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
- **Creative Tools**: Generate images from text prompts
@ -53,9 +54,9 @@ nano .env # or use your preferred editor
- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
- `FAL_KEY` - Image generation (get at: https://fal.ai/)
**Optional API Keys:**
- `ANTHROPIC_API_KEY` - Direct Anthropic access (if not using OpenRouter)
- `OPENAI_API_KEY` - Direct OpenAI access (if not using OpenRouter)
**Optional API Keys (for specific features):**
- `BROWSERBASE_API_KEY` - Browser automation (get at: https://browserbase.com/)
- `BROWSERBASE_PROJECT_ID` - From Browserbase dashboard
- `MORPH_API_KEY` - For legacy Hecate terminal backend (get at: https://morph.so/)
### 4. Configure Terminal Backend
@ -63,19 +64,22 @@ nano .env # or use your preferred editor
The terminal tool uses **mini-swe-agent** environments. Configure in `.env`:
```bash
# Backend: "local" (host machine), "docker" (containers), or "modal" (cloud)
TERMINAL_ENV=local # Default: runs on host machine
TERMINAL_ENV=docker # Recommended: isolated Docker containers
# Backend: "local", "docker", "singularity", or "modal"
TERMINAL_ENV=local # Default: runs on host machine (no isolation)
TERMINAL_ENV=singularity # Recommended for HPC: Apptainer/Singularity containers
TERMINAL_ENV=docker # Isolated Docker containers
TERMINAL_ENV=modal # Cloud execution via Modal
# Docker settings (for docker/modal backends)
# Container image (for docker/singularity/modal backends)
TERMINAL_DOCKER_IMAGE=python:3.11
TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
TERMINAL_TIMEOUT=60
```
**Backend Requirements:**
- **local**: No extra setup (runs directly on your machine)
- **docker**: Requires Docker installed and running. User must be in `docker` group.
- **local**: No extra setup (runs directly on your machine, no isolation)
- **singularity**: Requires Apptainer or Singularity installed (common on HPC clusters, no root needed)
- **docker**: Requires Docker installed and user in `docker` group
- **modal**: Requires Modal account (see setup below)
### Modal Cloud Backend Setup
@ -95,6 +99,55 @@ TERMINAL_ENV=modal
Modal uses CLI-based authentication (stored in `~/.modal/`), so no API key is needed in `.env`. After running `modal setup`, commands will automatically execute in Modal's cloud sandboxes.
### Browser Tools Setup
Browser tools enable the agent to navigate websites, fill forms, click buttons, and extract content. They use [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with [Browserbase](https://browserbase.com) cloud execution.
```bash
# 1. Install Node.js (if not already installed)
# Use nvm (recommended) or your package manager
# 2. Install agent-browser CLI globally
npm install -g agent-browser
# 3. Get Browserbase credentials
# Sign up at https://browserbase.com/ and get your:
# - API Key (from Settings → API Keys)
# - Project ID (from your project dashboard)
# 4. Add to your .env file:
BROWSERBASE_API_KEY=your_api_key_here
BROWSERBASE_PROJECT_ID=your_project_id_here
```
**Available Browser Tools:**
| Tool | Description |
|------|-------------|
| `browser_navigate` | Navigate to a URL |
| `browser_snapshot` | Get text-based page snapshot with element refs |
| `browser_click` | Click an element by ref (e.g., `@e5`) |
| `browser_type` | Type text into an input field |
| `browser_scroll` | Scroll up or down |
| `browser_back` | Go back in browser history |
| `browser_press` | Press a keyboard key (Enter, Tab, etc.) |
| `browser_close` | Close the browser session |
| `browser_get_images` | Get list of images on the page |
**Example Usage:**
```bash
# Use browser tools with web search and vision
python run_agent.py \
--query "Go to amazon.com and find the price of the latest Kindle" \
--enabled_toolsets=browser,web,vision
# Use browser-focused distribution
python batch_runner.py \
--dataset_file=browser_tasks.jsonl \
--distribution=browser_use \
--run_name=browser_run
```
See `.env.example` for all available configuration options including debug settings.
## Toolsets System
@ -267,10 +320,6 @@ All environment variables can be configured in the `.env` file (copy from `.env.
- `NOUS_API_KEY`: Vision and reasoning tools
- `FAL_KEY`: Image generation tools
**Optional Direct Provider Keys:**
- `ANTHROPIC_API_KEY`: Direct Anthropic access (fallback if OpenRouter not set)
- `OPENAI_API_KEY`: Direct OpenAI access (fallback if OpenRouter not set)
**Terminal Tool Configuration (mini-swe-agent backend):**
- `TERMINAL_ENV`: Backend type - `local`, `singularity`, `docker`, or `modal` (default: `local`)
- `TERMINAL_DOCKER_IMAGE`: Docker image to use (default: `python:3.11-slim`)
@ -278,6 +327,11 @@ All environment variables can be configured in the `.env` file (copy from `.env.
- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
**Browser Tool Configuration (agent-browser + Browserbase):**
- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
- `BROWSERBASE_PROJECT_ID`: Browserbase project ID
- `BROWSER_SESSION_TIMEOUT`: Session timeout in seconds (default: `300`)
**Legacy Hecate Terminal Backend (optional):**
- `MORPH_API_KEY`: For Hecate/MorphCloud terminal backend
- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)

View File

@ -49,8 +49,13 @@ _WORKER_CONFIG = {}
# All possible tools - used to ensure consistent schema across all trajectory entries
# This is required because Arrow/Parquet (used by HuggingFace datasets) needs identical schemas
ALL_POSSIBLE_TOOLS = {
'terminal', 'web_search', 'web_extract', 'web_crawl',
'vision_analyze', 'image_generate', 'mixture_of_agents'
'terminal', 'web_search', 'web_extract',
'vision_analyze', 'image_generate', 'mixture_of_agents',
# Browser automation tools
'browser_navigate', 'browser_snapshot', 'browser_click',
'browser_type', 'browser_scroll', 'browser_back',
'browser_press', 'browser_close', 'browser_get_images',
'browser_vision'
}
# Default stats for tools that weren't used
@ -828,8 +833,13 @@ class BatchRunner:
combined_file = self.output_dir / "trajectories.jsonl"
print(f"\n📦 Combining ALL batch files into {combined_file.name}...")
VALID_TOOLS = {'web_search', 'web_extract', 'web_crawl', 'terminal', 'vision_analyze',
'image_generate', 'mixture_of_agents'}
VALID_TOOLS = {'web_search', 'web_extract', 'terminal', 'vision_analyze',
'image_generate', 'mixture_of_agents',
# Browser automation tools
'browser_navigate', 'browser_snapshot', 'browser_click',
'browser_type', 'browser_scroll', 'browser_back',
'browser_press', 'browser_close', 'browser_get_images',
'browser_vision'}
total_entries = 0
filtered_entries = 0
@ -928,9 +938,9 @@ def main(
batch_size: int = None,
run_name: str = None,
distribution: str = "default",
model: str = "claude-opus-4-20250514",
model: str = "anthropic/claude-sonnet-4-20250514",
api_key: str = None,
base_url: str = "https://api.anthropic.com/v1/",
base_url: str = "https://openrouter.ai/api/v1",
max_turns: int = 10,
num_workers: int = 4,
resume: bool = False,

View File

@ -37,6 +37,22 @@ from tools.terminal_hecate import terminal_hecate_tool, check_hecate_requirement
from tools.vision_tools import vision_analyze_tool, check_vision_requirements
from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
# Browser automation tools (agent-browser + Browserbase)
from tools.browser_tool import (
browser_navigate,
browser_snapshot,
browser_click,
browser_type,
browser_scroll,
browser_back,
browser_press,
browser_close,
browser_get_images,
browser_vision,
cleanup_browser,
check_browser_requirements,
BROWSER_TOOL_SCHEMAS
)
from toolsets import (
get_toolset, resolve_toolset, resolve_multiple_toolsets,
get_all_toolsets, get_toolset_names, validate_toolset,
@ -55,7 +71,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web for information on any topic. Returns up to 5 relevant results with titles and URLs. Uses advanced search depth for comprehensive results.",
"description": "Search the web for information on any topic. Returns up to 5 relevant results with titles and URLs. Uses advanced search depth for comprehensive results. PREFERRED over browser tools for finding information - faster and more cost-effective. Use browser tools only when you need to interact with pages (click, fill forms, handle dynamic content).",
"parameters": {
"type": "object",
"properties": {
@ -72,7 +88,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
"type": "function",
"function": {
"name": "web_extract",
"description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search. The content returned will be excerpts and key points summarized with an LLM to reduce impact on the context window.",
"description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search. The content returned will be excerpts and key points summarized with an LLM to reduce impact on the context window. PREFERRED over browser tools for reading page content - faster and more cost-effective. Use browser tools only when pages require interaction or have dynamic content.",
"parameters": {
"type": "object",
"properties": {
@ -87,27 +103,6 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
}
}
},
{
"type": "function",
"function": {
"name": "web_crawl",
"description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site. The content returned will be excerpts and key points summarized with an LLM to reduce impact on the context window.",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The base URL to crawl (can include or exclude https://)"
},
"instructions": {
"type": "string",
"description": "Specific instructions for what to crawl/extract using AI intelligence (e.g., 'Find pricing information', 'Get documentation pages', 'Extract contact details')"
}
},
"required": ["url"]
}
}
}
]
def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
@ -244,6 +239,18 @@ def get_image_tool_definitions() -> List[Dict[str, Any]]:
]
def get_browser_tool_definitions() -> List[Dict[str, Any]]:
    """
    Build OpenAI-format tool definitions for the browser automation tools.

    Wraps each raw schema from BROWSER_TOOL_SCHEMAS (agent-browser CLI with
    Browserbase cloud execution) in the {"type": "function", ...} envelope
    expected by the OpenAI chat-completions API.

    Returns:
        List[Dict]: Browser tool definitions compatible with the OpenAI API.
    """
    definitions = []
    for schema in BROWSER_TOOL_SCHEMAS:
        definitions.append({"type": "function", "function": schema})
    return definitions
def get_all_tool_names() -> List[str]:
"""
Get the names of all available tools across all toolsets.
@ -255,7 +262,7 @@ def get_all_tool_names() -> List[str]:
# Web tools
if check_firecrawl_api_key():
tool_names.extend(["web_search", "web_extract", "web_crawl"])
tool_names.extend(["web_search", "web_extract"])
# Terminal tools (mini-swe-agent backend)
if check_terminal_requirements():
@ -273,6 +280,15 @@ def get_all_tool_names() -> List[str]:
if check_image_generation_requirements():
tool_names.extend(["image_generate"])
# Browser automation tools
if check_browser_requirements():
tool_names.extend([
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
])
return tool_names
@ -288,12 +304,22 @@ def get_toolset_for_tool(tool_name: str) -> str:
"""
toolset_mapping = {
"web_search": "web_tools",
"web_extract": "web_tools",
"web_crawl": "web_tools",
"web_extract": "web_tools",
"terminal": "terminal_tools",
"vision_analyze": "vision_tools",
"mixture_of_agents": "moa_tools",
"image_generate": "image_tools"
"image_generate": "image_tools",
# Browser automation tools
"browser_navigate": "browser_tools",
"browser_snapshot": "browser_tools",
"browser_click": "browser_tools",
"browser_type": "browser_tools",
"browser_scroll": "browser_tools",
"browser_back": "browser_tools",
"browser_press": "browser_tools",
"browser_close": "browser_tools",
"browser_get_images": "browser_tools",
"browser_vision": "browser_tools"
}
return toolset_mapping.get(tool_name, "unknown")
@ -357,6 +383,10 @@ def get_tool_definitions(
for tool in get_image_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
if check_browser_requirements():
for tool in get_browser_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
# Determine which tools to include based on toolsets
tools_to_include = set()
@ -369,14 +399,20 @@ def get_tool_definitions(
print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
else:
# Try legacy compatibility
if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools"]:
if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "browser_tools"]:
# Map legacy names to new system
legacy_map = {
"web_tools": ["web_search", "web_extract", "web_crawl"],
"web_tools": ["web_search", "web_extract"],
"terminal_tools": ["terminal"],
"vision_tools": ["vision_analyze"],
"moa_tools": ["mixture_of_agents"],
"image_tools": ["image_generate"]
"image_tools": ["image_generate"],
"browser_tools": [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
]
}
legacy_tools = legacy_map.get(toolset_name, [])
tools_to_include.update(legacy_tools)
@ -404,13 +440,19 @@ def get_tool_definitions(
print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
else:
# Try legacy compatibility
if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools"]:
if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "browser_tools"]:
legacy_map = {
"web_tools": ["web_search", "web_extract", "web_crawl"],
"web_tools": ["web_search", "web_extract"],
"terminal_tools": ["terminal"],
"vision_tools": ["vision_analyze"],
"moa_tools": ["mixture_of_agents"],
"image_tools": ["image_generate"]
"image_tools": ["image_generate"],
"browser_tools": [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
]
}
legacy_tools = legacy_map.get(toolset_name, [])
tools_to_include.difference_update(legacy_tools)
@ -465,12 +507,6 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
# Run async function in event loop
return asyncio.run(web_extract_tool(urls, "markdown"))
elif function_name == "web_crawl":
url = function_args.get("url", "")
instructions = function_args.get("instructions")
# Run async function in event loop
return asyncio.run(web_crawl_tool(url, instructions, "basic"))
else:
return json.dumps({"error": f"Unknown web function: {function_name}"}, ensure_ascii=False)
@ -603,7 +639,58 @@ def handle_image_function_call(function_name: str, function_args: Dict[str, Any]
return json.dumps({"error": f"Unknown image generation function: {function_name}"}, ensure_ascii=False)
def handle_function_call(function_name: str, function_args: Dict[str, Any], task_id: Optional[str] = None) -> str:
# Browser tool handlers mapping.
# Maps the tool name (as emitted by the model in a tool call) to the handler
# function imported from tools.browser_tool. NOTE: browser_snapshot is
# deliberately absent — it is special-cased in handle_browser_function_call
# because it needs the extra user_task argument for task-aware extraction.
BROWSER_HANDLERS = {
    "browser_navigate": browser_navigate,
    "browser_click": browser_click,
    "browser_type": browser_type,
    "browser_scroll": browser_scroll,
    "browser_back": browser_back,
    "browser_press": browser_press,
    "browser_close": browser_close,
    "browser_get_images": browser_get_images,
    "browser_vision": browser_vision,
}
def handle_browser_function_call(
    function_name: str,
    function_args: Dict[str, Any],
    task_id: Optional[str] = None,
    user_task: Optional[str] = None
) -> str:
    """
    Handle function calls for browser automation tools.

    Args:
        function_name (str): Name of the browser function to call
        function_args (Dict): Arguments for the function (as parsed from the
            model's tool call)
        task_id (str): Task identifier for session isolation (optional)
        user_task (str): User's current task, used for task-aware extraction
            in snapshots (optional)

    Returns:
        str: Function result as a JSON string
    """
    # Special handling for browser_snapshot which needs user_task for extraction
    if function_name == "browser_snapshot":
        full = function_args.get("full", False)
        return browser_snapshot(full=full, task_id=task_id, user_task=user_task)

    # Handle other browser tools
    if function_name in BROWSER_HANDLERS:
        handler = BROWSER_HANDLERS[function_name]
        # Drop any model-supplied task_id: passing it through **function_args
        # alongside the explicit task_id keyword below would raise
        # TypeError ("got multiple values for argument 'task_id'").
        safe_args = {k: v for k, v in function_args.items() if k != "task_id"}
        return handler(**safe_args, task_id=task_id)

    return json.dumps({"error": f"Unknown browser function: {function_name}"}, ensure_ascii=False)
def handle_function_call(
function_name: str,
function_args: Dict[str, Any],
task_id: Optional[str] = None,
user_task: Optional[str] = None
) -> str:
"""
Main function call dispatcher that routes calls to appropriate toolsets.
@ -614,7 +701,8 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any], task
Args:
function_name (str): Name of the function to call
function_args (Dict): Arguments for the function
task_id (str): Unique identifier for this task to isolate VMs between concurrent tasks (optional)
task_id (str): Unique identifier for this task to isolate VMs/sessions between concurrent tasks (optional)
user_task (str): The user's original task/query (used for task-aware content extraction) (optional)
Returns:
str: Function result as JSON string
@ -624,7 +712,7 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any], task
"""
try:
# Route web tools
if function_name in ["web_search", "web_extract", "web_crawl"]:
if function_name in ["web_search", "web_extract"]:
return handle_web_function_call(function_name, function_args)
# Route terminal tools
@ -643,6 +731,15 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any], task
elif function_name in ["image_generate"]:
return handle_image_function_call(function_name, function_args)
# Route browser automation tools
elif function_name in [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
]:
return handle_browser_function_call(function_name, function_args, task_id, user_task)
else:
error_msg = f"Unknown function: {function_name}"
print(f"{error_msg}")
@ -664,8 +761,8 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
toolsets = {
"web_tools": {
"available": check_firecrawl_api_key(),
"tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"],
"description": "Web search, content extraction, and website crawling tools",
"tools": ["web_search_tool", "web_extract_tool"],
"description": "Web search and content extraction tools",
"requirements": ["FIRECRAWL_API_KEY environment variable"]
},
"terminal_tools": {
@ -691,6 +788,17 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
"tools": ["image_generate_tool"],
"description": "Generate high-quality images from text prompts using FAL.ai's FLUX.1 Krea model with automatic 2x upscaling for enhanced quality",
"requirements": ["FAL_KEY environment variable", "fal-client package"]
},
"browser_tools": {
"available": check_browser_requirements(),
"tools": [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
],
"description": "Browser automation for web interaction using agent-browser CLI with Browserbase cloud execution",
"requirements": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "agent-browser npm package"]
}
}
@ -708,7 +816,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
"terminal_tools": check_terminal_requirements(),
"vision_tools": check_vision_requirements(),
"moa_tools": check_moa_requirements(),
"image_tools": check_image_generation_requirements()
"image_tools": check_image_generation_requirements(),
"browser_tools": check_browser_requirements()
}
if __name__ == "__main__":

View File

@ -44,6 +44,7 @@ else:
# Import our tool system
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
from tools.terminal_tool import cleanup_vm
from tools.browser_tool import cleanup_browser
class AIAgent:
@ -58,7 +59,7 @@ class AIAgent:
self,
base_url: str = None,
api_key: str = None,
model: str = "anthropic/claude-sonnet-4-20250514",
model: str = "anthropic/claude-sonnet-4-20250514", # OpenRouter format
max_iterations: int = 10,
tool_delay: float = 1.0,
enabled_toolsets: List[str] = None,
@ -156,10 +157,7 @@ class AIAgent:
client_kwargs["api_key"] = api_key
else:
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys
client_kwargs["api_key"] = os.getenv(
"OPENROUTER_API_KEY",
os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
)
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
try:
self.client = OpenAI(**client_kwargs)
@ -339,11 +337,12 @@ class AIAgent:
# Check if this message has tool calls
if "tool_calls" in msg and msg["tool_calls"]:
# Format assistant message with tool calls
# Add <think> tags around reasoning for trajectory storage
content = ""
# Prepend reasoning in <think> tags if available
if msg.get("reasoning") and msg["reasoning"].strip():
content = f"<think>{msg['reasoning']}</think>"
content = f"<think>\n{msg['reasoning']}\n</think>\n"
if msg.get("content") and msg["content"].strip():
content += msg["content"] + "\n"
@ -406,17 +405,18 @@ class AIAgent:
else:
# Regular assistant message without tool calls
# Add <think> tags around reasoning for trajectory storage
content = ""
# Prepend reasoning in <think> tags if available
if msg.get("reasoning") and msg["reasoning"].strip():
content = f"<think>{msg['reasoning']}</think>"
content = f"<think>\n{msg['reasoning']}\n</think>\n"
content += msg["content"] or ""
trajectory.append({
"from": "gpt",
"value": content
"value": content.strip()
})
elif msg["role"] == "user":
@ -515,7 +515,31 @@ class AIAgent:
# Prepare messages for API call
# If we have an ephemeral system prompt, prepend it to the messages
api_messages = messages.copy()
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
# However, providers like Moonshot AI require a separate 'reasoning_content' field
# on assistant messages with tool_calls. We handle both cases here.
api_messages = []
for msg in messages:
api_msg = msg.copy()
# For assistant messages with tool_calls, providers require 'reasoning_content' field
# Extract reasoning from our stored 'reasoning' field and add it as 'reasoning_content'
if msg.get("role") == "assistant" and msg.get("tool_calls"):
reasoning_text = msg.get("reasoning")
if reasoning_text:
# Add reasoning_content for API compatibility (Moonshot AI, Novita, etc.)
api_msg["reasoning_content"] = reasoning_text
# Remove 'reasoning' field - it's for trajectory storage only
# The reasoning is already in the content via <think> tags AND
# we've added reasoning_content for API compatibility above
if "reasoning" in api_msg:
api_msg.pop("reasoning")
# Remove 'reasoning_details' if present - we use reasoning_content instead
if "reasoning_details" in api_msg:
api_msg.pop("reasoning_details")
api_messages.append(api_msg)
if active_system_prompt:
# Insert system message at the beginning
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
@ -582,7 +606,9 @@ class AIAgent:
print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
if self.verbose_logging:
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
# Log response with provider info if available
resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
# Validate response has valid choices before proceeding
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
@ -600,12 +626,28 @@ class AIAgent:
# Check for error field in response (some providers include this)
error_msg = "Unknown"
provider_name = "Unknown"
if response and hasattr(response, 'error') and response.error:
error_msg = str(response.error)
# Try to extract provider from error metadata
if hasattr(response.error, 'metadata') and response.error.metadata:
provider_name = response.error.metadata.get('provider_name', 'Unknown')
elif response and hasattr(response, 'message') and response.message:
error_msg = str(response.message)
# Try to get provider from model field (OpenRouter often returns actual model used)
if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
provider_name = f"model={response.model}"
# Check for x-openrouter-provider or similar metadata
if provider_name == "Unknown" and response:
# Log all response attributes for debugging
resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
if self.verbose_logging:
logging.debug(f"Response attributes for invalid response: {resp_attrs}")
print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
print(f"{self.log_prefix} 🏢 Provider: {provider_name}")
print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
@ -623,7 +665,7 @@ class AIAgent:
# Longer backoff for rate limiting (likely cause of None choices)
wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)}")
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
time.sleep(wait_time)
continue # Retry the API call
@ -639,12 +681,17 @@ class AIAgent:
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
# Clean up VM
# Clean up VM and browser
try:
cleanup_vm(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
try:
cleanup_browser(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
return {
"final_response": None,
@ -799,17 +846,21 @@ class AIAgent:
self._invalid_json_retries = 0
# Extract reasoning from response if available (for reasoning models like minimax, kimi, etc.)
reasoning_content = None
# Extract reasoning from response for storage
# The reasoning_content field will be added when preparing API messages
reasoning_text = None
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
reasoning_content = assistant_message.reasoning
reasoning_text = assistant_message.reasoning
elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
reasoning_content = assistant_message.reasoning_content
reasoning_text = assistant_message.reasoning_content
# Add assistant message with tool calls to conversation
messages.append({
# Build assistant message with tool calls
# Content stays as-is; reasoning is stored separately and will be passed
# to the API via reasoning_content field when preparing api_messages
assistant_msg = {
"role": "assistant",
"content": assistant_message.content,
"reasoning": reasoning_content, # Store reasoning for trajectory
"content": assistant_message.content or "",
"reasoning": reasoning_text, # Stored for trajectory extraction & API calls
"tool_calls": [
{
"id": tool_call.id,
@ -821,7 +872,9 @@ class AIAgent:
}
for tool_call in assistant_message.tool_calls
]
})
}
messages.append(assistant_msg)
# Execute each tool call
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
@ -896,12 +949,17 @@ class AIAgent:
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
# Clean up VM
# Clean up VM and browser
try:
cleanup_vm(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
try:
cleanup_browser(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
return {
"final_response": None,
@ -917,18 +975,21 @@ class AIAgent:
self._empty_content_retries = 0
# Extract reasoning from response if available
reasoning_content = None
reasoning_text = None
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
reasoning_content = assistant_message.reasoning
reasoning_text = assistant_message.reasoning
elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
reasoning_content = assistant_message.reasoning_content
reasoning_text = assistant_message.reasoning_content
# Add final assistant message
messages.append({
# Build final assistant message
# Content stays as-is; reasoning stored separately for trajectory extraction
final_msg = {
"role": "assistant",
"content": final_response,
"reasoning": reasoning_content # Store reasoning for trajectory
})
"reasoning": reasoning_text # Stored for trajectory extraction
}
messages.append(final_msg)
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
break
@ -963,12 +1024,18 @@ class AIAgent:
# Save trajectory if enabled
self._save_trajectory(messages, user_message, completed)
# Clean up VM for this task after conversation completes
# Clean up VM and browser for this task after conversation completes
try:
cleanup_vm(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
try:
cleanup_browser(effective_task_id)
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
return {
"final_response": final_response,
@ -994,14 +1061,15 @@ class AIAgent:
def main(
query: str = None,
model: str = "claude-opus-4-20250514",
model: str = "anthropic/claude-sonnet-4-20250514",
api_key: str = None,
base_url: str = "https://api.anthropic.com/v1/",
base_url: str = "https://openrouter.ai/api/v1",
max_turns: int = 10,
enabled_toolsets: str = None,
disabled_toolsets: str = None,
list_tools: bool = False,
save_trajectories: bool = False,
save_sample: bool = False,
verbose: bool = False,
log_prefix_chars: int = 20
):
@ -1010,16 +1078,17 @@ def main(
Args:
query (str): Natural language query for the agent. Defaults to Python 3.13 example.
model (str): Model name to use. Defaults to claude-opus-4-20250514.
api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4-20250514.
api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
max_turns (int): Maximum number of API call iterations. Defaults to 10.
enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
toolsets (e.g., "research", "development", "safe").
Multiple toolsets can be combined: "web,vision"
disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
list_tools (bool): Just list available tools and exit
save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
save_trajectories (bool): Save conversation trajectories to JSONL files (appends to trajectory_samples.jsonl). Defaults to False.
save_sample (bool): Save a single trajectory sample to a UUID-named JSONL file for inspection. Defaults to False.
verbose (bool): Enable verbose logging for debugging. Defaults to False.
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.
@ -1173,6 +1242,34 @@ def main(
print("-" * 30)
print(result['final_response'])
# Save sample trajectory to UUID-named file if requested
if save_sample:
import uuid
sample_id = str(uuid.uuid4())[:8]
sample_filename = f"sample_{sample_id}.jsonl"
# Convert messages to trajectory format (same as batch_runner)
trajectory = agent._convert_to_trajectory_format(
result['messages'],
user_query,
result['completed']
)
entry = {
"conversations": trajectory,
"timestamp": datetime.now().isoformat(),
"model": model,
"completed": result['completed'],
"query": user_query
}
try:
with open(sample_filename, "w", encoding="utf-8") as f:
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
print(f"\n💾 Sample trajectory saved to: {sample_filename}")
except Exception as e:
print(f"\n⚠️ Failed to save sample: {e}")
print("\n👋 Agent execution completed!")

View File

@ -0,0 +1,299 @@
#!/usr/bin/env python3
"""
Test Modal Terminal Tool
This script tests that the Modal terminal backend is correctly configured
and can execute commands in Modal sandboxes.
Usage:
# Run with Modal backend
TERMINAL_ENV=modal python tests/test_modal_terminal.py
# Or run directly (will use whatever TERMINAL_ENV is set in .env)
python tests/test_modal_terminal.py
"""
import os
import sys
import json
from pathlib import Path
# Try to load .env file if python-dotenv is available
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    # Manually load .env if dotenv not available.
    # Only handles simple KEY=VALUE lines; no export/multiline support.
    env_file = Path(__file__).parent.parent / ".env"
    if env_file.exists():
        with open(env_file) as f:
            for line in f:
                line = line.strip()
                # Skip blanks, comments, and lines without an assignment
                if line and not line.startswith('#') and '=' in line:
                    key, value = line.split('=', 1)
                    # Remove quotes if present
                    value = value.strip().strip('"').strip("'")
                    # setdefault: real environment variables win over .env values
                    os.environ.setdefault(key.strip(), value)
# Add parent directory to path for imports (repo root + vendored mini-swe-agent)
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))
sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))
# Import terminal_tool module directly using importlib to avoid tools/__init__.py
# (the package __init__ pulls in every tool and their heavy dependencies)
import importlib.util
terminal_tool_path = parent_dir / "tools" / "terminal_tool.py"
spec = importlib.util.spec_from_file_location("terminal_tool", terminal_tool_path)
terminal_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(terminal_module)
# Re-export the handful of names the tests below actually use
terminal_tool = terminal_module.terminal_tool
check_terminal_requirements = terminal_module.check_terminal_requirements
_get_env_config = terminal_module._get_env_config
cleanup_vm = terminal_module.cleanup_vm
get_active_environments_info = terminal_module.get_active_environments_info
def test_modal_requirements():
    """Check Modal auth/config and report whether the backend is usable."""
    print("\n" + "=" * 60)
    print("TEST 1: Modal Requirements Check")
    print("=" * 60)
    config = _get_env_config()
    print(f"Current TERMINAL_ENV: {config['env_type']}")
    print(f"Modal image: {config['modal_image']}")
    # Either a MODAL_TOKEN_ID env var or a ~/.modal.toml file can authenticate
    has_token = os.getenv("MODAL_TOKEN_ID")
    toml_path = Path.home() / ".modal.toml"
    print(f"\nModal authentication:")
    print(f" MODAL_TOKEN_ID env var: {'✅ Set' if has_token else '❌ Not set'}")
    print(f" ~/.modal.toml file: {'✅ Exists' if toml_path.exists() else '❌ Not found'}")
    # Bail out early if the backend is not even configured as modal
    if config['env_type'] != 'modal':
        print(f"\n⚠️ TERMINAL_ENV is '{config['env_type']}', not 'modal'")
        print(" Set TERMINAL_ENV=modal in .env or export it to test Modal backend")
        return False
    ok = check_terminal_requirements()
    print(f"\nRequirements check: {'✅ Passed' if ok else '❌ Failed'}")
    return ok
def test_simple_command():
    """Run a trivial echo command and verify its output and exit code."""
    print("\n" + "=" * 60)
    print("TEST 2: Simple Command Execution")
    print("=" * 60)
    task = "modal_test_simple"
    print("Executing: echo 'Hello from Modal!'")
    parsed = json.loads(terminal_tool("echo 'Hello from Modal!'", task_id=task))
    print(f"\nResult:")
    print(f" Output: {parsed.get('output', '')[:200]}")
    print(f" Exit code: {parsed.get('exit_code')}")
    print(f" Error: {parsed.get('error')}")
    # Success requires both a zero exit code and the echoed text in stdout
    ok = parsed.get('exit_code') == 0 and 'Hello from Modal!' in parsed.get('output', '')
    print(f"\nTest: {'✅ Passed' if ok else '❌ Failed'}")
    # Always release the sandbox, pass or fail
    cleanup_vm(task)
    return ok
def test_python_execution():
    """Verify that Python code can run inside the Modal sandbox."""
    print("\n" + "=" * 60)
    print("TEST 3: Python Execution")
    print("=" * 60)
    task = "modal_test_python"
    command = 'python3 -c "import sys; print(f\'Python {sys.version}\')"'
    print(f"Executing: {command}")
    parsed = json.loads(terminal_tool(command, task_id=task))
    print(f"\nResult:")
    print(f" Output: {parsed.get('output', '')[:200]}")
    print(f" Exit code: {parsed.get('exit_code')}")
    print(f" Error: {parsed.get('error')}")
    # The interpreter should exit cleanly and print its version banner
    ok = parsed.get('exit_code') == 0 and 'Python' in parsed.get('output', '')
    print(f"\nTest: {'✅ Passed' if ok else '❌ Failed'}")
    # Always release the sandbox, pass or fail
    cleanup_vm(task)
    return ok
def test_pip_install():
    """Install a small package with pip and import it inside the sandbox."""
    print("\n" + "=" * 60)
    print("TEST 4: Pip Install Test")
    print("=" * 60)
    task = "modal_test_pip"
    # cowsay is tiny and has no dependencies, keeping the test fast
    command = "pip install --break-system-packages cowsay && python3 -c \"import cowsay; cowsay.cow('Modal works!')\""
    print("Executing: pip install --break-system-packages cowsay && python3 -c \"import cowsay; cowsay.cow('Modal works!')\"")
    # Installs need more headroom than the default timeout
    parsed = json.loads(terminal_tool(command, task_id=task, timeout=120))
    print(f"\nResult:")
    output = parsed.get('output', '')
    # pip output can be long; show only the tail
    tail = output[-500:] if len(output) > 500 else output
    print(f" Output (last 500 chars): ...{tail}")
    print(f" Exit code: {parsed.get('exit_code')}")
    print(f" Error: {parsed.get('error')}")
    ok = parsed.get('exit_code') == 0 and 'Modal works!' in parsed.get('output', '')
    print(f"\nTest: {'✅ Passed' if ok else '❌ Failed'}")
    # Always release the sandbox, pass or fail
    cleanup_vm(task)
    return ok
def test_filesystem_persistence():
    """Write a file in one command and read it back in a second command."""
    print("\n" + "=" * 60)
    print("TEST 5: Filesystem Persistence")
    print("=" * 60)
    task = "modal_test_persist"
    # Step 1: write a marker file
    print("Step 1: Creating test file...")
    write_result = json.loads(
        terminal_tool("echo 'persistence test' > /tmp/modal_test.txt", task_id=task)
    )
    print(f" Exit code: {write_result.get('exit_code')}")
    # Step 2: read it back in a *separate* tool invocation — this only works
    # if both commands hit the same environment instance
    print("Step 2: Reading test file...")
    read_result = json.loads(terminal_tool("cat /tmp/modal_test.txt", task_id=task))
    print(f" Output: {read_result.get('output', '')}")
    print(f" Exit code: {read_result.get('exit_code')}")
    ok = (
        write_result.get('exit_code') == 0
        and read_result.get('exit_code') == 0
        and 'persistence test' in read_result.get('output', '')
    )
    print(f"\nTest: {'✅ Passed' if ok else '❌ Failed'}")
    # Always release the sandbox, pass or fail
    cleanup_vm(task)
    return ok
def test_environment_isolation():
    """Test that different task_ids get isolated environments.

    Writes a file under one task_id and confirms a second task_id cannot
    see its contents.

    Returns:
        bool: True when the environments are isolated.
    """
    print("\n" + "=" * 60)
    print("TEST 6: Environment Isolation")
    print("=" * 60)
    task1 = "modal_test_iso_1"
    task2 = "modal_test_iso_2"
    # Create file in task1
    print("Step 1: Creating file in task1...")
    terminal_tool("echo 'task1 data' > /tmp/isolated.txt", task_id=task1)
    # Try to read from task2 (should not exist)
    print("Step 2: Trying to read file from task2 (should not exist)...")
    result2 = terminal_tool("cat /tmp/isolated.txt 2>&1 || echo 'FILE_NOT_FOUND'", task_id=task2)
    result2_json = json.loads(result2)
    output = result2_json.get('output', '')
    # BUGFIX: the old check OR-ed in 'FILE_NOT_FOUND' / 'No such file' markers,
    # so it could report "isolated" even when task1's data leaked into task2's
    # output (any output containing both strings passed). Isolation holds
    # exactly when task1's content is absent from task2's view.
    isolated = 'task1 data' not in output
    print(f" Task2 output: {output[:200]}")
    print(f"\nTest: {'✅ Passed (environments isolated)' if isolated else '❌ Failed (environments NOT isolated)'}")
    # Cleanup both sandboxes regardless of outcome
    cleanup_vm(task1)
    cleanup_vm(task2)
    return isolated
def main():
    """Run all Modal terminal tests.

    Returns:
        bool: True only when every test passed; False on user abort, unmet
        requirements, or any failing test. Always returning a bool keeps the
        caller's ``sys.exit(0 if success else 1)`` contract explicit
        (previously the abort paths fell through with a bare ``return`` /
        ``None``).
    """
    print("🧪 Modal Terminal Tool Test Suite")
    print("=" * 60)
    # Check current config
    config = _get_env_config()
    print(f"\nCurrent configuration:")
    print(f" TERMINAL_ENV: {config['env_type']}")
    print(f" TERMINAL_MODAL_IMAGE: {config['modal_image']}")
    print(f" TERMINAL_TIMEOUT: {config['timeout']}s")
    if config['env_type'] != 'modal':
        print(f"\n⚠️ WARNING: TERMINAL_ENV is set to '{config['env_type']}', not 'modal'")
        print(" To test Modal specifically, set TERMINAL_ENV=modal")
        response = input("\n Continue testing with current backend? (y/n): ")
        if response.lower() != 'y':
            print("Aborting.")
            # BUGFIX: was a bare `return` (None); return False explicitly
            return False
    results = {}
    # Requirements gate: without it the remaining tests cannot run
    results['requirements'] = test_modal_requirements()
    if not results['requirements']:
        print("\n❌ Requirements not met. Cannot continue with other tests.")
        # BUGFIX: was a bare `return` (None); return False explicitly
        return False
    results['simple_command'] = test_simple_command()
    results['python_execution'] = test_python_execution()
    results['pip_install'] = test_pip_install()
    results['filesystem_persistence'] = test_filesystem_persistence()
    results['environment_isolation'] = test_environment_isolation()
    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
    passed = sum(1 for v in results.values() if v)
    total = len(results)
    for test_name, passed_test in results.items():
        status = "✅ PASSED" if passed_test else "❌ FAILED"
        print(f" {test_name}: {status}")
    print(f"\nTotal: {passed}/{total} tests passed")
    # Report any environments that the tests failed to clean up
    env_info = get_active_environments_info()
    print(f"\nActive environments after tests: {env_info['count']}")
    if env_info['count'] > 0:
        print(f" Task IDs: {env_info['task_ids']}")
    return passed == total
if __name__ == "__main__":
    # Exit code mirrors the overall result: 0 only when main() returns a
    # truthy value (all tests passed); any falsy result exits 1.
    success = main()
    sys.exit(0 if success else 1)

View File

@ -24,11 +24,13 @@ from .web_tools import (
check_firecrawl_api_key
)
# Primary terminal tool (mini-swe-agent backend: local/docker/modal)
# Primary terminal tool (mini-swe-agent backend: local/docker/singularity/modal)
from .terminal_tool import (
terminal_tool,
check_terminal_requirements,
cleanup_vm,
cleanup_all_environments,
get_active_environments_info,
TERMINAL_TOOL_DESCRIPTION
)
@ -54,6 +56,25 @@ from .image_generation_tool import (
check_image_generation_requirements
)
# Browser automation tools (agent-browser + Browserbase)
from .browser_tool import (
browser_navigate,
browser_snapshot,
browser_click,
browser_type,
browser_scroll,
browser_back,
browser_press,
browser_close,
browser_get_images,
browser_vision,
cleanup_browser,
cleanup_all_browsers,
get_active_browser_sessions,
check_browser_requirements,
BROWSER_TOOL_SCHEMAS
)
__all__ = [
# Web tools
'web_search_tool',
@ -64,6 +85,8 @@ __all__ = [
'terminal_tool',
'check_terminal_requirements',
'cleanup_vm',
'cleanup_all_environments',
'get_active_environments_info',
'TERMINAL_TOOL_DESCRIPTION',
# Terminal tools (Hecate/MorphCloud backend)
'terminal_hecate_tool',
@ -78,5 +101,21 @@ __all__ = [
# Image generation tools
'image_generate_tool',
'check_image_generation_requirements',
# Browser automation tools
'browser_navigate',
'browser_snapshot',
'browser_click',
'browser_type',
'browser_scroll',
'browser_back',
'browser_press',
'browser_close',
'browser_get_images',
'browser_vision',
'cleanup_browser',
'cleanup_all_browsers',
'get_active_browser_sessions',
'check_browser_requirements',
'BROWSER_TOOL_SCHEMAS',
]

1454
tools/browser_tool.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -32,6 +32,10 @@ import sys
import time
import threading
import atexit
import shutil
import subprocess
import tempfile
import uuid
from pathlib import Path
from typing import Optional, Dict, Any
@ -40,6 +44,168 @@ mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
if mini_swe_path.exists():
sys.path.insert(0, str(mini_swe_path))
# =============================================================================
# Custom Singularity Environment with more space
# =============================================================================
def _get_scratch_dir() -> Path:
"""Get the best directory for Singularity sandboxes - prefers /scratch if available."""
# Check for configurable scratch directory first (highest priority)
custom_scratch = os.getenv("TERMINAL_SCRATCH_DIR")
if custom_scratch:
scratch_path = Path(custom_scratch)
scratch_path.mkdir(parents=True, exist_ok=True)
return scratch_path
# Check for /scratch (common on HPC clusters, especially GPU nodes)
scratch = Path("/scratch")
if scratch.exists() and os.access(scratch, os.W_OK):
# Create user-specific subdirectory
user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent"
user_scratch.mkdir(parents=True, exist_ok=True)
print(f"[Terminal] Using /scratch for sandboxes: {user_scratch}")
return user_scratch
# Fall back to /tmp
print("[Terminal] Warning: /scratch not available, using /tmp (limited space)")
return Path(tempfile.gettempdir())
# Disk usage warning threshold (in GB)
DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))


def _check_disk_usage_warning() -> bool:
    """Warn when the combined size of hermes-* scratch dirs exceeds the threshold.

    Walks every ``hermes-*`` directory under the scratch root and sums file
    sizes. Best-effort: any error (vanished files, permission issues) is
    swallowed so disk accounting can never break command execution.

    Returns:
        bool: True if a warning was printed, False otherwise (including on error).
    """
    import glob

    scratch_dir = _get_scratch_dir()
    try:
        total_bytes = 0
        for path in glob.glob(str(scratch_dir / "hermes-*")):
            for f in Path(path).rglob('*'):
                if f.is_file():
                    try:
                        total_bytes += f.stat().st_size
                    except OSError:
                        # File disappeared or became unreadable mid-scan; skip it
                        pass
        total_gb = total_bytes / (1024 ** 3)
        if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
            print(f"⚠️ [Terminal] WARNING: Disk usage ({total_gb:.1f}GB) exceeds threshold ({DISK_USAGE_WARNING_THRESHOLD_GB}GB)")
            print(f" Consider running cleanup_all_environments() or reducing parallel workers")
            return True
        return False
    except Exception:
        # Never let a failed size check interfere with the caller
        return False
class _SingularityEnvironment:
    """
    Custom Singularity/Apptainer environment with better space management.

    - Builds a writable sandbox in /scratch (if available) or a configurable location
    - Binds a large working directory into the container at /workspace (and /tmp)
    - Keeps the container isolated from the host filesystem (--contain --cleanenv)
    """

    def __init__(self, image: str, cwd: str = "/workspace", timeout: int = 60):
        """
        Args:
            image: Container image reference (e.g. "docker://python:3.11").
            cwd: Default working directory for commands inside the container.
            timeout: Default per-command timeout in seconds.

        Raises:
            RuntimeError: if the sandbox build fails or times out.
        """
        self.image = image
        self.cwd = cwd
        self.timeout = timeout
        # Prefer apptainer (the maintained fork) when both are installed
        self.executable = "apptainer" if shutil.which("apptainer") else "singularity"
        # Scratch area with enough space for the sandbox and working files
        self.scratch_dir = _get_scratch_dir()
        # Unique per-instance names so parallel tasks never collide
        self.sandbox_id = f"hermes-{uuid.uuid4().hex[:12]}"
        self.sandbox_dir = self.scratch_dir / self.sandbox_id
        # Working directory bound into the container (survives between commands)
        self.work_dir = self.scratch_dir / f"{self.sandbox_id}-work"
        self.work_dir.mkdir(parents=True, exist_ok=True)
        # Build the sandbox (may take minutes for large images)
        self._build_sandbox()

    def _build_sandbox(self):
        """Build a writable sandbox from the container image.

        Raises:
            RuntimeError: on build failure or timeout. Partially built
            directories are removed before raising (previously only the
            timeout path cleaned up, leaking disk space on a failed build).
        """
        try:
            result = subprocess.run(
                [self.executable, "build", "--sandbox", str(self.sandbox_dir), self.image],
                capture_output=True,
                text=True,
                timeout=300  # 5 min timeout for building
            )
            if result.returncode != 0:
                # BUGFIX: remove the partial sandbox so a failed build
                # does not leak scratch space
                shutil.rmtree(self.sandbox_dir, ignore_errors=True)
                raise RuntimeError(f"Failed to build sandbox: {result.stderr}")
            # Create /workspace inside the sandbox as the bind-mount target
            workspace_in_sandbox = self.sandbox_dir / "workspace"
            workspace_in_sandbox.mkdir(parents=True, exist_ok=True)
        except subprocess.TimeoutExpired:
            shutil.rmtree(self.sandbox_dir, ignore_errors=True)
            raise RuntimeError("Sandbox build timed out")

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
        """Execute a command in the Singularity container.

        Args:
            command: Shell command, run via ``bash -c``.
            cwd: Working directory inside the container; defaults to self.cwd.
            timeout: Per-command timeout override in seconds.

        Returns:
            dict with "output" (combined stdout+stderr) and "returncode"
            (124 on timeout, mirroring GNU timeout's convention).
        """
        cmd = [self.executable, "exec"]
        # Isolation flags - contain but allow network
        cmd.extend(["--contain", "--cleanenv"])
        # Bind the working directory into the container at /workspace
        # This gives the container access to a large writable space
        cmd.extend(["--bind", f"{self.work_dir}:/workspace"])
        # Also bind it to /tmp inside container for pip cache etc.
        cmd.extend(["--bind", f"{self.work_dir}:/tmp"])
        # Set working directory
        work_dir = cwd or self.cwd
        cmd.extend(["--pwd", work_dir])
        # Use writable sandbox
        cmd.extend(["--writable", str(self.sandbox_dir)])
        # Execute the command
        cmd.extend(["bash", "-c", command])
        try:
            result = subprocess.run(
                cmd,
                text=True,
                timeout=timeout or self.timeout,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            return {"output": result.stdout, "returncode": result.returncode}
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {timeout or self.timeout}s", "returncode": 124}

    def cleanup(self):
        """Clean up sandbox and working directory (idempotent, best-effort).

        getattr guards handle partially-initialized instances: if __init__
        raised before an attribute was assigned, __del__ still calls cleanup()
        and must not blow up with AttributeError.
        """
        sandbox = getattr(self, "sandbox_dir", None)
        if sandbox is not None:
            shutil.rmtree(sandbox, ignore_errors=True)
        work = getattr(self, "work_dir", None)
        if work is not None:
            shutil.rmtree(work, ignore_errors=True)

    def stop(self):
        """Alias for cleanup()."""
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Never propagate exceptions out of a finalizer
            pass
# Tool description for LLM
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux environment.
@ -71,6 +237,7 @@ TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux environment.
# Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {}
_env_lock = threading.Lock()
_cleanup_thread = None
@ -80,9 +247,10 @@ _cleanup_running = False
def _get_env_config() -> Dict[str, Any]:
"""Get terminal environment configuration from environment variables."""
return {
"env_type": os.getenv("TERMINAL_ENV", "local"), # local, docker, or modal
"docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11-slim"),
"modal_image": os.getenv("TERMINAL_MODAL_IMAGE", "python:3.11-slim"),
"env_type": os.getenv("TERMINAL_ENV", "local"), # local, docker, singularity, or modal
"docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11"),
"singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", "docker://python:3.11"),
"modal_image": os.getenv("TERMINAL_MODAL_IMAGE", "python:3.11"),
"cwd": os.getenv("TERMINAL_CWD", "/tmp"),
"timeout": int(os.getenv("TERMINAL_TIMEOUT", "60")),
"lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
@ -94,8 +262,8 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int):
Create an execution environment from mini-swe-agent.
Args:
env_type: One of "local", "docker", "modal"
image: Docker/Modal image name (ignored for local)
env_type: One of "local", "docker", "singularity", "modal"
image: Docker/Singularity/Modal image name (ignored for local)
cwd: Working directory
timeout: Default command timeout
@ -110,12 +278,16 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int):
from minisweagent.environments.docker import DockerEnvironment
return DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
elif env_type == "singularity":
# Use custom Singularity environment with better space management
return _SingularityEnvironment(image=image, cwd=cwd, timeout=timeout)
elif env_type == "modal":
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout)
else:
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', or 'modal'")
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', or 'modal'")
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
@ -147,6 +319,8 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if task_id in _last_activity:
del _last_activity[task_id]
if task_id in _task_workdirs:
del _task_workdirs[task_id]
except Exception as e:
error_str = str(e)
@ -160,6 +334,8 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
del _active_environments[task_id]
if task_id in _last_activity:
del _last_activity[task_id]
if task_id in _task_workdirs:
del _task_workdirs[task_id]
def _cleanup_thread_worker():
@ -198,9 +374,63 @@ def _stop_cleanup_thread():
_cleanup_thread.join(timeout=5)
def get_active_environments_info() -> Dict[str, Any]:
    """Get information about currently active environments.

    Returns:
        dict with "count", "task_ids", "workdirs", and "total_disk_usage_mb"
        (combined size of all hermes-* directories under the scratch root).
    """
    import glob

    info = {
        "count": len(_active_environments),
        "task_ids": list(_active_environments.keys()),
        "workdirs": dict(_task_workdirs),
    }
    # BUGFIX: the old code looped over every active task and re-globbed ALL
    # "hermes-*" directories each time (the per-task pattern was built but
    # never used), so with N active tasks every directory was counted N times.
    # Scan the scratch root exactly once instead.
    total_size = 0
    scratch_dir = _get_scratch_dir()
    for path in glob.glob(str(scratch_dir / "hermes-*")):
        try:
            total_size += sum(
                f.stat().st_size for f in Path(path).rglob('*') if f.is_file()
            )
        except OSError:
            # Directory or file vanished mid-scan; skip it
            pass
    info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
    return info
def cleanup_all_environments():
    """Clean up ALL active environments. Use with caution.

    Tears down every tracked environment via cleanup_vm(), then sweeps any
    orphaned ``hermes-*`` directories left under the scratch root.

    Returns:
        int: number of tracked environments successfully cleaned.
    """
    import glob

    # Snapshot the keys: cleanup_vm() mutates _active_environments as we go
    cleaned = 0
    for task_id in list(_active_environments.keys()):
        try:
            cleanup_vm(task_id)
            cleaned += 1
        except Exception as e:
            print(f"[Terminal Cleanup] Error cleaning {task_id}: {e}")
    # Also clean any orphaned directories. rmtree(ignore_errors=True) never
    # raises, so no try/except is needed here (the old bare `except: pass`
    # was dead code).
    scratch_dir = _get_scratch_dir()
    for path in glob.glob(str(scratch_dir / "hermes-*")):
        shutil.rmtree(path, ignore_errors=True)
        print(f"[Terminal Cleanup] Removed orphaned: {path}")
    print(f"[Terminal Cleanup] Cleaned {cleaned} environments")
    return cleaned
def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity
global _active_environments, _last_activity, _task_workdirs
with _env_lock:
try:
@ -216,6 +446,9 @@ def cleanup_vm(task_id: str):
del _active_environments[task_id]
print(f"[Terminal Cleanup] Manually cleaned up environment for task: {task_id}")
if task_id in _task_workdirs:
del _task_workdirs[task_id]
if task_id in _last_activity:
del _last_activity[task_id]
@ -268,6 +501,8 @@ def terminal_tool(
# Select image based on env type
if env_type == "docker":
image = config["docker_image"]
elif env_type == "singularity":
image = config["singularity_image"]
elif env_type == "modal":
image = config["modal_image"]
else:
@ -280,12 +515,26 @@ def terminal_tool(
# Use task_id for environment isolation
effective_task_id = task_id or "default"
# For local environment, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
if env_type == "local":
import uuid
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread
_start_cleanup_thread()
# Get or create environment
with _env_lock:
if effective_task_id not in _active_environments:
# Check disk usage before creating new environment
_check_disk_usage_warning()
try:
_active_environments[effective_task_id] = _create_environment(
env_type=env_type,
@ -397,6 +646,16 @@ def check_terminal_requirements() -> bool:
import subprocess
result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
return result.returncode == 0
elif env_type == "singularity":
from minisweagent.environments.singularity import SingularityEnvironment
# Check if singularity/apptainer is available
import subprocess
import shutil
executable = shutil.which("apptainer") or shutil.which("singularity")
if executable:
result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
return result.returncode == 0
return False
elif env_type == "modal":
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
# Check for modal token

View File

@@ -155,10 +155,14 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
for attempt in range(max_retries):
try:
# Download the image with appropriate headers using async httpx
async with httpx.AsyncClient(timeout=30.0) as client:
# Enable follow_redirects to handle image CDNs that redirect (e.g., Imgur, Picsum)
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(
image_url,
headers={"User-Agent": "hermes-agent-vision/1.0"},
headers={
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "image/*,*/*;q=0.8",
},
)
response.raise_for_status()

View File

@@ -35,7 +35,8 @@ DISTRIBUTIONS = {
"vision": 100,
"image_gen": 100,
"terminal": 100,
"moa": 100
"moa": 100,
"browser": 100
}
},
@@ -55,22 +56,24 @@ DISTRIBUTIONS = {
"research": {
"description": "Web research with vision analysis and reasoning",
"toolsets": {
"web": 90, # 90% chance of web tools
"vision": 50, # 50% chance of vision tools
"moa": 40, # 40% chance of reasoning tools
"terminal": 10 # 10% chance of terminal tools
"web": 90, # 90% chance of web tools
"browser": 70, # 70% chance of browser tools for deep research
"vision": 50, # 50% chance of vision tools
"moa": 40, # 40% chance of reasoning tools
"terminal": 10 # 10% chance of terminal tools
}
},
# Scientific problem solving focused distribution
"science": {
"description": "Web research with vision analysis and reasoning",
"description": "Scientific research with web, terminal, and browser capabilities",
"toolsets": {
"web": 94, # 90% chance of web tools
"vision": 65, # 50% chance of vision tools
"moa": 10, # 40% chance of reasoning tools
"terminal": 94, # 10% chance of terminal tools
"image_gen": 15 # 80% chance of image generation tools
"web": 94, # 94% chance of web tools
"terminal": 94, # 94% chance of terminal tools
"vision": 65, # 65% chance of vision tools
"browser": 50, # 50% chance of browser for accessing papers/databases
"image_gen": 15, # 15% chance of image generation tools
"moa": 10 # 10% chance of reasoning tools
}
},
@@ -90,6 +93,7 @@ DISTRIBUTIONS = {
"description": "All tools except terminal for safety",
"toolsets": {
"web": 80,
"browser": 70, # Browser is safe (no local filesystem access)
"vision": 60,
"image_gen": 60,
"moa": 50
@@ -104,7 +108,8 @@ DISTRIBUTIONS = {
"vision": 50,
"image_gen": 50,
"terminal": 50,
"moa": 50
"moa": 50,
"browser": 50
}
},
@@ -116,6 +121,23 @@ DISTRIBUTIONS = {
}
},
# Terminal only
"terminal_only": {
"description": "Only terminal tool for code execution tasks",
"toolsets": {
"terminal": 100
}
},
# Terminal + web (common for coding tasks that need docs)
"terminal_web": {
"description": "Terminal with web search for documentation lookup",
"toolsets": {
"terminal": 100,
"web": 100
}
},
# Creative (vision + image generation)
"creative": {
"description": "Image generation and vision analysis focus",
@@ -134,6 +156,58 @@ DISTRIBUTIONS = {
"web": 30,
"terminal": 20
}
},
# Browser-based web interaction
"browser_use": {
"description": "Full browser-based web interaction with search, vision, and page control",
"toolsets": {
"browser": 100, # All browser tools always available
"web": 80, # Web search for finding URLs and quick lookups
"vision": 70 # Vision analysis for images found on pages
}
},
# Browser only (no other tools)
"browser_only": {
"description": "Only browser automation tools for pure web interaction tasks",
"toolsets": {
"browser": 100
}
},
# Browser-focused tasks distribution (for browser-use-tasks.jsonl)
"browser_tasks": {
"description": "Browser-focused distribution (browser toolset includes web_search for finding URLs since Google blocks direct browser searches)",
"toolsets": {
"browser": 97, # 97% - browser tools (includes web_search) almost always available
"vision": 12, # 12% - vision analysis occasionally
"terminal": 15 # 15% - terminal occasionally for local operations
}
},
# Terminal-focused tasks distribution (for nous-terminal-tasks.jsonl)
"terminal_tasks": {
"description": "Terminal-focused distribution with high terminal availability, occasional other tools",
"toolsets": {
"terminal": 97, # 97% - terminal almost always available
"web": 15, # 15% - web search/scrape for documentation
"browser": 10, # 10% - browser occasionally for web interaction
"vision": 8, # 8% - vision analysis rarely
"image_gen": 3 # 3% - image generation very rarely
}
},
# Mixed browser+terminal tasks distribution (for mixed-browser-terminal-tasks.jsonl)
"mixed_tasks": {
"description": "Mixed distribution with high browser and terminal availability for complex tasks",
"toolsets": {
"browser": 92, # 92% - browser tools highly available
"terminal": 92, # 92% - terminal highly available
"web": 35, # 35% - web search/scrape fairly common
"vision": 15, # 15% - vision analysis occasionally
"image_gen": 15 # 15% - image generation occasionally
}
}
}

View File

@@ -33,10 +33,16 @@ TOOLSETS = {
# Basic toolsets - individual tool categories
"web": {
"description": "Web research and content extraction tools",
"tools": ["web_search", "web_extract", "web_crawl"],
"tools": ["web_search", "web_extract"],
"includes": [] # No other toolsets included
},
"search": {
"description": "Web search only (no content extraction/scraping)",
"tools": ["web_search"],
"includes": []
},
"vision": {
"description": "Image analysis and vision tools",
"tools": ["vision_analyze"],
@@ -61,6 +67,17 @@ TOOLSETS = {
"includes": []
},
"browser": {
"description": "Browser automation for web interaction (navigate, click, type, scroll, iframes, hold-click) with web search for finding URLs",
"tools": [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision", "web_search"
],
"includes": []
},
# Scenario-specific toolsets
"debugging": {