VyoJ committed on
Commit
7fcdb70
·
verified ·
1 Parent(s): 9b4e937

Upload 78 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +51 -0
  2. Dockerfile +103 -0
  3. README.md +49 -12
  4. backend/.env.example +16 -0
  5. backend/.python-version +1 -0
  6. backend/README.md +163 -0
  7. backend/__init__.py +1 -0
  8. backend/modal_fara_vllm.py +231 -0
  9. backend/pyproject.toml +17 -0
  10. backend/server.py +588 -0
  11. backend/uv.lock +0 -0
  12. eslint.config.js +26 -0
  13. fara/__init__.py +4 -0
  14. fara/_prompts.py +276 -0
  15. fara/browser/__init__.py +0 -0
  16. fara/browser/browser_bb.py +353 -0
  17. fara/browser/page_script.js +610 -0
  18. fara/browser/playwright_controller.py +581 -0
  19. fara/fara_agent.py +602 -0
  20. fara/qwen_helpers/__init__.py +0 -0
  21. fara/qwen_helpers/base_tool.py +156 -0
  22. fara/qwen_helpers/fncall_prompt.py +212 -0
  23. fara/qwen_helpers/schema.py +166 -0
  24. fara/qwen_helpers/utils.py +24 -0
  25. fara/run_fara.py +193 -0
  26. fara/types.py +119 -0
  27. fara/utils.py +9 -0
  28. index.html +12 -0
  29. nginx.conf +79 -0
  30. package-lock.json +0 -0
  31. package.json +41 -0
  32. src/App.tsx +35 -0
  33. src/components/ConnectionStatus.tsx +55 -0
  34. src/components/Header.tsx +450 -0
  35. src/components/ProcessingIndicator.tsx +31 -0
  36. src/components/WelcomeScreen.tsx +521 -0
  37. src/components/index.ts +14 -0
  38. src/components/sandbox/SandboxViewer.tsx +400 -0
  39. src/components/sandbox/completionview/CompletionView.tsx +525 -0
  40. src/components/sandbox/completionview/DownloadGifButton.tsx +64 -0
  41. src/components/sandbox/completionview/DownloadJsonButton.tsx +56 -0
  42. src/components/sandbox/completionview/index.ts +3 -0
  43. src/components/sandbox/index.ts +2 -0
  44. src/components/steps/ConnectionStepCard.tsx +110 -0
  45. src/components/steps/FinalStepCard.tsx +105 -0
  46. src/components/steps/StepCard.tsx +399 -0
  47. src/components/steps/StepsList.tsx +395 -0
  48. src/components/steps/ThinkingStepCard.tsx +104 -0
  49. src/components/steps/index.ts +5 -0
  50. src/components/timeline/Timeline.tsx +486 -0
.gitignore ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+
6
+ # Dependencies
7
+ node_modules/
8
+
9
+ # Build
10
+ dist/
11
+ dist-ssr/
12
+ *.local
13
+
14
+ # Python
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+ *.so
19
+ .Python
20
+ build/
21
+ develop-eggs/
22
+ downloads/
23
+ eggs/
24
+ .eggs/
25
+ lib/
26
+ lib64/
27
+ parts/
28
+ sdist/
29
+ var/
30
+ wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ .env
35
+ .venv
36
+ env/
37
+ venv/
38
+
39
+ # IDE
40
+ .vscode/
41
+ .idea/
42
+ .DS_Store
43
+ *.suo
44
+ *.ntvs*
45
+ *.njsproj
46
+ *.sln
47
+ *.sw?
48
+
49
+ # Misc
50
+ .gradio/
51
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stage 1: Build React frontend
2
+ FROM node:20-slim AS frontend-builder
3
+
4
+ WORKDIR /app/frontend
5
+
6
+ # Copy package files
7
+ COPY package*.json ./
8
+
9
+ # Install dependencies
10
+ RUN npm ci
11
+
12
+ # Copy source files
13
+ COPY . .
14
+
15
+ # Build the React app
16
+ RUN npm run build
17
+
18
+ # Stage 2: Python backend + serve frontend
19
+ FROM python:3.12-slim-trixie
20
+
21
+ # Copy uv from the official distroless image (recommended approach)
22
+ COPY --from=ghcr.io/astral-sh/uv:0.9.15 /uv /uvx /bin/
23
+
24
+ # Install system dependencies for Playwright and nginx
25
+ RUN apt-get update && apt-get install -y \
26
+ nginx \
27
+ supervisor \
28
+ libnss3 \
29
+ libnspr4 \
30
+ libatk1.0-0 \
31
+ libatk-bridge2.0-0 \
32
+ libcups2 \
33
+ libdrm2 \
34
+ libxkbcommon0 \
35
+ libxcomposite1 \
36
+ libxdamage1 \
37
+ libxfixes3 \
38
+ libxrandr2 \
39
+ libgbm1 \
40
+ libasound2 \
41
+ libpango-1.0-0 \
42
+ libpangocairo-1.0-0 \
43
+ libcairo2 \
44
+ libatspi2.0-0 \
45
+ xvfb \
46
+ fonts-liberation \
47
+ libappindicator3-1 \
48
+ libu2f-udev \
49
+ libvulkan1 \
50
+ wget \
51
+ && rm -rf /var/lib/apt/lists/*
52
+
53
+ # Create a new user named "user" with user ID 1000 (required for HF Spaces)
54
+ RUN useradd -m -u 1000 user
55
+
56
+ # Create necessary directories with proper permissions for nginx (before switching user)
57
+ RUN mkdir -p /var/log/nginx /var/lib/nginx /var/cache/nginx /run \
58
+ && chown -R user:user /var/log/nginx /var/lib/nginx /var/cache/nginx /run \
59
+ && chmod -R 755 /var/log/nginx /var/lib/nginx /var/cache/nginx /run
60
+
61
+ # Configure nginx (needs root for /etc/nginx)
62
+ COPY nginx.conf /etc/nginx/nginx.conf
63
+
64
+ # Configure supervisor
65
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
66
+
67
+ # Allow user to run supervisor
68
+ RUN chown -R user:user /etc/supervisor
69
+
70
+ # Switch to the "user" user
71
+ USER user
72
+
73
+ # Set home to the user's home directory
74
+ ENV HOME=/home/user \
75
+ PATH=/home/user/.local/bin:$PATH
76
+
77
+ # Set the working directory to the user's home directory
78
+ WORKDIR $HOME/app
79
+
80
+ # Copy backend code and sync with locked dependencies
81
+ COPY --chown=user:user backend/ ./backend/
82
+ RUN cd backend && uv sync
83
+
84
+ # Copy FARA source
85
+ COPY --chown=user:user fara/ ./fara/
86
+
87
+ # Activate the virtual environment by adding it to PATH
88
+ ENV PATH="$HOME/app/backend/.venv/bin:$PATH"
89
+
90
+ # Install Playwright browsers
91
+ RUN playwright install chromium
92
+
93
+ # Copy built frontend from Stage 1
94
+ COPY --chown=user:user --from=frontend-builder /app/frontend/dist ./static
95
+
96
+ # Expose port
97
+ EXPOSE 7860
98
+
99
+ # Set environment variables
100
+ ENV PYTHONUNBUFFERED=1
101
+
102
+ # Start supervisor (manages nginx + python backend)
103
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
README.md CHANGED
@@ -1,12 +1,49 @@
1
- ---
2
- title: Fara BrowserUse
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- short_description: Microsoft Fara-7B Browser-Use demo inspired by CUA2
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FARA - Computer Use Agent
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ app_port: 7860
10
+ suggested_hardware: cpu-upgrade
11
+ tags:
12
+ - computer-use
13
+ - browser-automation
14
+ - ai-agent
15
+ - vision-language-model
16
+ ---
17
+
18
+ # 🤖 FARA - Computer Use Agent Demo
19
+
20
+ FARA (Fara Agent for Real-world Automation) is an AI agent that can browse the web and complete tasks autonomously.
21
+
22
+ ## Features
23
+
24
+ - 🌐 **Autonomous Web Navigation** - The agent can browse websites on its own
25
+ - 🔍 **Web Search** - Search for information across the web
26
+ - 📝 **Form Filling** - Fill out forms automatically
27
+ - 🖱️ **Point and Click** - Click buttons, links, and elements
28
+ - ⌨️ **Text Input** - Type text into fields
29
+ - 📜 **Page Scrolling** - Scroll through content
30
+
31
+ ## How to Use
32
+
33
+ 1. Enter a task in natural language (e.g., "Search for the latest news about AI")
34
+ 2. Click "Run Task" and watch the agent work!
35
+ 3. View the screenshots to see each step the agent takes
36
+
37
+ ## Powered By
38
+
39
+ - **Microsoft Fara-7B** - Vision-Language Model for computer use
40
+ - **Playwright** - Browser automation framework
41
+ - **Modal** - Model hosting and inference
42
+
43
+ ## Links
44
+
45
+ - [GitHub Repository](https://github.com/microsoft/fara)
46
+
47
+ ## License
48
+
49
+ MIT License
backend/.env.example ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend Environment Variables
2
+
3
+ # FARA Model Configuration
4
+ FARA_MODEL_NAME=microsoft/Fara-7B
5
+ FARA_ENDPOINT_URL=https://your-workspace--fara-vllm-serve.modal.run/v1
6
+ FARA_API_KEY=not-needed
7
+
8
+ # Modal Proxy Auth Configuration
9
+ # These credentials are used for BOTH the vLLM endpoint AND trace storage
10
+ # Create a Proxy Auth Token at: https://modal.com/settings/proxy-auth-tokens
11
+ MODAL_TOKEN_ID=wk-xxxxxxxx
12
+ MODAL_TOKEN_SECRET=ws-xxxxxxxx
13
+
14
+ # Modal Trace Storage Endpoint URL
15
+ # Get the URL from `modal deploy backend/modal_fara_vllm.py`
16
+ MODAL_TRACE_STORAGE_URL=https://your-workspace--fara-vllm-store-trace.modal.run
backend/.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
backend/README.md ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FaraCUA Backend
2
+
3
+ The backend server for FaraCUA - a Computer Use Agent (CUA) demo powered by Microsoft's Fara-7B vision-language model and Modal for serverless GPU inference.
4
+
5
+ ## Overview
6
+
7
+ This backend provides:
8
+
9
+ - **WebSocket API** - Real-time communication with the React frontend for streaming agent actions
10
+ - **REST API** - Model listing, random question generation, and trace storage
11
+ - **FARA Agent Integration** - Runs the Fara agent with Playwright for browser automation
12
+ - **Modal Integration** - Proxies requests to Modal's vLLM endpoint and trace storage
13
+
14
+ ## Architecture
15
+
16
+ ```
17
+ ┌─────────────┐     WebSocket     ┌─────────────┐     HTTP      ┌─────────────┐
18
+ │  Frontend   │ ◄───────────────► │   Backend   │ ◄───────────► │    Modal    │
19
+ │   (React)   │                   │  (FastAPI)  │               │   (vLLM)    │
20
+ └─────────────┘                   └─────────────┘               └─────────────┘
21
+
22
+                                          │ Playwright
23
+
24
+                                   ┌─────────────┐
25
+                                   │   Browser   │
26
+                                   │ (Headless)  │
27
+                                   └─────────────┘
28
+ ```
29
+
30
+ ## Files
31
+
32
+ | File | Description |
33
+ |------|-------------|
34
+ | `server.py` | Main FastAPI server with WebSocket and REST endpoints |
35
+ | `modal_fara_vllm.py` | Modal deployment for vLLM inference and trace storage |
36
+ | `pyproject.toml` | Python dependencies |
37
+ | `.env.example` | Example environment configuration |
38
+
39
+ ## Setup
40
+
41
+ ### 1. Install Dependencies
42
+
43
+ ```bash
44
+ # Using uv (recommended)
45
+ uv sync
46
+
47
+ # Or using pip
48
+ pip install -e .
49
+ ```
50
+
51
+ ### 2. Install Playwright
52
+
53
+ ```bash
54
+ playwright install chromium
55
+ ```
56
+
57
+ ### 3. Deploy Modal Endpoints
58
+
59
+ ```bash
60
+ modal deploy backend/modal_fara_vllm.py
61
+ ```
62
+
63
+ This deploys:
64
+ - **vLLM Server** - GPU-accelerated inference for Fara-7B at `https://<workspace>--fara-vllm-serve.modal.run`
65
+ - **Trace Storage** - Endpoint for storing task traces at `https://<workspace>--fara-vllm-store-trace.modal.run`
66
+
67
+ ### 4. Configure Environment
68
+
69
+ Copy `.env.example` to `.env` and fill in your values:
70
+
71
+ ```bash
72
+ cp .env.example .env
73
+ ```
74
+
75
+ Required variables:
76
+
77
+ | Variable | Description |
78
+ |----------|-------------|
79
+ | `FARA_MODEL_NAME` | Model name (default: `microsoft/Fara-7B`) |
80
+ | `FARA_ENDPOINT_URL` | Modal vLLM endpoint URL (from deploy output) |
81
+ | `FARA_API_KEY` | API key (default: `not-needed` for Modal) |
82
+ | `MODAL_TOKEN_ID` | Modal proxy auth token ID |
83
+ | `MODAL_TOKEN_SECRET` | Modal proxy auth token secret |
84
+ | `MODAL_TRACE_STORAGE_URL` | Modal trace storage endpoint URL |
85
+
86
+ Get Modal proxy auth tokens at: https://modal.com/settings/proxy-auth-tokens
87
+
88
+ ### 5. Run the Server
89
+
90
+ ```bash
91
+ # Development mode
92
+ uvicorn backend.server:app --host 0.0.0.0 --port 8000 --reload
93
+
94
+ # Or directly
95
+ python -m backend.server
96
+ ```
97
+
98
+ ## API Endpoints
99
+
100
+ ### WebSocket
101
+
102
+ - `ws://localhost:8000/ws` - Real-time agent communication
103
+ - **Receives**: `user_task`, `stop_task`, `ping`
104
+ - **Sends**: `heartbeat`, `agent_start`, `agent_progress`, `agent_complete`, `agent_error`
105
+
106
+ ### REST
107
+
108
+ | Method | Endpoint | Description |
109
+ |--------|----------|-------------|
110
+ | GET | `/api/health` | Health check |
111
+ | GET | `/api/models` | List available models |
112
+ | GET | `/api/random-question` | Get a random example task |
113
+ | POST | `/api/traces` | Store a trace (proxies to Modal) |
114
+
115
+ ## Trace Storage
116
+
117
+ Task traces are automatically uploaded to Modal volumes for research purposes. Traces include:
118
+
119
+ - Task instruction and model used
120
+ - Step-by-step agent actions with screenshots
121
+ - Token usage and timing metrics
122
+ - User evaluation (success/failed)
123
+
124
+ Duplicate traces (same ID and instruction) are automatically overwritten to capture the latest evaluation.
125
+
126
+ ## Docker
127
+
128
+ The backend is designed to run in Docker alongside the frontend. See the root `Dockerfile` for the combined deployment.
129
+
130
+ ```bash
131
+ # Build from root
132
+ docker build -t fara-cua .
133
+
134
+ # Run with env file
135
+ docker run -d --name fara-cua -p 7860:7860 --env-file backend/.env fara-cua
136
+ ```
137
+
138
+ ## Development
139
+
140
+ ### Running Locally
141
+
142
+ For local development, you can run the backend separately:
143
+
144
+ ```bash
145
+ cd backend
146
+ uvicorn server:app --host 0.0.0.0 --port 8000 --reload
147
+ ```
148
+
149
+ Make sure the frontend is configured to connect to `http://localhost:8000`.
150
+
151
+ ### Testing Modal Endpoints
152
+
153
+ ```bash
154
+ # Test vLLM endpoint
155
+ modal run backend/modal_fara_vllm.py::test
156
+
157
+ # Check deployment status
158
+ modal app list
159
+ ```
160
+
161
+ ## License
162
+
163
+ See the root LICENSE file for license information.
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Backend package
backend/modal_fara_vllm.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
+
4
+ import aiohttp
5
+ import modal
6
+
7
+ vllm_image = (
8
+ modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.12")
9
+ .entrypoint([])
10
+ .uv_pip_install(
11
+ "vllm==0.11.2",
12
+ "huggingface-hub==0.36.0",
13
+ "flashinfer-python==0.5.2",
14
+ )
15
+ .env({"HF_XET_HIGH_PERFORMANCE": "1"}) # faster model transfers
16
+ )
17
+
18
+ # Lightweight image for the trace storage endpoint (doesn't need CUDA/vllm)
19
+ trace_storage_image = modal.Image.debian_slim(python_version="3.12").uv_pip_install(
20
+ "fastapi", "uvicorn"
21
+ )
22
+
23
+ MODEL_NAME = "microsoft/Fara-7B"
24
+ MODEL_REVISION = None # Use latest if no specific revision
25
+
26
+ hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
27
+ vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
28
+ traces_vol = modal.Volume.from_name("fara-traces", create_if_missing=True)
29
+
30
+ FAST_BOOT = True # Set to False for better performance if not cold-starting frequently
31
+
32
+ app = modal.App("fara-vllm")
33
+
34
+ MINUTES = 60 # seconds
35
+ VLLM_PORT = 5000 # Changed to 5000 as per user specification
36
+ N_GPU = 1
37
+
38
+
39
@app.function(
    image=vllm_image,
    gpu="L40S",
    scaledown_window=2 * MINUTES,
    timeout=10 * MINUTES,
    volumes={
        "/root/.cache/huggingface": hf_cache_vol,
        "/root/.cache/vllm": vllm_cache_vol,
    },
)
@modal.concurrent(max_inputs=32)
@modal.web_server(
    port=VLLM_PORT, startup_timeout=10 * MINUTES, requires_proxy_auth=True
)
def serve():
    """Start `vllm serve` for MODEL_NAME as a background subprocess.

    The server is told to listen on 0.0.0.0:VLLM_PORT. The subprocess is
    started with Popen and is not waited on, so this function returns as soon
    as the process has been spawned.
    """
    import subprocess

    cmd = [
        "vllm",
        "serve",
        "--uvicorn-log-level=info",
        MODEL_NAME,
        "--served-model-name",
        MODEL_NAME,
        "--host",
        "0.0.0.0",
        "--port",
        str(VLLM_PORT),
        "--dtype",
        "auto",  # As per user specification
        "--max-model-len",
        "32768",  # Limit context length to fit in GPU memory (default 128000 too large)
    ]

    if MODEL_REVISION:
        cmd += ["--revision", MODEL_REVISION]

    # enforce-eager disables both Torch compilation and CUDA graph capture
    cmd += ["--enforce-eager" if FAST_BOOT else "--no-enforce-eager"]

    # assume multiple GPUs are for splitting up large matrix multiplications
    cmd += ["--tensor-parallel-size", str(N_GPU)]

    print(cmd)

    # Pass the argv list directly instead of joining it into a shell string:
    # no shell is involved, so arguments containing spaces or shell
    # metacharacters (e.g. a future model name or revision) cannot be split
    # or interpreted.
    subprocess.Popen(cmd)
85
+
86
+
87
@app.function(
    image=trace_storage_image,
    volumes={"/traces": traces_vol},
    timeout=2 * MINUTES,
)
@modal.fastapi_endpoint(method="POST", requires_proxy_auth=True)
def store_trace(trace_data: dict) -> dict:
    """
    Store a task trace JSON in the Modal volume.
    If a trace with the same ID and instruction already exists, it will be overwritten.

    Expected trace_data structure:
    {
        "trace": { id, timestamp, instruction, modelId, isRunning },
        "completion": { status, message, finalAnswer },
        "metadata": { traceId, inputTokensUsed, outputTokensUsed, ... user_evaluation },
        "steps": [...],
        "exportedAt": "ISO timestamp"
    }

    Returns:
        On success: {"success": True, "message", "filepath", "trace_id",
        "was_overwritten"}. On any exception: {"success": False, "error": str}.
    """
    import glob
    import os
    import re
    from datetime import datetime

    try:
        # Extract trace ID and instruction for duplicate detection
        trace_info = trace_data.get("trace", {})
        trace_id = trace_info.get("id", "unknown")
        instruction = trace_info.get("instruction", "")

        # The ID comes from the request body and ends up in a filename and a
        # glob pattern. Restrict it to a safe alphabet so it cannot contain
        # path separators ("../") or glob metacharacters ("*", "?", "[").
        # IDs made only of letters, digits, "_" and "-" are left unchanged.
        safe_id = re.sub(r"[^A-Za-z0-9_-]", "_", str(trace_id)) or "unknown"

        # Take one timestamp so the folder name and filename always agree.
        now = datetime.now()

        # Create organized directory structure: /traces/YYYY-MM/
        date_folder = now.strftime("%Y-%m")
        trace_dir = f"/traces/{date_folder}"
        os.makedirs(trace_dir, exist_ok=True)

        # Check for existing trace with same ID (in all monthly folders)
        existing_file = None
        for candidate in sorted(glob.glob(f"/traces/*/*_{safe_id}.json")):
            # Verify it's the same trace by checking instruction
            try:
                with open(candidate, "r") as f:
                    existing_data = json.load(f)
            except (json.JSONDecodeError, IOError):
                # If we can't read the file, skip it
                continue
            if existing_data.get("trace", {}).get("instruction", "") == instruction:
                existing_file = candidate
                break

        if existing_file:
            # Overwrite the existing file
            filepath = existing_file
            print(f"Overwriting existing trace: {filepath}")
        else:
            # Generate new filename
            timestamp = now.strftime("%Y%m%d_%H%M%S")
            filepath = f"{trace_dir}/{timestamp}_{safe_id}.json"

        # Write trace to file
        with open(filepath, "w") as f:
            json.dump(trace_data, f, indent=2, default=str)

        # Commit volume changes
        traces_vol.commit()

        return {
            "success": True,
            "message": "Trace stored successfully"
            if not existing_file
            else "Trace updated successfully",
            "filepath": filepath,
            "trace_id": trace_id,
            "was_overwritten": existing_file is not None,
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
173
+
174
+
175
@app.local_entrypoint()
async def test(test_timeout=10 * MINUTES, content=None, twice=True):
    """Smoke-test the deployed server: health check, then one or two chat requests.

    Args:
        test_timeout: Overall budget in seconds; the health check is given this
            value minus one minute.
        content: User message to send. A default greeting is used when None.
        twice: When true, a second request is sent with a different system prompt.
    """
    url = serve.get_web_url()

    user_text = (
        "Hello, what can you do to help with computer tasks?"
        if content is None
        else content
    )

    # OpenAI chat format: system prompt first, then the user turn.
    messages = [
        {
            "role": "system",
            "content": "You are an AI assistant specialized in computer use tasks.",
        },
        {"role": "user", "content": user_text},
    ]

    health_timeout = test_timeout - 1 * MINUTES

    async with aiohttp.ClientSession(base_url=url) as session:
        print(f"Running health check for server at {url}")
        async with session.get("/health", timeout=health_timeout) as resp:
            up = resp.status == 200
        assert up, f"Failed health check for server at {url}"
        print(f"Successful health check for server at {url}")

        print(f"Sending messages to {url}:", *messages, sep="\n\t")
        await _send_request(session, MODEL_NAME, messages)

        if not twice:
            return

        # Second pass: same user message, different system prompt.
        messages[0]["content"] = "You are a helpful assistant."
        print(f"Sending messages to {url}:", *messages, sep="\n\t")
        await _send_request(session, MODEL_NAME, messages)
204
+
205
+
206
async def _send_request(
    session: aiohttp.ClientSession, model: str, messages: list
) -> None:
    """POST a streaming chat completion and print the generated text as it arrives.

    Args:
        session: Client session used to issue the request to
            "/v1/chat/completions".
        model: Model name placed in the request payload.
        messages: Chat messages in OpenAI chat format.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error
            status (raised by ``resp.raise_for_status()``).
        AssertionError: If a streamed chunk is not a "chat.completion.chunk".
    """
    # `stream=True` tells an OpenAI-compatible backend to stream chunks
    payload: dict[str, Any] = {"messages": messages, "model": model, "stream": True}

    headers = {"Content-Type": "application/json", "Accept": "text/event-stream"}

    async with session.post(
        "/v1/chat/completions", json=payload, headers=headers, timeout=1 * MINUTES
    ) as resp:
        # Check the status once, before reading the body. Doing this inside
        # the loop would miss an error response whose body is empty.
        resp.raise_for_status()

        async for raw in resp.content:
            # extract new content and stream it
            line = raw.decode().strip()
            if not line or line == "data: [DONE]":
                continue
            if line.startswith("data: "):  # SSE prefix
                line = line[len("data: ") :]

            chunk = json.loads(line)
            assert (
                chunk["object"] == "chat.completion.chunk"
            )  # or something went horribly wrong

            # Some chunks carry no text (empty `choices`, or a delta holding
            # only a role / finish marker); skip those instead of raising.
            choices = chunk.get("choices") or []
            if not choices:
                continue
            text = choices[0].get("delta", {}).get("content")
            if text:
                print(text, end="")
    print()
backend/pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "backend"
3
+ version = "0.1.0"
4
+ description = "FaraCUA backend: FastAPI server that runs the Fara-7B computer-use agent"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "fastapi>=0.109.0",
9
+ "jsonschema>=4.25.1",
10
+ "openai>=2.8.1",
11
+ "pillow>=12.0.0",
12
+ "playwright==1.51",
13
+ "pyyaml>=6.0.3",
14
+ "tenacity>=9.1.2",
15
+ "uvicorn[standard]>=0.27.0",
16
+ "websockets>=12.0",
17
+ ]
backend/server.py ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FARA Backend Server for HuggingFace Space
3
+ Provides WebSocket communication and REST API for the React frontend
4
+ """
5
+
6
+ import asyncio
7
+ import base64
8
+ import logging
9
+ import os
10
+
11
+ # Import FARA components
12
+ import sys
13
+ import tempfile
14
+ import uuid
15
+ from datetime import datetime
16
+ from typing import Dict, Optional
17
+
18
+ import httpx
19
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+ from fastapi.responses import JSONResponse
22
+ from playwright._impl._errors import TargetClosedError
23
+
24
+ sys.path.insert(0, "/app")
25
+ from fara import FaraAgent
26
+ from fara.browser.browser_bb import BrowserBB
27
+
28
+ # Configure logging
29
+ logging.basicConfig(level=logging.INFO)
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Modal trace storage configuration
33
+ MODAL_TRACE_STORAGE_URL = os.environ.get("MODAL_TRACE_STORAGE_URL", "")
34
+ MODAL_TOKEN_ID = os.environ.get("MODAL_TOKEN_ID", "")
35
+ MODAL_TOKEN_SECRET = os.environ.get("MODAL_TOKEN_SECRET", "")
36
+
37
+ # Modal vLLM endpoint configuration (from environment variables for HF Spaces)
38
+ # Includes proxy auth headers for authenticated Modal endpoints
39
+ ENDPOINT_CONFIG = {
40
+ "model": os.environ.get("FARA_MODEL_NAME", "microsoft/Fara-7B"),
41
+ "base_url": os.environ.get("FARA_ENDPOINT_URL"),
42
+ "api_key": os.environ.get("FARA_API_KEY", "not-needed"),
43
+ "default_headers": {
44
+ "Modal-Key": MODAL_TOKEN_ID,
45
+ "Modal-Secret": MODAL_TOKEN_SECRET,
46
+ }
47
+ if MODAL_TOKEN_ID and MODAL_TOKEN_SECRET
48
+ else None,
49
+ }
50
+
51
+ # Available models (for the frontend dropdown)
52
+ AVAILABLE_MODELS = ["microsoft/Fara-7B"]
53
+
54
+ app = FastAPI(title="FARA Backend")
55
+
56
+ # CORS middleware
57
+ app.add_middleware(
58
+ CORSMiddleware,
59
+ allow_origins=["*"],
60
+ allow_credentials=True,
61
+ allow_methods=["*"],
62
+ allow_headers=["*"],
63
+ )
64
+
65
+ # Store active connections and their sessions
66
+ active_connections: Dict[str, WebSocket] = {}
67
+ active_sessions: Dict[str, "FaraSession"] = {}
68
+
69
+
70
+ class FaraSession:
71
+ """Manages a single FARA agent session"""
72
+
73
def __init__(self, trace_id: str, websocket: WebSocket):
    """Create an idle session tied to one trace ID and one WebSocket.

    Args:
        trace_id: Identifier stored on the session as ``self.trace_id``.
        websocket: Connection stored as ``self.websocket``.
    """
    # Identity and transport.
    self.trace_id, self.websocket = trace_id, websocket

    # Unset at construction time.
    self.agent: Optional[FaraAgent] = None
    self.browser_manager: Optional[BrowserBB] = None
    self.screenshots_dir: Optional[str] = None
    self.start_time: Optional[datetime] = None

    # Run-state flags.
    self.is_running = self.should_stop = False

    # Counters.
    self.step_count = 0
    self.total_input_tokens = self.total_output_tokens = 0
85
+
86
async def initialize(self, start_page: str = "https://www.bing.com/"):
    """Create the scratch directory, browser manager and agent, then initialize the agent.

    Args:
        start_page: URL handed to FaraAgent as ``start_page``.

    Returns:
        True once ``self.agent.initialize()`` has completed.
    """
    # One temp directory serves as both the downloads folder and the
    # screenshots location.
    scratch_dir = tempfile.mkdtemp(prefix="fara_screenshots_")
    self.screenshots_dir = scratch_dir

    # Browser manager settings (headless for HF Space).
    browser_options = dict(
        headless=True,
        viewport_height=900,
        viewport_width=1440,
        page_script_path=None,
        browser_channel="chromium",
        browser_data_dir=None,
        downloads_folder=scratch_dir,
        to_resize_viewport=True,
        single_tab_mode=True,
        animate_actions=False,
        use_browser_base=False,
        logger=logger,
    )
    self.browser_manager = BrowserBB(**browser_options)

    agent_options = dict(
        browser_manager=self.browser_manager,
        client_config=ENDPOINT_CONFIG,
        start_page=start_page,
        downloads_folder=scratch_dir,
        save_screenshots=True,
        max_rounds=50,
    )
    self.agent = FaraAgent(**agent_options)

    await self.agent.initialize()
    return True
118
+
119
async def send_event(self, event: dict):
    """Send one event dict over this session's WebSocket as JSON.

    Failures are logged and swallowed, so a send error never propagates to
    the caller.
    """
    try:
        socket = self.websocket
        await socket.send_json(event)
    except Exception as send_error:
        logger.error(f"Error sending event: {send_error}")
125
+
126
async def get_screenshot_base64(self) -> Optional[str]:
    """Return the active page's screenshot as a PNG data URL, or None.

    None is returned when there is no agent, no active page, or the capture
    fails. If the page was closed mid-capture (TargetClosedError), the active
    page is looked up again and the capture is retried once.
    """
    if not self.agent:
        return None

    async def _capture(target) -> str:
        # Take the screenshot through the agent's controller and wrap the
        # bytes in a data URL.
        raw = await self.agent._playwright_controller.get_screenshot(target)
        return f"data:image/png;base64,{base64.b64encode(raw).decode()}"

    try:
        # Get the current active page from the browser context
        current = self._get_active_page()
        if current:
            return await _capture(current)
    except TargetClosedError:
        logger.warning(
            "Page closed while getting screenshot, attempting recovery..."
        )
        retry_target = self._get_active_page()
        if retry_target:
            try:
                return await _capture(retry_target)
            except Exception as retry_error:
                logger.error(f"Recovery screenshot failed: {retry_error}")
    except Exception as capture_error:
        logger.error(f"Error getting screenshot: {capture_error}")
    return None
153
+
154
+ def _get_active_page(self):
155
+ """Get the currently active page from the browser context"""
156
+ if (
157
+ self.agent
158
+ and self.agent.browser_manager
159
+ and self.agent.browser_manager._context
160
+ ):
161
+ pages = self.agent.browser_manager._context.pages
162
+ if pages:
163
+ # Return the last (most recent) page, or the one marked as active
164
+ return pages[-1]
165
+ return self.agent._page if self.agent else None
166
+
167
async def run_task(self, instruction: str, model_id: str):
    """Run one task end to end and stream its lifecycle over the WebSocket.

    Resets the per-run counters, emits an ``agent_start`` event, initializes
    the browser/agent, and hands off to ``_run_agent_with_streaming``. Any
    exception is logged and reported to the client as an ``agent_error``
    event rather than raised. ``is_running`` is cleared and ``close()`` is
    awaited in all cases.

    Args:
        instruction: Natural-language task passed to the agent loop.
        model_id: Model identifier echoed back in the ``agent_start`` event.
    """
    self.is_running = True
    self.should_stop = False
    self.step_count = 0
    self.start_time = datetime.now()
    self.total_input_tokens = 0
    self.total_output_tokens = 0

    try:
        # Send agent_start event
        await self.send_event(
            {
                "type": "agent_start",
                "agentTrace": {
                    "id": self.trace_id,
                    "instruction": instruction,
                    "modelId": model_id,
                    "timestamp": self.start_time.isoformat(),
                    "isRunning": True,
                    "traceMetadata": {
                        "traceId": self.trace_id,
                        "inputTokensUsed": 0,
                        "outputTokensUsed": 0,
                        "duration": 0,
                        "numberOfSteps": 0,
                        "maxSteps": 50,
                        "completed": False,
                    },
                },
            }
        )

        # Initialize agent
        await self.initialize()

        # No screenshot is taken here: the previous capture was stored in an
        # unused local, and the streaming loop takes its own initial
        # screenshot.

        # Run the agent with custom loop to stream progress
        await self._run_agent_with_streaming(instruction)

    except Exception as e:
        logger.exception("Error running agent task")
        await self.send_event({"type": "agent_error", "error": str(e)})
    finally:
        self.is_running = False
        await self.close()
215
+
216
async def _run_agent_with_streaming(self, user_message: str):
    """Run the agent loop round by round and stream every step to the frontend.

    The first screenshot plus ``user_message`` are appended to the agent's
    chat history, then up to ``agent.max_rounds`` rounds are executed. Each
    round asks the model for an action, executes it, and emits an
    ``agent_progress`` event carrying the step (thought, action, screenshot,
    timing and *estimated* token usage).

    Events sent through ``self.send_event``:
      * ``agent_progress`` - after every successfully executed step.
      * ``agent_complete`` - with ``final_state`` ``"stopped"`` (stop was
        requested), ``"success"`` (the action reported a stop) or
        ``"max_steps_reached"``.
      * ``agent_error`` - when a step fails; the loop then ends without an
        ``agent_complete`` event.

    Args:
        user_message: The task instruction entered by the user.

    Raises:
        RuntimeError: If a pending captcha is not solved within 60 seconds.
    """
    agent = self.agent

    # Initialize if not already done
    await agent.initialize()
    assert agent._page is not None, "Page should be initialized"

    # Screenshot handed to the model on the first round
    scaled_screenshot = await agent._get_scaled_screenshot()

    if agent.save_screenshots:
        await agent._playwright_controller.get_screenshot(
            agent._page,
            path=os.path.join(
                agent.downloads_folder, f"screenshot{agent._num_actions}.png"
            ),
        )

    # Add user message to chat history
    from fara.types import ImageObj, UserMessage

    agent._chat_history.append(
        UserMessage(
            content=[ImageObj.from_pil(scaled_screenshot), user_message],
            is_original=True,
        )
    )

    is_stop_action = False

    for i in range(agent.max_rounds):
        if self.should_stop:
            # User requested stop
            await self.send_event(
                {
                    "type": "agent_complete",
                    "traceMetadata": self._get_metadata(),
                    "final_state": "stopped",
                }
            )
            return

        is_first_round = i == 0
        step_start_time = datetime.now()

        # Wait for captcha if needed (the event is cleared while one is being solved)
        if not agent.browser_manager._captcha_event.is_set():
            logger.info("Waiting 60s for captcha to finish...")
            captcha_solved = await agent.wait_for_captcha_with_timeout(60)
            if (
                not captcha_solved
                and not agent.browser_manager._captcha_event.is_set()
            ):
                raise RuntimeError("Captcha timed out")

        try:
            # Generate model response
            function_call, raw_response = await agent.generate_model_call(
                is_first_round, scaled_screenshot if is_first_round else None
            )

            # Parse response
            thoughts, action_dict = agent._parse_thoughts_and_action(raw_response)
            action_args = action_dict.get("arguments", {})
            action = action_args["action"]

            logger.info(
                f"\nThought #{i + 1}: {thoughts}\nAction #{i + 1}: {action}"
            )

            # Execute action with recovery for page changes
            try:
                # The screenshot returned by execute_action is not used here;
                # a fresh one is captured below for the progress event.
                (
                    is_stop_action,
                    _,
                    action_description,
                ) = await agent.execute_action(function_call)
            except TargetClosedError:
                logger.warning(
                    "Page closed during action execution, attempting recovery..."
                )
                # Try to recover the page reference
                new_page = self._get_active_page()
                if new_page and new_page != agent._page:
                    logger.info("Recovered with new active page")
                    agent._page = new_page
                    # Wait for the page to stabilize
                    await asyncio.sleep(1)
                    action_description = (
                        "Action completed (page navigation occurred)"
                    )
                    is_stop_action = False
                else:
                    # No replacement page: re-raise with the original traceback
                    raise

            # Sync the agent's page reference with the active page
            active_page = self._get_active_page()
            if active_page and active_page != agent._page:
                logger.info("Updating agent page reference to active page")
                agent._page = active_page

            # Get screenshot for this step
            screenshot_base64 = await self.get_screenshot_base64()

        except TargetClosedError as e:
            logger.error(f"Unrecoverable page error: {e}")
            await self.send_event(
                {
                    "type": "agent_error",
                    "error": f"Browser page closed unexpectedly: {str(e)}",
                }
            )
            return
        except Exception as e:
            logger.exception(f"Error in agent step {i + 1}")
            await self.send_event({"type": "agent_error", "error": str(e)})
            return

        # Calculate step duration and tokens (estimated)
        step_duration = (datetime.now() - step_start_time).total_seconds()
        step_input_tokens = 1000  # Estimated
        step_output_tokens = len(raw_response) // 4  # Rough estimate

        self.total_input_tokens += step_input_tokens
        self.total_output_tokens += step_output_tokens
        self.step_count += 1

        # Create step object
        step = {
            "stepId": str(uuid.uuid4()),
            "traceId": self.trace_id,
            "stepNumber": self.step_count,
            "thought": thoughts,
            "actions": [
                {
                    "function_name": action,
                    "description": action_description,
                    "parameters": action_args,
                }
            ],
            "image": screenshot_base64,
            "duration": step_duration,
            "inputTokensUsed": step_input_tokens,
            "outputTokensUsed": step_output_tokens,
            "timestamp": datetime.now().isoformat(),
        }

        # Send progress event
        await self.send_event(
            {
                "type": "agent_progress",
                "agentStep": step,
                "traceMetadata": self._get_metadata(),
            }
        )

        if is_stop_action:
            break

    # Send completion event
    final_state = "success" if is_stop_action else "max_steps_reached"
    await self.send_event(
        {
            "type": "agent_complete",
            "traceMetadata": self._get_metadata(completed=True),
            "final_state": final_state,
        }
    )
388
+
389
+ def _get_metadata(self, completed: bool = False) -> dict:
390
+ """Get current trace metadata"""
391
+ duration = 0
392
+ if self.start_time:
393
+ duration = (datetime.now() - self.start_time).total_seconds()
394
+
395
+ return {
396
+ "traceId": self.trace_id,
397
+ "inputTokensUsed": self.total_input_tokens,
398
+ "outputTokensUsed": self.total_output_tokens,
399
+ "duration": duration,
400
+ "numberOfSteps": self.step_count,
401
+ "maxSteps": 50,
402
+ "completed": completed,
403
+ }
404
+
405
async def stop(self):
    """Flag the session so the run loop ends before starting its next round.

    Only ``self.should_stop`` is set here; the run loop checks the flag at
    the top of each round.
    """
    setattr(self, "should_stop", True)
408
+
409
async def close(self):
    """Release the agent and delete the temporary screenshots directory.

    Failures while closing the agent or removing the directory are logged
    and swallowed, so cleanup always runs to the end.
    """
    current_agent = self.agent
    if current_agent:
        try:
            await current_agent.close()
        except Exception as exc:
            logger.error(f"Error closing agent: {exc}")
        # Drop the references even when closing failed
        self.agent = None
        self.browser_manager = None

    shots_dir = self.screenshots_dir
    if shots_dir and os.path.exists(shots_dir):
        import shutil

        try:
            shutil.rmtree(shots_dir)
        except Exception as exc:
            logger.error(f"Error cleaning up screenshots: {exc}")
        self.screenshots_dir = None
427
+
428
+
429
@app.get("/api/models")
async def get_models():
    """Serve the module-level ``AVAILABLE_MODELS`` value as a JSON response."""
    models_payload = AVAILABLE_MODELS
    return JSONResponse(content=models_payload)
433
+
434
+
435
@app.post("/api/traces")
async def store_trace(trace_data: dict):
    """Forward a task trace to the Modal trace-storage endpoint.

    The Modal credentials stay on the server: the client posts the trace
    here and this handler relays it with the ``Modal-Key`` /
    ``Modal-Secret`` headers.

    Args:
        trace_data: JSON body of the request, relayed unchanged.

    Returns:
        A ``JSONResponse``:
          * the upstream JSON body when upstream answers with any 2xx status
            (``{"success": True}`` if that body is not valid JSON);
          * 503 when the storage URL or the credentials are not configured;
          * the upstream status code with an error body on a non-2xx answer;
          * 504 on timeout, 500 on any other failure.
    """
    if not MODAL_TRACE_STORAGE_URL:
        logger.warning("Modal trace storage URL not configured")
        return JSONResponse(
            status_code=503,
            content={"success": False, "error": "Trace storage not configured"},
        )

    if not MODAL_TOKEN_ID or not MODAL_TOKEN_SECRET:
        logger.warning("Modal proxy auth credentials not configured")
        return JSONResponse(
            status_code=503,
            content={"success": False, "error": "Modal auth not configured"},
        )

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                MODAL_TRACE_STORAGE_URL,
                json=trace_data,
                headers={
                    "Content-Type": "application/json",
                    "Modal-Key": MODAL_TOKEN_ID,
                    "Modal-Secret": MODAL_TOKEN_SECRET,
                },
            )

        # Any 2xx means the trace was accepted. Checking for exactly 200 used
        # to turn e.g. a 201 into a "failure" body sent with a success status.
        if 200 <= response.status_code < 300:
            try:
                result = response.json()
            except ValueError:
                # Successful answer without a JSON body (e.g. 204 No Content)
                result = {"success": True}
            stored_id = (
                result.get("trace_id", "unknown")
                if isinstance(result, dict)
                else "unknown"
            )
            logger.info(f"Trace stored successfully: {stored_id}")
            return JSONResponse(content=result)

        error_text = response.text
        logger.error(
            f"Failed to store trace: {response.status_code} - {error_text}"
        )
        return JSONResponse(
            status_code=response.status_code,
            content={
                "success": False,
                "error": f"Modal API error: {error_text}",
            },
        )
    except httpx.TimeoutException:
        logger.error("Timeout storing trace to Modal")
        return JSONResponse(
            status_code=504,
            content={"success": False, "error": "Timeout connecting to trace storage"},
        )
    except Exception as e:
        logger.exception("Error storing trace")
        return JSONResponse(
            status_code=500, content={"success": False, "error": str(e)}
        )
496
+
497
+
498
@app.get("/api/random-question")
async def get_random_question():
    """Pick one of the built-in example tasks at random and return it as JSON."""
    import random

    example_questions = [
        "Search for the latest news about AI agents",
        "Find the weather forecast for San Francisco",
        "Go to GitHub and search for 'computer use agent'",
        "Find the top trending repositories on GitHub today",
        "Search for Python tutorials on YouTube",
        "Look up the current stock price of Microsoft",
        "Find the schedule for upcoming SpaceX launches",
        "Search for healthy breakfast recipes",
    ]
    picked = random.choice(example_questions)
    return JSONResponse(content={"question": picked})
514
+
515
+
516
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """WebSocket endpoint for real-time communication with the frontend.

    Message types handled (``data["type"]``):
      * ``user_task`` - creates a ``FaraSession`` for ``data["trace"]`` and
        runs it as a background task.
      * ``stop_task`` - asks the session registered under ``data["trace_id"]``
        to stop.
      * ``ping`` - answered with ``{"type": "pong"}``.

    When the connection ends, the background tasks started here are
    cancelled and awaited, and every session bound to this socket is closed
    and removed from ``active_sessions``.
    """
    await websocket.accept()

    # Generate a unique connection ID
    connection_id = str(uuid.uuid4())
    active_connections[connection_id] = websocket

    # The event loop keeps only weak references to tasks, so hold strong ones
    # here; otherwise a running task may be garbage-collected mid-execution.
    background_tasks = set()

    # Send the initial heartbeat carrying a freshly generated UUID
    trace_id = str(uuid.uuid4())
    await websocket.send_json(
        {"type": "heartbeat", "uuid": trace_id, "timestamp": datetime.now().isoformat()}
    )

    try:
        while True:
            # Wait for messages from the client
            data = await websocket.receive_json()
            message_type = data.get("type")

            if message_type == "user_task":
                # Extract task details
                trace = data.get("trace", {})
                trace_id = trace.get("id", str(uuid.uuid4()))
                instruction = trace.get("instruction", "")
                model_id = trace.get("modelId", "microsoft/Fara-7B")

                # Create and start session
                session = FaraSession(trace_id, websocket)
                active_sessions[trace_id] = session

                # Run the task in the background
                task = asyncio.create_task(session.run_task(instruction, model_id))
                background_tasks.add(task)
                task.add_done_callback(background_tasks.discard)

            elif message_type == "stop_task":
                # Stop the running task
                trace_id = data.get("trace_id")
                if trace_id and trace_id in active_sessions:
                    await active_sessions[trace_id].stop()

            elif message_type == "ping":
                await websocket.send_json({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"WebSocket disconnected: {connection_id}")
    except Exception as e:
        logger.exception(f"WebSocket error: {e}")
    finally:
        # Clean up
        active_connections.pop(connection_id, None)

        # The client is gone: abort tasks still running for this connection
        # and wait for them so their own cleanup finishes before we return.
        pending = list(background_tasks)
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        # Snapshot first: close() awaits, and other connections may add or
        # remove sessions meanwhile, which would break a live dict iteration.
        owned_sessions = [
            (session_trace_id, session)
            for session_trace_id, session in list(active_sessions.items())
            if session.websocket == websocket
        ]
        for session_trace_id, session in owned_sessions:
            await session.close()
            active_sessions.pop(session_trace_id, None)
577
+
578
+
579
@app.get("/api/health")
async def health_check():
    """Liveness probe: always answers with a fixed "healthy" status payload."""
    status_payload = {"status": "healthy"}
    return status_payload
583
+
584
+
585
# Script entry point: serve the app with uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)
backend/uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
eslint.config.js ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from "@eslint/js";
2
+ import globals from "globals";
3
+ import reactHooks from "eslint-plugin-react-hooks";
4
+ import reactRefresh from "eslint-plugin-react-refresh";
5
+ import tseslint from "typescript-eslint";
6
+
7
// Rule set applied to every TypeScript / TSX source file.
const typescriptSources = {
  extends: [js.configs.recommended, ...tseslint.configs.recommended],
  files: ["**/*.{ts,tsx}"],
  languageOptions: {
    ecmaVersion: 2020,
    globals: globals.browser,
  },
  plugins: {
    "react-hooks": reactHooks,
    "react-refresh": reactRefresh,
  },
  rules: {
    ...reactHooks.configs.recommended.rules,
    "react-refresh/only-export-components": ["warn", { allowConstantExport: true }],
    // Unused-variable reporting is switched off for this project.
    "@typescript-eslint/no-unused-vars": "off",
  },
};

// Build output in dist/ is never linted.
const ignoredPaths = { ignores: ["dist"] };

export default tseslint.config(ignoredPaths, typescriptSources);
fara/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .fara_agent import FaraAgent
2
+ from .browser.playwright_controller import PlaywrightController
3
+
4
+ __all__ = ["FaraAgent", "PlaywrightController"]
fara/_prompts.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ from typing import Union, Tuple
4
+
5
+ from .qwen_helpers.base_tool import BaseTool
6
+ from .qwen_helpers.fncall_prompt import NousFnCallPrompt
7
+ from .qwen_helpers.schema import (
8
+ ContentItem,
9
+ Message,
10
+ )
11
+
12
# Defaults used by smart_resize() below.
IMAGE_FACTOR = 28  # default `factor`: both output dimensions are multiples of it
MIN_PIXELS = 4 * 28**2  # default lower bound on the resized pixel count
MAX_PIXELS = 16384 * 28**2  # default upper bound on the resized pixel count
MAX_RATIO = 200  # largest long-side / short-side ratio smart_resize() accepts
16
+
17
+
18
+ # @register_tool("computer_use")
19
class FaraComputerUse(BaseTool):
    """Tool definition (name, description, JSON parameter schema) for ``computer_use``.

    The description and schema are the text shown to the model; the
    ``_key`` / ``_click`` / ... handlers are unimplemented stubs that raise
    ``NotImplementedError``.

    NOTE(review): ``call`` dispatches on ``click``, ``long_press``, ``swipe``,
    ``system_button`` and ``open``, none of which appear in the ``action``
    enum below, while declared actions such as ``left_click`` or ``scroll``
    fall through to ``ValueError``. Confirm whether ``call`` is used at all.
    """

    name = "computer_use"

    @property
    def description(self):
        """Prompt text describing the tool, including the screen resolution."""
        return f"""
Use a mouse and keyboard to interact with a computer, and take screenshots.
* This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.
* Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try wait and taking another screenshot.
* The screen's resolution is {self.display_width_px}x{self.display_height_px}.
* Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.
* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.
* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.
* When a separate scrollable container prominently overlays the webpage, if you want to scroll within it, you typically need to mouse_move() over it first and then scroll().
* If a popup window appears that you want to close, if left_click() on the 'X' or close button doesn't work, try key(keys=['Escape']) to close it.
* On some search bars, when you type(), you may need to press_enter=False and instead separately call left_click() on the search button to submit the search query. This is especially true of search bars that have auto-suggest popups for e.g. locations
* For calendar widgets, you usually need to left_click() on arrows to move between months and left_click() on dates to select them; type() is not typically used to input dates there.
""".strip()

    # Class-level schema template. __init__ works on a per-instance copy, so
    # this dict is never modified.
    parameters = {
        "properties": {
            "action": {
                "description": """
The action to perform. The available actions are:
* `key`: Performs key down presses on the arguments passed in order, then performs key releases in reverse order. Includes "Enter", "Alt", "Shift", "Tab", "Control", "Backspace", "Delete", "Escape", "ArrowUp", "ArrowDown", "ArrowLeft", "ArrowRight", "PageDown", "PageUp", "Shift", etc.
* `type`: Type a string of text on the keyboard.
* `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
* `left_click`: Click the left mouse button.
* `scroll`: Performs a scroll of the mouse scroll wheel.
* `visit_url`: Visit a specified URL.
* `web_search`: Perform a web search with a specified query.
* `history_back`: Go back to the previous page in the browser history.
* `pause_and_memorize_fact`: Pause and memorize a fact for future reference.
* `wait`: Wait specified seconds for the change to happen.
* `terminate`: Terminate the current task and report its completion status.
""".strip(),
                "enum": [
                    "key",
                    "type",
                    "mouse_move",
                    "left_click",
                    "scroll",
                    "visit_url",
                    "web_search",
                    "history_back",
                    "pause_and_memorize_fact",
                    "wait",
                    "terminate",
                ],
                "type": "string",
            },
            "keys": {
                "description": "Required only by `action=key`.",
                "type": "array",
            },
            "text": {
                "description": "Required only by `action=type`.",
                "type": "string",
            },
            "press_enter": {
                "description": "Whether to press the Enter key after typing. Required only by `action=type`.",
                "type": "boolean",
            },
            "delete_existing_text": {
                "description": "Whether to delete existing text before typing. Required only by `action=type`.",
                "type": "boolean",
            },
            "coordinate": {
                "description": "(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to move the mouse to. Required only by `action=left_click`, `action=mouse_move`, and `action=type`.",
                "type": "array",
            },
            "pixels": {
                "description": "The amount of scrolling to perform. Positive values scroll up, negative values scroll down. Required only by `action=scroll`.",
                "type": "number",
            },
            "url": {
                "description": "The URL to visit. Required only by `action=visit_url`.",
                "type": "string",
            },
            "query": {
                "description": "The query to search for. Required only by `action=web_search`.",
                "type": "string",
            },
            "fact": {
                "description": "The fact to remember for the future. Required only by `action=pause_and_memorize_fact`.",
                "type": "string",
            },
            "time": {
                "description": "The seconds to wait. Required only by `action=wait`.",
                "type": "number",
            },
            "status": {
                "description": "The status of the task. Required only by `action=terminate`.",
                "type": "string",
                "enum": ["success", "failure"],
            },
        },
        "required": ["action"],
        "type": "object",
    }

    def __init__(self, cfg=None):
        """Configure the tool for one screen size.

        Args:
            cfg: Mapping with ``display_width_px`` and ``display_height_px``
                (both required) and an optional ``include_input_text_key_args``
                flag (default ``False``). When the flag is false, the
                ``press_enter`` and ``delete_existing_text`` properties are
                left out of this instance's schema. The mapping passed in is
                not modified.

        Raises:
            KeyError: If a required display dimension is missing.
        """
        # Work on a copy so the caller's dict keeps its keys.
        cfg = dict(cfg) if cfg is not None else {}
        self.display_width_px = cfg["display_width_px"]
        self.display_height_px = cfg["display_height_px"]
        include_input_text_key_args = cfg.pop("include_input_text_key_args", False)

        # Give this instance its own schema. Popping from the class-level dict
        # would strip the properties for every later instance, including ones
        # created with include_input_text_key_args=True.
        template = type(self).parameters
        self.parameters = {**template, "properties": dict(template["properties"])}
        if not include_input_text_key_args:
            self.parameters["properties"].pop("press_enter", None)
            self.parameters["properties"].pop("delete_existing_text", None)
        super().__init__(cfg)

    def call(self, params: Union[str, dict], **kwargs):
        """Dispatch ``params["action"]`` to the matching handler stub.

        Raises:
            ValueError: If the action has no handler.
            NotImplementedError: From every handler stub in this class.
        """
        params = self._verify_json_format_args(params)
        action = params["action"]
        if action == "key":
            # The schema declares `keys` for this action; `text` is still
            # accepted for callers that relied on the previous lookup.
            keys = params["keys"] if "keys" in params else params["text"]
            return self._key(keys)
        elif action == "click":
            return self._click(coordinate=params["coordinate"])
        elif action == "long_press":
            return self._long_press(
                coordinate=params["coordinate"], time=params["time"]
            )
        elif action == "swipe":
            return self._swipe(
                coordinate=params["coordinate"], coordinate2=params["coordinate2"]
            )
        elif action == "type":
            return self._type(params["text"])
        elif action == "system_button":
            return self._system_button(params["button"])
        elif action == "open":
            return self._open(params["text"])
        elif action == "wait":
            return self._wait(params["time"])
        elif action == "terminate":
            return self._terminate(params["status"])
        else:
            raise ValueError(f"Unknown action: {action}")

    # Handler stubs: none of them is implemented in this class.

    def _key(self, text: str):
        raise NotImplementedError()

    def _click(self, coordinate: Tuple[int, int]):
        raise NotImplementedError()

    def _long_press(self, coordinate: Tuple[int, int], time: int):
        raise NotImplementedError()

    def _swipe(self, coordinate: Tuple[int, int], coordinate2: Tuple[int, int]):
        raise NotImplementedError()

    def _type(self, text: str):
        raise NotImplementedError()

    def _system_button(self, button: str):
        raise NotImplementedError()

    def _open(self, text: str):
        raise NotImplementedError()

    def _wait(self, time: int):
        raise NotImplementedError()

    def _terminate(self, status: str):
        raise NotImplementedError()
183
+
184
+
185
def round_by_factor(number: int, factor: int) -> int:
    """Round 'number' to the nearest multiple of 'factor' (ties follow built-in round())."""
    multiples = round(number / factor)
    return multiples * factor
188
+
189
+
190
def ceil_by_factor(number: int, factor: int) -> int:
    """Round 'number' up to the next multiple of 'factor' (unchanged if already one)."""
    multiples = math.ceil(number / factor)
    return multiples * factor
193
+
194
+
195
def floor_by_factor(number: int, factor: int) -> int:
    """Round 'number' down to the previous multiple of 'factor' (unchanged if already one)."""
    multiples = math.floor(number / factor)
    return multiples * factor
198
+
199
+
200
def smart_resize(
    height: int,
    width: int,
    factor: int = IMAGE_FACTOR,
    min_pixels: int = MIN_PIXELS,
    max_pixels: int = MAX_PIXELS,
) -> tuple[int, int]:
    """
    Compute a (height, width) pair for rescaling an image such that:

    1. Both dimensions are divisible by 'factor'.

    2. The total number of pixels lies within ['min_pixels', 'max_pixels'].

    3. The aspect ratio stays as close to the original as possible.

    Raises ValueError when the long side exceeds MAX_RATIO times the short side.
    """
    if max(height, width) / min(height, width) > MAX_RATIO:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
        )

    # Snap each side to the nearest multiple of `factor`, never below one multiple.
    snapped_h = max(factor, round_by_factor(height, factor))
    snapped_w = max(factor, round_by_factor(width, factor))
    area = snapped_h * snapped_w

    if area > max_pixels:
        # Too large: shrink both sides by the same ratio, rounding down.
        shrink = math.sqrt((height * width) / max_pixels)
        return (
            floor_by_factor(height / shrink, factor),
            floor_by_factor(width / shrink, factor),
        )

    if area < min_pixels:
        # Too small: enlarge both sides by the same ratio, rounding up.
        grow = math.sqrt(min_pixels / (height * width))
        return (
            ceil_by_factor(height * grow, factor),
            ceil_by_factor(width * grow, factor),
        )

    return snapped_h, snapped_w
231
+
232
+
233
def get_computer_use_system_prompt(
    image,
    processor_im_cfg,
    include_input_text_key_args=False,
    fn_call_template="default",
):
    """Build the system conversation advertising the ``computer_use`` tool.

    The screen size announced to the model is the size ``smart_resize``
    yields for ``image`` under the processor's image settings.

    Args:
        image: Object exposing ``height`` and ``width`` attributes.
        processor_im_cfg: Mapping with ``patch_size``, ``merge_size``,
            ``min_pixels`` and ``max_pixels``.
        include_input_text_key_args: Forwarded to ``FaraComputerUse``.
        fn_call_template: Template name handed to ``NousFnCallPrompt``.

    Returns:
        Dict with ``"conversation"`` (list of dumped messages) and
        ``"im_size"`` as ``(resized_width, resized_height)``.
    """
    patch_size = processor_im_cfg["patch_size"]
    merge_size = processor_im_cfg["merge_size"]
    min_pixels = processor_im_cfg["min_pixels"]
    max_pixels = processor_im_cfg["max_pixels"]

    resized_height, resized_width = smart_resize(
        image.height,
        image.width,
        factor=patch_size * merge_size,
        min_pixels=min_pixels,
        max_pixels=max_pixels,
    )

    tool_cfg = {
        "display_width_px": resized_width,
        "display_height_px": resized_height,
        "include_input_text_key_args": include_input_text_key_args,
    }
    tool = FaraComputerUse(cfg=tool_cfg)

    system_message = Message(
        role="system",
        content=[ContentItem(text="You are a helpful assistant.")],
    )
    prompt_builder = NousFnCallPrompt(template_name=fn_call_template)
    conversation = prompt_builder.preprocess_fncall_messages(
        messages=[system_message],
        functions=[tool.function],
        lang=None,
    )

    dumped_messages = [message.model_dump() for message in conversation]
    return {
        "conversation": dumped_messages,
        "im_size": (resized_width, resized_height),
    }
fara/browser/__init__.py ADDED
File without changes
fara/browser/browser_bb.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import atexit
3
+ import logging
4
+ import os
5
+ import platform
6
+ import signal
7
+ import subprocess
8
+ import time
9
+ from typing import Any, Callable, Dict, Optional
10
+
11
+ from playwright.async_api import (
12
+ BrowserContext,
13
+ Download,
14
+ Page,
15
+ Playwright,
16
+ async_playwright,
17
+ )
18
+
19
+ from .playwright_controller import PlaywrightController
20
+
21
+
22
+ class BrowserBB:
23
+ """Manages browser instance, context, and page lifecycle."""
24
+
25
+ def __init__(
26
+ self,
27
+ viewport_height: int,
28
+ viewport_width: int,
29
+ headless: bool,
30
+ page_script_path: str,
31
+ browser_channel: str = "firefox",
32
+ browser_data_dir: str | None = None,
33
+ downloads_folder: str | None = None,
34
+ to_resize_viewport: bool = True,
35
+ single_tab_mode: bool = True,
36
+ animate_actions: bool = False,
37
+ use_browser_base: bool = False,
38
+ logger: Optional[logging.Logger] = None,
39
+ ):
40
+ self.headless = headless
41
+ self.page_script_path = page_script_path
42
+ self.browser_channel = browser_channel
43
+ self.browser_data_dir = browser_data_dir
44
+ self.downloads_folder = downloads_folder
45
+ self.to_resize_viewport = to_resize_viewport
46
+ self.animate_actions = animate_actions
47
+ self.single_tab_mode = single_tab_mode
48
+ self.use_browser_base = use_browser_base
49
+ self.logger = logger or logging.getLogger("browser_manager")
50
+
51
+ self._viewport_height = viewport_height
52
+ self._viewport_width = viewport_width
53
+
54
+ # check _viewport_width and _viewport_height are positive integers
55
+ if not isinstance(self._viewport_width, int) or self._viewport_width <= 0:
56
+ raise ValueError(
57
+ f"Error: Browser_manager.Browser: Invalid viewport width: {self._viewport_width}. Must be a positive integer."
58
+ )
59
+ if not isinstance(self._viewport_height, int) or self._viewport_height <= 0:
60
+ raise ValueError(
61
+ f"Error: Browser_manager.Browser:Invalid viewport height: {self._viewport_height}. Must be a positive integer."
62
+ )
63
+ assert isinstance(self.headless, bool), (
64
+ f"Error: Browser_manager.Browser: headless must be a boolean, got {type(self.headless)}"
65
+ )
66
+ if page_script_path is None:
67
+ page_script_path = os.path.join(
68
+ os.path.abspath(os.path.dirname(__file__)), "page_script.js"
69
+ )
70
+ self.page_script_path = page_script_path
71
+ assert isinstance(page_script_path, str), (
72
+ f"Error: Browser_manager.Browser: page_script_path must be a string, got {type(self.page_script_path)}"
73
+ )
74
+ assert os.path.exists(self.page_script_path), (
75
+ f"Error: Browser_manager.Browser: page_script_path does not exist: {self.page_script_path}"
76
+ )
77
+
78
+ assert isinstance(self.browser_channel, str) and (
79
+ self.browser_channel in ["chromium", "firefox", "webkit"]
80
+ ), (
81
+ f"Error: Browser_manager.Browser: browser_channel must be one of ['chromium', 'firefox', 'webkit'], got {self.browser_channel}"
82
+ )
83
+
84
+ # Browser-related instances
85
+ self._playwright: Playwright | None = None
86
+ self._context: BrowserContext | None = None
87
+ self._page: Page | None = None
88
+ self.browser = None
89
+ self.session = None
90
+ self.xvfb_process = None
91
+
92
+ # Events and handlers
93
+ self._captcha_event = asyncio.Event()
94
+ self._captcha_event.set() # Initially set (no captcha)
95
+ self._download_handler: Callable[[Download], None] | None = None
96
+
97
+ self._playwright_controller = PlaywrightController(
98
+ animate_actions=self.animate_actions,
99
+ downloads_folder=self.downloads_folder,
100
+ viewport_width=self._viewport_width,
101
+ viewport_height=self._viewport_height,
102
+ _download_handler=self._download_handler,
103
+ to_resize_viewport=self.to_resize_viewport,
104
+ single_tab_mode=self.single_tab_mode,
105
+ logger=self.logger,
106
+ )
107
+
108
+ def set_download_handler(self, handler: Callable[[Download], None]) -> None:
109
+ """Set the download handler for the browser."""
110
+ self._download_handler = handler
111
+ self._playwright_controller._download_handler = handler
112
+
113
+ def set_captcha_solved_callback(self, callback: Callable[[bool], None]) -> None:
114
+ """Set callback to be called when captcha status changes."""
115
+ self._captcha_solved_callback = callback
116
+
117
+ async def init(
118
+ self,
119
+ start_page: str,
120
+ shared_data_point=None, # For captcha tracking
121
+ ) -> None:
122
+ """Initialize the browser, context, and page."""
123
+ self._playwright = await async_playwright().start()
124
+ self.shared_data_point = shared_data_point
125
+
126
+ if self.use_browser_base:
127
+ await self._init_browser_base(self.shared_data_point)
128
+ elif self.browser_data_dir is None:
129
+ await self._init_regular_browser(channel=self.browser_channel)
130
+ else:
131
+ await self._init_persistent_browser()
132
+
133
+ # Common setup for all browser types
134
+ await self._setup_common_browser_features(start_page)
135
+
136
+ async def _init_browser_base(self, shared_data_point) -> None:
137
+ """Initialize BrowserBase connection, defaults to chromium."""
138
+ # Lazy import browserbase only when needed
139
+ import browserbase
140
+ from browserbase import Browserbase
141
+
142
+ self.logger.info("Initializing BrowserBase session...")
143
+ self.bb = Browserbase(api_key=os.environ["BROWSERBASE_API_KEY"])
144
+
145
+ while True: # Wait indefinitely until we get a session
146
+ try:
147
+ self.session = self.bb.sessions.create(
148
+ project_id=os.environ["BROWSERBASE_PROJECT_ID"],
149
+ proxies=True,
150
+ browser_settings={"advanced_stealth": True},
151
+ keep_alive=True,
152
+ timeout=7200, # 2 hour timeout
153
+ region="us-east-1",
154
+ )
155
+ break
156
+ except browserbase.RateLimitError:
157
+ self.logger.warning(
158
+ "Rate limit exceeded while trying to create BrowserBase session. Retrying in 10 seconds..."
159
+ )
160
+ await asyncio.sleep(10)
161
+
162
+ assert self.session.id is not None
163
+ assert self.session.status == "RUNNING", (
164
+ f"Session status is {self.session.status}"
165
+ )
166
+
167
+ chromium = self._playwright.chromium
168
+ self.browser = await chromium.connect_over_cdp(self.session.connect_url)
169
+ self.logger.info(
170
+ f"Connected to BrowserBase session: https://browserbase.com/sessions/{self.session.id}"
171
+ )
172
+
173
+ self._context = self.browser.contexts[0]
174
+ assert len(self._context.pages) == 1
175
+ self._page = self._context.pages[0]
176
+
177
+ # Set up captcha handling
178
+ def handle_console(msg):
179
+ """Handle captcha detection and solving."""
180
+ if msg.text == "browserbase-solving-started":
181
+ self.logger.info("Captcha Solving In Progress!!")
182
+ if shared_data_point:
183
+ shared_data_point.set_encountered_captcha(True)
184
+ self._captcha_event.clear() # Block execution
185
+ elif msg.text == "browserbase-solving-finished":
186
+ self.logger.info("Captcha Solving Completed!!")
187
+
188
+ async def delayed_resume():
189
+ await asyncio.sleep(3) # Wait for navigation to settle
190
+ await self._page.wait_for_load_state("networkidle")
191
+ self._captcha_event.set()
192
+
193
+ asyncio.create_task(delayed_resume())
194
+
195
+ self._context.on("console", handle_console)
196
+ self._page.on("console", handle_console)
197
+
198
+ async def _init_regular_browser(self, channel: str = "chromium") -> None:
199
+ """Initialize regular browser according to the specified channel."""
200
+ if not self.headless:
201
+ self.start_xvfb()
202
+
203
+ launch_args: Dict[str, Any] = {"headless": self.headless}
204
+
205
+ if channel == "chromium":
206
+ self.browser = await self._playwright.chromium.launch(**launch_args)
207
+ elif channel == "firefox":
208
+ self.browser = await self._playwright.firefox.launch(**launch_args)
209
+ elif channel == "webkit":
210
+ self.browser = await self._playwright.webkit.launch(**launch_args)
211
+ else:
212
+ raise ValueError(
213
+ f"Unsupported browser channel: {channel}. Supported channels are 'chromium', 'firefox', and 'webkit'."
214
+ )
215
+
216
+ self._context = await self.browser.new_context(
217
+ user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
218
+ )
219
+
220
+ self._page = await self._context.new_page()
221
+
222
+ async def _init_persistent_browser(self) -> None:
223
+ """Initialize persistent browser with data directory."""
224
+ if not self.headless:
225
+ self.start_xvfb()
226
+
227
+ launch_args: Dict[str, Any] = {"headless": self.headless}
228
+ self._context = await self._playwright.chromium.launch_persistent_context(
229
+ self.browser_data_dir, **launch_args
230
+ )
231
+ self._page = await self._context.new_page()
232
+
233
+ async def _setup_common_browser_features(self, start_page: str) -> None:
234
+ """Set up features common to all browser types."""
235
+ self._context.set_default_timeout(60000) # One minute
236
+ await self._playwright_controller.on_new_page(self._page)
237
+ assert self._page is not None
238
+
239
+ # Set up new page handling for single tab mode
240
+ if self.single_tab_mode:
241
+ self._context.on(
242
+ "page", lambda new_pg: self._handle_new_page_safe(new_pg, self._page)
243
+ )
244
+
245
+ # Set up download handler
246
+ if self._download_handler:
247
+ self._page.on("download", self._download_handler)
248
+
249
+ # Set viewport and add init script
250
+ await self._page.set_viewport_size(
251
+ {"width": self._viewport_width, "height": self._viewport_height}
252
+ )
253
+
254
+ await self._page.add_init_script(path=self.page_script_path)
255
+
256
+ # Navigate to start page
257
+ await self._page.goto(start_page)
258
+ await self._page.wait_for_load_state()
259
+
260
+ async def _handle_new_page_safe(self, new_pg: Page, main_page: Page) -> None:
261
+ """Safely handle new pages in single tab mode."""
262
+ try:
263
+ await new_pg.wait_for_load_state("domcontentloaded")
264
+
265
+ # Do not close if new_pg is the current page
266
+ if new_pg == main_page or new_pg.url == main_page.url:
267
+ self.logger.info("New tab is same as current page, not closing.")
268
+ return
269
+
270
+ new_url = new_pg.url
271
+ await new_pg.close()
272
+ await self._playwright_controller.visit_page(main_page, new_url)
273
+ except Exception as e:
274
+ self.logger.warning(f"Error in handle_new_page_safe: {e}")
275
+
276
def start_xvfb(self) -> None:
    """Start an Xvfb virtual display server (Linux only) and point DISPLAY at it.

    The call is idempotent: if this instance already owns an Xvfb process that
    is still running, nothing is started. Previously a second call overwrote
    the process handle (leaking the first server) and registered another
    atexit hook.
    """
    # Xvfb is only needed on Linux - macOS and Windows have native display support
    if platform.system() != "Linux":
        return

    # Reuse the server we already started instead of spawning a second one on
    # the same display number.
    existing = getattr(self, "xvfb_process", None)
    if existing is not None and existing.poll() is None:
        return

    display_num = 99  # Choose a display number unlikely to be in use
    self.xvfb_process = subprocess.Popen(
        ["Xvfb", f":{display_num}", "-screen", "0", "1280x1024x24", "-ac"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    os.environ["DISPLAY"] = f":{display_num}"

    # Make sure Xvfb has time to start
    time.sleep(1)

    # Register cleanup function so the server does not outlive the interpreter
    atexit.register(self.stop_xvfb)
295
+
296
def stop_xvfb(self) -> None:
    """Terminate the Xvfb process owned by this instance, if there is one.

    Sends SIGTERM, waits for the process to exit and clears the stored handle.
    """
    xvfb = self.xvfb_process
    if not xvfb:
        return

    xvfb.send_signal(signal.SIGTERM)
    xvfb.wait()  # reap the child before dropping the handle
    self.xvfb_process = None
302
+
303
async def wait_for_captcha_resolution(self) -> None:
    """Block until ``self._captcha_event`` is set.

    Returns immediately when the event is already set.
    """
    captcha_gate = self._captcha_event
    await captcha_gate.wait()
306
+
307
@property
def page(self) -> Page | None:
    """The page currently stored on this instance, or ``None`` if there is none."""
    return self._page

@page.setter
def page(self, value):
    # Replaces the stored page reference; no validation is performed here.
    self._page = value
315
+
316
@property
def context(self) -> BrowserContext | None:
    """The browser context stored on this instance, or ``None`` if there is none."""
    return self._context
320
+
321
@property
def playwright_controller(self):
    """The Playwright controller object stored on this instance."""
    return self._playwright_controller
325
+
326
async def close(self) -> None:
    """Close the browser and clean up resources.

    Teardown runs innermost-first: page, context, browser (releasing the
    BrowserBase session when one is in use), then the Playwright driver, and
    finally the Xvfb display. The driver is stopped only after the browser has
    been closed, because closing a browser needs a live driver connection.
    ``finally`` blocks ensure the driver and Xvfb are still shut down when an
    earlier step raises; the original exception is propagated to the caller.
    """
    self.logger.info("Closing browser...")

    try:
        if self._page is not None:
            await self._page.close()
            self._page = None

        if self._context is not None:
            await self._context.close()
            self._context = None

        if self.browser:
            if self.use_browser_base and self.session:
                # Ask BrowserBase to release the remote session.
                self.bb.sessions.update(
                    self.session.id,
                    status="REQUEST_RELEASE",
                    project_id=os.environ["BROWSERBASE_PROJECT_ID"],
                )
            await self.browser.close()
            self.browser = None
    finally:
        try:
            # Stop the driver last: everything above talks through it.
            if self._playwright is not None:
                await self._playwright.stop()
                self._playwright = None
        finally:
            if not self.headless:
                self.stop_xvfb()
fara/browser/page_script.js ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Page-side helper namespace. It is created once per page: the
 * `MultimodalWebSurfer || ...` guard keeps an existing instance (and its label
 * counter) when this script is evaluated again.
 *
 * Public API: getInteractiveRects, getVisualViewport, getFocusedElementId,
 * getPageMetadata, getVisibleText.
 */
var MultimodalWebSurfer = MultimodalWebSurfer || (function() {
    // Next value handed out as an "__elementId" attribute by labelElements().
    let nextLabel = 10;

    // Implicit ARIA role for a tag (or "input, type=<type>") that has no explicit role attribute.
    let roleMapping = {
        "a": "link",
        "area": "link",
        "button": "button",
        "input, type=button": "button",
        "input, type=checkbox": "checkbox",
        "input, type=email": "textbox",
        "input, type=number": "spinbutton",
        "input, type=radio": "radio",
        "input, type=range": "slider",
        "input, type=reset": "button",
        "input, type=search": "searchbox",
        "input, type=submit": "button",
        "input, type=tel": "textbox",
        "input, type=text": "textbox",
        "input, type=url": "textbox",
        "search": "search",
        "select": "combobox",
        "option": "option",
        "textarea": "textbox"
    };

    /** Computed CSS `cursor` value of an element. */
    let getCursor = function (elm) {
        return window.getComputedStyle(elm)["cursor"];
    };

    /** True when the element has any layout box (non-zero offset size or at least one client rect). */
    let isVisible = function (element) {
        return !!(element.offsetWidth || element.offsetHeight || element.getClientRects().length);
    };

    /**
     * Finds interactive elements in the regular DOM (excluding Shadow DOM)
     * Looks for elements that are:
     * 1. Standard interactive elements (inputs, buttons, links)
     * 2. Elements with ARIA roles indicating interactivity
     * 3. Elements with cursor styles suggesting interactivity
     *
     * @returns {Array} Array of DOM elements that are deemed interactive (no duplicates)
     */
    let getInteractiveElementsNoShaddow = function () {
        let results = [];
        // Membership is tracked in a Set so de-duplication stays O(1) per node
        // even on pages with many elements.
        let seen = new Set();
        let addUnique = function (node) {
            if (!seen.has(node)) {
                seen.add(node);
                results.push(node);
            }
        };

        let roles = ["scrollbar", "searchbox", "slider", "spinbutton", "switch", "tab", "treeitem", "button", "checkbox", "gridcell", "link", "menuitem", "menuitemcheckbox", "menuitemradio", "option", "progressbar", "radio", "textbox", "combobox", "menu", "tree", "treegrid", "grid", "listbox", "radiogroup", "widget"];
        let inertCursors = ["auto", "default", "none", "text", "vertical-text", "not-allowed", "no-drop"];

        // Get the main interactive elements
        let nodeList = document.querySelectorAll("input, select, textarea, button, [href], [onclick], [contenteditable], [tabindex]:not([tabindex='-1'])");
        for (let i = 0; i < nodeList.length; i++) {
            // make sure not disabled and visible
            if (nodeList[i].disabled || !isVisible(nodeList[i])) {
                continue;
            }
            addUnique(nodeList[i]);
        }

        // Anything not already included that has a suitable role
        nodeList = document.querySelectorAll("[role]");
        for (let i = 0; i < nodeList.length; i++) {
            // make sure not disabled and visible
            if (nodeList[i].disabled || !isVisible(nodeList[i])) {
                continue;
            }
            if (roles.indexOf(nodeList[i].getAttribute("role")) > -1) {
                addUnique(nodeList[i]);
            }
        }

        // Any element that changes the cursor to something implying interactivity
        nodeList = document.querySelectorAll("*");
        for (let i = 0; i < nodeList.length; i++) {
            let node = nodeList[i];
            if (node.disabled || !isVisible(node)) {
                continue;
            }

            // Cursor is default, or does not suggest interactivity
            let cursor = getCursor(node);
            if (inertCursors.indexOf(cursor) >= 0) {
                continue;
            }

            // Move up to the first instance of this cursor change.
            // parentElement (not parentNode) stops the walk at <html>: the
            // Document node above it cannot be passed to getComputedStyle.
            let parent = node.parentElement;
            while (parent && getCursor(parent) == cursor) {
                node = parent;
                parent = node.parentElement;
            }

            // Add the node if it is new
            addUnique(node);
        }

        return results;
    };

    /**
     * Recursively gathers elements matching specified roles from both regular DOM and Shadow DOM
     * @param {Array} roles - Array of selectors to match (joined with ",")
     * @param {Document|ShadowRoot} root - Root element to start search from
     * @returns {Array} Array of matching elements
     */
    function gatherAllElements(roles, root = document) {
        const elements = [];
        const stack = [root];
        const selector = roles.join(",");

        while (stack.length > 0) {
            const currentRoot = stack.pop();

            // Add elements at current level
            elements.push(...Array.from(currentRoot.querySelectorAll(selector)));

            // Add shadow roots to stack (only open roots are reachable from script)
            currentRoot.querySelectorAll("*").forEach(el => {
                if (el.shadowRoot && el.shadowRoot.mode === "open") {
                    stack.push(el.shadowRoot);
                }
            });
        }

        return elements;
    }

    /**
     * Gets all interactive elements from both regular DOM and Shadow DOM
     * Filters elements to ensure they are visible and accessible
     * @returns {Array} Array of interactive elements, each element at most once
     */
    let getInteractiveElements = function () {
        // NOTE(review): these strings are used both as CSS selectors and as tag
        // names. "href", "onclick", "contenteditable" and "tabindex:not(...)"
        // therefore select elements with those *tag names*, not elements with
        // those attributes, so they look like no-ops on real pages. Left
        // unchanged to preserve behavior - confirm the intent.
        const interactive_roles = ["input", "option", "select", "textarea", "button", "href", "onclick", "contenteditable", "tabindex:not([tabindex='-1'])"];

        let results = [];
        // Elements found in the light DOM are found again by gatherAllElements();
        // the Set keeps them from being reported twice.
        let seen = new Set();
        let addUnique = function (element) {
            if (!seen.has(element)) {
                seen.add(element);
                results.push(element);
            }
        };

        // Light-DOM candidates are kept only if the centre of one of their
        // client rects is not covered by an unrelated element.
        let elements_no_shaddow = getInteractiveElementsNoShaddow();
        for (let i = 0; i < elements_no_shaddow.length; i++) {
            let rects = elements_no_shaddow[i].getClientRects();
            for (const rect of rects) {
                let x = rect.left + rect.width / 2;
                let y = rect.top + rect.height / 2;
                if (isTopmost(elements_no_shaddow[i], x, y)) {
                    addUnique(elements_no_shaddow[i]);
                    break;
                }
            }
        }

        // From the shadow DOM get all interactive elements and options that are not in the no shadow list
        let elements_all = gatherAllElements(interactive_roles);

        // Filter and process interactive elements
        elements_all.forEach(element => {
            const tag = element.tagName.toLowerCase();
            // if file, auto add
            if (tag === "input" && element.getAttribute("type") == "file") {
                addUnique(element);
                return;
            }
            // if option auto add
            if (tag === "option") {
                addUnique(element);
                return;
            }
            if (element.disabled || !isVisible(element)) {
                return;
            }

            if (interactive_roles.includes(tag)) {
                addUnique(element);
            }
        });

        return results;
    };

    /**
     * Assigns unique identifiers to interactive elements that do not have one yet
     * @param {Array} elements - Array of elements to label
     * @returns {Array} The same elements, now all carrying an "__elementId" attribute
     */
    let labelElements = function (elements) {
        for (let i = 0; i < elements.length; i++) {
            if (!elements[i].hasAttribute("__elementId")) {
                elements[i].setAttribute("__elementId", "" + (nextLabel++));
            }
        }
        // Adding the attribute does not affect which elements qualify as
        // interactive, so the (expensive) scan is not repeated here.
        return elements;
    };

    /**
     * Checks if an element is the topmost element at given coordinates
     * @param {Element} element - Element to check
     * @param {number} x - X coordinate
     * @param {number} y - Y coordinate
     * @returns {boolean} True if element (or one of its descendants) is topmost at coordinates
     */
    let isTopmost = function (element, x, y) {
        let hit = document.elementFromPoint(x, y);

        // Hack to handle elements outside the viewport
        if (hit === null) {
            return true;
        }

        // Accept a hit on the element itself or on anything nested inside it.
        while (hit) {
            if (hit == element) return true;
            hit = hit.parentNode;
        }
        return false;
    };

    /**
     * Returns the "__elementId" of the focused element or of its nearest
     * labelled ancestor, or null when none carries one.
     */
    let getFocusedElementId = function () {
        let elm = document.activeElement;
        while (elm) {
            // The walk can reach the Document node, which has no hasAttribute().
            if (elm.hasAttribute && elm.hasAttribute("__elementId")) {
                return elm.getAttribute("__elementId");
            }
            elm = elm.parentNode;
        }
        return null;
    };

    /** innerText of an element with surrounding whitespace removed; "" when missing or empty. */
    let trimmedInnerText = function (element) {
        if (!element) {
            return "";
        }
        let text = element.innerText;
        if (!text) {
            return "";
        }
        return text.trim();
    };

    /**
     * Best-effort accessible name. The first available source wins, in this order:
     * aria-label, a descendant `span.label`, aria-labelledby, `label[for=id]`,
     * the name attribute, an enclosing <label>, alt, title, the element's own text.
     */
    let getApproximateAriaName = function (element) {
        if (element.hasAttribute("aria-label")) {
            return element.getAttribute("aria-label");
        }

        // check if element has span that is called label and grab the inner text
        let labelSpan = element.querySelector("span.label");
        if (labelSpan) {
            return labelSpan.innerText;
        }

        // Check for aria labels
        if (element.hasAttribute("aria-labelledby")) {
            let buffer = "";
            let ids = element.getAttribute("aria-labelledby").split(" ");
            for (let i = 0; i < ids.length; i++) {
                let label = document.getElementById(ids[i]);
                if (label) {
                    buffer = buffer + " " + trimmedInnerText(label);
                }
            }
            return buffer.trim();
        }

        // Check for labels
        if (element.hasAttribute("id")) {
            let label_id = element.getAttribute("id");
            let label = "";
            try {
                // Escape special characters in the ID
                let escaped_id = CSS.escape(label_id);
                let labels = document.querySelectorAll(`label[for="${escaped_id}"]`);
                for (let j = 0; j < labels.length; j++) {
                    label += labels[j].innerText + " ";
                }
                label = label.trim();
                if (label != "") {
                    return label;
                }
            } catch (e) {
                console.warn("Error finding label for element:", e);
            }
        }

        if (element.hasAttribute("name")) {
            return element.getAttribute("name");
        }

        if (element.parentElement && element.parentElement.tagName == "LABEL") {
            return element.parentElement.innerText;
        }

        // Check for alt text or titles
        if (element.hasAttribute("alt")) {
            return element.getAttribute("alt");
        }

        if (element.hasAttribute("title")) {
            return element.getAttribute("title");
        }

        return trimmedInnerText(element);
    };

    /**
     * Returns [role, tag]: the explicit role attribute if present, otherwise the
     * implicit role from roleMapping, otherwise "". For inputs with a type
     * attribute, tag is "input, type=<type>".
     */
    let getApproximateAriaRole = function (element) {
        let tag = element.tagName.toLowerCase();
        if (tag == "input" && element.hasAttribute("type")) {
            tag = tag + ", type=" + element.getAttribute("type");
        }

        if (element.hasAttribute("role")) {
            return [element.getAttribute("role"), tag];
        }
        else if (tag in roleMapping) {
            return [roleMapping[tag], tag];
        }
        else {
            return ["", tag];
        }
    };

    /**
     * Gets information about all interactive elements including their:
     * - Position and dimensions
     * - ARIA roles and names
     * - Tag names
     * - Scrollability
     *
     * @returns {Object} Map of element IDs to their properties
     */
    let getInteractiveRects = function () {
        let elements = labelElements(getInteractiveElements());
        let results = {};
        for (let i = 0; i < elements.length; i++) {
            let key = elements[i].getAttribute("__elementId");
            let boundingRect = elements[i].getBoundingClientRect();

            // Skip options unless their select is focused, the option itself
            // is visible, or the select is marked open.
            if (elements[i].tagName.toLowerCase() === "option") {
                let select = elements[i].closest("select");
                let select_focused = !!(select && select.hasAttribute("__elementId") &&
                    getFocusedElementId() === select.getAttribute("__elementId"));
                // check if option is visible without select being focused
                let option_visible = isVisible(elements[i]);
                // check if select is expanded even if not focused
                let select_expanded = !!(select && select.hasAttribute("open"));
                if (!(select_focused || option_visible || select_expanded)) {
                    continue;
                }
            }

            let ariaRole = getApproximateAriaRole(elements[i]);
            let ariaName = getApproximateAriaName(elements[i]);
            let vScrollable = elements[i].scrollHeight - elements[i].clientHeight >= 1;

            // getBoundingClientRect() yields a single DOMRect, so "rects" always
            // holds exactly one entry. The JSON round-trip turns the DOMRect into
            // a plain object.
            // NOTE(review): the previous code also had a per-rect branch guarded by
            // `rects.length`, which a DOMRect does not have, so it never ran. If
            // per-line rects were intended, getClientRects() would be needed - confirm.
            results[key] = {
                "tag_name": ariaRole[1],
                "role": ariaRole[0],
                "aria-name": ariaName,
                "v-scrollable": vScrollable,
                "rects": [JSON.parse(JSON.stringify(boundingRect))]
            };
        }
        return results;
    };

    /**
     * Gets current viewport information including dimensions and scroll positions
     * @returns {Object} Viewport properties (0 where the source object is unavailable)
     */
    let getVisualViewport = function () {
        let vv = window.visualViewport;
        let de = document.documentElement;
        return {
            "height": vv ? vv.height : 0,
            "width": vv ? vv.width : 0,
            "offsetLeft": vv ? vv.offsetLeft : 0,
            "offsetTop": vv ? vv.offsetTop : 0,
            "pageLeft": vv ? vv.pageLeft : 0,
            "pageTop": vv ? vv.pageTop : 0,
            "scale": vv ? vv.scale : 0,
            "clientWidth": de ? de.clientWidth : 0,
            "clientHeight": de ? de.clientHeight : 0,
            "scrollWidth": de ? de.scrollWidth : 0,
            "scrollHeight": de ? de.scrollHeight : 0
        };
    };

    /** Collects <meta> content keyed by the tag's name (or, failing that, property) attribute. */
    let _getMetaTags = function () {
        let meta = document.querySelectorAll("meta");
        let results = {};
        for (let i = 0; i < meta.length; i++) {
            let key = null;
            if (meta[i].hasAttribute("name")) {
                key = meta[i].getAttribute("name");
            }
            else if (meta[i].hasAttribute("property")) {
                key = meta[i].getAttribute("property");
            }
            else {
                continue;
            }
            if (meta[i].hasAttribute("content")) {
                results[key] = meta[i].getAttribute("content");
            }
        }
        return results;
    };

    /** Returns the trimmed source text of every JSON-LD <script> block (not parsed). */
    let _getJsonLd = function () {
        let jsonld = [];
        let scripts = document.querySelectorAll('script[type="application/ld+json"]');
        for (let i = 0; i < scripts.length; i++) {
            jsonld.push(scripts[i].innerHTML.trim());
        }
        return jsonld;
    };

    // From: https://www.stevefenton.co.uk/blog/2022/12/parse-microdata-with-javascript/
    /** Extracts one object per [itemscope] element, with nested items stored under their itemprop names. */
    let _getMicrodata = function () {
        function sanitize(input) {
            return input.replace(/\s/gi, ' ').trim();
        }

        // Stores value under name; repeated names accumulate into one flat array.
        function addValue(information, name, value) {
            if (information[name]) {
                // `typeof x` is never 'array', so Array.isArray is required here;
                // otherwise a third value would produce [[a, b], c].
                if (Array.isArray(information[name])) {
                    information[name].push(value);
                } else {
                    information[name] = [information[name], value];
                }
            } else {
                information[name] = value;
            }
        }

        function traverseItem(item, information) {
            const children = item.children;

            for (let i = 0; i < children.length; i++) {
                const child = children[i];

                if (child.hasAttribute('itemscope')) {
                    // A nested item is recorded only when it is a property of its parent.
                    if (child.hasAttribute('itemprop')) {
                        const itemProp = child.getAttribute('itemprop');
                        const itemType = child.getAttribute('itemtype');

                        const childInfo = {
                            itemType: itemType
                        };

                        traverseItem(child, childInfo);

                        itemProp.split(' ').forEach(propName => {
                            addValue(information, propName, childInfo);
                        });
                    }

                } else if (child.hasAttribute('itemprop')) {
                    const itemProp = child.getAttribute('itemprop');
                    itemProp.split(' ').forEach(propName => {
                        if (propName === 'url') {
                            addValue(information, propName, child.href);
                        } else {
                            addValue(information, propName, sanitize(child.getAttribute("content") || child.content || child.textContent || child.src || ""));
                        }
                    });
                    traverseItem(child, information);
                } else {
                    traverseItem(child, information);
                }
            }
        }

        const microdata = [];

        document.querySelectorAll("[itemscope]").forEach(function (elem, i) {
            const itemType = elem.getAttribute('itemtype');
            const information = {
                itemType: itemType
            };
            traverseItem(elem, information);
            microdata.push(information);
        });

        return microdata;
    };

    /**
     * Collects structured page metadata.
     * @returns {Object} May contain "jsonld", "microdata" and "meta_tags"; each key is present only when data was found
     */
    let getPageMetadata = function () {
        let jsonld = _getJsonLd();
        let metaTags = _getMetaTags();
        let microdata = _getMicrodata();
        let results = {};
        if (jsonld.length > 0) {
            try {
                // JSON.parse stringifies the array first (entries joined with ","),
                // so this yields a parsed value for a single block and normally
                // throws for several blocks, in which case the raw strings are kept.
                results["jsonld"] = JSON.parse(jsonld);
            }
            catch (e) {
                results["jsonld"] = jsonld;
            }
        }
        if (microdata.length > 0) {
            results["microdata"] = microdata;
        }
        // Attach meta_tags only when at least one tag was collected.
        for (let key in metaTags) {
            if (metaTags.hasOwnProperty(key)) {
                results["meta_tags"] = metaTags;
                break;
            }
        }
        return results;
    };

    /**
     * Extracts all visible text content from the viewport
     * Preserves basic formatting with newlines for block elements
     * @returns {string} Visible text content
     */
    let getVisibleText = function () {
        // Get the window's current viewport boundaries
        const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
        const viewportWidth = window.innerWidth || document.documentElement.clientWidth;

        let textInView = "";
        const walker = document.createTreeWalker(
            document.body,
            NodeFilter.SHOW_TEXT,
            null,
            false
        );

        while (walker.nextNode()) {
            const textNode = walker.currentNode;
            // Create a range to retrieve bounding rectangles of the current text node
            const range = document.createRange();
            range.selectNodeContents(textNode);

            const rects = range.getClientRects();

            // Check if any rect is inside (or partially inside) the viewport
            for (const rect of rects) {
                // Named differently from the outer isVisible() helper to avoid shadowing it.
                const rectInViewport =
                    rect.width > 0 &&
                    rect.height > 0 &&
                    rect.bottom >= 0 &&
                    rect.right >= 0 &&
                    rect.top <= viewportHeight &&
                    rect.left <= viewportWidth;

                if (rectInViewport) {
                    textInView += textNode.nodeValue.replace(/\s+/g, " ");
                    // Is the parent a block element?
                    if (textNode.parentNode) {
                        const parent = textNode.parentNode;
                        const style = window.getComputedStyle(parent);
                        if (["inline", "hidden", "none"].indexOf(style.display) === -1) {
                            textInView += "\n";
                        }
                    }
                    break; // No need to check other rects once found visible
                }
            }
        }

        // Remove blank lines from textInView
        textInView = textInView.replace(/^\s*\n/gm, "").trim().replace(/\n+/g, "\n");
        return textInView;
    };

    // Public API
    return {
        getInteractiveRects: getInteractiveRects,
        getVisualViewport: getVisualViewport,
        getFocusedElementId: getFocusedElementId,
        getPageMetadata: getPageMetadata,
        // Previously defined but unreachable from outside the closure.
        getVisibleText: getVisibleText,
    };
})();
fara/browser/playwright_controller.py ADDED
@@ -0,0 +1,581 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import os
4
+ import random
5
+ import logging
6
+ import functools
7
+ from typing import Any, Callable, Optional, Tuple, Union, TypeVar, Awaitable
8
+
9
+ from playwright._impl._errors import Error as PlaywrightError
10
+ from playwright._impl._errors import TimeoutError, TargetClosedError
11
+ from playwright.async_api import Download, Page
12
+ from playwright.async_api import TimeoutError as PlaywrightTimeoutError
13
+
14
+ # Adapted from Magentic-UI
15
+ # Some of the Code for clicking coordinates and keypresses adapted from https://github.com/openai/openai-cua-sample-app/blob/main/computers/base_playwright.py
16
+ # Copyright 2025 OpenAI - MIT License
17
# Lookup table from lower-case key names (presumably as emitted by the
# computer-use model - TODO confirm against the caller) to the key names
# passed to Playwright.
# NOTE(review): "Divide" does not look like a key name accepted by Playwright's
# keyboard API ("Slash" / "NumpadDivide" are the documented codes for "/") -
# confirm before relying on the "/" entry.
CUA_KEY_TO_PLAYWRIGHT_KEY = {
    "/": "Divide",
    "\\": "Backslash",
    "alt": "Alt",
    "arrowdown": "ArrowDown",
    "arrowleft": "ArrowLeft",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    "backspace": "Backspace",
    "capslock": "CapsLock",
    "cmd": "Meta",
    "ctrl": "Control",
    "delete": "Delete",
    "end": "End",
    "enter": "Enter",
    "esc": "Escape",
    "home": "Home",
    "insert": "Insert",
    "option": "Alt",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "shift": "Shift",
    "space": " ",
    "super": "Meta",
    "tab": "Tab",
    "win": "Meta",
}
44
+
45
+ F = TypeVar("F", bound=Callable[..., Awaitable[Any]])
46
+
47
+
48
def handle_target_closed(max_retries: int = 2, timeout_secs: int = 30):
    """
    Build a decorator that retries an async method after a closed-target or
    tunnel-connection failure, trying to recover the page between attempts.

    The decorated callable must be a method: ``args[0].logger`` is used for
    logging, and ``args[1]`` is treated as the page when it has a ``url``
    attribute. Without such a page the error is re-raised on the first failure.

    Args:
        max_retries: Maximum number of retry attempts
        timeout_secs: Timeout for page operations during recovery
    """

    def decorator(func: F) -> F:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            logger = args[0].logger
            # Second positional argument counts as the page if it looks like one.
            page = args[1] if len(args) >= 2 and hasattr(args[1], "url") else None

            attempts = 0
            last_error = None

            while attempts <= max_retries:
                try:
                    return await func(*args, **kwargs)
                except (TargetClosedError, PlaywrightError) as e:
                    is_tunnel_error = "net::ERR_TUNNEL_CONNECTION_FAILED" in str(e)
                    is_target_closed = isinstance(
                        e, TargetClosedError
                    ) or "Target page, context or browser has been closed" in str(e)

                    # Any other Playwright error is not ours to handle.
                    if not is_tunnel_error and not is_target_closed:
                        raise e

                    last_error = e
                    attempts += 1

                    # Out of retries, or nothing to recover: give up with the original error.
                    if attempts > max_retries or page is None:
                        raise e

                    if is_tunnel_error:
                        error_type = "tunnel connection"
                    else:
                        error_type = "target closed"
                    logger.warning(
                        f"{error_type} error in {func.__name__}, attempting recovery (retry {attempts}/{max_retries})"
                    )

                    try:
                        await _recover_page(page, timeout_secs, logger)
                        # Small delay before retry
                        await asyncio.sleep(0.5)
                    except Exception as recovery_error:
                        logger.error(f"Page recovery failed: {recovery_error}")
                        # Surface the original failure, chained to the recovery failure.
                        raise e from recovery_error

            # Defensive fallback; every loop path above returns or raises.
            raise last_error

        return wrapper

    return decorator
118
+
119
+
120
async def _recover_page(page: Page, timeout_secs: int = 30, logger=None) -> None:
    """
    Attempt to recover an unresponsive or closed page by reloading it.

    The page is first probed with a trivial script; if that answers, nothing else
    is done. Otherwise any in-flight navigation is stopped (best effort), the page
    is reloaded and, if the reload fails, the current URL is navigated to again.

    Args:
        page: The Playwright page object to recover
        timeout_secs: Timeout, in seconds, for the reload / goto operations
        logger: Optional logger; defaults to the "playwright_controller" logger

    Raises:
        Exception: If both the reload and the goto fallback fail.
    """
    logger = logger or logging.getLogger("playwright_controller")
    try:
        # Page.evaluate() has no ``timeout`` keyword (passing one raises TypeError,
        # which the blanket except below would swallow), so the probe is bounded
        # with asyncio.wait_for instead.
        await asyncio.wait_for(page.evaluate("1"), timeout=1.0)
        # If we get here, the page is actually fine
        return
    except Exception:
        # Page is indeed problematic, attempt recovery
        pass

    try:
        # Stop any ongoing navigation (best effort, bounded to two seconds)
        await asyncio.wait_for(page.evaluate("window.stop()"), timeout=2.0)
    except Exception:
        # Ignore errors from window.stop()
        pass

    timeout_ms = timeout_secs * 1000
    try:
        # Try to reload the page
        await page.reload(timeout=timeout_ms)
        await page.wait_for_load_state("load", timeout=timeout_ms)
        logger.info("playwright_controller._recover_page(): Page recovery successful")
    except Exception as e:
        logger.error(f"playwright_controller._recover_page(): Page reload failed: {e}")

        # Try alternative recovery: navigate to current URL
        try:
            current_url = page.url
            if current_url and current_url != "about:blank":
                await page.goto(current_url, timeout=timeout_ms)
                await page.wait_for_load_state("load", timeout=timeout_ms)
                logger.info(
                    "playwright_controller._recover_page(): Page recovery via goto successful"
                )
            else:
                # Raised inside the try on purpose: it is reported through the
                # combined "all recovery methods failed" error below.
                raise Exception(
                    "playwright_controller._recover_page(): No valid URL to navigate to"
                )
        except Exception as goto_error:
            raise Exception(
                f"playwright_controller._recover_page(): All recovery methods failed. Reload error: {e}, Goto error: {goto_error}"
            ) from goto_error
170
+
171
+
172
+ # Enhanced version that can handle browser context recreation
173
def handle_target_closed_with_context(max_retries: int = 2, timeout_secs: int = 30):
    """
    Enhanced decorator that can also handle browser context recreation.
    Use this for critical operations where you have access to the browser context.

    The wrapped coroutine is retried when it fails with a "target closed" or
    "tunnel connection failed" Playwright error. Before each retry the page
    (taken from the second positional argument, if it has a ``url`` attribute)
    is recovered with ``_recover_page``. Any other error is re-raised untouched.

    Args:
        max_retries: Number of retries after the first failed call. Negative
            values are treated as 0, so the wrapped function always runs once.
        timeout_secs: Timeout, in seconds, forwarded to ``_recover_page``.
    """

    def decorator(func: F) -> F:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # The first positional argument is expected to be the owning instance;
            # fall back to the module logger if it is absent or has no logger.
            owner = args[0] if args else None
            logger = getattr(owner, "logger", None) or logging.getLogger(
                "playwright_controller"
            )
            page = None
            if len(args) >= 2 and hasattr(args[1], "url"):
                page = args[1]

            retry_budget = max(max_retries, 0)
            retries = 0

            while True:
                try:
                    return await func(*args, **kwargs)
                except (TargetClosedError, PlaywrightError) as e:
                    # Check if this is a tunnel connection error
                    is_tunnel_error = "net::ERR_TUNNEL_CONNECTION_FAILED" in str(e)
                    is_target_closed = isinstance(
                        e, TargetClosedError
                    ) or "Target page, context or browser has been closed" in str(e)

                    if not (is_tunnel_error or is_target_closed):
                        # Not an error we handle, re-raise
                        raise

                    retries += 1

                    # Out of retries, or no page to recover: give up with the
                    # original error and its traceback.
                    if retries > retry_budget or page is None:
                        raise

                    error_type = (
                        "tunnel connection" if is_tunnel_error else "target closed"
                    )
                    logger.warning(
                        f"playwright_controller.handle_target_closed_with_context(): {error_type} error in {func.__name__}, attempting enhanced recovery (retry {retries}/{max_retries})"
                    )

                    # Check if the browser is still connected. This is done outside
                    # the recovery try-block so the re-raise is not mistaken for a
                    # failed recovery (and chained onto itself).
                    browser = page.context.browser
                    if browser and not browser.is_connected():
                        # Browser connection is lost - this is a more serious issue
                        logger.error(
                            "playwright_controller.handle_target_closed_with_context(): Browser connection lost - cannot recover automatically"
                        )
                        raise

                    try:
                        # Try basic recovery first
                        await _recover_page(page, timeout_secs, logger)
                        await asyncio.sleep(0.5)
                    except Exception as recovery_error:
                        logger.error(
                            f"playwright_controller.handle_target_closed_with_context(): Enhanced page recovery failed: {recovery_error}"
                        )
                        raise e from recovery_error

        return wrapper

    return decorator
247
+
248
+
249
class PlaywrightController:
    """
    Helper that drives a Playwright ``Page``: navigation, scrolling,
    coordinate-based clicking / hovering / typing, key presses and screenshots.
    """

    def __init__(
        self,
        animate_actions: bool = False,
        downloads_folder: Optional[str] = None,
        viewport_width: int = 1440,
        viewport_height: int = 900,
        _download_handler: Optional[Callable[[Download], None]] = None,
        to_resize_viewport: bool = True,
        single_tab_mode: bool = False,
        sleep_after_action: int = 10,
        timeout_load: int = 1,
        logger=None,
    ) -> None:
        """
        A controller for Playwright to interact with web pages.

        animate_actions: If True, actions will be animated.
        downloads_folder: The folder to save downloads to.
        viewport_width: The width of the viewport.
        viewport_height: The height of the viewport.
        _download_handler: A handler for downloads.
        to_resize_viewport: If True, the viewport will be resized.
        single_tab_mode (bool): If True, forces navigation to happen in the same tab rather than opening new tabs/windows.
        sleep_after_action: Stored as ``_sleep_after_action``; not read by any method of this class.
        timeout_load: Stored as ``_timeout_load``; not read by any method of this class.
        logger: Optional logger; defaults to the "playwright_controller" logger.
        """
        self.animate_actions = animate_actions
        self.downloads_folder = downloads_folder
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
        self._download_handler = _download_handler
        self.to_resize_viewport = to_resize_viewport
        self.single_tab_mode = single_tab_mode
        self._sleep_after_action = sleep_after_action
        self._timeout_load = timeout_load
        self.logger = logger or logging.getLogger("playwright_controller")

        # Where the animated cursor marker was last drawn, as (x, y).
        self.last_cursor_position: Tuple[float, float] = (0.0, 0.0)

    async def sleep(self, page: Page, duration: Union[int, float]) -> None:
        """Pause for ``duration`` seconds; ``page`` is accepted but not used."""
        await asyncio.sleep(duration)

    @handle_target_closed()
    async def on_new_page(self, page: Page) -> None:
        """
        Prepare a page for use: bring it to the front, attach the download
        handler, optionally resize the viewport and wait for it to load.
        """
        assert page is not None
        # bring page to front just in case
        await page.bring_to_front()
        # The handler defaults to None; only register a real callable.
        if self._download_handler is not None:
            page.on("download", self._download_handler)  # type: ignore
        if self.to_resize_viewport and self.viewport_width and self.viewport_height:
            await page.set_viewport_size(
                {"width": self.viewport_width, "height": self.viewport_height}
            )
        await self.sleep(page, 0.2)
        try:
            await page.wait_for_load_state(timeout=30000)
        except PlaywrightTimeoutError:
            self.logger.error("WARNING: Page load timeout, page might not be loaded")
            # stop page loading
            await page.evaluate("window.stop()")

    @handle_target_closed()
    async def _ensure_page_ready(self, page: Page) -> None:
        """Run the ``on_new_page`` preparation steps before acting on ``page``."""
        assert page is not None
        await self.on_new_page(page)

    @handle_target_closed()
    async def get_screenshot(self, page: Page, path: str | None = None) -> bytes:
        """
        Capture a screenshot of the current page.

        Args:
            page (Page): The Playwright page object.
            path (str, optional): The file path to save the screenshot. If None, the screenshot will be returned as bytes. Default: None

        Returns:
            bytes: The screenshot image data.
        """
        await self._ensure_page_ready(page)
        try:
            return await page.screenshot(path=path, timeout=15000)
        except Exception:
            # Stop whatever is still loading and try exactly once more.
            await page.evaluate("window.stop()")
            return await page.screenshot(path=path, timeout=15000)

    @handle_target_closed()
    async def back(self, page: Page) -> None:
        """Navigate one entry back in the page's history."""
        await self._ensure_page_ready(page)
        await page.go_back()

    @handle_target_closed()
    async def visit_page(self, page: Page, url: str) -> Tuple[bool, bool]:
        """
        Navigate ``page`` to ``url``. If navigation is aborted and a downloads
        folder is configured, the URL is treated as a file download instead.

        Returns:
            (reset_prior_metadata_hash, reset_last_download): the first is True
            after a normal navigation, the second after a handled download.
        """
        await self._ensure_page_ready(page)
        reset_prior_metadata_hash = False
        reset_last_download = False
        try:
            # Regular webpage
            await page.goto(url)
            await page.wait_for_load_state()
            reset_prior_metadata_hash = True
        except Exception as e_outer:
            # Anything other than an aborted navigation with a downloads folder
            # configured is a genuine failure.
            if not (self.downloads_folder and "net::ERR_ABORTED" in str(e_outer)):
                raise

            # Downloaded file
            async with page.expect_download() as download_info:
                try:
                    await page.goto(url)
                except Exception as e_inner:
                    # The abort is expected while the download starts.
                    if "net::ERR_ABORTED" not in str(e_inner):
                        raise
                download = await download_info.value
                fname = os.path.join(
                    self.downloads_folder, download.suggested_filename
                )
                await download.save_as(fname)
                message = f"<body style=\"margin: 20px;\"><h1>Successfully downloaded '{download.suggested_filename}' to local path:<br><br>{fname}</h1></body>"
                # Show a confirmation page in place of the aborted navigation.
                await page.goto(
                    "data:text/html;base64,"
                    + base64.b64encode(message.encode("utf-8")).decode("utf-8")
                )
                reset_last_download = True
        return reset_prior_metadata_hash, reset_last_download

    @handle_target_closed()
    async def page_down(
        self, page: Page, amount: int = 400, full_page: bool = False
    ) -> None:
        """Scroll down by ``amount`` pixels, or by almost one viewport if ``full_page``."""
        await self._ensure_page_ready(page)
        if full_page:
            await page.mouse.wheel(0, self.viewport_height - 50)
        else:
            await page.mouse.wheel(0, amount)

    @handle_target_closed()
    async def page_up(
        self, page: Page, amount: int = 400, full_page: bool = False
    ) -> None:
        """Scroll up by ``amount`` pixels, or by almost one viewport if ``full_page``."""
        await self._ensure_page_ready(page)
        if full_page:
            await page.mouse.wheel(0, -self.viewport_height + 50)
        else:
            await page.mouse.wheel(0, -amount)

    async def gradual_cursor_animation(
        self, page: Page, start_x: float, start_y: float, end_x: float, end_y: float
    ) -> None:
        """
        Animate a red dot from (start_x, start_y) to (end_x, end_y) in 20 steps
        and remember the end point in ``last_cursor_position``.
        """
        # Create the red cursor if it doesn't exist
        await page.evaluate("""
        (function() {
            if (!document.getElementById('red-cursor')) {
                let cursor = document.createElement('div');
                cursor.id = 'red-cursor';
                cursor.style.width = '10px';
                cursor.style.height = '10px';
                cursor.style.backgroundColor = 'red';
                cursor.style.position = 'absolute';
                cursor.style.borderRadius = '50%';
                cursor.style.zIndex = '10000';
                document.body.appendChild(cursor);
            }
        })();
        """)

        steps = 20
        # Run 1..steps so the last frame lands exactly on the end point; starting
        # at 0 would stop the marker one step short of the target.
        for step in range(1, steps + 1):
            x = start_x + (end_x - start_x) * (step / steps)
            y = start_y + (end_y - start_y) * (step / steps)
            await page.evaluate(f"""
                (function() {{
                    let cursor = document.getElementById('red-cursor');
                    if (cursor) {{
                        cursor.style.left = '{x}px';
                        cursor.style.top = '{y}px';
                    }}
                }})();
            """)
            await asyncio.sleep(0.05)

        self.last_cursor_position = (end_x, end_y)
        await asyncio.sleep(1.0)

    async def _animate_cursor_to(self, page: Page, x: float, y: float) -> None:
        """Glide the cursor marker from its last position to (x, y)."""
        start_x, start_y = self.last_cursor_position
        await self.gradual_cursor_animation(page, start_x, start_y, x, y)
        await asyncio.sleep(0.1)

    @handle_target_closed()
    async def click_coords(self, page: Page, x: float, y: float) -> Page | None:
        """
        Click at (x, y) and give the click one second to open a popup.

        Returns:
            The newly opened page if the click produced a popup, otherwise None.
        """
        new_page: Page | None = None
        await self._ensure_page_ready(page)

        if self.animate_actions:
            # Move cursor to the target slowly
            await self._animate_cursor_to(page, x, y)

        try:
            # Give it a chance to open a new page
            async with page.expect_event("popup", timeout=1000) as page_info:  # type: ignore
                await page.mouse.click(x, y, delay=10)
            new_page = await page_info.value  # type: ignore
            assert isinstance(new_page, Page)
            await self.on_new_page(new_page)
        except (TimeoutError, PlaywrightTimeoutError):
            # No popup within the window. Both names are caught so this works
            # whichever class the module binds to the bare name TimeoutError.
            pass
        return new_page

    @handle_target_closed()
    async def hover_coords(self, page: Page, x: float, y: float) -> None:
        """
        Hovers the mouse over the specified coordinates.

        Args:
            page (Page): The Playwright page object.
            x (float): The x coordinate to hover over.
            y (float): The y coordinate to hover over.
        """
        await self._ensure_page_ready(page)

        if self.animate_actions:
            # Move cursor to the coordinates slowly
            await self._animate_cursor_to(page, x, y)

        await page.mouse.move(x, y)

    @handle_target_closed()
    async def fill_coords(
        self,
        page: Page,
        x: float,
        y: float,
        value: str,
        press_enter: bool = True,
        delete_existing_text: bool = False,
    ) -> Page | None:
        """
        Click at (x, y), optionally clear the field, type ``value`` and
        optionally press Enter.

        Returns:
            The newly opened page if typing / Enter produced a popup within one
            second, otherwise None.
        """
        await self._ensure_page_ready(page)
        new_page: Page | None = None

        if self.animate_actions:
            # Move cursor to the box slowly
            await self._animate_cursor_to(page, x, y)

        await page.mouse.click(x, y)

        if delete_existing_text:
            await page.keyboard.press("ControlOrMeta+A")
            await page.keyboard.press("Backspace")

        # Per-character delay (ms) used only by the fallback below: a randomised
        # human-like pace for short text, a fast fixed pace for long text.
        if len(value) < 100:
            delay_typing_speed = 50 + 100 * random.random()
        else:
            delay_typing_speed = 10

        try:
            # Give it a chance to open a new page
            async with page.expect_event("popup", timeout=1000) as page_info:  # type: ignore
                try:
                    await page.keyboard.type(value)
                except PlaywrightError:
                    # Plain typing failed; retry with a per-character delay.
                    await page.keyboard.type(value, delay=delay_typing_speed)
                if press_enter:
                    await page.keyboard.press("Enter")
            new_page = await page_info.value  # type: ignore
            assert isinstance(new_page, Page)
            await self.on_new_page(new_page)
        except (TimeoutError, PlaywrightTimeoutError):
            # No popup within the window; see click_coords for why both are caught.
            pass

        return new_page

    async def keypress(self, page: Page, keys: list[str]) -> None:
        """
        Press specified keys in sequence.

        All keys are pressed down in order and then released in reverse order,
        so a list such as ["ctrl", "a"] acts as a chord.

        Args:
            page (Page): The Playwright page object
            keys (List[str]): List of keys to press

        Raises:
            RuntimeError: If any key could not be pressed or released.
        """
        await self._ensure_page_ready(page)
        mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
        held: list[str] = []
        try:
            try:
                for key in mapped_keys:
                    await page.keyboard.down(key)
                    held.append(key)
            finally:
                # Release whatever went down, newest first, even when a later
                # key failed, so no modifier is left stuck.
                while held:
                    await page.keyboard.up(held.pop())
        except Exception as e:
            raise RuntimeError(
                f"I tried to keypress(keys={keys}), but I got an error: {e}"
            ) from None

    @handle_target_closed()
    async def wait_for_load_state(
        self, page: Page, state: str = "load", timeout: Optional[int] = None
    ) -> None:
        """Wait for the page to reach a specific load state."""
        await page.wait_for_load_state(state, timeout=timeout)

    @handle_target_closed()
    async def get_page_url(self, page: Page) -> str:
        """Get the current page URL."""
        await self._ensure_page_ready(page)
        return page.url
fara/fara_agent.py ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import asyncio
3
+ import io
4
+ import json
5
+ import logging
6
+ import os
7
+ from typing import Any, Dict, List, Tuple
8
+ from urllib.parse import quote_plus
9
+
10
+ from openai import AsyncOpenAI
11
+ from PIL import Image
12
+ from playwright.async_api import BrowserContext, Download, Page
13
+ from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
14
+
15
+ from ._prompts import get_computer_use_system_prompt
16
+ from .browser.playwright_controller import PlaywrightController
17
+ from .types import (
18
+ AssistantMessage,
19
+ FunctionCall,
20
+ ImageObj,
21
+ LLMMessage,
22
+ ModelResponse,
23
+ SystemMessage,
24
+ UserMessage,
25
+ WebSurferEvent,
26
+ message_to_openai_format,
27
+ )
28
+ from .utils import get_trimmed_url
29
+
30
+
31
+ class FaraAgent:
32
+ DEFAULT_START_PAGE = "https://www.bing.com/"
33
+
34
+ MLM_PROCESSOR_IM_CFG = {
35
+ "min_pixels": 3136,
36
+ "max_pixels": 12845056,
37
+ "patch_size": 14,
38
+ "merge_size": 2,
39
+ }
40
+
41
+ SCREENSHOT_TOKENS = 1105
42
+ USER_MESSAGE = "Here is the next screenshot. Think about what to do next."
43
+ MAX_URL_LENGTH = 100
44
+
45
+ def __init__(
46
+ self,
47
+ browser_manager: Any,
48
+ client_config: dict,
49
+ downloads_folder: str | None = None,
50
+ start_page: str | None = "about:blank",
51
+ animate_actions: bool = False,
52
+ single_tab_mode: bool = True,
53
+ max_n_images: int = 3,
54
+ fn_call_template: str = "default",
55
+ model_call_timeout: int = 20,
56
+ max_rounds: int = 10,
57
+ save_screenshots: bool = False,
58
+ logger: logging.Logger | None = None,
59
+ ):
60
+ self.downloads_folder = downloads_folder
61
+ if not os.path.exists(self.downloads_folder or "") and self.downloads_folder:
62
+ os.makedirs(self.downloads_folder)
63
+ self.single_tab_mode = single_tab_mode
64
+ self.start_page = start_page or self.DEFAULT_START_PAGE
65
+ self.animate_actions = animate_actions
66
+ self.browser_manager = browser_manager
67
+ self.client_config = client_config
68
+ self.max_n_images = max_n_images
69
+ self.fn_call_template = fn_call_template
70
+ self.model_call_timeout = model_call_timeout
71
+ self.max_rounds = max_rounds
72
+ self.max_url_chars = self.MAX_URL_LENGTH
73
+ if save_screenshots and self.downloads_folder is None:
74
+ assert False, "downloads_folder must be set if save_screenshots is True"
75
+ self.save_screenshots = save_screenshots
76
+ self._facts = []
77
+ self._task_summary = None
78
+ self._num_actions = 0
79
+ self.logger = logger or logging.getLogger(__name__)
80
+ self._mlm_width = 1440
81
+ self._mlm_height = 900
82
+ self.viewport_height = 900
83
+ self.viewport_width = 1440
84
+ self.include_input_text_key_args = True
85
+
86
+ def _download_handler(download: Download) -> None:
87
+ self._last_download = download
88
+
89
+ self._download_handler = _download_handler
90
+ self.did_initialize = False
91
+
92
+ # OpenAI client will be initialized in initialize()
93
+ self._openai_client: AsyncOpenAI | None = None
94
+ self._chat_history: List[LLMMessage] = []
95
+
96
+ async def initialize(self) -> None:
97
+ if self.did_initialize:
98
+ return
99
+ self._last_download = None
100
+ self._prior_metadata_hash = None
101
+
102
+ # Initialize OpenAI client
103
+ self._openai_client = AsyncOpenAI(
104
+ api_key=self.client_config.get("api_key"),
105
+ base_url=self.client_config.get("base_url"),
106
+ default_headers=self.client_config.get("default_headers"),
107
+ )
108
+
109
+ # Set up download handler
110
+ self.browser_manager.set_download_handler(self._download_handler)
111
+
112
+ # Initialize browser
113
+ await self.browser_manager.init(self.start_page)
114
+ self.did_initialize = True
115
+
116
+ @property
117
+ def _page(self) -> Page | None:
118
+ """Get the current page from browser manager."""
119
+ return self.browser_manager.page if self.browser_manager else None
120
+
121
+ @_page.setter
122
+ def _page(self, value):
123
+ if self.browser_manager:
124
+ self.browser_manager.page = value
125
+ else:
126
+ raise ValueError("Browser manager is not initialized. Cannot set page.")
127
+
128
+ @property
129
+ def context(self) -> BrowserContext | None:
130
+ """Get the browser context from browser manager."""
131
+ return self.browser_manager.context if self.browser_manager else None
132
+
133
+ @property
134
+ def _playwright_controller(self) -> PlaywrightController | None:
135
+ """Get the playwright controller from browser manager."""
136
+ return (
137
+ self.browser_manager.playwright_controller if self.browser_manager else None
138
+ )
139
+
140
+ async def wait_for_captcha_with_timeout(
141
+ self, timeout_seconds=300
142
+ ): # 5 minutes default
143
+ """Wait for captcha to be solved with timeout"""
144
+ try:
145
+ await asyncio.wait_for(
146
+ self.browser_manager.wait_for_captcha_resolution(),
147
+ timeout=timeout_seconds,
148
+ )
149
+ return True # Captcha solved in time
150
+ except asyncio.TimeoutError:
151
+ self.logger.warning(f"Captcha timeout after {timeout_seconds} seconds!")
152
+ # Force resume execution
153
+ self.browser_manager._captcha_event.set()
154
+ return False # Captcha timed out
155
+
156
+ @retry(
157
+ stop=stop_after_attempt(5),
158
+ wait=wait_exponential(multiplier=5.0, min=5.0, max=60),
159
+ before_sleep=before_sleep_log(logging.getLogger(__name__), logging.WARNING),
160
+ reraise=True,
161
+ )
162
+ async def _make_model_call(
163
+ self,
164
+ history: List[LLMMessage],
165
+ extra_create_args: Dict[str, Any] | None = None,
166
+ ) -> ModelResponse:
167
+ """Make a model call using OpenAI client"""
168
+ openai_messages = [message_to_openai_format(msg) for msg in history]
169
+ request_params = {
170
+ "model": self.client_config.get("model", "gpt-4o"),
171
+ "messages": openai_messages,
172
+ }
173
+ if extra_create_args:
174
+ request_params.update(extra_create_args)
175
+
176
+ response = await self._openai_client.chat.completions.create(**request_params)
177
+ content = response.choices[0].message.content
178
+ usage = {}
179
+ if response.usage:
180
+ usage = {
181
+ "prompt_tokens": response.usage.prompt_tokens,
182
+ "completion_tokens": response.usage.completion_tokens,
183
+ "total_tokens": response.usage.total_tokens,
184
+ }
185
+ return ModelResponse(content=content, usage=usage)
186
+
187
+ def remove_screenshot_from_message(self, msg: List[Dict[str, Any]] | Any) -> Any:
188
+ """Remove the screenshot from the message content."""
189
+ if isinstance(msg.content, list):
190
+ new_content = []
191
+ for c in msg.content:
192
+ if not isinstance(c, ImageObj):
193
+ new_content.append(c)
194
+ msg.content = new_content
195
+ elif isinstance(msg.content, ImageObj):
196
+ msg = None
197
+ return msg
198
+
199
+ def maybe_remove_old_screenshots(
200
+ self, history: List[LLMMessage], includes_current: bool = False
201
+ ) -> List[LLMMessage]:
202
+ """Remove old screenshots from the chat history. Assuming we have not yet added the current screenshot message.
203
+
204
+ Note: Original user messages (marked with is_original=True) have their TEXT preserved,
205
+ but their images may be removed if we exceed max_n_images. Boilerplate messages can be
206
+ completely removed.
207
+ """
208
+ if self.max_n_images <= 0:
209
+ return history
210
+
211
+ max_n_images = self.max_n_images if includes_current else self.max_n_images - 1
212
+ new_history: List[LLMMessage] = []
213
+ n_images = 0
214
+ for i in range(len(history) - 1, -1, -1):
215
+ msg = history[i]
216
+
217
+ is_original_user_message = isinstance(msg, UserMessage) and getattr(
218
+ msg, "is_original", False
219
+ )
220
+
221
+ if i == 0 and n_images >= max_n_images:
222
+ # First message is always the task so we keep it and remove the screenshot if necessary
223
+ msg = self.remove_screenshot_from_message(msg)
224
+ if msg is None:
225
+ continue
226
+
227
+ if isinstance(msg.content, list):
228
+ # Check if the message contains an image. Assumes 1 image per message.
229
+ has_image = False
230
+ for c in msg.content:
231
+ if isinstance(c, ImageObj):
232
+ has_image = True
233
+ break
234
+ if has_image:
235
+ if n_images < max_n_images:
236
+ new_history.append(msg)
237
+ elif is_original_user_message:
238
+ # Original user message but over limit: keep text, remove image
239
+ msg = self.remove_screenshot_from_message(msg)
240
+ if msg is not None:
241
+ new_history.append(msg)
242
+ n_images += 1
243
+ else:
244
+ new_history.append(msg)
245
+ elif isinstance(msg.content, ImageObj):
246
+ if n_images < max_n_images:
247
+ new_history.append(msg)
248
+ n_images += 1
249
+ else:
250
+ new_history.append(msg)
251
+
252
+ new_history = new_history[::-1]
253
+
254
+ return new_history
255
+
256
+ async def _get_scaled_screenshot(self) -> Image.Image:
257
+ """Get current screenshot and scale it for the model."""
258
+ screenshot = await self._playwright_controller.get_screenshot(self._page)
259
+ screenshot = Image.open(io.BytesIO(screenshot))
260
+ _, scaled_screenshot = self._get_system_message(screenshot)
261
+ return scaled_screenshot
262
+
263
+ def _get_system_message(
264
+ self, screenshot: ImageObj | Image.Image
265
+ ) -> Tuple[List[SystemMessage], Image.Image]:
266
+ system_prompt_info = get_computer_use_system_prompt(
267
+ screenshot,
268
+ self.MLM_PROCESSOR_IM_CFG,
269
+ include_input_text_key_args=self.include_input_text_key_args,
270
+ fn_call_template=self.fn_call_template,
271
+ )
272
+ self._mlm_width, self._mlm_height = system_prompt_info["im_size"]
273
+ scaled_screenshot = screenshot.resize((self._mlm_width, self._mlm_height))
274
+
275
+ system_message = []
276
+ for msg in system_prompt_info["conversation"]:
277
+ tmp_content = ""
278
+ for content in msg["content"]:
279
+ tmp_content += content["text"]
280
+
281
+ system_message.append(SystemMessage(content=tmp_content))
282
+
283
+ return system_message, scaled_screenshot
284
+
285
+ def _parse_thoughts_and_action(self, message: str) -> Tuple[str, Dict[str, Any]]:
286
+ try:
287
+ tmp = message.split("<tool_call>\n")
288
+ thoughts = tmp[0].strip()
289
+ action_text = tmp[1].split("\n</tool_call>")[0]
290
+ try:
291
+ action = json.loads(action_text)
292
+ except json.decoder.JSONDecodeError:
293
+ self.logger.error(f"Invalid action text: {action_text}")
294
+ action = ast.literal_eval(action_text)
295
+
296
+ return thoughts, action
297
+ except Exception as e:
298
+ self.logger.error(
299
+ f"Error parsing thoughts and action: {message}", exc_info=True
300
+ )
301
+ raise e
302
+
303
+ def convert_resized_coords_to_original(
304
+ self, coords: List[float], rsz_w: int, rsz_h: int, og_w: int, og_h: int
305
+ ) -> List[float]:
306
+ scale_x = og_w / rsz_w
307
+ scale_y = og_h / rsz_h
308
+ return [coords[0] * scale_x, coords[1] * scale_y]
309
+
310
+ def proc_coords(
311
+ self,
312
+ coords: List[float] | None,
313
+ im_w: int,
314
+ im_h: int,
315
+ og_im_w: int | None = None,
316
+ og_im_h: int | None = None,
317
+ ) -> List[float] | None:
318
+ if not coords:
319
+ return coords
320
+
321
+ if og_im_w is None:
322
+ og_im_w = im_w
323
+ if og_im_h is None:
324
+ og_im_h = im_h
325
+
326
+ tgt_x, tgt_y = coords
327
+ return self.convert_resized_coords_to_original(
328
+ [tgt_x, tgt_y], im_w, im_h, og_im_w, og_im_h
329
+ )
330
+
331
+ async def run(self, user_message: str) -> Tuple:
332
+ """Run the agent with a user message."""
333
+ # Initialize if not already done
334
+ await self.initialize()
335
+
336
+ # Ensure page is ready after initialization
337
+ assert self._page is not None, "Page should be initialized"
338
+
339
+ # Get initial screenshot and add user message with image to chat history
340
+ scaled_screenshot = await self._get_scaled_screenshot()
341
+
342
+ if self.save_screenshots:
343
+ await self._playwright_controller.get_screenshot(
344
+ self._page,
345
+ path=os.path.join(
346
+ self.downloads_folder, f"screenshot{self._num_actions}.png"
347
+ ),
348
+ )
349
+
350
+ self._chat_history.append(
351
+ UserMessage(
352
+ content=[ImageObj.from_pil(scaled_screenshot), user_message],
353
+ is_original=True,
354
+ )
355
+ )
356
+
357
+ all_actions = []
358
+ all_observations = []
359
+ final_answer = "<no_answer>"
360
+ is_stop_action = False
361
+ for i in range(self.max_rounds):
362
+ is_first_round = i == 0
363
+ if not self.browser_manager._captcha_event.is_set():
364
+ self.logger.info("Waiting 60s for captcha to finish...")
365
+ captcha_solved = await self.wait_for_captcha_with_timeout(60)
366
+ if (
367
+ not captcha_solved
368
+ and not self.browser_manager._captcha_event.is_set()
369
+ ):
370
+ raise RuntimeError(
371
+ "Captcha timed out, unable to proceed with web surfing."
372
+ )
373
+
374
+ function_call, raw_response = await self.generate_model_call(
375
+ is_first_round, scaled_screenshot if is_first_round else None
376
+ )
377
+ assert isinstance(raw_response, str)
378
+ all_actions.append(raw_response)
379
+ thoughts, action_dict = self._parse_thoughts_and_action(raw_response)
380
+ action_args = action_dict.get("arguments", {})
381
+ action = action_args["action"]
382
+ self.logger.info(
383
+ f"\nThought #{i + 1}: {thoughts}\nAction #{i + 1}: executing tool '{action}' with arguments {json.dumps(action_args)}"
384
+ )
385
+
386
+ (
387
+ is_stop_action,
388
+ new_screenshot,
389
+ action_description,
390
+ ) = await self.execute_action(function_call)
391
+ all_observations.append(action_description)
392
+ self.logger.info(f"Observation#{i + 1}: {action_description}")
393
+ if is_stop_action:
394
+ final_answer = thoughts
395
+ break
396
+ return final_answer, all_actions, all_observations
397
+
398
+ async def generate_model_call(
399
+ self, is_first_round: bool, first_screenshot: Image.Image | None = None
400
+ ) -> Tuple[List[FunctionCall], str]:
401
+ history = self.maybe_remove_old_screenshots(self._chat_history)
402
+
403
+ screenshot_for_system = first_screenshot
404
+ if not is_first_round:
405
+ # Get screenshot and add new user message for subsequent rounds
406
+ scaled_screenshot = await self._get_scaled_screenshot()
407
+ screenshot_for_system = scaled_screenshot
408
+
409
+ text_prompt = self.USER_MESSAGE
410
+ curr_url = await self._playwright_controller.get_page_url(self._page)
411
+ trimmed_url = get_trimmed_url(curr_url, max_len=self.max_url_chars)
412
+ text_prompt = f"Current URL: {trimmed_url}\n" + text_prompt
413
+
414
+ curr_message = UserMessage(
415
+ content=[ImageObj.from_pil(scaled_screenshot), text_prompt]
416
+ )
417
+ self._chat_history.append(curr_message)
418
+ history.append(curr_message)
419
+
420
+ # Generate system message using the screenshot
421
+ system_message, _ = self._get_system_message(screenshot_for_system)
422
+ history = system_message + history
423
+ response = await self._make_model_call(
424
+ history, extra_create_args={"temperature": 0}
425
+ )
426
+ message = response.content
427
+
428
+ self._chat_history.append(AssistantMessage(content=message))
429
+ thoughts, action = self._parse_thoughts_and_action(message)
430
+ action["arguments"]["thoughts"] = thoughts
431
+
432
+ function_call = [FunctionCall(id="dummy", **action)]
433
+ return function_call, message
434
+
435
async def execute_action(
    self,
    function_call: List[FunctionCall],
) -> Tuple[bool, bytes, str]:
    """Run the first tool call in ``function_call`` against the current page.

    Only ``function_call[0]`` is inspected. Its ``arguments["action"]`` value
    selects the browser operation; several actions accept two spellings
    (``click``/``left_click``, ``input_text``/``type``, ``sleep``/``wait``,
    ``hover``/``mouse_move``, ``keypress``/``key``, ``stop``/``terminate``).

    Side effects visible in this method:
      * ``arguments["coordinate"]`` is overwritten in place with the value
        returned by ``self.proc_coords``.
      * ``self._page`` is replaced when a click or fill returns a new page.
      * ``self._last_download`` / ``self._prior_metadata_hash`` may be reset.
      * ``self._facts`` grows on ``pause_and_memorize_fact``.
      * ``self._num_actions`` is incremented once per successful call.

    Args:
        function_call: Tool calls produced by the model; only the first
            element is executed.

    Returns:
        ``(is_stop_action, new_screenshot, action_description)`` where
        ``is_stop_action`` is True only for ``stop``/``terminate``,
        ``new_screenshot`` is whatever ``get_screenshot`` returns after the
        action (annotated as bytes), and ``action_description`` is a
        first-person summary of what was done (empty for ``hover`` and for a
        zero-pixel ``scroll``; the model's ``thoughts`` for a stop action).

    Raises:
        KeyError: if a mandatory argument such as ``"action"`` or ``"url"``
            is missing from the arguments mapping.
        ValueError: if the action name is not one of the handled actions.
    """
    name = function_call[0].name
    args = function_call[0].arguments
    action_description = ""
    assert self._page is not None
    # Log the raw (model-space) arguments before any coordinate rewriting.
    self.logger.debug(
        WebSurferEvent(
            source="FaraAgent",
            url=await self._playwright_controller.get_page_url(self._page),
            action=name,
            arguments=args,
            message=f"{name}( {json.dumps(args)} )",
        )
    )
    # Convert the coordinate using the model image size and the viewport size.
    # NOTE: this mutates the caller's FunctionCall arguments.
    if "coordinate" in args:
        args["coordinate"] = self.proc_coords(
            args["coordinate"],
            self._mlm_width,
            self._mlm_height,
            self.viewport_width,
            self.viewport_height,
        )

    is_stop_action = False

    if args["action"] == "visit_url":
        url = str(args["url"])
        action_description = f"I typed '{url}' into the browser address bar."
        # Check if the argument starts with a known protocol
        if url.startswith(("https://", "http://", "file://", "about:")):
            (
                reset_prior_metadata,
                reset_last_download,
            ) = await self._playwright_controller.visit_page(self._page, url)
        # If the argument contains a space, treat it as a search query
        elif " " in url:
            (
                reset_prior_metadata,
                reset_last_download,
            ) = await self._playwright_controller.visit_page(
                self._page,
                f"https://www.bing.com/search?q={quote_plus(url)}&FORM=QBLH",
            )
        # Otherwise, prefix with https://
        else:
            (
                reset_prior_metadata,
                reset_last_download,
            ) = await self._playwright_controller.visit_page(
                self._page, "https://" + url
            )
        # Clear cached state when the controller signals it is stale.
        if reset_last_download and self._last_download is not None:
            self._last_download = None
        if reset_prior_metadata and self._prior_metadata_hash is not None:
            self._prior_metadata_hash = None
    elif args["action"] == "history_back":
        action_description = "I clicked the browser back button."
        await self._playwright_controller.back(self._page)
    elif args["action"] == "web_search":
        # The query is sent to Bing, URL-encoded with quote_plus.
        query = args.get("query")
        action_description = f"I typed '{query}' into the browser search bar."
        encoded_query = quote_plus(query)
        (
            reset_prior_metadata,
            reset_last_download,
        ) = await self._playwright_controller.visit_page(
            self._page, f"https://www.bing.com/search?q={encoded_query}&FORM=QBLH"
        )
        if reset_last_download and self._last_download is not None:
            self._last_download = None
        if reset_prior_metadata and self._prior_metadata_hash is not None:
            self._prior_metadata_hash = None
    elif args["action"] == "scroll":
        # Only the sign of "pixels" is used: positive pages up, negative
        # pages down, zero (or missing) does nothing.
        pixels = int(args.get("pixels", 0))
        if pixels > 0:
            action_description = "I scrolled up one page in the browser."
            await self._playwright_controller.page_up(self._page)
        elif pixels < 0:
            action_description = "I scrolled down one page in the browser."
            await self._playwright_controller.page_down(self._page)
    elif args["action"] == "keypress" or args["action"] == "key":
        keys = args.get("keys", [])
        action_description = f"I pressed the following keys: {keys}"
        await self._playwright_controller.keypress(self._page, keys)
    elif args["action"] == "hover" or args["action"] == "mouse_move":
        # No action_description is produced for hover; without a coordinate
        # this branch is a no-op.
        if "coordinate" in args:
            tgt_x, tgt_y = args["coordinate"]
            await self._playwright_controller.hover_coords(self._page, tgt_x, tgt_y)

    elif args["action"] == "sleep" or args["action"] == "wait":
        # "time" takes precedence over "duration"; 3.0 is used when neither is given.
        duration = args.get("duration", 3.0)
        duration = args.get("time", duration)
        action_description = (
            "I am waiting a short period of time before taking further action."
        )
        await self._playwright_controller.sleep(self._page, duration)
    elif args["action"] == "click" or args["action"] == "left_click":
        if "coordinate" in args:
            tgt_x, tgt_y = args["coordinate"]
            action_description = f"I clicked at coordinates ({tgt_x}, {tgt_y})."
            new_page_tentative = await self._playwright_controller.click_coords(
                self._page, tgt_x, tgt_y
            )

            # A non-None result becomes the current page.
            if new_page_tentative is not None:
                self._page = new_page_tentative
                self._prior_metadata_hash = None

    elif args["action"] == "input_text" or args["action"] == "type":
        # The text is read from "text", falling back to "text_value".
        text_value = str(args.get("text", args.get("text_value")))
        action_description = f"I typed '{text_value}'."
        press_enter = args.get("press_enter", True)
        delete_existing_text = args.get("delete_existing_text", False)

        # Nothing is typed unless a target coordinate was supplied.
        if "coordinate" in args:
            tgt_x, tgt_y = args["coordinate"]
            new_page_tentative = await self._playwright_controller.fill_coords(
                self._page,
                tgt_x,
                tgt_y,
                text_value,
                press_enter=press_enter,
                delete_existing_text=delete_existing_text,
            )
            if new_page_tentative is not None:
                self._page = new_page_tentative
                self._prior_metadata_hash = None

    elif args["action"] == "pause_and_memorize_fact":
        fact = str(args.get("fact"))
        self._facts.append(fact)
        action_description = f"I memorized the following fact: {fact}"
    elif args["action"] == "stop" or args["action"] == "terminate":
        # The model's own thoughts double as the description of the stop action.
        action_description = args.get("thoughts")
        is_stop_action = True

    else:
        raise ValueError(f"Unknown tool: {args['action']}")

    # Let the page settle before capturing the result of the action.
    # NOTE(review): the sleep argument is presumably seconds; confirm in the controller.
    await self._playwright_controller.wait_for_load_state(self._page)
    await self._playwright_controller.sleep(self._page, 3)

    # Get new screenshot after action
    self._num_actions += 1
    if self.save_screenshots:
        # Saved as screenshot<N>.png inside the downloads folder.
        new_screenshot = await self._playwright_controller.get_screenshot(
            self._page,
            path=os.path.join(
                self.downloads_folder, f"screenshot{self._num_actions}.png"
            ),
        )
    else:
        new_screenshot = await self._playwright_controller.get_screenshot(
            self._page
        )
    return is_stop_action, new_screenshot, action_description
594
+
595
async def close(self) -> None:
    """
    Close the browser and the page.
    Should be called when the agent is no longer needed.

    The agent's reference to the current page is dropped and the browser
    manager's ``close()`` coroutine is awaited.
    """
    # Forget the page first so no further actions can target it.
    if self._page is not None:
        self._page = None
    await self.browser_manager.close()
fara/qwen_helpers/__init__.py ADDED
File without changes
fara/qwen_helpers/base_tool.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Source: https://github.com/QwenLM/Qwen-Agent/blob/main/qwen_agent/tools/base.py
2
+
3
+ import json
4
+ from abc import ABC, abstractmethod
5
+ from typing import List, Optional, Union
6
+
7
+ from .schema import ContentItem
8
+ from .utils import has_chinese_chars, json_loads
9
+
10
+
11
def is_tool_schema(obj: dict) -> bool:
    """
    Check if obj is a valid JSON schema describing a tool compatible with OpenAI's tool calling.
    Example valid schema:
    {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"]
          }
        },
        "required": ["location"]
      }
    }

    Args:
        obj: Candidate tool description.

    Returns:
        True when ``obj`` has exactly the keys ``name``, ``description`` and
        ``parameters``, the parameters are an ``object`` schema whose
        ``required`` names all appear in ``properties``, and ``jsonschema``
        accepts the parameters as a well-formed schema. False otherwise.
    """
    # Explicit conditionals are used instead of `assert` so the validation
    # still runs when Python is started with -O (which strips asserts).
    if not isinstance(obj, dict):
        return False
    if set(obj.keys()) != {"name", "description", "parameters"}:
        return False

    name = obj["name"]
    if not isinstance(name, str) or not name.strip():
        return False
    if not isinstance(obj["description"], str):
        return False

    parameters = obj["parameters"]
    if not isinstance(parameters, dict):
        return False
    if set(parameters.keys()) != {"type", "properties", "required"}:
        return False
    if parameters["type"] != "object":
        return False

    properties = parameters["properties"]
    required = parameters["required"]
    if not isinstance(properties, dict) or not isinstance(required, list):
        return False
    try:
        if not set(required).issubset(properties.keys()):
            return False
    except TypeError:
        # `required` contained an unhashable entry, so it cannot be a list of names.
        return False

    # Imported lazily, and only once the cheap structural checks have passed.
    import jsonschema

    try:
        # Validating an empty instance forces jsonschema to check the schema itself.
        jsonschema.validate(instance={}, schema=parameters)
    except jsonschema.exceptions.SchemaError:
        return False
    except jsonschema.exceptions.ValidationError:
        # The empty instance failing validation is expected and irrelevant here.
        pass
    return True
59
+
60
+
61
class BaseTool(ABC):
    """Abstract base class for tools.

    Subclasses set the class attributes ``name``, ``description`` and
    ``parameters`` and implement :meth:`call`. ``parameters`` is either a list
    of per-parameter dicts (each with at least a ``name`` and optionally a
    ``required`` flag) or a single dict holding an OpenAI-compatible JSON
    schema.
    """

    name: str = ""
    description: str = ""
    parameters: Union[List[dict], dict] = []

    def __init__(self, cfg: Optional[dict] = None):
        """Store the configuration and validate the class-level tool metadata.

        Args:
            cfg: Optional per-instance configuration; ``None`` becomes ``{}``.

        Raises:
            ValueError: if ``name`` is empty, or if ``parameters`` is a dict
                that is not a valid tool schema.
        """
        self.cfg = cfg or {}
        if not self.name:
            raise ValueError(
                f"You must set {self.__class__.__name__}.name, either by @register_tool(name=...) or explicitly setting {self.__class__.__name__}.name"
            )
        if isinstance(self.parameters, dict):
            if not is_tool_schema(
                {
                    "name": self.name,
                    "description": self.description,
                    "parameters": self.parameters,
                }
            ):
                raise ValueError(
                    "The parameters, when provided as a dict, must conform to a valid openai-compatible JSON schema."
                )

    @abstractmethod
    def call(
        self, params: Union[str, dict], **kwargs
    ) -> Union[str, list, dict, List[ContentItem]]:
        """The interface for calling tools.

        Each tool needs to implement this function, which is the workflow of the tool.

        Args:
            params: The parameters of func_call.
            kwargs: Additional parameters for calling tools.

        Returns:
            The result returned by the tool, implemented in the subclass.
        """
        raise NotImplementedError

    def _verify_json_format_args(
        self, params: Union[str, dict], strict_json: bool = False
    ) -> dict:
        """Verify the parameters of the function call.

        Args:
            params: Either a JSON string or an already-decoded dict.
            strict_json: When True, strings are decoded with ``json.loads``;
                otherwise the project's more lenient ``json_loads`` is used.

        Returns:
            The decoded parameters.

        Raises:
            ValueError: if ``params`` is not valid JSON, if a required
                list-style parameter is missing, or if ``self.parameters`` is
                neither a list nor a dict.
            jsonschema.ValidationError: if ``self.parameters`` is a schema
                dict and the decoded parameters do not satisfy it.
        """
        if isinstance(params, str):
            try:
                if strict_json:
                    params_json: dict = json.loads(params)
                else:
                    params_json: dict = json_loads(params)
            except json.decoder.JSONDecodeError as err:
                # Chain the decode error so the offending position is not lost.
                raise ValueError(
                    "Parameters must be formatted as a valid JSON!"
                ) from err
        else:
            params_json: dict = params
        if isinstance(self.parameters, list):
            for param in self.parameters:
                if "required" in param and param["required"]:
                    if param["name"] not in params_json:
                        raise ValueError("Parameters %s is required!" % param["name"])
        elif isinstance(self.parameters, dict):
            import jsonschema

            jsonschema.validate(instance=params_json, schema=self.parameters)
        else:
            raise ValueError(
                f"{self.__class__.__name__}.parameters must be a list or a dict, "
                f"got {type(self.parameters).__name__}"
            )
        return params_json

    @property
    def function(self) -> dict:  # Bad naming. It should be `function_info`.
        """Return the tool description (name, description, parameters) as a dict."""
        return {
            # 'name_for_human': self.name_for_human,
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters,
            # 'args_format': self.args_format
        }

    @property
    def name_for_human(self) -> str:
        """Display name taken from ``cfg["name_for_human"]``, defaulting to ``name``."""
        return self.cfg.get("name_for_human", self.name)

    @property
    def args_format(self) -> str:
        """Instruction on how to format arguments.

        Uses ``cfg["args_format"]`` when set; otherwise a Chinese or English
        default is chosen depending on whether the tool metadata contains
        Chinese characters.
        """
        fmt = self.cfg.get("args_format")
        if fmt is None:
            if has_chinese_chars(
                [self.name_for_human, self.name, self.description, self.parameters]
            ):
                fmt = "此工具的输入应为JSON对象。"
            else:
                fmt = "Format the arguments as a JSON object."
        return fmt

    @property
    def file_access(self) -> bool:
        """Whether the tool accesses files; the base implementation returns False."""
        return False
fara/qwen_helpers/fncall_prompt.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Source: https://github.com/QwenLM/Qwen-Agent/blob/main/qwen_agent/llm/fncall_prompts/nous_fncall_prompt.py
2
+
3
+ import copy
4
+ import json
5
+ import os
6
+
7
+ from typing import List, Literal, Union
8
+
9
+ from .schema import ASSISTANT, FUNCTION, SYSTEM, USER, ContentItem, Message
10
+
11
+
12
class NousFnCallPrompt:
    """Rewrites function-call messages into plain-text ``<tool_call>`` /
    ``<tool_response>`` form and injects the tool-description system prompt."""

    def __init__(self, template_name: str = "default"):
        """Initialize NousFnCallPrompt with a specific template.

        Args:
            template_name: Name of the template to use. Options:
                "default", "qwen", "with_ci"

        Raises:
            ValueError: if ``template_name`` is not one of the options above.
        """
        self.template_name = template_name
        self.template_map = {
            "default": FN_CALL_TEMPLATE,
            "qwen": FN_CALL_TEMPLATE_QWEN,
            "with_ci": FN_CALL_TEMPLATE_WITH_CI,
        }

        if template_name not in self.template_map:
            raise ValueError(
                f"Unknown template_name: {template_name}. "
                f"Available options: {list(self.template_map.keys())}"
            )

    def preprocess_fncall_messages(
        self,
        messages: List[Message],
        functions: List[dict],
        lang: Literal["en", "zh"],
        parallel_function_calls: bool = True,
        function_choice: Union[Literal["auto"], str] = "auto",
    ) -> List[Message]:
        """Convert a conversation with function calls into plain-text messages.

        Assistant ``function_call`` entries become ``<tool_call>`` text items,
        function results become ``<tool_response>`` text items attached to a
        user message, consecutive messages of the same resulting role are
        merged, and the selected tool template is appended to (or inserted
        as) the system message. The input list is deep-copied, not mutated.

        Args:
            messages: Conversation so far. Assistant and function messages are
                expected to carry list-valued ``content``.
            functions: Tool descriptions to advertise to the model.
            lang: Ignored.
            parallel_function_calls: Ignored.
            function_choice: Only ``"auto"`` is supported.

        Returns:
            The rewritten message list, always starting with a system message.

        Raises:
            NotImplementedError: if ``function_choice`` is not ``"auto"``.
            TypeError: if a message has an unknown role.
        """
        del lang  # ignored
        del parallel_function_calls  # ignored
        if function_choice != "auto":
            raise NotImplementedError

        ori_messages = messages

        # Change function_call responses to plaintext responses:
        messages = []
        for msg in copy.deepcopy(ori_messages):
            role, content, reasoning_content = (
                msg.role,
                msg.content,
                msg.reasoning_content,
            )
            if role in (SYSTEM, USER):
                messages.append(msg)
            elif role == ASSISTANT:
                content = content or []
                fn_call = msg.function_call
                if fn_call:
                    if (not SPECIAL_CODE_MODE) or (
                        CODE_TOOL_PATTERN not in fn_call.name
                    ):
                        fc = {
                            "name": fn_call.name,
                            "arguments": json.loads(fn_call.arguments),
                        }
                        fc = json.dumps(fc, ensure_ascii=False)
                        fc = f"<tool_call>\n{fc}\n</tool_call>"
                    else:
                        # Code tools: the code is moved out of the JSON
                        # arguments into a separate <code> section.
                        para = json.loads(fn_call.arguments)
                        code = para["code"]
                        para["code"] = ""
                        fc = {"name": fn_call.name, "arguments": para}
                        fc = json.dumps(fc, ensure_ascii=False)
                        fc = f"<tool_call>\n{fc}\n<code>\n{code}\n</code>\n</tool_call>"

                    content.append(ContentItem(text=fc))
                # `messages` may still be empty when the conversation starts
                # with an assistant turn, so guard before peeking at the tail.
                if messages and messages[-1].role == ASSISTANT:
                    messages[-1].content.append(ContentItem(text="\n"))
                    messages[-1].content.extend(content)
                else:
                    # TODO: Assuming there will only be one continuous reasoning_content here
                    messages.append(
                        Message(
                            role=role,
                            content=content,
                            reasoning_content=reasoning_content,
                        )
                    )
            elif role == FUNCTION:
                assert isinstance(content, list)
                assert len(content) == 1
                assert content[0].text
                fc = f"<tool_response>\n{content[0].text}\n</tool_response>"
                content = [ContentItem(text=fc)]
                # Same guard as above: a leading function message has no predecessor.
                if messages and messages[-1].role == USER:
                    messages[-1].content.append(ContentItem(text="\n"))
                    messages[-1].content.extend(content)
                else:
                    messages.append(Message(role=USER, content=content))
            else:
                raise TypeError

        tool_descs = [{"type": "function", "function": f} for f in functions]
        tool_names = [
            function.get("name_for_model", function.get("name", ""))
            for function in functions
        ]
        tool_descs = "\n".join([json.dumps(f, ensure_ascii=False) for f in tool_descs])

        # Select template based on configuration
        if SPECIAL_CODE_MODE and any(CODE_TOOL_PATTERN in x for x in tool_names):
            selected_template = FN_CALL_TEMPLATE_WITH_CI
        else:
            selected_template = self.template_map[self.template_name]

        tool_system = selected_template.format(tool_descs=tool_descs)
        # An empty conversation simply receives the tool system prompt.
        if messages and messages[0].role == SYSTEM:
            messages[0].content.append(ContentItem(text="\n\n" + tool_system))
        else:
            messages = [
                Message(role=SYSTEM, content=[ContentItem(text=tool_system)])
            ] + messages
        return messages
127
+
128
+
129
+ FN_CALL_TEMPLATE_QWEN = """# Tools
130
+
131
+ You may call one or more functions to assist with the user query.
132
+
133
+ You are provided with function signatures within <tools></tools> XML tags:
134
+ <tools>
135
+ {tool_descs}
136
+ </tools>
137
+
138
+ For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
139
+ <tool_call>
140
+ {{"name": <function-name>, "arguments": <args-json-object>}}
141
+ </tool_call>"""
142
+
143
+ FN_CALL_TEMPLATE = """You are a web automation agent that performs actions on websites to fulfill user requests by calling various tools.
144
+ * You should stop execution at Critical Points. A Critical Point would be encountered in tasks like 'Checkout', 'Book', 'Purchase', 'Call', 'Email', 'Order', etc where a binding transaction/agreement would require the user's permission/personal or sensitive information (name, email, credit card, address, payment information, resume, etc) in order to complete a transaction (purchase, reservation, sign-up etc), or to communicate in a way that a human would be expected to do (call, email, apply to a job, etc).
145
+ * Solve the task as far as you can up until a Critical Point:
146
+ - For example, if the task is to "call a restaurant to make a reservation", you should not actually make the call but should navigate to the restaurant's page and find the phone number.
147
+ - Similarly, if the task is to "order new size 12 running shoes" you should not actually place the order but should instead search for the right shoes that meet the criteria and add them to the cart.
148
+ - Some tasks, like answering questions, may not encounter a Critical Point at all.
149
+
150
+ You are provided with function signatures within <tools></tools> XML tags:
151
+ <tools>
152
+ {tool_descs}
153
+ </tools>
154
+
155
+ For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
156
+ <tool_call>
157
+ {{"name": <function-name>, "arguments": <args-json-object>}}
158
+ </tool_call>"""
159
+
160
+
161
+ SPECIAL_CODE_MODE = os.getenv("SPECIAL_CODE_MODE", "false").lower() == "true"
162
+ CODE_TOOL_PATTERN = "code_interpreter"
163
+ FN_CALL_TEMPLATE_WITH_CI = """# Tools
164
+
165
+ You may call one or more functions to assist with the user query.
166
+
167
+ You are provided with function signatures within <tools></tools> XML tags:
168
+ <tools>
169
+ {tool_descs}
170
+ </tools>
171
+
172
+ For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
173
+ <tool_call>
174
+ {{"name": <function-name>, "arguments": <args-json-object>}}
175
+ </tool_call>
176
+ For code parameters, use placeholders first, and then put the code within <code></code> XML tags, such as:
177
+ <tool_call>
178
+ {{"name": <function-name>, "arguments": {{"code": ""}}}}
179
+ <code>
180
+ Here is the code.
181
+ </code>
182
+ </tool_call>"""
183
+
184
+
185
+ # Mainly for removing incomplete special tokens when streaming the output
186
+ # This assumes that '<tool_call>\n{"name": "' is the special token for the NousFnCallPrompt
187
def remove_incomplete_special_tokens(text: str) -> str:
    """Blank out ``text`` when it is only a partially streamed special token.

    The special token is ``'<tool_call>\\n{"name": "'``. While streaming, the
    accumulated output may be just the first few characters of that token;
    such a partial (or complete) token is replaced by the empty string.

    A prefix test is used rather than a substring test, so ordinary output
    that merely occurs somewhere inside the token (for example ``name`` or a
    lone ``"``) is returned unchanged.

    Args:
        text: The text streamed so far.

    Returns:
        ``""`` if ``text`` is a prefix of the special token, otherwise ``text``.
    """
    special_token = '<tool_call>\n{"name": "'
    if special_token.startswith(text):
        return ""
    return text
191
+
192
+
193
def extract_fn(text: str):
    """Pull the function name and raw argument text out of a tool-call string.

    The name is the text between ``"name": "`` and the next ``", "``. The
    arguments are everything after ``"arguments": `` with the last five
    characters removed; if five or fewer characters follow the marker, the
    arguments are reported as empty. A marker found at index 0 is treated as
    not found.

    Args:
        text: Text that may contain a serialized tool call.

    Returns:
        A ``(fn_name, fn_args)`` tuple of strings, each ``""`` when absent.
    """
    name_open = '"name": "'
    name_close = '", "'
    args_open = '"arguments": '

    fn_name = ""
    name_at = text.find(name_open)
    if name_at > 0:
        tail = text[name_at + len(name_open) :]
        name_end = tail.find(name_close)
        if name_end > -1:
            fn_name = tail[:name_end]

    fn_args = ""
    args_at = text.find(args_open)
    if args_at > 0:
        fn_args = text[args_at + len(args_open) :]

    # Drop the five trailing characters; shorter remainders count as empty.
    fn_args = fn_args[:-5] if len(fn_args) > 5 else ""
    return fn_name, fn_args
fara/qwen_helpers/schema.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Source: https://github.com/QwenLM/Qwen-Agent/blob/main/qwen_agent/llm/schema.py
2
+
3
+ from typing import List, Literal, Optional, Tuple, Union
4
+
5
+ from pydantic import BaseModel, field_validator, model_validator
6
+
7
+
8
+ DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
9
+
10
+ ROLE = "role"
11
+ CONTENT = "content"
12
+ REASONING_CONTENT = "reasoning_content"
13
+ NAME = "name"
14
+
15
+ SYSTEM = "system"
16
+ USER = "user"
17
+ ASSISTANT = "assistant"
18
+ FUNCTION = "function"
19
+
20
+ FILE = "file"
21
+ IMAGE = "image"
22
+ AUDIO = "audio"
23
+ VIDEO = "video"
24
+
25
+
26
class BaseModelCompatibleDict(BaseModel):
    """Pydantic model that can also be used with dict-style access.

    Adds ``obj[key]`` reads and writes, a dict-like ``get``, and makes
    ``model_dump`` / ``model_dump_json`` omit ``None`` fields unless the
    caller passes ``exclude_none`` explicitly.
    """

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def model_dump(self, **kwargs):
        # Respect an explicit exclude_none; otherwise drop None fields.
        kwargs.setdefault("exclude_none", True)
        return super().model_dump(**kwargs)

    def model_dump_json(self, **kwargs):
        # Same default as model_dump.
        kwargs.setdefault("exclude_none", True)
        return super().model_dump_json(**kwargs)

    def get(self, key, default=None):
        """Return the attribute ``key``, or ``default`` when it is missing or falsy."""
        try:
            return getattr(self, key) or default
        except AttributeError:
            return default

    def __str__(self):
        return str(self.model_dump())
55
+
56
+
57
class FunctionCall(BaseModelCompatibleDict):
    """A function call requested by the model: a name plus its arguments as a string."""

    name: str
    arguments: str

    def __init__(self, name: str, arguments: str):
        # Explicit signature so both fields can be passed positionally.
        super().__init__(name=name, arguments=arguments)

    def __repr__(self):
        dumped = self.model_dump()
        return f"FunctionCall({dumped})"
66
+
67
+
68
class ContentItem(BaseModelCompatibleDict):
    """One piece of message content: exactly one of text, image, file, audio or video."""

    text: Optional[str] = None
    image: Optional[str] = None
    file: Optional[str] = None
    audio: Optional[Union[str, dict]] = None
    video: Optional[Union[str, list]] = None

    def __init__(
        self,
        text: Optional[str] = None,
        image: Optional[str] = None,
        file: Optional[str] = None,
        audio: Optional[Union[str, dict]] = None,
        video: Optional[Union[str, list]] = None,
    ):
        super().__init__(text=text, image=image, file=file, audio=audio, video=video)

    @model_validator(mode="after")
    def check_exclusivity(self):
        """Reject items that do not carry exactly one kind of content.

        ``text`` counts as provided whenever it is not ``None`` (so an empty
        string is accepted); the other fields count only when truthy.
        """
        supplied = [
            self.text is not None,
            bool(self.image),
            bool(self.file),
            bool(self.audio),
            bool(self.video),
        ]
        if sum(supplied) != 1:
            raise ValueError(
                "Exactly one of 'text', 'image', 'file', 'audio', or 'video' must be provided."
            )
        return self

    def __repr__(self):
        dumped = self.model_dump()
        return f"ContentItem({dumped})"

    def get_type_and_value(
        self,
    ) -> Tuple[Literal["text", "image", "file", "audio", "video"], str]:
        """Return the single ``(field_name, value)`` pair left after dropping None fields."""
        dumped = self.model_dump()
        ((kind, payload),) = dumped.items()
        assert kind in ("text", "image", "file", "audio", "video")
        return kind, payload

    @property
    def type(self) -> Literal["text", "image", "file", "audio", "video"]:
        """Name of the populated field."""
        return self.get_type_and_value()[0]

    @property
    def value(self) -> str:
        """Value of the populated field."""
        return self.get_type_and_value()[1]
124
+
125
+
126
class Message(BaseModelCompatibleDict):
    """A chat message with a validated role and string or list-of-items content."""

    role: str
    content: Union[str, List[ContentItem]]
    reasoning_content: Optional[Union[str, List[ContentItem]]] = None
    name: Optional[str] = None
    function_call: Optional[FunctionCall] = None
    extra: Optional[dict] = None

    def __init__(
        self,
        role: str,
        content: Union[str, List[ContentItem]],
        reasoning_content: Optional[Union[str, List[ContentItem]]] = None,
        name: Optional[str] = None,
        function_call: Optional[FunctionCall] = None,
        extra: Optional[dict] = None,
        **kwargs,
    ):
        # None is normalised to an empty string for both content fields;
        # any additional keyword arguments are accepted and discarded.
        super().__init__(
            role=role,
            content="" if content is None else content,
            reasoning_content="" if reasoning_content is None else reasoning_content,
            name=name,
            function_call=function_call,
            extra=extra,
        )

    def __repr__(self):
        dumped = self.model_dump()
        return f"Message({dumped})"

    @field_validator("role")
    def role_checker(cls, value: str) -> str:
        """Ensure the role is one of user, assistant, system or function."""
        allowed = [USER, ASSISTANT, SYSTEM, FUNCTION]
        if value in allowed:
            return value
        raise ValueError(
            f'{value} must be one of {",".join(allowed)}'
        )
fara/qwen_helpers/utils.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Source: https://github.com/QwenLM/Qwen-Agent/blob/main/qwen_agent/utils/utils.py
2
+
3
+ import json
4
+ import re
5
+
6
+ from typing import Any
7
+
8
+
9
# Matches a single character in the range U+4E00..U+9FFF.
CHINESE_CHAR_RE = re.compile(r"[\u4e00-\u9fff]")


def has_chinese_chars(data: Any) -> bool:
    """Tell whether the string form of ``data`` contains a Chinese character.

    ``data`` may be any object; it is formatted to text first, so nested
    containers are searched through their printed representation.
    """
    return CHINESE_CHAR_RE.search(format(data)) is not None
15
+
16
+
17
def json_loads(text: str) -> dict:
    """Decode JSON, tolerating a surrounding Markdown code fence.

    Leading and trailing newlines are removed. If what remains starts with
    three backticks and ends with a newline followed by three backticks, the
    first and last lines (the fence) are discarded before decoding.

    Args:
        text: JSON text, optionally wrapped in a fenced code block.

    Returns:
        The decoded JSON value.

    Raises:
        json.decoder.JSONDecodeError: if the remaining text is not valid JSON.
    """
    payload = text.strip("\n")
    is_fenced = payload.startswith("```") and payload.endswith("\n```")
    if is_fenced:
        lines = payload.split("\n")
        payload = "\n".join(lines[1:-1])
    return json.loads(payload)
fara/run_fara.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import argparse
3
+ import os
4
+ from fara import FaraAgent
5
+ from fara.browser.browser_bb import BrowserBB
6
+ import logging
7
+ from typing import Dict
8
+ from pathlib import Path
9
+ import json
10
+
11
+
12
# Configure logging to only show logs from fara.fara_agent
# The root logger is set to CRITICAL, so other loggers that propagate to it
# only emit CRITICAL records.
logging.basicConfig(
    level=logging.CRITICAL,
    format="%(message)s",
)

# Enable INFO level only for fara.fara_agent
fara_agent_logger = logging.getLogger("fara.fara_agent")
fara_agent_logger.setLevel(logging.INFO)

# Add a handler to ensure fara_agent logs are shown
# A dedicated handler is needed because propagation to the root logger is
# switched off below.
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
handler.setFormatter(logging.Formatter("%(message)s"))
fara_agent_logger.addHandler(handler)
fara_agent_logger.propagate = False  # Don't propagate to root logger

# Logger for this module; it is handed to the browser manager in run_fara_agent.
logger = logging.getLogger(__name__)


# Endpoint settings used when no --endpoint_config file is supplied.
DEFAULT_ENDPOINT_CONFIG = {
    "model": "microsoft/Fara-7B",
    "base_url": "http://localhost:5000/v1",
    "api_key": "not-needed",
}
37
+
38
+
39
async def run_fara_agent(
    initial_task: str = None,
    endpoint_config: Dict[str, str] = None,
    start_page: str = "https://www.bing.com/",
    headless: bool = True,
    downloads_folder: str = None,
    save_screenshots: bool = True,
    max_rounds: int = 100,
    use_browser_base: bool = False,
):
    """Start a browser and a FaraAgent, then run tasks in an interactive loop.

    The first task is ``initial_task`` when given, otherwise it is read from
    stdin. After each task the user is prompted for another one; an empty
    answer ends the loop. Errors raised while running a task are printed and
    the loop continues. The agent is always closed on exit.

    Args:
        initial_task: Task to run first; ``None`` means prompt for it.
        endpoint_config: Model endpoint settings passed to the agent as
            ``client_config``.
        start_page: Page passed to the agent as ``start_page``.
        headless: Whether the browser runs without a visible window.
        downloads_folder: Folder passed to both the browser manager and the agent.
        save_screenshots: Passed to the agent as ``save_screenshots``.
        max_rounds: Passed to the agent as ``max_rounds``.
        use_browser_base: Passed to the browser manager as ``use_browser_base``.
    """
    # Initialize browser manager
    print("Initializing Browser...")
    browser_manager = BrowserBB(
        headless=headless,
        viewport_height=900,
        viewport_width=1440,
        page_script_path=None,
        browser_channel="firefox",
        browser_data_dir=None,
        downloads_folder=downloads_folder,
        to_resize_viewport=True,
        single_tab_mode=True,
        animate_actions=False,
        use_browser_base=use_browser_base,
        logger=logger,
    )
    print("Browser Running... Starting Fara Agent...")

    agent = FaraAgent(
        browser_manager=browser_manager,
        client_config=endpoint_config,
        start_page=start_page,
        downloads_folder=downloads_folder,
        save_screenshots=save_screenshots,
        max_rounds=max_rounds,
    )

    try:
        await agent.initialize()

        # Interactive loop
        task = initial_task
        first_round = True

        while True:
            # Prompt only when no task is pending (always the case after the
            # first iteration, because `task` is reset below).
            if task is None:
                if first_round:
                    task = input("Enter task: ").strip()
                else:
                    task = input(
                        "\nEnter another task (or press Enter to exit): "
                    ).strip()

                # An empty answer ends the session.
                if not task:
                    print("Exiting...")
                    break

            print("##########################################")
            print(f"Task: {task}")
            print("##########################################")

            try:
                print("Running Fara...\n")
                final_answer, all_actions, all_observations = await agent.run(task)
                print(f"\nFinal Answer: {final_answer}")
            except Exception as e:
                # Best effort: report the failure and keep the session alive.
                print(f"Error occurred: {e}")
            task = None
            first_round = False

    finally:
        # Close the agent and browser
        await agent.close()
112
+
113
+
114
def main():
    """CLI entry point for fara command.

    Parses the command-line options, checks the BrowserBase environment
    variables when ``--browserbase`` is given, loads the endpoint
    configuration (the built-in default unless ``--endpoint_config`` points to
    a JSON file) and runs the interactive agent loop.

    Exits through ``parser.error`` (usage message, exit status 2) when
    ``--browserbase`` is requested without the required environment variables.
    """
    parser = argparse.ArgumentParser(description="Run FARA agent interactively")
    parser.add_argument(
        "--task",
        type=str,
        required=False,
        help="Initial task for the FARA agent (optional)",
    )
    parser.add_argument(
        "--start_page",
        type=str,
        default="https://www.bing.com/",
        help="The starting page",
    )
    parser.add_argument(
        "--headful",
        action="store_true",
        help="Run the browser in headful mode (show GUI, default is headless)",
    )
    parser.add_argument(
        "--downloads_folder",
        type=str,
        default=None,
        help="Folder to save screenshots and downloads",
    )
    parser.add_argument(
        "--save_screenshots",
        action="store_true",
        help="Whether to save screenshots during the agent's operation",
    )
    parser.add_argument(
        "--max_rounds",
        type=int,
        default=100,
        help="Maximum number of rounds for the agent to run",
    )
    parser.add_argument(
        "--browserbase",
        action="store_true",
        help="Whether to use BrowserBase for browser management",
    )
    parser.add_argument(
        "--endpoint_config",
        type=Path,
        default=None,
        help="Path to the endpoint configuration JSON file. By default, tries local vllm on 5000 port",
    )

    args = parser.parse_args()

    if args.browserbase:
        # Checked explicitly rather than with `assert`, which is removed under
        # `python -O` and would produce a traceback instead of a CLI error.
        missing = [
            var
            for var in ("BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID")
            if not os.environ.get(var)
        ]
        if missing:
            parser.error(
                "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID environment variables "
                f"must be set to use browserbase (missing: {', '.join(missing)})"
            )

    endpoint_config = DEFAULT_ENDPOINT_CONFIG
    if args.endpoint_config:
        # JSON is UTF-8 by specification; do not rely on the locale encoding.
        with open(args.endpoint_config, "r", encoding="utf-8") as f:
            endpoint_config = json.load(f)

    asyncio.run(
        run_fara_agent(
            initial_task=args.task,
            endpoint_config=endpoint_config,
            start_page=args.start_page,
            headless=not args.headful,
            downloads_folder=args.downloads_folder,
            save_screenshots=args.save_screenshots,
            max_rounds=args.max_rounds,
            use_browser_base=args.browserbase,
        )
    )
190
+
191
+
192
+ if __name__ == "__main__":
193
+ main()
fara/types.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import base64
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, List, Tuple, Dict
5
+ from PIL import Image
6
+
7
+
8
+ @dataclass
9
+ class LLMMessage:
10
+ content: str | List[Dict[str, Any]]
11
+ source: str = "user"
12
+
13
+
14
+ @dataclass
15
+ class SystemMessage(LLMMessage):
16
+ def __init__(self, content: str, source: str = "system"):
17
+ self.content = content
18
+ self.source = source
19
+
20
+
21
+ @dataclass
22
+ class UserMessage(LLMMessage):
23
+ def __init__(
24
+ self,
25
+ content: str | List[Dict[str, Any]],
26
+ source: str = "user",
27
+ is_original: bool = False,
28
+ ):
29
+ self.content = content
30
+ self.source = source
31
+ self.is_original = is_original
32
+
33
+
34
+ @dataclass
35
+ class AssistantMessage(LLMMessage):
36
+ def __init__(self, content: str, source: str = "assistant"):
37
+ self.content = content
38
+ self.source = source
39
+
40
+
41
@dataclass
class ImageObj:
    """Image wrapper for handling screenshots and images"""

    # The wrapped PIL image.
    image: Image.Image

    @classmethod
    def from_pil(cls, image: Image.Image) -> "ImageObj":
        """Wrap an existing PIL image without copying it."""
        return cls(image=image)

    def to_base64(self) -> str:
        """Convert PIL image to base64 string

        The image is encoded as PNG into an in-memory buffer and the bytes
        are returned base64-encoded as a UTF-8 string (no data-URL prefix).
        """
        buffered = io.BytesIO()
        self.image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def resize(self, size: Tuple[int, int]) -> Image.Image:
        """Resize the image

        Returns the result of ``Image.resize(size)`` as a plain PIL image,
        not wrapped in an ``ImageObj``.
        """
        return self.image.resize(size)
60
+
61
+
62
@dataclass
class ModelResponse:
    """Response from model call"""

    # Text returned by the model.
    content: str
    # Usage information for the call; defaults to a fresh empty dict per instance.
    usage: Dict[str, Any] = field(default_factory=dict)
68
+
69
+
70
@dataclass
class FunctionCall:
    """Represents a function call with arguments"""

    # Identifier of this call.
    id: str
    # Name of the function/tool being called.
    name: str
    # Decoded arguments, keyed by argument name.
    arguments: Dict[str, Any]
77
+
78
+
79
def message_to_openai_format(message: LLMMessage) -> Dict[str, Any]:
    """Convert our LLMMessage to OpenAI API format

    The role is "system" for a SystemMessage, "assistant" for an
    AssistantMessage and "user" for anything else. List content is turned
    into a list of parts: ImageObj items become base64 PNG ``image_url``
    parts, strings become ``text`` parts, dicts are passed through as-is,
    and items of any other type are dropped. Non-list content is returned
    unchanged.
    """
    if isinstance(message, SystemMessage):
        role = "system"
    elif isinstance(message, AssistantMessage):
        role = "assistant"
    else:
        role = "user"

    payload = message.content
    if not isinstance(payload, list):
        # Simple text content
        return {"role": role, "content": payload}

    # Handle multimodal content (text + images)
    parts = []
    for entry in payload:
        if isinstance(entry, ImageObj):
            # Convert image to base64 data URL
            encoded = entry.to_base64()
            parts.append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded}"},
                }
            )
        elif isinstance(entry, str):
            parts.append({"type": "text", "text": entry})
        elif isinstance(entry, dict):
            # Already in proper format
            parts.append(entry)
    return {"role": role, "content": parts}
111
+
112
+
113
@dataclass
class WebSurferEvent:
    """Structured log record describing something the web agent did."""

    # Name of the component emitting the event.
    source: str
    # Human-readable description of the event.
    message: str
    # URL of the page the event relates to.
    url: str
    # Name of the action, when the event describes one.
    action: str | None = None
    # Arguments of that action, when available.
    arguments: Dict[str, Any] | None = None
fara/utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
def strip_url_query(url):
    """Return ``url`` truncated at its first "?" (the whole string if none)."""
    base, _, _ = url.partition("?")
    return base
3
+
4
+
5
def get_trimmed_url(url, max_len):
    """Drop the query string from ``url`` and shorten the result for display.

    When the query-less URL is longer than ``max_len`` characters it is cut to
    its first ``max_len`` characters and " ..." is appended, so the returned
    string can be up to four characters longer than ``max_len``.
    """
    without_query = strip_url_query(url)
    if len(without_query) <= max_len:
        return without_query
    return without_query[:max_len] + " ..."
index.html ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Fara CUA</title>
7
+ </head>
8
+ <body>
9
+ <div id="root"></div>
10
+ <script type="module" src="/src/main.tsx"></script>
11
+ </body>
12
+ </html>
nginx.conf ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
worker_processes auto;
error_log /var/log/nginx/error.log warn;
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;

    sendfile on;
    keepalive_timeout 65;

    # Temp paths for non-root user
    client_body_temp_path /tmp/client_temp;
    proxy_temp_path /tmp/proxy_temp_path;
    fastcgi_temp_path /tmp/fastcgi_temp;
    uwsgi_temp_path /tmp/uwsgi_temp;
    scgi_temp_path /tmp/scgi_temp;

    # Only ask the backend to upgrade the connection when the client actually
    # sent an Upgrade header; ordinary HTTP requests get "Connection: close"
    # instead of an unconditional "Connection: upgrade".
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      close;
    }

    upstream backend {
        server 127.0.0.1:8000;
    }

    server {
        listen 7860;
        server_name localhost;

        root /app/static;
        index index.html;

        # API endpoints proxy to Python backend
        location /api/ {
            proxy_pass http://backend/api/;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_read_timeout 300s;
            proxy_connect_timeout 75s;
        }

        # WebSocket endpoint
        location /ws {
            proxy_pass http://backend/ws;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            # Long timeouts (24h, in seconds) so idle WebSocket sessions are
            # not dropped by the proxy.
            proxy_read_timeout 86400;
            proxy_send_timeout 86400;
        }

        # Serve static files and SPA fallback
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Cache static assets
        location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";
        }
    }
}
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "fara-cua-front",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "build:dev": "vite build --mode development",
10
+ "lint": "eslint src/ --config eslint.config.js",
11
+ "type-check": "tsc --noEmit --project tsconfig.json",
12
+ "preview": "vite preview"
13
+ },
14
+ "dependencies": {
15
+ "@emotion/react": "^11.14.0",
16
+ "@emotion/styled": "^11.14.1",
17
+ "@mui/icons-material": "^7.3.4",
18
+ "@mui/lab": "^7.0.1-beta.19",
19
+ "@mui/material": "^7.3.4",
20
+ "gifshot": "^0.4.5",
21
+ "react": "^18.3.1",
22
+ "react-dom": "^18.3.1",
23
+ "react-router-dom": "^6.30.1",
24
+ "ulid": "^3.0.1",
25
+ "zustand": "^5.0.8"
26
+ },
27
+ "devDependencies": {
28
+ "@eslint/js": "^9.38.0",
29
+ "@types/node": "^22.16.5",
30
+ "@types/react": "^18.3.23",
31
+ "@types/react-dom": "^18.3.7",
32
+ "@vitejs/plugin-react-swc": "^3.11.0",
33
+ "autoprefixer": "^10.4.21",
34
+ "eslint": "^9.32.0",
35
+ "eslint-plugin-react-hooks": "^5.2.0",
36
+ "eslint-plugin-react-refresh": "^0.4.20",
37
+ "globals": "^15.15.0",
38
+ "typescript-eslint": "^8.38.0",
39
+ "vite": "^5.4.19"
40
+ }
41
+ }
src/App.tsx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { CssBaseline, ThemeProvider } from '@mui/material';
import { useEffect, useMemo } from 'react';
import { BrowserRouter, Route, Routes } from "react-router-dom";
import { getWebSocketUrl } from './config/api';
import { useAgentWebSocket } from './hooks/useAgentWebSocket';
import Task from "./pages/Task";
import Welcome from "./pages/Welcome";
import { selectIsDarkMode, useAgentStore } from './stores/agentStore';
import getTheme from './theme';
10
+
11
/**
 * Root component: builds the MUI theme from the store's dark-mode flag, opens
 * the agent WebSocket once at app level, and declares the two routes.
 */
const App = () => {
  const isDarkMode = useAgentStore(selectIsDarkMode);
  const theme = useMemo(() => getTheme(isDarkMode ? 'dark' : 'light'), [isDarkMode]);

  // Initialize WebSocket connection at app level so it persists across route changes
  const { stopCurrentTask } = useAgentWebSocket({ url: getWebSocketUrl() });

  // Expose the stop function on `window` for global access. This is done in an
  // effect rather than during render so that rendering stays free of side
  // effects, and the global is removed again when the component unmounts.
  useEffect(() => {
    const globalWindow = window as Window & { __stopCurrentTask?: () => void };
    globalWindow.__stopCurrentTask = stopCurrentTask;

    return () => {
      // Only remove the global if it still points at the function we installed.
      if (globalWindow.__stopCurrentTask === stopCurrentTask) {
        delete globalWindow.__stopCurrentTask;
      }
    };
  }, [stopCurrentTask]);

  return (
    <ThemeProvider theme={theme}>
      <CssBaseline />
      <BrowserRouter>
        <Routes>
          <Route path="/" element={<Welcome />} />
          <Route path="/task" element={<Task />} />
        </Routes>
      </BrowserRouter>
    </ThemeProvider>
  );
};
34
+
35
+ export default App;
src/components/ConnectionStatus.tsx ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Box, Chip, keyframes } from '@mui/material';
3
+ import CircleIcon from '@mui/icons-material/Circle';
4
+
5
+ interface ConnectionStatusProps {
6
+ isConnected: boolean;
7
+ }
8
+
9
+ // Pulse animation for connected indicator
10
+ const pulse = keyframes`
11
+ 0%, 100% {
12
+ opacity: 1;
13
+ }
14
+ 50% {
15
+ opacity: 0.5;
16
+ }
17
+ `;
18
+
19
/**
 * Small chip showing whether the backend is reachable: a text label plus a
 * coloured dot that pulses while connected.
 */
export const ConnectionStatus: React.FC<ConnectionStatusProps> = ({ isConnected }) => {
  const statusLabel = isConnected ? 'Backend Online' : 'Backend Offline';
  const dotColor = isConnected ? '#10b981' : '#ef4444';
  const dotAnimation = isConnected ? `${pulse} 2s ease-in-out infinite` : 'none';

  // The dot is rendered through the chip's delete-icon slot.
  const statusDot = (
    <CircleIcon
      sx={{
        fontSize: 6,
        animation: dotAnimation,
      }}
    />
  );

  const chipStyles = {
    backgroundColor: 'action.hover',
    border: '1px solid',
    borderColor: 'divider',
    color: 'text.primary',
    fontSize: '0.7rem',
    fontWeight: 500,
    height: 'auto',
    '& .MuiChip-label': {
      px: 1,
      py: 0.5,
    },
    '& .MuiChip-deleteIcon': {
      color: dotColor,
      marginRight: 0.5,
      // Keep the same colour on hover so the dot does not react to the pointer.
      '&:hover': {
        color: dotColor,
      },
    },
  };

  return (
    <Chip
      label={statusLabel}
      deleteIcon={statusDot}
      onDelete={() => {}} // Required for deleteIcon to show
      size="small"
      sx={chipStyles}
    />
  );
};
src/components/Header.tsx ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect, useRef } from 'react';
2
+ import { AppBar, Toolbar, Box, Typography, Chip, IconButton, CircularProgress, keyframes, Button } from '@mui/material';
3
+ import ArrowBackIcon from '@mui/icons-material/ArrowBack';
4
+ import LightModeOutlined from '@mui/icons-material/LightModeOutlined';
5
+ import DarkModeOutlined from '@mui/icons-material/DarkModeOutlined';
6
+ import CheckIcon from '@mui/icons-material/Check';
7
+ import CloseIcon from '@mui/icons-material/Close';
8
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
9
+ import InputIcon from '@mui/icons-material/Input';
10
+ import OutputIcon from '@mui/icons-material/Output';
11
+ import SmartToyIcon from '@mui/icons-material/SmartToy';
12
+ import FormatListNumberedIcon from '@mui/icons-material/FormatListNumbered';
13
+ import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
14
+ import StopCircleIcon from '@mui/icons-material/StopCircle';
15
+ import { useAgentStore, selectTrace, selectError, selectIsDarkMode, selectMetadata, selectIsConnectingToE2B, selectFinalStep } from '@/stores/agentStore';
16
+
17
+ interface HeaderProps {
18
+ isAgentProcessing: boolean;
19
+ onBackToHome?: () => void;
20
+ }
21
+
22
+ // Animation for the running task border - smooth oscillation (primary)
23
+ const borderPulse = keyframes`
24
+ 0%, 100% {
25
+ border-color: rgba(79, 134, 198, 0.5);
26
+ box-shadow: 0 0 0 0 rgba(79, 134, 198, 0.3);
27
+ }
28
+ 50% {
29
+ border-color: rgba(79, 134, 198, 1);
30
+ box-shadow: 0 0 8px 2px rgba(79, 134, 198, 0.4);
31
+ }
32
+ `;
33
+
34
+ // Animation for the background glow (primary)
35
+ const backgroundPulse = keyframes`
36
+ 0%, 100% {
37
+ background-color: rgba(79, 134, 198, 0.08);
38
+ }
39
+ 50% {
40
+ background-color: rgba(79, 134, 198, 0.15);
41
+ }
42
+ `;
43
+
44
+ // Animation for token flash - smooth glow effect
45
+ const tokenFlash = keyframes`
46
+ 0% {
47
+ filter: brightness(1);
48
+ text-shadow: none;
49
+ }
50
+ 25% {
51
+ filter: brightness(1.4);
52
+ text-shadow: 0 0 8px rgba(79, 134, 198, 0.6);
53
+ }
54
+ 100% {
55
+ filter: brightness(1);
56
+ text-shadow: none;
57
+ }
58
+ `;
59
+
60
+ // Animation for token icon flash
61
+ const iconFlash = keyframes`
62
+ 0% {
63
+ filter: brightness(1);
64
+ transform: scale(1);
65
+ }
66
+ 25% {
67
+ filter: brightness(1.6);
68
+ transform: scale(1.15);
69
+ }
70
+ 100% {
71
+ filter: brightness(1);
72
+ transform: scale(1);
73
+ }
74
+ `;
75
+
76
/**
 * Top bar of the task view.
 *
 * First row: back button, the current instruction, an emergency Stop button
 * (only while the agent is processing) and the dark-mode toggle.
 * Second row (only when a trace exists): status badge, model name, step count,
 * elapsed time and input/output token counters.
 *
 * Props:
 *  - isAgentProcessing: whether the agent is currently working on a task.
 *  - onBackToHome: optional click handler for the back button.
 */
export const Header: React.FC<HeaderProps> = ({ isAgentProcessing, onBackToHome }) => {
  // How long a token counter stays highlighted after it increases.
  const FLASH_DURATION_MS = 800;

  const trace = useAgentStore(selectTrace);
  // NOTE(review): `error` is not used anywhere in this component.
  const error = useAgentStore(selectError);
  const finalStep = useAgentStore(selectFinalStep);
  const isDarkMode = useAgentStore(selectIsDarkMode);
  const toggleDarkMode = useAgentStore((state) => state.toggleDarkMode);
  const metadata = useAgentStore(selectMetadata);
  const isConnectingToE2B = useAgentStore(selectIsConnectingToE2B);
  const [elapsedTime, setElapsedTime] = useState(0);
  const [inputTokenFlash, setInputTokenFlash] = useState(false);
  const [outputTokenFlash, setOutputTokenFlash] = useState(false);
  const prevInputTokens = useRef(0);
  const prevOutputTokens = useRef(0);
  // Pending "turn the flash off" timers, kept so they can be restarted or cancelled.
  const inputFlashTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
  const outputFlashTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  const startTimestamp = trace?.timestamp;
  const finalDuration = metadata?.duration;

  // Update elapsed time every 100ms when agent is processing; otherwise show
  // the duration reported in the metadata (when it is positive).
  // Depending on the primitive values (not the whole metadata object) keeps
  // the interval from being torn down on every unrelated metadata update.
  useEffect(() => {
    if (isAgentProcessing && startTimestamp) {
      const startedAtMs = new Date(startTimestamp).getTime();
      const interval = setInterval(() => {
        setElapsedTime((Date.now() - startedAtMs) / 1000);
      }, 100);

      return () => clearInterval(interval);
    }

    if (typeof finalDuration === 'number' && finalDuration > 0) {
      setElapsedTime(finalDuration);
    }
    return undefined;
  }, [isAgentProcessing, startTimestamp, finalDuration]);

  // Detect token changes and trigger flash animation.
  // A counter only flashes when it grows from a non-zero previous value.
  useEffect(() => {
    if (!metadata) return;

    // Input tokens changed
    if (metadata.inputTokensUsed > prevInputTokens.current && prevInputTokens.current > 0) {
      setInputTokenFlash(true);
      // Restart the timer so an earlier flash cannot cut this one short.
      if (inputFlashTimer.current) clearTimeout(inputFlashTimer.current);
      inputFlashTimer.current = setTimeout(() => setInputTokenFlash(false), FLASH_DURATION_MS);
    }
    prevInputTokens.current = metadata.inputTokensUsed;

    // Output tokens changed
    if (metadata.outputTokensUsed > prevOutputTokens.current && prevOutputTokens.current > 0) {
      setOutputTokenFlash(true);
      if (outputFlashTimer.current) clearTimeout(outputFlashTimer.current);
      outputFlashTimer.current = setTimeout(() => setOutputTokenFlash(false), FLASH_DURATION_MS);
    }
    prevOutputTokens.current = metadata.outputTokensUsed;
  }, [metadata?.inputTokensUsed, metadata?.outputTokensUsed]);

  // Cancel any pending flash timers when the header unmounts.
  useEffect(() => {
    return () => {
      if (inputFlashTimer.current) clearTimeout(inputFlashTimer.current);
      if (outputFlashTimer.current) clearTimeout(outputFlashTimer.current);
    };
  }, []);

  // Determine task status - Use finalStep as source of truth
  const getTaskStatus = () => {
    // If we have a final step, use its type
    if (finalStep) {
      switch (finalStep.type) {
        case 'failure':
          return { label: 'Task failed', color: 'error', icon: <CloseIcon sx={{ fontSize: 16, color: 'error.main' }} /> };
        case 'stopped':
          return { label: 'Task stopped', color: 'warning', icon: <StopCircleIcon sx={{ fontSize: 16, color: 'warning.main' }} /> };
        case 'max_steps_reached':
          return { label: 'Max steps reached', color: 'warning', icon: <HourglassEmptyIcon sx={{ fontSize: 16, color: 'warning.main' }} /> };
        case 'success':
          return { label: 'Completed', color: 'success', icon: <CheckIcon sx={{ fontSize: 16, color: 'success.main' }} /> };
      }
    }
    // Otherwise check running states
    if (isConnectingToE2B) return { label: 'Starting...', color: 'primary', icon: <CircularProgress size={16} thickness={5} sx={{ color: 'primary.main' }} /> };
    if (isAgentProcessing || trace?.isRunning) return { label: 'Running', color: 'primary', icon: <CircularProgress size={16} thickness={5} sx={{ color: 'primary.main' }} /> };
    return { label: 'Ready', color: 'default', icon: <CheckIcon sx={{ fontSize: 16, color: 'text.secondary' }} /> };
  };

  const taskStatus = getTaskStatus();

  // Palette tokens for the status badge, derived once from the status colour.
  // 'default' (the "Ready" state) uses neutral tokens; every other status uses
  // its own palette family.
  const isNeutralStatus = taskStatus.color === 'default';
  const statusBackgroundColor = isNeutralStatus ? 'action.hover' : `${taskStatus.color}.50`;
  const statusBorderColor = isNeutralStatus ? 'divider' : `${taskStatus.color}.main`;
  const statusTextColor = isNeutralStatus ? 'text.primary' : `${taskStatus.color}.main`;

  // Extract model name from modelId (e.g., "Qwen/Qwen3-VL-8B-Instruct" -> "Qwen3-VL-8B-Instruct")
  const modelName = trace?.modelId?.split('/').pop() || 'Unknown Model';

  // Handler for emergency stop: calls the stop function published on `window`, if any.
  const handleEmergencyStop = () => {
    const stopTask = (window as Window & { __stopCurrentTask?: () => void }).__stopCurrentTask;
    if (stopTask) {
      stopTask();
    }
  };

  // Thin vertical separator between the items of the second row.
  const divider = <Box sx={{ width: '1px', height: 16, backgroundColor: 'divider' }} />;

  return (
    <AppBar
      position="static"
      elevation={0}
      sx={{
        backgroundColor: 'background.paper',
        borderBottom: '1px solid',
        borderColor: 'divider',
      }}
    >
      <Toolbar disableGutters sx={{ px: 2, py: 2.5, flexDirection: 'column', alignItems: 'stretch', gap: 0 }}>
        {/* First row: Back button + Task info + Emergency Stop + Dark Mode */}
        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', width: '100%', gap: 3 }}>
          {/* Left side: Back button + Task info */}
          <Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5, flex: 1, minWidth: 0 }}>
            <IconButton
              onClick={onBackToHome}
              size="small"
              aria-label="Back to home"
              sx={{
                color: 'primary.main',
                backgroundColor: 'primary.50',
                border: '1px solid',
                borderColor: 'primary.200',
                cursor: 'pointer',
                '&:hover': {
                  backgroundColor: 'primary.100',
                  borderColor: 'primary.main',
                },
              }}
            >
              <ArrowBackIcon fontSize="small" />
            </IconButton>
            <Typography
              variant="body2"
              sx={{
                color: 'text.primary',
                fontWeight: 700,
                fontSize: '1rem',
                overflow: 'hidden',
                textOverflow: 'ellipsis',
                whiteSpace: 'nowrap',
              }}
            >
              {trace?.instruction || 'No task running'}
            </Typography>
          </Box>

          {/* Right side: Emergency Stop + Dark Mode */}
          <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
            {/* Emergency Stop Button - Only show when agent is processing */}
            {isAgentProcessing && (
              <Button
                onClick={handleEmergencyStop}
                variant="outlined"
                size="small"
                startIcon={<StopCircleIcon />}
                sx={{
                  color: 'error.main',
                  borderColor: 'error.main',
                  backgroundColor: 'transparent',
                  fontWeight: 600,
                  fontSize: '0.8rem',
                  px: 1.5,
                  py: 0.5,
                  textTransform: 'none',
                  '&:hover': {
                    backgroundColor: 'error.50',
                    borderColor: 'error.dark',
                  },
                }}
              >
                Stop
              </Button>
            )}

            <IconButton
              onClick={toggleDarkMode}
              size="small"
              aria-label={isDarkMode ? 'Switch to light mode' : 'Switch to dark mode'}
              sx={{
                color: 'primary.main',
                backgroundColor: 'primary.50',
                border: '1px solid',
                borderColor: 'primary.200',
                '&:hover': {
                  backgroundColor: 'primary.100',
                  borderColor: 'primary.main',
                },
              }}
            >
              {isDarkMode ? <LightModeOutlined fontSize="small" /> : <DarkModeOutlined fontSize="small" />}
            </IconButton>
          </Box>
        </Box>

        {/* Second row: Status + Model + Metadata - Only show when we have trace data */}
        {trace && (
          <Box
            sx={{
              display: 'flex',
              alignItems: 'center',
              gap: 1.5,
              pl: 5.5,
              pr: 1,
              pt: .5,
              mt: .5,
            }}
          >
            {/* Status Badge - Compact */}
            <Box
              sx={{
                display: 'flex',
                alignItems: 'center',
                gap: 0.5,
                px: 1,
                py: 0.25,
                borderRadius: 1,
                backgroundColor: statusBackgroundColor,
                border: '1px solid',
                borderColor: statusBorderColor,
              }}
            >
              {taskStatus.icon}
              <Typography
                variant="caption"
                sx={{
                  fontSize: '0.7rem',
                  fontWeight: 700,
                  color: statusTextColor,
                }}
              >
                {taskStatus.label}
              </Typography>
            </Box>

            {/* Divider */}
            {divider}

            {/* Model */}
            <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
              <SmartToyIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
              <Typography
                variant="caption"
                sx={{
                  fontSize: '0.75rem',
                  fontWeight: 600,
                  color: 'text.primary',
                }}
              >
                {modelName}
              </Typography>
            </Box>

            {/* Steps Count */}
            {metadata && (
              <>
                {divider}
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                  <Typography
                    variant="caption"
                    sx={{
                      fontSize: '0.75rem',
                      fontWeight: 700,
                      color: 'text.primary',
                      mr: 0.5,
                    }}
                  >
                    {metadata.numberOfSteps}
                  </Typography>
                  <Typography
                    variant="caption"
                    sx={{
                      fontSize: '0.7rem',
                      fontWeight: 400,
                      color: 'text.secondary',
                    }}
                  >
                    {metadata.numberOfSteps === 1 ? 'Step' : 'Steps'}
                  </Typography>
                </Box>
              </>
            )}

            {/* Time */}
            {(isAgentProcessing || metadata) && (
              <>
                {divider}
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                  <AccessTimeIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
                  <Typography
                    variant="caption"
                    sx={{
                      fontSize: '0.75rem',
                      fontWeight: 700,
                      color: 'text.primary',
                      minWidth: '45px',
                      textAlign: 'left',
                    }}
                  >
                    {elapsedTime.toFixed(1)}s
                  </Typography>
                </Box>
              </>
            )}

            {/* Input Tokens */}
            {metadata && metadata.inputTokensUsed > 0 && (
              <>
                {divider}
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                  <InputIcon
                    sx={{
                      fontSize: '0.85rem',
                      color: 'primary.main',
                      transition: 'all 0.2s ease',
                      animation: inputTokenFlash ? `${iconFlash} 0.8s ease-out` : 'none',
                    }}
                  />
                  <Box
                    sx={{
                      transition: 'all 0.2s ease',
                      animation: inputTokenFlash ? `${tokenFlash} 0.8s ease-out` : 'none',
                    }}
                  >
                    <Typography
                      variant="caption"
                      sx={{
                        fontSize: '0.75rem',
                        fontWeight: 700,
                        color: 'text.primary',
                      }}
                    >
                      {metadata.inputTokensUsed.toLocaleString()}
                    </Typography>
                  </Box>
                </Box>
              </>
            )}

            {/* Output Tokens */}
            {metadata && metadata.outputTokensUsed > 0 && (
              <>
                {divider}
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                  <OutputIcon
                    sx={{
                      fontSize: '0.85rem',
                      color: 'primary.main',
                      transition: 'all 0.2s ease',
                      animation: outputTokenFlash ? `${iconFlash} 0.8s ease-out` : 'none',
                    }}
                  />
                  <Box
                    sx={{
                      transition: 'all 0.2s ease',
                      animation: outputTokenFlash ? `${tokenFlash} 0.8s ease-out` : 'none',
                    }}
                  >
                    <Typography
                      variant="caption"
                      sx={{
                        fontSize: '0.75rem',
                        fontWeight: 700,
                        color: 'text.primary',
                      }}
                    >
                      {metadata.outputTokensUsed.toLocaleString()}
                    </Typography>
                  </Box>
                </Box>
              </>
            )}
          </Box>
        )}
      </Toolbar>
    </AppBar>
  );
};
src/components/ProcessingIndicator.tsx ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Box, CircularProgress, Typography } from '@mui/material';
3
+
4
+ interface ProcessingIndicatorProps {
5
+ isAgentProcessing: boolean;
6
+ }
7
+
8
/**
 * Translucent pill with a spinner and "Agent is running..." text.
 * Renders nothing when `isAgentProcessing` is false.
 *
 * The background and border follow the active palette mode: the label uses
 * the theme's `text.primary`, so a fixed near-white background would leave
 * light text on a light surface in dark mode.
 */
export const ProcessingIndicator: React.FC<ProcessingIndicatorProps> = ({ isAgentProcessing }) => {
  if (!isAgentProcessing) return null;

  return (
    <Box
      role="status"
      sx={{
        display: 'flex',
        alignItems: 'center',
        gap: 2,
        // Light mode keeps the original colours; dark mode gets a dark surface.
        backgroundColor: (theme) =>
          theme.palette.mode === 'dark' ? 'rgba(18, 18, 18, 0.9)' : 'rgba(255, 255, 255, 0.9)',
        px: 2,
        py: 1,
        borderRadius: 2,
        backdropFilter: 'blur(10px)',
        border: (theme) =>
          `1px solid ${theme.palette.mode === 'dark' ? 'rgba(255, 255, 255, 0.12)' : 'rgba(0, 0, 0, 0.1)'}`,
      }}
    >
      <CircularProgress size={20} thickness={4} />
      <Typography variant="body2" sx={{ fontWeight: 600, color: 'text.primary' }}>
        Agent is running...
      </Typography>
    </Box>
  );
};
+ };
src/components/WelcomeScreen.tsx ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { fetchAvailableModels, generateRandomQuestion } from '@/services/api';
2
+ import { selectAvailableModels, selectIsDarkMode, selectIsLoadingModels, selectSelectedModelId, useAgentStore } from '@/stores/agentStore';
3
+ import DarkModeOutlined from '@mui/icons-material/DarkModeOutlined';
4
+ import LightModeOutlined from '@mui/icons-material/LightModeOutlined';
5
+ import SendIcon from '@mui/icons-material/Send';
6
+ import ShuffleIcon from '@mui/icons-material/Shuffle';
7
+ import SmartToyIcon from '@mui/icons-material/SmartToy';
8
+ import { Box, Button, CircularProgress, Container, FormControl, IconButton, InputLabel, MenuItem, Paper, Select, TextField, Typography } from '@mui/material';
9
+ import React, { useEffect, useRef, useState } from 'react';
10
+
11
+ interface WelcomeScreenProps {
12
+ onStartTask: (instruction: string, modelId: string) => void;
13
+ isConnected: boolean;
14
+ }
15
+
16
+ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isConnected }) => {
17
+ const [customTask, setCustomTask] = useState('');
18
+ const [isTyping, setIsTyping] = useState(false);
19
+ const [isGeneratingQuestion, setIsGeneratingQuestion] = useState(false);
20
+ const typingIntervalRef = useRef<NodeJS.Timeout | null>(null);
21
+
22
+ const isDarkMode = useAgentStore(selectIsDarkMode);
23
+ const toggleDarkMode = useAgentStore((state) => state.toggleDarkMode);
24
+ const selectedModelId = useAgentStore(selectSelectedModelId);
25
+ const setSelectedModelId = useAgentStore((state) => state.setSelectedModelId);
26
+ const availableModels = useAgentStore(selectAvailableModels);
27
+ const isLoadingModels = useAgentStore(selectIsLoadingModels);
28
+ const setAvailableModels = useAgentStore((state) => state.setAvailableModels);
29
+ const setIsLoadingModels = useAgentStore((state) => state.setIsLoadingModels);
30
+
31
+ // Load available models on mount
32
+ useEffect(() => {
33
+ const loadModels = async () => {
34
+ setIsLoadingModels(true);
35
+ try {
36
+ const models = await fetchAvailableModels();
37
+ setAvailableModels(models);
38
+
39
+ // Set first model as default if current selection is not in the list
40
+ if (models.length > 0 && !models.includes(selectedModelId)) {
41
+ setSelectedModelId(models[0]);
42
+ }
43
+ } catch (error) {
44
+ console.error('Failed to load models:', error);
45
+ // Fallback to empty array on error
46
+ setAvailableModels([]);
47
+ } finally {
48
+ setIsLoadingModels(false);
49
+ }
50
+ };
51
+
52
+ loadModels();
53
+ }, []); // eslint-disable-line react-hooks/exhaustive-deps
54
+
55
+ // Clean up typing interval on unmount
56
+ useEffect(() => {
57
+ return () => {
58
+ if (typingIntervalRef.current) {
59
+ clearInterval(typingIntervalRef.current);
60
+ }
61
+ };
62
+ }, []);
63
+
64
+ const handleWriteRandomTask = async () => {
65
+ // Clear any existing typing interval
66
+ if (typingIntervalRef.current) {
67
+ clearInterval(typingIntervalRef.current);
68
+ typingIntervalRef.current = null;
69
+ }
70
+
71
+ setIsGeneratingQuestion(true);
72
+ try {
73
+ const randomTask = await generateRandomQuestion();
74
+
75
+ // Clear current text
76
+ setCustomTask('');
77
+ setIsTyping(true);
78
+
79
+ // Type effect
80
+ let currentIndex = 0;
81
+ typingIntervalRef.current = setInterval(() => {
82
+ if (currentIndex < randomTask.length) {
83
+ setCustomTask(randomTask.substring(0, currentIndex + 1));
84
+ currentIndex++;
85
+ } else {
86
+ if (typingIntervalRef.current) {
87
+ clearInterval(typingIntervalRef.current);
88
+ typingIntervalRef.current = null;
89
+ }
90
+ setIsTyping(false);
91
+ }
92
+ }, 10); // 10ms per character
93
+ } catch (error) {
94
+ console.error('Failed to generate question:', error);
95
+ setIsTyping(false);
96
+ } finally {
97
+ setIsGeneratingQuestion(false);
98
+ }
99
+ };
100
+
101
+ const handleCustomTask = () => {
102
+ if (customTask.trim() && !isTyping) {
103
+ onStartTask(customTask.trim(), selectedModelId);
104
+ }
105
+ };
106
+
107
+ return (
108
+ <>
109
+ {/* Dark Mode Toggle - Top Right (Absolute to viewport) */}
110
+ <Box sx={{ position: 'absolute', top: 24, right: 24, zIndex: 1000 }}>
111
+ <IconButton
112
+ onClick={toggleDarkMode}
113
+ size="medium"
114
+ sx={{
115
+ color: 'text.primary',
116
+ backgroundColor: 'background.paper',
117
+ border: '1px solid',
118
+ borderColor: 'divider',
119
+ '&:hover': {
120
+ backgroundColor: 'action.hover',
121
+ borderColor: 'primary.main',
122
+ },
123
+ }}
124
+ >
125
+ {isDarkMode ? <LightModeOutlined /> : <DarkModeOutlined />}
126
+ </IconButton>
127
+ </Box>
128
+
129
+ <Container
130
+ maxWidth="md"
131
+ sx={{
132
+ display: 'flex',
133
+ flexDirection: 'column',
134
+ alignItems: 'center',
135
+ justifyContent: 'center',
136
+ minHeight: '100vh',
137
+ textAlign: 'center',
138
+ py: 8,
139
+ }}
140
+ >
141
+ {/* Title */}
142
+ <Typography
143
+ variant="h2"
144
+ sx={{
145
+ fontWeight: 800,
146
+ mb: 1,
147
+ color: 'text.primary',
148
+ }}
149
+ >
150
+ FARA Agent
151
+ </Typography>
152
+
153
+ {/* Powered by Microsoft */}
154
+ <Box
155
+ sx={{
156
+ display: 'flex',
157
+ alignItems: 'center',
158
+ gap: 1,
159
+ mb: 2,
160
+ flexWrap: 'wrap',
161
+ justifyContent: 'center',
162
+ }}
163
+ >
164
+ <Typography
165
+ variant="body2"
166
+ sx={{
167
+ color: 'text.secondary',
168
+ fontWeight: 500,
169
+ }}
170
+ >
171
+ Powered by
172
+ </Typography>
173
+
174
+ {/* Microsoft Fara link */}
175
+ <Box
176
+ component="a"
177
+ href="https://github.com/microsoft/fara"
178
+ target="_blank"
179
+ rel="noopener noreferrer"
180
+ sx={{
181
+ display: 'flex',
182
+ alignItems: 'center',
183
+ gap: 0.75,
184
+ textDecoration: 'none',
185
+ transition: 'all 0.2s ease',
186
+ '&:hover': {
187
+ '& .fara-text': {
188
+ textDecoration: 'underline',
189
+ },
190
+ },
191
+ }}
192
+ >
193
+ <Typography
194
+ className="fara-text"
195
+ sx={{
196
+ color: 'primary.main',
197
+ fontWeight: 700,
198
+ fontSize: '1rem',
199
+ }}
200
+ >
201
+ Microsoft Fara-7B
202
+ </Typography>
203
+ </Box>
204
+
205
+ {/* Separator */}
206
+ <Typography
207
+ variant="body2"
208
+ sx={{
209
+ color: 'text.secondary',
210
+ mx: 0.5,
211
+ }}
212
+ >
213
+ &
214
+ </Typography>
215
+
216
+ {/* Modal link */}
217
+ <Box
218
+ component="a"
219
+ href="https://modal.com/"
220
+ target="_blank"
221
+ rel="noopener noreferrer"
222
+ sx={{
223
+ display: 'flex',
224
+ alignItems: 'center',
225
+ gap: 0.75,
226
+ textDecoration: 'none',
227
+ transition: 'all 0.2s ease',
228
+ '&:hover': {
229
+ '& .modal-text': {
230
+ textDecoration: 'underline',
231
+ },
232
+ },
233
+ }}
234
+ >
235
+ <Typography
236
+ className="modal-text"
237
+ sx={{
238
+ color: 'primary.main',
239
+ fontWeight: 700,
240
+ fontSize: '1rem',
241
+ }}
242
+ >
243
+ Modal
244
+ </Typography>
245
+ </Box>
246
+ </Box>
247
+
248
+ {/* Subtitle */}
249
+ <Typography
250
+ variant="h6"
251
+ sx={{
252
+ color: 'text.secondary',
253
+ fontWeight: 500,
254
+ mb: 1,
255
+ }}
256
+ >
257
+ AI-Powered Browser Automation
258
+ </Typography>
259
+
260
+ {/* Description */}
261
+ <Typography
262
+ variant="body1"
263
+ sx={{
264
+ color: 'text.secondary',
265
+ maxWidth: '650px',
266
+ mb: 3,
267
+ lineHeight: 1.7,
268
+ }}
269
+ >
270
+ Experience the future of AI automation as FARA operates your browser in real time to complete complex on-screen tasks.
271
+ Built with{' '}
272
+ <Box
273
+ component="a"
274
+ href="https://github.com/microsoft/fara"
275
+ target="_blank"
276
+ rel="noopener noreferrer"
277
+ sx={{
278
+ color: 'primary.main',
279
+ textDecoration: 'none',
280
+ fontWeight: 700,
281
+ '&:hover': {
282
+ textDecoration: 'underline',
283
+ },
284
+ }}
285
+ >
286
+ Microsoft Fara-7B
287
+ </Box>
288
+ , a vision-language model specifically designed for <strong>computer use and GUI automation</strong>.
289
+ </Typography>
290
+
291
+ {/* Task Input Section */}
292
+ <Paper
293
+ elevation={0}
294
+ sx={{
295
+ maxWidth: '725px',
296
+ width: '100%',
297
+ p: 2.5,
298
+ border: '2px solid',
299
+ borderColor: isConnected ? 'primary.main' : 'divider',
300
+ borderRadius: 2,
301
+ backgroundColor: 'background.paper',
302
+ transition: 'all 0.2s ease',
303
+ '&:hover': isConnected ? {
304
+ borderColor: 'primary.dark',
305
+ boxShadow: (theme) => `0 4px 16px ${theme.palette.mode === 'dark' ? 'rgba(79, 134, 198, 0.3)' : 'rgba(79, 134, 198, 0.15)'}`,
306
+ } : {},
307
+ }}
308
+ >
309
+ {/* Input Field */}
310
+ <TextField
311
+ fullWidth
312
+ placeholder="Describe your task here..."
313
+ value={customTask}
314
+ onChange={(e) => setCustomTask(e.target.value)}
315
+ onKeyDown={(e) => {
316
+ if (e.key !== 'Enter' || e.shiftKey || e.nativeEvent.isComposing) return; // Shift+Enter / IME confirm: leave alone
317
+ e.preventDefault(); // Enter submits; don't also insert a newline into the task text
318
+ if (isConnected && customTask.trim() && !isTyping) handleCustomTask();
319
+ }}
320
+ disabled={!isConnected || isTyping}
321
+ multiline
322
+ rows={3}
323
+ sx={{
324
+ mb: 2,
325
+ '& .MuiOutlinedInput-root': {
326
+ borderRadius: 1.5,
327
+ backgroundColor: 'action.hover',
328
+ color: 'text.primary',
329
+ '& fieldset': {
330
+ borderColor: 'divider',
331
+ },
332
+ '&:hover fieldset': {
333
+ borderColor: 'text.secondary',
334
+ },
335
+ '&.Mui-focused fieldset': {
336
+ borderColor: 'primary.main',
337
+ borderWidth: '2px',
338
+ },
339
+ },
340
+ '& .MuiInputBase-input': {
341
+ color: (theme) => theme.palette.mode === 'dark' ? '#FFFFFF !important' : '#000000 !important',
342
+ fontWeight: 500,
343
+ WebkitTextFillColor: (theme) => theme.palette.mode === 'dark' ? '#FFFFFF !important' : '#000000 !important',
344
+ },
345
+ '& .MuiInputBase-input.Mui-disabled': {
346
+ color: (theme) => theme.palette.mode === 'dark' ? '#FFFFFF !important' : '#000000 !important',
347
+ WebkitTextFillColor: (theme) => theme.palette.mode === 'dark' ? '#FFFFFF !important' : '#000000 !important',
348
+ },
349
+ '& .MuiInputBase-input::placeholder': {
350
+ color: 'text.secondary',
351
+ opacity: 0.7,
352
+ },
353
+ }}
354
+ />
355
+
356
+ {/* Model Selection + Buttons Row */}
357
+ <Box sx={{ display: 'flex', gap: 1.5, alignItems: 'center', justifyContent: 'space-between' }}>
358
+ {/* Model Select */}
359
+ <FormControl size="small" sx={{ minWidth: 240 }}>
360
+ <InputLabel id="model-select-label">Model</InputLabel>
361
+ <Select
362
+ labelId="model-select-label"
363
+ value={availableModels.length > 0 && availableModels.includes(selectedModelId) ? selectedModelId : ''}
364
+ label="Model"
365
+ onChange={(e) => setSelectedModelId(e.target.value)}
366
+ disabled={!isConnected || isTyping || isLoadingModels}
367
+ sx={{
368
+ borderRadius: 1.5,
369
+ '& .MuiOutlinedInput-notchedOutline': {
370
+ borderWidth: 2,
371
+ },
372
+ }}
373
+ >
374
+ {isLoadingModels ? (
375
+ <MenuItem disabled>
376
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
377
+ <CircularProgress size={16} />
378
+ <Typography variant="body2">Loading models...</Typography>
379
+ </Box>
380
+ </MenuItem>
381
+ ) : availableModels.length === 0 ? (
382
+ <MenuItem disabled>
383
+ <Typography variant="body2" sx={{ color: 'error.main' }}>
384
+ No models available
385
+ </Typography>
386
+ </MenuItem>
387
+ ) : (
388
+ availableModels.map((modelId) => (
389
+ <MenuItem key={modelId} value={modelId}>
390
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
391
+ <SmartToyIcon sx={{ fontSize: '0.9rem', color: 'primary.main' }} />
392
+ <Typography variant="body2" sx={{ fontWeight: 600, fontSize: '0.875rem' }}>
393
+ {modelId.split('/').pop()}
394
+ </Typography>
395
+ </Box>
396
+ </MenuItem>
397
+ ))
398
+ )}
399
+ </Select>
400
+ </FormControl>
401
+
402
+ {/* Buttons on the right */}
403
+ <Box sx={{ display: 'flex', gap: 1.5 }}>
404
+ <Button
405
+ variant="outlined"
406
+ onClick={handleWriteRandomTask}
407
+ disabled={!isConnected || isTyping || isGeneratingQuestion}
408
+ startIcon={isGeneratingQuestion ? <CircularProgress size={16} /> : <ShuffleIcon />}
409
+ sx={{
410
+ borderRadius: 1.5,
411
+ textTransform: 'none',
412
+ fontWeight: 600,
413
+ borderWidth: 2,
414
+ px: 3,
415
+ '&:hover': {
416
+ borderWidth: 2,
417
+ },
418
+ }}
419
+ >
420
+ {isGeneratingQuestion ? 'Generating...' : isTyping ? 'Writing...' : 'Write random task'}
421
+ </Button>
422
+
423
+ <Button
424
+ variant="contained"
425
+ onClick={handleCustomTask}
426
+ disabled={!isConnected || !customTask.trim() || isTyping}
427
+ sx={{
428
+ borderRadius: 1.5,
429
+ textTransform: 'none',
430
+ fontWeight: 600,
431
+ px: 4,
432
+ background: 'linear-gradient(135deg, #4F86C6 0%, #2B5C94 100%)',
433
+ }}
434
+ endIcon={<SendIcon />}
435
+ >
436
+ Run Task
437
+ </Button>
438
+ </Box>
439
+ </Box>
440
+ </Paper>
441
+
442
+ {/* Research Notice */}
443
+ <Typography
444
+ variant="body2"
445
+ sx={{
446
+ color: 'text.secondary',
447
+ maxWidth: '700px',
448
+ mt: 3,
449
+ mb: 2,
450
+ lineHeight: 1.6,
451
+ fontStyle: 'italic',
452
+ opacity: 0.8,
453
+ textAlign: 'center',
454
+ }}
455
+ >
456
+ This is a demo of the FARA computer use agent. The agent will browse the web on your behalf.
457
+ Cold starts may take up to 1 minute for the first prompt, after which each step should take 5-10s.
458
+ <strong> Please do not enter any personal or sensitive information.</strong>
459
+ {' '}Task logs will be stored for research purposes.
460
+ </Typography>
461
+
462
+ {/* Credits */}
463
+ <Typography
464
+ variant="caption"
465
+ sx={{
466
+ color: 'text.secondary',
467
+ mt: 1,
468
+ opacity: 0.7,
469
+ textAlign: 'center',
470
+ }}
471
+ >
472
+ Frontend based on{' '}
473
+ <Box
474
+ component="a"
475
+ href="https://huggingface.co/spaces/smolagents/computer-use-agent"
476
+ target="_blank"
477
+ rel="noopener noreferrer"
478
+ sx={{
479
+ color: 'primary.main',
480
+ textDecoration: 'none',
481
+ '&:hover': {
482
+ textDecoration: 'underline',
483
+ },
484
+ }}
485
+ >
486
+ HuggingFace smolagents/computer-use-agent
487
+ </Box>
488
+ </Typography>
489
+
490
+ {/* Connection status hint */}
491
+ {!isConnected && (
492
+ <Typography
493
+ variant="caption"
494
+ sx={{
495
+ mt: 2,
496
+ color: 'text.secondary',
497
+ display: 'flex',
498
+ alignItems: 'center',
499
+ gap: 1,
500
+ }}
501
+ >
502
+ <Box
503
+ sx={{
504
+ width: 8,
505
+ height: 8,
506
+ borderRadius: '50%',
507
+ backgroundColor: 'warning.main',
508
+ animation: 'pulse 2s ease-in-out infinite',
509
+ '@keyframes pulse': {
510
+ '0%, 100%': { opacity: 1 },
511
+ '50%': { opacity: 0.5 },
512
+ },
513
+ }}
514
+ />
515
+ Make sure the backend is running on port 8000
516
+ </Typography>
517
+ )}
518
+ </Container>
519
+ </>
520
+ );
521
+ };
src/components/index.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // General components
2
+ export { Header } from './Header';
3
+ export { ConnectionStatus } from './ConnectionStatus';
4
+ export { ProcessingIndicator } from './ProcessingIndicator';
5
+ export { WelcomeScreen } from './WelcomeScreen';
6
+
7
+ // Sandbox components
8
+ export { SandboxViewer, CompletionView, DownloadGifButton, DownloadJsonButton } from './sandbox';
9
+
10
+ // Timeline components
11
+ export { Timeline } from './timeline';
12
+
13
+ // Steps components
14
+ export { StepsList, StepCard, FinalStepCard, ThinkingStepCard, ConnectionStepCard } from './steps';
src/components/sandbox/SandboxViewer.tsx ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useGifGenerator } from '@/hooks/useGifGenerator';
2
+ import { useJsonExporter } from '@/hooks/useJsonExporter';
3
+ import { selectError, selectFinalStep, selectSteps, selectTrace, useAgentStore } from '@/stores/agentStore';
4
+ import { AgentStep, AgentTraceMetadata } from '@/types/agent';
5
+ import ImageIcon from '@mui/icons-material/Image';
6
+ import MonitorIcon from '@mui/icons-material/Monitor';
7
+ import PlayCircleIcon from '@mui/icons-material/PlayCircle';
8
+ import { Box, Button, CircularProgress, keyframes, Typography } from '@mui/material';
9
+ import React from 'react';
10
+ import { useNavigate } from 'react-router-dom';
11
+ import { CompletionView } from './completionview/CompletionView';
12
+
13
+ // Animation for live indicator
14
+ const livePulse = keyframes`
15
+ 0%, 100% {
16
+ opacity: 1;
17
+ transform: scale(1);
18
+ }
19
+ 50% {
20
+ opacity: 0.7;
21
+ transform: scale(1.2);
22
+ }
23
+ `;
24
+
25
+ interface SandboxViewerProps {
26
+ vncUrl: string; // src of the live-stream iframe; when falsy the latest step screenshot is shown instead
27
+ isAgentProcessing?: boolean; // Shows the "Starting FARA Agent..." spinner while no stream or screenshot exists
28
+ metadata?: AgentTraceMetadata; // Passed to CompletionView; JSON-export fallback when finalStep metadata is unavailable
29
+ traceStartTime?: Date; // NOTE(review): destructured by SandboxViewer but not read in its body
30
+ selectedStep?: AgentStep | null; // The step to display in time-travel mode
31
+ isRunning?: boolean; // Is the agent currently running
32
+ }
33
+
34
+ export const SandboxViewer: React.FC<SandboxViewerProps> = ({
35
+ vncUrl,
36
+ isAgentProcessing = false,
37
+ metadata,
38
+ traceStartTime,
39
+ selectedStep,
40
+ isRunning = false
41
+ }) => {
42
+ const navigate = useNavigate();
43
+ const error = useAgentStore(selectError);
44
+ const finalStep = useAgentStore(selectFinalStep);
45
+ const steps = useAgentStore(selectSteps);
46
+ const trace = useAgentStore(selectTrace);
47
+ const resetAgent = useAgentStore((state) => state.resetAgent);
48
+ const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
49
+
50
+ // Get the latest screenshot from steps (for non-VNC mode)
51
+ const latestScreenshot = steps && steps.length > 0 ? steps[steps.length - 1].image : null;
52
+
53
+ // Hook to generate GIF
54
+ const { isGenerating, error: gifError, generateAndDownloadGif } = useGifGenerator({
55
+ steps: steps || [],
56
+ traceId: finalStep?.metadata.traceId || '',
57
+ });
58
+
59
+ // Hook to export JSON
60
+ const { downloadTraceAsJson } = useJsonExporter({
61
+ trace,
62
+ steps: steps || [],
63
+ metadata: finalStep?.metadata || metadata,
64
+ finalStep,
65
+ });
66
+
67
+ // Resolve the text shown as the agent's final response.
68
+ //
69
+ // Resolution order (steps are scanned newest-first):
70
+ // 1. A step with a `final_answer` action: return its `answer` parameter, else its
71
+ // positional `arg_0`, else null (there is no fallback to a thought in this case).
72
+ // 2. Otherwise, the `thought` of the newest step that has one.
73
+ // 3. Otherwise, or when there are no steps, null.
74
+ //
75
+ // Called on every render, so it is kept free of side effects such as logging
76
+ // (logging here would also dump step contents to the browser console).
77
+ const getFinalAnswer = (): string | null => {
78
+ if (!steps || steps.length === 0) {
79
+ return null;
80
+ }
81
+
82
+ // Try to find final_answer in any step (iterate backwards)
83
+ for (let i = steps.length - 1; i >= 0; i--) {
84
+ const step = steps[i];
85
+
86
+ if (step.actions && Array.isArray(step.actions)) {
87
+ const finalAnswerAction = step.actions.find(
88
+ (action) => action.function_name === 'final_answer'
89
+ );
90
+
91
+ if (finalAnswerAction) {
92
+ // Handle both named parameter and positional argument
93
+ return finalAnswerAction.parameters?.answer || finalAnswerAction.parameters?.arg_0 || null;
94
+ }
95
+ }
96
+ }
97
+
98
+ // Fallback: find the last step with a thought (iterate backwards)
99
+ for (let i = steps.length - 1; i >= 0; i--) {
100
+ const step = steps[i];
101
+ if (step.thought) {
102
+ return step.thought;
103
+ }
104
+ }
105
+
106
+ return null;
107
+ };
108
+
109
+ const finalAnswer = getFinalAnswer();
110
+
111
+ // Determine if we should show success/fail status
112
+ const showStatus = !isRunning && !selectedStep && finalStep;
113
+
114
+ // Handler to go back to home
115
+ const handleBackToHome = () => {
116
+ // Reset frontend state
117
+ useAgentStore.getState().resetAgent();
118
+
119
+ // Reload the page to reconnect websocket
120
+ window.location.href = '/';
121
+ };
122
+
123
+ // Handler to go back to live mode
124
+ const handleGoLive = () => {
125
+ setSelectedStepIndex(null);
126
+ };
127
+
128
+ return (
129
+ <Box
130
+ sx={{
131
+ flex: '1 1 auto',
132
+ display: 'flex',
133
+ flexDirection: 'column',
134
+ position: 'relative',
135
+ border: '1px solid',
136
+ borderColor: showStatus
137
+ ? ((finalStep?.type === 'failure' || finalStep?.type === 'sandbox_timeout') ? 'error.main' : 'success.main')
138
+ : ((vncUrl || isAgentProcessing) && !selectedStep && !showStatus ? 'primary.main' : 'divider'),
139
+ borderRadius: '12px',
140
+ backgroundColor: 'background.paper',
141
+ transition: 'border 0.3s ease',
142
+ overflow: 'hidden',
143
+ }}
144
+ >
145
+ {/* Live Badge or Go Live Button */}
146
+ {vncUrl && !showStatus && (
147
+ <>
148
+ {!selectedStep ? (
149
+ // Live Badge when in live mode
150
+ <Box
151
+ sx={{
152
+ position: 'absolute',
153
+ top: 12,
154
+ right: 12,
155
+ zIndex: 10,
156
+ display: 'flex',
157
+ alignItems: 'center',
158
+ gap: 1,
159
+ px: 2,
160
+ py: 1,
161
+ backgroundColor: (theme) =>
162
+ theme.palette.mode === 'dark'
163
+ ? 'rgba(0, 0, 0, 0.7)'
164
+ : 'rgba(255, 255, 255, 0.9)',
165
+ backdropFilter: 'blur(8px)',
166
+ borderRadius: 0.75,
167
+ border: '1px solid',
168
+ borderColor: 'primary.main',
169
+ boxShadow: (theme) =>
170
+ theme.palette.mode === 'dark'
171
+ ? '0 2px 8px rgba(0, 0, 0, 0.4)'
172
+ : '0 2px 8px rgba(0, 0, 0, 0.1)',
173
+ }}
174
+ >
175
+ <Box
176
+ sx={{
177
+ width: 10,
178
+ height: 10,
179
+ borderRadius: '50%',
180
+ backgroundColor: 'error.main',
181
+ animation: `${livePulse} 2s ease-in-out infinite`,
182
+ }}
183
+ />
184
+ <Typography
185
+ variant="caption"
186
+ sx={{
187
+ fontSize: '0.8rem',
188
+ fontWeight: 700,
189
+ color: 'text.primary',
190
+ textTransform: 'uppercase',
191
+ letterSpacing: '0.5px',
192
+ }}
193
+ >
194
+ Live
195
+ </Typography>
196
+ </Box>
197
+ ) : (
198
+ // Go Live Button when viewing a specific step
199
+ <Button
200
+ onClick={handleGoLive}
201
+ startIcon={<PlayCircleIcon sx={{ fontSize: 20 }} />}
202
+ sx={{
203
+ position: 'absolute',
204
+ top: 12,
205
+ right: 12,
206
+ zIndex: 10,
207
+ px: 2,
208
+ py: 1,
209
+ backgroundColor: (theme) =>
210
+ theme.palette.mode === 'dark'
211
+ ? 'rgba(0, 0, 0, 0.7)'
212
+ : 'rgba(255, 255, 255, 0.9)',
213
+ backdropFilter: 'blur(8px)',
214
+ borderRadius: 0.75,
215
+ border: '1px solid',
216
+ borderColor: 'primary.main',
217
+ boxShadow: (theme) =>
218
+ theme.palette.mode === 'dark'
219
+ ? '0 2px 8px rgba(0, 0, 0, 0.4)'
220
+ : '0 2px 8px rgba(0, 0, 0, 0.1)',
221
+ fontSize: '0.8rem',
222
+ fontWeight: 700,
223
+ textTransform: 'uppercase',
224
+ letterSpacing: '0.5px',
225
+ color: 'primary.main',
226
+ '&:hover': {
227
+ backgroundColor: (theme) =>
228
+ theme.palette.mode === 'dark'
229
+ ? 'rgba(0, 0, 0, 0.85)'
230
+ : 'rgba(255, 255, 255, 1)',
231
+ borderColor: 'primary.dark',
232
+ },
233
+ }}
234
+ >
235
+ Go Live
236
+ </Button>
237
+ )}
238
+ </>
239
+ )}
240
+
241
+ <Box
242
+ sx={{
243
+ flex: 1,
244
+ minHeight: 0,
245
+ display: 'flex',
246
+ alignItems: 'center',
247
+ justifyContent: 'center',
248
+ }}
249
+ >
250
+ {showStatus && finalStep ? (
251
+ // Show success/fail status when agent has completed
252
+ <CompletionView
253
+ finalStep={finalStep}
254
+ trace={trace}
255
+ steps={steps}
256
+ metadata={metadata}
257
+ finalAnswer={finalAnswer}
258
+ isGenerating={isGenerating}
259
+ gifError={gifError}
260
+ onGenerateGif={generateAndDownloadGif}
261
+ onDownloadJson={downloadTraceAsJson}
262
+ onBackToHome={handleBackToHome}
263
+ />
264
+ ) : selectedStep ? (
265
+ // Time-travel mode: Show screenshot of selected step
266
+ <Box
267
+ sx={{
268
+ width: '100%',
269
+ height: '100%',
270
+ display: 'flex',
271
+ alignItems: 'center',
272
+ justifyContent: 'center',
273
+ overflow: 'auto',
274
+ backgroundColor: 'black',
275
+ position: 'relative',
276
+ }}
277
+ >
278
+ {selectedStep.image ? (
279
+ <img
280
+ src={selectedStep.image}
281
+ alt="Step screenshot"
282
+ style={{
283
+ maxWidth: '100%',
284
+ maxHeight: '100%',
285
+ objectFit: 'contain',
286
+ }}
287
+ />
288
+ ) : (
289
+ <Box
290
+ sx={{
291
+ textAlign: 'center',
292
+ p: 4,
293
+ color: 'text.secondary',
294
+ width: '100%',
295
+ height: '100%',
296
+ display: 'flex',
297
+ flexDirection: 'column',
298
+ alignItems: 'center',
299
+ justifyContent: 'center',
300
+ }}
301
+ >
302
+ <ImageIcon sx={{ fontSize: 48, mb: 2, opacity: 0.5 }} />
303
+ <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5, fontSize: '0.875rem', color: 'text.primary' }}>
304
+ No screenshot available
305
+ </Typography>
306
+ <Typography variant="caption" sx={{ fontSize: '0.75rem', color: 'text.secondary' }}>
307
+ This step doesn't have a screenshot
308
+ </Typography>
309
+ </Box>
310
+ )}
311
+ </Box>
312
+ ) : vncUrl ? (
313
+ // Live mode: Show VNC stream
314
+ <iframe
315
+ src={vncUrl}
316
+ style={{ width: '100%', height: '100%', border: 'none' }}
317
+ title="OS Stream"
318
+ lang="en"
319
+ />
320
+ ) : latestScreenshot ? (
321
+ // Live mode without VNC: Show latest screenshot
322
+ <Box
323
+ sx={{
324
+ width: '100%',
325
+ height: '100%',
326
+ display: 'flex',
327
+ alignItems: 'center',
328
+ justifyContent: 'center',
329
+ overflow: 'auto',
330
+ backgroundColor: 'black',
331
+ position: 'relative',
332
+ }}
333
+ >
334
+ <img
335
+ src={latestScreenshot}
336
+ alt="Latest screenshot"
337
+ style={{
338
+ maxWidth: '100%',
339
+ maxHeight: '100%',
340
+ objectFit: 'contain',
341
+ }}
342
+ />
343
+ </Box>
344
+ ) : isAgentProcessing ? (
345
+ // Loading state
346
+ <Box
347
+ sx={{
348
+ textAlign: 'center',
349
+ p: 4,
350
+ color: 'text.secondary',
351
+ width: '100%',
352
+ height: '100%',
353
+ display: 'flex',
354
+ flexDirection: 'column',
355
+ alignItems: 'center',
356
+ justifyContent: 'center',
357
+ }}
358
+ >
359
+ <CircularProgress
360
+ size={48}
361
+ sx={{
362
+ mb: 2,
363
+ color: 'primary.main'
364
+ }}
365
+ />
366
+ <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5, fontSize: '0.875rem', color: 'text.primary' }}>
367
+ Starting FARA Agent...
368
+ </Typography>
369
+ <Typography variant="caption" sx={{ fontSize: '0.75rem', color: 'text.secondary' }}>
370
+ Initializing browser environment
371
+ </Typography>
372
+ </Box>
373
+ ) : (
374
+ // No stream available
375
+ <Box
376
+ sx={{
377
+ textAlign: 'center',
378
+ p: 4,
379
+ color: 'text.secondary',
380
+ width: '100%',
381
+ height: '100%',
382
+ display: 'flex',
383
+ flexDirection: 'column',
384
+ alignItems: 'center',
385
+ justifyContent: 'center',
386
+ }}
387
+ >
388
+ <MonitorIcon sx={{ fontSize: 48, mb: 2, opacity: 0.5 }} />
389
+ <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5, fontSize: '0.875rem' }}>
390
+ No stream available
391
+ </Typography>
392
+ <Typography variant="caption" sx={{ fontSize: '0.75rem', color: 'text.secondary' }}>
393
+ Stream will appear when agent starts
394
+ </Typography>
395
+ </Box>
396
+ )}
397
+ </Box>
398
+ </Box>
399
+ );
400
+ };
src/components/sandbox/completionview/CompletionView.tsx ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useTraceUploader } from '@/hooks/useTraceUploader';
2
+ import { useAgentStore } from '@/stores/agentStore';
3
+ import { AgentStep, AgentTrace, AgentTraceMetadata, FinalStep } from '@/types/agent';
4
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
5
+ import AddIcon from '@mui/icons-material/Add';
6
+ import AssignmentIcon from '@mui/icons-material/Assignment';
7
+ import ChatBubbleOutlineIcon from '@mui/icons-material/ChatBubbleOutline';
8
+ import CheckIcon from '@mui/icons-material/Check';
9
+ import CloseIcon from '@mui/icons-material/Close';
10
+ import CloudUploadIcon from '@mui/icons-material/CloudUpload';
11
+ import FormatListNumberedIcon from '@mui/icons-material/FormatListNumbered';
12
+ import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
13
+ import InputIcon from '@mui/icons-material/Input';
14
+ import OutputIcon from '@mui/icons-material/Output';
15
+ import SmartToyIcon from '@mui/icons-material/SmartToy';
16
+ import StopCircleIcon from '@mui/icons-material/StopCircle';
17
+ import ThumbDownIcon from '@mui/icons-material/ThumbDown';
18
+ import ThumbUpIcon from '@mui/icons-material/ThumbUp';
19
+ import { Alert, Box, Button, Divider, IconButton, Paper, Tooltip, Typography } from '@mui/material';
20
+ import React, { useEffect, useState, useRef, useCallback } from 'react';
21
+ import { DownloadGifButton } from './DownloadGifButton';
22
+ import { DownloadJsonButton } from './DownloadJsonButton';
23
+
24
+ interface CompletionViewProps {
25
+ finalStep: FinalStep;
26
+ trace?: AgentTrace;
27
+ steps?: AgentStep[];
28
+ metadata?: AgentTraceMetadata;
29
+ finalAnswer?: string | null;
30
+ isGenerating: boolean;
31
+ gifError: string | null;
32
+ onGenerateGif: () => void;
33
+ onDownloadJson: () => void;
34
+ onBackToHome: () => void;
35
+ }
36
+
37
+ /**
38
+ * Component displaying the completion status (success or failure) of a task
39
+ */
40
+ export const CompletionView: React.FC<CompletionViewProps> = ({
41
+ finalStep,
42
+ trace,
43
+ steps,
44
+ metadata,
45
+ finalAnswer,
46
+ isGenerating,
47
+ gifError,
48
+ onGenerateGif,
49
+ onDownloadJson,
50
+ onBackToHome,
51
+ }) => {
52
+ const updateTraceEvaluationInStore = useAgentStore((state) => state.updateTraceEvaluation);
53
+ const [evaluation, setEvaluation] = useState<'success' | 'failed' | 'not_evaluated'>(
54
+ finalStep.metadata.user_evaluation || 'not_evaluated'
55
+ );
56
+ const [isVoting, setIsVoting] = useState(false);
57
+
58
+ // Use refs to always have fresh values for the upload callback
59
+ const traceRef = useRef(trace);
60
+ const stepsRef = useRef(steps || []);
61
+ const metadataRef = useRef(metadata || finalStep.metadata);
62
+ const finalStepRef = useRef(finalStep);
63
+
64
+ // Keep refs updated
65
+ useEffect(() => {
66
+ traceRef.current = trace;
67
+ stepsRef.current = steps || [];
68
+ metadataRef.current = metadata || finalStep.metadata;
69
+ finalStepRef.current = finalStep;
70
+ }, [trace, steps, metadata, finalStep]);
71
+
72
+ // Hook for uploading traces to Modal - uses callback to get fresh data
73
+ const { uploadTrace, isUploading, uploadError, uploadSuccess } = useTraceUploader({
74
+ getTraceData: useCallback(() => ({
75
+ trace: traceRef.current,
76
+ steps: stepsRef.current,
77
+ metadata: metadataRef.current,
78
+ finalStep: finalStepRef.current,
79
+ }), []),
80
+ });
81
+
82
+ // Note: Auto-upload on task completion is now handled by useAgentWebSocket
83
+ // This component only handles re-uploads when user provides evaluation
84
+
85
+ const handleTraceEvaluation = async (vote: 'success' | 'failed') => {
86
+ if (isVoting || !trace?.id) return;
87
+
88
+ // Clicking the already-selected vote clears it (toggle behaviour).
89
+ const newEvaluation = evaluation === vote ? 'not_evaluated' : vote;
90
+ setIsVoting(true);
91
+
92
+ try {
93
+ setEvaluation(newEvaluation);
94
+ // Update the store so the evaluation is reflected in the trace data
95
+ updateTraceEvaluationInStore(newEvaluation);
96
+
97
+ // Short delay so the store update has propagated before re-uploading;
98
+ // the upload is expected to overwrite the stored trace with the same ID.
99
+ await new Promise((resolve) => setTimeout(resolve, 100));
100
+ // Awaited so isVoting spans the upload and errors reach the catch below.
101
+ await uploadTrace(true); // forceUpload=true; NOTE(review): assumes uploadTrace returns a promise
102
+ } catch (error) {
103
+ console.error('Failed to update trace evaluation:', error);
104
+ } finally {
105
+ setIsVoting(false);
106
+ }
107
+ };
108
+
109
+ const getStatusConfig = () => {
110
+ switch (finalStep.type) {
111
+ case 'success':
112
+ return {
113
+ icon: <CheckIcon sx={{ fontSize: 28 }} />,
114
+ title: 'Task Completed Successfully!',
115
+ color: 'success.main',
116
+ };
117
+ case 'stopped':
118
+ return {
119
+ icon: <StopCircleIcon sx={{ fontSize: 28 }} />,
120
+ title: 'Task Stopped',
121
+ color: 'warning.main',
122
+ };
123
+ case 'max_steps_reached':
124
+ return {
125
+ icon: <HourglassEmptyIcon sx={{ fontSize: 28 }} />,
126
+ title: 'Maximum Steps Reached',
127
+ color: 'warning.main',
128
+ };
129
+ case 'sandbox_timeout':
130
+ return {
131
+ icon: <AccessTimeIcon sx={{ fontSize: 28 }} />,
132
+ title: 'Max Sandbox Time Reached',
133
+ color: 'error.main',
134
+ };
135
+ case 'failure':
136
+ default:
137
+ return {
138
+ icon: <CloseIcon sx={{ fontSize: 28 }} />,
139
+ title: 'Task Failed (Agent Internal Error)',
140
+ color: 'error.main',
141
+ };
142
+ }
143
+ };
144
+
145
+ const statusConfig = getStatusConfig();
146
+
147
+ // Format model name for display: keep only the last path segment
148
+ // (e.g. "microsoft/Fara-7B" -> "Fara-7B"), matching WelcomeScreen's model selector.
149
+ const formatModelName = (modelId: string) => {
150
+ return modelId.split('/').pop() || modelId;
151
+ };
152
+
153
+ return (
154
+ <Box
155
+ sx={{
156
+ width: '100%',
157
+ maxWidth: 600,
158
+ mx: 'auto',
159
+ p: 2,
160
+ display: 'flex',
161
+ flexDirection: 'column',
162
+ gap: 1.5,
163
+ }}
164
+ >
165
+ {/* Status Header - Compact */}
166
+ <Box sx={{ textAlign: 'center', mb: 0.5 }}>
167
+ <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center', gap: 1.5, mb: 0.75 }}>
168
+ <Box
169
+ sx={{
170
+ width: 40,
171
+ height: 40,
172
+ borderRadius: '50%',
173
+ backgroundColor: statusConfig.color,
174
+ display: 'flex',
175
+ alignItems: 'center',
176
+ justifyContent: 'center',
177
+ boxShadow: (theme) => {
178
+ const rgba = finalStep.type === 'success'
179
+ ? '102, 187, 106'
180
+ : (finalStep.type === 'failure' || finalStep.type === 'sandbox_timeout')
181
+ ? '244, 67, 54'
182
+ : '255, 152, 0';
183
+ return `0 2px 8px ${theme.palette.mode === 'dark' ? `rgba(${rgba}, 0.3)` : `rgba(${rgba}, 0.2)`}`;
184
+ },
185
+ }}
186
+ >
187
+ {React.cloneElement(statusConfig.icon, { sx: { fontSize: 24, color: 'white' } })}
188
+ </Box>
189
+ <Typography
190
+ variant="h6"
191
+ sx={{
192
+ fontWeight: 700,
193
+ color: statusConfig.color,
194
+ fontSize: '1.1rem',
195
+ letterSpacing: '-0.5px',
196
+ }}
197
+ >
198
+ {statusConfig.title}
199
+ </Typography>
200
+ </Box>
201
+ </Box>
202
+
203
+ {/* Single Report Box - Task + Agent + Response + Metrics */}
204
+ <Paper
205
+ elevation={0}
206
+ sx={{
207
+ p: 2.5,
208
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(255,255,255,0.03)' : 'rgba(0,0,0,0.03)',
209
+ borderRadius: 1.5,
210
+ border: '1px solid',
211
+ borderColor: 'divider',
212
+ }}
213
+ >
214
+ {/* Task */}
215
+ {trace?.instruction && (
216
+ <Box sx={{ mb: 2 }}>
217
+ <Box sx={{ display: 'flex', alignItems: 'flex-start', gap: 1.5 }}>
218
+ <AssignmentIcon sx={{ fontSize: 18, color: 'text.secondary', mt: 0.25, flexShrink: 0 }} />
219
+ <Box sx={{ flex: 1, minWidth: 0 }}>
220
+ <Typography
221
+ variant="caption"
222
+ sx={{
223
+ fontWeight: 700,
224
+ color: 'text.secondary',
225
+ fontSize: '0.7rem',
226
+ textTransform: 'uppercase',
227
+ letterSpacing: '0.5px',
228
+ display: 'block',
229
+ mb: 0.5,
230
+ }}
231
+ >
232
+ Task
233
+ </Typography>
234
+ <Typography
235
+ variant="body2"
236
+ sx={{
237
+ color: 'text.primary',
238
+ fontWeight: 700,
239
+ lineHeight: 1.5,
240
+ fontSize: '0.85rem',
241
+ }}
242
+ >
243
+ {trace.instruction}
244
+ </Typography>
245
+ </Box>
246
+ </Box>
247
+ </Box>
248
+ )}
249
+
250
+ {/* Agent Response */}
251
+ {finalAnswer && (
252
+ <Box sx={{ mb: 2 }}>
253
+ <Box sx={{ display: 'flex', alignItems: 'flex-start', gap: 1.5 }}>
254
+ <ChatBubbleOutlineIcon
255
+ sx={{
256
+ fontSize: 18,
257
+ color: 'text.secondary',
258
+ mt: 0.25,
259
+ flexShrink: 0
260
+ }}
261
+ />
262
+ <Box sx={{ flex: 1, minWidth: 0 }}>
263
+ <Typography
264
+ variant="caption"
265
+ sx={{
266
+ fontWeight: 700,
267
+ color: 'text.secondary',
268
+ fontSize: '0.7rem',
269
+ textTransform: 'uppercase',
270
+ letterSpacing: '0.5px',
271
+ display: 'block',
272
+ mb: 0.75,
273
+ }}
274
+ >
275
+ Agent Response
276
+ </Typography>
277
+ <Typography
278
+ variant="body2"
279
+ sx={{
280
+ color: 'text.primary',
281
+ lineHeight: 1.5,
282
+ fontSize: '0.85rem',
283
+ whiteSpace: 'pre-wrap',
284
+ wordBreak: 'break-word',
285
+ }}
286
+ >
287
+ {finalAnswer}
288
+ </Typography>
289
+ </Box>
290
+ </Box>
291
+ </Box>
292
+ )}
293
+
294
+ {/* Trace Evaluation */}
295
+ <Box sx={{ mb: 2 }}>
296
+ <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
297
+ <Typography
298
+ variant="caption"
299
+ sx={{
300
+ fontWeight: 700,
301
+ color: 'text.secondary',
302
+ fontSize: '0.7rem',
303
+ textTransform: 'uppercase',
304
+ letterSpacing: '0.5px',
305
+ }}
306
+ >
307
+ Was this task completed successfully?
308
+ </Typography>
309
+
310
+ {/* Evaluation buttons */}
311
+ <Box sx={{ display: 'flex', gap: 1 }}>
312
+ <Tooltip title={evaluation === 'success' ? 'Remove success rating' : 'Mark as successful'}>
313
+ <IconButton
314
+ size="small"
315
+ onClick={() => handleTraceEvaluation('success')}
316
+ disabled={isVoting}
317
+ sx={{
318
+ padding: '4px',
319
+ color: evaluation === 'success' ? 'success.main' : 'action.disabled',
320
+ '&:hover': {
321
+ color: 'success.main',
322
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(102, 187, 106, 0.1)' : 'rgba(102, 187, 106, 0.08)',
323
+ },
324
+ }}
325
+ >
326
+ <ThumbUpIcon sx={{ fontSize: 18 }} />
327
+ </IconButton>
328
+ </Tooltip>
329
+ <Tooltip title={evaluation === 'failed' ? 'Remove failure rating' : 'Mark as failed'}>
330
+ <IconButton
331
+ size="small"
332
+ onClick={() => handleTraceEvaluation('failed')}
333
+ disabled={isVoting}
334
+ sx={{
335
+ padding: '4px',
336
+ color: evaluation === 'failed' ? 'error.main' : 'action.disabled',
337
+ '&:hover': {
338
+ color: 'error.main',
339
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
340
+ },
341
+ }}
342
+ >
343
+ <ThumbDownIcon sx={{ fontSize: 18 }} />
344
+ </IconButton>
345
+ </Tooltip>
346
+ </Box>
347
+ </Box>
348
+ </Box>
349
+
350
+ {/* Divider before metrics */}
351
+ <Divider sx={{ my: 2 }} />
352
+
353
+ {/* Metrics */}
354
+ <Box
355
+ sx={{
356
+ display: 'flex',
357
+ alignItems: 'center',
358
+ gap: 1.5,
359
+ flexWrap: 'wrap',
360
+ justifyContent: 'center',
361
+ }}
362
+ >
363
+ {/* Agent */}
364
+ {trace?.modelId && (
365
+ <>
366
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
367
+ <SmartToyIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
368
+ <Typography
369
+ variant="caption"
370
+ sx={{
371
+ color: 'text.primary',
372
+ fontFamily: 'monospace',
373
+ fontSize: '0.75rem',
374
+ fontWeight: 700,
375
+ }}
376
+ >
377
+ {formatModelName(trace.modelId)}
378
+ </Typography>
379
+ </Box>
380
+
381
+ {/* Divider */}
382
+ <Box sx={{ width: '1px', height: 16, backgroundColor: 'divider' }} />
383
+ </>
384
+ )}
385
+
386
+ {/* Steps Count */}
387
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
388
+ <FormatListNumberedIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
389
+ <Typography
390
+ variant="caption"
391
+ sx={{
392
+ fontSize: '0.75rem',
393
+ fontWeight: 700,
394
+ color: 'text.primary',
395
+ mr: 0.5,
396
+ }}
397
+ >
398
+ {finalStep.metadata.numberOfSteps}
399
+ </Typography>
400
+ <Typography
401
+ variant="caption"
402
+ sx={{
403
+ fontSize: '0.7rem',
404
+ fontWeight: 400,
405
+ color: 'text.secondary',
406
+ }}
407
+ >
408
+ {finalStep.metadata.numberOfSteps === 1 ? 'Step' : 'Steps'}
409
+ </Typography>
410
+ </Box>
411
+
412
+ {/* Divider */}
413
+ <Box sx={{ width: '1px', height: 16, backgroundColor: 'divider' }} />
414
+
415
+ {/* Duration */}
416
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
417
+ <AccessTimeIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
418
+ <Typography
419
+ variant="caption"
420
+ sx={{
421
+ fontSize: '0.75rem',
422
+ fontWeight: 700,
423
+ color: 'text.primary',
424
+ }}
425
+ >
426
+ {finalStep.metadata.duration.toFixed(1)}s
427
+ </Typography>
428
+ </Box>
429
+
430
+ {/* Divider */}
431
+ <Box sx={{ width: '1px', height: 16, backgroundColor: 'divider' }} />
432
+
433
+ {/* Input Tokens */}
434
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
435
+ <InputIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
436
+ <Typography
437
+ variant="caption"
438
+ sx={{
439
+ fontSize: '0.75rem',
440
+ fontWeight: 700,
441
+ color: 'text.primary',
442
+ }}
443
+ >
444
+ {finalStep.metadata.inputTokensUsed.toLocaleString()}
445
+ </Typography>
446
+ </Box>
447
+
448
+ {/* Divider */}
449
+ <Box sx={{ width: '1px', height: 16, backgroundColor: 'divider' }} />
450
+
451
+ {/* Output Tokens */}
452
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
453
+ <OutputIcon sx={{ fontSize: '0.85rem', color: 'primary.main' }} />
454
+ <Typography
455
+ variant="caption"
456
+ sx={{
457
+ fontSize: '0.75rem',
458
+ fontWeight: 700,
459
+ color: 'text.primary',
460
+ }}
461
+ >
462
+ {finalStep.metadata.outputTokensUsed.toLocaleString()}
463
+ </Typography>
464
+ </Box>
465
+ </Box>
466
+ </Paper>
467
+
468
+ {/* GIF Error Alert */}
469
+ {gifError && (
470
+ <Alert severity="error" sx={{ fontSize: '0.72rem', py: 0.5 }}>
471
+ {gifError}
472
+ </Alert>
473
+ )}
474
+
475
+ {/* Action Buttons */}
476
+ <Box
477
+ sx={{
478
+ display: 'flex',
479
+ flexDirection: 'column',
480
+ gap: 1.5,
481
+ alignItems: 'center',
482
+ }}
483
+ >
484
+ {/* Download buttons */}
485
+ <Box
486
+ sx={{
487
+ display: 'flex',
488
+ gap: 1,
489
+ justifyContent: 'center',
490
+ flexWrap: 'wrap',
491
+ }}
492
+ >
493
+ <DownloadGifButton
494
+ isGenerating={isGenerating}
495
+ onClick={onGenerateGif}
496
+ disabled={!steps || steps.length === 0}
497
+ />
498
+ <DownloadJsonButton onClick={onDownloadJson} disabled={!trace} />
499
+ </Box>
500
+
501
+ {/* New Task button - larger and below */}
502
+ <Button
503
+ variant="contained"
504
+ startIcon={<AddIcon sx={{ fontSize: 20 }} />}
505
+ onClick={onBackToHome}
506
+ color="primary"
507
+ sx={{
508
+ textTransform: 'none',
509
+ fontWeight: 700,
510
+ fontSize: '0.9rem',
511
+ px: 3,
512
+ py: 1,
513
+ boxShadow: 2,
514
+ minWidth: 200,
515
+ '&:hover': {
516
+ boxShadow: 4,
517
+ },
518
+ }}
519
+ >
520
+ New Task
521
+ </Button>
522
+ </Box>
523
+ </Box>
524
+ );
525
+ };
src/components/sandbox/completionview/DownloadGifButton.tsx ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Button, CircularProgress, Tooltip } from '@mui/material';
3
+ import GifIcon from '@mui/icons-material/Gif';
4
+
5
/** Props accepted by DownloadGifButton. */
interface DownloadGifButtonProps {
  /** While true the button is disabled, shows a spinner and reads "Generating...". */
  isGenerating: boolean;
  /** Invoked when the (enabled) button is clicked. */
  onClick: () => void;
  /** Disables the button and switches the tooltip to "No steps available". Defaults to false. */
  disabled?: boolean;
}

/**
 * Outlined button that lets the user download a GIF replay of the trace.
 *
 * The button is wrapped in a <span> — presumably so the tooltip keeps an
 * element that still receives pointer events while the button is disabled.
 */
export const DownloadGifButton: React.FC<DownloadGifButtonProps> = (props) => {
  const { isGenerating, onClick, disabled = false } = props;

  // The tooltip only depends on `disabled`, not on the generating state.
  const tooltipText = disabled ? 'No steps available' : 'Download GIF replay';

  // The button cannot be pressed when disabled by the parent or while a GIF is being built.
  const isBlocked = disabled || isGenerating;

  let leadingIcon: React.ReactNode;
  let caption: string;
  if (isGenerating) {
    leadingIcon = <CircularProgress size={16} />;
    caption = 'Generating...';
  } else {
    leadingIcon = <GifIcon sx={{ fontSize: '1.2rem' }} />;
    caption = 'Download GIF';
  }

  return (
    <Tooltip title={tooltipText}>
      <span>
        <Button
          variant="outlined"
          size="small"
          onClick={onClick}
          disabled={isBlocked}
          startIcon={leadingIcon}
          sx={{
            textTransform: 'none',
            fontSize: '0.75rem',
            fontWeight: 600,
            borderRadius: 1,
            px: 1.5,
            py: 0.5,
            borderColor: 'divider',
            color: 'text.primary',
            '&:hover': {
              borderColor: 'primary.main',
              backgroundColor: 'action.hover',
            },
            '&.Mui-disabled': {
              borderColor: 'divider',
              color: 'text.disabled',
            },
          }}
        >
          {caption}
        </Button>
      </span>
    </Tooltip>
  );
};
src/components/sandbox/completionview/DownloadJsonButton.tsx ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Button, Tooltip } from '@mui/material';
3
+ import DownloadIcon from '@mui/icons-material/Download';
4
+
5
/** Props accepted by DownloadJsonButton. */
interface DownloadJsonButtonProps {
  /** Invoked when the (enabled) button is clicked. */
  onClick: () => void;
  /** Disables the button and switches the tooltip to "No trace available". Defaults to false. */
  disabled?: boolean;
}

/**
 * Outlined button that lets the user download the trace as JSON.
 *
 * The button is wrapped in a <span> — presumably so the tooltip keeps an
 * element that still receives pointer events while the button is disabled.
 */
export const DownloadJsonButton: React.FC<DownloadJsonButtonProps> = (props) => {
  const { onClick, disabled = false } = props;

  let tooltipText: string;
  if (disabled) {
    tooltipText = 'No trace available';
  } else {
    tooltipText = 'Download trace as JSON';
  }

  const downloadGlyph = <DownloadIcon sx={{ fontSize: '1.2rem' }} />;

  return (
    <Tooltip title={tooltipText}>
      <span>
        <Button
          variant="outlined"
          size="small"
          onClick={onClick}
          disabled={disabled}
          startIcon={downloadGlyph}
          sx={{
            textTransform: 'none',
            fontSize: '0.75rem',
            fontWeight: 600,
            borderRadius: 1,
            px: 1.5,
            py: 0.5,
            borderColor: 'divider',
            color: 'text.primary',
            '&:hover': {
              borderColor: 'primary.main',
              backgroundColor: 'action.hover',
            },
            '&.Mui-disabled': {
              borderColor: 'divider',
              color: 'text.disabled',
            },
          }}
        >
          Download JSON Trace
        </Button>
      </span>
    </Tooltip>
  );
};
src/components/sandbox/completionview/index.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ export { CompletionView } from './CompletionView';
2
+ export { DownloadGifButton } from './DownloadGifButton';
3
+ export { DownloadJsonButton } from './DownloadJsonButton';
src/components/sandbox/index.ts ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ export { SandboxViewer } from './SandboxViewer';
2
+ export { CompletionView, DownloadGifButton, DownloadJsonButton } from './completionview';
src/components/steps/ConnectionStepCard.tsx ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Card, CardContent, Box, Typography, CircularProgress } from '@mui/material';
3
+ import CableIcon from '@mui/icons-material/Cable';
4
+ import { keyframes } from '@mui/system';
5
+
6
// Keyframes that pulse the card border colour and shadow between a faint and a strong blue.
const borderPulse = keyframes`
  0%, 100% {
    border-color: rgba(79, 134, 198, 0.4);
    box-shadow: 0 2px 8px rgba(79, 134, 198, 0.15);
  }
  50% {
    border-color: rgba(79, 134, 198, 0.8);
    box-shadow: 0 2px 12px rgba(79, 134, 198, 0.3);
  }
`;

// Keyframes that pulse a light blue tint; applied to the card's ::before overlay.
const backgroundPulse = keyframes`
  0%, 100% {
    background-color: rgba(79, 134, 198, 0.03);
  }
  50% {
    background-color: rgba(79, 134, 198, 0.08);
  }
`;

/** Props accepted by ConnectionStepCard. */
interface ConnectionStepCardProps {
  /** True while the connection is being established; false once it is ready. */
  isConnecting: boolean;
}

/**
 * Card showing the connection state.
 *
 * While `isConnecting` is true the card pulses (border and background) and
 * shows a spinner with "Starting FARA..."; otherwise it is static, tinted with
 * the success colour, and shows a cable icon with "Browser Ready".
 */
export const ConnectionStepCard: React.FC<ConnectionStepCardProps> = (props) => {
  const { isConnecting } = props;

  // Palette token shared by the border and the title.
  const accent = isConnecting ? 'primary.main' : 'success.main';
  const cardAnimation = isConnecting ? `${borderPulse} 2s ease-in-out infinite` : 'none';

  const heading = isConnecting ? 'Starting FARA...' : 'Browser Ready';
  const subheading = isConnecting
    ? 'Initializing browser environment'
    : 'Agent ready to execute tasks';

  // Spinner while connecting, cable icon once connected.
  const statusGlyph = isConnecting ? (
    <CircularProgress
      size={32}
      thickness={2.5}
      sx={{
        color: 'primary.main',
      }}
    />
  ) : (
    <CableIcon
      sx={{
        fontSize: 28,
        color: 'success.main',
      }}
    />
  );

  return (
    <Card
      elevation={0}
      sx={{
        backgroundColor: 'background.paper',
        border: '2px solid',
        borderColor: accent,
        borderRadius: 1.5,
        animation: cardAnimation,
        position: 'relative',
        overflow: 'hidden',
        // Full-size overlay that carries the background pulse; empty when connected.
        '&::before': isConnecting
          ? {
              content: '""',
              position: 'absolute',
              top: 0,
              left: 0,
              right: 0,
              bottom: 0,
              animation: `${backgroundPulse} 2s ease-in-out infinite`,
              zIndex: 0,
            }
          : {},
      }}
    >
      {/* zIndex 1 keeps the content above the ::before overlay */}
      <CardContent sx={{ p: 1.5, '&:last-child': { pb: 1.5 }, position: 'relative', zIndex: 1 }}>
        {/* Header: status glyph followed by title and subtitle */}
        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5 }}>
          <Box
            sx={{
              display: 'flex',
              alignItems: 'center',
              justifyContent: 'center',
              position: 'relative',
            }}
          >
            {statusGlyph}
          </Box>

          <Box sx={{ flex: 1, minWidth: 0 }}>
            <Typography
              sx={{
                fontSize: '0.85rem',
                fontWeight: 700,
                color: accent,
                lineHeight: 1.3,
              }}
            >
              {heading}
            </Typography>
            <Typography
              sx={{
                fontSize: '0.7rem',
                color: 'text.secondary',
                lineHeight: 1.2,
              }}
            >
              {subheading}
            </Typography>
          </Box>
        </Box>
      </CardContent>
    </Card>
  );
};
src/components/steps/FinalStepCard.tsx ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useAgentStore } from '@/stores/agentStore';
2
+ import { FinalStep } from '@/types/agent';
3
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
4
+ import CheckIcon from '@mui/icons-material/Check';
5
+ import CloseIcon from '@mui/icons-material/Close';
6
+ import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
7
+ import StopCircleIcon from '@mui/icons-material/StopCircle';
8
+ import { Box, Card, CardContent, Typography } from '@mui/material';
9
+ import React from 'react';
10
+
11
/** Props accepted by FinalStepCard. */
interface FinalStepCardProps {
  /** Terminal step of the trace; its `type` selects the icon, label and colour. */
  finalStep: FinalStep;
  /** Highlights the card (coloured border and glow) when true. Defaults to false. */
  isActive?: boolean;
}

/** Palette keys the card can be tinted with. Kept as a union so palette lookups stay typed. */
type StatusColor = 'success' | 'warning' | 'error';

/** Visual description of one terminal state. */
interface StatusConfig {
  icon: React.ReactNode;
  label: string;
  color: StatusColor;
}

/** RGB triplets used to build the translucent glow for each status colour. */
const STATUS_SHADOW_RGB: Record<StatusColor, string> = {
  success: '102, 187, 106',
  error: '244, 67, 54',
  warning: '255, 152, 0',
};

/**
 * Maps a final-step type to its icon, label and palette colour.
 * Any type that is not explicitly listed is rendered as a failure.
 */
const resolveStatusConfig = (type: FinalStep['type']): StatusConfig => {
  switch (type) {
    case 'success':
      return {
        icon: <CheckIcon sx={{ fontSize: 20, color: 'success.main' }} />,
        label: 'Task completed',
        color: 'success',
      };
    case 'stopped':
      return {
        icon: <StopCircleIcon sx={{ fontSize: 20, color: 'warning.main' }} />,
        label: 'Task stopped',
        color: 'warning',
      };
    case 'max_steps_reached':
      return {
        icon: <HourglassEmptyIcon sx={{ fontSize: 20, color: 'warning.main' }} />,
        label: 'Max steps reached',
        color: 'warning',
      };
    case 'sandbox_timeout':
      return {
        icon: <AccessTimeIcon sx={{ fontSize: 20, color: 'error.main' }} />,
        label: 'Sandbox timeout',
        color: 'error',
      };
    case 'failure':
    default:
      return {
        icon: <CloseIcon sx={{ fontSize: 20, color: 'error.main' }} />,
        label: 'Task failed',
        color: 'error',
      };
  }
};

/**
 * Card summarising how the task ended (completed, stopped, max steps, timeout or failed).
 * Clicking it resets the selected step index to `null`.
 */
export const FinalStepCard: React.FC<FinalStepCardProps> = ({ finalStep, isActive = false }) => {
  const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);

  const statusConfig = resolveStatusConfig(finalStep.type);

  // Builds the glow in the status colour at the requested opacity.
  const glow = (alpha: number) => `0 2px 8px rgba(${STATUS_SHADOW_RGB[statusConfig.color]}, ${alpha})`;

  const handleClick = () => {
    // Clicking on final step goes to live mode (null)
    setSelectedStepIndex(null);
  };

  return (
    <Card
      elevation={0}
      onClick={handleClick}
      sx={{
        backgroundColor: 'background.paper',
        border: '1px solid',
        borderColor: (theme) => `${isActive
          ? theme.palette[statusConfig.color].main
          : theme.palette.divider} !important`,
        borderRadius: 1.5,
        transition: 'all 0.2s ease',
        cursor: 'pointer',
        // Active card: stronger glow (0.3 in dark mode, 0.2 in light mode).
        boxShadow: isActive
          ? (theme) => glow(theme.palette.mode === 'dark' ? 0.3 : 0.2)
          : 'none',
        '&:hover': {
          borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,
          // Hover: softer glow (0.2 in dark mode, 0.1 in light mode).
          boxShadow: (theme) => glow(theme.palette.mode === 'dark' ? 0.2 : 0.1),
        },
      }}
    >
      <CardContent sx={{ p: 1.5, '&:last-child': { pb: 1.5 } }}>
        {/* Header with icon */}
        <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.75 }}>
          {statusConfig.icon}
          <Typography
            sx={{
              fontSize: '0.85rem',
              fontWeight: 700,
              color: `${statusConfig.color}.main`,
            }}
          >
            {statusConfig.label}
          </Typography>
        </Box>
      </CardContent>
    </Card>
  );
};
src/components/steps/StepCard.tsx ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { updateStepEvaluation } from '@/services/api';
2
+ import { useAgentStore } from '@/stores/agentStore';
3
+ import { AgentStep } from '@/types/agent';
4
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
5
+ import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
6
+ import InputIcon from '@mui/icons-material/Input';
7
+ import OutputIcon from '@mui/icons-material/Output';
8
+ import ThumbDownIcon from '@mui/icons-material/ThumbDown';
9
+ import ThumbUpIcon from '@mui/icons-material/ThumbUp';
10
+ import { Accordion, AccordionDetails, AccordionSummary, Box, Card, CardContent, Chip, IconButton, Tooltip, Typography } from '@mui/material';
11
+ import React, { useState } from 'react';
12
+
13
/** Props accepted by StepCard. */
interface StepCardProps {
  /** The agent step to render. */
  step: AgentStep;
  /** Zero-based position of the step; shown to the user as `index + 1`. */
  index: number;
  /** Accepted for API compatibility; the card does not currently read it. */
  isLatest?: boolean;
  /** Highlights the card (primary border and glow) when true. Defaults to false. */
  isActive?: boolean;
}

/**
 * Card for a single agent step: step number, timing/token chips, screenshot,
 * action list with like/dislike voting, collapsible thought and error message.
 *
 * Clicking the card selects the step; clicks on the inner controls
 * (expand toggle, vote buttons, thought accordion) stop propagation so they
 * do not change the selection.
 */
export const StepCard: React.FC<StepCardProps> = ({ step, index, isActive = false }) => {
  const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
  const updateStepEvaluationInStore = useAgentStore((state) => state.updateStepEvaluation);
  const [thoughtExpanded, setThoughtExpanded] = useState(false);
  const [actionsExpanded, setActionsExpanded] = useState(false);
  const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
  const [isVoting, setIsVoting] = useState(false);

  // When collapsed, only the first action of a multi-action step is shown.
  const hasMultipleActions = step.actions && step.actions.length > 1;
  const displayedActions = hasMultipleActions && !actionsExpanded
    ? step.actions.slice(0, 1)
    : step.actions;

  // Header chips: duration, input tokens, output tokens (same styling for all three).
  const metrics = [
    { key: 'duration', Icon: AccessTimeIcon, label: `${step.duration.toFixed(1)}s` },
    { key: 'input', Icon: InputIcon, label: step.inputTokensUsed.toLocaleString() },
    { key: 'output', Icon: OutputIcon, label: step.outputTokensUsed.toLocaleString() },
  ];

  const handleClick = () => {
    setSelectedStepIndex(index);
  };

  const handleAccordionClick = (event: React.MouseEvent) => {
    event.stopPropagation(); // Prevent propagation to avoid selecting the step
  };

  /**
   * Sends the vote to the API, then mirrors it in local state and in the store.
   * Voting for the current evaluation again resets it to 'neutral'.
   * On failure the error is logged and the previous evaluation is kept.
   */
  const handleVote = async (event: React.MouseEvent, vote: 'like' | 'dislike') => {
    event.stopPropagation(); // Prevent propagation to avoid selecting the step

    if (isVoting) return;

    const newEvaluation = evaluation === vote ? 'neutral' : vote;
    setIsVoting(true);

    try {
      await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
      setEvaluation(newEvaluation);
      // Update the store so the evaluation is reflected in JSON export
      updateStepEvaluationInStore(step.stepId, newEvaluation);
    } catch (error) {
      console.error('Failed to update step evaluation:', error);
    } finally {
      setIsVoting(false);
    }
  };

  return (
    <Card
      elevation={0}
      onClick={handleClick}
      sx={{
        backgroundColor: 'background.paper',
        border: '1px solid',
        borderColor: (theme) => `${isActive ? theme.palette.primary.main : theme.palette.divider} !important`,
        borderRadius: 1.5,
        transition: 'all 0.2s ease',
        cursor: 'pointer',
        boxShadow: isActive ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark' ? 'rgba(79, 134, 198, 0.3)' : 'rgba(79, 134, 198, 0.2)'}` : 'none',
        '&:hover': {
          borderColor: (theme) => `${theme.palette.primary.main} !important`,
          boxShadow: (theme) => `0 2px 8px ${theme.palette.mode === 'dark' ? 'rgba(79, 134, 198, 0.2)' : 'rgba(79, 134, 198, 0.1)'}`,
        },
      }}
    >
      <CardContent sx={{ p: 1.5, '&:last-child': { pb: 1.5 } }}>
        {/* Step header */}
        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 1.5 }}>
          <Typography
            sx={{
              fontSize: '1.5rem',
              fontWeight: 800,
              color: isActive ? 'primary.main' : 'text.primary',
              lineHeight: 1,
            }}
          >
            {index + 1}
          </Typography>
          <Box sx={{ display: 'flex', gap: 0.5, alignItems: 'center' }}>
            {metrics.map(({ key, Icon, label }) => (
              <Chip
                key={key}
                icon={<Icon sx={{ fontSize: '0.7rem !important' }} />}
                label={label}
                size="small"
                sx={{
                  height: 'auto',
                  py: 0.25,
                  fontSize: '0.65rem',
                  fontWeight: 600,
                  backgroundColor: 'action.hover',
                  color: 'text.primary',
                  '& .MuiChip-icon': { marginLeft: 0.5, color: 'text.secondary' },
                }}
              />
            ))}
          </Box>
        </Box>

        {/* Step image */}
        {step.image && (
          <Box
            sx={{
              mb: 1.5,
              borderRadius: 1,
              overflow: 'hidden',
              border: '1px solid',
              borderColor: (theme) => isActive ? theme.palette.primary.main : theme.palette.divider,
              backgroundColor: 'action.hover',
              transition: 'border-color 0.2s ease',
            }}
          >
            <img
              src={step.image}
              alt={`Step ${index + 1}`}
              style={{ width: '100%', height: 'auto', display: 'block' }}
            />
          </Box>
        )}

        {/* Action */}
        {step.actions && step.actions.length > 0 && (
          <Box sx={{ mb: 1.5 }}>
            <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5, mb: 0.75, justifyContent: 'space-between' }}>
              <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                <Typography
                  variant="caption"
                  sx={{
                    fontWeight: 700,
                    color: 'text.secondary',
                    fontSize: '0.65rem',
                    textTransform: 'uppercase',
                    letterSpacing: '0.5px',
                  }}
                >
                  Action
                </Typography>
                {hasMultipleActions && (
                  <Tooltip title={actionsExpanded ? 'Show less' : `Show all ${step.actions.length} actions`}>
                    <IconButton
                      size="small"
                      onClick={(e) => {
                        e.stopPropagation();
                        setActionsExpanded(!actionsExpanded);
                      }}
                      sx={{
                        padding: '2px',
                        color: 'text.secondary',
                        '&:hover': {
                          color: 'text.primary',
                          backgroundColor: 'action.hover',
                        },
                      }}
                    >
                      <ExpandMoreIcon
                        sx={{
                          fontSize: 16,
                          transform: actionsExpanded ? 'rotate(180deg)' : 'rotate(0deg)',
                          transition: 'transform 0.2s',
                        }}
                      />
                    </IconButton>
                  </Tooltip>
                )}
              </Box>

              {/* Vote buttons.
                  Each button is wrapped in a span because it is disabled while a vote is
                  in flight: a disabled button fires no events, so the Tooltip needs a
                  wrapper element to listen on (MUI warns otherwise). */}
              <Box sx={{ display: 'flex', gap: 0.5 }}>
                <Tooltip title={evaluation === 'like' ? 'Remove like' : 'Like this step'}>
                  <Box component="span" sx={{ display: 'inline-flex' }}>
                    <IconButton
                      size="small"
                      onClick={(e) => handleVote(e, 'like')}
                      disabled={isVoting}
                      sx={{
                        padding: '2px',
                        color: evaluation === 'like' ? 'success.main' : 'action.disabled',
                        '&:hover': {
                          color: 'success.main',
                          backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(102, 187, 106, 0.1)' : 'rgba(102, 187, 106, 0.08)',
                        },
                      }}
                    >
                      <ThumbUpIcon sx={{ fontSize: 14 }} />
                    </IconButton>
                  </Box>
                </Tooltip>
                <Tooltip title={evaluation === 'dislike' ? 'Remove dislike' : 'Dislike this step'}>
                  <Box component="span" sx={{ display: 'inline-flex' }}>
                    <IconButton
                      size="small"
                      onClick={(e) => handleVote(e, 'dislike')}
                      disabled={isVoting}
                      sx={{
                        padding: '2px',
                        color: evaluation === 'dislike' ? 'error.main' : 'action.disabled',
                        '&:hover': {
                          color: 'error.main',
                          backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
                        },
                      }}
                    >
                      <ThumbDownIcon sx={{ fontSize: 14 }} />
                    </IconButton>
                  </Box>
                </Tooltip>
              </Box>
            </Box>
            <Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0 }}>
              {displayedActions?.map((action, actionIndex) => (
                <Box
                  key={actionIndex}
                  component="li"
                  sx={{
                    display: 'flex',
                    alignItems: 'flex-start',
                    fontSize: '0.75rem',
                    color: 'text.primary',
                    lineHeight: 1.4,
                    mb: 0.5,
                    '&:last-child': { mb: 0 },
                  }}
                >
                  <Typography
                    component="span"
                    sx={{
                      fontSize: '0.75rem',
                      fontWeight: 900,
                      wordBreak: 'break-word',
                    }}
                  >
                    {action.description}
                  </Typography>
                </Box>
              ))}
            </Box>
          </Box>
        )}

        {/* Thought - Accordion */}
        {step.thought && (
          <Accordion
            expanded={thoughtExpanded}
            onChange={(_event, expanded) => setThoughtExpanded(expanded)}
            onClick={handleAccordionClick}
            elevation={0}
            disableGutters
            sx={{
              mb: 0.5,
              backgroundColor: 'transparent',
              border: 'none',
              boxShadow: 'none',
              '&:before': { display: 'none' },
              '&.MuiAccordion-root': {
                backgroundColor: 'transparent',
                boxShadow: 'none',
                '&:before': {
                  display: 'none',
                },
              },
              '& .MuiAccordionSummary-root': {
                minHeight: 'auto',
                p: 0,
                backgroundColor: 'transparent',
                '&:hover': {
                  backgroundColor: 'transparent',
                },
                '&.Mui-expanded': {
                  minHeight: 'auto',
                },
              },
              '& .MuiAccordionSummary-content': {
                margin: '0 !important',
              },
              '& .MuiAccordionDetails-root': {
                p: 0,
                pt: 0.5,
                pb: 0,
                backgroundColor: 'transparent',
              },
            }}
          >
            <AccordionSummary
              expandIcon={<ExpandMoreIcon sx={{ fontSize: 16, color: 'text.secondary' }} />}
              sx={{
                flexDirection: 'row',
                border: 'none',
                // Chevron points sideways when collapsed and down when expanded.
                '& .MuiAccordionSummary-expandIconWrapper': {
                  transform: 'rotate(-90deg)',
                  transition: 'transform 0.2s',
                  '&.Mui-expanded': {
                    transform: 'rotate(0deg)',
                  },
                },
              }}
            >
              <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                <Typography
                  variant="caption"
                  sx={{
                    fontWeight: 700,
                    color: 'text.secondary',
                    fontSize: '0.65rem',
                    textTransform: 'uppercase',
                    letterSpacing: '0.5px',
                  }}
                >
                  Thought
                </Typography>
              </Box>
            </AccordionSummary>
            <AccordionDetails>
              <Typography
                variant="body2"
                sx={{
                  fontSize: '0.75rem',
                  color: 'text.primary',
                  lineHeight: 1.4,
                  pl: 2.5,
                }}
              >
                {step.thought}
              </Typography>
            </AccordionDetails>
          </Accordion>
        )}

        {/* Error */}
        {step.error && (
          <Box sx={{
            mt: 1.5,
            p: 1,
            borderRadius: 1,
            backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
            border: '1px solid',
            borderColor: 'error.main'
          }}>
            <Typography
              variant="caption"
              sx={{
                fontSize: '0.7rem',
                color: 'error.main',
                fontWeight: 600,
              }}
            >
              Error: {step.error}
            </Typography>
          </Box>
        )}
      </CardContent>
    </Card>
  );
};
src/components/steps/StepsList.tsx ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useRef, useEffect } from 'react';
2
+ import { AgentTrace } from '@/types/agent';
3
+ import { Box, Typography, Stack, Paper } from '@mui/material';
4
+ import { StepCard } from './StepCard';
5
+ import { FinalStepCard } from './FinalStepCard';
6
+ import { ThinkingStepCard } from './ThinkingStepCard';
7
+ import { ConnectionStepCard } from './ConnectionStepCard';
8
+ import ListAltIcon from '@mui/icons-material/ListAlt';
9
+ import FormatListNumberedIcon from '@mui/icons-material/FormatListNumbered';
10
+ import { useAgentStore, selectSelectedStepIndex, selectFinalStep, selectIsConnectingToE2B, selectIsAgentProcessing } from '@/stores/agentStore';
11
+
12
+ interface StepsListProps {
13
+ trace?: AgentTrace;
14
+ }
15
+
16
+ export const StepsList: React.FC<StepsListProps> = ({ trace }) => {
17
+ const containerRef = useRef<HTMLDivElement>(null);
18
+ const selectedStepIndex = useAgentStore(selectSelectedStepIndex);
19
+ const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
20
+ const finalStep = useAgentStore(selectFinalStep);
21
+ const isConnectingToE2B = useAgentStore(selectIsConnectingToE2B);
22
+ const isAgentProcessing = useAgentStore(selectIsAgentProcessing);
23
+ const isScrollingProgrammatically = useRef(false);
24
+ const [showThinkingCard, setShowThinkingCard] = React.useState(false);
25
+ const thinkingTimeoutRef = useRef<NodeJS.Timeout | null>(null);
26
+ const streamStartTimeRef = useRef<number | null>(null);
27
+ const [showConnectionCard, setShowConnectionCard] = React.useState(false);
28
+ const hasConnectedRef = useRef(false);
29
+
30
+ // Check if final step is active (when selectedStepIndex is null and finalStep exists and trace is not running)
31
+ const isFinalStepActive = selectedStepIndex === null && finalStep && !trace?.isRunning;
32
+
33
+ // Check if thinking card is active (when in live mode and thinking card is shown)
34
+ const isThinkingCardActive = selectedStepIndex === null && showThinkingCard;
35
+
36
+ // Determine the active step index:
37
+ // - If a specific step is selected, use that
38
+ // - If the final step is active, no normal step should be active
39
+ // - If the thinking card is active, no normal step should be active
40
+ // - Otherwise, the last step (if any) is active, whether or not the trace is still running
41
+ const lastStepIndex = (trace?.steps && trace.steps.length > 0)
42
+ ? trace.steps.length - 1
43
+ : null;
44
+
45
+ const activeStepIndex = selectedStepIndex !== null
46
+ ? selectedStepIndex
47
+ : isFinalStepActive
48
+ ? null // When final step is active, no normal step is active
49
+ : isThinkingCardActive
50
+ ? null // When thinking card is active, no normal step is active
51
+ : lastStepIndex;
52
+
53
+ // Manage ConnectionStepCard display:
54
+ // - Shows when isConnectingToE2B = true OR when we had a connection
55
+ // - Remains visible even when task is finished (if we have steps or finalStep)
56
+ useEffect(() => {
57
+ if (isConnectingToE2B || isAgentProcessing || (trace?.steps && trace.steps.length > 0) || finalStep) {
58
+ setShowConnectionCard(true);
59
+ hasConnectedRef.current = true;
60
+ }
61
+ }, [isConnectingToE2B, isAgentProcessing, trace?.steps, finalStep]);
62
+
63
+ // Manage ThinkingCard display:
64
+ // - Appears 5 seconds AFTER stream starts (isAgentProcessing = true, NOT during isConnectingToE2B)
65
+ // - Remains visible during the entire agent processing
66
+ // - Hides only when agent stops OR a finalStep exists
67
+ useEffect(() => {
68
+ // If stream really starts (isAgentProcessing = true and NOT connecting)
69
+ // And no startTime recorded yet
70
+ if (isAgentProcessing && !isConnectingToE2B && !streamStartTimeRef.current) {
71
+ streamStartTimeRef.current = Date.now();
72
+ }
73
+
74
+ // If agent stops OR we have a finalStep, reset and hide
75
+ if (!isAgentProcessing || finalStep) {
76
+ streamStartTimeRef.current = null;
77
+ setShowThinkingCard(false);
78
+ if (thinkingTimeoutRef.current) {
79
+ clearTimeout(thinkingTimeoutRef.current);
80
+ thinkingTimeoutRef.current = null;
81
+ }
82
+ return;
83
+ }
84
+
85
+ // If agent is running, not connecting, no finalStep: start 5 second timer
86
+ if (isAgentProcessing && !isConnectingToE2B && !finalStep && streamStartTimeRef.current) {
87
+ // Clean up any existing timeout
88
+ if (thinkingTimeoutRef.current) {
89
+ clearTimeout(thinkingTimeoutRef.current);
90
+ }
91
+
92
+ // Calculate elapsed time since stream started
93
+ const elapsedTime = Date.now() - streamStartTimeRef.current;
94
+ const remainingTime = Math.max(0, 5000 - elapsedTime);
95
+
96
+ thinkingTimeoutRef.current = setTimeout(() => {
97
+ setShowThinkingCard(true);
98
+ }, remainingTime);
99
+ }
100
+
101
+ // Cleanup on unmount or when dependencies change
102
+ return () => {
103
+ if (thinkingTimeoutRef.current) {
104
+ clearTimeout(thinkingTimeoutRef.current);
105
+ thinkingTimeoutRef.current = null;
106
+ }
107
+ };
108
+ }, [isAgentProcessing, isConnectingToE2B, finalStep]);
109
+
110
+ // Auto-scroll logic
111
+ useEffect(() => {
112
+ const container = containerRef.current;
113
+ if (!container) return;
114
+
115
+ isScrollingProgrammatically.current = true;
116
+ let resetTimer: ReturnType<typeof setTimeout> | undefined;
117
+
118
+ // Use setTimeout to ensure DOM has updated
119
+ const scrollTimer = setTimeout(() => {
120
+ // LIVE MODE: Always scroll to the bottom (last visible element)
121
+ if (selectedStepIndex === null) {
122
+ container.scrollTo({
123
+ top: container.scrollHeight,
124
+ behavior: 'smooth',
125
+ });
126
+ }
127
+ // NON-LIVE MODE: Scroll to selected step
128
+ else {
129
+ const selectedElement = container.querySelector(`[data-step-index="${selectedStepIndex}"]`);
130
+ selectedElement?.scrollIntoView({ behavior: 'smooth', block: 'center' });
131
+ }
132
+
133
+ // Reset flag after scroll animation
134
+ resetTimer = setTimeout(() => {
135
+ isScrollingProgrammatically.current = false;
136
+ }, 500);
137
+ }, 100);
138
+
139
+ // Cancel pending timers on re-run/unmount so a stale timer from an earlier run cannot
140
+ // scroll with outdated state or clear the programmatic-scroll flag during a newer scroll
141
+ return () => {
142
+ clearTimeout(scrollTimer);
143
+ clearTimeout(resetTimer);
144
+ };
145
+ }, [selectedStepIndex, trace?.steps?.length, showThinkingCard, finalStep]);
146
+
147
+ // Detect which step is visible when scrolling (steps → timeline)
148
+ useEffect(() => {
149
+ const container = containerRef.current;
150
+ if (!container || !trace?.steps || trace.steps.length === 0) return;
151
+
152
+ const handleScroll = () => {
153
+ // Don't update if we're scrolling programmatically
154
+ if (isScrollingProgrammatically.current) return;
155
+
156
+ // Don't update if agent is running (stay in live mode)
157
+ if (trace?.isRunning) return;
158
+
159
+ const containerRect = container.getBoundingClientRect();
160
+ const containerTop = containerRect.top;
161
+ const containerBottom = containerRect.bottom;
162
+ const containerCenter = containerRect.top + containerRect.height / 2;
163
+
164
+ // Check scroll position
165
+ const isAtTop = container.scrollTop <= 5; // 5px tolerance
166
+ const isAtBottom = container.scrollTop + container.clientHeight >= container.scrollHeight - 5; // 5px tolerance
167
+
168
+ let targetStepIndex: number | null = -1;
169
+ let targetDistance = Infinity;
170
+ let isFinalStepTarget = false;
171
+
172
+ if (isAtTop) {
173
+ // At the top: find the highest visible step
174
+ let highestVisibleBottom = Infinity;
175
+
176
+ trace.steps.forEach((_, index) => {
177
+ const stepElement = container.querySelector(`[data-step-index="${index}"]`);
178
+ if (stepElement) {
179
+ const stepRect = stepElement.getBoundingClientRect();
180
+ const stepTop = stepRect.top;
181
+ const stepBottom = stepRect.bottom;
182
+ const isVisible = stepTop < containerBottom && stepBottom > containerTop;
183
+
184
+ if (isVisible && stepTop < highestVisibleBottom) {
185
+ highestVisibleBottom = stepTop;
186
+ targetStepIndex = index;
187
+ isFinalStepTarget = false;
188
+ }
189
+ }
190
+ });
191
+ } else if (isAtBottom) {
192
+ // At the bottom: find the lowest visible step
193
+ let lowestVisibleTop = -Infinity;
194
+
195
+ trace.steps.forEach((_, index) => {
196
+ const stepElement = container.querySelector(`[data-step-index="${index}"]`);
197
+ if (stepElement) {
198
+ const stepRect = stepElement.getBoundingClientRect();
199
+ const stepTop = stepRect.top;
200
+ const stepBottom = stepRect.bottom;
201
+ const isVisible = stepTop < containerBottom && stepBottom > containerTop;
202
+
203
+ if (isVisible && stepTop > lowestVisibleTop) {
204
+ lowestVisibleTop = stepTop;
205
+ targetStepIndex = index;
206
+ isFinalStepTarget = false;
207
+ }
208
+ }
209
+ });
210
+
211
+ // Check if final step is the lowest visible
212
+ if (finalStep) {
213
+ const finalStepElement = container.querySelector(`[data-step-index="final"]`);
214
+ if (finalStepElement) {
215
+ const finalStepRect = finalStepElement.getBoundingClientRect();
216
+ const finalStepTop = finalStepRect.top;
217
+ const finalStepBottom = finalStepRect.bottom;
218
+ const isVisible = finalStepTop < containerBottom && finalStepBottom > containerTop;
219
+
220
+ if (isVisible && finalStepTop > lowestVisibleTop) {
221
+ targetStepIndex = null;
222
+ isFinalStepTarget = true;
223
+ }
224
+ }
225
+ }
226
+ } else {
227
+ // Neither at top nor at bottom: find the step closest to center
228
+ trace.steps.forEach((_, index) => {
229
+ const stepElement = container.querySelector(`[data-step-index="${index}"]`);
230
+ if (stepElement) {
231
+ const stepRect = stepElement.getBoundingClientRect();
232
+ const stepCenter = stepRect.top + stepRect.height / 2;
233
+ const distance = Math.abs(containerCenter - stepCenter);
234
+
235
+ if (distance < targetDistance) {
236
+ targetDistance = distance;
237
+ targetStepIndex = index;
238
+ isFinalStepTarget = false;
239
+ }
240
+ }
241
+ });
242
+
243
+ // Check if final step is closest to center
244
+ if (finalStep) {
245
+ const finalStepElement = container.querySelector(`[data-step-index="final"]`);
246
+ if (finalStepElement) {
247
+ const finalStepRect = finalStepElement.getBoundingClientRect();
248
+ const finalStepCenter = finalStepRect.top + finalStepRect.height / 2;
249
+ const distance = Math.abs(containerCenter - finalStepCenter);
250
+
251
+ if (distance < targetDistance) {
252
+ targetStepIndex = null;
253
+ isFinalStepTarget = true;
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ // Update the selected step if changed
260
+ if (isFinalStepTarget && selectedStepIndex !== null) {
261
+ setSelectedStepIndex(null);
262
+ } else if (!isFinalStepTarget && targetStepIndex !== -1 && targetStepIndex !== selectedStepIndex) {
263
+ setSelectedStepIndex(targetStepIndex);
264
+ }
265
+ };
266
+
267
+ // Debounce scroll events: handleScroll runs once scrolling has paused for 150ms
268
+ let scrollTimeout: ReturnType<typeof setTimeout> | undefined;
269
+ const debouncedScroll = () => {
270
+ clearTimeout(scrollTimeout);
271
+ scrollTimeout = setTimeout(handleScroll, 150);
272
+ };
273
+
274
+ container.addEventListener('scroll', debouncedScroll);
275
+ return () => {
276
+ container.removeEventListener('scroll', debouncedScroll);
277
+ clearTimeout(scrollTimeout);
278
+ };
279
+ }, [trace?.steps, selectedStepIndex, setSelectedStepIndex, finalStep]);
280
+
281
+ return (
282
+ <Paper
283
+ elevation={0}
284
+ sx={{
285
+ width: { xs: '100%', md: 320 },
286
+ flexShrink: 0,
287
+ display: 'flex',
288
+ flexDirection: 'column',
289
+ ml: { xs: 0, md: 1.5 },
290
+ mt: { xs: 3, md: 0 },
291
+ overflow: 'hidden',
292
+ }}
293
+ >
294
+ <Box sx={{ px: 2, py: 1.5, borderBottom: '1px solid', borderColor: 'divider', display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
295
+ <Typography variant="h6" sx={{ fontSize: '0.9rem', fontWeight: 700, color: 'text.primary' }}>
296
+ Steps
297
+ </Typography>
298
+ {trace?.traceMetadata && trace.traceMetadata.numberOfSteps > 0 && (
299
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 0 }}>
300
+ <Typography
301
+ variant="caption"
302
+ sx={{
303
+ fontSize: '0.75rem',
304
+ fontWeight: 700,
305
+ color: 'text.primary',
306
+ }}
307
+ >
308
+ {trace.traceMetadata.numberOfSteps}
309
+ </Typography>
310
+ <Typography
311
+ variant="caption"
312
+ sx={{
313
+ fontSize: '0.75rem',
314
+ fontWeight: 700,
315
+ color: 'text.disabled',
316
+ }}
317
+ >
318
+ /{trace.traceMetadata.maxSteps}
319
+ </Typography>
320
+ </Box>
321
+ )}
322
+ </Box>
323
+ <Box
324
+ ref={containerRef}
325
+ sx={{
326
+ flex: 1,
327
+ overflowY: 'auto',
328
+ minHeight: 0,
329
+ p: 2,
330
+ }}
331
+ >
332
+ {(trace?.steps && trace.steps.length > 0) || finalStep || showThinkingCard || showConnectionCard ? (
333
+ <Stack spacing={2.5}>
334
+ {/* Show connection step card (first item) */}
335
+ {showConnectionCard && (
336
+ <Box data-step-index="connection">
337
+ <ConnectionStepCard isConnecting={isConnectingToE2B} />
338
+ </Box>
339
+ )}
340
+
341
+ {/* Show all steps */}
342
+ {trace?.steps && trace.steps.map((step, index) => (
343
+ <Box key={step.stepId} data-step-index={index}>
344
+ <StepCard
345
+ step={step}
346
+ index={index}
347
+ isLatest={index === trace.steps!.length - 1}
348
+ isActive={index === activeStepIndex}
349
+ />
350
+ </Box>
351
+ ))}
352
+
353
+ {/* Show thinking indicator after steps (appears 5 seconds after stream start) */}
354
+ {showThinkingCard && (
355
+ <Box data-step-index="thinking">
356
+ <ThinkingStepCard isActive={isThinkingCardActive} />
357
+ </Box>
358
+ )}
359
+
360
+ {/* Show final step card if exists */}
361
+ {finalStep && (
362
+ <Box data-step-index="final">
363
+ <FinalStepCard
364
+ finalStep={finalStep}
365
+ isActive={isFinalStepActive}
366
+ />
367
+ </Box>
368
+ )}
369
+ </Stack>
370
+ ) : (
371
+ <Box
372
+ sx={{
373
+ display: 'flex',
374
+ flexDirection: 'column',
375
+ alignItems: 'center',
376
+ justifyContent: 'center',
377
+ height: '100%',
378
+ color: 'text.secondary',
379
+ p: 3,
380
+ textAlign: 'center',
381
+ }}
382
+ >
383
+ <ListAltIcon sx={{ fontSize: 48, mb: 2, opacity: 0.5 }} />
384
+ <Typography variant="body1" sx={{ fontWeight: 600, mb: 0.5 }}>
385
+ No steps yet
386
+ </Typography>
387
+ <Typography variant="caption" sx={{ fontSize: '0.75rem' }}>
388
+ Steps will appear as the agent progresses
389
+ </Typography>
390
+ </Box>
391
+ )}
392
+ </Box>
393
+ </Paper>
394
+ );
395
+ };
src/components/steps/ThinkingStepCard.tsx ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Card, CardContent, Box, Typography, CircularProgress } from '@mui/material';
3
+ import { keyframes } from '@mui/system';
4
+
5
+ // Border pulse animation
6
+ const borderPulse = keyframes`
7
+ 0%, 100% {
8
+ border-color: rgba(79, 134, 198, 0.4);
9
+ box-shadow: 0 2px 8px rgba(79, 134, 198, 0.15);
10
+ }
11
+ 50% {
12
+ border-color: rgba(79, 134, 198, 0.8);
13
+ box-shadow: 0 2px 12px rgba(79, 134, 198, 0.3);
14
+ }
15
+ `;
16
+
17
+ // Background pulse animation
18
+ const backgroundPulse = keyframes`
19
+ 0%, 100% {
20
+ background-color: rgba(79, 134, 198, 0.03);
21
+ }
22
+ 50% {
23
+ background-color: rgba(79, 134, 198, 0.08);
24
+ }
25
+ `;
26
+
27
+ interface ThinkingStepCardProps {
28
+ isActive?: boolean;
29
+ }
30
+
31
+ export const ThinkingStepCard: React.FC<ThinkingStepCardProps> = ({ isActive = false }) => {
32
+
33
+ return (
34
+ <Card
35
+ elevation={0}
36
+ sx={{
37
+ backgroundColor: 'background.paper',
38
+ border: '1px solid',
39
+ borderColor: (theme) => `${isActive ? theme.palette.primary.main : theme.palette.divider} !important`,
40
+ borderRadius: 1.5,
41
+ animation: isActive ? `${borderPulse} 2s ease-in-out infinite` : 'none',
42
+ position: 'relative',
43
+ overflow: 'hidden',
44
+ boxShadow: isActive ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark' ? 'rgba(79, 134, 198, 0.3)' : 'rgba(79, 134, 198, 0.2)'}` : 'none',
45
+ transition: 'all 0.2s ease',
46
+ '&::before': {
47
+ content: '""',
48
+ position: 'absolute',
49
+ top: 0,
50
+ left: 0,
51
+ right: 0,
52
+ bottom: 0,
53
+ animation: isActive ? `${backgroundPulse} 2s ease-in-out infinite` : 'none',
54
+ zIndex: 0,
55
+ },
56
+ }}
57
+ >
58
+ <CardContent sx={{ p: 1.5, '&:last-child': { pb: 1.5 }, position: 'relative', zIndex: 1 }}>
59
+ {/* Header with spinner */}
60
+ <Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5 }}>
61
+ <Box
62
+ sx={{
63
+ display: 'flex',
64
+ alignItems: 'center',
65
+ justifyContent: 'center',
66
+ }}
67
+ >
68
+ {/* Circular spinner */}
69
+ <CircularProgress
70
+ size={32}
71
+ thickness={3.5}
72
+ sx={{
73
+ color: 'primary.main',
74
+ }}
75
+ />
76
+ </Box>
77
+
78
+ <Box sx={{ flex: 1, minWidth: 0 }}>
79
+ <Typography
80
+ sx={{
81
+ fontSize: '0.85rem',
82
+ fontWeight: 700,
83
+ color: 'primary.main',
84
+ lineHeight: 1.3,
85
+ }}
86
+ >
87
+ Agent
88
+ </Typography>
89
+ <Typography
90
+ sx={{
91
+ fontSize: '0.7rem',
92
+ color: 'text.secondary',
93
+ lineHeight: 1.2,
94
+ fontStyle: 'italic',
95
+ }}
96
+ >
97
+ Thinking...
98
+ </Typography>
99
+ </Box>
100
+ </Box>
101
+ </CardContent>
102
+ </Card>
103
+ );
104
+ };
src/components/steps/index.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ export { StepsList } from './StepsList';
2
+ export { StepCard } from './StepCard';
3
+ export { ThinkingStepCard } from './ThinkingStepCard';
4
+ export { FinalStepCard } from './FinalStepCard';
5
+ export { ConnectionStepCard } from './ConnectionStepCard';
src/components/timeline/Timeline.tsx ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useRef, useEffect } from 'react';
2
+ import { Box, Typography, CircularProgress, Button } from '@mui/material';
3
+ import CheckIcon from '@mui/icons-material/Check';
4
+ import CloseIcon from '@mui/icons-material/Close';
5
+ import StopCircleIcon from '@mui/icons-material/StopCircle';
6
+ import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
7
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
8
+ import CableIcon from '@mui/icons-material/Cable';
9
+ import { AgentTraceMetadata } from '@/types/agent';
10
+ import { useAgentStore, selectSelectedStepIndex, selectFinalStep, selectIsConnectingToE2B, selectIsAgentProcessing } from '@/stores/agentStore';
11
+
12
+ interface TimelineProps {
13
+ metadata: AgentTraceMetadata;
14
+ isRunning: boolean;
15
+ }
16
+
17
+ export const Timeline: React.FC<TimelineProps> = ({ metadata, isRunning }) => {
18
+ const timelineRef = useRef<HTMLDivElement>(null);
19
+ const selectedStepIndex = useAgentStore(selectSelectedStepIndex);
20
+ const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
21
+ const finalStep = useAgentStore(selectFinalStep);
22
+ const isConnectingToE2B = useAgentStore(selectIsConnectingToE2B);
23
+ const isAgentProcessing = useAgentStore(selectIsAgentProcessing);
24
+
25
+ // Show connection indicator if connecting or if we have started processing
26
+ const showConnectionIndicator = isConnectingToE2B || isAgentProcessing || (metadata.numberOfSteps > 0) || finalStep;
27
+
28
+ // Generate array of steps with their status
29
+ // Only show completed steps + current step if running
30
+ const totalStepsToShow = isRunning && !isConnectingToE2B
31
+ ? metadata.numberOfSteps + 1 // Show completed steps + current step
32
+ : metadata.numberOfSteps; // Show only completed steps when not running
33
+
34
+ // Calculate total width for the line (including finalStep if present)
35
+ const lineWidth = finalStep
36
+ ? `calc(${totalStepsToShow} * (40px + 12px) + 52px)` // Add space for finalStep (40px + 12px gap)
37
+ : `calc(${totalStepsToShow} * (40px + 12px))`;
38
+
39
+ const steps = Array.from({ length: totalStepsToShow }, (_, index) => ({
40
+ stepNumber: index + 1,
41
+ stepIndex: index,
42
+ isCompleted: index < metadata.numberOfSteps,
43
+ // Step is current if: we're at the right index AND running AND not connecting to E2B
44
+ isCurrent: (index === metadata.numberOfSteps && isRunning && !isConnectingToE2B) ||
45
+ (index === 0 && metadata.numberOfSteps === 0 && isRunning && !isConnectingToE2B),
46
+ isSelected: selectedStepIndex === index,
47
+ }));
48
+
49
+ // Handle step click
50
+ const handleStepClick = (stepIndex: number, isCompleted: boolean, isCurrent: boolean) => {
51
+ if (isCompleted) {
52
+ setSelectedStepIndex(stepIndex);
53
+ } else if (isCurrent) {
54
+ // Clicking on the current step (with animation) goes back to live mode
55
+ setSelectedStepIndex(null);
56
+ }
57
+ };
58
+
59
+ // Handle final step click (goes to live mode showing the final status)
60
+ const handleFinalStepClick = () => {
61
+ setSelectedStepIndex(null);
62
+ };
63
+
64
+ // Auto-scroll to the current (in-progress) step while running, not when finished
65
+ useEffect(() => {
66
+ if (!timelineRef.current || !isRunning) return;
67
+ // data-step holds the 1-based stepNumber, so the in-progress step is numberOfSteps + 1.
68
+ // Fall back to the last completed step while the in-progress one is not rendered (e.g. still connecting).
69
+ const currentStepElement =
70
+ timelineRef.current.querySelector(`[data-step="${metadata.numberOfSteps + 1}"]`) ??
71
+ timelineRef.current.querySelector(`[data-step="${metadata.numberOfSteps}"]`);
72
+ currentStepElement?.scrollIntoView({ behavior: 'smooth', inline: 'center', block: 'nearest' });
73
+ }, [metadata.numberOfSteps, isRunning, isConnectingToE2B]);
74
+
75
+ return (
76
+ <Box
77
+ sx={{
78
+ p: 2,
79
+ border: '1px solid',
80
+ borderColor: 'divider',
81
+ borderRadius: '12px',
82
+ backgroundColor: 'background.paper',
83
+ flexShrink: 0,
84
+ }}
85
+ >
86
+ <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1.5 }}>
87
+ {/* Header with step count */}
88
+ <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
89
+ <Typography variant="h6" sx={{ fontSize: '0.9rem', fontWeight: 700, color: 'text.primary' }}>
90
+ Timeline
91
+ {selectedStepIndex !== null && (
92
+ <Typography component="span" sx={{ ml: 1, color: 'text.secondary', fontWeight: 500, fontSize: '0.65rem' }}>
93
+ - Viewing step {selectedStepIndex + 1}
94
+ </Typography>
95
+ )}
96
+ </Typography>
97
+ {selectedStepIndex !== null && (
98
+ <Button
99
+ size="small"
100
+ variant="outlined"
101
+ onClick={handleFinalStepClick}
102
+ sx={{
103
+ textTransform: 'none',
104
+ fontSize: '0.7rem',
105
+ fontWeight: 600,
106
+ px: 1.5,
107
+ py: 0.25,
108
+ minWidth: 'auto',
109
+ color: 'text.secondary',
110
+ borderColor: 'divider',
111
+ '&:hover': {
112
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(255,255,255,0.05)' : 'rgba(0,0,0,0.03)',
113
+ borderColor: 'text.secondary',
114
+ },
115
+ }}
116
+ >
117
+ Back to latest step
118
+ </Button>
119
+ )}
120
+ </Box>
121
+
122
+ {/* Horizontal scrollable step indicators */}
123
+ <Box
124
+ ref={timelineRef}
125
+ sx={{
126
+ display: 'flex',
127
+ alignItems: 'center',
128
+ overflowX: 'auto',
129
+ overflowY: 'hidden',
130
+ gap: 1.5,
131
+ py: 1.5,
132
+ height: 60,
133
+ position: 'relative',
134
+ // Hide scrollbar completely
135
+ scrollbarWidth: 'none', // Firefox
136
+ '&::-webkit-scrollbar': {
137
+ display: 'none', // Chrome, Safari, Edge
138
+ },
139
+ // Horizontal line crossing through circles
140
+ '&::before': {
141
+ content: '""',
142
+ position: 'absolute',
143
+ left: "25px",
144
+ // Calculate width to cover visible steps + finalStep if present
145
+ width: lineWidth,
146
+ top: '19.5px',
147
+ transform: 'translateY(-50%)',
148
+ transition: 'width 0.6s cubic-bezier(0.4, 0, 0.2, 1)',
149
+ height: '2px',
150
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(255, 255, 255, 0.1)' : 'rgba(0, 0, 0, 0.3)',
151
+ zIndex: 0,
152
+ pointerEvents: 'none',
153
+ },
154
+ }}
155
+ >
156
+ {/* Connection indicator (step 0) */}
157
+ {showConnectionIndicator && (
158
+ <Box
159
+ data-step="connection"
160
+ sx={{
161
+ display: 'flex',
162
+ flexDirection: 'column',
163
+ alignItems: 'center',
164
+ gap: 0.75,
165
+ minWidth: 40,
166
+ flexShrink: 0,
167
+ position: 'relative',
168
+ zIndex: 1,
169
+ }}
170
+ >
171
+ {/* White circle background to hide the line */}
172
+ <Box
173
+ sx={{
174
+ position: 'relative',
175
+ display: 'flex',
176
+ alignItems: 'center',
177
+ justifyContent: 'center',
178
+ height: 28,
179
+ width: 28,
180
+ }}
181
+ >
182
+ {/* White background to hide the line */}
183
+ <Box
184
+ sx={{
185
+ position: 'absolute',
186
+ width: 28,
187
+ height: 28,
188
+ borderRadius: '50%',
189
+ backgroundColor: 'background.paper',
190
+ zIndex: 0,
191
+ }}
192
+ />
193
+
194
+ {/* Connection icon */}
195
+ {isConnectingToE2B ? (
196
+ <CircularProgress
197
+ size={20}
198
+ thickness={5}
199
+ sx={{
200
+ color: 'primary.main',
201
+ position: 'relative',
202
+ zIndex: 1,
203
+ }}
204
+ />
205
+ ) : (
206
+ <CableIcon
207
+ sx={{
208
+ fontSize: 20,
209
+ color: 'success.main',
210
+ position: 'relative',
211
+ zIndex: 1,
212
+ }}
213
+ />
214
+ )}
215
+ </Box>
216
+
217
+ {/* Connection label */}
218
+ <Typography
219
+ variant="caption"
220
+ sx={{
221
+ fontSize: '0.7rem',
222
+ fontWeight: 700,
223
+ color: isConnectingToE2B ? 'primary.main' : 'success.main',
224
+ whiteSpace: 'nowrap',
225
+ }}
226
+ >
227
+ {isConnectingToE2B ? 'Connecting' : 'Connected'}
228
+ </Typography>
229
+ </Box>
230
+ )}
231
+
232
+ {/* Render steps and insert final step at the right position */}
233
+ {steps.map((step, index) => (
234
+ <React.Fragment key={step.stepNumber}>
235
+ <Box
236
+ data-step={step.stepNumber}
237
+ onClick={() => handleStepClick(step.stepIndex, step.isCompleted, step.isCurrent)}
238
+ sx={{
239
+ display: 'flex',
240
+ flexDirection: 'column',
241
+ alignItems: 'center',
242
+ gap: 0.75,
243
+ minWidth: 40,
244
+ flexShrink: 0,
245
+ position: 'relative',
246
+ zIndex: 1,
247
+ cursor: (step.isCompleted || step.isCurrent) ? 'pointer' : 'default',
248
+ '&:hover': (step.isCompleted || step.isCurrent) ? {
249
+ '& .step-dot': {
250
+ transform: 'scale(1.15)',
251
+ },
252
+ } : {},
253
+ }}
254
+ >
255
+ {/* White circle background to hide the line */}
256
+ <Box
257
+ sx={{
258
+ position: 'relative',
259
+ display: 'flex',
260
+ alignItems: 'center',
261
+ justifyContent: 'center',
262
+ height: 28,
263
+ width: 28,
264
+ }}
265
+ >
266
+ {/* White background to hide the line */}
267
+ <Box
268
+ sx={{
269
+ position: 'absolute',
270
+ width: 28,
271
+ height: 28,
272
+ borderRadius: '50%',
273
+ backgroundColor: 'background.paper',
274
+ zIndex: 0,
275
+ }}
276
+ />
277
+
278
+ {/* Step dot */}
279
+ {step.isCurrent ? (
280
+ <Box
281
+ sx={{
282
+ position: 'relative',
283
+ display: 'flex',
284
+ alignItems: 'center',
285
+ justifyContent: 'center',
286
+ zIndex: 1,
287
+ }}
288
+ >
289
+ <CircularProgress
290
+ size={20}
291
+ thickness={5}
292
+ sx={{
293
+ color: 'primary.main',
294
+ position: 'absolute',
295
+ }}
296
+ />
297
+ <Box
298
+ sx={{
299
+ width: 8,
300
+ height: 8,
301
+ borderRadius: '50%',
302
+ backgroundColor: 'white',
303
+ position: 'absolute',
304
+ pointerEvents: 'none',
305
+ boxShadow: '0 0 4px rgba(0,0,0,0.2)',
306
+ }}
307
+ />
308
+ </Box>
309
+ ) : (
310
+ <Box
311
+ sx={{
312
+ position: 'relative',
313
+ display: 'flex',
314
+ alignItems: 'center',
315
+ justifyContent: 'center',
316
+ zIndex: 1,
317
+ }}
318
+ >
319
+ <Box
320
+ className="step-dot"
321
+ sx={{
322
+ width: step.isSelected ? 20 : step.isCompleted ? 14 : 12,
323
+ height: step.isSelected ? 20 : step.isCompleted ? 14 : 12,
324
+ borderRadius: '50%',
325
+ // Always keep steps in primary color (blue)
326
+ backgroundColor: step.isCompleted
327
+ ? 'primary.main' // Blue for completed steps
328
+ : (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'grey.300', // Light grey for future steps
329
+ transition: 'all 0.2s ease',
330
+ boxShadow: step.isCompleted || step.isSelected
331
+ ? step.isSelected
332
+ ? '0 0 8px rgba(255, 167, 38, 0.5)'
333
+ : '0 2px 4px rgba(0,0,0,0.1)'
334
+ : 'none',
335
+ }}
336
+ />
337
+ {/* White dot for selected step */}
338
+ {step.isSelected && (
339
+ <Box
340
+ sx={{
341
+ width: 8,
342
+ height: 8,
343
+ borderRadius: '50%',
344
+ backgroundColor: 'white',
345
+ position: 'absolute',
346
+ }}
347
+ />
348
+ )}
349
+ </Box>
350
+ )}
351
+ </Box>
352
+
353
+ {/* Step number - show for all steps */}
354
+ <Typography
355
+ variant="caption"
356
+ sx={{
357
+ fontSize: '0.7rem',
358
+ fontWeight: step.isSelected || step.isCurrent ? 900 : 400,
359
+ color: step.isCurrent
360
+ ? 'primary.main'
361
+ : (step.isCompleted || step.isSelected
362
+ ? 'text.primary'
363
+ : (theme) => theme.palette.mode === 'dark' ? 'grey.700' : 'grey.400'),
364
+ whiteSpace: 'nowrap',
365
+ lineHeight: 1,
366
+ }}
367
+ >
368
+ {step.stepNumber}
369
+ </Typography>
370
+ </Box>
371
+
372
+ {/* Insert final step indicator right after the last completed step */}
373
+ {finalStep && step.stepNumber === metadata.numberOfSteps && (
374
+ <Box
375
+ data-step="final"
376
+ onClick={handleFinalStepClick}
377
+ sx={{
378
+ display: 'flex',
379
+ flexDirection: 'column',
380
+ alignItems: 'center',
381
+ gap: 0.75,
382
+ minWidth: 40,
383
+ flexShrink: 0,
384
+ position: 'relative',
385
+ zIndex: 1,
386
+ cursor: 'pointer',
387
+ '&:hover': {
388
+ '& .final-step-icon': {
389
+ transform: 'scale(1.15)',
390
+ },
391
+ },
392
+ }}
393
+ >
394
+ {/* White circle background to hide the line */}
395
+ <Box
396
+ sx={{
397
+ position: 'relative',
398
+ display: 'flex',
399
+ alignItems: 'center',
400
+ justifyContent: 'center',
401
+ height: 28,
402
+ width: 28,
403
+ }}
404
+ >
405
+ {/* White background to hide the line */}
406
+ <Box
407
+ sx={{
408
+ position: 'absolute',
409
+ width: 28,
410
+ height: 28,
411
+ borderRadius: '50%',
412
+ backgroundColor: 'background.paper',
413
+ zIndex: 0,
414
+ }}
415
+ />
416
+
417
+ {/* Final step icon */}
418
+ <Box
419
+ className="final-step-icon"
420
+ sx={{
421
+ width: selectedStepIndex === null ? 20 : 18,
422
+ height: selectedStepIndex === null ? 20 : 18,
423
+ borderRadius: '50%',
424
+ backgroundColor:
425
+ finalStep.type === 'success' ? 'success.main' :
426
+ finalStep.type === 'stopped' || finalStep.type === 'max_steps_reached' ? 'warning.main' :
427
+ 'error.main',
428
+ display: 'flex',
429
+ alignItems: 'center',
430
+ justifyContent: 'center',
431
+ transition: 'all 0.2s ease',
432
+ boxShadow: selectedStepIndex === null
433
+ ? finalStep.type === 'success'
434
+ ? '0 2px 8px rgba(102, 187, 106, 0.4)'
435
+ : finalStep.type === 'stopped' || finalStep.type === 'max_steps_reached'
436
+ ? '0 2px 8px rgba(255, 152, 0, 0.4)'
437
+ : '0 2px 8px rgba(244, 67, 54, 0.4)'
438
+ : '0 2px 4px rgba(0,0,0,0.1)',
439
+ position: 'relative',
440
+ zIndex: 1,
441
+ }}
442
+ >
443
+ {finalStep.type === 'success' ? (
444
+ <CheckIcon sx={{ fontSize: 14, color: 'white' }} />
445
+ ) : finalStep.type === 'stopped' ? (
446
+ <StopCircleIcon sx={{ fontSize: 14, color: 'white' }} />
447
+ ) : finalStep.type === 'max_steps_reached' ? (
448
+ <HourglassEmptyIcon sx={{ fontSize: 14, color: 'white' }} />
449
+ ) : finalStep.type === 'sandbox_timeout' ? (
450
+ <AccessTimeIcon sx={{ fontSize: 14, color: 'white' }} />
451
+ ) : (
452
+ <CloseIcon sx={{ fontSize: 14, color: 'white' }} />
453
+ )}
454
+ </Box>
455
+ </Box>
456
+
457
+ {/* Final step label */}
458
+ <Typography
459
+ variant="caption"
460
+ sx={{
461
+ fontSize: '0.7rem',
462
+ fontWeight: selectedStepIndex === null ? 700 : 500,
463
+ color:
464
+ finalStep.type === 'success'
465
+ ? (selectedStepIndex === null ? 'text.primary' : 'text.secondary')
466
+ : finalStep.type === 'stopped' || finalStep.type === 'max_steps_reached'
467
+ ? 'warning.main'
468
+ : 'error.main',
469
+ whiteSpace: 'nowrap',
470
+ }}
471
+ >
472
+ {finalStep.type === 'success' ? 'End' :
473
+ finalStep.type === 'stopped' ? 'Stopped' :
474
+ finalStep.type === 'max_steps_reached' ? 'Max Steps' :
475
+ finalStep.type === 'sandbox_timeout' ? 'Timeout' :
476
+ 'Failed'}
477
+ </Typography>
478
+ </Box>
479
+ )}
480
+ </React.Fragment>
481
+ ))}
482
+ </Box>
483
+ </Box>
484
+ </Box>
485
+ );
486
+ };