browsergym_env / server /browsergym_environment.py
kashif's picture
kashif HF Staff
Upload folder using huggingface_hub
0b0b6b0 verified
"""BrowserGym Environment implementation for OpenEnv.
This module wraps the BrowserGym framework to provide a compatible interface
with OpenEnv's Environment ABC. BrowserGym includes multiple benchmarks:
- MiniWoB++: Training environment with 100+ simple web tasks
- WebArena: Realistic evaluation with 812 complex tasks
- VisualWebArena: Visual web navigation tasks
- WorkArena: Enterprise task automation
"""
import importlib
import os
from typing import Any, Dict, Optional
from uuid import uuid4
import gymnasium as gym
from openenv_core.env_server.interfaces import Environment
from browsergym_env.models import (
BrowserGymAction,
BrowserGymObservation,
BrowserGymState,
)
_MINIWOB_LOAD_HELP = (
"MiniWoB tasks require the MiniWoB HTML bundle to be served over HTTP. "
"The official BrowserGym Docker image handles this automatically by "
"serving the bundle on port 8888. For custom or non-Docker deployments, "
"clone the MiniWoB++ repository, start a static server inside "
"`miniwob-plusplus/miniwob/html` (e.g. `python -m http.server 8888`), and "
"set the MINIWOB_URL environment variable to the served base URL such as "
"`http://localhost:8888/miniwob/`."
)
class BrowserGymEnvironment(Environment):
"""BrowserGym environment wrapper for OpenEnv.
This environment wraps BrowserGym's Gymnasium-compatible environments to
provide unified access to multiple web navigation benchmarks.
"""
def __init__(
self,
benchmark: str = "miniwob",
task_name: Optional[str] = None,
headless: bool = True,
viewport_width: int = 1280,
viewport_height: int = 720,
timeout: float = 10000.0,
**gym_kwargs: Any,
):
"""Initialize the BrowserGym environment.
Args:
benchmark: Benchmark to use ('miniwob', 'webarena', 'visualwebarena', etc.)
task_name: Specific task within the benchmark (e.g., 'click-test', 'click-button')
If None, will use first available task
headless: Whether to run browser in headless mode
viewport_width: Browser viewport width
viewport_height: Browser viewport height
timeout: Action timeout in milliseconds
**gym_kwargs: Additional arguments passed to gym.make()
"""
super().__init__()
self.benchmark = benchmark
self.task_name = task_name
self.headless = headless
self.viewport_width = viewport_width
self.viewport_height = viewport_height
self.timeout = timeout
self.gym_kwargs = dict(gym_kwargs)
# Build environment ID
if task_name:
self.env_id = f"browsergym/{benchmark}.{task_name}"
else:
self.env_id = f"browsergym/{benchmark}"
# force import the benchmark module
benchmark_modules = {
"miniwob": "browsergym.miniwob",
"webarena": "browsergym.webarena",
"visualwebarena": "browsergym.visualwebarena",
"workarena": "browsergym.workarena",
}
module_path = benchmark_modules.get(benchmark)
try:
if module_path:
importlib.import_module(module_path)
else:
importlib.import_module("browsergym")
except ModuleNotFoundError as import_error:
message = (
"Failed to import BrowserGym benchmark "
f"'{benchmark}': {import_error}\n"
"Install the matching browsergym package "
f"(e.g., browsergym-{benchmark})."
)
raise ValueError(message) from import_error
# Create the BrowserGym environment
try:
self.gym_env = gym.make(
self.env_id,
headless=headless,
viewport={"width": viewport_width, "height": viewport_height},
timeout=timeout,
**self.gym_kwargs,
)
except Exception as e: # noqa: BLE001 - gym.make
message = (
"Failed to create BrowserGym environment "
f"'{self.env_id}': {e}\n"
"Make sure the benchmark package is installed "
f"(e.g., pip install browsergym-{benchmark})."
)
raise ValueError(message) from e
# State tracking
self._state = BrowserGymState(
episode_id=str(uuid4()),
step_count=0,
benchmark=benchmark,
task_name=task_name or "",
)
self._last_obs: Optional[Dict[str, Any]] = None
self._last_info: Optional[Dict[str, Any]] = None
def reset(
self,
seed: Optional[int] = None,
task_name: Optional[str] = None,
) -> BrowserGymObservation:
"""Reset the environment with a specific task.
Args:
seed: Random seed for reproducibility
task_name: Override task name for this episode
Returns:
Initial observation for the task
"""
# Generate new episode ID
self._state = BrowserGymState(
episode_id=str(uuid4()),
step_count=0,
benchmark=self.benchmark,
task_name=task_name or self.task_name or "",
)
# Reset options
reset_options = {}
if seed is not None:
reset_options["seed"] = seed
# Reset the gym environment
try:
obs, info = self.gym_env.reset(**reset_options)
except AttributeError as err:
if "context" in str(err) and hasattr(self.gym_env, "close"):
# BrowserGym can leave partially initialized state after a
# failed reset. Close the hanging resources and try once more.
self.gym_env.close()
obs, info = self.gym_env.reset(**reset_options)
else:
raise
except Exception as err: # noqa: BLE001 - browsergym
message = str(err)
if self.benchmark == "miniwob" and "core is not defined" in message:
raise ValueError(_MINIWOB_LOAD_HELP) from err
raise
self._last_obs = obs
self._last_info = info
# Extract observation details
return self._create_observation(obs, info, done=False, reward=0.0)
def step(self, action: BrowserGymAction) -> BrowserGymObservation:
"""Execute an action in the environment.
Args:
action: The action to execute
Returns:
Observation after executing the action
"""
self._state.step_count += 1
# Execute action in gym environment
try:
obs, reward, terminated, truncated, info = self.gym_env.step(
action.action_str
)
self._last_obs = obs
self._last_info = info
# Update state
done = terminated or truncated
self._state.cum_reward += float(reward)
# Extract goal from info if available
if "goal" in info:
self._state.goal = str(info["goal"])
return self._create_observation(obs, info, done=done, reward=float(reward))
except Exception as e:
# Handle action execution errors
error_msg = str(e)
return BrowserGymObservation(
text=self._last_obs.get("text", "") if self._last_obs else "",
url=self._last_obs.get("url", "") if self._last_obs else "",
goal=self._state.goal,
error=error_msg,
last_action_error=True,
done=False,
reward=0.0,
)
def _create_observation(
self,
obs: Dict[str, Any],
info: Dict[str, Any],
done: bool,
reward: float,
) -> BrowserGymObservation:
"""Convert BrowserGym observation to OpenEnv format.
Args:
obs: BrowserGym observation dict
info: BrowserGym info dict
done: Whether episode is done
reward: Reward for the step
Returns:
BrowserGymObservation
"""
# Extract text observation (could be AXTree, DOM, or other)
text = ""
if "axtree_txt" in obs:
text = obs["axtree_txt"]
elif "pruned_html" in obs:
text = obs["pruned_html"]
elif "dom_txt" in obs:
text = obs["dom_txt"]
elif isinstance(obs, str):
text = obs
# Extract URL
url = info.get("url", "")
if not url and "page" in info:
url = info["page"].get("url", "")
# Extract goal/instruction
goal = info.get("goal", "")
if not goal and "task" in info:
goal = info["task"].get("goal", "")
# Update state
self._state.current_url = url
self._state.goal = goal
# Extract additional observation modalities
screenshot = obs.get("screenshot") if isinstance(obs, dict) else None
axtree_txt = obs.get("axtree_txt", "") if isinstance(obs, dict) else ""
pruned_html = obs.get("pruned_html", "") if isinstance(obs, dict) else ""
# Store full BrowserGym observation and info in metadata
# This preserves timestamps, additional fields, and any future extensions
browsergym_metadata = {
"browsergym_obs": obs if isinstance(obs, dict) else {},
"browsergym_info": info,
}
return BrowserGymObservation(
text=text,
url=url,
screenshot=screenshot,
goal=goal,
axtree_txt=axtree_txt,
pruned_html=pruned_html,
error="",
last_action_error=False,
done=done,
reward=reward,
metadata=browsergym_metadata,
)
@property
def state(self) -> BrowserGymState:
"""Get the current environment state."""
return self._state
def close(self) -> None:
"""Clean up environment resources."""
if hasattr(self, "gym_env"):
self.gym_env.close()