Skip to content

BrowserEnv

Bases: BaseEnv

Browser environment for agents.

Source code in utu/env/browser_env.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class BrowserEnv(BaseEnv):
    """Browser environment for agents."""

    def __init__(self, trace_id: str):
        self.trace_id = trace_id
        self.docker_manager = DockerManager()
        self.browser_state: str = None

    async def build(self):
        """Build the environment. We use docker to run a browser container."""
        self.container_info = await self.docker_manager.start_container(self.trace_id)
        self.mcp_url = self.container_info["mcp_url"]

    async def cleanup(self):
        await self.docker_manager.stop_container(self.trace_id)

    def get_state(self) -> str:
        """Get the current state of the environment."""
        return self.browser_state

    async def get_tools(self) -> list[Tool]:
        """Get the tools available in the environment."""
        activated_tools = (
            "search_google",
            "go_to_url",
            "go_back",
            # "wait",
            "click_element",
            "input_text",
            "switch_tab",
            "open_tab",
            "scroll_down",
            "scroll_up",
            "download_file",
            # "search_google_api"
        )
        tools: list[Tool] = []

        def create_on_invoke_tool(tool_name: str):
            async def on_invoke_tool(ctx: RunContextWrapper[TContext], input_json: str) -> str:
                try:
                    async with MCPClient.get_mcp_client(self.mcp_url) as client:
                        res = await client.call_tool(tool_name, json.loads(input_json))
                        if res.isError:
                            return f"Error: {res.content[0].text}"
                        self.browser_state = res.content[1].text  # DISCUSS: record the web actions?
                        return res.content[0].text
                except Exception as e:  # pylint: disable=broad-except
                    logger.error(f"except: {e}", exc_info=True)
                    return f"Error: {e}"

            return on_invoke_tool

        async with MCPClient.get_mcp_client(self.mcp_url) as client:
            # NOTE: check `MCPUtil` in @agents
            res = await client.list_tools()
            assert res.nextCursor is None
            for tool in res.tools:
                if tool.name not in activated_tools:
                    continue
                tools.append(
                    FunctionTool(
                        name=tool.name,
                        description=tool.description,
                        params_json_schema=tool.inputSchema,
                        on_invoke_tool=create_on_invoke_tool(tool.name),
                    )
                )
            return tools

build async

build()

Build the environment. We use docker to run a browser container.

Source code in utu/env/browser_env.py
20
21
22
23
async def build(self):
    """Build the environment. We use docker to run a browser container."""
    self.container_info = await self.docker_manager.start_container(self.trace_id)
    self.mcp_url = self.container_info["mcp_url"]

get_state

get_state() -> str

Get the current state of the environment.

Source code in utu/env/browser_env.py
28
29
30
def get_state(self) -> str:
    """Get the current state of the environment."""
    return self.browser_state

get_tools async

get_tools() -> list[Tool]

Get the tools available in the environment.

Source code in utu/env/browser_env.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
async def get_tools(self) -> list[Tool]:
    """Get the tools available in the environment."""
    activated_tools = (
        "search_google",
        "go_to_url",
        "go_back",
        # "wait",
        "click_element",
        "input_text",
        "switch_tab",
        "open_tab",
        "scroll_down",
        "scroll_up",
        "download_file",
        # "search_google_api"
    )
    tools: list[Tool] = []

    def create_on_invoke_tool(tool_name: str):
        async def on_invoke_tool(ctx: RunContextWrapper[TContext], input_json: str) -> str:
            try:
                async with MCPClient.get_mcp_client(self.mcp_url) as client:
                    res = await client.call_tool(tool_name, json.loads(input_json))
                    if res.isError:
                        return f"Error: {res.content[0].text}"
                    self.browser_state = res.content[1].text  # DISCUSS: record the web actions?
                    return res.content[0].text
            except Exception as e:  # pylint: disable=broad-except
                logger.error(f"except: {e}", exc_info=True)
                return f"Error: {e}"

        return on_invoke_tool

    async with MCPClient.get_mcp_client(self.mcp_url) as client:
        # NOTE: check `MCPUtil` in @agents
        res = await client.list_tools()
        assert res.nextCursor is None
        for tool in res.tools:
            if tool.name not in activated_tools:
                continue
            tools.append(
                FunctionTool(
                    name=tool.name,
                    description=tool.description,
                    params_json_schema=tool.inputSchema,
                    on_invoke_tool=create_on_invoke_tool(tool.name),
                )
            )
        return tools