feat(browser): persistent login + CDP attach mode #2809

Browser sessions now reuse a Chromium user profile across runs by default
(`~/.cow/browser_profile`), so users only log in to a site once.
Three launch modes are selectable via `tools.browser` in config.json:
  - persistent (default): Playwright Chromium with a persistent user_data_dir
  - cdp: attach to an externally launched real Chrome via `cdp_endpoint`
    (full fingerprints, ideal for sites with strict bot detection)
  - fresh: clean context on every run; enabled by setting `persistent: false`

Also:
  - Self-heal when the user closes the browser window mid-session: detect
    closed page/context/browser via close listeners and exception scanning,
    then transparently relaunch on the next request.
  - Graceful CDP shutdown: disconnect only, never kill the user's Chrome.
  - Friendly errors when the CDP endpoint is unreachable or the persistent
    profile is locked, so the LLM can guide the user instead of looping.
  - Fix tool config being silently overwritten by workspace config in
    AgentInitializer; per-tool user settings (e.g. browser.cdp_endpoint)
    are now merged instead of replaced.
  - Update zh / en / ja docs with the new login-persistence section,
    including the Chrome 137+ requirement to pair --remote-debugging-port
    with a dedicated --user-data-dir.
This commit is contained in:
zhayujie
2026-05-19 11:52:11 +08:00
parent a85c5f9d4e
commit a0dfdb79df
6 changed files with 592 additions and 50 deletions

View File

@@ -15,6 +15,10 @@ import threading
from typing import Optional, Dict, Any, List, Callable
from common.log import logger
from common.utils import expand_path
# Default Chromium profile directory, used when `user_data_dir` is absent from
# the browser config; "~" is expanded via expand_path() before use.
_DEFAULT_USER_DATA_DIR = "~/.cow/browser_profile"
try:
from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page, Playwright
@@ -212,6 +216,21 @@ _SNAPSHOT_JS = """
)
# Lower-case substrings whose presence in an exception message means the
# browser, context or page is gone.
_BROWSER_DEAD_HINTS = (
    "has been closed",
    "browser has disconnected",
    "target closed",
    "browser closed",
    "context or browser has been closed",
)


def _is_browser_dead_error(err: Exception) -> bool:
    """Tell whether *err* reports that the browser / page died out from under us.

    The exception text is lower-cased and searched for every entry of
    ``_BROWSER_DEAD_HINTS``; a single hit is enough.
    """
    text = str(err).lower()
    for hint in _BROWSER_DEAD_HINTS:
        if hint in text:
            return True
    return False
def _should_use_headless() -> bool:
"""Decide headless mode: headless on Linux servers without display, headed elsewhere."""
if sys.platform in ("win32", "darwin"):
@@ -302,11 +321,38 @@ class BrowserService:
self._context = None
self._page = None
# Launch mode: one of "fresh" | "persistent" | "cdp".
# - cdp: connect to an externally launched Chrome via CDP endpoint.
# - persistent: launch with launch_persistent_context using a user_data_dir
# so cookies / login state survive across runs (default).
# - fresh: classic launch + new_context, clean state every run.
cdp_endpoint = self._config.get("cdp_endpoint") or ""
persistent_flag = self._config.get("persistent", True)
user_data_dir_cfg = self._config.get("user_data_dir")
if user_data_dir_cfg is None:
user_data_dir_cfg = _DEFAULT_USER_DATA_DIR
self._cdp_endpoint: str = cdp_endpoint.strip() if isinstance(cdp_endpoint, str) else ""
if self._cdp_endpoint:
self._launch_mode = "cdp"
self._user_data_dir: str = ""
elif persistent_flag and user_data_dir_cfg:
self._launch_mode = "persistent"
self._user_data_dir = expand_path(str(user_data_dir_cfg))
else:
self._launch_mode = "fresh"
self._user_data_dir = ""
# Idle auto-release
idle_cfg = self._config.get("idle_timeout")
self._idle_timeout: float = float(idle_cfg) if idle_cfg is not None else self._IDLE_TIMEOUT_DEFAULT
self._idle_timer: Optional[threading.Timer] = None
# Set when the browser / page is detected to have died externally
# (e.g. user manually closed the window). The next _submit() will then
# tear down the stale thread and relaunch.
self._needs_restart = False
# ------------------------------------------------------------------
# Background-thread lifecycle
# ------------------------------------------------------------------
@@ -354,6 +400,12 @@ class BrowserService:
result_slot["value"] = fn(*args, **kwargs)
except Exception as e:
result_slot["error"] = e
if _is_browser_dead_error(e):
self._needs_restart = True
logger.warning(
f"[Browser] Detected closed page/context ({e}); "
"will relaunch on next request."
)
finally:
result_slot["event"].set()
@@ -375,7 +427,7 @@ class BrowserService:
result_slot["event"].set()
def _launch_browser(self):
"""Launch Chromium on the background thread."""
"""Launch / connect Chromium on the background thread."""
if self._headless is None:
headless_cfg = self._config.get("headless")
self._headless = headless_cfg if headless_cfg is not None else _should_use_headless()
@@ -390,36 +442,142 @@ class BrowserService:
viewport_w = self._config.get("viewport_width", 1280)
viewport_h = self._config.get("viewport_height", 720)
viewport = {"width": viewport_w, "height": viewport_h}
user_agent = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/131.0.0.0 Safari/537.36"
)
self._playwright = sync_playwright().start()
logger.info(f"[Browser] Launching Chromium (headless={self._headless})")
if self._launch_mode == "cdp":
self._connect_cdp(viewport)
elif self._launch_mode == "persistent":
self._launch_persistent(launch_args, viewport, user_agent)
else:
self._launch_fresh(launch_args, viewport, user_agent)
logger.info("[Browser] Browser ready")
def _launch_fresh(self, launch_args: List[str], viewport: Dict[str, int], user_agent: str):
    """Classic launch: brand new Chromium with an empty context.

    Args:
        launch_args: Extra command-line arguments passed to Chromium.
        viewport: Viewport size as ``{"width": ..., "height": ...}``.
        user_agent: User-agent string applied to the new context.

    Stores the browser, context and first page on the instance, then
    registers the close listeners.
    """
    logger.info(f"[Browser] Launching Chromium (fresh, headless={self._headless})")
    self._browser = self._playwright.chromium.launch(
        headless=self._headless,
        args=launch_args,
    )
    # Viewport and user agent arrive pre-built from the caller; each keyword
    # is passed exactly once.
    self._context = self._browser.new_context(
        viewport=viewport,
        user_agent=user_agent,
    )
    self._page = self._context.new_page()
    self._wire_close_listeners()
def _launch_persistent(self, launch_args: List[str], viewport: Dict[str, int], user_agent: str):
    """Launch Chromium with a persistent user_data_dir so login state survives.

    Args:
        launch_args: Extra command-line arguments passed to Chromium.
        viewport: Viewport size as ``{"width": ..., "height": ...}``.
        user_agent: User-agent string applied to the persistent context.

    Raises:
        RuntimeError: If the launch error text carries one of the known
            profile-lock signatures (profile held by another process).
        Exception: Any other launch failure is re-raised unchanged.
    """
    os.makedirs(self._user_data_dir, exist_ok=True)
    logger.info(
        f"[Browser] Launching Chromium (persistent, headless={self._headless}, "
        f"profile={self._user_data_dir})"
    )
    try:
        self._context = self._playwright.chromium.launch_persistent_context(
            user_data_dir=self._user_data_dir,
            headless=self._headless,
            args=launch_args,
            viewport=viewport,
            user_agent=user_agent,
        )
    except Exception as e:
        # Profile is locked when another Chromium instance already holds it.
        # Match only lock-specific signatures: bare "profile" / "lock" would
        # also hit unrelated failures, because the profile path itself
        # (e.g. ".../browser_profile") can be echoed in the error text.
        msg = str(e).lower()
        lock_signatures = (
            "singletonlock",
            "processsingleton",
            "profile appears to be in use",
            "locked the profile",
            "user data directory is already in use",
        )
        if any(sig in msg for sig in lock_signatures):
            raise RuntimeError(
                f"Browser profile '{self._user_data_dir}' is in use by another process. "
                "Close the other Chromium / cow instance, or set a different "
                "tools.browser.user_data_dir."
            ) from e
        raise

    # Persistent context has no parent Browser handle; reuse the auto-created page.
    self._browser = None
    pages = self._context.pages
    self._page = pages[0] if pages else self._context.new_page()
    self._wire_close_listeners()
def _connect_cdp(self, viewport: Dict[str, int]):
    """Attach to an existing Chrome started with --remote-debugging-port.

    Args:
        viewport: Viewport size, applied only when a new context has to be
            created because the attached browser exposes none.

    Raises:
        RuntimeError: If the connection error text indicates the endpoint
            is unreachable (refused / reset / host not found / timed out).
        Exception: Any other connection failure is re-raised unchanged.
    """
    endpoint = self._cdp_endpoint
    logger.info(f"[Browser] Connecting to existing Chrome via CDP: {endpoint}")
    try:
        self._browser = self._playwright.chromium.connect_over_cdp(endpoint)
    except Exception as e:
        # A bare "connect" test would match any message that mentions the
        # API name connect_over_cdp, so only network-level signatures are
        # treated as "Chrome is not running".
        msg = str(e).lower()
        unreachable_signatures = (
            "econnrefused",
            "refused",
            "econnreset",
            "ehostunreach",
            "enetunreach",
            "enotfound",
            "etimedout",
        )
        if any(sig in msg for sig in unreachable_signatures):
            raise RuntimeError(
                f"Cannot reach Chrome at {endpoint}. The CDP browser is not "
                "running. Ask the user to launch Chrome with "
                "--remote-debugging-port and --user-data-dir, then retry. "
                "Do not retry this tool until the user confirms."
            ) from e
        raise

    # Prefer the browser's existing context and page so the agent works in
    # the already-open session; create them only when missing.
    contexts = self._browser.contexts
    if contexts:
        self._context = contexts[0]
    else:
        self._context = self._browser.new_context(viewport=viewport)
    pages = self._context.pages
    self._page = pages[0] if pages else self._context.new_page()
    self._wire_close_listeners()
def _wire_close_listeners(self):
    """Flag the service for restart once the browser, context or page goes away.

    Subscribes one shared callback to the browser's "disconnected" event and
    to the "close" event of the context and the page. Handles that are unset
    are skipped. A failure while subscribing is logged at debug level and
    otherwise ignored.
    """
    def _flag_restart(_source=None):
        self._needs_restart = True

    try:
        subscriptions = (
            (self._browser, "disconnected"),
            (self._context, "close"),
            (self._page, "close"),
        )
        for target, event_name in subscriptions:
            if target:
                target.on(event_name, _flag_restart)
    except Exception as exc:
        logger.debug(f"[Browser] Failed to wire close listeners: {exc}")
def _shutdown_browser(self):
"""Shut down all Playwright resources on the background thread."""
"""Shut down Playwright resources on the background thread.
Mode-specific behavior:
- cdp: only disconnect the Playwright client; leave the user's Chrome
and its tabs untouched (do NOT close the context).
- persistent: close the persistent context (no separate browser handle).
- fresh: close context, then browser.
"""
self._cancel_idle_timer()
for obj, label in [
(self._context, "context"),
(self._browser, "browser"),
]:
if self._launch_mode == "cdp":
# For CDP, browser.close() only detaches the Playwright client;
# the user's Chrome process and its tabs stay alive.
try:
if obj:
obj.close()
if self._browser:
self._browser.close()
except Exception as e:
logger.debug(f"[Browser] {label} close error: {e}")
logger.debug(f"[Browser] cdp disconnect error: {e}")
else:
for obj, label in [
(self._context, "context"),
(self._browser, "browser"),
]:
try:
if obj:
obj.close()
except Exception as e:
logger.debug(f"[Browser] {label} close error: {e}")
try:
if self._playwright:
self._playwright.stop()
@@ -433,6 +591,13 @@ class BrowserService:
def _submit(self, fn: Callable, *args, **kwargs):
"""Submit *fn* to the background thread and block until it completes."""
# If the browser died externally (e.g. user closed the window), tear
# down the stale thread first so _start_thread() will relaunch fresh.
if self._needs_restart:
logger.info("[Browser] Restarting after detecting closed browser")
self.close()
self._needs_restart = False
self._start_thread()
if not self._alive:
@@ -481,6 +646,7 @@ class BrowserService:
self._cancel_idle_timer()
with self._lock:
if not self._alive:
self._needs_restart = False
return
self._alive = False
t = self._thread
@@ -490,6 +656,7 @@ class BrowserService:
t.join(timeout=10)
with self._lock:
self._thread = None
self._needs_restart = False
# ------------------------------------------------------------------
# Actions (each method is dispatched to the background thread)

View File

@@ -4,6 +4,15 @@ Browser tool - Control a Chromium browser for web navigation and interaction.
Uses Playwright under the hood. Browser instance is lazily started on first
use, reused across tool calls within the same session, and cleaned up via
close().
Launch modes (configured under `tools.browser` in config.json):
- persistent (default): Chromium runs with a persistent user_data_dir
(default `~/.cow/browser_profile`), so cookies and login state survive
across runs. The user only needs to log in once.
- cdp: When `cdp_endpoint` is set, attach to an externally launched Chrome
via the Chrome DevTools Protocol. Lets the agent reuse the user's real
browser (with all logins / extensions / true fingerprints).
- fresh: Set `persistent` to false to fall back to a clean context every run.
"""
import json
@@ -25,7 +34,10 @@ class BrowserTool(BaseTool):
"get_text, press, evaluate.\n\n"
"Workflow: navigate (auto-includes snapshot with element refs) → click/fill/select by ref → snapshot to verify.\n\n"
"Use snapshot as the primary way to read pages. Use screenshot + send to show key results to the user. "
"For login/CAPTCHA/authorization etc., screenshot and ask the user for help."
"For login/CAPTCHA/authorization etc., screenshot and ask the user for help. "
"Login state is persisted across sessions (cookies / localStorage are kept in a "
"user profile directory), so once the user logs in to a site, the agent can keep "
"using it without logging in again."
)
params: dict = {