""" Browser service - Playwright wrapper managing browser lifecycle and page operations. All Playwright calls run on a dedicated background thread so that callers from any worker thread can safely use the service. An idle-timeout mechanism automatically shuts down the browser (and its thread) after a configurable period of inactivity to free resources. """ import os import sys import uuid import queue import threading from typing import Optional, Dict, Any, List, Callable from common.log import logger try: from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page, Playwright _HAS_PLAYWRIGHT = True except ImportError: _HAS_PLAYWRIGHT = False # --------------------------------------------------------------------------- # Snapshot DOM helpers # --------------------------------------------------------------------------- # Tags that typically carry useful content for an agent _INTERACTIVE_TAGS = { "a", "button", "input", "textarea", "select", "option", "label", "details", "summary", } _SEMANTIC_TAGS = { "h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "td", "th", "caption", "figcaption", "blockquote", "pre", "code", "nav", "main", "article", "section", "header", "footer", "form", "table", "img", "video", "audio", } _KEEP_TAGS = _INTERACTIVE_TAGS | _SEMANTIC_TAGS _SNAPSHOT_JS = """ () => { const KEEP = new Set(%s); const INTERACTIVE = new Set(%s); const SKIP = new Set(["script","style","noscript","svg","path","meta","link","br","hr"]); const CLICKABLE_ROLES = new Set([ "button","link","tab","menuitem","menuitemcheckbox","menuitemradio", "option","switch","checkbox","radio","combobox","searchbox","slider", "spinbutton","textbox","treeitem" ]); let refCounter = 0; const refMap = {}; function visible(el) { if (!(el instanceof HTMLElement)) return true; const st = window.getComputedStyle(el); if (st.display === "none" || st.visibility === "hidden") return false; if (parseFloat(st.opacity) === 0) return false; return true; } // Strong signals: these attributes alone are enough to mark as interactive function hasStrongInteractiveSignal(el) { const role = el.getAttribute("role"); if (role && CLICKABLE_ROLES.has(role)) return true; if (el.hasAttribute("onclick") || el.hasAttribute("tabindex")) return true; if (el.hasAttribute("data-click") || el.hasAttribute("data-action")) return true; if (el.getAttribute("contenteditable") === "true") return true; return false; } // Check if cursor:pointer is set directly (not just inherited from parent) function hasOwnPointerCursor(el) { try { const st = window.getComputedStyle(el); if (st.cursor !== "pointer") return false; const parent = el.parentElement; if (parent) { const pst = window.getComputedStyle(parent); if (pst.cursor === "pointer") return false; } return true; } catch(e) {} return false; } function hasTextOrContent(el) { const t = el.textContent || ""; if (t.trim().length > 0) return true; if (el.querySelector("img,video,audio,canvas")) return true; const ariaLabel = el.getAttribute("aria-label"); if (ariaLabel && ariaLabel.trim()) return true; const title = el.getAttribute("title"); if (title && title.trim()) return true; return false; } function isImplicitInteractive(el) { if (hasStrongInteractiveSignal(el)) return true; if (hasOwnPointerCursor(el) && hasTextOrContent(el)) return true; return false; } function getTextContent(el) { let text = ""; for (const ch of el.childNodes) { if (ch.nodeType === Node.TEXT_NODE) { text += ch.textContent; } } return text.trim(); } function walk(node) { if (node.nodeType === Node.TEXT_NODE) { const t = node.textContent.trim(); return t ? t : null; } if (node.nodeType !== Node.ELEMENT_NODE) return null; const tag = node.tagName.toLowerCase(); if (SKIP.has(tag)) return null; if (!visible(node)) return null; const children = []; for (const ch of node.childNodes) { const r = walk(ch); if (r !== null) { if (typeof r === "string") children.push(r); else children.push(r); } } const nativeInteractive = INTERACTIVE.has(tag); const implicitInteractive = !nativeInteractive && (node instanceof HTMLElement) && isImplicitInteractive(node); const keep = KEEP.has(tag) || implicitInteractive; if (!keep) { if (children.length === 0) return null; if (children.length === 1) return children[0]; return children; } const obj = { tag }; if (nativeInteractive || implicitInteractive) { refCounter++; obj.ref = refCounter; refMap[refCounter] = node; } if (implicitInteractive) { const role = node.getAttribute("role"); if (role) obj.role = role; const directText = getTextContent(node); if (!directText && children.length === 0) { const ariaLabel = node.getAttribute("aria-label"); const title = node.getAttribute("title"); if (ariaLabel) obj.ariaLabel = ariaLabel; else if (title) obj.ariaLabel = title; } } // Attributes if (tag === "a" && node.href) obj.href = node.getAttribute("href"); if (tag === "img") { obj.alt = node.alt || ""; obj.src = node.getAttribute("src") || ""; } if (tag === "input" || tag === "textarea" || tag === "select") { obj.type = node.type || "text"; obj.name = node.name || undefined; obj.value = node.value || undefined; obj.placeholder = node.placeholder || undefined; if (node.disabled) obj.disabled = true; if (tag === "input" && node.type === "checkbox") obj.checked = node.checked; } if (tag === "button") { if (node.disabled) obj.disabled = true; } if (tag === "option") { obj.value = node.value; if (node.selected) obj.selected = true; } if (tag === "label" && node.htmlFor) obj.for = node.htmlFor; // Role / aria-label for native interactive & semantic elements if (!implicitInteractive) { const role = node.getAttribute("role"); if (role) obj.role = role; const ariaLabel = node.getAttribute("aria-label"); if (ariaLabel) obj.ariaLabel = ariaLabel; } // Children if (children.length === 1 && typeof children[0] === "string") { obj.text = children[0]; } else if (children.length > 0) { obj.children = children; } return obj; } const result = walk(document.body); window.__cowRefMap = refMap; return { tree: result, refCount: refCounter }; } """ % ( str(list(_KEEP_TAGS)), str(list(_INTERACTIVE_TAGS)), ) def _should_use_headless() -> bool: """Decide headless mode: headless on Linux servers without display, headed elsewhere.""" if sys.platform in ("win32", "darwin"): return False # Linux: check for display if os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY"): return False return True def _flatten_tree(node, indent=0) -> List[str]: """Convert snapshot tree to compact text lines for LLM consumption.""" if node is None: return [] if isinstance(node, str): return [" " * indent + node] if isinstance(node, list): lines = [] for child in node: lines.extend(_flatten_tree(child, indent)) return lines if not isinstance(node, dict): return [] tag = node.get("tag", "?") ref = node.get("ref") parts = [tag] if ref: parts[0] = f"[{ref}] {tag}" # Inline attributes for attr in ("type", "name", "href", "alt", "role", "ariaLabel", "placeholder", "value"): val = node.get(attr) if val: # Truncate long values s = str(val) if len(s) > 80: s = s[:77] + "..." parts.append(f'{attr}="{s}"') for flag in ("disabled", "checked", "selected"): if node.get(flag): parts.append(flag) prefix = " " * indent header = prefix + " ".join(parts) text = node.get("text") if text: # Truncate long text if len(text) > 120: text = text[:117] + "..." header += f": {text}" lines = [header] children = node.get("children", []) for child in children: lines.extend(_flatten_tree(child, indent + 2)) return lines class BrowserService: """Manages a Playwright browser on a dedicated background thread. All Playwright operations are dispatched to a single long-lived thread via a task queue. Callers from *any* worker thread can use the public API safely. An idle timer automatically shuts the browser down after ``idle_timeout`` seconds of inactivity (default 300 = 5 min). """ _IDLE_TIMEOUT_DEFAULT = 300 # seconds def __init__(self, config: Optional[Dict[str, Any]] = None): self._config = config or {} self._headless: Optional[bool] = None self._screenshot_dir: Optional[str] = None # Background thread state self._thread: Optional[threading.Thread] = None self._task_queue: queue.Queue = queue.Queue() self._lock = threading.Lock() self._alive = False self._ready = threading.Event() # Playwright objects (only accessed on the background thread) self._playwright = None self._browser = None self._context = None self._page = None # Idle auto-release idle_cfg = self._config.get("idle_timeout") self._idle_timeout: float = float(idle_cfg) if idle_cfg is not None else self._IDLE_TIMEOUT_DEFAULT self._idle_timer: Optional[threading.Timer] = None # ------------------------------------------------------------------ # Background-thread lifecycle # ------------------------------------------------------------------ def _start_thread(self): """Start the dedicated Playwright thread if not already running.""" with self._lock: if self._alive and self._thread and self._thread.is_alive(): return # Wait for old thread to fully exit before creating a new one old = self._thread if old and old.is_alive(): old.join(timeout=5) # Fresh queue to avoid stale sentinels from a previous close() self._task_queue = queue.Queue() self._alive = True self._ready = threading.Event() self._thread = threading.Thread(target=self._run_loop, daemon=True, name="BrowserThread") self._thread.start() # Block until browser is ready (or failed) self._ready.wait(timeout=30) def _run_loop(self): """Event loop running on the dedicated thread. Processes tasks until stopped.""" logger.info("[Browser] Background thread started") try: self._launch_browser() except Exception as e: logger.error(f"[Browser] Failed to launch browser: {e}") self._alive = False self._ready.set() self._drain_queue(RuntimeError(f"Browser launch failed: {e}")) return self._ready.set() while self._alive: try: task = self._task_queue.get(timeout=1.0) except queue.Empty: continue if task is None: break fn, args, kwargs, result_slot = task try: result_slot["value"] = fn(*args, **kwargs) except Exception as e: result_slot["error"] = e finally: result_slot["event"].set() self._shutdown_browser() self._drain_queue(RuntimeError("Browser thread stopped")) logger.info("[Browser] Background thread exited") def _drain_queue(self, error: Exception): """Unblock all callers waiting on the queue with an error.""" while True: try: task = self._task_queue.get_nowait() except queue.Empty: break if task is None: continue _, _, _, result_slot = task result_slot["error"] = error result_slot["event"].set() def _launch_browser(self): """Launch Chromium on the background thread.""" if self._headless is None: headless_cfg = self._config.get("headless") self._headless = headless_cfg if headless_cfg is not None else _should_use_headless() launch_args = ["--disable-dev-shm-usage"] if self._headless: launch_args.append("--no-sandbox") extra_args = self._config.get("launch_args", []) if extra_args: launch_args.extend(extra_args) viewport_w = self._config.get("viewport_width", 1280) viewport_h = self._config.get("viewport_height", 720) self._playwright = sync_playwright().start() logger.info(f"[Browser] Launching Chromium (headless={self._headless})") self._browser = self._playwright.chromium.launch( headless=self._headless, args=launch_args, ) self._context = self._browser.new_context( viewport={"width": viewport_w, "height": viewport_h}, user_agent=( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/131.0.0.0 Safari/537.36" ), ) self._page = self._context.new_page() logger.info("[Browser] Browser ready") def _shutdown_browser(self): """Shut down all Playwright resources on the background thread.""" self._cancel_idle_timer() for obj, label in [ (self._context, "context"), (self._browser, "browser"), ]: try: if obj: obj.close() except Exception as e: logger.debug(f"[Browser] {label} close error: {e}") try: if self._playwright: self._playwright.stop() except Exception as e: logger.debug(f"[Browser] playwright stop error: {e}") self._page = None self._context = None self._browser = None self._playwright = None logger.info("[Browser] Browser closed") def _submit(self, fn: Callable, *args, **kwargs): """Submit *fn* to the background thread and block until it completes.""" self._start_thread() if not self._alive: raise RuntimeError("Browser is not available") self._reset_idle_timer() result_slot: Dict[str, Any] = {"event": threading.Event()} self._task_queue.put((fn, args, kwargs, result_slot)) # Timeout prevents permanent hang if the background thread crashes completed = result_slot["event"].wait(timeout=120) if not completed: raise TimeoutError("Browser operation timed out (120s)") if "error" in result_slot: raise result_slot["error"] return result_slot.get("value") # ------------------------------------------------------------------ # Idle auto-release # ------------------------------------------------------------------ def _reset_idle_timer(self): self._cancel_idle_timer() if self._idle_timeout > 0: self._idle_timer = threading.Timer(self._idle_timeout, self._on_idle_timeout) self._idle_timer.daemon = True self._idle_timer.start() def _cancel_idle_timer(self): if self._idle_timer: self._idle_timer.cancel() self._idle_timer = None def _on_idle_timeout(self): logger.info(f"[Browser] Idle for {self._idle_timeout}s, auto-releasing browser") self.close() # ------------------------------------------------------------------ # Public lifecycle # ------------------------------------------------------------------ def close(self): """Shut down browser and background thread (safe from any thread).""" self._cancel_idle_timer() with self._lock: if not self._alive: return self._alive = False t = self._thread if self._task_queue is not None: self._task_queue.put(None) if t is not None and t.is_alive(): t.join(timeout=10) with self._lock: self._thread = None # ------------------------------------------------------------------ # Actions (each method is dispatched to the background thread) # ------------------------------------------------------------------ def navigate(self, url: str, timeout: int = 30000) -> Dict[str, Any]: return self._submit(self._do_navigate, url, timeout) def _do_navigate(self, url: str, timeout: int) -> Dict[str, Any]: page = self._page try: resp = page.goto(url, wait_until="domcontentloaded", timeout=timeout) status = resp.status if resp else None except Exception as e: return {"error": f"Navigation failed: {e}"} try: page.wait_for_load_state("networkidle", timeout=8000) except Exception: pass page.wait_for_timeout(500) try: title = page.title() except Exception: title = "" try: current_url = page.url except Exception: current_url = url return {"url": current_url, "title": title, "status": status} def snapshot(self, selector: Optional[str] = None) -> str: return self._submit(self._do_snapshot, selector) def _do_snapshot(self, selector: Optional[str] = None) -> str: page = self._page try: result = page.evaluate(_SNAPSHOT_JS) except Exception as e: return f"[Snapshot error: {e}]" tree = result.get("tree") ref_count = result.get("refCount", 0) lines = _flatten_tree(tree) try: title = page.title() except Exception: title = "" try: url = page.url except Exception: url = "" header = f"Page: {title} ({url})\nInteractive elements: {ref_count}\n---" body = "\n".join(lines) max_chars = self._config.get("snapshot_max_chars", 30000) if len(body) > max_chars: body = body[:max_chars] + "\n... [snapshot truncated]" return f"{header}\n{body}" def screenshot(self, full_page: bool = False, cwd: str = "") -> str: return self._submit(self._do_screenshot, full_page, cwd) def _do_screenshot(self, full_page: bool = False, cwd: str = "") -> str: page = self._page save_dir = self._get_screenshot_dir(cwd) filename = f"screenshot_{uuid.uuid4().hex[:8]}.png" filepath = os.path.join(save_dir, filename) page.screenshot(path=filepath, full_page=full_page) logger.info(f"[Browser] Screenshot saved: {filepath}") return filepath def click(self, ref: Optional[int] = None, selector: Optional[str] = None, timeout: int = 5000) -> Dict[str, Any]: return self._submit(self._do_click, ref, selector, timeout) def _do_click(self, ref, selector, timeout) -> Dict[str, Any]: page = self._page try: if ref is not None: result = page.evaluate(f""" () => {{ const el = window.__cowRefMap && window.__cowRefMap[{ref}]; if (!el) return {{ error: "ref {ref} not found. Run snapshot first." }}; el.click(); return {{ clicked: true, tag: el.tagName.toLowerCase() }}; }} """) if result.get("error"): return result page.wait_for_timeout(500) return result elif selector: page.click(selector, timeout=timeout) return {"clicked": True, "selector": selector} else: return {"error": "Provide either ref (from snapshot) or selector"} except Exception as e: return {"error": f"Click failed: {e}"} def fill(self, text: str, ref: Optional[int] = None, selector: Optional[str] = None, timeout: int = 5000) -> Dict[str, Any]: return self._submit(self._do_fill, text, ref, selector, timeout) def _do_fill(self, text, ref, selector, timeout) -> Dict[str, Any]: page = self._page try: if ref is not None: result = page.evaluate(f""" () => {{ const el = window.__cowRefMap && window.__cowRefMap[{ref}]; if (!el) return {{ error: "ref {ref} not found. Run snapshot first." }}; el.focus(); el.value = ""; return {{ tag: el.tagName.toLowerCase(), name: el.name || "" }}; }} """) if result.get("error"): return result page.keyboard.type(text) return {"filled": True, "ref": ref, "text": text} elif selector: page.fill(selector, text, timeout=timeout) return {"filled": True, "selector": selector, "text": text} else: return {"error": "Provide either ref (from snapshot) or selector"} except Exception as e: return {"error": f"Fill failed: {e}"} def select(self, value: str, ref: Optional[int] = None, selector: Optional[str] = None, timeout: int = 5000) -> Dict[str, Any]: return self._submit(self._do_select, value, ref, selector, timeout) def _do_select(self, value, ref, selector, timeout) -> Dict[str, Any]: page = self._page try: if ref is not None: result = page.evaluate(f""" () => {{ const el = window.__cowRefMap && window.__cowRefMap[{ref}]; if (!el || el.tagName.toLowerCase() !== "select") return {{ error: "ref {ref} is not a