diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py
index acf28f98..5c2cc206 100644
--- a/agent/tools/__init__.py
+++ b/agent/tools/__init__.py
@@ -55,6 +55,15 @@ def _import_optional_tools():
except Exception as e:
logger.error(f"[Tools] WebSearch failed to load: {e}")
+ # WebFetch Tool
+ try:
+ from agent.tools.web_fetch.web_fetch import WebFetch
+ tools['WebFetch'] = WebFetch
+ except ImportError as e:
+ logger.error(f"[Tools] WebFetch not loaded - missing dependency: {e}")
+ except Exception as e:
+ logger.error(f"[Tools] WebFetch failed to load: {e}")
+
return tools
# Load optional tools
@@ -62,6 +71,7 @@ _optional_tools = _import_optional_tools()
EnvConfig = _optional_tools.get('EnvConfig')
SchedulerTool = _optional_tools.get('SchedulerTool')
WebSearch = _optional_tools.get('WebSearch')
+WebFetch = _optional_tools.get('WebFetch')
GoogleSearch = _optional_tools.get('GoogleSearch')
FileSave = _optional_tools.get('FileSave')
Terminal = _optional_tools.get('Terminal')
@@ -102,6 +112,7 @@ __all__ = [
'EnvConfig',
'SchedulerTool',
'WebSearch',
+ 'WebFetch',
# Optional tools (may be None if dependencies not available)
# 'BrowserTool'
]
diff --git a/agent/tools/web_fetch/__init__.py b/agent/tools/web_fetch/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/agent/tools/web_fetch/web_fetch.py b/agent/tools/web_fetch/web_fetch.py
new file mode 100644
index 00000000..93a8b70d
--- /dev/null
+++ b/agent/tools/web_fetch/web_fetch.py
@@ -0,0 +1,98 @@
+"""
+Web Fetch tool - Fetch and extract readable content from web pages.
+"""
+
+import re
+from typing import Dict, Any
+from urllib.parse import urlparse
+
+import requests
+
+from agent.tools.base_tool import BaseTool, ToolResult
+from common.log import logger
+
+
+DEFAULT_TIMEOUT = 10
+
+DEFAULT_HEADERS = {
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+}
+
+
+class WebFetch(BaseTool):
+ """Tool for fetching and extracting readable content from web pages"""
+
+ name: str = "web_fetch"
+ description: str = (
+ "Fetch and extract readable text content from a web page URL. "
+ )
+
+ params: dict = {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "string",
+ "description": "The HTTP/HTTPS URL to fetch"
+ }
+ },
+ "required": ["url"]
+ }
+
+ def __init__(self, config: dict = None):
+ self.config = config or {}
+
+ def execute(self, args: Dict[str, Any]) -> ToolResult:
+ url = args.get("url", "").strip()
+ if not url:
+ return ToolResult.fail("Error: 'url' parameter is required")
+
+ parsed = urlparse(url)
+ if parsed.scheme not in ("http", "https"):
+ return ToolResult.fail("Error: Invalid URL (must start with http:// or https://)")
+
+ try:
+ response = requests.get(
+ url,
+ headers=DEFAULT_HEADERS,
+ timeout=DEFAULT_TIMEOUT,
+ allow_redirects=True,
+ )
+ response.raise_for_status()
+ except requests.Timeout:
+ return ToolResult.fail(f"Error: Request timed out after {DEFAULT_TIMEOUT}s")
+ except requests.ConnectionError:
+ return ToolResult.fail(f"Error: Failed to connect to {parsed.netloc}")
+ except requests.HTTPError as e:
+ return ToolResult.fail(f"Error: HTTP {e.response.status_code} for URL: {url}")
+ except Exception as e:
+ return ToolResult.fail(f"Error: Failed to fetch URL: {e}")
+
+ html = response.text
+ title = self._extract_title(html)
+ text = self._extract_text(html)
+
+ return ToolResult.success(f"Title: {title}\n\nContent:\n{text}")
+
+ @staticmethod
+ def _extract_title(html: str) -> str:
+ match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
+ return match.group(1).strip() if match else "Untitled"
+
+ @staticmethod
+ def _extract_text(html: str) -> str:
+ # Remove script and style blocks
+ text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.IGNORECASE | re.DOTALL)
+ text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.IGNORECASE | re.DOTALL)
+ # Remove HTML tags
+ text = re.sub(r"<[^>]+>", "", text)
+ # Decode common HTML entities
+ text = text.replace("&amp;", "&").replace("&lt;", "<").replace("&gt;", ">")
+ text = text.replace("&quot;", '"').replace("&#39;", "'").replace("&nbsp;", " ")
+ # Collapse whitespace: multiple spaces/tabs -> single space, multiple newlines -> double newline
+ text = re.sub(r"[^\S\n]+", " ", text)
+ text = re.sub(r"\n{3,}", "\n\n", text)
+ # Strip leading/trailing whitespace per line
+ lines = [line.strip() for line in text.splitlines()]
+ text = "\n".join(lines)
+ return text.strip()
diff --git a/skills/skill-creator/SKILL.md b/skills/skill-creator/SKILL.md
index 6b6d5d12..697f9f9f 100644
--- a/skills/skill-creator/SKILL.md
+++ b/skills/skill-creator/SKILL.md
@@ -95,7 +95,7 @@ Do NOT create auxiliary documentation files:
## Installing a Skill from URL
-1. Fetch the URL content (curl or web-fetch skill)
+1. Fetch the URL content (curl or web_fetch tool)
2. Extract `name` from YAML frontmatter
3. Create directory `<workspace>/skills/<name>/` and save content as `SKILL.md`
4. Check the saved SKILL.md for an installation/setup section — if it defines additional steps (e.g., downloading scripts, installing dependencies), execute them; otherwise installation is complete
diff --git a/skills/web-fetch/SKILL.md b/skills/web-fetch/SKILL.md
deleted file mode 100644
index 39315fb0..00000000
--- a/skills/web-fetch/SKILL.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-name: web-fetch
-description: Fetch and extract readable content from web pages. Use for lightweight page access without browser automation.
-homepage: https://github.com/zhayujie/chatgpt-on-wechat
-metadata:
- emoji: 🌐
- requires:
- bins: ["curl"]
- always: true
----
-
-# Web Fetch
-
-Fetch and extract readable content from web pages using curl and basic text processing.
-
-## Usage
-
-**Important**: Scripts are located relative to this skill's base directory.
-
-When you see this skill in `<available_skills>`, note the `<base_dir>` path.
-
-```bash
-# General pattern:
-bash "<base_dir>/scripts/fetch.sh" <url> [output_file]
-
-# Example (replace with actual path from skill listing):
-bash "~/chatgpt-on-wechat/skills/web-fetch/scripts/fetch.sh" "https://example.com"
-```
-
-**Parameters:**
-- `url`: The HTTP/HTTPS URL to fetch (required)
-- `output_file`: Optional file to save the output (default: stdout)
-
-**Returns:**
-- Extracted page content with title and text
-
-## Examples
-
-### Fetch a web page
-```bash
-bash "<base_dir>/scripts/fetch.sh" "https://example.com"
-```
-
-### Save to file
-```bash
-bash "<base_dir>/scripts/fetch.sh" "https://example.com" output.txt
-cat output.txt
-```
-
-## Notes
-
-- Uses curl for HTTP requests (timeout: 10s)
-- Extracts title and basic text content
-- Removes HTML tags and scripts
-- Works with any standard web page
-- No external dependencies beyond curl
diff --git a/skills/web-fetch/scripts/fetch.sh b/skills/web-fetch/scripts/fetch.sh
deleted file mode 100755
index 1713b263..00000000
--- a/skills/web-fetch/scripts/fetch.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env bash
-# Fetch and extract readable content from a web page
-
-set -euo pipefail
-
-url="${1:-}"
-output_file="${2:-}"
-
-if [ -z "$url" ]; then
- echo "Error: URL is required"
- echo "Usage: bash fetch.sh <url> [output_file]"
- exit 1
-fi
-
-# Validate URL
-if [[ ! "$url" =~ ^https?:// ]]; then
- echo "Error: Invalid URL (must start with http:// or https://)"
- exit 1
-fi
-
-# Fetch the page with curl
-html=$(curl -sS -L --max-time 10 \
- -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" \
- -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" \
- "$url" 2>&1) || {
- echo "Error: Failed to fetch URL: $url"
- exit 1
-}
-
-# Extract title
-title=$(echo "$html" | grep -oP '(?<=<title>).*?(?=</title>)' | head -1 || echo "Untitled")
-
-# Remove script and style tags
-text=$(echo "$html" | sed 's/