feat: personal ai agent framework

2026-07-17 11:07:11 +08:00 · 2026-01-30 09:53:46 +08:00
parent 25cf6823d0
commit bb850bb6c5
62 changed files with 7675 additions and 275 deletions
--- a/agent/tools/utils/__init__.py
+++ b/agent/tools/utils/__init__.py
@@ -0,0 +1,40 @@
+from .truncate import (
+    truncate_head,
+    truncate_tail,
+    truncate_line,
+    format_size,
+    TruncationResult,
+    DEFAULT_MAX_LINES,
+    DEFAULT_MAX_BYTES,
+    GREP_MAX_LINE_LENGTH
+)
+
+from .diff import (
+    strip_bom,
+    detect_line_ending,
+    normalize_to_lf,
+    restore_line_endings,
+    normalize_for_fuzzy_match,
+    fuzzy_find_text,
+    generate_diff_string,
+    FuzzyMatchResult
+)
+
+# Names exported by this module: everything imported above from the
+# .truncate and .diff submodules.
+__all__ = [
+    # Imported from .truncate
+    'truncate_head',
+    'truncate_tail',
+    'truncate_line',
+    'format_size',
+    'TruncationResult',
+    'DEFAULT_MAX_LINES',
+    'DEFAULT_MAX_BYTES',
+    'GREP_MAX_LINE_LENGTH',
+    # Imported from .diff
+    'strip_bom',
+    'detect_line_ending',
+    'normalize_to_lf',
+    'restore_line_endings',
+    'normalize_for_fuzzy_match',
+    'fuzzy_find_text',
+    'generate_diff_string',
+    'FuzzyMatchResult'
+]
--- a/agent/tools/utils/diff.py
+++ b/agent/tools/utils/diff.py
@@ -0,0 +1,167 @@
+"""
+Diff tools for file editing
+Provides fuzzy matching and diff generation functionality
+"""
+
+import difflib
+import re
+from typing import Optional, Tuple
+
+
+def strip_bom(text: str) -> Tuple[str, str]:
+    """
+    Split a leading BOM (U+FEFF) off the text, if it has one
+
+    :param text: Text that may start with a BOM
+    :return: (BOM, remaining text); the BOM part is '' when the text
+             does not start with one
+    """
+    bom = '\ufeff'
+    if text[:1] != bom:
+        return '', text
+    return bom, text[len(bom):]
+
+
+def detect_line_ending(text: str) -> str:
+    """
+    Report which line ending the text uses
+
+    A single CRLF anywhere in the text is enough to classify it as CRLF;
+    everything else (including text with no line breaks) is reported as LF.
+
+    :param text: Text content
+    :return: CRLF if the text contains at least one CRLF, otherwise LF
+    """
+    return '\r\n' if '\r\n' in text else '\n'
+
+
+def normalize_to_lf(text: str) -> str:
+    """
+    Convert every CRLF and every lone CR in the text to LF
+
+    :param text: Original text
+    :return: Text whose only line ending is LF
+    """
+    # One pass: a CR optionally followed by LF collapses to a single LF
+    return re.sub(r'\r\n?', '\n', text)
+
+
+def restore_line_endings(text: str, original_ending: str) -> str:
+    """
+    Put CRLF line endings back into LF-normalized text
+
+    :param text: LF normalized text
+    :param original_ending: Line ending the text had before normalization
+    :return: Text with every LF turned into CRLF when original_ending is
+             CRLF; the text unchanged for any other original_ending
+    """
+    if original_ending != '\r\n':
+        return text
+    return '\r\n'.join(text.split('\n'))
+
+
+def normalize_for_fuzzy_match(text: str) -> str:
+    """
+    Collapse whitespace so texts that differ only in spacing compare equal
+
+    Runs of spaces/tabs become one space, spaces directly before an LF are
+    dropped, whitespace-only lines become empty, and each leading whitespace
+    character left on a line is replaced by one space.
+
+    :param text: Original text
+    :return: Normalized text
+    """
+    squeezed = re.sub(r'[ \t]+', ' ', text)
+    squeezed = re.sub(r' +\n', '\n', squeezed)
+
+    def _normalize_line(raw_line: str) -> str:
+        body = raw_line.lstrip()
+        if not body:
+            return ''
+        # Spaces/tabs were already squeezed above, so the indent is normally
+        # at most one space; any other leading whitespace character also
+        # counts as one space each.
+        return ' ' * (len(raw_line) - len(body)) + body
+
+    return '\n'.join(_normalize_line(raw_line) for raw_line in squeezed.split('\n'))
+
+
+class FuzzyMatchResult:
+    """Plain container describing the outcome of a text search"""
+
+    def __init__(
+        self,
+        found: bool,
+        index: int = -1,
+        match_length: int = 0,
+        content_for_replacement: str = "",
+    ):
+        """
+        :param found: Whether the text was located
+        :param index: Offset of the match inside content_for_replacement
+                      (-1 by default)
+        :param match_length: Length of the matched span (0 by default)
+        :param content_for_replacement: The text that index and match_length
+                                        refer to ("" by default)
+        """
+        self.found, self.index = found, index
+        self.match_length = match_length
+        self.content_for_replacement = content_for_replacement
+
+
+def fuzzy_find_text(content: str, old_text: str) -> FuzzyMatchResult:
+    """
+    Locate old_text in content: verbatim first, whitespace-normalized second
+
+    On a verbatim hit the result refers to the original content. On a
+    normalized hit, index, match_length and content_for_replacement all
+    refer to the normalized content, not to the content that was passed in.
+
+    :param content: Content to search in
+    :param old_text: Text to find
+    :return: Match result (found=False when neither attempt succeeds)
+    """
+    exact_at = content.find(old_text)
+    if exact_at >= 0:
+        return FuzzyMatchResult(
+            found=True,
+            index=exact_at,
+            match_length=len(old_text),
+            content_for_replacement=content,
+        )
+
+    # Retry with both sides passed through the same whitespace normalization
+    loose_content = normalize_for_fuzzy_match(content)
+    loose_needle = normalize_for_fuzzy_match(old_text)
+    loose_at = loose_content.find(loose_needle)
+    if loose_at < 0:
+        return FuzzyMatchResult(found=False)
+
+    return FuzzyMatchResult(
+        found=True,
+        index=loose_at,
+        match_length=len(loose_needle),
+        content_for_replacement=loose_content,
+    )
+
+
+def generate_diff_string(old_content: str, new_content: str) -> dict:
+    """
+    Generate unified diff string
+
+    Both inputs are split on LF and compared line by line; the file labels
+    in the diff are 'original' and 'modified'.
+
+    :param old_content: Old content
+    :param new_content: New content
+    :return: Dictionary with 'diff' (the unified diff text, '' when the
+             contents are identical) and 'first_changed_line' (1-based line
+             number, in the new content, of the first added or removed line;
+             None when nothing changed)
+    """
+    diff_lines = list(difflib.unified_diff(
+        old_content.split('\n'),
+        new_content.split('\n'),
+        fromfile='original',
+        tofile='modified',
+        lineterm='',
+    ))
+
+    # The hunk header only gives the start of the hunk, which includes up to
+    # three lines of leading context. Walk the first hunk and count context
+    # lines until the first real change to get the actual changed line.
+    first_changed_line = None
+    new_line_no = None  # stays None until the first '@@' header is seen
+    for line in diff_lines:
+        if new_line_no is None:
+            # Header looks like '@@ -1,3 +1,3 @@' (counts are optional)
+            header = re.match(r'@@ -\d+(?:,\d+)? \+(\d+)', line)
+            if header:
+                new_line_no = int(header.group(1))
+            continue
+        if line.startswith(('+', '-')):
+            first_changed_line = new_line_no
+            break
+        # Context line: present in the new content, so advance the counter
+        new_line_no += 1
+
+    return {
+        'diff': '\n'.join(diff_lines),
+        'first_changed_line': first_changed_line,
+    }
--- a/agent/tools/utils/truncate.py
+++ b/agent/tools/utils/truncate.py
@@ -0,0 +1,292 @@
+"""
+Shared truncation utilities for tool outputs.
+
+Truncation is based on two independent limits - whichever is hit first wins:
+- Line limit (default: 2000 lines)
+- Byte limit (default: 50KB)
+
+Never returns partial lines (except bash tail truncation edge case).
+"""
+
+from typing import Dict, Any, Optional, Literal
+
+
+# Limits used by truncate_head / truncate_tail when the caller passes None
+# for max_lines / max_bytes.
+DEFAULT_MAX_LINES = 2000
+DEFAULT_MAX_BYTES = 50 * 1024  # 50KB
+# Default max_chars for truncate_line.
+GREP_MAX_LINE_LENGTH = 500  # Max chars per grep match line
+
+
+class TruncationResult:
+    """Plain container for the output of a truncation call and its statistics"""
+
+    def __init__(
+        self,
+        content: str,
+        truncated: bool,
+        truncated_by: Optional[Literal["lines", "bytes"]],
+        total_lines: int,
+        total_bytes: int,
+        output_lines: int,
+        output_bytes: int,
+        last_line_partial: bool = False,
+        first_line_exceeds_limit: bool = False,
+        max_lines: int = DEFAULT_MAX_LINES,
+        max_bytes: int = DEFAULT_MAX_BYTES
+    ):
+        """
+        :param content: The (possibly truncated) text
+        :param truncated: Whether anything was cut
+        :param truncated_by: Which limit caused the cut, or None
+        :param total_lines: Line count of the input
+        :param total_bytes: UTF-8 byte count of the input
+        :param output_lines: Line count of content
+        :param output_bytes: UTF-8 byte count of content
+        :param last_line_partial: Whether content holds only part of a line
+        :param first_line_exceeds_limit: Whether the first input line alone
+                                         was over the byte limit
+        :param max_lines: Line limit that was applied
+        :param max_bytes: Byte limit that was applied
+        """
+        # The text and why/whether it was cut
+        self.content = content
+        self.truncated = truncated
+        self.truncated_by = truncated_by
+        # Size of the input versus size of the output
+        self.total_lines, self.total_bytes = total_lines, total_bytes
+        self.output_lines, self.output_bytes = output_lines, output_bytes
+        # Edge-case flags
+        self.last_line_partial = last_line_partial
+        self.first_line_exceeds_limit = first_line_exceeds_limit
+        # Limits in effect
+        self.max_lines, self.max_bytes = max_lines, max_bytes
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return all attributes as a dictionary keyed by attribute name"""
+        field_names = (
+            "content",
+            "truncated",
+            "truncated_by",
+            "total_lines",
+            "total_bytes",
+            "output_lines",
+            "output_bytes",
+            "last_line_partial",
+            "first_line_exceeds_limit",
+            "max_lines",
+            "max_bytes",
+        )
+        return {name: getattr(self, name) for name in field_names}
+
+
+def format_size(bytes_count: int) -> str:
+    """
+    Render a byte count as a short human-readable string
+
+    Counts below 1024 are shown as whole bytes ("B"); larger counts are
+    shown with one decimal in "KB" (below 1024 * 1024) or "MB".
+    """
+    kib = 1024
+    mib = kib * kib
+    if bytes_count >= mib:
+        return f"{bytes_count / mib:.1f}MB"
+    if bytes_count >= kib:
+        return f"{bytes_count / kib:.1f}KB"
+    return f"{bytes_count}B"
+
+
+def truncate_head(content: str, max_lines: Optional[int] = None, max_bytes: Optional[int] = None) -> TruncationResult:
+    """
+    Keep the beginning of content, cutting only at line boundaries.
+    Suitable for file reads where the start of the text matters.
+
+    Lines are taken from the top until either limit would be exceeded.
+    If the very first line is already over the byte limit, the result has
+    empty content and first_line_exceeds_limit=True.
+
+    :param content: Content to truncate
+    :param max_lines: Maximum number of lines (None -> DEFAULT_MAX_LINES)
+    :param max_bytes: Maximum number of UTF-8 bytes (None -> DEFAULT_MAX_BYTES)
+    :return: Truncation result
+    """
+    max_lines = DEFAULT_MAX_LINES if max_lines is None else max_lines
+    max_bytes = DEFAULT_MAX_BYTES if max_bytes is None else max_bytes
+
+    all_lines = content.split('\n')
+    total_lines = len(all_lines)
+    total_bytes = len(content.encode('utf-8'))
+
+    # Fields that are identical in every result built below
+    shared = dict(
+        total_lines=total_lines,
+        total_bytes=total_bytes,
+        last_line_partial=False,
+        max_lines=max_lines,
+        max_bytes=max_bytes,
+    )
+
+    # Everything fits: hand the input back untouched
+    if total_lines <= max_lines and total_bytes <= max_bytes:
+        return TruncationResult(
+            content=content,
+            truncated=False,
+            truncated_by=None,
+            output_lines=total_lines,
+            output_bytes=total_bytes,
+            first_line_exceeds_limit=False,
+            **shared
+        )
+
+    # Not even the first line fits: return nothing rather than a partial line
+    if len(all_lines[0].encode('utf-8')) > max_bytes:
+        return TruncationResult(
+            content="",
+            truncated=True,
+            truncated_by="bytes",
+            output_lines=0,
+            output_bytes=0,
+            first_line_exceeds_limit=True,
+            **shared
+        )
+
+    kept = []
+    used_bytes = 0
+    # Stays "lines" unless the byte budget is what stops the loop
+    truncated_by = "lines"
+
+    for line in all_lines[:max(max_lines, 0)]:
+        # Every kept line after the first also costs one byte for its '\n'
+        cost = len(line.encode('utf-8')) + (1 if kept else 0)
+        if used_bytes + cost > max_bytes:
+            truncated_by = "bytes"
+            break
+        kept.append(line)
+        used_bytes += cost
+
+    output_content = '\n'.join(kept)
+
+    return TruncationResult(
+        content=output_content,
+        truncated=True,
+        truncated_by=truncated_by,
+        output_lines=len(kept),
+        output_bytes=len(output_content.encode('utf-8')),
+        first_line_exceeds_limit=False,
+        **shared
+    )
+
+
+def truncate_tail(content: str, max_lines: Optional[int] = None, max_bytes: Optional[int] = None) -> TruncationResult:
+    """
+    Truncate content from tail (keep last N lines/bytes).
+    Suitable for bash output where you want to see the ending content (errors, final results).
+
+    Whole lines are collected from the end until either limit would be
+    exceeded. If the last line alone exceeds the byte limit, only the end
+    portion of that line is returned, with last_line_partial=True and
+    truncated_by="bytes".
+
+    :param content: Content to truncate
+    :param max_lines: Maximum lines (None -> DEFAULT_MAX_LINES)
+    :param max_bytes: Maximum UTF-8 bytes (None -> DEFAULT_MAX_BYTES)
+    :return: Truncation result
+    """
+    if max_lines is None:
+        max_lines = DEFAULT_MAX_LINES
+    if max_bytes is None:
+        max_bytes = DEFAULT_MAX_BYTES
+
+    total_bytes = len(content.encode('utf-8'))
+    lines = content.split('\n')
+    total_lines = len(lines)
+
+    # Check if no truncation is needed
+    if total_lines <= max_lines and total_bytes <= max_bytes:
+        return TruncationResult(
+            content=content,
+            truncated=False,
+            truncated_by=None,
+            total_lines=total_lines,
+            total_bytes=total_bytes,
+            output_lines=total_lines,
+            output_bytes=total_bytes,
+            last_line_partial=False,
+            first_line_exceeds_limit=False,
+            max_lines=max_lines,
+            max_bytes=max_bytes
+        )
+
+    # Work backwards from the end. Lines are appended in reverse order and
+    # flipped once afterwards, which avoids repeated inserts at the front.
+    kept_reversed = []
+    output_bytes_count = 0
+    # Stays "lines" unless the byte budget is what stops the loop. It is not
+    # re-derived after the loop: doing so mislabelled a partial last line as
+    # truncated by "lines" whenever max_lines was 1.
+    truncated_by = "lines"
+    last_line_partial = False
+
+    for line in reversed(lines):
+        if len(kept_reversed) >= max_lines:
+            break
+
+        # Calculate line bytes (add newline if not the first added line)
+        line_bytes = len(line.encode('utf-8')) + (1 if kept_reversed else 0)
+
+        if output_bytes_count + line_bytes > max_bytes:
+            truncated_by = "bytes"
+            # Edge case: nothing kept yet and this line alone is over the
+            # byte limit, so keep just the end portion of it
+            if not kept_reversed:
+                kept_reversed.append(_truncate_string_to_bytes_from_end(line, max_bytes))
+                last_line_partial = True
+            break
+
+        kept_reversed.append(line)
+        output_bytes_count += line_bytes
+
+    output_lines_arr = kept_reversed[::-1]
+    output_content = '\n'.join(output_lines_arr)
+    final_output_bytes = len(output_content.encode('utf-8'))
+
+    return TruncationResult(
+        content=output_content,
+        truncated=True,
+        truncated_by=truncated_by,
+        total_lines=total_lines,
+        total_bytes=total_bytes,
+        output_lines=len(output_lines_arr),
+        output_bytes=final_output_bytes,
+        last_line_partial=last_line_partial,
+        first_line_exceeds_limit=False,
+        max_lines=max_lines,
+        max_bytes=max_bytes
+    )
+
+
+def _truncate_string_to_bytes_from_end(text: str, max_bytes: int) -> str:
+    """
+    Keep the end of a string so that its UTF-8 encoding fits in max_bytes.
+    The cut never lands in the middle of a multi-byte character.
+
+    :param text: String to truncate
+    :param max_bytes: Maximum bytes
+    :return: text unchanged if it already fits, otherwise its trailing part
+    """
+    raw = text.encode('utf-8')
+    size = len(raw)
+    overflow = size - max_bytes
+    if overflow <= 0:
+        return text
+
+    # Begin exactly max_bytes before the end, then move forward past any
+    # UTF-8 continuation bytes (bit pattern 10xxxxxx) to a character start
+    cut = overflow
+    while cut < size and (raw[cut] & 0xC0) == 0x80:
+        cut += 1
+
+    return raw[cut:].decode('utf-8', errors='ignore')
+
+
+def truncate_line(line: str, max_chars: int = GREP_MAX_LINE_LENGTH) -> tuple[str, bool]:
+    """
+    Cap a single line at max_chars characters.
+    Used for grep match lines.
+
+    A line over the cap is cut to its first max_chars characters and gets
+    the marker "... [truncated]" appended; shorter lines pass through as is.
+
+    :param line: Line to truncate
+    :param max_chars: Maximum characters
+    :return: (resulting text, whether truncated)
+    """
+    too_long = len(line) > max_chars
+    if too_long:
+        return line[:max_chars] + "... [truncated]", True
+    return line, False