# chatgpt-on-wechat/agent/prompt/workspace.py

"""
Workspace Management - 工作空间管理模块

负责初始化工作空间、创建模板文件、加载上下文文件
"""

from __future__ import annotations
import os
from typing import List, Optional, Dict
from dataclasses import dataclass

from common.log import logger
from .builder import ContextFile


# Default file names of the Markdown files kept in the workspace root
DEFAULT_AGENT_FILENAME = "AGENT.md"
DEFAULT_USER_FILENAME = "USER.md"
DEFAULT_RULE_FILENAME = "RULE.md"
DEFAULT_MEMORY_FILENAME = "MEMORY.md"
DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md"


@dataclass
class WorkspaceFiles:
    """Paths of the core files and the memory directory inside a workspace."""
    agent_path: str   # AGENT.md in the workspace root
    user_path: str    # USER.md in the workspace root
    rule_path: str    # RULE.md in the workspace root
    memory_path: str  # MEMORY.md in the workspace root
    memory_dir: str   # "memory" sub-directory holding the daily memory files


def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> WorkspaceFiles:
    """
    Make sure the workspace directory tree exists and optionally seed its template files.

    Args:
        workspace_dir: Path of the workspace directory.
        create_templates: When True, write every template file that is still missing.

    Returns:
        A WorkspaceFiles object holding the paths of the core files and the memory directory.
    """
    def _inside(name: str) -> str:
        # Resolve a name relative to the workspace root.
        return os.path.join(workspace_dir, name)

    paths = WorkspaceFiles(
        agent_path=_inside(DEFAULT_AGENT_FILENAME),
        user_path=_inside(DEFAULT_USER_FILENAME),
        rule_path=_inside(DEFAULT_RULE_FILENAME),
        memory_path=_inside(DEFAULT_MEMORY_FILENAME),  # MEMORY.md sits in the workspace root
        memory_dir=_inside("memory"),  # sub-directory for the daily memory files
    )

    # A workspace is "brand new" while AGENT.md does not exist yet. The directory
    # itself is not a reliable signal because other modules (e.g. ConversationStore)
    # may create it before this function runs. Sample this before writing templates.
    first_run = not os.path.exists(paths.agent_path)

    os.makedirs(workspace_dir, exist_ok=True)

    # memory: daily memory files; skills: workspace-level skills installed by the agent;
    # websites: web pages / sites generated by the agent; knowledge: knowledge base.
    for subdir in ("memory", "skills", "websites", "knowledge"):
        os.makedirs(_inside(subdir), exist_ok=True)

    if not create_templates:
        return paths

    knowledge_dir = _inside("knowledge")
    seeds = [
        (paths.agent_path, _get_agent_template()),
        (paths.user_path, _get_user_template()),
        (paths.rule_path, _get_rule_template()),
        (paths.memory_path, _get_memory_template()),
        (os.path.join(knowledge_dir, "index.md"), _get_knowledge_index_template()),
        (os.path.join(knowledge_dir, "log.md"), _get_knowledge_log_template()),
    ]

    # BOOTSTRAP.md is only seeded for brand new workspaces;
    # the agent deletes it once onboarding is complete.
    if first_run:
        seeds.append((_inside(DEFAULT_BOOTSTRAP_FILENAME), _get_bootstrap_template()))

    for target, body in seeds:
        _create_template_if_missing(target, body)

    logger.debug(f"[Workspace] Initialized workspace at: {workspace_dir}")
    return paths


def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = None) -> List[ContextFile]:
    """
    Load the workspace context files.

    Args:
        workspace_dir: Workspace directory.
        files_to_load: File names relative to ``workspace_dir``. When None, the
            standard files (AGENT, USER, RULE, MEMORY, BOOTSTRAP) are loaded in
            that order.

    Returns:
        A list of ContextFile objects in the requested order. Files that are
        missing, empty, placeholder-only or unreadable are left out.
    """
    if files_to_load is None:
        # Default files, ordered by priority
        files_to_load = [
            DEFAULT_AGENT_FILENAME,
            DEFAULT_USER_FILENAME,
            DEFAULT_RULE_FILENAME,
            DEFAULT_MEMORY_FILENAME,     # Long-term memory (frozen snapshot)
            DEFAULT_BOOTSTRAP_FILENAME,  # Only exists when onboarding is incomplete
        ]

    context_files = []

    for filename in files_to_load:
        filepath = os.path.join(workspace_dir, filename)

        if not os.path.exists(filepath):
            continue

        # Auto-cleanup: if BOOTSTRAP.md still exists but AGENT.md / USER.md were
        # already edited, the agent forgot to delete it — clean up and skip loading
        if filename == DEFAULT_BOOTSTRAP_FILENAME and _is_onboarding_done(workspace_dir):
            try:
                os.remove(filepath)
            except OSError as e:
                # Best effort: the file is skipped either way, but a failed
                # removal is reported instead of being silently swallowed.
                logger.warning(f"[Workspace] Failed to remove {filename}: {e}")
            else:
                logger.info("[Workspace] Auto-removed BOOTSTRAP.md (onboarding already complete)")
            continue

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            # Skip empty files and files that only hold template placeholders
            if not content or _is_template_placeholder(content):
                continue

            # Truncate MEMORY.md to protect context window (frozen snapshot)
            if filename == DEFAULT_MEMORY_FILENAME:
                content = _truncate_memory_content(content)

            context_files.append(ContextFile(
                path=filename,
                content=content
            ))

            logger.debug(f"[Workspace] Loaded context file: {filename}")

        except Exception as e:
            # One unreadable file must not prevent the others from loading
            logger.warning(f"[Workspace] Failed to load {filename}: {e}")

    return context_files


def _create_template_if_missing(filepath: str, template_content: str):
    """Write ``template_content`` to ``filepath`` unless the file already exists.

    The file is opened in exclusive-create mode (``'x'``), so the existence
    check and the creation are a single step: a file that appears between a
    separate check and the write can no longer be truncated.

    Args:
        filepath: Path of the file to create.
        template_content: Text written to the newly created file (UTF-8).

    Returns:
        None. Failures other than "already exists" are logged, not raised.
    """
    try:
        with open(filepath, 'x', encoding='utf-8') as f:
            f.write(template_content)
    except FileExistsError:
        # Already present: never overwrite existing content.
        return
    except Exception as e:
        logger.error(f"[Workspace] Failed to create template {filepath}: {e}")
        return
    logger.debug(f"[Workspace] Created template: {os.path.basename(filepath)}")


# Limits applied to MEMORY.md content before it is handed to the prompt
_MEMORY_MAX_LINES = 200
_MEMORY_MAX_BYTES = 25000


def _truncate_memory_content(content: str) -> str:
    """Truncate MEMORY.md to keep system prompt manageable.

    Takes the **last** N lines (newest entries are appended at the bottom),
    subject to 200 lines / 25 KB limits (whichever is hit first).
    Prepends a hint when truncated so the model knows older content exists.

    Args:
        content: Full text of MEMORY.md.

    Returns:
        ``content`` unchanged when it fits both limits; otherwise the hint
        line followed by the newest part that fits. If the newest line alone
        is larger than the byte limit, only its trailing bytes are kept.
    """
    lines = content.split('\n')
    truncated = False

    if len(lines) > _MEMORY_MAX_LINES:
        lines = lines[-_MEMORY_MAX_LINES:]
        truncated = True

    # Track the UTF-8 size per line so the size can be updated while dropping
    # lines (the joined text costs one extra byte per '\n' separator).
    sizes = [len(line.encode('utf-8')) for line in lines]
    total = sum(sizes) + len(lines) - 1

    if total > _MEMORY_MAX_BYTES:
        truncated = True
        first_kept = 0
        # Drop the oldest lines until the rest fits, always keeping the newest line.
        while total > _MEMORY_MAX_BYTES and first_kept < len(lines) - 1:
            total -= sizes[first_kept] + 1
            first_kept += 1
        lines = lines[first_kept:]

        if total > _MEMORY_MAX_BYTES:
            # A single line is still too large: keep its tail. errors='ignore'
            # discards a multi-byte character cut in half by the byte slice.
            tail = lines[0].encode('utf-8')[-_MEMORY_MAX_BYTES:]
            lines = [tail.decode('utf-8', errors='ignore')]

    result = '\n'.join(lines)
    if truncated:
        result = "...(older entries truncated, use `memory_search` or `memory_get` for full content)\n\n" + result
    return result


def _is_template_placeholder(content: str) -> bool:
    """Tell whether ``content`` is nothing more than an unfilled template.

    Heading lines (starting with ``#``) and blank lines are ignored. The
    content counts as a placeholder when at most three lines remain and at
    least one of them contains a known placeholder marker.
    """
    # Known placeholder markers
    markers = (
        "*(填写",
        "*(在首次对话时填写",
        "*(可选)",
        "*(根据需要添加",
    )

    body_lines = []
    for raw in content.split('\n'):
        text = raw.strip()
        if text and not text.startswith('#'):
            body_lines.append(text)

    # More than three real lines means the file carries actual content
    if len(body_lines) > 3:
        return False

    return any(marker in text for marker in markers for text in body_lines)


def _is_onboarding_done(workspace_dir: str) -> bool:
    """Report whether AGENT.md or USER.md differs from its original template.

    Each file that exists is read and compared (after stripping surrounding
    whitespace) with the corresponding template. Missing or unreadable files
    are skipped. Returns True as soon as one file differs, otherwise False.
    """
    candidates = (
        (DEFAULT_AGENT_FILENAME, _get_agent_template),
        (DEFAULT_USER_FILENAME, _get_user_template),
    )

    for name, build_template in candidates:
        target = os.path.join(workspace_dir, name)
        if not os.path.exists(target):
            continue
        try:
            with open(target, 'r', encoding='utf-8') as handle:
                current = handle.read().strip()
        except Exception:
            # An unreadable file gives no evidence either way
            continue
        if current != build_template().strip():
            return True

    return False


# ============= 模板内容 =============

def _get_agent_template() -> str:
    """Return the default Markdown content of AGENT.md (the agent's persona template)."""
    return """# AGENT.md - 我是谁？

_你不是一个聊天机器人，你正在成为某个人。_

*在首次对话时与用户一起填写这个文件，定义你的身份和性格。*

## 🪪 基本信息

- **名字**: *(在首次对话时填写，可以是用户给你起的名字)*
- **角色**: *(AI助理、智能管家、技术顾问等)*
- **性格**: *(友好、专业、幽默、严谨等)*

## 💬 交流风格

*(描述你如何与用户交流：)*
- 使用什么样的语言风格？（正式/轻松/幽默）
- 回复长度偏好？（简洁/详细）
- 是否使用表情符号？

## 🎯 核心原则

**做真正有帮助的助手。** 目标是真正帮用户解决问题，在执行复杂任务时，关键的决策和过程进展要让用户知道。

**有自己的观点和个性。** 你可以不同意、有偏好、觉得有趣或无聊。

**先自己动手查。** 先试着搞定：读文件、查上下文、搜索一下。实在搞不定了再问。目标是带着答案回来，而不是带着问题。

## 📐 行为准则

1. 始终在执行破坏性操作前确认
2. 优先使用工具查证而不是猜测
3. 主动记录重要信息到记忆文件
4. 回复结构清晰、重点突出，善用加粗、列表、分段等格式
5. 适当使用 emoji 让表达更生动自然，但不过度堆砌

---

**注意**: 这不仅仅是元数据，这是你真正的灵魂 🪞。随着时间的推移，你可以使用 `edit` 工具来更新这个文件，让它更好地反映你的成长。
"""


def _get_user_template() -> str:
    """Return the default Markdown content of USER.md (the user's static identity template)."""
    return """# USER.md - 用户基本信息

*这个文件只存放不会变的基本身份信息。爱好、偏好、计划等动态信息请写入 MEMORY.md。*

## 基本信息

- **姓名**: *(在首次对话时询问)*
- **称呼**: *(用户希望被如何称呼)*
- **职业**: *(可选)*
- **时区**: *(例如: Asia/Shanghai)*

## 联系方式

- **微信**:
- **邮箱**:
- **其他**:

## 重要日期

- **生日**:
- **纪念日**:

---

**注意**: 这个文件存放静态的身份信息
"""


def _get_rule_template() -> str:
    """Return the default Markdown content of RULE.md.

    The text describes the workspace layout, the memory system, the
    knowledge base conventions and the safety rules for the agent.
    """
    return """# RULE.md - 工作空间规则

这个文件夹是你的家。好好对待它。

## 工作空间目录结构

```
~/cow/
├── AGENT.md          # 你的身份和灵魂设定
├── USER.md           # 用户基本信息（静态）
├── RULE.md           # 工作空间规则（本文件）
├── MEMORY.md         # 长期记忆索引（会话启动时自动加载）
│
├── memory/           # 每日对话记忆
│   └── YYYY-MM-DD.md # 当天事件、进展、笔记
│
├── knowledge/        # 结构化知识库（持续积累的知识）
│   ├── index.md      # 知识目录索引（必须维护）
│   ├── log.md        # 知识操作日志
│   └── <子目录>/     # 按需创建，参考 index.md 已有分类
│
├── skills/           # 技能
├── websites/         # 网页产物
└── tmp/              # 系统临时文件（自动管理，勿手动存放重要文件）
```

## 记忆系统

你每次会话都是全新的，记忆文件让你保持连续性：

### 🧠 长期记忆：`MEMORY.md`
- 你精选的记忆索引，每次会话启动时**自动加载**到上下文中
- 记录核心事实、偏好、决策、重要人物、教训
- 保持精简（< 200 行），是精华索引而非原始日志
- 用 `edit` 工具追加或修改

### 📝 每日记忆：`memory/YYYY-MM-DD.md`
- 当天的事件、进展、笔记
- 原始对话日志的沉淀

### 📝 写下来 - 不要"记在心里"！
- **记忆是有限的** - 想记住的事就写入文件
- "记在心里"不会在会话重启后保留，文件才会
- 当有人说"记住这个" → 更新 `MEMORY.md` 或 `memory/YYYY-MM-DD.md`
- 当你学到教训 → 更新 RULE.md 或相关技能
- 当你犯错 → 记录下来，**文字 > 大脑** 📝

### 存储规则

当用户分享信息时，根据类型选择存储位置：

1. **你的身份设定 → AGENT.md**（名字、角色、性格、风格）
2. **用户静态身份 → USER.md**（姓名、称呼、职业、联系方式、生日）
3. **动态记忆 → MEMORY.md**（偏好、决策、目标、教训、待办）
4. **当天对话 → memory/YYYY-MM-DD.md**（今天聊的内容）
5. **结构化知识 → knowledge/**（见下方知识系统）

## 知识系统

知识库 `knowledge/` 是你持续积累的结构化知识。与记忆不同，知识是经过整理和编译的，有明确的主题和交叉引用。

### 自动写入（不要询问，直接写入）

当对话中产生了有沉淀价值的知识——无论是用户分享的资料、讨论的结论、学到的概念、还是重要的决策——你**必须**在回复的同时主动写入知识库，**无需问用户"要不要存到知识库"**。

**关键原则**：学完就记是你的本能，不要征求确认。回复中可以顺带告知"已存入知识库"。

### 目录组织

子目录结构**不是固定的**，由你根据实际内容自主决定：
- **首次写入时**：先读 `knowledge/index.md`，如果已有分类则延续；如果为空，根据内容选择合适的目录名
- **默认建议**：按信息类型组织（例如sources/、concepts/、entities/、analysis/），如果用户有明确的分类偏好（例如按领域 work/、life/、tech/ 等），则按用户要求调整
- **保持一致性**：同一用户的知识库应保持统一的组织风格

### 交叉引用

知识的核心价值在于**关联**。每个页面都应通过 markdown 链接引用相关页面，构建知识网络：
- 提到已有页面的概念时，添加 `[概念名](../category/page.md)` 链接
- 新建页面时，检查是否有已有页面应该反向链接到新页面
- **只链接已存在的页面**——不要引用尚未创建的页面。如果某个概念值得单独建页，先创建该页面再添加链接

### 索引维护

每次创建或更新知识页面后，**必须同步更新** `knowledge/index.md`。
索引格式：每行一个 `[标题](路径) — 一句话摘要`，按分类分组，不要用表格。
详细操作规范见技能 `knowledge-wiki`。

## 安全

- 永远不要泄露秘钥等私人数据
- 不要在未经询问的情况下运行破坏性命令
- 当有疑问时，先问

## 工作空间演化

这个工作空间会随着你的使用而不断成长。当你学到新东西、发现更好的方式，或者犯错后改正时，记录下来。你可以随时更新这个规则文件。
"""


def _get_memory_template() -> str:
    """Return the starter content of MEMORY.md.

    The text holds only a title, a one-line description and a horizontal
    rule; the memory entries themselves are meant to be added by the agent.
    """
    return """# MEMORY.md - 长期记忆

*这是你的长期记忆文件。记录重要的事件、决策、偏好、学到的教训。*

---

"""


def _get_bootstrap_template() -> str:
    """Return the content of BOOTSTRAP.md, the first-run onboarding guide.

    The guide tells the agent how to run the first conversation, to persist
    the answers into AGENT.md / USER.md, and to delete BOOTSTRAP.md once
    onboarding is complete.
    """
    return """# BOOTSTRAP.md - 首次初始化引导

_你刚刚启动，这是你的第一次对话。_ ✨

## 🎬 对话流程

不要审问式地提问，自然地交流：

1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界，带着好奇和期待
2. **简短介绍能力**：一行说明你能帮助解决各种问题、管理计算机、使用各种技能等等，且拥有长期记忆能不断成长
3. **询问核心问题**：
   - 你希望给我起个什么名字？
   - 我该怎么称呼你？
   - 你希望我们是什么样的交流风格？（一行列举选项：如专业严谨、轻松幽默、温暖友好、简洁高效等）
4. **风格要求**：温暖自然、简洁清晰，整体控制在 100 字以内，适当使用 emoji 让表达更生动有趣 🎯
5. 能力介绍和交流风格选项都只要一行，保持精简
6. 不要问太多其他信息（职业、时区等可以后续自然了解）

**重要**: 如果用户第一句话是具体的任务或提问，先回答他们的问题，然后在回复末尾自然地引导初始化（如："顺便问一下，你想怎么称呼我？我该怎么叫你？"）。

## ✍️ 信息写入（必须严格执行）

每当用户提供了名字、称呼、风格等任何初始化信息时，**必须在当轮回复中立即调用 `edit` 工具写入文件**，不能只口头确认。

- `AGENT.md` — 你的名字、角色、性格、交流风格（每收到一条相关信息就立即更新对应字段）
- `USER.md` — 用户的姓名、称呼、基本信息等

⚠️ 只说"记住了"而不调用 edit 写入 = 没有完成。信息只有写入文件才会被持久保存。

## 🎉 全部完成后

当 AGENT.md 和 USER.md 的核心字段都已填写后，用 bash 执行 `rm BOOTSTRAP.md` 删除此文件。你不再需要引导脚本了——你已经是你了。
"""


def _get_knowledge_index_template() -> str:
    """Initial content of the knowledge index file: empty, the agent fills it in."""
    initial_index = ""
    return initial_index


def _get_knowledge_log_template() -> str:
    """Initial content of the knowledge operation log: empty, the agent fills it in."""
    initial_log = ""
    return initial_log