From 54e81aba113526c712ea4e3288e1c41fd82fac40 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Thu, 9 Apr 2026 21:22:43 +0800 Subject: [PATCH] feat(memory+knowledge): add knowledge wiki system and Light Dream memory extraction - Add knowledge/ directory structure and knowledge-wiki skill for structured knowledge accumulation - Auto-inject MEMORY.md into system prompt with truncation (last 200 lines) - Light Dream: extend flush_memory to extract long-term memories into MEMORY.md with date stamps - Add mandatory knowledge auto-write rules in system prompt (no user confirmation needed) - Expand MemoryManager.sync() to index knowledge/ files for vector search - Update RULE.md template with workspace conventions and knowledge guidelines --- agent/memory/manager.py | 10 ++ agent/memory/summarizer.py | 177 ++++++++++++++++++++++++--------- agent/prompt/builder.py | 101 ++++++++++++++----- agent/prompt/workspace.py | 134 +++++++++++++++++++++---- skills/knowledge-wiki/SKILL.md | 90 +++++++++++++++++ 5 files changed, 427 insertions(+), 85 deletions(-) create mode 100644 skills/knowledge-wiki/SKILL.md diff --git a/agent/memory/manager.py b/agent/memory/manager.py index 197c9ffd..967270f7 100644 --- a/agent/memory/manager.py +++ b/agent/memory/manager.py @@ -285,6 +285,10 @@ class MemoryManager: # Scan memory directory (including daily summaries) if memory_dir.exists(): for file_path in memory_dir.rglob("*.md"): + # Skip hidden directories (e.g. 
.dreams/) + if any(part.startswith('.') for part in file_path.relative_to(workspace_dir).parts): + continue + # Determine scope and user_id from path rel_path = file_path.relative_to(workspace_dir) parts = rel_path.parts @@ -312,6 +316,12 @@ class MemoryManager: scope = "shared" await self._sync_file(file_path, "memory", scope, user_id) + + # Scan knowledge directory (structured knowledge wiki) + knowledge_dir = Path(workspace_dir) / "knowledge" + if knowledge_dir.exists(): + for file_path in knowledge_dir.rglob("*.md"): + await self._sync_file(file_path, "knowledge", "shared", None) self._dirty = False diff --git a/agent/memory/summarizer.py b/agent/memory/summarizer.py index b280e1c1..c854ff1a 100644 --- a/agent/memory/summarizer.py +++ b/agent/memory/summarizer.py @@ -1,9 +1,10 @@ """ -Memory flush manager +Memory flush manager (with Light Dream) Handles memory persistence when conversation context is trimmed or overflows: - Uses LLM to summarize discarded messages into concise key-information entries - Writes to daily memory files (lazy creation) +- Light Dream: extracts long-term memories to MEMORY.md in the same LLM call - Deduplicates trim flushes to avoid repeated writes - Runs summarization asynchronously to avoid blocking normal replies - Provides daily summary interface for scheduler @@ -16,26 +17,41 @@ from datetime import datetime from common.log import logger -SUMMARIZE_SYSTEM_PROMPT = """你是一个记忆提取助手。你的任务是从对话记录中提炼出值得长期记住的关键事件和核心信息。 +SUMMARIZE_SYSTEM_PROMPT = """你是一个记忆提取助手。你的任务是从对话记录中提炼出两种记忆: -核心原则: -- 按「事件」维度归纳,而不是按对话轮次逐条记录 -- 多轮对话如果围绕同一件事,合并为一条摘要 -- 只记录有长期价值的信息,忽略闲聊、问候、无意义的短消息 +## 第一部分:日常记录([DAILY]) -输出要求: -1. 每条一行,用 "- " 开头,格式为:事件/主题 + 关键结论或结果 -2. 值得记录的信息类型:用户提出的需求及最终解决方案、重要的事实信息、用户的偏好或决策、关键技术方案或配置变更 -3. 不值得记录的信息:简单问候、闲聊、无实质内容的短消息、重复的中间过程 -4. 每条摘要应当简明扼要,一句话概括事件的核心内容和结果 -5. 直接输出摘要内容,不要加任何前缀说明 -6. 
当对话没有任何记录价值(仅含问候或无意义内容),回复"无" +按「事件」维度归纳当天发生的事,不要按对话轮次逐条记录: +- 每条一行,用 "- " 开头 +- 合并同一件事的多轮对话 +- 只记录有意义的事件,忽略闲聊和问候 -示例(仅供参考格式): -- 用户配置了 XX 功能,设置参数为 YY,已生效 -- 用户反馈了 XX 问题,原因是 YY,通过 ZZ 方式解决""" +## 第二部分:长期记忆([MEMORY]) -SUMMARIZE_USER_PROMPT = """请从以下对话记录中,按关键事件维度提炼记忆摘要(合并同一事件的多轮对话,不要逐条列出): +提取值得**永久记住**的关键信息,这些信息在未来的对话中仍然有价值: +- 用户的偏好、习惯、风格(如"用户偏好中文回复"、"用户喜欢简洁风格") +- 重要的决策或约定(如"项目决定使用 PostgreSQL") +- 关键人物信息(如"张总是用户的上级") +- 用户明确要求记住的内容 +- 重要的教训或经验总结 + +**如果没有值得永久记住的信息,[MEMORY] 部分留空即可。** + +## 输出格式(严格遵守) + +``` +[DAILY] +- 事件1的摘要 +- 事件2的摘要 + +[MEMORY] +- 值得永久记住的信息1 +- 值得永久记住的信息2 +``` + +当对话没有任何记录价值(仅含问候或无意义内容),直接回复"无"。""" + +SUMMARIZE_USER_PROMPT = """请从以下对话记录中提取记忆(按 [DAILY] 和 [MEMORY] 两部分输出): {conversation}""" @@ -160,40 +176,111 @@ class MemoryFlushManager: reason: str, max_messages: int, ): - """Background worker: summarize with LLM and write to daily file.""" + """Background worker: summarize with LLM, write daily file + MEMORY.md (Light Dream).""" try: - summary = self._summarize_messages(messages, max_messages) - if not summary or not summary.strip() or summary.strip() == "无": + raw_summary = self._summarize_messages(messages, max_messages) + if not raw_summary or not raw_summary.strip() or raw_summary.strip() == "无": logger.info(f"[MemoryFlush] No valuable content to flush (reason={reason})") return - - daily_file = ensure_daily_memory_file(self.workspace_dir, user_id) - - if reason == "overflow": - header = f"## Context Overflow Recovery ({datetime.now().strftime('%H:%M')})" - note = "The following conversation was trimmed due to context overflow:\n" - elif reason == "trim": - header = f"## Trimmed Context ({datetime.now().strftime('%H:%M')})" - note = "" - elif reason == "daily_summary": - header = f"## Daily Summary ({datetime.now().strftime('%H:%M')})" - note = "" - else: - header = f"## Session Notes ({datetime.now().strftime('%H:%M')})" - note = "" - - flush_entry = f"\n{header}\n\n{note}{summary}\n" - - with open(daily_file, "a", encoding="utf-8") as f: - 
f.write(flush_entry) - + + daily_part, memory_part = self._parse_dual_output(raw_summary) + + # --- Write daily memory --- + if daily_part: + daily_file = ensure_daily_memory_file(self.workspace_dir, user_id) + + if reason == "overflow": + header = f"## Context Overflow Recovery ({datetime.now().strftime('%H:%M')})" + note = "The following conversation was trimmed due to context overflow:\n" + elif reason == "trim": + header = f"## Trimmed Context ({datetime.now().strftime('%H:%M')})" + note = "" + elif reason == "daily_summary": + header = f"## Daily Summary ({datetime.now().strftime('%H:%M')})" + note = "" + else: + header = f"## Session Notes ({datetime.now().strftime('%H:%M')})" + note = "" + + flush_entry = f"\n{header}\n\n{note}{daily_part}\n" + + with open(daily_file, "a", encoding="utf-8") as f: + f.write(flush_entry) + + logger.info(f"[MemoryFlush] Wrote daily memory to {daily_file.name} (reason={reason}, chars={len(daily_part)})") + + # --- Light Dream: write long-term memory to MEMORY.md --- + if memory_part: + self._append_to_main_memory(memory_part, user_id) + self.last_flush_timestamp = datetime.now() - - logger.info(f"[MemoryFlush] Wrote to {daily_file.name} (reason={reason}, chars={len(summary)})") - + except Exception as e: logger.warning(f"[MemoryFlush] Async flush failed (reason={reason}): {e}") - + + @staticmethod + def _parse_dual_output(raw: str) -> tuple: + """ + Parse LLM output into (daily_part, memory_part). + Handles both new [DAILY]/[MEMORY] format and legacy single-section format. 
+ """ + raw = raw.strip() + + if "[DAILY]" in raw or "[MEMORY]" in raw: + daily_part = "" + memory_part = "" + + # Extract [DAILY] section + if "[DAILY]" in raw: + start = raw.index("[DAILY]") + len("[DAILY]") + end = raw.index("[MEMORY]") if "[MEMORY]" in raw else len(raw) + daily_part = raw[start:end].strip() + + # Extract [MEMORY] section + if "[MEMORY]" in raw: + start = raw.index("[MEMORY]") + len("[MEMORY]") + memory_part = raw[start:].strip() + + # Filter out empty markers + if memory_part and all( + not line.strip() or line.strip() == "-" + for line in memory_part.split("\n") + ): + memory_part = "" + + return daily_part, memory_part + + # Legacy format: treat entire output as daily, no memory extraction + return raw, "" + + def _append_to_main_memory(self, memory_entries: str, user_id: Optional[str] = None): + """Append extracted long-term memories to MEMORY.md with date stamp.""" + try: + main_file = self.get_main_memory_file(user_id) + today = datetime.now().strftime("%Y-%m-%d") + + # Add date prefix to each entry line + stamped_lines = [] + for line in memory_entries.strip().split("\n"): + line = line.strip() + if line.startswith("- "): + stamped_lines.append(f"- ({today}) {line[2:]}") + elif line: + stamped_lines.append(f"- ({today}) {line}") + + if not stamped_lines: + return + + stamped_text = "\n".join(stamped_lines) + + with open(main_file, "a", encoding="utf-8") as f: + f.write(f"\n{stamped_text}\n") + + logger.info(f"[LightDream] Appended {len(stamped_lines)} entries to MEMORY.md") + + except Exception as e: + logger.warning(f"[LightDream] Failed to append to MEMORY.md: {e}") + def create_daily_summary( self, messages: List[Dict], diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 1b96d2cf..11bd3b51 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -92,10 +92,11 @@ def build_agent_system_prompt( 顺序说明(按重要性和逻辑关系排列): 1. 工具系统 - 核心能力,最先介绍 2. 技能系统 - 紧跟工具,因为技能需要用 read 工具读取 - 3. 记忆系统 - 独立的记忆能力 + 3. 
记忆系统 - 记忆检索与写入引导 + 3.5 知识系统 - 结构化知识库(knowledge/index.md 注入) 4. 工作空间 - 工作环境说明 5. 用户身份 - 用户信息(可选) - 6. 项目上下文 - AGENT.md, USER.md, RULE.md, BOOTSTRAP.md(定义人格、身份、规则、初始化引导) + 6. 项目上下文 - AGENT.md, USER.md, RULE.md, MEMORY.md, BOOTSTRAP.md 7. 运行时信息 - 元信息(时间、模型等) Args: @@ -126,6 +127,9 @@ def build_agent_system_prompt( # 3. 记忆系统(独立的记忆能力) if memory_manager: sections.extend(_build_memory_section(memory_manager, tools, language)) + + # 3.5 知识系统(结构化知识库) + sections.extend(_build_knowledge_section(workspace_dir, language)) # 4. 工作空间(工作环境说明) sections.extend(_build_workspace_section(workspace_dir, language)) @@ -268,55 +272,105 @@ def _build_memory_section(memory_manager: Any, tools: Optional[List[Any]], langu """构建记忆系统section""" if not memory_manager: return [] - - # 检查是否有memory工具 + has_memory_tools = False if tools: tool_names = [tool.name if hasattr(tool, 'name') else str(tool) for tool in tools] has_memory_tools = any(name in ['memory_search', 'memory_get'] for name in tool_names) - + if not has_memory_tools: return [] - + from datetime import datetime today_file = datetime.now().strftime("%Y-%m-%d") + ".md" - + lines = [ "## 🧠 记忆系统", "", - "### 检索记忆", + "### Memory Recall(mandatory)", "", - "在回答关于以前的工作、决定、日期、人物、偏好或待办事项的任何问题之前:", + "在回答任何关于过往工作、决策、日期、人物、偏好或待办事项的问题之前,**必须**先检索记忆。", + "MEMORY.md 已自动加载在项目上下文中(可能被截断),完整内容和每日记忆需要通过工具检索。", "", - "1. 不确定记忆文件位置 → 先用 `memory_search` 通过关键词和语义检索相关内容", - "2. 已知文件位置 → 直接用 `memory_get` 读取相应的行 (例如:MEMORY.md, memory/YYYY-MM-DD.md)", - "3. search 无结果 → 尝试用 `memory_get` 读取MEMORY.md及最近两天记忆文件", + "1. 不确定位置 → `memory_search` 关键词/语义检索", + "2. 已知位置 → `memory_get` 直接读取对应行", + "3. 
search 无结果 → `memory_get` 读最近两天记忆", "", "**记忆文件结构**:", - f"- `MEMORY.md`: 长期记忆(核心信息、偏好、决策等)", + "- `MEMORY.md`: 长期记忆索引(已自动加载到上下文,核心信息、偏好、决策等)", f"- `memory/YYYY-MM-DD.md`: 每日记忆,今天是 `memory/{today_file}`", + "- `knowledge/`: 结构化知识库(见下方知识系统)", "", "### 写入记忆", "", - "**主动存储**:遇到以下情况时,应主动将信息写入记忆文件(无需告知用户):", + "遇到以下情况时,**主动**将信息写入记忆文件(无需告知用户):", "", - "- 用户明确要求你记住某些信息", + "- 用户要求记住某些信息", "- 用户分享了重要的个人偏好、习惯、决策", "- 对话中产生了重要的结论、方案、约定", "- 完成了复杂任务,值得记录关键步骤和结果", - "- 发现了用户经常遇到的问题或解决方案", "", "**存储规则**:", - f"- 长期有效的核心信息 → `MEMORY.md`(文件保持精简,< 2000 tokens)", - f"- 当天的事件、进展、笔记 → `memory/{today_file}`", - "- 追加内容 → `edit` 工具,oldText 留空", - "- 修改内容 → `edit` 工具,oldText 填写要替换的文本", - "- **禁止写入敏感信息**:API密钥、令牌等敏感信息严禁写入记忆文件", + f"- 长期核心信息 → `MEMORY.md`", + f"- 当天事件/进展 → `memory/{today_file}`", + "- 结构化知识 → `knowledge/`(见知识系统)", + "- 追加 → `edit` 工具,oldText 留空", + "- 修改 → `edit` 工具,oldText 填写要替换的文本", + "- **禁止写入敏感信息**(API密钥、令牌等)", "", "**使用原则**: 自然使用记忆,就像你本来就知道;不用刻意提起,除非用户问起。", "", ] - + + return lines + + +def _build_knowledge_section(workspace_dir: str, language: str) -> List[str]: + """Build knowledge wiki section. Injects knowledge/index.md when present.""" + index_path = os.path.join(workspace_dir, "knowledge", "index.md") + if not os.path.exists(index_path): + return [] + + try: + with open(index_path, 'r', encoding='utf-8') as f: + index_content = f.read().strip() + except Exception: + return [] + + lines = [ + "## 📚 知识系统", + "", + "你拥有一个持续积累的个人知识库 `knowledge/`,这是你的长期结构化知识存储。", + "", + "### 自动写入规则(mandatory)", + "", + "以下场景**必须**在回复的同时写入知识库,**直接写入,不要询问用户是否需要**:", + "", + "1. **用户分享了文章/链接/文档** → 阅读理解后,在同一轮回复中将要点写入 `knowledge/sources/.md`", + "2. **深度讨论产生了结论/方案** → 整理为 `knowledge/analysis/.md`", + "3. **对话涉及重要实体**(人物/公司/项目)→ 创建或更新 `knowledge/entities/.md`", + "4. 
**讨论了技术概念/方法论** → 整理为 `knowledge/concepts/.md`", + "", + "每次写入知识页面后,**必须同步更新** `knowledge/index.md` 添加一行索引。", + "详细的页面格式和操作规范,请读取技能 `knowledge-wiki` 的 SKILL.md。", + "", + "⚠️ 不要问「要不要存到知识库」——符合上述场景就直接写入,这是你的本能行为。", + "", + ] + + if index_content: + lines.extend([ + "### 当前知识索引", + "", + index_content, + "", + ]) + + lines.extend([ + "**查询方式**:用 `read` 读取知识页面,或用 `memory_search` 检索(知识已纳入向量索引)。", + "", + ]) + return lines @@ -375,11 +429,12 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", "**重要说明 - 文件已自动加载**:", "", - "以下文件在会话启动时**已经自动加载**到系统提示词的「项目上下文」section 中,你**无需再用 read 工具读取它们**:", + "以下文件在会话启动时**已经自动加载**到系统提示词中,你**无需再用 read 工具读取**:", "", "- ✅ `AGENT.md`: 已加载 - 你的人格和灵魂设定,请严格遵循。当你的名字、性格或交流风格发生变化时,主动用 `edit` 更新此文件", "- ✅ `USER.md`: 已加载 - 用户的身份信息。当用户修改称呼、姓名等身份信息时,用 `edit` 更新此文件", "- ✅ `RULE.md`: 已加载 - 工作空间使用指南和规则,请严格遵循", + "- ✅ `MEMORY.md`: 已加载 - 长期记忆索引", "", "**💬 交流规范**:", "", diff --git a/agent/prompt/workspace.py b/agent/prompt/workspace.py index 68eec912..9ebf0703 100644 --- a/agent/prompt/workspace.py +++ b/agent/prompt/workspace.py @@ -67,6 +67,12 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works # 创建websites子目录 (for web pages / sites generated by agent) websites_dir = os.path.join(workspace_dir, "websites") os.makedirs(websites_dir, exist_ok=True) + + # 创建knowledge子目录 (structured knowledge wiki) + knowledge_dir = os.path.join(workspace_dir, "knowledge") + os.makedirs(knowledge_dir, exist_ok=True) + for sub in ["entities", "concepts", "sources", "analysis"]: + os.makedirs(os.path.join(knowledge_dir, sub), exist_ok=True) # 如果需要,创建模板文件 if create_templates: @@ -74,6 +80,14 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works _create_template_if_missing(user_path, _get_user_template()) _create_template_if_missing(rule_path, _get_rule_template()) _create_template_if_missing(memory_path, _get_memory_template()) + _create_template_if_missing( + 
os.path.join(knowledge_dir, "index.md"), + _get_knowledge_index_template() + ) + _create_template_if_missing( + os.path.join(knowledge_dir, "log.md"), + _get_knowledge_log_template() + ) # Only create BOOTSTRAP.md for brand new workspaces; # agent deletes it after completing onboarding @@ -109,6 +123,7 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = DEFAULT_AGENT_FILENAME, DEFAULT_USER_FILENAME, DEFAULT_RULE_FILENAME, + DEFAULT_MEMORY_FILENAME, # Long-term memory (frozen snapshot) DEFAULT_BOOTSTRAP_FILENAME, # Only exists when onboarding is incomplete ] @@ -138,6 +153,10 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] = # 跳过空文件或只包含模板占位符的文件 if not content or _is_template_placeholder(content): continue + + # Truncate MEMORY.md to protect context window (frozen snapshot) + if filename == DEFAULT_MEMORY_FILENAME: + content = _truncate_memory_content(content) context_files.append(ContextFile( path=filename, @@ -163,6 +182,36 @@ def _create_template_if_missing(filepath: str, template_content: str): logger.error(f"[Workspace] Failed to create template {filepath}: {e}") +_MEMORY_MAX_LINES = 200 +_MEMORY_MAX_BYTES = 25000 + + +def _truncate_memory_content(content: str) -> str: + """Truncate MEMORY.md to keep system prompt manageable. + + Takes the **last** N lines (newest entries are appended at the bottom), + subject to 200 lines / 25 KB limits (whichever is hit first). + Prepends a hint when truncated so the model knows older content exists. 
+ """ + lines = content.split('\n') + truncated = False + + if len(lines) > _MEMORY_MAX_LINES: + lines = lines[-_MEMORY_MAX_LINES:] + truncated = True + + result = '\n'.join(lines) + if len(result.encode('utf-8')) > _MEMORY_MAX_BYTES: + while len(result.encode('utf-8')) > _MEMORY_MAX_BYTES and lines: + lines.pop(0) + truncated = True + result = '\n'.join(lines) + + if truncated: + result = "...(older entries truncated, use `memory_search` or `memory_get` for full content)\n\n" + result + return result + + def _is_template_placeholder(content: str) -> bool: """检查内容是否为模板占位符""" # 常见的占位符模式 @@ -287,39 +336,82 @@ def _get_rule_template() -> str: 这个文件夹是你的家。好好对待它。 +## 工作空间目录结构 + +``` +~/cow/ +├── AGENT.md # 你的身份和灵魂设定 +├── USER.md # 用户基本信息(静态) +├── RULE.md # 工作空间规则(本文件) +├── MEMORY.md # 长期记忆索引(会话启动时自动加载) +│ +├── memory/ # 每日对话记忆 +│ └── YYYY-MM-DD.md # 当天事件、进展、笔记 +│ +├── knowledge/ # 结构化知识库(持续积累的知识) +│ ├── index.md # 知识目录索引 +│ ├── log.md # 知识操作日志 +│ ├── entities/ # 实体页面(人物、公司、项目) +│ ├── concepts/ # 概念页面(技术、方法论) +│ ├── sources/ # 资料摘要(文章、文件的要点提取) +│ └── analysis/ # 沉淀的分析和洞见 +│ +├── skills/ # 技能 +├── websites/ # 网页产物 +└── tmp/ # 系统临时文件(自动管理,勿手动存放重要文件) +``` + ## 记忆系统 你每次会话都是全新的,记忆文件让你保持连续性: -### 📝 每日记忆:`memory/YYYY-MM-DD.md` -- 原始的对话日志 -- 记录当天发生的事情 -- 如果 `memory/` 目录不存在,创建它 - ### 🧠 长期记忆:`MEMORY.md` -- 你精选的记忆,就像人类的长期记忆 -- **仅在主会话中加载**(与用户的直接聊天) -- **不要在共享上下文中加载**(群聊、与其他人的会话) -- 这是为了**安全** - 包含不应泄露给陌生人的个人上下文 -- 记录重要事件、想法、决定、观点、经验教训 -- 这是你精选的记忆 - 精华,而不是原始日志 -- 用 `edit` 工具追加新的记忆内容 +- 你精选的记忆索引,每次会话启动时**自动加载**到上下文中 +- 记录核心事实、偏好、决策、重要人物、教训 +- 保持精简(< 200 行),是精华索引而非原始日志 +- 用 `edit` 工具追加或修改 + +### 📝 每日记忆:`memory/YYYY-MM-DD.md` +- 当天的事件、进展、笔记 +- 原始对话日志的沉淀 ### 📝 写下来 - 不要"记在心里"! -- **记忆是有限的** - 如果你想记住某事,写入文件 +- **记忆是有限的** - 想记住的事就写入文件 - "记在心里"不会在会话重启后保留,文件才会 - 当有人说"记住这个" → 更新 `MEMORY.md` 或 `memory/YYYY-MM-DD.md` - 当你学到教训 → 更新 RULE.md 或相关技能 -- 当你犯错 → 记录下来,这样未来的你不会重复,**文字 > 大脑** 📝 +- 当你犯错 → 记录下来,**文字 > 大脑** 📝 ### 存储规则 当用户分享信息时,根据类型选择存储位置: -1. 
**你的身份设定 → AGENT.md**(你的名字、角色、性格、交流风格——用户修改时必须用 `edit` 更新) -2. **用户静态身份 → USER.md**(姓名、称呼、职业、时区、联系方式、生日——用户修改时必须用 `edit` 更新) -3. **动态记忆 → MEMORY.md**(爱好、偏好、决策、目标、项目、教训、待办事项) +1. **你的身份设定 → AGENT.md**(名字、角色、性格、风格) +2. **用户静态身份 → USER.md**(姓名、称呼、职业、联系方式、生日) +3. **动态记忆 → MEMORY.md**(偏好、决策、目标、教训、待办) 4. **当天对话 → memory/YYYY-MM-DD.md**(今天聊的内容) +5. **结构化知识 → knowledge/**(见下方知识系统) + +## 知识系统 + +知识库 `knowledge/` 是你持续积累的结构化知识。与记忆不同,知识是经过整理和编译的,有明确的主题和交叉引用。 + +### 自动写入(不要询问,直接写入) + +以下场景你**必须**在回复的同时主动写入知识库,**不要问用户"要不要存到知识库"**: + +- 用户分享了一篇文章/链接/文件 → 阅读后提取要点,在同一轮回复中写入 `knowledge/sources/` +- 深度讨论产生了有价值的结论/方案 → 整理写入 `knowledge/analysis/` +- 对话涉及重要的人物/项目/公司 → 创建或更新 `knowledge/entities/` +- 讨论了技术概念或方法论 → 整理写入 `knowledge/concepts/` + +**关键原则**:学完就记是你的本能,不要征求确认。回复中可以顺带告知"已存入知识库"。 + +### 索引维护 + +每次创建或更新知识页面后,**必须同步更新** `knowledge/index.md`。 +索引格式:每行一个 `[标题](路径) — 一句话摘要`,按分类分组,不要用表格。 +详细操作规范见技能 `knowledge-wiki`。 ## 安全 @@ -381,4 +473,12 @@ _你刚刚启动,这是你的第一次对话。_ ✨ """ +def _get_knowledge_index_template() -> str: + """Knowledge wiki index template — empty file, agent fills it.""" + return "" + + +def _get_knowledge_log_template() -> str: + """Knowledge wiki operation log template — empty file, agent fills it.""" + return "" diff --git a/skills/knowledge-wiki/SKILL.md b/skills/knowledge-wiki/SKILL.md new file mode 100644 index 00000000..da3d4c4f --- /dev/null +++ b/skills/knowledge-wiki/SKILL.md @@ -0,0 +1,90 @@ +--- +name: knowledge-wiki +description: Manage the personal knowledge wiki. Use when the user shares articles, documents, or asks to organize knowledge; when a conversation produces insights worth preserving as structured knowledge; or when the user asks about the knowledge base. +metadata: + cowagent: + always: true +--- + +# Knowledge Wiki + +Maintain a persistent, structured knowledge base in the `knowledge/` directory. + +## Core Operations + +### 1. Ingest — User shares an article, document, or resource + +1. Read and understand the source material +2. 
Extract key facts, entities, concepts, and insights +3. Create or update relevant pages: + - `knowledge/sources/<slug>.md` — source summary + - `knowledge/entities/<slug>.md` — people, companies, projects mentioned + - `knowledge/concepts/<slug>.md` — new concepts or topics discussed +4. Update `knowledge/index.md` — add one-line entry per new/updated page +5. Append to `knowledge/log.md` + +### 2. Synthesize — Conversation produces valuable structured knowledge + +1. Create `knowledge/analysis/<slug>.md` with the structured analysis +2. Update related entity/concept pages with cross-references +3. Update `knowledge/index.md` and `knowledge/log.md` + +### 3. Query — User asks about accumulated knowledge + +1. Check `knowledge/index.md` (already in your context) for relevant pages +2. Read specific pages with the `read` tool +3. Supplement with `memory_search` if needed + +## Page Format + +```markdown +# Page Title + +Content here. Reference other pages with markdown links: +[Related Entity](../entities/related-entity.md) + +## Key Points + +- ... + +## Sources + +- [Source Title](../sources/source-slug.md) +``` + +## Index Format (`knowledge/index.md`) + +Flat list, one line per page: `[Title](path) — one-line summary`. No tables, no emoji, no template headers. + +```markdown +# Knowledge Index + +## Concepts +- [Topic Name](concepts/topic-name.md) — one-line description + +## Sources +- [Article Title](sources/article-slug.md) — one-line summary + +## Entities +- [Entity Name](entities/entity-name.md) — one-line description + +## Analysis +- [Analysis Title](analysis/analysis-slug.md) — one-line summary +``` + +## Log Format (`knowledge/log.md`) + +Append-only, newest at bottom: + +```markdown +## [2026-04-09] ingest | DeepSeek-R1 Deploy Guide +## [2026-04-09] synthesize | Memory System Design Analysis +``` + +## Guidelines + +- **File naming**: lowercase kebab-case (e.g. 
`machine-learning.md`) +- **One topic per page**: link between pages rather than duplicating +- **Update, don't duplicate**: if a page exists, update it +- **Index is mandatory**: always update `knowledge/index.md` after any change +- **Be concise**: capture the essence rather than copying entire sources