feat(web): hint API base version path in config placeholder

fix: remove unnecessary API Base URL in run scripts
feat: switch default model to deepseek-v4-flash
2026-07-21 06:07:13 +08:00 · 2026-04-26 17:10:24 +08:00 · 2026-04-26 16:29:08 +08:00 · 2026-04-26 15:54:50 +08:00 · 2026-04-24 16:39:48 +08:00 · 2026-04-24 15:29:43 +08:00
101 changed files with 5762 additions and 719 deletions
--- a/.github/workflows/deploy-image-arm.yml
+++ b/.github/workflows/deploy-image-arm.yml
@@ -19,7 +19,7 @@ env:

 jobs:
  build-and-push-image:
-    if: github.repository == 'zhayujie/chatgpt-on-wechat'
+    if: github.repository == 'zhayujie/CowAgent'
    runs-on: ubuntu-latest
    permissions:
      contents: read
@@ -51,7 +51,12 @@ jobs:
        uses: docker/metadata-action@v4
        with:
          images: |
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+            ${{ env.REGISTRY }}/zhayujie/chatgpt-on-wechat
+            ${{ env.REGISTRY }}/zhayujie/cowagent
+          tags: |
+            type=raw,value=latest-arm64,enable={{is_default_branch}}
+            type=ref,event=branch,suffix=-arm64
+            type=ref,event=tag,suffix=-arm64

      - name: Build and push Docker image
        uses: docker/build-push-action@v3
@@ -60,7 +65,7 @@ jobs:
          push: true
          file: ./docker/Dockerfile.latest
          platforms: linux/arm64
-          tags: ${{ steps.meta.outputs.tags }}-arm64
+          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      - uses: actions/delete-package-versions@v4
--- a/.github/workflows/deploy-image.yml
+++ b/.github/workflows/deploy-image.yml
@@ -16,10 +16,11 @@ on:
 env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
+  DOCKERHUB_IMAGE: zhayujie/chatgpt-on-wechat

 jobs:
  build-and-push-image:
-    if: github.repository == 'zhayujie/chatgpt-on-wechat'
+    if: github.repository == 'zhayujie/CowAgent'
    runs-on: ubuntu-latest
    permissions:
      contents: read
@@ -47,8 +48,14 @@ jobs:
        uses: docker/metadata-action@v4
        with:
          images: |
-            ${{ env.IMAGE_NAME }}
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+            zhayujie/chatgpt-on-wechat
+            zhayujie/cowagent
+            ${{ env.REGISTRY }}/zhayujie/chatgpt-on-wechat
+            ${{ env.REGISTRY }}/zhayujie/cowagent
+          tags: |
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=ref,event=branch
+            type=ref,event=tag

      - name: Build and push Docker image
        uses: docker/build-push-action@v3
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@
 -  ✅  **工具系统：** 内置文件读写、终端执行、浏览器操作、定时任务等工具，Agent 自主调用以完成复杂任务
 -  ✅  **CLI系统：** 提供终端命令和对话命令，支持进程管理、技能安装、配置修改等操作
 -  ✅  **多模态消息：** 支持对文本、图片、语音、文件等多类型消息进行解析、处理、生成、发送等操作
--  ✅  **多模型支持：** 支持 OpenAI, Claude, Gemini, DeepSeek, MiniMax、GLM、Qwen、Kimi、Doubao 等国内外主流模型厂商
+-  ✅  **多模型支持：** 支持 DeepSeek、MiniMax、Claude、Gemini、OpenAI、GLM、Qwen、Doubao、Kimi 等国内外主流模型厂商
 -  ✅  **多通道接入：** 支持运行在本地计算机或服务器，可集成到微信、飞书、钉钉、企业微信、QQ、微信公众号、网页中使用

 ## 声明
@@ -70,6 +70,8 @@

 # 🏷 更新日志

+>**2026.04.22：** [2.0.7版本](https://github.com/zhayujie/CowAgent/releases/tag/2.0.7)，图像生成内置技能（GPT Image 2、Nano Banana 等）、新模型支持（Kimi K2.6、Claude Opus 4.7、GLM 5.1）、知识库和记忆增强、Web 控制台优化。
+
 >**2026.04.14：** [2.0.6版本](https://github.com/zhayujie/CowAgent/releases/tag/2.0.6)，知识库系统、梦境记忆模块、上下文智能压缩、Web 控制台多会话及多项优化。

 >**2026.04.01：** [2.0.5版本](https://github.com/zhayujie/CowAgent/releases/tag/2.0.5)，Cow CLI 命令系统、Skill Hub 开源、浏览器工具、企微扫码创建、多项优化和修复。
@@ -113,7 +115,7 @@ irm https://cdn.link-ai.tech/code/cow/run.ps1 | iex

 项目支持国内外主流厂商的模型接口，可选模型及配置说明参考：[模型说明](#模型说明)。

-> 注：Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：MiniMax-M2.7、glm-5-turbo、kimi-k2.5、qwen3.5-plus、claude-sonnet-4-6、gemini-3.1-pro-preview、gpt-5.4、gpt-5.4-mini
+> 注：Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：deepseek-v4-flash、MiniMax-M2.7、glm-5.1、kimi-k2.6、qwen3.5-plus、claude-sonnet-4-6、gemini-3.1-pro-preview、gpt-5.4、gpt-5.4-mini

 同时支持使用 **LinkAI 平台** 接口，支持上述全部模型，并支持知识库、工作流、插件等 Agent 技能，参考 [接口文档](https://docs.link-ai.tech/platform/api)。

@@ -180,7 +182,9 @@ cow install-browser
 # config.json 文件内容示例
 {
  "channel_type": "weixin",                                   # 接入渠道类型，默认为 weixin, 支持修改为 feishu,dingtalk,wecom_bot,qq,wechatcom_app,wechatmp_service,wechatmp,terminal
-  "model": "MiniMax-M2.7",                                    # 模型名称
+  "model": "deepseek-v4-flash",                                # 模型名称
+  "deepseek_api_key": "",                                      # DeepSeek API Key
+  "deepseek_api_base": "https://api.deepseek.com/v1",         # DeepSeek API 地址
  "minimax_api_key": "",                                      # MiniMax API Key
  "zhipu_ai_api_key": "",                                     # 智谱 GLM API Key
  "moonshot_api_key": "",                                     # Kimi/Moonshot API Key
@@ -190,8 +194,6 @@ cow install-browser
  "claude_api_base": "https://api.anthropic.com/v1",          # Claude API 地址，修改可接入三方代理平台
  "gemini_api_key": "",                                       # Gemini API Key
  "gemini_api_base": "https://generativelanguage.googleapis.com", # Gemini API 地址
-  "deepseek_api_key": "",                                      # DeepSeek API Key
-  "deepseek_api_base": "https://api.deepseek.com/v1",         # DeepSeek API 地址，可修改为第三方代理
  "open_ai_api_key": "",                                      # OpenAI API Key
  "open_ai_api_base": "https://api.openai.com/v1",            # OpenAI API 地址
  "linkai_api_key": "",                                       # LinkAI API Key
@@ -206,7 +208,7 @@ cow install-browser
  "agent_max_context_tokens": 50000,                          # Agent 模式下最大上下文 tokens，超出将自动智能压缩处理
  "agent_max_context_turns": 20,                              # Agent 模式下最大上下文记忆轮次，一问一答为一轮，超出后智能压缩处理
  "agent_max_steps": 20,                                      # Agent 模式下单次任务的最大决策步数，超出后将停止继续调用工具
-  "enable_thinking": true                                     # 是否启用深度思考，开启后 Web 端展示模型推理过程，关闭后可加速响应
+  "enable_thinking": false                                    # 是否启用深度思考模式
 }
 ```

@@ -224,7 +226,7 @@ cow install-browser
 <details>
 <summary>2. 其他配置</summary>

-+ `model`: 模型名称，Agent 模式下推荐使用 `MiniMax-M2.7`、`glm-5-turbo`、`kimi-k2.5`、`qwen3.6-plus`、`claude-sonnet-4-6`、`gemini-3.1-pro-preview`，全部模型名称参考[common/const.py](https://github.com/zhayujie/CowAgent/blob/master/common/const.py)文件
++ `model`: 模型名称，Agent 模式下推荐使用 `deepseek-v4-flash`、`MiniMax-M2.7`、`glm-5.1`、`kimi-k2.6`、`qwen3.6-plus`、`claude-sonnet-4-6`、`gemini-3.1-pro-preview`，全部模型名称参考[common/const.py](https://github.com/zhayujie/CowAgent/blob/master/common/const.py)文件
 + `character_desc`：普通对话模式下的机器人系统提示词。在 Agent 模式下该配置不生效，由工作空间中的文件内容构成。
 + `subscribe_msg`：订阅消息，公众号和企业微信 channel 中请填写，当被订阅时会自动回复， 可使用特殊占位符。目前支持的占位符有{trigger_prefix}，在程序中它会自动替换成 bot 的触发词。
 </details>
@@ -312,44 +314,36 @@ sudo docker logs -f chatgpt-on-wechat
 推荐通过 Web 控制台在线管理模型配置，无需手动编辑文件，详见 [模型文档](https://docs.cowagent.ai/models)。以下是手动修改 `config.json` 配置模型的说明：

 <details>
-<summary>OpenAI</summary>
+<summary>DeepSeek</summary>

-1. API Key 创建：在 [OpenAI平台](https://platform.openai.com/api-keys) 创建 API Key
+1. API Key 创建：在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 API Key

 2. 填写配置

-```json
-{
-    "model": "gpt-5.4",
-    "open_ai_api_key": "YOUR_API_KEY",
-    "open_ai_api_base": "https://api.openai.com/v1",
-    "bot_type": "openai"
-}
-```
-
- - `model`: 与 OpenAI 接口的 [model参数](https://platform.openai.com/docs/models) 一致，支持包括 gpt-5.4、gpt-5.4-mini、gpt-5.4-nano、o 系列、gpt-4.1 等模型，Agent 模式推荐使用  `gpt-5.4`、`gpt-5.4-mini`
- - `open_ai_api_base`: 如果需要接入第三方代理接口，可通过修改该参数进行接入
- - `bot_type`: 使用 OpenAI 相关模型时无需填写。当使用第三方代理接口接入 Claude 等非 OpenAI 官方模型时，该参数设为 `openai`
-</details>
-
-<details>
-<summary>LinkAI</summary>
-
-1. API Key 创建：在 [LinkAI平台](https://link-ai.tech/console/interface) 创建 API Key 
-
-2. 填写配置
+方式一：官方接入（推荐）：

 ```json
 {
-    "model": "gpt-5.4-mini",
-    "use_linkai": true,
-    "linkai_api_key": "YOUR API KEY"
+    "model": "deepseek-v4-flash",
+    "deepseek_api_key": "sk-xxxxxxxxxxx"
+}
+```
+
+ - `model`: 推荐填写 `deepseek-v4-flash`、`deepseek-v4-pro`
+ - `deepseek_api_key`: DeepSeek 平台的 API Key
+ - `deepseek_api_base`: 可选，默认为 `https://api.deepseek.com/v1`，可修改为第三方代理地址
+
+方式二：OpenAI 兼容方式接入：
+
+```json
+{
+    "model": "deepseek-v4-flash",
+    "bot_type": "openai",
+    "open_ai_api_key": "sk-xxxxxxxxxxx",
+    "open_ai_api_base": "https://api.deepseek.com/v1"
 }
 ```

-+ `use_linkai`: 是否使用 LinkAI 接口，默认关闭，设置为 true 后可对接 LinkAI 平台的模型，并使用知识库、工作流、数据库、插件等丰富的 Agent 技能
-+ `linkai_api_key`: LinkAI 平台的 API Key，可在 [控制台](https://link-ai.tech/console/interface) 中创建
-+ `model`: [模型列表](https://link-ai.tech/console/models)中的全部模型均可使用
 </details>

 <details>
@@ -381,6 +375,56 @@ sudo docker logs -f chatgpt-on-wechat
 - `open_ai_api_key`: MiniMax 平台的 API-KEY
 </details>

+<details>
+<summary>Claude</summary>
+
+1. API Key 创建：在 [Claude控制台](https://console.anthropic.com/settings/keys) 创建 API Key
+
+2. 填写配置
+
+```json
+{
+    "model": "claude-sonnet-4-6",
+    "claude_api_key": "YOUR_API_KEY"
+}
+```
+ - `model`: 参考 [官方模型ID](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-aliases) ，支持 `claude-sonnet-4-6、claude-opus-4-7、claude-opus-4-6、claude-sonnet-4-5、claude-sonnet-4-0、claude-opus-4-0、claude-3-5-sonnet-latest` 等
+</details>
+
+<details>
+<summary>Gemini</summary>
+
+API Key 创建：在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn) 创建 API Key ，配置如下
+```json
+{
+    "model": "gemini-3.1-flash-lite-preview",
+    "gemini_api_key": ""
+}
+```
+ - `model`: 参考[官方文档-模型列表](https://ai.google.dev/gemini-api/docs/models?hl=zh-cn)，支持 `gemini-3.1-flash-lite-preview、gemini-3.1-pro-preview、gemini-3-flash-preview、gemini-3-pro-preview` 等
+</details>
+
+<details>
+<summary>OpenAI</summary>
+
+1. API Key 创建：在 [OpenAI平台](https://platform.openai.com/api-keys) 创建 API Key
+
+2. 填写配置
+
+```json
+{
+    "model": "gpt-5.4",
+    "open_ai_api_key": "YOUR_API_KEY",
+    "open_ai_api_base": "https://api.openai.com/v1",
+    "bot_type": "openai"
+}
+```
+
+ - `model`: 与 OpenAI 接口的 [model参数](https://platform.openai.com/docs/models) 一致，支持包括 gpt-5.4、gpt-5.4-mini、gpt-5.4-nano、o 系列、gpt-4.1 等模型，Agent 模式推荐使用  `gpt-5.4`、`gpt-5.4-mini`
+ - `open_ai_api_base`: 如果需要接入第三方代理接口，可通过修改该参数进行接入
+ - `bot_type`: 使用 OpenAI 相关模型时无需填写。当使用第三方代理接口接入 Claude 等非 OpenAI 官方模型时，该参数设为 `openai`
+</details>
+
 <details>
 <summary>智谱AI (GLM)</summary>

@@ -388,24 +432,24 @@ sudo docker logs -f chatgpt-on-wechat

 ```json
 {
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "zhipu_ai_api_key": ""
 }
 ```
- - `model`: 可填 `glm-5-turbo、glm-5、glm-4.7、glm-4-plus、glm-4-flash、glm-4-air、glm-4-airx、glm-4-long` 等, 参考 [glm 系列模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4)
+ - `model`: 可填 `glm-5.1、glm-5-turbo、glm-5、glm-4.7、glm-4-plus、glm-4-flash、glm-4-air、glm-4-airx、glm-4-long` 等, 参考 [glm 系列模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4)
 - `zhipu_ai_api_key`: 智谱AI 平台的 API KEY，在 [控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建

 方式二：OpenAI 兼容方式接入，配置如下：
 ```json
 {
  "bot_type": "openai",
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
  "open_ai_api_key": ""
 }
 ```
 - `bot_type`: OpenAI 兼容方式
-- `model`: 可填 `glm-5-turbo、glm-5、glm-4.7、glm-4-plus、glm-4-flash、glm-4-air、glm-4-airx、glm-4-long` 等
+- `model`: 可填 `glm-5.1、glm-5-turbo、glm-5、glm-4.7、glm-4-plus、glm-4-flash、glm-4-air、glm-4-airx、glm-4-long` 等
 - `open_ai_api_base`: 智谱AI 平台的 BASE URL
 - `open_ai_api_key`: 智谱AI 平台的 API KEY
 </details>
@@ -439,35 +483,6 @@ sudo docker logs -f chatgpt-on-wechat
 - `open_ai_api_key`: 通义千问的 API-KEY
 </details>

-<details>
-<summary>Kimi (Moonshot)</summary>
-
-方式一：官方接入，配置如下：
-
-```json
-{
-    "model": "kimi-k2.5",
-    "moonshot_api_key": ""
-}
-```
- - `model`: 可填写 `kimi-k2.5、kimi-k2、moonshot-v1-8k、moonshot-v1-32k、moonshot-v1-128k`
- - `moonshot_api_key`: Moonshot 的 API-KEY，在 [控制台](https://platform.moonshot.cn/console/api-keys) 创建
- 
-方式二：OpenAI 兼容方式接入，配置如下：
-```json
-{
-  "bot_type": "openai",
-  "model": "kimi-k2.5",
-  "open_ai_api_base": "https://api.moonshot.cn/v1",
-  "open_ai_api_key": ""
-}
-```
-- `bot_type`: OpenAI 兼容方式
-- `model`: 可填写 `kimi-k2.5、kimi-k2、moonshot-v1-8k、moonshot-v1-32k、moonshot-v1-128k`
-- `open_ai_api_base`: Moonshot 的 BASE URL
-- `open_ai_api_key`: Moonshot 的 API-KEY
-</details>
-
 <details>
 <summary>豆包 (Doubao)</summary>

@@ -487,67 +502,74 @@ sudo docker logs -f chatgpt-on-wechat
 </details>

 <details>
-<summary>Claude</summary>
+<summary>Kimi (Moonshot)</summary>

-1. API Key 创建：在 [Claude控制台](https://console.anthropic.com/settings/keys) 创建 API Key
+方式一：官方接入，配置如下：
+
+```json
+{
+    "model": "kimi-k2.6",
+    "moonshot_api_key": ""
+}
+```
+ - `model`: 可填写 `kimi-k2.6、kimi-k2.5、kimi-k2、moonshot-v1-8k、moonshot-v1-32k、moonshot-v1-128k`
+ - `moonshot_api_key`: Moonshot 的 API-KEY，在 [控制台](https://platform.moonshot.cn/console/api-keys) 创建
+
+方式二：OpenAI 兼容方式接入，配置如下：
+```json
+{
+  "bot_type": "openai",
+  "model": "kimi-k2.6",
+  "open_ai_api_base": "https://api.moonshot.cn/v1",
+  "open_ai_api_key": ""
+}
+```
+- `bot_type`: OpenAI 兼容方式
+- `model`: 可填写 `kimi-k2.6、kimi-k2.5、kimi-k2、moonshot-v1-8k、moonshot-v1-32k、moonshot-v1-128k`
+- `open_ai_api_base`: Moonshot 的 BASE URL
+- `open_ai_api_key`: Moonshot 的 API-KEY
+</details>
+
+<details>
+<summary>ModelScope</summary>
+
+```json
+{
+  "bot_type": "modelscope",
+  "model": "Qwen/QwQ-32B",
+  "modelscope_api_key": "your_api_key",
+  "modelscope_base_url": "https://api-inference.modelscope.cn/v1/chat/completions",
+  "text_to_image": "MusePublic/489_ckpt_FLUX_1"
+}
+```
+
+- `bot_type`: modelscope 接口格式
+- `model`: 参考[模型列表](https://www.modelscope.cn/models?filter=inference_type&page=1)
+- `modelscope_api_key`: 参考 [官方文档-访问令牌](https://modelscope.cn/docs/accounts/token) ，在 [控制台](https://modelscope.cn/my/myaccesstoken)
+- `modelscope_base_url`: modelscope 平台的 BASE URL
+- `text_to_image`: 图像生成模型，参考[模型列表](https://www.modelscope.cn/models?filter=inference_type&page=1)
+</details>
+
+<details>
+<summary>LinkAI</summary>
+
+1. API Key 创建：在 [LinkAI平台](https://link-ai.tech/console/interface) 创建 API Key

 2. 填写配置

 ```json
 {
-    "model": "claude-sonnet-4-6",
-    "claude_api_key": "YOUR_API_KEY"
+    "model": "gpt-5.4-mini",
+    "use_linkai": true,
+    "linkai_api_key": "YOUR API KEY"
 }
 ```
- - `model`: 参考 [官方模型ID](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-aliases) ，支持 `claude-sonnet-4-6、claude-opus-4-6、claude-sonnet-4-5、claude-sonnet-4-0、claude-opus-4-0、claude-3-5-sonnet-latest` 等
+
++ `use_linkai`: 是否使用 LinkAI 接口，默认关闭，设置为 true 后可对接 LinkAI 平台的模型，并使用知识库、工作流、数据库、插件等丰富的 Agent 技能
++ `linkai_api_key`: LinkAI 平台的 API Key，可在 [控制台](https://link-ai.tech/console/interface) 中创建
++ `model`: [模型列表](https://link-ai.tech/console/models)中的全部模型均可使用
 </details>

-<details>
-<summary>Gemini</summary>
-
-API Key 创建：在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn) 创建 API Key ，配置如下
-```json
-{
-    "model": "gemini-3.1-flash-lite-preview",
-    "gemini_api_key": ""
-}
-```
- - `model`: 参考[官方文档-模型列表](https://ai.google.dev/gemini-api/docs/models?hl=zh-cn)，支持 `gemini-3.1-flash-lite-preview、gemini-3.1-pro-preview、gemini-3-flash-preview、gemini-3-pro-preview` 等
-</details>
-
-<details>
-<summary>DeepSeek</summary>
-
-1. API Key 创建：在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 API Key 
-
-2. 填写配置
-
-方式一：官方接入（推荐）：
-
-```json
-{
-    "model": "deepseek-chat",
-    "deepseek_api_key": "sk-xxxxxxxxxxx"
-}
-```
-
- - `model`: 可填 `deepseek-chat、deepseek-reasoner`，分别对应的是 DeepSeek-V3.2（非思考模式）和 DeepSeek-R1（思考模式）
- - `deepseek_api_key`: DeepSeek 平台的 API Key
- - `deepseek_api_base`: 可选，默认为 `https://api.deepseek.com/v1`，可修改为第三方代理地址
-
-方式二：OpenAI 兼容方式接入：
-
-```json
-{
-    "model": "deepseek-chat",
-    "bot_type": "openai",
-    "open_ai_api_key": "sk-xxxxxxxxxxx",
-    "open_ai_api_base": "https://api.deepseek.com/v1"
-}
-```
-
- </details>
-
 <details>
 <summary>Azure</summary>

@@ -640,26 +662,6 @@ API Key 创建：在 [控制台](https://aistudio.google.com/app/apikey?hl=zh-cn
 - `open_ai_api_key`: 讯飞星火平台的[APIPassword](https://console.xfyun.cn/services/bm3) ，因模型而已
 </details>

-<details>
-<summary>ModelScope</summary>
-
-```json
-{
-  "bot_type": "modelscope",
-  "model": "Qwen/QwQ-32B",
-  "modelscope_api_key": "your_api_key",
-  "modelscope_base_url": "https://api-inference.modelscope.cn/v1/chat/completions",
-  "text_to_image": "MusePublic/489_ckpt_FLUX_1"
-}
-```
-
-- `bot_type`: modelscope 接口格式
-- `model`: 参考[模型列表](https://www.modelscope.cn/models?filter=inference_type&page=1)
-- `modelscope_api_key`: 参考 [官方文档-访问令牌](https://modelscope.cn/docs/accounts/token) ，在 [控制台](https://modelscope.cn/my/myaccesstoken) 
-- `modelscope_base_url`: modelscope 平台的 BASE URL
-- `text_to_image`: 图像生成模型，参考[模型列表](https://www.modelscope.cn/models?filter=inference_type&page=1)
-</details>
-
 <details>
 <summary>Coding Plan</summary>

--- a/agent/chat/session_service.py
+++ b/agent/chat/session_service.py
@@ -0,0 +1,241 @@
+"""
+SessionService - Manages multi-session lifecycle for both web channel and cloud client.
+
+Provides a unified interface for listing, deleting, renaming, clearing context,
+and generating AI titles for conversation sessions. Backed by ConversationStore
+(SQLite) and AgentBridge (in-memory agent instances).
+"""
+
+import re
+from typing import Optional
+
+from common.log import logger
+
+
+def _truncate_fallback_title(user_message: str, max_len: int = 30) -> str:
+    """Pick the first non-empty line of the user message and truncate it."""
+    if not user_message:
+        return "New Chat"
+    first_line = ""
+    for line in user_message.splitlines():
+        line = line.strip()
+        if line:
+            first_line = line
+            break
+    if not first_line:
+        return "New Chat"
+    if len(first_line) > max_len:
+        first_line = first_line[:max_len].rstrip() + "..."
+    return first_line
+
+
+def generate_session_title(user_message: str, assistant_reply: str = "") -> str:
+    """
+    Generate a short session title by calling the current bot's reply_text.
+    Falls back to the first line of the user message if the LLM call fails
+    or returns an obvious error sentinel.
+    """
+    fallback = _truncate_fallback_title(user_message)
+    try:
+        from bridge.bridge import Bridge
+        from models.session_manager import Session
+        bot = Bridge().get_bot("chat")
+
+        prompt_parts = [f"User: {user_message[:300]}"]
+        if assistant_reply:
+            prompt_parts.append(f"Assistant: {assistant_reply[:300]}")
+
+        session = Session("__title_gen__", system_prompt="")
+        session.messages = [
+            {"role": "user", "content": (
+                "Generate a very short title (max 15 characters for Chinese, max 6 words for English) "
+                "summarizing this conversation. Return ONLY the title text, nothing else.\n\n"
+                + "\n".join(prompt_parts)
+            )}
+        ]
+
+        result = bot.reply_text(session) or {}
+        # When bots fail (network error, auth error, rate limit, etc.) they
+        # typically return completion_tokens=0 with a sentinel content like
+        # "请再问我一次吧" / "我现在有点累了". Treat that as failure.
+        completion_tokens = result.get("completion_tokens", 0) or 0
+        raw = (result.get("content") or "").strip()
+        if completion_tokens <= 0:
+            logger.warning(
+                f"[SessionService] Title generation got empty completion "
+                f"(completion_tokens={completion_tokens}, content='{raw[:50]}'), "
+                f"using fallback")
+            return fallback
+
+        title = re.sub(r'<think>.*?</think>', '', raw, flags=re.DOTALL).strip().strip('"\'')
+        logger.info(f"[SessionService] Title generation result: '{title}' (len={len(title)})")
+        if title and len(title) <= 50:
+            return title
+    except Exception as e:
+        logger.warning(f"[SessionService] Title generation failed: {e}")
+    return fallback
+
+
+class SessionService:
+    """
+    High-level service for session lifecycle management.
+
+    Usage:
+        svc = SessionService()
+        result = svc.dispatch("list", {"channel_type": "web", "page": 1})
+    """
+
+    def _get_store(self):
+        from agent.memory import get_conversation_store
+        return get_conversation_store()
+
+    def _remove_agent(self, session_id: str):
+        """Remove the in-memory Agent instance for a session if it exists."""
+        try:
+            from bridge.bridge import Bridge
+            ab = Bridge().get_agent_bridge()
+            if session_id in ab.agents:
+                del ab.agents[session_id]
+                logger.info(f"[SessionService] Removed agent instance: {session_id}")
+        except Exception:
+            pass
+
+    @staticmethod
+    def _normalize_sid(session_id: str) -> str:
+        if session_id and not session_id.startswith("session_"):
+            return f"session_{session_id}"
+        return session_id
+
+    # ------------------------------------------------------------------
+    # actions
+    # ------------------------------------------------------------------
+    def list_sessions(self, channel_type: Optional[str] = None,
+                      page: int = 1, page_size: int = 50) -> dict:
+        store = self._get_store()
+        return store.list_sessions(
+            channel_type=channel_type,
+            page=page,
+            page_size=page_size,
+        )
+
+    def delete_session(self, session_id: str) -> None:
+        if not session_id:
+            raise ValueError("session_id required")
+        session_id = self._normalize_sid(session_id)
+
+        store = self._get_store()
+        store.clear_session(session_id)
+        self._remove_agent(session_id)
+        logger.info(f"[SessionService] Session deleted: {session_id}")
+
+    def rename_session(self, session_id: str, title: str) -> None:
+        if not session_id:
+            raise ValueError("session_id required")
+        if not title:
+            raise ValueError("title required")
+        session_id = self._normalize_sid(session_id)
+
+        store = self._get_store()
+        found = store.rename_session(session_id, title)
+        if not found:
+            raise ValueError("session not found")
+
+    def clear_context(self, session_id: str) -> int:
+        """
+        Set context boundary. Returns the new context_start_seq value.
+        """
+        if not session_id:
+            raise ValueError("session_id required")
+        session_id = self._normalize_sid(session_id)
+
+        store = self._get_store()
+        new_seq = store.clear_context(session_id)
+        self._remove_agent(session_id)
+        return new_seq
+
+    def gen_title(self, session_id: str, user_message: str,
+                  assistant_reply: str = "") -> str:
+        """
+        Generate an AI title and persist it. Returns the generated title.
+        """
+        if not session_id:
+            raise ValueError("session_id required")
+        if not user_message:
+            raise ValueError("user_message required")
+        session_id = self._normalize_sid(session_id)
+
+        title = generate_session_title(user_message, assistant_reply)
+
+        store = self._get_store()
+        updated = store.rename_session(session_id, title)
+        logger.info(f"[SessionService] Title set: sid={session_id}, "
+                     f"title='{title}', db_updated={updated}")
+        return title
+
+    # ------------------------------------------------------------------
+    # dispatch — single entry point for protocol messages
+    # ------------------------------------------------------------------
+    def dispatch(self, action: str, payload: Optional[dict] = None) -> dict:
+        """
+        Dispatch a session management action and return a protocol-compatible
+        response dict.
+
+        Action names use a ``*_session`` / session-prefixed convention so they
+        can coexist with history actions (e.g. ``query``) on the same HISTORY
+        message channel without ambiguity.
+
+        Supported actions:
+          - list_sessions: list sessions with pagination
+          - delete_session: delete a session
+          - rename_session: rename a session title
+          - clear_context: set context boundary
+          - generate_title: AI-generate a session title
+
+        :param action: one of the above action names
+        :param payload: action-specific payload
+        :return: dict with action, code, message, payload
+        """
+        payload = payload or {}
+        try:
+            if action == "list_sessions":
+                result = self.list_sessions(
+                    channel_type=payload.get("channel_type"),
+                    page=int(payload.get("page", 1)),
+                    page_size=int(payload.get("page_size", 50)),
+                )
+                return {"action": action, "code": 200, "message": "success", "payload": result}
+
+            elif action == "delete_session":
+                self.delete_session(payload.get("session_id", ""))
+                return {"action": action, "code": 200, "message": "success", "payload": None}
+
+            elif action == "rename_session":
+                self.rename_session(
+                    payload.get("session_id", ""),
+                    str(payload.get("title") or "").strip(),  # null / non-string title -> 400 "title required", not a 500
+                )
+                return {"action": action, "code": 200, "message": "success", "payload": None}
+
+            elif action == "clear_context":
+                new_seq = self.clear_context(payload.get("session_id", ""))
+                return {"action": action, "code": 200, "message": "success",
+                        "payload": {"context_start_seq": new_seq}}
+
+            elif action == "generate_title":
+                title = self.gen_title(
+                    payload.get("session_id", ""),
+                    payload.get("user_message", ""),
+                    payload.get("assistant_reply", ""),
+                )
+                return {"action": action, "code": 200, "message": "success",
+                        "payload": {"title": title}}
+
+            else:
+                return {"action": action, "code": 400,
+                        "message": f"unknown action: {action}", "payload": None}
+
+        except ValueError as e:
+            return {"action": action, "code": 400, "message": str(e), "payload": None}
+        except Exception as e:
+            logger.error(f"[SessionService] dispatch error: action={action}, error={e}")
+            return {"action": action, "code": 500, "message": str(e), "payload": None}
--- a/agent/knowledge/service.py
+++ b/agent/knowledge/service.py
@@ -34,7 +34,8 @@ class KnowledgeService:
    # ------------------------------------------------------------------
    def list_tree(self) -> dict:
        """
-        Return the knowledge directory tree grouped by category.
+        Return the knowledge directory tree grouped by category,
+        supporting arbitrarily nested sub-directories.

        Returns::

@@ -44,10 +45,20 @@ class KnowledgeService:
                        "dir": "concepts",
                        "files": [
                            {"name": "moe.md", "title": "MoE", "size": 1234},
-                            ...
+                        ],
+                        "children": []
+                    },
+                    {
+                        "dir": "platform",
+                        "files": [],
+                        "children": [
+                            {
+                                "dir": "analysis",
+                                "files": [{"name": "perf.md", ...}],
+                                "children": []
+                            }
                        ]
                    },
-                    ...
                ],
                "stats": {"pages": 15, "size": 32768},
                "enabled": true
@@ -56,37 +67,48 @@ class KnowledgeService:
        if not os.path.isdir(self.knowledge_dir):
            return {"tree": [], "stats": {"pages": 0, "size": 0}, "enabled": conf().get("knowledge", True)}

-        tree = []
-        total_files = 0
-        total_bytes = 0
-        for name in sorted(os.listdir(self.knowledge_dir)):
-            full = os.path.join(self.knowledge_dir, name)
-            if not os.path.isdir(full) or name.startswith("."):
-                continue
-            files = []
-            for fname in sorted(os.listdir(full)):
-                if fname.endswith(".md") and not fname.startswith("."):
-                    fpath = os.path.join(full, fname)
-                    size = os.path.getsize(fpath)
-                    total_files += 1
-                    total_bytes += size
-                    title = fname.replace(".md", "")
-                    try:
-                        with open(fpath, "r", encoding="utf-8") as f:
-                            first_line = f.readline().strip()
-                        if first_line.startswith("# "):
-                            title = first_line[2:].strip()
-                    except Exception:
-                        pass
-                    files.append({"name": fname, "title": title, "size": size})
-            tree.append({"dir": name, "files": files})
+        stats = {"pages": 0, "size": 0}
+        root_files, tree = self._scan_dir(self.knowledge_dir, stats, is_root=True)

        return {
+            "root_files": root_files,
            "tree": tree,
-            "stats": {"pages": total_files, "size": total_bytes},
+            "stats": stats,
            "enabled": conf().get("knowledge", True),
        }

+    def _scan_dir(self, dir_path: str, stats: dict, is_root: bool = False) -> tuple:
+        """
+        Recursively scan a directory.
+
+        :return: (files, children) where files is a list of .md file dicts
+                 in this directory and children is a list of sub-directory nodes.
+        """
+        files = []
+        children = []
+        for name in sorted(os.listdir(dir_path)):
+            if name.startswith("."):
+                continue
+            full = os.path.join(dir_path, name)
+            if os.path.isdir(full):
+                sub_files, sub_children = self._scan_dir(full, stats)
+                children.append({"dir": name, "files": sub_files, "children": sub_children})
+            elif name.endswith(".md"):
+                size = os.path.getsize(full)
+                if not is_root:
+                    stats["pages"] += 1
+                    stats["size"] += size
+                title = os.path.splitext(name)[0]  # strip only the trailing ".md", not every ".md" substring
+                try:
+                    with open(full, "r", encoding="utf-8") as f:
+                        first_line = f.readline().strip()
+                    if first_line.startswith("# "):
+                        title = first_line[2:].strip()
+                except Exception:
+                    pass
+                files.append({"name": name, "title": title, "size": size})
+        return files, children
+
    # ------------------------------------------------------------------
    # read — single file content
    # ------------------------------------------------------------------
--- a/agent/memory/conversation_store.py
+++ b/agent/memory/conversation_store.py
@@ -139,6 +139,7 @@ def _extract_tool_results(content: Any) -> Dict[str, str]:

 def _group_into_display_turns(
    rows: List[tuple],
+    include_thinking: bool = True,
 ) -> List[Dict[str, Any]]:
    """
    Convert raw (role, content_json, created_at) DB rows into display turns.
@@ -216,6 +217,8 @@ def _group_into_display_turns(
                            continue
                        btype = block.get("type")
                        if btype == "thinking":
+                            if not include_thinking:
+                                continue
                            txt = block.get("thinking", "").strip()
                            if txt:
                                steps.append({"type": "thinking", "content": txt})
@@ -601,9 +604,17 @@ class ConversationStore:
            finally:
                conn.close()

+        # Honour the current enable_thinking switch when building display turns
+        # so that toggling it off hides previously-saved thinking blocks too.
+        try:
+            from config import conf
+            include_thinking = bool(conf().get("enable_thinking", False))
+        except Exception:
+            include_thinking = False
+
        # Strip seq for display grouping, but record max seq per visible user group
        plain_rows = [(role, content, created_at) for _seq, role, content, created_at in rows]
-        visible = _group_into_display_turns(plain_rows)
+        visible = _group_into_display_turns(plain_rows, include_thinking=include_thinking)

        # Build a mapping: find the seq of each visible user message to annotate context boundary.
        # Walk through rows to find visible user message seqs in order.
--- a/agent/memory/summarizer.py
+++ b/agent/memory/summarizer.py
@@ -57,6 +57,7 @@ MEMORY.md 会注入每次对话的系统提示词中，因此必须保持精炼
 - **清理无效**：删除临时性记录、空白条目、格式残留、无意义、重复内容等
 - **删除冗余**：已被更精炼表述涵盖的旧条目应删除，避免信息重复
 - 每条一行，用 "- " 开头，不带日期前缀
+- 可用 "## 标题" 对相关条目分组，使结构更清晰
 - 目标：控制在 50 条以内，每条尽量一句话概括

 ### Part 2: 梦境日记（[DREAM]）
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -13,6 +13,37 @@ from agent.tools.base_tool import BaseTool, ToolResult
 from common.log import logger


+# Maximum number of characters of model "reasoning / thinking" content to persist
+# in conversation history. The full reasoning is still streamed to the UI in real
+# time (subject to its own SSE / rendering limits); this bound only controls what
+# is stored in DB and replayed in history. Long reasoning is not useful for later
+# context (the LLM never sees thinking blocks anyway) and bloats DB.
+# Keep aligned with the frontend REASONING_RENDER_CAP and the SSE
+# MAX_REASONING_STREAM_CHARS so that storage / stream / display all match.
+MAX_STORED_REASONING_CHARS = 4 * 1024  # 4 KB
+
+# Marker inserted between head and tail when reasoning is truncated.
+_REASONING_TRUNCATE_MARKER = "\n\n... [reasoning truncated, {omitted} chars omitted] ...\n\n"
+
+
+def _truncate_reasoning_for_storage(text: str) -> str:
+    """Trim long reasoning to head + omission marker + tail.
+
+    The returned string never exceeds MAX_STORED_REASONING_CHARS (as long as
+    the cap is larger than the marker itself), so a truncated result is always
+    strictly shorter than the input. Room for the marker is reserved up front
+    and the remaining budget is split evenly between the start and the end of
+    the text, so both the initial chain-of-thought and the final conclusions
+    are preserved for UI replay.
+
+    Args:
+        text: Full reasoning text; may be empty or None.
+
+    Returns:
+        ``text`` unchanged when it is falsy or already within the cap,
+        otherwise ``head + marker + tail`` where the marker reports the exact
+        number of characters dropped.
+    """
+    if not text:
+        return text
+    limit = MAX_STORED_REASONING_CHARS
+    if len(text) <= limit:
+        return text
+    # The omitted count can never have more digits than len(text), so this is
+    # an upper bound on the rendered marker length.
+    reserve = len(_REASONING_TRUNCATE_MARKER.format(omitted=len(text)))
+    keep = max(limit - reserve, 0)
+    head_len = (keep + 1) // 2
+    tail_len = keep // 2
+    head = text[:head_len]
+    # text[-0:] would return the whole string, so guard the empty-tail case.
+    tail = text[-tail_len:] if tail_len else ""
+    omitted = len(text) - head_len - tail_len
+    return head + _REASONING_TRUNCATE_MARKER.format(omitted=omitted) + tail
+
+
 class AgentStreamExecutor:
    """
    Agent Stream Executor
@@ -79,22 +110,47 @@ class AgentStreamExecutor:
                logger.error(f"Event callback error: {e}")
    
    def _is_thinking_enabled(self) -> bool:
+        """Whether deep-thinking mode is on at the model layer.
+
+        Mirrors the global toggle used by ``bridge.agent_bridge`` when deciding
+        whether to send ``thinking={"type": "enabled"}`` to the model. Used for
+        logging and reasoning-update event emission across all channels.
+        """
+        from config import conf
+        return bool(conf().get("enable_thinking", False))
+
+    def _should_render_thinking_inline(self) -> bool:
+        """Whether ``<think>...</think>`` blocks embedded directly in ``content``
+        (MiniMax, some third-party proxies) should be surfaced to the channel.
+
+        Only the Web console can render them in a collapsible panel. IM channels
+        (WeChat/WeCom/DingTalk/Feishu) must strip them, otherwise users see raw
+        XML tags in their chat.
+
+        Returns:
+            True only when the ``enable_thinking`` config value is truthy and
+            the model's ``channel_type`` is ``'web'``.
+        """
        from config import conf
        channel_type = getattr(self.model, 'channel_type', '') or ''
-        return conf().get("enable_thinking", True) and channel_type == 'web'
+        # Coerce the config value so the declared ``-> bool`` holds even when
+        # the setting is stored as None / "" / 0 (matches _is_thinking_enabled).
+        return bool(conf().get("enable_thinking", False)) and channel_type == 'web'

    def _filter_think_tags(self, text: str) -> str:
        """
-        Remove <think> and </think> tags but keep the content inside.
-        Some LLM providers (e.g., MiniMax) may return thinking process wrapped in <think> tags.
-        We only remove the tags themselves, keeping the actual thinking content.
+        Handle <think>...</think> blocks in content returned by some LLM providers
+        (e.g., MiniMax).
+
+        - When inline thinking rendering is allowed (Web + thinking enabled):
+          remove only the tags, keep the content inside.
+        - Otherwise (IM channels, or thinking disabled globally): remove both
+          the tags and the content entirely.
        """
        if not text:
            return text
        import re
-        # Remove only the <think> and </think> tags, keep the content
-        text = re.sub(r'<think>', '', text)
-        text = re.sub(r'</think>', '', text)
+        if self._should_render_thinking_inline():
+            text = re.sub(r'<think>', '', text)
+            text = re.sub(r'</think>', '', text)
+        else:
+            text = re.sub(r'<think>[\s\S]*?</think>', '', text)
+            # Also strip unclosed <think> tag at the end (streaming partial)
+            text = re.sub(r'<think>[\s\S]*$', '', text)
        return text

    def _hash_args(self, args: dict) -> str:
@@ -185,8 +241,8 @@ class AgentStreamExecutor:
        # Log user message with model info
        
        thinking_enabled = self._is_thinking_enabled()
-        thinking_label = "💭 thinking" if thinking_enabled else "⚡ fast"
-        logger.info(f"🤖 {self.model.model} | {thinking_label} | 👤 {user_message}")        
+        thinking_label = " | 💭 thinking" if thinking_enabled else ""
+        logger.info(f"🤖 {self.model.model}{thinking_label} | 👤 {user_message}")        
        
        # Add user message (Claude format - use content blocks for consistency)
        self.messages.append({
@@ -235,6 +291,9 @@ class AgentStreamExecutor:
                        if turn > 1:
                            logger.info(f"[Agent] Requesting explicit response from LLM...")
                            
+                            # Remember position so we can remove the injected prompt later
+                            prompt_insert_idx = len(self.messages)
+                            
                            # 添加一条消息，明确要求回复用户
                            self.messages.append({
                                "role": "user",
@@ -248,8 +307,24 @@ class AgentStreamExecutor:
                            assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
                            final_response = assistant_msg
                            
-                            # 如果还是空，才使用 fallback
-                            if not assistant_msg and not tool_calls:
+                            # Remove the injected prompt from history so it doesn't
+                            # appear as a user message in persisted conversations.
+                            # _call_llm_stream may have appended an assistant message
+                            # after the prompt, so we locate and remove only the prompt.
+                            if (prompt_insert_idx < len(self.messages)
+                                    and self.messages[prompt_insert_idx].get("role") == "user"):
+                                self.messages.pop(prompt_insert_idx)
+                                logger.debug("[Agent] Removed injected explicit-response prompt from message history")
+                            
+                            # If LLM responded with tool_calls instead of text, fall through
+                            # to the tool execution path below (don't break the loop).
+                            if tool_calls:
+                                logger.info(
+                                    f"[Agent] LLM returned tool_calls in explicit-response retry, "
+                                    f"continuing to execute tools instead of breaking"
+                                )
+                            elif not assistant_msg:
+                                # Still empty (no text and no tool_calls): use fallback
                                logger.warning(f"[Agent] Still empty after explicit request")
                                final_response = (
                                    "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
@@ -264,20 +339,28 @@ class AgentStreamExecutor:
                    else:
                        logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
                    
-                    logger.debug(f"✅ 完成 (无工具调用)")
-                    self._emit_event("turn_end", {
-                        "turn": turn,
-                        "has_tool_calls": False
-                    })
-                    break
+                    # If the explicit-response retry produced tool_calls, skip the break
+                    # and continue down to the tool execution branch in this same iteration.
+                    if not tool_calls:
+                        logger.debug(f"✅ 完成 (无工具调用)")
+                        self._emit_event("turn_end", {
+                            "turn": turn,
+                            "has_tool_calls": False
+                        })
+                        break

-                # Log tool calls with arguments
+                # Log tool calls with arguments (truncate long values like base64)
                tool_calls_str = []
                for tc in tool_calls:
-                    # Safely handle None or missing arguments
                    args = tc.get('arguments') or {}
                    if isinstance(args, dict):
-                        args_str = ', '.join([f"{k}={v}" for k, v in args.items()])
+                        parts = []
+                        for k, v in args.items():
+                            v_str = str(v)
+                            if len(v_str) > 200:
+                                v_str = v_str[:200] + f"...({len(v_str)} chars)"
+                            parts.append(f"{k}={v_str}")
+                        args_str = ', '.join(parts)
                        if args_str:
                            tool_calls_str.append(f"{tc['name']}({args_str})")
                        else:
@@ -631,8 +714,11 @@ class AgentStreamExecutor:
                                    tool_calls_buffer[index]["arguments"] += func["arguments"]

                    # Preserve _gemini_raw_parts for Gemini thoughtSignature round-trip
+                    # (direct Gemini: list of parts; LinkAI proxy: base64 string of JSON parts)
                    if "_gemini_raw_parts" in delta:
                        gemini_raw_parts = delta["_gemini_raw_parts"]
+                    elif isinstance(choice, dict) and choice.get("_gemini_raw_parts"):
+                        gemini_raw_parts = choice["_gemini_raw_parts"]

        except Exception as e:
            error_str = str(e)
@@ -799,9 +885,15 @@ class AgentStreamExecutor:
        assistant_msg = {"role": "assistant", "content": []}

        if full_reasoning:
+            stored_reasoning = _truncate_reasoning_for_storage(full_reasoning)
+            if len(stored_reasoning) < len(full_reasoning):
+                logger.info(
+                    f"[reasoning] truncated for storage: "
+                    f"{len(full_reasoning)} -> {len(stored_reasoning)} chars"
+                )
            assistant_msg["content"].append({
                "type": "thinking",
-                "thinking": full_reasoning
+                "thinking": stored_reasoning
            })

        if full_content:
--- a/agent/tools/bash/bash.py
+++ b/agent/tools/bash/bash.py
@@ -169,10 +169,16 @@ SAFETY:
                except Exception as retry_err:
                    logger.warning(f"[Bash] Retry failed: {retry_err}")

-            # Combine stdout and stderr
-            output = result.stdout
-            if result.stderr:
-                output += "\n" + result.stderr
+            # When command succeeds with stdout, keep output clean (stderr goes to server log only).
+            # When command fails or stdout is empty, include stderr so the agent can diagnose.
+            if result.returncode == 0 and result.stdout.strip():
+                output = result.stdout
+                if result.stderr:
+                    logger.info(f"[Bash] stderr (not forwarded): {result.stderr[:500]}")
+            else:
+                output = result.stdout
+                if result.stderr:
+                    output += "\n" + result.stderr

            # Check if we need to save full output to temp file
            temp_file_path = None
--- a/agent/tools/utils/truncate.py
+++ b/agent/tools/utils/truncate.py
@@ -8,7 +8,10 @@ Truncation is based on two independent limits - whichever is hit first wins:
 Never returns partial lines (except bash tail truncation edge case).
 """

-from typing import Dict, Any, Optional, Literal, Tuple
+from __future__ import annotations
+from typing import Dict, Any, Optional, Tuple, TYPE_CHECKING
+if TYPE_CHECKING:
+    from typing import Literal


 DEFAULT_MAX_LINES = 2000
--- a/agent/tools/vision/vision.py
+++ b/agent/tools/vision/vision.py
@@ -43,7 +43,7 @@ _MAIN_MODEL_PROVIDER_NAME = "MainModel"
 # Auto-discovered as fallback vision providers when their API key is configured.
 # OpenAI and LinkAI are handled separately (raw HTTP providers), so not listed here.
 _DISCOVERABLE_MODELS = [
-    ("moonshot_api_key", const.MOONSHOT, const.KIMI_K2_5, "Moonshot"),
+    ("moonshot_api_key", const.MOONSHOT, const.KIMI_K2_6, "Moonshot"),
    ("ark_api_key", const.DOUBAO, const.DOUBAO_SEED_2_PRO, "Doubao"),
    ("dashscope_api_key", const.QWEN_DASHSCOPE, const.QWEN36_PLUS, "DashScope"),
    ("claude_api_key", const.CLAUDEAPI, const.CLAUDE_4_6_SONNET, "Claude"),
--- a/app.py
+++ b/app.py
@@ -274,6 +274,39 @@ def sigterm_handler_wrap(_signo):
    signal.signal(_signo, func)


+def _sync_builtin_skills():
+    """Mirror every builtin skill into the workspace ``skills/`` folder at startup.
+
+    A builtin skill is a sub-directory of the project's ``skills/`` folder that
+    contains a ``SKILL.md`` file. Any existing workspace directory of the same
+    name is deleted and replaced by a fresh copy. Failures are logged as
+    warnings and never propagate to the caller.
+    """
+    import shutil
+    try:
+        workspace = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+        here = os.path.dirname(os.path.abspath(__file__))
+        source_root = os.path.join(here, "skills")
+        target_root = os.path.join(workspace, "skills")
+
+        # Nothing to sync when the project ships no skills folder.
+        if not os.path.isdir(source_root):
+            return
+
+        os.makedirs(target_root, exist_ok=True)
+        synced = 0
+        for name in os.listdir(source_root):
+            skill_src = os.path.join(source_root, name)
+            looks_like_skill = os.path.isdir(skill_src) and os.path.isfile(
+                os.path.join(skill_src, "SKILL.md")
+            )
+            if not looks_like_skill:
+                continue
+            skill_dst = os.path.join(target_root, name)
+            try:
+                # Replace rather than merge: drop the old copy, then copy anew.
+                if os.path.isdir(skill_dst):
+                    shutil.rmtree(skill_dst)
+                shutil.copytree(skill_src, skill_dst)
+            except Exception as e:
+                logger.warning(f"[App] Failed to sync builtin skill '{name}': {e}")
+            else:
+                synced += 1
+        if synced:
+            logger.info(f"[App] Synced {synced} builtin skill(s) to workspace")
+    except Exception as e:
+        logger.warning(f"[App] Builtin skills sync failed: {e}")
+
+
 def run():
    global _channel_mgr
    try:
@@ -299,6 +332,9 @@ def run():
        if web_console_enabled and "web" not in channel_names:
            channel_names.append("web")

+        # Sync builtin skills to workspace before channels start
+        _sync_builtin_skills()
+
        logger.info(f"[App] Starting channels: {channel_names}")

        _channel_mgr = ChannelManager()
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -167,13 +167,15 @@ class AgentLLMModel(LLMModel):
                if session_id:
                    kwargs['session_id'] = session_id

-                # Determine thinking: respect global config, then channel_type
+                # Thinking mode is a global toggle independent of the channel.
+                # IM channels (WeChat/WeCom/DingTalk/Feishu) won't render the
+                # reasoning trace, but still benefit from the higher answer
+                # quality the thinking pass produces.
                from config import conf
-                global_thinking = conf().get("enable_thinking", True)
-                if not global_thinking:
-                    kwargs['thinking'] = {"type": "disabled"}
-                else:
-                    kwargs['thinking'] = {"type": "enabled"} if channel_type == "web" else {"type": "disabled"}
+                kwargs['thinking'] = (
+                    {"type": "enabled"} if conf().get("enable_thinking", False)
+                    else {"type": "disabled"}
+                )

                response = self.bot.call_with_tools(**kwargs)
                return self._format_response(response)
@@ -220,13 +222,15 @@ class AgentLLMModel(LLMModel):
                if session_id:
                    kwargs['session_id'] = session_id

-                # Determine thinking: respect global config, then channel_type
+                # Thinking mode is a global toggle independent of the channel.
+                # IM channels (WeChat/WeCom/DingTalk/Feishu) won't render the
+                # reasoning trace, but still benefit from the higher answer
+                # quality the thinking pass produces.
                from config import conf
-                global_thinking = conf().get("enable_thinking", True)
-                if not global_thinking:
-                    kwargs['thinking'] = {"type": "disabled"}
-                else:
-                    kwargs['thinking'] = {"type": "enabled"} if channel_type == "web" else {"type": "disabled"}
+                kwargs['thinking'] = (
+                    {"type": "enabled"} if conf().get("enable_thinking", False)
+                    else {"type": "disabled"}
+                )

                stream = self.bot.call_with_tools(**kwargs)
                
@@ -446,7 +450,7 @@ class AgentBridge:
                        except Exception as e:
                            logger.warning(f"[AgentBridge] Failed to clear DB after recovery: {e}")
            
-            # Check if there are files to send (from read tool)
+            # Check if there are files to send (from send/read tool)
            if hasattr(agent, 'stream_executor') and hasattr(agent.stream_executor, 'files_to_send'):
                files_to_send = agent.stream_executor.files_to_send
                if files_to_send:
@@ -608,18 +612,55 @@ class AgentBridge:
            from config import conf
            if not conf().get("conversation_persistence", True):
                return
+            # When deep-thinking display is disabled, strip "thinking" content
+            # blocks before persisting so they don't resurface on history reload.
+            # The in-memory message list keeps them intact for this run's
+            # multi-turn LLM context.
+            thinking_enabled = bool(conf().get("enable_thinking", False))
        except Exception:
-            pass
+            thinking_enabled = False
+
+        messages_to_store = new_messages
+        if not thinking_enabled:
+            messages_to_store = self._strip_thinking_blocks(new_messages)
+
        try:
            from agent.memory import get_conversation_store
            get_conversation_store().append_messages(
-                session_id, new_messages, channel_type=channel_type
+                session_id, messages_to_store, channel_type=channel_type
            )
        except Exception as e:
            logger.warning(
                f"[AgentBridge] Failed to persist messages for session={session_id}: {e}"
            )

+    @staticmethod
+    def _strip_thinking_blocks(messages: list) -> list:
+        """Build a new list in which assistant messages carry no "thinking" blocks.
+
+        Only dict messages with ``role == "assistant"`` and a list ``content``
+        are inspected. A message that actually contains a thinking block is
+        replaced by a shallow copy with a filtered ``content`` list; every
+        other entry is passed through as the very same object. The input list
+        and its messages are never mutated.
+        """
+        def _is_thinking(block) -> bool:
+            return isinstance(block, dict) and block.get("type") == "thinking"
+
+        result = []
+        for message in messages:
+            is_assistant = isinstance(message, dict) and message.get("role") == "assistant"
+            blocks = message.get("content") if is_assistant else None
+            if isinstance(blocks, list) and any(_is_thinking(b) for b in blocks):
+                kept = [b for b in blocks if not _is_thinking(b)]
+                result.append({**message, "content": kept})
+            else:
+                result.append(message)
+        return result
+
    def clear_session(self, session_id: str):
        """
        Clear a specific session's agent and conversation history
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -548,14 +548,17 @@ class AgentInitializer:
        import threading

        def _daily_flush_loop():
+            import random
            while True:
                try:
                    now = datetime.datetime.now()
-                    target = now.replace(hour=23, minute=55, second=0, microsecond=0)
+                    jitter_min = random.randint(50, 55)
+                    jitter_sec = random.randint(0, 59)
+                    target = now.replace(hour=23, minute=jitter_min, second=jitter_sec, microsecond=0)
                    if target <= now:
                        target += datetime.timedelta(days=1)
                    wait_seconds = (target - now).total_seconds()
-                    logger.info(f"[DailyFlush] Next flush at {target.strftime('%Y-%m-%d %H:%M')} (in {wait_seconds/3600:.1f}h)")
+                    logger.info(f"[DailyFlush] Next flush at {target.strftime('%Y-%m-%d %H:%M:%S')} (in {wait_seconds/3600:.1f}h)")
                    time.sleep(wait_seconds)

                    self._flush_all_agents()
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -297,8 +297,12 @@ class ChatChannel(Channel):
                logger.debug("[chat_channel] sending reply: {}, context: {}".format(reply, context))
                
                # 如果是文本回复，尝试提取并发送图片
-                if reply.type == ReplyType.TEXT:
+                # Web channel renders images/videos inline via renderMarkdown,
+                # so skip the extract-and-send step to avoid duplicate media.
+                if reply.type == ReplyType.TEXT and context.get("channel_type") != "web":
                    self._extract_and_send_images(reply, context)
+                elif reply.type == ReplyType.TEXT:
+                    self._send(reply, context)
                # 如果是图片回复但带有文本内容，先发文本再发图片
                elif reply.type == ReplyType.IMAGE_URL and hasattr(reply, 'text_content') and reply.text_content:
                    # 先发送文本
--- a/channel/web/chat.html
+++ b/channel/web/chat.html
@@ -213,6 +213,9 @@
            <div id="session-list" class="session-list"></div>
        </aside>

+        <!-- Mobile overlay for session panel (click to close) -->
+        <div id="session-panel-overlay" class="session-panel-overlay hidden" onclick="closeSessionPanel()"></div>
+
        <!-- ================================================================ -->
        <!-- MAIN CONTENT                                                     -->
        <!-- ================================================================ -->
@@ -285,7 +288,7 @@
                <!-- ====================================================== -->
                <!-- VIEW: Chat                                              -->
                <!-- ====================================================== -->
-                <div id="view-chat" class="view active">
+                <div id="view-chat" class="view active relative">
                    <!-- Messages -->
                    <div id="chat-messages" class="flex-1 overflow-y-auto">
                        <!-- Welcome Screen -->
@@ -361,6 +364,18 @@
                        </div>
                    </div>

+                    <!-- Scroll-to-bottom FAB -->
+                    <button id="scroll-to-bottom-btn"
+                            class="hidden absolute right-5 bottom-[80px] z-10
+                                   w-9 h-9 rounded-full shadow-lg
+                                   bg-white dark:bg-[#2A2A2A] border border-slate-200 dark:border-white/15
+                                   text-slate-500 dark:text-slate-400 hover:text-primary-500 dark:hover:text-primary-400
+                                   flex items-center justify-center cursor-pointer transition-all duration-200
+                                   hover:shadow-xl hover:scale-105"
+                            onclick="_autoScrollEnabled = true; scrollChatToBottom(true);">
+                        <i class="fas fa-chevron-down text-sm"></i>
+                    </button>
+
                    <!-- Chat Input -->
                    <div class="flex-shrink-0 border-t border-slate-200 dark:border-white/10 bg-white dark:bg-[#1A1A1A] px-4 py-3">
                        <div class="max-w-3xl mx-auto">
@@ -445,6 +460,9 @@
                                                </div>
                                                <div class="cfg-dropdown-menu"></div>
                                            </div>
+                                            <div id="cfg-custom-tip" class="mt-1.5 text-xs text-slate-400 dark:text-slate-500 hidden">
+                                                <i class="fas fa-info-circle mr-1"></i><span data-i18n="config_custom_tip">接口需遵循 OpenAI API 协议</span>
+                                            </div>
                                        </div>
                                        <!-- Model -->
                                        <div>
@@ -546,7 +564,7 @@
                                                <span class="cfg-tip" data-tip-key="config_enable_thinking_hint"><i class="fas fa-circle-question"></i></span>
                                            </label>
                                            <label class="relative inline-flex items-center cursor-pointer">
-                                                <input id="cfg-enable-thinking" type="checkbox" class="sr-only peer" checked>
+                                                <input id="cfg-enable-thinking" type="checkbox" class="sr-only peer">
                                                <div class="w-9 h-5 bg-slate-200 dark:bg-slate-700 peer-checked:bg-primary-400 rounded-full
                                                            after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white
                                                            after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:after:translate-x-full"></div>
--- a/channel/web/static/css/console.css
+++ b/channel/web/static/css/console.css
@@ -339,6 +339,23 @@
 }
 .confirm-btn-ok:hover { background: #dc2626; }

+/* Session panel overlay (mobile only, click to close) */
+/* Never rendered outside the mobile breakpoint below. */
+.session-panel-overlay {
+    display: none;
+}
+@media (max-width: 768px) {
+    /* Full-viewport dimmed backdrop. */
+    .session-panel-overlay {
+        display: block;
+        position: fixed;
+        inset: 0;
+        z-index: 44;
+        background: rgba(0, 0, 0, 0.3);
+    }
+    /* Two-class selector so the `hidden` state out-ranks the single-class
+       display:block rule above and the overlay stays hidden until opened. */
+    .session-panel-overlay.hidden {
+        display: none;
+    }
+}
+
 /* Mobile: session panel as overlay */
@media (max-width: 768px) {
    .session-panel {
@@ -492,6 +509,22 @@
    color: #b0b8c4;
    margin-bottom: 0.375rem;
 }
+/* Streaming reasoning: render as plain pre to avoid expensive markdown
+   re-parsing on every chunk. Wrap long lines so the bubble width is
+   respected and use the same font size/color as the rendered version. */
+.agent-thinking-step .thinking-stream-pre {
+    margin: 0;
+    padding: 0;
+    background: transparent;
+    border: 0;
+    font-family: inherit;
+    font-size: inherit;
+    line-height: 1.5;
+    color: inherit;
+    white-space: pre-wrap;
+    word-break: break-word;
+    overflow-wrap: anywhere;
+}

 /* Content step - real text output frozen before tool calls */
 .agent-content-step {
@@ -935,13 +968,13 @@
    font-size: 8px;
    transition: transform 0.15s;
 }
-.knowledge-tree-group.open .chevron {
+.knowledge-tree-group.open > .knowledge-tree-group-btn .chevron {
    transform: rotate(90deg);
 }
 .knowledge-tree-group-items {
    display: none;
 }
-.knowledge-tree-group.open .knowledge-tree-group-items {
+.knowledge-tree-group.open > .knowledge-tree-group-items {
    display: block;
 }

@@ -1035,12 +1068,10 @@
 }
 .cfg-tip:hover { color: #64748b; }
 .dark .cfg-tip:hover { color: #cbd5e1; }
-.cfg-tip::after {
-    content: attr(data-tooltip);
-    position: absolute;
-    left: 50%;
-    bottom: calc(100% + 6px);
-    transform: translateX(-50%);
+/* Floating tooltip portal — appended to <body> by JS so it isn't clipped
+   by overflow:hidden ancestors. */
+.cfg-tip-floating {
+    position: fixed;
    padding: 6px 10px;
    border-radius: 8px;
    font-size: 12px;
@@ -1053,13 +1084,13 @@
    opacity: 0;
    pointer-events: none;
    transition: opacity 0.15s;
-    z-index: 50;
+    z-index: 9999;
 }
-.dark .cfg-tip::after {
+.dark .cfg-tip-floating {
    background: #334155;
    color: #f1f5f9;
 }
-.cfg-tip:hover::after {
+.cfg-tip-floating.show {
    opacity: 1;
 }

--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -38,13 +38,14 @@ const I18N = {
        config_max_tokens: '最大上下文 Token', config_max_tokens_hint: '对话中 Agent 能输入的最大 Token 长度，超过后会智能压缩处理',
        config_max_turns: '最大记忆轮次', config_max_turns_hint: '一问一答为一轮，超过后会智能压缩处理',
        config_max_steps: '最大执行步数', config_max_steps_hint: '单次对话中 Agent 最多调用工具的次数',
-        config_enable_thinking: '深度思考', config_enable_thinking_hint: '启用后在 Web 端展示模型推理过程',
+        config_enable_thinking: '深度思考', config_enable_thinking_hint: '是否启用深度思考模式',
        config_channel_type: '通道类型',
        config_provider: '模型厂商', config_model_name: '模型',
        config_custom_model_hint: '输入自定义模型名称',
        config_save: '保存', config_saved: '已保存',
        config_save_error: '保存失败',
        config_custom_option: '自定义...',
+        config_custom_tip: '接口需遵循 OpenAI API 协议',
        config_security: '安全设置', config_password: '访问密码',
        config_password_hint: '留空则不启用密码保护',
        config_password_changed: '密码已更新，请重新登录',
@@ -123,13 +124,14 @@ const I18N = {
        config_max_tokens: 'Max Context Tokens', config_max_tokens_hint: 'Max tokens the Agent can input per conversation, auto-compressed when exceeded',
        config_max_turns: 'Max Memory Turns', config_max_turns_hint: 'One Q&A pair = one turn, auto-compressed when exceeded',
        config_max_steps: 'Max Steps', config_max_steps_hint: 'Max tool calls the Agent can make in a single conversation',
-        config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Show model reasoning on web console',
+        config_enable_thinking: 'Deep Thinking', config_enable_thinking_hint: 'Enable deep thinking mode',
        config_channel_type: 'Channel Type',
        config_provider: 'Provider', config_model_name: 'Model',
        config_custom_model_hint: 'Enter custom model name',
        config_save: 'Save', config_saved: 'Saved',
        config_save_error: 'Save failed',
        config_custom_option: 'Custom...',
+        config_custom_tip: 'API must follow OpenAI protocol.',
        config_security: 'Security', config_password: 'Password',
        config_password_hint: 'Leave empty to disable password protection',
        config_password_changed: 'Password updated, please re-login',
@@ -202,6 +204,7 @@ function applyI18n() {
    document.querySelectorAll('[data-tip-key]').forEach(el => {
        el.setAttribute('data-tooltip', t(el.dataset.tipKey));
    });
+    installCfgTipPortal();
    const langLabel = document.getElementById('lang-label');
    if (langLabel) langLabel.textContent = currentLang === 'zh' ? '中文' : 'EN';
 }
@@ -213,6 +216,54 @@ function toggleLanguage() {
    _applyInputTooltips();
 }

+// Floating tooltip portal for [data-tip-key] elements. The tooltip node is
+// appended to <body> so it isn't clipped by overflow:hidden ancestors
+// (e.g. the config panel's scroll container).
+let _cfgTipPortalEl = null;          // shared tooltip node, created lazily on first hover
+let _cfgTipPortalInstalled = false;  // guards against registering the listeners twice
+
+// Register the document-level hover listeners that show/hide the floating
+// tooltip. Safe to call repeatedly: only the first call installs anything.
+function installCfgTipPortal() {
+    if (_cfgTipPortalInstalled) return;
+    _cfgTipPortalInstalled = true;
+
+    const GUTTER = 8;  // minimum distance kept from the viewport edges (px)
+    const GAP = 6;     // distance between the tooltip and its anchor (px)
+
+    // Resolve the tooltip anchor for an event node. Event targets are not
+    // guaranteed to be Elements (e.g. `document`, or a null relatedTarget),
+    // so guard before calling closest().
+    const tipAnchor = (node) =>
+        (node && typeof node.closest === 'function') ? node.closest('[data-tip-key]') : null;
+
+    const showTip = (target) => {
+        const text = target.getAttribute('data-tooltip');
+        if (!text) return;
+        if (!_cfgTipPortalEl) {
+            _cfgTipPortalEl = document.createElement('div');
+            _cfgTipPortalEl.className = 'cfg-tip-floating';
+            document.body.appendChild(_cfgTipPortalEl);
+        }
+        _cfgTipPortalEl.textContent = text;
+        // NOTE(review): coordinates below are viewport-relative, which assumes
+        // .cfg-tip-floating is position:fixed — confirm in the stylesheet.
+        const rect = target.getBoundingClientRect();
+        // Render once at the origin to measure, then position relative to the target.
+        _cfgTipPortalEl.style.left = '0px';
+        _cfgTipPortalEl.style.top = '0px';
+        _cfgTipPortalEl.classList.add('show');
+        const tipRect = _cfgTipPortalEl.getBoundingClientRect();
+        let left = rect.left + rect.width / 2 - tipRect.width / 2;
+        // Clamp horizontally to the viewport with a gutter.
+        left = Math.max(GUTTER, Math.min(left, window.innerWidth - tipRect.width - GUTTER));
+        // Prefer placing the tooltip above the target; when there is not enough
+        // room (target near the top edge), flip it below instead of letting it
+        // render partially off-screen.
+        let top = rect.top - tipRect.height - GAP;
+        if (top < GUTTER) top = rect.bottom + GAP;
+        _cfgTipPortalEl.style.left = left + 'px';
+        _cfgTipPortalEl.style.top = top + 'px';
+    };
+    const hideTip = () => {
+        if (_cfgTipPortalEl) _cfgTipPortalEl.classList.remove('show');
+    };
+
+    document.addEventListener('mouseover', (e) => {
+        const target = tipAnchor(e.target);
+        if (target) showTip(target);
+    });
+    document.addEventListener('mouseout', (e) => {
+        const target = tipAnchor(e.target);
+        if (!target) return;
+        // mouseout bubbles from descendants: moving between children of the
+        // same anchor is not a real leave, so keep the tooltip visible.
+        if (tipAnchor(e.relatedTarget) === target) return;
+        hideTip();
+    });
+    // Hide on scroll/resize so the tooltip doesn't drift away from its anchor.
+    window.addEventListener('scroll', hideTip, true);
+    window.addEventListener('resize', hideTip);
+}
+
 // =====================================================================
 // Theme
 // =====================================================================
@@ -337,18 +388,59 @@ function createMd() {
 const md = createMd();

 const VIDEO_EXT_RE = /\.(?:mp4|webm|mov|avi|mkv)$/i;  // tested against URL without query string
+const IMAGE_EXT_RE = /\.(?:jpg|jpeg|png|gif|webp|bmp|svg)$/i;  // tested against URL without query string
+
+// Map a local file reference to the web-accessible /api/file endpoint.
+//   - absolute paths starting with "/<letter>" (except existing /api/ URLs)
+//     are wrapped as /api/file?path=<encoded path>
+//   - file:/// URLs are converted to their local path and wrapped the same way
+//   - everything else (http(s) URLs, protocol-relative URLs, ...) is returned as-is
+function _toWebUrl(url) {
+    if (/^\/[A-Za-z]/.test(url) && !url.startsWith('/api/')) {
+        return '/api/file?path=' + encodeURIComponent(url);
+    }
+    if (/^file:\/\/\//i.test(url)) {
+        let localPath = url.replace(/^file:\/\/\//i, '/');
+        // file: URLs are percent-encoded. Decode first, otherwise
+        // encodeURIComponent below double-encodes (%20 -> %2520) and the
+        // resulting path no longer names the real file.
+        try {
+            localPath = decodeURIComponent(localPath);
+        } catch (_) {
+            // Malformed escape (e.g. a literal '%' in the name): keep the raw path.
+        }
+        return '/api/file?path=' + encodeURIComponent(localPath);
+    }
+    return url;
+}

 function _buildVideoHtml(url) {
+    const webUrl = _toWebUrl(url);
    const fileName = url.split('/').pop().split('?')[0];
    return `<div style="margin:10px 0;">` +
        `<video controls preload="metadata" ` +
        `style="max-width:100%;border-radius:10px;box-shadow:0 2px 8px rgba(0,0,0,0.15);display:block;">` +
-        `<source src="${url}"></video>` +
-        `<a href="${url}" target="_blank" ` +
+        `<source src="${webUrl}"></video>` +
+        `<a href="${webUrl}" target="_blank" ` +
        `style="display:inline-flex;align-items:center;gap:4px;margin-top:4px;font-size:12px;color:#8b8fa8;text-decoration:none;">` +
        `<i class="fas fa-download"></i> ${escapeHtml(fileName)}</a></div>`;
 }

+// Show `src` in a full-screen lightbox. The overlay (#cow-lightbox) is created
+// on first use and reused afterwards; clicking the backdrop fades it out and
+// hides it, clicking the image itself does nothing.
+function _openImageLightbox(src) {
+    let overlay = document.getElementById('cow-lightbox');
+    if (!overlay) {
+        overlay = document.createElement('div');
+        overlay.id = 'cow-lightbox';
+        overlay.style.cssText = 'position:fixed;inset:0;z-index:9999;background:rgba(0,0,0,0.85);display:flex;align-items:center;justify-content:center;cursor:zoom-out;opacity:0;transition:opacity .2s';
+        overlay.onclick = () => {
+            overlay.style.opacity = '0';
+            // Hide only after the 200ms fade-out. The timer handle is kept so
+            // that re-opening during the fade can cancel it.
+            clearTimeout(overlay._hideTimer);
+            overlay._hideTimer = setTimeout(() => {
+                overlay.style.display = 'none';
+                overlay._hideTimer = null;
+            }, 200);
+        };
+        const img = document.createElement('img');
+        img.id = 'cow-lightbox-img';
+        img.style.cssText = 'max-width:92vw;max-height:92vh;border-radius:8px;box-shadow:0 4px 24px rgba(0,0,0,0.5);object-fit:contain;';
+        // Clicks on the image must not bubble to the backdrop's close handler.
+        img.onclick = (e) => e.stopPropagation();
+        overlay.appendChild(img);
+        document.body.appendChild(overlay);
+    }
+    // A close triggered less than 200ms ago still has a pending hide timer;
+    // cancel it, otherwise it would hide the lightbox we are opening now.
+    if (overlay._hideTimer) {
+        clearTimeout(overlay._hideTimer);
+        overlay._hideTimer = null;
+    }
+    overlay.querySelector('#cow-lightbox-img').src = src;
+    overlay.style.display = 'flex';
+    // Flip opacity on the next frame so the CSS transition actually runs.
+    requestAnimationFrame(() => overlay.style.opacity = '1');
+}
+
+// Build the inline preview markup for one image URL: a wrapper <div> holding
+// a lazily-loaded <img> that opens the lightbox when clicked. Local paths are
+// routed through _toWebUrl first; double quotes are neutralised so the URL
+// cannot terminate the src attribute.
+function _buildImageHtml(url) {
+    const src = _toWebUrl(url).replace(/"/g, '&quot;');
+    const parts = [
+        `<div style="margin:10px 0;">`,
+        `<img src="${src}" alt="image" loading="lazy" `,
+        `onclick="_openImageLightbox(this.src)" `,
+        `style="max-width:520px;width:100%;border-radius:10px;box-shadow:0 2px 8px rgba(0,0,0,0.15);display:block;cursor:zoom-in;">`,
+        `</div>`,
+    ];
+    return parts.join('');
+}
+
 function injectVideoPlayers(html) {
    // Step 1: replace markdown-it anchor tags whose href points to a video file.
    const step1 = html.replace(
@@ -367,10 +459,43 @@ function injectVideoPlayers(html) {
    }).join('');
 }

+// Convert image URLs into inline <img> previews. Mirrors injectVideoPlayers but for images.
+// Handles two shapes found in the rendered HTML:
+//   1. <a href="...image.jpg">...</a>  (bare URL or autolink that linkify turned into an anchor)
+//   2. a raw URL still present in a text node — only as a safety net
+// Existing tags (including <img src="...">) are never rewritten by this function.
+function injectImagePreviews(html) {
+    // Step 1: anchor whose href points to an image file -> replace with <img> preview.
+    // The extension test runs against the URL without its query string.
+    const step1 = html.replace(
+        /<a\s+href="(https?:\/\/[^"]+)"[^>]*>[^<]*<\/a>/gi,
+        (match, url) => IMAGE_EXT_RE.test(url.split('?')[0]) ? _buildImageHtml(url) : match
+    );
+    // Step 2: bare image URLs left in text nodes (rare — markdown-it's linkify usually catches them).
+    // Splitting on a capturing tag regex puts tags at odd indices and text at even ones.
+    return step1.split(/(<[^>]+>)/).map((chunk, idx) => {
+        if (idx % 2 !== 0) return chunk;
+        return chunk.replace(/https?:\/\/\S+/gi, (url) => {
+            // Trailing sentence punctuation is not part of the URL...
+            const bare = url.replace(/[),.\s]+$/, '');
+            if (!IMAGE_EXT_RE.test(bare.split('?')[0])) return url;
+            // ...but it is part of the text, so put it back after the preview
+            // instead of silently dropping it.
+            return _buildImageHtml(bare) + url.slice(bare.length);
+        });
+    }).join('');
+}
+
+// Post-process every <img ... src="..."> tag in `html`: route the src through
+// _toWebUrl and, unless the tag already carries an onclick attribute, add the
+// lightbox click handler plus a zoom-in cursor.
+function _rewriteLocalImgSrc(html) {
+    const imgTagRe = /<img\s([^>]*?)src="([^"]+)"([^>]*?)>/gi;
+    return html.replace(imgTagRe, (_tag, before, rawSrc, after) => {
+        const quotedSrc = _toWebUrl(rawSrc).replace(/"/g, '&quot;');
+        let extraAttrs = ` onclick="_openImageLightbox(this.src)" style="cursor:zoom-in;"`;
+        if (/onclick/i.test(before + after)) {
+            extraAttrs = '';
+        }
+        return `<img ${before}src="${quotedSrc}"${after}${extraAttrs}>`;
+    });
+}
+
 function renderMarkdown(text) {
    try {
-        const html = md.render(text);
-        return injectVideoPlayers(html);
+        let html = md.render(text);
+        html = _rewriteLocalImgSrc(html);
+        // Order matters: video first (more specific), then image.
+        return injectImagePreviews(injectVideoPlayers(html));
    }
    catch (e) { return text.replace(/\n/g, '<br>'); }
 }
@@ -428,6 +553,16 @@ const sendBtn = document.getElementById('send-btn');
 const messagesDiv = document.getElementById('chat-messages');
 const fileInput = document.getElementById('file-input');

+// Smart auto-scroll: following new output is paused while the user has
+// scrolled up and resumes once the view is back near the bottom.
+let _autoScrollEnabled = true;
+const _SCROLL_THRESHOLD = 80; // px from bottom to re-enable auto-scroll
+
+messagesDiv.addEventListener('scroll', () => {
+    const { scrollHeight, scrollTop, clientHeight } = messagesDiv;
+    const gapToBottom = scrollHeight - scrollTop - clientHeight;
+    _autoScrollEnabled = gapToBottom <= _SCROLL_THRESHOLD;
+    // Keep the "scroll to bottom" button in step with the new position.
+    _updateScrollToBottomBtn();
+});
+
 // Intercept internal navigation links in chat messages
 messagesDiv.addEventListener('click', (e) => {
    const copyBtn = e.target.closest('.copy-msg-btn');
@@ -982,17 +1117,60 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                    reasoningStartTime = Date.now();
                    currentReasoningEl = document.createElement('div');
                    currentReasoningEl.className = 'agent-step agent-thinking-step';
+                    // During streaming, use a <pre> with a single text node and
+                    // append-only updates. This avoids re-parsing markdown and
+                    // re-setting innerHTML on every chunk, which is what causes
+                    // the page to crash on long chains-of-thought.
                    currentReasoningEl.innerHTML = `
                        <div class="thinking-header" onclick="this.parentElement.classList.toggle('expanded')">
                            <i class="fas fa-lightbulb text-amber-400 flex-shrink-0"></i>
                            <span class="thinking-summary">${t('thinking_in_progress')}</span>
                            <i class="fas fa-chevron-right thinking-chevron"></i>
                        </div>
-                        <div class="thinking-full"></div>`;
+                        <div class="thinking-full"><pre class="thinking-stream-pre"></pre></div>`;
                    stepsEl.appendChild(currentReasoningEl);
+                    const preEl = currentReasoningEl.querySelector('.thinking-stream-pre');
+                    preEl.appendChild(document.createTextNode(''));
+                    currentReasoningEl._streamTextNode = preEl.firstChild;
+                    currentReasoningEl._streamPendingText = '';
+                    currentReasoningEl._streamRafScheduled = false;
+                    currentReasoningEl._streamCharsRendered = 0;
+                    currentReasoningEl._streamCapped = false;
+                }
+                // Hard cap: once REASONING_RENDER_CAP chars are in the DOM, stop
+                // appending further deltas. The full text is still kept in
+                // `reasoningText` for finalize-time head+tail rendering.
+                if (!currentReasoningEl._streamCapped) {
+                    currentReasoningEl._streamPendingText += item.content;
+                    if (!currentReasoningEl._streamRafScheduled) {
+                        currentReasoningEl._streamRafScheduled = true;
+                        const elRef = currentReasoningEl;
+                        requestAnimationFrame(() => {
+                            elRef._streamRafScheduled = false;
+                            if (!elRef.isConnected || !elRef._streamTextNode) return;
+                            let pending = elRef._streamPendingText;
+                            elRef._streamPendingText = '';
+                            if (!pending) return;
+                            const remaining = REASONING_RENDER_CAP - elRef._streamCharsRendered;
+                            if (remaining <= 0) {
+                                elRef._streamCapped = true;
+                            } else {
+                                if (pending.length > remaining) {
+                                    pending = pending.slice(0, remaining);
+                                    elRef._streamCapped = true;
+                                }
+                                elRef._streamTextNode.appendData(pending);
+                                elRef._streamCharsRendered += pending.length;
+                                if (elRef._streamCapped) {
+                                    elRef._streamTextNode.appendData(
+                                        '\n\n... [reasoning truncated for display] ...'
+                                    );
+                                }
+                            }
+                            scrollChatToBottom();
+                        });
+                    }
                }
-                currentReasoningEl.querySelector('.thinking-full').innerHTML = renderMarkdown(reasoningText);
-                scrollChatToBottom();

            } else if (item.type === 'delta') {
                ensureBotEl();
@@ -1079,8 +1257,8 @@ function startSSE(requestId, loadingEl, timestamp, titleInfo) {
                const imgEl = document.createElement('img');
                imgEl.src = item.content;
                imgEl.alt = 'screenshot';
-                imgEl.style.cssText = 'max-width:600px;border-radius:8px;margin:8px 0;cursor:pointer;box-shadow:0 1px 4px rgba(0,0,0,0.1);';
-                imgEl.onclick = () => window.open(item.content, '_blank');
+                imgEl.style.cssText = 'max-width:600px;border-radius:8px;margin:8px 0;cursor:zoom-in;box-shadow:0 1px 4px rgba(0,0,0,0.1);';
+                imgEl.onclick = () => _openImageLightbox(imgEl.src);
                mediaEl.appendChild(imgEl);
                scrollChatToBottom();

@@ -1290,11 +1468,41 @@ function renderToolCallsHtml(toolCalls) {
    }).join('');
 }

+// Cap, in characters, for rendering reasoning content in the bubble. Beyond
+// this size we skip markdown rendering entirely and show plain text head +
+// tail to keep the page responsive (very long chains-of-thought can otherwise
+// stall or crash the browser when re-parsed by the markdown renderer).
+// Keep this in sync with backend MAX_STORED_REASONING_CHARS and
+// MAX_REASONING_STREAM_CHARS so storage / SSE / display stay aligned.
+const REASONING_RENDER_CAP = 4 * 1024; // 4096 characters (compared against String.length)
+
+// Shorten reasoning text that exceeds REASONING_RENDER_CAP to its first and
+// last halves (cap / 2 characters each) joined by an "N chars omitted" marker.
+// Returns { text, truncated, omitted }; empty or short input is returned
+// unchanged with truncated=false and omitted=0.
+function _truncateReasoningForDisplay(text) {
+    const fitsAsIs = !text || text.length <= REASONING_RENDER_CAP;
+    if (fitsAsIs) {
+        return { text, truncated: false, omitted: 0 };
+    }
+    const keep = Math.floor(REASONING_RENDER_CAP / 2);
+    const head = text.slice(0, keep);
+    const tail = text.slice(-keep);
+    const omitted = text.length - head.length - tail.length;
+    const joined = `${head}\n\n... [${omitted} chars omitted] ...\n\n${tail}`;
+    return { text: joined, truncated: true, omitted };
+}
+
+// Render the body of a reasoning ("thinking") block as HTML.
+// Short reasoning is rendered as markdown; reasoning longer than
+// REASONING_RENDER_CAP is truncated (head + tail) and emitted as an escaped
+// <pre> block to avoid expensive markdown parsing.
+function _renderReasoningBody(text) {
+    // No reasoning at all: render nothing instead of throwing on `.length`.
+    if (text == null) return '';
+    const { text: shown, truncated } = _truncateReasoningForDisplay(text);
+    // `truncated` is set exactly when the input exceeded the cap, so it is
+    // the only condition needed to pick the plain-text path.
+    if (truncated) {
+        return '<pre class="thinking-stream-pre">' + escapeHtml(shown) + '</pre>';
+    }
+    return renderMarkdown(shown);
+}
+
 function finalizeThinking(el, startTime, text) {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    el.querySelector('.thinking-summary').textContent = t('thinking_done');
    const fullDiv = el.querySelector('.thinking-full');
-    fullDiv.innerHTML = `<div class="thinking-duration">${t('thinking_duration')} ${elapsed}s</div>` + renderMarkdown(text);
+    fullDiv.innerHTML = `<div class="thinking-duration">${t('thinking_duration')} ${elapsed}s</div>` + _renderReasoningBody(text);
 }

 function renderThinkingHtml(text) {
@@ -1307,7 +1515,7 @@ function renderThinkingHtml(text) {
        <span class="thinking-summary">${t('thinking_done')}</span>
        <i class="fas fa-chevron-right thinking-chevron"></i>
    </div>
-    <div class="thinking-full">${renderMarkdown(full)}</div>
+    <div class="thinking-full">${_renderReasoningBody(full)}</div>
 </div>`;
 }

@@ -1354,11 +1562,40 @@ function renderStepsHtml(steps) {
        </div>` : ''}
    </div>
 </div>`;
+            // If this tool sent a file (send/read tool), render the media inline
+            // so it persists across page refreshes (SSE-only file events are not stored).
+            const mediaHtml = _renderSentFileFromToolResult(step);
+            if (mediaHtml) html += mediaHtml;
        }
    }
    return { stepsHtml: html, lastContentText };
 }

+// Extract file-to-send metadata from a tool's result and render an inline preview.
+// `step.result` may be a JSON string or an already-parsed object. Images and
+// videos get an inline player/preview; any other file type gets a download link.
+// Returns '' if the result isn't a file_to_send payload.
+function _renderSentFileFromToolResult(step) {
+    if (!step || !step.result) return '';
+    let payload;
+    try {
+        payload = typeof step.result === 'string' ? JSON.parse(step.result) : step.result;
+    } catch (_) { return ''; }
+    if (!payload || payload.type !== 'file_to_send') return '';
+    // The path is used with string methods below; anything else is not a usable payload.
+    if (typeof payload.path !== 'string' || !payload.path) return '';
+    const fileType = payload.file_type || 'file';
+    // The media builders call _toWebUrl themselves, so hand them the original
+    // path. Passing the rewritten "/api/file?path=..." URL would make
+    // _buildVideoHtml derive the caption "file" instead of the real file name.
+    if (fileType === 'image') {
+        return `<div class="agent-step">${_buildImageHtml(payload.path)}</div>`;
+    }
+    if (fileType === 'video') {
+        return `<div class="agent-step">${_buildVideoHtml(payload.path)}</div>`;
+    }
+    const fileName = payload.file_name || payload.path.split('/').pop();
+    // The path comes straight from tool output: escape it before placing it in
+    // an attribute. The explicit quote replacement covers escapeHtml
+    // implementations that leave double quotes untouched.
+    const safeHref = escapeHtml(_toWebUrl(payload.path)).replace(/"/g, '&quot;');
+    return `<div class="agent-step"><a href="${safeHref}" download="${escapeHtml(fileName)}" target="_blank" ` +
+        `style="display:inline-flex;align-items:center;gap:6px;padding:8px 14px;margin:8px 0;border-radius:8px;` +
+        `background:var(--bg-secondary,#f3f4f6);color:var(--text-primary,#374151);text-decoration:none;font-size:14px;` +
+        `border:1px solid var(--border-color,#e5e7eb);">` +
+        `<i class="fas fa-file-download" style="color:#6b7280;"></i> ${escapeHtml(fileName)}</a></div>`;
+}
+
 function createBotMessageEl(content, timestamp, requestId, msg) {
    const el = document.createElement('div');
    el.className = 'flex gap-3 px-4 sm:px-6 py-3';
@@ -1404,7 +1641,8 @@ function createBotMessageEl(content, timestamp, requestId, msg) {
 function addUserMessage(content, timestamp, attachments) {
    const el = createUserMessageEl(content, timestamp, attachments);
    messagesDiv.appendChild(el);
-    scrollChatToBottom();
+    _autoScrollEnabled = true;
+    scrollChatToBottom(true);
 }

 function addBotMessage(content, timestamp, requestId) {
@@ -1497,7 +1735,7 @@ function loadHistory(page) {
            if (isFirstLoad) {
                // Use requestAnimationFrame to ensure the DOM has fully rendered
                // before scrolling, otherwise scrollHeight may not reflect new content.
-                requestAnimationFrame(() => scrollChatToBottom());
+                requestAnimationFrame(() => scrollChatToBottom(true));
            } else {
                // Restore scroll position so loading older messages doesn't jump the view
                messagesDiv.scrollTop = messagesDiv.scrollHeight - prevScrollHeight;
@@ -1626,6 +1864,7 @@ function newChat() {
    if (panel && !sessionPanelOpen) {
        sessionPanelOpen = true;
        panel.classList.remove('hidden');
+        _showSessionOverlay();
        _persistPanelState();
    }
    const newSid = sessionId;
@@ -1643,11 +1882,40 @@ function _persistPanelState() {
    localStorage.setItem(SESSION_PANEL_KEY, sessionPanelOpen ? '1' : '0');
 }

+// True when the viewport is narrower than the 768px desktop breakpoint.
+// The comparison is strict so that it is the exact complement of the
+// `window.innerWidth >= 768` desktop check used by the knowledge view;
+// with `<=` a 768px-wide window counted as both mobile and desktop.
+function _isMobileView() {
+    return window.innerWidth < 768;
+}
+
+// Un-hide #session-panel-overlay. Does nothing outside the mobile view or
+// when the overlay element is missing from the page.
+function _showSessionOverlay() {
+    if (_isMobileView()) {
+        const overlayEl = document.getElementById('session-panel-overlay');
+        if (overlayEl) {
+            overlayEl.classList.remove('hidden');
+        }
+    }
+}
+
+// Hide #session-panel-overlay regardless of viewport size; no-op when the
+// element is missing from the page.
+function _hideSessionOverlay() {
+    const overlayEl = document.getElementById('session-panel-overlay');
+    if (!overlayEl) return;
+    overlayEl.classList.add('hidden');
+}
+
+// Close the session panel if it is currently open: hide the panel and its
+// overlay, then persist the new state. No-op when the panel element is
+// missing or the panel is already closed.
+function closeSessionPanel() {
+    const panelEl = document.getElementById('session-panel');
+    const canClose = panelEl && sessionPanelOpen;
+    if (!canClose) return;
+    sessionPanelOpen = false;
+    panelEl.classList.add('hidden');
+    _hideSessionOverlay();
+    _persistPanelState();
+}
+
 function toggleSessionPanel() {
    const panel = document.getElementById('session-panel');
    if (!panel) return;
    sessionPanelOpen = !sessionPanelOpen;
    panel.classList.toggle('hidden', !sessionPanelOpen);
+    if (sessionPanelOpen) {
+        _showSessionOverlay();
+    } else {
+        _hideSessionOverlay();
+    }
    _persistPanelState();
    if (sessionPanelOpen) loadSessionList();
 }
@@ -1657,6 +1925,7 @@ function openSessionPanel() {
    if (!panel || sessionPanelOpen) return;
    sessionPanelOpen = true;
    panel.classList.remove('hidden');
+    _showSessionOverlay();
    _persistPanelState();
    loadSessionList();
 }
@@ -1664,11 +1933,13 @@ function openSessionPanel() {
 function _restoreSessionPanel() {
+    // Apply the remembered open/closed state of the session panel to the DOM.
+    // In the mobile view the panel always starts hidden.
     const panel = document.getElementById('session-panel');
     if (!panel) return;
-    if (sessionPanelOpen) {
+    if (sessionPanelOpen && !_isMobileView()) {
         panel.classList.remove('hidden');
+        _showSessionOverlay();
         loadSessionList();
     } else {
+        // This branch also runs when the flag says "open" but we are in the
+        // mobile view. Reset the in-memory flag so it matches what is on
+        // screen; otherwise the first toggleSessionPanel() would only flip the
+        // stale flag (no visible change) and openSessionPanel()/newChat()
+        // would skip opening. The persisted preference is left untouched.
+        sessionPanelOpen = false;
         panel.classList.add('hidden');
+        _hideSessionOverlay();
     }
 }

@@ -1860,6 +2131,7 @@ function switchSession(newSessionId) {
        el.classList.toggle('active', el.dataset.sessionId === sessionId);
    });

+    if (_isMobileView()) closeSessionPanel();
    if (currentView !== 'chat') navigateTo('chat');
 }

@@ -1981,8 +2253,17 @@ function formatToolArgs(args) {
    }
 }

-function scrollChatToBottom() {
-    messagesDiv.scrollTop = messagesDiv.scrollHeight;
+// Scroll the message list to its end. Honors the smart auto-scroll state:
+// nothing happens while auto-scroll is paused unless `force` is truthy.
+function scrollChatToBottom(force) {
+    const shouldScroll = force || _autoScrollEnabled;
+    if (!shouldScroll) return;
+    messagesDiv.scrollTop = messagesDiv.scrollHeight;
+}
+
+// Show #scroll-to-bottom-btn only while the message list is scrolled more
+// than _SCROLL_THRESHOLD px away from its end. No-op if the button is absent.
+function _updateScrollToBottomBtn() {
+    const jumpBtn = document.getElementById('scroll-to-bottom-btn');
+    if (!jumpBtn) return;
+    const { scrollHeight, scrollTop, clientHeight } = messagesDiv;
+    const nearBottom = (scrollHeight - scrollTop - clientHeight) <= _SCROLL_THRESHOLD;
+    jumpBtn.classList.toggle('hidden', nearBottom);
 }

 function applyHighlighting(container) {
@@ -2080,7 +2361,7 @@ function initConfigView(data) {
    document.getElementById('cfg-max-tokens').value = data.agent_max_context_tokens || 50000;
    document.getElementById('cfg-max-turns').value = data.agent_max_context_turns || 20;
    document.getElementById('cfg-max-steps').value = data.agent_max_steps || 20;
-    document.getElementById('cfg-enable-thinking').checked = data.enable_thinking !== false;
+    document.getElementById('cfg-enable-thinking').checked = data.enable_thinking === true;

    const pwdInput = document.getElementById('cfg-password');
    const maskedPwd = data.web_password_masked || '';
@@ -2124,6 +2405,9 @@ function onProviderChange(pid) {
    const p = configProviders[cfgProviderValue];
    if (!p) return;

+    const customTip = document.getElementById('cfg-custom-tip');
+    if (customTip) customTip.classList.toggle('hidden', cfgProviderValue !== 'custom');
+
    const modelEl = document.getElementById('cfg-model-select');
    const modelOpts = (p.models || []).map(m => ({ value: m, label: m }));
    modelOpts.push({ value: '__custom__', label: t('config_custom_option') });
@@ -2172,12 +2456,17 @@ function onProviderChange(pid) {
    }

    // API Base
+    const apiBaseInput = document.getElementById('cfg-api-base');
    if (p.api_base_key) {
        document.getElementById('cfg-api-base-wrap').classList.remove('hidden');
-        document.getElementById('cfg-api-base').value = configApiBases[p.api_base_key] || p.api_base_default || '';
+        apiBaseInput.value = configApiBases[p.api_base_key] || p.api_base_default || '';
+        // Hint the version-path tail (e.g. /v1) so users are reminded to
+        // include it themselves. We don't auto-rewrite anything server-side.
+        apiBaseInput.placeholder = p.api_base_placeholder || 'https://...';
    } else {
        document.getElementById('cfg-api-base-wrap').classList.add('hidden');
-        document.getElementById('cfg-api-base').value = '';
+        apiBaseInput.value = '';
+        apiBaseInput.placeholder = 'https://...';
    }

    onModelSelectChange(modelOpts[0] ? modelOpts[0].value : '');
@@ -3526,6 +3815,7 @@ navigateTo = function(viewId) {
 // Knowledge View
 // =====================================================================
 let _knowledgeTreeData = [];
+let _knowledgeRootFiles = [];
 let _knowledgeCurrentFile = null;
 let _knowledgeGraphLoaded = false;

@@ -3543,7 +3833,9 @@ function loadKnowledgeView() {
        const statsEl = document.getElementById('knowledge-stats');

        const tree = data.tree || [];
+        const rootFiles = data.root_files || [];
        _knowledgeTreeData = tree;
+        _knowledgeRootFiles = rootFiles;
        const stats = data.stats || {};
        const totalPages = stats.pages || 0;
        const sizeStr = stats.size < 1024 ? stats.size + ' B' : (stats.size / 1024).toFixed(1) + ' KB';
@@ -3561,14 +3853,17 @@ function loadKnowledgeView() {
        emptyEl.classList.add('hidden');
        docsPanel.classList.remove('hidden');

-        renderKnowledgeTree(tree);
+        renderKnowledgeTree(tree, rootFiles);

        // Auto-select the first file (desktop only)
        if (window.innerWidth >= 768) {
-            const firstGroup = tree.find(g => g.files && g.files.length > 0);
-            if (firstGroup) {
-                const firstFile = firstGroup.files[0];
-                openKnowledgeFile(firstGroup.dir + '/' + firstFile.name, firstFile.title);
+            const firstFile = rootFiles.length > 0 ? rootFiles[0] : null;
+            const firstGroup = !firstFile ? tree.find(g => g.files && g.files.length > 0) : null;
+            if (firstFile) {
+                openKnowledgeFile(firstFile.name, firstFile.title);
+            } else if (firstGroup) {
+                const gf = firstGroup.files[0];
+                openKnowledgeFile(firstGroup.dir + '/' + gf.name, gf.title);
            }
        } else {
            document.getElementById('knowledge-content-placeholder').classList.add('hidden');
@@ -3577,23 +3872,48 @@ function loadKnowledgeView() {
    }).catch(() => {});
 }

-function renderKnowledgeTree(tree, filter) {
+function renderKnowledgeTree(tree, rootFilesOrFilter, filter) {
    const container = document.getElementById('knowledge-tree');
    container.innerHTML = '';
-    const lowerFilter = (filter || '').toLowerCase();
+    let rootFiles, lowerFilter;
+    if (typeof rootFilesOrFilter === 'string') {
+        rootFiles = _knowledgeRootFiles;
+        lowerFilter = (rootFilesOrFilter || '').toLowerCase();
+    } else {
+        rootFiles = rootFilesOrFilter || _knowledgeRootFiles;
+        lowerFilter = (filter || '').toLowerCase();
+    }
+    (rootFiles || []).forEach(f => {
+        if (lowerFilter && !f.title.toLowerCase().includes(lowerFilter) && !f.name.toLowerCase().includes(lowerFilter)) return;
+        const fbtn = document.createElement('button');
+        fbtn.className = 'knowledge-tree-file' + (_knowledgeCurrentFile === f.name ? ' active' : '');
+        fbtn.dataset.path = f.name;
+        fbtn.innerHTML = `<i class="fas fa-file-lines text-[10px] text-slate-400"></i><span class="truncate">${escapeHtml(f.title)}</span>`;
+        fbtn.onclick = () => openKnowledgeFile(f.name, f.title);
+        container.appendChild(fbtn);
+    });
+    _renderKnowledgeGroups(container, tree, '', lowerFilter, 0);
+}

-    tree.forEach(group => {
-        const files = group.files.filter(f =>
+function _renderKnowledgeGroups(container, groups, parentPath, lowerFilter, depth) {
+    const indent = depth * 12;
+    groups.forEach(group => {
+        const groupPath = parentPath ? parentPath + '/' + group.dir : group.dir;
+        const files = (group.files || []).filter(f =>
            !lowerFilter || f.title.toLowerCase().includes(lowerFilter) || f.name.toLowerCase().includes(lowerFilter)
        );
-        if (files.length === 0 && lowerFilter) return;
+        const children = group.children || [];
+        const hasMatchingChildren = lowerFilter ? _hasFilterMatch(children, lowerFilter) : children.length > 0;
+        if (files.length === 0 && !hasMatchingChildren && lowerFilter) return;

        const div = document.createElement('div');
        div.className = 'knowledge-tree-group open';

+        const fileCount = _countFiles(group);
        const btn = document.createElement('button');
        btn.className = 'knowledge-tree-group-btn';
-        btn.innerHTML = `<i class="fas fa-chevron-right chevron"></i><i class="fas fa-folder text-amber-400 text-[11px]"></i><span>${escapeHtml(group.dir)}</span><span class="ml-auto text-[10px] text-slate-400">${files.length}</span>`;
+        btn.style.paddingLeft = (8 + indent) + 'px';
+        btn.innerHTML = `<i class="fas fa-chevron-right chevron"></i><i class="fas fa-folder text-amber-400 text-[11px]"></i><span>${escapeHtml(group.dir)}</span><span class="ml-auto text-[10px] text-slate-400">${fileCount}</span>`;
        btn.onclick = () => div.classList.toggle('open');
        div.appendChild(btn);

@@ -3601,20 +3921,42 @@ function renderKnowledgeTree(tree, filter) {
        items.className = 'knowledge-tree-group-items';
        files.forEach(f => {
            const fbtn = document.createElement('button');
-            const fpath = group.dir + '/' + f.name;
+            const fpath = groupPath + '/' + f.name;
            fbtn.className = 'knowledge-tree-file' + (_knowledgeCurrentFile === fpath ? ' active' : '');
            fbtn.dataset.path = fpath;
+            fbtn.style.paddingLeft = (24 + indent) + 'px';
            fbtn.innerHTML = `<i class="fas fa-file-lines text-[10px] text-slate-400"></i><span class="truncate">${escapeHtml(f.title)}</span>`;
            fbtn.onclick = () => openKnowledgeFile(fpath, f.title);
            items.appendChild(fbtn);
        });
+        if (children.length > 0) {
+            _renderKnowledgeGroups(items, children, groupPath, lowerFilter, depth + 1);
+        }
        div.appendChild(items);
        container.appendChild(div);
    });
 }

+// Return true if any file in `groups` — or in their nested `children`, at any
+// depth — has a title or name containing `lowerFilter`. The filter is expected
+// to be lower-cased already; titles and names are lower-cased before comparing.
+function _hasFilterMatch(groups, lowerFilter) {
+    const fileMatches = (file) =>
+        file.title.toLowerCase().includes(lowerFilter) ||
+        file.name.toLowerCase().includes(lowerFilter);
+    return groups.some((grp) =>
+        (grp.files || []).some(fileMatches) ||
+        _hasFilterMatch(grp.children || [], lowerFilter)
+    );
+}
+
+// Count the files in `group`, including those of all nested `children`.
+// Missing `files` / `children` arrays are treated as empty.
+function _countFiles(group) {
+    const ownFiles = (group.files || []).length;
+    return (group.children || []).reduce(
+        (total, child) => total + _countFiles(child),
+        ownFiles
+    );
+}
+
 function filterKnowledgeTree(query) {
-    renderKnowledgeTree(_knowledgeTreeData, query);
+    renderKnowledgeTree(_knowledgeTreeData, _knowledgeRootFiles, query);
 }

 function resolveKnowledgePath(currentFilePath, relativeHref) {
@@ -3693,12 +4035,22 @@ function bindChatKnowledgeLinks(container) {
 }

 function _findKnowledgeFileByName(filename) {
-    for (const group of _knowledgeTreeData) {
-        for (const f of group.files) {
+    for (const f of _knowledgeRootFiles) {
+        if (f.name === filename) return { path: f.name, title: f.title };
+    }
+    return _searchFileInGroups(_knowledgeTreeData, '', filename);
+}
+
+function _searchFileInGroups(groups, parentPath, filename) {
+    for (const group of groups) {
+        const groupPath = parentPath ? parentPath + '/' + group.dir : group.dir;
+        for (const f of (group.files || [])) {
            if (f.name === filename) {
-                return { path: group.dir + '/' + f.name, title: f.title };
+                return { path: groupPath + '/' + f.name, title: f.title };
            }
        }
+        const found = _searchFileInGroups(group.children || [], groupPath, filename);
+        if (found) return found;
    }
    return null;
 }
@@ -4022,7 +4374,10 @@ function initApp() {
    _restoreSessionPanel();

    fetch('/api/knowledge/list').then(r => r.json()).then(data => {
-        if (data.status === 'success') _knowledgeTreeData = data.tree || [];
+        if (data.status === 'success') {
+            _knowledgeTreeData = data.tree || [];
+            _knowledgeRootFiles = data.root_files || [];
+        }
    }).catch(() => {});

    fetch('/api/version').then(r => r.json()).then(data => {
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -91,39 +91,9 @@ def _get_upload_dir() -> str:


 def _generate_session_title(user_message: str, assistant_reply: str = "") -> str:
-    """
-    Generate a short session title by calling the current bot's reply_text.
-    """
-    import re
-    fallback = user_message[:50].split("\n")[0].strip() or "New Chat"
-    try:
-        from bridge.bridge import Bridge
-        from models.session_manager import Session
-        bot = Bridge().get_bot("chat")
-
-        prompt_parts = [f"User: {user_message[:300]}"]
-        if assistant_reply:
-            prompt_parts.append(f"Assistant: {assistant_reply[:300]}")
-
-        session = Session("__title_gen__", system_prompt="")
-        session.messages = [
-            {"role": "user", "content": (
-                "Generate a very short title (max 15 characters for Chinese, max 6 words for English) "
-                "summarizing this conversation. Return ONLY the title text, nothing else.\n\n"
-                + "\n".join(prompt_parts)
-            )}
-        ]
-
-        result = bot.reply_text(session)
-        raw = (result.get("content") or "").strip()
-        # Strip <think>...</think> reasoning blocks
-        title = re.sub(r'<think>.*?</think>', '', raw, flags=re.DOTALL).strip().strip('"\'')
-        logger.info(f"[WebChannel] Title generation result: '{title}' (len={len(title)})")
-        if title and len(title) <= 50:
-            return title
-    except Exception as e:
-        logger.warning(f"[WebChannel] Title generation failed: {e}")
-    return fallback
+    """Delegate to the shared SessionService implementation."""
+    from agent.chat.session_service import generate_session_title
+    return generate_session_title(user_message, assistant_reply)


 class WebMessage(ChatMessage):
@@ -238,9 +208,24 @@ class WebChannel(ChatChannel):

            # Fallback: polling mode
            if session_id in self.session_queues:
+                content = reply.content if reply.content is not None else ""
+                # Skip file:// IMAGE_URL/FILE replies originating from an SSE-enabled
+                # request: they were already pushed via the `file_to_send` event during
+                # agent execution. By the time the chat_channel sends the IMAGE_URL reply,
+                # the SSE stream has typically closed (after the text "done") and the
+                # request_id is gone from sse_queues, so we'd otherwise duplicate the file
+                # as a polling bubble. Scheduler/push tasks have no on_event and must
+                # still go through polling normally.
+                if (
+                    reply.type in (ReplyType.IMAGE_URL, ReplyType.FILE)
+                    and content.startswith("file://")
+                    and context.get("on_event") is not None
+                ):
+                    logger.debug(f"Polling skipped duplicate file reply for session {session_id}")
+                    return
                response_data = {
                    "type": str(reply.type),
-                    "content": reply.content,
+                    "content": content,
                    "timestamp": time.time(),
                    "request_id": request_id
                }
@@ -255,6 +240,17 @@ class WebChannel(ChatChannel):
    def _make_sse_callback(self, request_id: str):
        """Build an on_event callback that pushes agent stream events into the SSE queue."""

+        # Cap the number of reasoning characters pushed to the frontend per
+        # request to avoid browser stalls / crashes on very long
+        # chains-of-thought. Anything beyond the cap is dropped from the stream
+        # (DB still persists a truncated copy via _truncate_reasoning_for_storage).
+        # Keep aligned with frontend REASONING_RENDER_CAP and backend
+        # MAX_STORED_REASONING_CHARS.
+        MAX_REASONING_STREAM_CHARS = 4 * 1024  # 4096 characters (not bytes)
+        # Use a single-element list as a mutable counter accessible from closure.
+        reasoning_chars_sent = [0]
+        reasoning_capped_notified = [False]
+
        def on_event(event: dict):
            if request_id not in self.sse_queues:
                return
@@ -264,8 +260,21 @@ class WebChannel(ChatChannel):

            if event_type == "reasoning_update":
                delta = data.get("delta", "")
-                if delta:
-                    q.put({"type": "reasoning", "content": delta})
+                if not delta:
+                    return
+                remaining = MAX_REASONING_STREAM_CHARS - reasoning_chars_sent[0]
+                if remaining <= 0:
+                    if not reasoning_capped_notified[0]:
+                        reasoning_capped_notified[0] = True
+                        q.put({
+                            "type": "reasoning",
+                            "content": "\n\n... [reasoning truncated for display] ...",
+                        })
+                    return
+                if len(delta) > remaining:
+                    delta = delta[:remaining]
+                reasoning_chars_sent[0] += len(delta)
+                q.put({"type": "reasoning", "content": delta})

            elif event_type == "message_update":
                delta = data.get("delta", "")
@@ -299,6 +308,25 @@ class WebChannel(ChatChannel):
                if tool_calls:
                    q.put({"type": "message_end", "has_tool_calls": True})

+            elif event_type == "agent_end":
+                # Safety net: if the agent finishes with an empty final_response,
+                # chat_channel skips _send_reply (because reply.content is empty),
+                # which means no "done" event is ever emitted and the SSE stream
+                # would hang until the 10-min idle timeout. Push a fallback "done"
+                # here so the frontend always gets closure.
+                final_response = data.get("final_response", "")
+                if not final_response or not str(final_response).strip():
+                    logger.warning(
+                        f"[WebChannel] agent_end with empty final_response for "
+                        f"request {request_id}, sending fallback done"
+                    )
+                    q.put({
+                        "type": "done",
+                        "content": "(模型未返回任何内容，请重试或换一种方式描述你的需求)",
+                        "request_id": request_id,
+                        "timestamp": time.time(),
+                    })
+
            elif event_type == "file_to_send":
                file_path = data.get("path", "")
                file_name = data.get("file_name", os.path.basename(file_path))
@@ -742,65 +770,58 @@ class ChatHandler:
 class ConfigHandler:

    _RECOMMENDED_MODELS = [
-        const.MINIMAX_M2_7, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING,
-        const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7,
-        const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX,
-        const.KIMI_K2_5, const.KIMI_K2,
-        const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
-        const.CLAUDE_4_6_SONNET, const.CLAUDE_4_6_OPUS, const.CLAUDE_4_5_SONNET,
+        const.DEEPSEEK_V4_FLASH, const.DEEPSEEK_V4_PRO, const.DEEPSEEK_CHAT, const.DEEPSEEK_REASONER,
+        const.MINIMAX_M2_7_HIGHSPEED, const.MINIMAX_M2_7, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING,
+        const.CLAUDE_4_6_SONNET, const.CLAUDE_4_7_OPUS, const.CLAUDE_4_6_OPUS, const.CLAUDE_4_5_SONNET,
        const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE,
        const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o,
-        const.DEEPSEEK_CHAT, const.DEEPSEEK_REASONER,
+        const.GLM_5_1, const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7,
+        const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX,
+        const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE,
+        const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2,
    ]

+    # Generic placeholder hints surfaced in the web console. We deliberately
+    # show the version-path tail (e.g. "/v1") so users are reminded to type
+    # the full base URL. The form is intentionally vague (`...../v1`) so it
+    # never looks like a real default a user might paste verbatim — and we
+    # never auto-rewrite anything on the server side.
+    _PLACEHOLDER_V1 = "https://...../v1"
+    _PLACEHOLDER_ZHIPU = "https://...../api/paas/v4"
+    _PLACEHOLDER_DOUBAO = "https://...../api/v3"
+    _PLACEHOLDER_GEMINI = "https://....."
+
    PROVIDER_MODELS = OrderedDict([
+        ("deepseek", {
+            "label": "DeepSeek",
+            "api_key_field": "deepseek_api_key",
+            "api_base_key": "deepseek_api_base",
+            "api_base_default": "https://api.deepseek.com/v1",
+            "api_base_placeholder": _PLACEHOLDER_V1,
+            "models": [const.DEEPSEEK_V4_FLASH, const.DEEPSEEK_V4_PRO, const.DEEPSEEK_CHAT, const.DEEPSEEK_REASONER],
+        }),
        ("minimax", {
            "label": "MiniMax",
            "api_key_field": "minimax_api_key",
            "api_base_key": None,
            "api_base_default": None,
+            "api_base_placeholder": "",
            "models": [const.MINIMAX_M2_7, const.MINIMAX_M2_7_HIGHSPEED, const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING],
        }),
-        ("zhipu", {
-            "label": "智谱AI",
-            "api_key_field": "zhipu_ai_api_key",
-            "api_base_key": "zhipu_ai_api_base",
-            "api_base_default": "https://open.bigmodel.cn/api/paas/v4",
-            "models": [const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7],
-        }),
-        ("dashscope", {
-            "label": "通义千问",
-            "api_key_field": "dashscope_api_key",
-            "api_base_key": None,
-            "api_base_default": None,
-            "models": [const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX],
-        }),
-        ("moonshot", {
-            "label": "Kimi",
-            "api_key_field": "moonshot_api_key",
-            "api_base_key": "moonshot_base_url",
-            "api_base_default": "https://api.moonshot.cn/v1",
-            "models": [const.KIMI_K2_5, const.KIMI_K2],
-        }),
-        ("doubao", {
-            "label": "豆包",
-            "api_key_field": "ark_api_key",
-            "api_base_key": "ark_base_url",
-            "api_base_default": "https://ark.cn-beijing.volces.com/api/v3",
-            "models": [const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE],
-        }),
        ("claudeAPI", {
            "label": "Claude",
            "api_key_field": "claude_api_key",
            "api_base_key": "claude_api_base",
            "api_base_default": "https://api.anthropic.com/v1",
-            "models": [const.CLAUDE_4_6_SONNET, const.CLAUDE_4_6_OPUS, const.CLAUDE_4_5_SONNET],
+            "api_base_placeholder": _PLACEHOLDER_V1,
+            "models": [const.CLAUDE_4_6_SONNET, const.CLAUDE_4_7_OPUS, const.CLAUDE_4_6_OPUS, const.CLAUDE_4_5_SONNET],
        }),
        ("gemini", {
            "label": "Gemini",
            "api_key_field": "gemini_api_key",
            "api_base_key": "gemini_api_base",
            "api_base_default": "https://generativelanguage.googleapis.com",
+            "api_base_placeholder": _PLACEHOLDER_GEMINI,
            "models": [const.GEMINI_31_FLASH_LITE_PRE, const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE],
        }),
        ("openai", {
@@ -808,20 +829,47 @@ class ConfigHandler:
            "api_key_field": "open_ai_api_key",
            "api_base_key": "open_ai_api_base",
            "api_base_default": "https://api.openai.com/v1",
+            "api_base_placeholder": _PLACEHOLDER_V1,
            "models": [const.GPT_54, const.GPT_54_MINI, const.GPT_54_NANO, const.GPT_5, const.GPT_41, const.GPT_4o],
        }),
-        ("deepseek", {
-            "label": "DeepSeek",
-            "api_key_field": "deepseek_api_key",
-            "api_base_key": "deepseek_api_base",
-            "api_base_default": "https://api.deepseek.com/v1",
-            "models": [const.DEEPSEEK_CHAT, const.DEEPSEEK_REASONER],
+        ("zhipu", {
+            "label": "智谱AI",
+            "api_key_field": "zhipu_ai_api_key",
+            "api_base_key": "zhipu_ai_api_base",
+            "api_base_default": "https://open.bigmodel.cn/api/paas/v4",
+            "api_base_placeholder": _PLACEHOLDER_ZHIPU,
+            "models": [const.GLM_5_1, const.GLM_5_TURBO, const.GLM_5, const.GLM_4_7],
+        }),
+        ("dashscope", {
+            "label": "通义千问",
+            "api_key_field": "dashscope_api_key",
+            "api_base_key": None,
+            "api_base_default": None,
+            "api_base_placeholder": "",
+            "models": [const.QWEN36_PLUS, const.QWEN35_PLUS, const.QWEN3_MAX],
+        }),
+        ("doubao", {
+            "label": "豆包",
+            "api_key_field": "ark_api_key",
+            "api_base_key": "ark_base_url",
+            "api_base_default": "https://ark.cn-beijing.volces.com/api/v3",
+            "api_base_placeholder": _PLACEHOLDER_DOUBAO,
+            "models": [const.DOUBAO_SEED_2_PRO, const.DOUBAO_SEED_2_CODE],
+        }),
+        ("moonshot", {
+            "label": "Kimi",
+            "api_key_field": "moonshot_api_key",
+            "api_base_key": "moonshot_base_url",
+            "api_base_default": "https://api.moonshot.cn/v1",
+            "api_base_placeholder": _PLACEHOLDER_V1,
+            "models": [const.KIMI_K2_6, const.KIMI_K2_5, const.KIMI_K2],
        }),
        ("modelscope", {
            "label": "ModelScope",
            "api_key_field": "modelscope_api_key",
            "api_base_key": None,
            "api_base_default": None,
+            "api_base_placeholder": "",
            "models": [const.QWEN3_5_27B, const.QWEN3_235B_A22B_INSTRUCT_2507],
        }),
        ("linkai", {
@@ -829,17 +877,26 @@ class ConfigHandler:
            "api_key_field": "linkai_api_key",
            "api_base_key": None,
            "api_base_default": None,
+            "api_base_placeholder": "",
            "models": _RECOMMENDED_MODELS,
        }),
+        ("custom", {
+            "label": "自定义",
+            "api_key_field": "custom_api_key",
+            "api_base_key": "custom_api_base",
+            "api_base_default": "",
+            "api_base_placeholder": _PLACEHOLDER_V1,
+            "models": [],
+        }),
    ])

    EDITABLE_KEYS = {
        "model", "bot_type", "use_linkai",
        "open_ai_api_base", "deepseek_api_base", "claude_api_base", "gemini_api_base",
-        "zhipu_ai_api_base", "moonshot_base_url", "ark_base_url",
+        "zhipu_ai_api_base", "moonshot_base_url", "ark_base_url", "custom_api_base",
        "open_ai_api_key", "deepseek_api_key", "claude_api_key", "gemini_api_key",
        "zhipu_ai_api_key", "dashscope_api_key", "moonshot_api_key",
-        "ark_api_key", "minimax_api_key", "linkai_api_key",
+        "ark_api_key", "minimax_api_key", "linkai_api_key", "custom_api_key",
        "agent_max_context_tokens", "agent_max_context_turns", "agent_max_steps",
        "enable_thinking", "web_password",
    }
@@ -877,6 +934,7 @@ class ConfigHandler:
                    "models": p["models"],
                    "api_base_key": p["api_base_key"],
                    "api_base_default": p["api_base_default"],
+                    "api_base_placeholder": p.get("api_base_placeholder", ""),
                    "api_key_field": p.get("api_key_field"),
                }

@@ -894,7 +952,7 @@ class ConfigHandler:
                "agent_max_context_tokens": local_config.get("agent_max_context_tokens", 50000),
                "agent_max_context_turns": local_config.get("agent_max_context_turns", 20),
                "agent_max_steps": local_config.get("agent_max_steps", 20),
-                "enable_thinking": bool(local_config.get("enable_thinking", True)),
+                "enable_thinking": bool(local_config.get("enable_thinking", False)),
                "api_bases": api_bases,
                "api_keys": api_keys_masked,
                "providers": providers,
@@ -940,6 +998,19 @@ class ConfigHandler:
                json.dump(file_cfg, f, indent=4, ensure_ascii=False)

            logger.info(f"[WebChannel] Config updated: {list(applied.keys())}")
+
+            # Reset Bridge so that bot routing reflects the new config.
+            # Without this, Bridge keeps its cached bot instance (e.g. LinkAIBot)
+            # even after the user switches bot_type / use_linkai / model in UI.
+            bridge_routing_keys = {"bot_type", "use_linkai", "model"}
+            if any(k in applied for k in bridge_routing_keys):
+                try:
+                    from bridge.bridge import Bridge
+                    Bridge().reset_bot()
+                    logger.info("[WebChannel] Bridge bot routing reset due to config change")
+                except Exception as reset_err:
+                    logger.warning(f"[WebChannel] Failed to reset bridge: {reset_err}")
+
            return json.dumps({"status": "success", "applied": applied}, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Error updating config: {e}")
--- a/cli/VERSION
+++ b/cli/VERSION
@@ -1 +1 @@
-2.0.6
+2.0.7
--- a/cli/commands/skill.py
+++ b/cli/commands/skill.py
@@ -644,32 +644,52 @@ def _list_local():
    skills_dir = get_skills_dir()
    builtin_dir = get_builtin_skills_dir()

+    # Merge builtin skills that are on disk but missing from config
+    _merge_builtin_into_config(config, builtin_dir, skills_dir)
+
    if not config:
-        # Fallback: scan directories directly
-        entries = []
-        for d in [builtin_dir, skills_dir]:
-            if not os.path.isdir(d):
-                continue
-            source = "builtin" if d == builtin_dir else "custom"
-            for name in sorted(os.listdir(d)):
-                skill_path = os.path.join(d, name)
-                if os.path.isdir(skill_path) and not name.startswith("."):
-                    has_skill_md = os.path.exists(os.path.join(skill_path, "SKILL.md"))
-                    if has_skill_md:
-                        entries.append({"name": name, "source": source, "enabled": True, "description": ""})
-        if not entries:
-            click.echo("No skills installed.")
-            return
-        _print_skill_table(entries)
+        click.echo("No skills installed.")
        return

    entries = sorted(config.values(), key=lambda x: x.get("name", ""))
-    if not entries:
-        click.echo("No skills installed.")
-        return
    _print_skill_table(entries)


+def _merge_builtin_into_config(config: dict, builtin_dir: str, skills_dir: str):
+    """Add on-disk skills missing from ``config`` (mutated in place) and save it.
+    Sub-directories of ``builtin_dir`` / ``skills_dir`` holding a SKILL.md become
+    enabled entries; saving is best effort and a failure only prints a warning.
+    """
+    dirty = False
+    for d, source in [(builtin_dir, "builtin"), (skills_dir, "custom")]:
+        if not os.path.isdir(d):
+            continue
+        for name in sorted(os.listdir(d)):  # sorted: stable order in the saved file
+            if name.startswith(".") or name in ("skills_config.json",) or name in config:
+                continue
+            skill_path = os.path.join(d, name)
+            if not os.path.isdir(skill_path):
+                continue
+            if not os.path.isfile(os.path.join(skill_path, "SKILL.md")):
+                continue
+            config[name] = {
+                "name": name,
+                "description": _read_skill_description(skill_path),
+                "source": source,
+                "enabled": True,
+                "category": "skill",
+            }
+            dirty = True
+    if dirty:
+        config_path = os.path.join(skills_dir, "skills_config.json")
+        try:
+            os.makedirs(skills_dir, exist_ok=True)
+            with open(config_path, "w", encoding="utf-8") as f:
+                json.dump(config, f, indent=4, ensure_ascii=False)
+        except Exception as e:
+            click.echo(f"Warning: failed to save {config_path}: {e}", err=True)
+
+
 def _print_skill_table(entries):
    """Print skills as a formatted table."""
    def _display_label(e):
--- a/common/cloud_client.py
+++ b/common/cloud_client.py
@@ -56,6 +56,7 @@ class CloudClient(LinkAIClient):
        self._memory_service = None
        self._knowledge_service = None
        self._chat_service = None
+        self._session_service = None

    @property
    def skill_service(self):
@@ -118,6 +119,18 @@ class CloudClient(LinkAIClient):
                logger.error(f"[CloudClient] Failed to init ChatService: {e}")
        return self._chat_service

+    @property
+    def session_service(self):
+        """Lazily create and return SessionService; returns None (after logging) if init fails."""
+        if self._session_service is None:
+            try:
+                from agent.chat.session_service import SessionService
+                self._session_service = SessionService()
+                logger.debug("[CloudClient] SessionService initialised")
+            except Exception as e:
+                logger.error(f"[CloudClient] Failed to init SessionService: {e}")
+        return self._session_service
+
    # ------------------------------------------------------------------
    # message push callback
    # ------------------------------------------------------------------
@@ -546,12 +559,23 @@ class CloudClient(LinkAIClient):
    # ------------------------------------------------------------------
    # history callback
    # ------------------------------------------------------------------
+    # Session-related actions handled via the HISTORY channel
+    _SESSION_ACTIONS = {
+        "list_sessions", "delete_session", "rename_session",
+        "clear_context", "generate_title",
+    }
+
    def on_history(self, data: dict) -> dict:
        """
        Handle HISTORY messages from the cloud console.
-        Returns paginated conversation history for a session.

-        :param data: message data with 'action' and 'payload' (session_id, page, page_size)
+        Supports both history query and session management actions
+        through a unified HISTORY message channel:
+          - query: paginated conversation history
+          - list_sessions / delete_session / rename_session /
+            clear_context / generate_title: session lifecycle
+
+        :param data: message data with 'action' and 'payload'
        :return: response dict
        """
        action = data.get("action", "query")
@@ -561,8 +585,19 @@ class CloudClient(LinkAIClient):
        if action == "query":
            return self._query_history(payload)

+        if action in self._SESSION_ACTIONS:
+            return self._dispatch_session(action, payload)
+
        return {"action": action, "code": 404, "message": f"unknown action: {action}", "payload": None}

+    def _dispatch_session(self, action: str, payload: dict) -> dict:
+        """Forward a session action to SessionService, or answer 500 when it is unavailable."""
+        service = self.session_service
+        if service is not None:
+            return service.dispatch(action, payload)
+        return {"action": action, "code": 500,
+                "message": "SessionService not available", "payload": None}
+
    def _query_history(self, payload: dict) -> dict:
        """Query paginated conversation history using ConversationStore."""
        session_id = payload.get("session_id", "")
--- a/common/const.py
+++ b/common/const.py
@@ -14,6 +14,7 @@ ZHIPU_AI = "zhipu"
 MOONSHOT = "moonshot"
 MiniMax = "minimax"
 DEEPSEEK = "deepseek"
+CUSTOM = "custom"  # custom OpenAI-compatible API, bot_type won't auto-switch on model change
 MODELSCOPE = "modelscope"

 # 模型列表
@@ -27,6 +28,7 @@ CLAUDE_35_SONNET = "claude-3-5-sonnet-latest"  # 带 latest 标签的模型名
 CLAUDE_35_SONNET_1022 = "claude-3-5-sonnet-20241022"  # 带具体日期的模型名称，会固定为该日期发布的模型
 CLAUDE_35_SONNET_0620 = "claude-3-5-sonnet-20240620"
 CLAUDE_4_OPUS = "claude-opus-4-0"
+CLAUDE_4_7_OPUS = "claude-opus-4-7"      # Claude Opus 4.7
 CLAUDE_4_6_OPUS = "claude-opus-4-6"      # Claude Opus 4.6 - Agent推荐模型
 CLAUDE_4_SONNET = "claude-sonnet-4-0"    # Claude Sonnet 4.0
 CLAUDE_4_5_SONNET = "claude-sonnet-4-5"  # Claude Sonnet 4.5 - Agent推荐模型
@@ -80,6 +82,8 @@ TTS_1_HD = "tts-1-hd"
 # DeepSeek
 DEEPSEEK_CHAT = "deepseek-chat"  # DeepSeek-V3对话模型
 DEEPSEEK_REASONER = "deepseek-reasoner"  # DeepSeek-R1模型
+DEEPSEEK_V4_FLASH = "deepseek-v4-flash"  # DeepSeek V4 Flash - 默认推荐 (思考模式 + 工具调用)
+DEEPSEEK_V4_PRO = "deepseek-v4-pro"  # DeepSeek V4 Pro - 复杂任务更强 (思考模式 + 工具调用)

 # Qwen (通义千问 - 阿里云 DashScope)
 QWEN_TURBO = "qwen-turbo"
@@ -101,7 +105,8 @@ MINIMAX_M2 = "MiniMax-M2"  # MiniMax M2
 MINIMAX_ABAB6_5 = "abab6.5-chat"  # MiniMax abab6.5

 # GLM (智谱AI)
-GLM_5_TURBO = "glm-5-turbo"  # 智谱 GLM-5-Turbo - Latest
+GLM_5_1 = "glm-5.1"  # 智谱 GLM-5.1 - Agent recommended model (default)
+GLM_5_TURBO = "glm-5-turbo"  # 智谱 GLM-5-Turbo
 GLM_5 = "glm-5"  # 智谱 GLM-5
 GLM_4 = "glm-4"
 GLM_4_PLUS = "glm-4-plus"
@@ -117,6 +122,7 @@ GLM_4_7 = "glm-4.7"  # 智谱 GLM-4.7 - Agent推荐模型
 MOONSHOT = "moonshot"
 KIMI_K2 = "kimi-k2"
 KIMI_K2_5 = "kimi-k2.5"
+KIMI_K2_6 = "kimi-k2.6"  # Kimi K2.6 - Agent recommended model (default)

 # Doubao (Volcengine Ark)
 DOUBAO = "doubao"
@@ -150,15 +156,21 @@ MODELSCOPE_MODEL_LIST = ["deepseek-ai/DeepSeek-R1-0528", "deepseek-ai/DeepSeek-R


 MODEL_LIST = [
+              # DeepSeek
+              DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, DEEPSEEK_CHAT, DEEPSEEK_REASONER,
+
+              # MiniMax
+              MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,
+
              # Claude
-              CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229, 
-              CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU, 
+              CLAUDE3, CLAUDE_4_6_SONNET, CLAUDE_4_7_OPUS, CLAUDE_4_6_OPUS, CLAUDE_4_OPUS, CLAUDE_4_5_SONNET, CLAUDE_4_SONNET, CLAUDE_3_OPUS, CLAUDE_3_OPUS_0229,
+              CLAUDE_35_SONNET, CLAUDE_35_SONNET_1022, CLAUDE_35_SONNET_0620, CLAUDE_3_SONNET, CLAUDE_3_HAIKU,
              "claude", "claude-3-haiku", "claude-3-sonnet", "claude-3-opus", "claude-3.5-sonnet",
-              
+
              # Gemini
              GEMINI_31_FLASH_LITE_PRE, GEMINI_31_PRO_PRE, GEMINI_3_PRO_PRE, GEMINI_3_FLASH_PRE, GEMINI_25_PRO_PRE, GEMINI_25_FLASH_PRE,
              GEMINI_20_FLASH, GEMINI_20_flash_exp, GEMINI_15_PRO, GEMINI_15_flash, GEMINI_PRO, GEMINI,
-              
+
              # OpenAI
              GPT35, GPT35_0125, GPT35_1106, "gpt-3.5-turbo-16k",
              GPT4, GPT4_06_13, GPT4_32k, GPT4_32k_06_13,
@@ -168,31 +180,29 @@ MODEL_LIST = [
              GPT_5, GPT_5_MINI, GPT_5_NANO,
              GPT_54, GPT_54_MINI, GPT_54_NANO,
              O1, O1_MINI,
-              
-              # DeepSeek
-              DEEPSEEK_CHAT, DEEPSEEK_REASONER,
-              
-              # Qwen
-              QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,
-              
-              # MiniMax
-              MiniMax, MINIMAX_M2_7, MINIMAX_M2_7_HIGHSPEED, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5,

-              # GLM
-              ZHIPU_AI, GLM_5_TURBO, GLM_5, GLM_4, GLM_4_PLUS, GLM_4_flash, GLM_4_LONG, GLM_4_ALLTOOLS,
+              # GLM (智谱AI)
+              ZHIPU_AI, GLM_5_1, GLM_5_TURBO, GLM_5, GLM_4, GLM_4_PLUS, GLM_4_flash, GLM_4_LONG, GLM_4_ALLTOOLS,
              GLM_4_0520, GLM_4_AIR, GLM_4_AIRX, GLM_4_7,

-              # Kimi
-              MOONSHOT, "moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k",
-              KIMI_K2, KIMI_K2_5,
+              # Qwen (通义千问)
+              QWEN36_PLUS, QWEN35_PLUS, QWEN3_MAX, QWEN_MAX, QWEN_PLUS, QWEN_TURBO, QWEN_LONG,

-              # Doubao
+              # Doubao (豆包)
              DOUBAO, DOUBAO_SEED_2_CODE, DOUBAO_SEED_2_PRO, DOUBAO_SEED_2_LITE, DOUBAO_SEED_2_MINI,

+              # Kimi (Moonshot)
+              MOONSHOT, "moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k",
+              KIMI_K2_6, KIMI_K2_5, KIMI_K2,
+
+              # ModelScope
+              MODELSCOPE,
+
+              # LinkAI
+              LINKAI_35, LINKAI_4_TURBO, LINKAI_4o,
+
              # 其他模型
              WEN_XIN, WEN_XIN_4, XUNFEI,
-              LINKAI_35, LINKAI_4_TURBO, LINKAI_4o,
-              MODELSCOPE
            ]

 MODEL_LIST = MODEL_LIST + GITEE_AI_MODEL_LIST + MODELSCOPE_MODEL_LIST
--- a/config-template.json
+++ b/config-template.json
@@ -1,6 +1,8 @@
 {
  "channel_type": "weixin",
-  "model": "MiniMax-M2.7",
+  "model": "deepseek-v4-flash",
+  "deepseek_api_key": "",
+  "deepseek_api_base": "https://api.deepseek.com/v1",
  "minimax_api_key": "",
  "zhipu_ai_api_key": "",
  "ark_api_key": "",
@@ -31,5 +33,6 @@
  "agent_max_context_tokens": 50000,
  "agent_max_context_turns": 20,
  "agent_max_steps": 20,
+  "enable_thinking": false,
  "knowledge": true
 }
--- a/config.py
+++ b/config.py
@@ -17,10 +17,12 @@ available_setting = {
    "open_ai_api_base": "https://api.openai.com/v1",
    "claude_api_base": "https://api.anthropic.com/v1",  # claude api base
    "gemini_api_base": "https://generativelanguage.googleapis.com",  # gemini api base
+    "custom_api_key": "",  # custom OpenAI-compatible provider api key (used when bot_type is "custom")
+    "custom_api_base": "",  # custom OpenAI-compatible provider api base (used when bot_type is "custom")
    "proxy": "",  # openai使用的代理
    # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
    "model": "gpt-3.5-turbo",  # 可选择: gpt-4o, pt-4o-mini, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型，全部可选模型详见common/const.py文件
-    "bot_type": "",  # 可选配置，使用兼容openai格式的三方服务时候，需填"openai"（历史值"chatGPT"仍兼容）。bot具体名称详见common/const.py文件，如不填根据model名称判断
+    "bot_type": "",  # 可选配置，使用兼容openai格式的三方服务时候，需填"openai"或"custom"（custom模式下切换模型不会自动切换bot_type）。bot具体名称详见common/const.py文件，如不填根据model名称判断
    "use_azure_chatgpt": False,  # 是否使用azure的chatgpt
    "azure_deployment_id": "",  # azure 模型部署名称
    "azure_api_version": "",  # azure api版本
@@ -194,6 +196,8 @@ available_setting = {
    "minimax_api_key": "",
    "Minimax_group_id": "",
    "Minimax_base_url": "",
+    "deepseek_api_key": "",
+    "deepseek_api_base": "https://api.deepseek.com/v1",
    "web_port": 9899,
    "web_password": "",  # Web console password; empty means no authentication required
    "web_session_expire_days": 30,  # Auth session expiry in days
@@ -202,8 +206,12 @@ available_setting = {
    "agent_max_context_tokens": 50000,  # Agent模式下最大上下文tokens
    "agent_max_context_turns": 20,  # Agent模式下最大上下文记忆轮次
    "agent_max_steps": 20,  # Agent模式下单次运行最大决策步数
-    "enable_thinking": True,  # Whether to enable deep thinking for web channel
+    "enable_thinking": False,  # Enable deep-thinking mode for thinking-capable models
    "knowledge": True,  # 是否开启知识库功能
+    # Per-skill runtime config. Nested keys are flattened to env vars at startup
+    # using the rule: skill[<name>][<key>] -> SKILL_<NAME>_<KEY>
+    # (e.g. skill["image-generation"].model -> SKILL_IMAGE_GENERATION_MODEL).
+    "skill": {},
 }


@@ -376,12 +384,16 @@ def load_config():
        "gemini_api_base": "GEMINI_API_BASE",
        "minimax_api_key": "MINIMAX_API_KEY",
        "minimax_api_base": "MINIMAX_API_BASE",
+        "deepseek_api_key": "DEEPSEEK_API_KEY",
+        "deepseek_api_base": "DEEPSEEK_API_BASE",
        "zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
        "zhipu_ai_api_base": "ZHIPU_AI_API_BASE",
        "moonshot_api_key": "MOONSHOT_API_KEY",
        "moonshot_api_base": "MOONSHOT_API_BASE",
        "ark_api_key": "ARK_API_KEY",
        "ark_api_base": "ARK_API_BASE",
+        "dashscope_api_key": "DASHSCOPE_API_KEY",
+        "dashscope_api_base": "DASHSCOPE_API_BASE",
        # Channel credentials (used by skills that check env vars)
        "feishu_app_id": "FEISHU_APP_ID",
        "feishu_app_secret": "FEISHU_APP_SECRET",
@@ -402,12 +414,45 @@ def load_config():
            if val:
                os.environ[env_key] = str(val)
                injected += 1
+
+    injected += _sync_skill_config_to_env(config.get("skill", {}))
+
    if injected:
        logger.info("[INIT] Synced {} config values to environment variables".format(injected))

    config.load_user_datas()


+def _sync_skill_config_to_env(skill_section) -> int:
+    """Flatten skill-namespaced config into environment variables.
+
+    Mapping rule: ``config["skill"][<name>][<key>]`` -> ``SKILL_<NAME>_<KEY>``
+    (e.g. ``skill["image-generation"].model`` -> ``SKILL_IMAGE_GENERATION_MODEL``).
+
+    This lets subprocess-based skill scripts read their own settings without
+    importing project code. Existing env vars are NOT overwritten so the
+    real environment always wins.
+
+    Returns the number of variables actually injected.
+    """
+    if not isinstance(skill_section, dict):
+        return 0
+    injected = 0
+    for skill_name, skill_conf in skill_section.items():
+        if not isinstance(skill_conf, dict):
+            continue
+        name_part = str(skill_name).replace("-", "_").upper()
+        for key, val in skill_conf.items():
+            if val is None or val == "":
+                continue
+            env_key = "SKILL_{}_{}".format(name_part, str(key).upper())
+            if env_key in os.environ:
+                continue
+            os.environ[env_key] = str(val)
+            injected += 1
+    return injected
+
+
 def get_root():
    return os.path.dirname(os.path.abspath(__file__))

--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -9,7 +9,9 @@ services:
      - "9899:9899"
    environment:
      CHANNEL_TYPE: 'weixin'
-      MODEL: 'MiniMax-M2.7'
+      MODEL: 'deepseek-v4-flash'
+      DEEPSEEK_API_KEY: ''
+      DEEPSEEK_API_BASE: 'https://api.deepseek.com/v1'
      MINIMAX_API_KEY: ''
      ZHIPU_AI_API_KEY: ''
      ARK_API_KEY: ''
--- a/docs/channels/web.mdx
+++ b/docs/channels/web.mdx
@@ -12,7 +12,7 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
  "channel_type": "web",
  "web_port": 9899,
  "web_password": "",
-  "enable_thinking": true
+  "enable_thinking": false
 }
 ```

@@ -22,7 +22,7 @@ Web 控制台是 CowAgent 的默认通道，启动后会自动运行，通过浏
 | `web_port` | Web 服务监听端口 | `9899` |
 | `web_password` | 访问密码，留空表示不启用密码保护 | `""` |
 | `web_session_expire_days` | 登录会话有效天数 | `30` |
-| `enable_thinking` | 是否启用深度思考，开启后 Web 端展示推理过程，关闭可加速响应 | `true` |
+| `enable_thinking` | 是否启用深度思考模式 | `false` |

 配置密码后，访问控制台时需先输入密码完成登录。登录状态默认保持 30 天，期间重启服务也无需重新登录。密码也支持在控制台的「配置」页面中在线修改。

--- a/docs/cli/general.mdx
+++ b/docs/cli/general.mdx
@@ -58,17 +58,18 @@ Session: 12 messages | 8 skills loaded
 **修改配置项：**

 ```text
-/config model deepseek-chat
+/config model deepseek-v4-flash
 ```

 **支持修改的配置项：**

 | 配置项 | 说明 | 示例值 |
 | --- | --- | --- |
-| `model` | AI 模型名称 | `deepseek-chat` |
+| `model` | AI 模型名称 | `deepseek-v4-flash` |
 | `agent_max_context_tokens` | 最大上下文 tokens | `40000` |
 | `agent_max_context_turns` | 最大上下文记忆轮次 | `30` |
 | `agent_max_steps` | 单次任务最大决策步数 | `15` |
+| `enable_thinking` | 是否启用深度思考模式 | `true` / `false` |

 <Note>
  修改 `model` 时，系统会自动匹配对应的模型调用方式。配置会写入 `config.json` 并持久保存。
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -72,17 +72,18 @@
                "group": "模型配置",
                "pages": [
                  "models/index",
+                  "models/deepseek",
                  "models/minimax",
-                  "models/glm",
-                  "models/qwen",
-                  "models/kimi",
-                  "models/doubao",
                  "models/claude",
                  "models/gemini",
                  "models/openai",
-                  "models/deepseek",
+                  "models/glm",
+                  "models/qwen",
+                  "models/doubao",
+                  "models/kimi",
                  "models/linkai",
-                  "models/coding-plan"
+                  "models/coding-plan",
+                  "models/custom"
                ]
              }
            ]
@@ -132,6 +133,14 @@
                  "skills/create",
                  "skills/hub"
                ]
+              },
+              {
+                "group": "内置技能",
+                "pages": [
+                  "skills/skill-creator",
+                  "skills/knowledge-wiki",
+                  "skills/image-generation"
+                ]
              }
            ]
          },
@@ -199,6 +208,7 @@
                "group": "发布记录",
                "pages": [
                  "releases/overview",
+                  "releases/v2.0.7",
                  "releases/v2.0.6",
                  "releases/v2.0.5",
                  "releases/v2.0.4",
@@ -247,17 +257,18 @@
                "group": "Model Configuration",
                "pages": [
                  "en/models/index",
+                  "en/models/deepseek",
                  "en/models/minimax",
-                  "en/models/glm",
-                  "en/models/qwen",
-                  "en/models/kimi",
-                  "en/models/doubao",
                  "en/models/claude",
                  "en/models/gemini",
                  "en/models/openai",
-                  "en/models/deepseek",
+                  "en/models/glm",
+                  "en/models/qwen",
+                  "en/models/doubao",
+                  "en/models/kimi",
                  "en/models/linkai",
-                  "en/models/coding-plan"
+                  "en/models/coding-plan",
+                  "en/models/custom"
                ]
              }
            ]
@@ -304,9 +315,16 @@
                "pages": [
                  "en/skills/index",
                  "en/skills/install",
-                  "en/skills/skill-creator",
                  "en/skills/hub"
                ]
+              },
+              {
+                "group": "Built-in Skills",
+                "pages": [
+                  "en/skills/skill-creator",
+                  "en/skills/knowledge-wiki",
+                  "en/skills/image-generation"
+                ]
              }
            ]
          },
@@ -374,6 +392,7 @@
                "group": "Release Notes",
                "pages": [
                  "en/releases/overview",
+                  "en/releases/v2.0.7",
                  "en/releases/v2.0.6",
                  "en/releases/v2.0.5",
                  "en/releases/v2.0.4",
@@ -422,17 +441,18 @@
                "group": "モデル設定",
                "pages": [
                  "ja/models/index",
+                  "ja/models/deepseek",
                  "ja/models/minimax",
-                  "ja/models/glm",
-                  "ja/models/qwen",
-                  "ja/models/kimi",
-                  "ja/models/doubao",
                  "ja/models/claude",
                  "ja/models/gemini",
                  "ja/models/openai",
-                  "ja/models/deepseek",
+                  "ja/models/glm",
+                  "ja/models/qwen",
+                  "ja/models/doubao",
+                  "ja/models/kimi",
                  "ja/models/linkai",
-                  "ja/models/coding-plan"
+                  "ja/models/coding-plan",
+                  "ja/models/custom"
                ]
              }
            ]
@@ -482,6 +502,14 @@
                  "ja/skills/create",
                  "ja/skills/hub"
                ]
+              },
+              {
+                "group": "内蔵スキル",
+                "pages": [
+                  "ja/skills/skill-creator",
+                  "ja/skills/knowledge-wiki",
+                  "ja/skills/image-generation"
+                ]
              }
            ]
          },
@@ -549,6 +577,7 @@
                "group": "リリースノート",
                "pages": [
                  "ja/releases/overview",
+                  "ja/releases/v2.0.7",
                  "ja/releases/v2.0.6",
                  "ja/releases/v2.0.5",
                  "ja/releases/v2.0.4",
--- a/docs/en/README.md
+++ b/docs/en/README.md
@@ -28,7 +28,7 @@
 - ✅ **Tool System**: Built-in tools for file I/O, terminal execution, browser automation, scheduled tasks, messaging, and more — autonomously invoked by the Agent.
 - ✅ **CLI System**: Provides terminal commands and in-chat commands for process management, skill installation, configuration, and more.
 - ✅ **Multimodal Messages**: Supports parsing, processing, generating, and sending text, images, voice, files, and other message types.
- ✅ **Multiple Model Support**: Supports OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and other mainstream model providers.
+- ✅ **Multiple Model Support**: Supports DeepSeek, MiniMax, Claude, Gemini, OpenAI, GLM, Qwen, Doubao, Kimi, and other mainstream model providers.
 - ✅ **Multi-platform Deployment**: Runs on local computers or servers, integrable into WeChat, Web, Feishu, DingTalk, WeChat Official Account, and WeCom applications.

 ## Disclaimer
@@ -164,15 +164,15 @@ Supports mainstream model providers. Recommended models for Agent mode:

 | Provider | Recommended Model |
 | --- | --- |
+| DeepSeek | `deepseek-v4-flash` |
 | MiniMax | `MiniMax-M2.7` |
-| GLM | `glm-5-turbo` |
-| Kimi | `kimi-k2.5` |
-| Doubao | `doubao-seed-2-0-code-preview-260215` |
-| Qwen | `qwen3.6-plus` |
 | Claude | `claude-sonnet-4-6` |
 | Gemini | `gemini-3.1-pro-preview` |
 | OpenAI | `gpt-5.4` |
-| DeepSeek | `deepseek-chat` |
+| GLM | `glm-5.1` |
+| Qwen | `qwen3.6-plus` |
+| Doubao | `doubao-seed-2-0-code-preview-260215` |
+| Kimi | `kimi-k2.6` |

 For detailed configuration of each model, see the [Models documentation](https://docs.cowagent.ai/en/models/index).

--- a/docs/en/cli/general.mdx
+++ b/docs/en/cli/general.mdx
@@ -44,17 +44,18 @@ View or modify runtime configuration. Changes take effect immediately without re
 **Modify a config item:**

 ```text
-/config model deepseek-chat
+/config model deepseek-v4-flash
 ```

 **Configurable items:**

 | Item | Description | Example |
 | --- | --- | --- |
-| `model` | AI model name | `deepseek-chat` |
+| `model` | AI model name | `deepseek-v4-flash` |
 | `agent_max_context_tokens` | Max context tokens | `40000` |
 | `agent_max_context_turns` | Max context memory turns | `30` |
 | `agent_max_steps` | Max decision steps per task | `15` |
+| `enable_thinking` | Enable deep thinking mode | `true` / `false` |

 <Note>
  When changing `model`, the system automatically matches the corresponding model API. Configuration is persisted to `config.json`.
--- a/docs/en/guide/manual-install.mdx
+++ b/docs/en/guide/manual-install.mdx
@@ -121,7 +121,8 @@ sudo docker logs -f chatgpt-on-wechat
 ```json
 {
  "channel_type": "web",
-  "model": "MiniMax-M2.5",
+  "model": "deepseek-v4-flash",
+  "deepseek_api_key": "",
  "agent": true,
  "agent_workspace": "~/cow",
  "agent_max_context_tokens": 40000,
@@ -133,7 +134,7 @@ sudo docker logs -f chatgpt-on-wechat
 | Parameter | Description | Default |
 | --- | --- | --- |
 | `channel_type` | Channel type | `web` |
-| `model` | Model name | `MiniMax-M2.5` |
+| `model` | Model name | `deepseek-v4-flash` |
 | `agent` | Enable Agent mode | `true` |
 | `agent_workspace` | Agent workspace path | `~/cow` |
 | `agent_max_context_tokens` | Max context tokens | `40000` |
--- a/docs/en/intro/architecture.mdx
+++ b/docs/en/intro/architecture.mdx
@@ -9,7 +9,7 @@ CowAgent 2.0 has evolved from a simple chatbot into a super intelligent assistan

 CowAgent's architecture consists of the following core modules:

-<img src="https://cdn.link-ai.tech/doc/68ef7b212c6f791e0e74314b912149f9-sz_5847990.png" alt="CowAgent Architecture" />
+<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />

 | Module | Description |
 | --- | --- |
--- a/docs/en/memory/index.mdx
+++ b/docs/en/memory/index.mdx
@@ -5,6 +5,8 @@ description: CowAgent long-term memory system — file persistence, automatic wr

 Long-term memory is stored in workspace files, persisting across sessions. The Agent loads historical memory on demand via retrieval tools during conversation, and automatically writes conversation summaries to long-term memory when context is trimmed.

+<img src="https://cdn.link-ai.tech/doc/memory-architecture-en.jpg" alt="Memory Architecture" />
+
 ## Memory Types

 ### Core Memory (MEMORY.md)
@@ -39,20 +41,25 @@ The memory system supports hybrid retrieval modes:

 The Agent automatically triggers memory retrieval during conversation as needed, incorporating relevant historical information into context. Results are ranked by a combined score (default: 0.7 vector weight + 0.3 keyword weight). Daily memory scores decay over time (30-day half-life), while core memory does not decay.

-## First Launch
+## Related Files

-On first launch, the Agent will proactively ask the user for key information and save it to the workspace (default `~/cow`):
+Files related to memory in the workspace (default `~/cow`):

 | File | Description |
 | --- | --- |
-| `system.md` | Agent system prompt and behavior settings |
-| `user.md` | User identity information and preferences |
+| `AGENT.md` | Agent personality and behavior settings |
+| `USER.md` | User identity information and preferences |
+| `RULE.md` | Custom rules and constraints |
 | `MEMORY.md` | Core memory (long-term) |
 | `memory/YYYY-MM-DD.md` | Daily memory (created on demand) |
 | `memory/dreams/YYYY-MM-DD.md` | Dream diary (auto-generated by Deep Dream) |

+## Web Console
+
+The memory management page in the Web console allows browsing memory files and dream diaries, with tab switching support:
+
 <Frame>
-  <img src="https://cdn.link-ai.tech/doc/20260203000455.png" width="800" />
+  <img src="https://cdn.link-ai.tech/doc/20260414171014.png" width="800" />
 </Frame>

 ## Configuration
--- a/docs/en/models/claude.mdx
+++ b/docs/en/models/claude.mdx
@@ -12,6 +12,6 @@ description: Claude model configuration

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
+| `model` | Options include `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-sonnet-4-0`, `claude-3-5-sonnet-latest`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
 | `claude_api_key` | Create at [Claude Console](https://console.anthropic.com/settings/keys) |
 | `claude_api_base` | Optional. Defaults to `https://api.anthropic.com/v1`. Change to use third-party proxy |
--- a/docs/en/models/coding-plan.mdx
+++ b/docs/en/models/coding-plan.mdx
@@ -102,18 +102,18 @@ Reference: [China Quick Start](https://docs.bigmodel.cn/cn/coding-plan/quick-sta

 ```json
 {
-  "bot_type": "openai",
+  "bot_type": "moonshot",
  "model": "kimi-for-coding",
-  "open_ai_api_base": "https://api.kimi.com/coding/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "moonshot_base_url": "https://api.kimi.com/coding/v1",
+  "moonshot_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | `kimi-for-coding` |
-| `open_ai_api_base` | `https://api.kimi.com/coding/v1` |
-| `open_ai_api_key` | Coding Plan specific key (not shared with pay-as-you-go) |
+| `model` | Use `kimi-for-coding` for an auto-updating model, or specify a model such as `kimi-k2.6` |
+| `moonshot_base_url` | `https://api.kimi.com/coding/v1` |
+| `moonshot_api_key` | Coding Plan specific key (not shared with pay-as-you-go) |

 Reference: [Key & Docs](https://www.kimi.com/code/docs/)

--- a/docs/en/models/custom.mdx
+++ b/docs/en/models/custom.mdx
@@ -0,0 +1,62 @@
+---
+title: Custom
+description: Custom provider for third-party APIs and local models
+---
+
+For models accessed via OpenAI-compatible APIs, such as:
+
+- **Third-party API proxies**: Use a unified API Base to call multiple models
+- **Local models**: Models deployed locally via Ollama, vLLM, LocalAI, etc.
+- **Private deployments**: Self-hosted model services within your organization
+
+<Note>
+  Unlike the `openai` provider, switching models under the Custom provider will not auto-switch the provider type. Your custom API address is always preserved.
+</Note>
+
+## Configuration
+
+### Third-party API Proxy
+
+```json
+{
+  "bot_type": "custom",
+  "model": "deepseek-v4-flash",
+  "custom_api_key": "YOUR_API_KEY",
+  "custom_api_base": "https://{your-proxy.com}/v1"
+}
+```
+
+| Parameter | Description |
+| --- | --- |
+| `bot_type` | Must be set to `custom` |
+| `model` | Model name, any model supported by your proxy service |
+| `custom_api_key` | API key provided by your proxy service |
+| `custom_api_base` | API base URL, must be OpenAI-compatible |
+
+### Local Models
+
+Local models typically don't require an API key — just set the API base:
+
+```json
+{
+  "bot_type": "custom",
+  "model": "qwen3.5:27b",
+  "custom_api_base": "http://localhost:11434/v1"
+}
+```
+
+Common local deployment tools and their default addresses:
+
+| Tool | Default API Base |
+| --- | --- |
+| [Ollama](https://ollama.com) | `http://localhost:11434/v1` |
+| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
+| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
+
+## Switching Models
+
+Under the Custom provider, switching models only changes `model` without affecting `bot_type` or the API address:
+
+```text
+/config model qwen3.5:27b
+```
--- a/docs/en/models/deepseek.mdx
+++ b/docs/en/models/deepseek.mdx
@@ -7,26 +7,57 @@ Option 1: Native integration (recommended):

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "deepseek_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | `deepseek-chat` (DeepSeek-V3.2, non-thinking mode), `deepseek-reasoner` (DeepSeek-R1, thinking mode) |
+| `model` | Supports `deepseek-v4-flash` (default) and `deepseek-v4-pro` |
 | `deepseek_api_key` | Create at [DeepSeek Platform](https://platform.deepseek.com/api_keys) |
 | `deepseek_api_base` | Optional, defaults to `https://api.deepseek.com/v1`. Can be changed to a third-party proxy |

+## Model Selection
+
+| Model | Use Case |
+| --- | --- |
+| `deepseek-v4-flash` | Default: fast and cost-effective |
+| `deepseek-v4-pro` | Stronger on complex tasks |
+
+## Thinking Mode
+
+The V4 series (`deepseek-v4-flash` / `deepseek-v4-pro`) supports an explicit "thinking mode": the model emits a chain-of-thought (`reasoning_content`) before the final answer to improve answer quality.
+
+### Toggle
+
+Controlled by the global `enable_thinking` setting:
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`: thinking is on across all channels. The Web console renders the reasoning trace; IM channels (WeChat / WeCom / DingTalk / Feishu) don't render it but still benefit from higher answer quality.
+- `false`: thinking off, faster responses with lower first-token latency.
+
+### Notes
+
+- **Sampling parameters**: under thinking mode, `temperature`, `top_p`, `presence_penalty`, and `frequency_penalty` are silently ignored by the server (no error). CowAgent skips sending them automatically.
+- **Multi-turn tool calls**: once the history contains any tool-call turn, DeepSeek requires `reasoning_content` on every assistant message. CowAgent handles the round-trip automatically, including across mid-session toggles of the thinking switch.
+
+<Tip>
+  Start with `deepseek-v4-flash`; switch to `deepseek-v4-pro` for harder tasks; enable `enable_thinking` when you want deeper reasoning.
+</Tip>
+
 Option 2: OpenAI-compatible configuration:

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "bot_type": "openai",
  "open_ai_api_key": "YOUR_API_KEY",
  "open_ai_api_base": "https://api.deepseek.com/v1"
 }
 ```
-
-
--- a/docs/en/models/glm.mdx
+++ b/docs/en/models/glm.mdx
@@ -5,14 +5,14 @@ description: Zhipu AI GLM model configuration

 ```json
 {
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "zhipu_ai_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
+| `model` | Options include `glm-5.1`, `glm-5-turbo`, `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, `glm-4-air`, etc. See [model codes](https://bigmodel.cn/dev/api/normal-model/glm-4) |
 | `zhipu_ai_api_key` | Create at [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) |

 OpenAI-compatible configuration is also supported:
@@ -20,7 +20,7 @@ OpenAI-compatible configuration is also supported:
 ```json
 {
  "bot_type": "openai",
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/en/models/index.mdx
+++ b/docs/en/models/index.mdx
@@ -6,7 +6,7 @@ description: Supported models and recommended choices for CowAgent
 CowAgent supports mainstream LLMs from domestic and international providers. Model interfaces are implemented in the project's `models/` directory.

 <Note>
-  For Agent mode, the following models are recommended based on quality and cost: MiniMax-M2.7, glm-5-turbo, kimi-k2.5, qwen3.6-plus, claude-sonnet-4-6, gemini-3.1-pro-preview
+  For Agent mode, the following models are recommended based on quality and cost: deepseek-v4-flash, MiniMax-M2.7, claude-sonnet-4-6, gemini-3.1-pro-preview, glm-5.1, qwen3.6-plus, kimi-k2.6
 </Note>

 ## Configuration
@@ -18,21 +18,12 @@ You can also use the [LinkAI](https://link-ai.tech) platform interface to flexib
 ## Supported Models

 <CardGroup cols={2}>
+  <Card title="DeepSeek" href="/en/models/deepseek">
+    deepseek-v4-flash, deepseek-v4-pro, and more
+  </Card>
  <Card title="MiniMax" href="/en/models/minimax">
    MiniMax-M2.7 and other series models
  </Card>
-  <Card title="GLM (Zhipu AI)" href="/en/models/glm">
-    glm-5-turbo, glm-5 and other series models
-  </Card>
-  <Card title="Qwen (Tongyi Qianwen)" href="/en/models/qwen">
-    qwen3.6-plus, qwen3-max and more
-  </Card>
-  <Card title="Kimi" href="/en/models/kimi">
-    kimi-k2.5, kimi-k2 and more
-  </Card>
-  <Card title="Doubao (ByteDance)" href="/en/models/doubao">
-    doubao-seed series models
-  </Card>
  <Card title="Claude" href="/en/models/claude">
    claude-sonnet-4-6 and more
  </Card>
@@ -42,8 +33,17 @@ You can also use the [LinkAI](https://link-ai.tech) platform interface to flexib
  <Card title="OpenAI" href="/en/models/openai">
    gpt-5.4, gpt-4.1, o-series and more
  </Card>
-  <Card title="DeepSeek" href="/en/models/deepseek">
-    deepseek-chat, deepseek-reasoner
+  <Card title="GLM (Zhipu AI)" href="/en/models/glm">
+    glm-5.1, glm-5-turbo, glm-5 and other series models
+  </Card>
+  <Card title="Qwen (Tongyi Qianwen)" href="/en/models/qwen">
+    qwen3.6-plus, qwen3-max and more
+  </Card>
+  <Card title="Doubao (ByteDance)" href="/en/models/doubao">
+    doubao-seed series models
+  </Card>
+  <Card title="Kimi" href="/en/models/kimi">
+    kimi-k2.6, kimi-k2.5, kimi-k2 and more
  </Card>
  <Card title="LinkAI" href="/en/models/linkai">
    Unified multi-model interface + knowledge base
--- a/docs/en/models/kimi.mdx
+++ b/docs/en/models/kimi.mdx
@@ -5,14 +5,14 @@ description: Kimi (Moonshot) model configuration

 ```json
 {
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "moonshot_api_key": "YOUR_API_KEY"
 }
 ```

 | Parameter | Description |
 | --- | --- |
-| `model` | Options include `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
+| `model` | Options include `kimi-k2.6`, `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, `moonshot-v1-128k` |
 | `moonshot_api_key` | Create at [Moonshot Console](https://platform.moonshot.cn/console/api-keys) |

 OpenAI-compatible configuration is also supported:
@@ -20,7 +20,7 @@ OpenAI-compatible configuration is also supported:
 ```json
 {
  "bot_type": "openai",
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "open_ai_api_base": "https://api.moonshot.cn/v1",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/en/models/linkai.mdx
+++ b/docs/en/models/linkai.mdx
@@ -3,7 +3,7 @@ title: LinkAI
 description: Unified access to multiple models via LinkAI platform
 ---

-The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between OpenAI, Claude, Gemini, DeepSeek, Qwen, Kimi, and other models, with support for knowledge base, workflows, plugins, and other Agent capabilities.
+The [LinkAI](https://link-ai.tech) platform lets you flexibly switch between OpenAI, Claude, Gemini, DeepSeek, MiniMax, Qwen, Kimi, and other models, with support for knowledge base, workflows, plugins, and other Agent capabilities.

 ```json
 {
--- a/docs/en/releases/overview.mdx
+++ b/docs/en/releases/overview.mdx
@@ -5,6 +5,7 @@ description: CowAgent version history

 | Version | Date | Description |
 | --- | --- | --- |
+| [2.0.7](/en/releases/v2.0.7) | 2026.04.22 | Image Generation Skill (6-provider auto-routing), new models (Kimi K2.6, Claude Opus 4.7, GLM 5.1), knowledge base and Web Console improvements |
 | [2.0.6](/en/releases/v2.0.6) | 2026.04.14 | Knowledge Base, Deep Dream Memory Distillation, Smart Context Compression, Web Console upgrades |
 | [2.0.5](/en/releases/v2.0.5) | 2026.04.01 | Cow CLI, Skill Hub open source, Browser tool, WeCom Bot QR scan, and more |
 | [2.0.4](/en/releases/v2.0.4) | 2026.03.22 | Personal WeChat channel, new model support, Japanese docs, script refactoring and bug fixes |
--- a/docs/en/releases/v2.0.7.mdx
+++ b/docs/en/releases/v2.0.7.mdx
@@ -0,0 +1,65 @@
+---
+title: v2.0.7
+description: CowAgent 2.0.7 - Image Generation Skill (6-provider auto-routing), new models, knowledge base enhancements, Web Console improvements and bug fixes
+---
+
+## 🎨 Image Generation Skill
+
+New built-in `image-generation` skill supporting text-to-image, image-to-image, and multi-image fusion across six major providers:
+
+- **6-provider auto-routing**: OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (Volcengine Ark) → Qwen (DashScope) → MiniMax → LinkAI — automatically selects from configured providers in fixed priority order, with automatic fallback on failure
+- **Zero model selection**: Just configure an API key and it works — no need to manually specify a model. You can also name a specific model in conversation (e.g. "draw a cat with seedream")
+- **Flexible control**: Supports `quality`, `size` (512/1K–4K), and `aspect_ratio` parameters, with each provider automatically mapping to its supported values
+- **Image editing**: Pass existing images for editing, style transfer, or multi-image fusion (Seedream supports up to 14 reference images)
+- **Skill-level config**: Pin a default model via `skill.image-generation.model` in `config.json`
+- **Image lightbox**: All images in the Web console now support click-to-enlarge preview
+
+Docs: [Image Generation Skill](https://docs.cowagent.ai/en/skills/image-generation)
+
+## 🤖 New Model Support
+
+- **Kimi K2.6**: Added `kimi-k2.6` model support
+- **Claude Opus 4.7**: Added `claude-opus-4-7` model support
+- **GLM 5.1**: Added `glm-5.1` model support
+- **Kimi Coding Plan**: Support for Kimi Coding Plan mode
+- **Custom model providers**: New custom model provider configuration for easier integration with additional vendors
+
+## 💬 Web Console Improvements
+
+- **Smart auto-scroll**: Improved chat scroll behaviour — no longer forces scroll to bottom while the user is reading earlier messages
+- **Reasoning content cap**: Deep thinking content capped at 4 KB to prevent frontend lag
+- **Mobile optimisation**: Session sidebar hidden by default on mobile, with overlay dismiss support
+- **Session title fix**: Fixed title auto-generation fallback logic and Bridge reset on config change
+- **Image preview dedup**: Fixed duplicate image rendering within the same message
+
+## 📚 Knowledge Base Enhancements
+
+- **Nested directory support**: Knowledge base listing and display now support multi-level nested directories
+- **Root-level file display**: Show `index.md`, `log.md` and other root-level files in the knowledge tree
+- **Empty state stats fix**: Root-level files no longer interfere with empty-state detection
+
+## 🌙 Dream Memory Improvements
+
+- **Structured organisation**: Dream memory files are now auto-archived by date with a cleaner directory structure
+- **Schedule jitter**: Daily dream trigger includes random jitter to avoid concurrency conflicts in cluster deployments
+
+## 🛠 Skill System Improvements
+
+- **Skill manager refresh**: `/skill` commands now automatically refresh the skill manager to keep state in sync
+- **Installation sources**: Skill installation supports multiple source formats (URL, zip, local file, etc.) with automatic target directory handling
+
+## 🐛 Other Fixes
+
+- **Gemini fix**: Fixed Gemini tool calls not returning results
+- **Agent retry**: Empty-response retries no longer drop `tool_calls`
+- **Docker env sync**: Fixed environment variables not syncing after config update in Docker environments
+- **Python 3.7 compat**: Deferred `Literal` import for Python 3.7 compatibility
+- **Model switch notification**: Fixed bot_type change notification not showing after model switch. Thanks @6vision
+- **Config command**: `/config` now supports setting `enable_thinking`
+- **Thinking display**: Deep thinking display disabled by default
+
+## 📦 Upgrade
+
+Run `cow update` or `./run.sh update` to upgrade, or pull the latest code and restart. See [Upgrade Guide](https://docs.cowagent.ai/en/guide/upgrade).
+
+**Release Date**: 2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...master)
--- a/docs/en/skills/image-generation.mdx
+++ b/docs/en/skills/image-generation.mdx
@@ -0,0 +1,158 @@
+---
+title: image-generation - Image Generation
+description: Text-to-image / image-to-image / multi-image fusion with automatic multi-provider routing and fallback
+---
+
+A general-purpose image generation and editing skill supporting six providers: OpenAI, Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax, and LinkAI. No need to choose a model manually — the script automatically selects a configured provider based on a fixed priority order.
+
+## Model Selection
+
+`image-generation` uses a "fixed priority + automatic fallback" strategy — just configure your keys and it works:
+
+1. **Priority order**: `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
+2. **Unconfigured providers are skipped**: only providers with an API key participate
+3. **Automatic fallback on failure**: on errors like 401, model not enabled, or network issues, the next provider is tried
+4. **Specified model goes first**: if a specific model name is provided, its provider is promoted to the front
+
+### Supported Models
+
+| Provider | Models / Aliases | Notes |
+| --- | --- | --- |
+| OpenAI | `gpt-image-2`, `gpt-image-1` | General-purpose, high quality, supports `quality` parameter |
+| Gemini Nano Banana | `nano-banana-2`, `nano-banana-pro`, `nano-banana` | Corresponds to `gemini-3.1-flash`, `gemini-3-pro`, `gemini-2.5-flash` image variants |
+| Seedream (Volcengine Ark) | `seedream-5.0-lite`, `seedream-4.5` | Native 2K–4K, up to 14 reference images for fusion |
+| Qwen (DashScope) | `qwen-image-2.0`, `qwen-image-2.0-pro` | Strong with Chinese text rendering and text-image layouts |
+| MiniMax | `image-01` | Fast and simple image generation |
+| LinkAI | Any model | Universal proxy, used as fallback |
+
+<Note>
+By default, the Agent does not pick a model — it uses automatic routing. If you want a specific model, just say so in the conversation, e.g. "use seedream to draw a cat" or "generate a poster with gpt-image-2". You can also pin a default model via the "Custom Configuration" section below.
+</Note>
+
+## Custom Configuration
+
+### API Key Setup
+
+You need **at least one** provider key. Configuring multiple providers enables automatic fallback. There are three ways to set up keys:
+
+#### Option 1: Automatic Reuse of Existing Keys
+
+If you have already configured model keys in the web console or `config.json` (e.g. `openai_api_key`, `gemini_api_key`, etc.), these keys are **automatically synced** to the corresponding environment variables at startup. In other words, if your chat model works, image generation can use the same key with zero extra configuration.
+
+#### Option 2: Configure in config.json
+
+Add the key fields directly to `config.json`:
+
+```json
+{
+  "openai_api_key": "sk-xxx",
+  "openai_api_base": "https://api.openai.com/v1",
+  "gemini_api_key": "AIza-xxx",
+  "ark_api_key": "xxx",
+  "dashscope_api_key": "sk-xxx",
+  "minimax_api_key": "xxx",
+  "linkai_api_key": "xxx"
+}
+```
+
+A restart is required after changes. Each key also has a corresponding `*_api_base` field for custom endpoints.
+
+#### Option 3: Configure via Conversation
+
+Send an API key in the chat and the Agent will save it to `~/cow/.env` using the `env_config` tool — **no restart needed**. For example:
+
+```
+Set OPENAI_API_KEY to sk-xxx
+```
+
+Or:
+
+```
+Configure ARK_API_KEY as xxx
+```
+
+### API Key Reference
+
+| Environment Variable | config.json Field | Provider | Default Base URL |
+| --- | --- | --- | --- |
+| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
+| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
+| `ARK_API_KEY` | `ark_api_key` | Volcengine Ark (Seedream) | `https://ark.cn-beijing.volces.com/api/v3` |
+| `DASHSCOPE_API_KEY` | `dashscope_api_key` | Alibaba DashScope (Qwen) | `https://dashscope.aliyuncs.com` |
+| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
+| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
+
+### Pinning a Default Model
+
+To force all image generation through a specific provider's model, add this to `config.json`:
+
+```json
+"skill": {
+  "image-generation": {
+    "model": "seedream-5.0-lite"
+  }
+}
+```
+
+At startup, this is automatically converted to the environment variable `SKILL_IMAGE_GENERATION_MODEL`, and the script will always use this model's provider for generation.
+
+## Enabling and Disabling
+
+`image-generation` is a built-in skill that **automatically adjusts its status based on API keys**:
+
+- **Key configured**: the skill is active — the Agent will invoke it when asked to draw
+- **Key not configured**: the skill still appears in context (marked as "needs configuration") — the Agent will guide the user to set up a key rather than failing silently
+
+To control it manually:
+
+```text
+/skill disable image-generation    # Disable (won't be invoked even if keys are present)
+/skill enable image-generation     # Re-enable
+```
+
+In the terminal: `cow skill disable image-generation` / `cow skill enable image-generation`.
+
+## Parameters
+
+| Parameter | Type | Required | Default | Description |
+| --- | --- | --- | --- | --- |
+| `prompt` | string | Yes | — | Image description |
+| `image_url` | string / list | No | null | Input image(s) for editing — local path or URL. Pass multiple for multi-image fusion |
+| `quality` | string | No | auto | `low` / `medium` / `high` — only some providers support this |
+| `size` | string | No | auto | `512` / `1K` / `2K` / `3K` / `4K`, or pixel value like `1024x1024` |
+| `aspect_ratio` | string | No | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`; Gemini also supports `1:4` / `4:1` / `1:8` / `8:1` |
+
+<Warning>
+**Higher quality and larger size cost more and take longer.**
+
+- For everyday conversations and quick previews, use the defaults (`auto`) or `quality=low` + `size=1K` — roughly 20 seconds
+- For posters or when the user explicitly asks for high resolution, use `quality=high` + `size=2K/4K` — may take 1–5 minutes depending on the model
+</Warning>
+
+## Output
+
+On success:
+
+```json
+{
+  "model": "doubao-seedream-5-0-260128",
+  "images": [
+    {"url": "/path/to/output.png"}
+  ]
+}
+```
+
+On failure: `{ "error": "..." }`. After an error, **do not retry directly** — it is almost always a configuration issue (wrong key, incorrect API base, model not enabled). Have the user fix the configuration first.
+
+## Common Use Cases
+
+- **Text-to-image**: generate illustrations, posters, icons, avatars, storyboards, etc. from a description
+- **Image-to-image**: change styles, swap elements, add decorations or text on an existing image
+- **Multi-image fusion**: combine multiple reference images into one (outfit swaps, character group photos, etc.)
+
+<Note>
+- Bash timeout should be set to 600 seconds. Each provider has a 300-second HTTP timeout, but the script may try multiple providers sequentially
+- Input images are automatically compressed to ≤ 4 MB with the longest edge ≤ 4096 px
+- Gemini / Seedream / Qwen / MiniMax do not support the `quality` parameter — passing it has no effect
+- Seedream defaults to 2K; `seedream-5.0-lite` supports up to 3K; `seedream-4.5` supports up to 4K
+</Note>
--- a/docs/en/skills/knowledge-wiki.mdx
+++ b/docs/en/skills/knowledge-wiki.mdx
@@ -0,0 +1,112 @@
+---
+title: knowledge-wiki - Knowledge Base
+description: Maintain a local structured knowledge base with automatic archiving, categorisation, and cross-referencing
+---
+
+Organises notes, insights, and reference materials from your conversations into a structured local knowledge base, automatically maintaining an index and cross-references between pages.
+
+`knowledge-wiki` maintains a `knowledge/` directory in your workspace — essentially the Agent's "second brain". The skill is marked `always: true`, so it is **always loaded** and requires no external dependencies.
+
+## When It Triggers
+
+- You share an article, document, or URL that you want to keep for future reference
+- A conversation produces conclusions worth retaining long-term
+- You want to look up something you accumulated earlier
+
+## Directory Structure
+
+```
+knowledge/
+├── index.md           # Global index (must be maintained)
+├── log.md             # Operation log (append-only)
+└── <category>/        # Category subdirectories (grouped by content)
+    └── <slug>.md      # Knowledge page (lowercase-hyphenated filename)
+```
+
+## Three Core Operations
+
+### 1. Ingest
+
+When you share some material, the Agent will:
+
+1. Read and understand the original content, extracting key information
+2. Decide which category it belongs to — check `index.md` first; create a new category if none fits
+3. Generate a knowledge page at `knowledge/<category>/<slug>.md`
+4. Update the index `index.md` and the log `log.md`
+
+### 2. Synthesise
+
+When a conversation produces new conclusions or insights:
+
+1. Create a new knowledge page under an appropriate category
+2. Add cross-links to and from related existing pages
+3. Update the index and log
+
+### 3. Query
+
+When you ask about previously accumulated knowledge:
+
+1. Search `index.md` for potentially relevant pages
+2. Open specific pages with the `read` tool
+3. Supplement with `memory_search` if needed
+4. Include links to knowledge pages in the answer so you can click through to the source
+
+## Page Format
+
+```markdown
+# Page Title
+
+> Source: <source URL or brief description>
+
+Body content. Link between pages using relative paths:
+[Related Page](../category/related-page.md)
+
+## Key Points
+
+- ...
+
+## Related Pages
+
+- [Page A](../category/page-a.md) — why it's related
+```
+
+<Note>
+- `> Source:` records where this knowledge came from. Always include it when there is a clear source
+- Cross-references are important: when creating or updating a page, remember to add back-links in the related pages too
+- **Only link to pages that already exist.** If a concept deserves its own page, create it first, then add the link
+</Note>
+
+## Index Format
+
+`knowledge/index.md` uses a flat list grouped by category, one knowledge page per line:
+
+```markdown
+# Knowledge Index
+
+## Category A
+- [Page Title](category-a/page-slug.md) — one-line summary
+
+## Category B
+- [Page Title](category-b/page-slug.md) — one-line summary
+```
+
+No tables, no emojis. Category names and organisation can be adjusted freely.
+
+## Log Format
+
+`knowledge/log.md` is append-only — newest entries go at the bottom:
+
+```markdown
+## [YYYY-MM-DD] ingest | Page Title
+## [YYYY-MM-DD] synthesize | Page Title
+```
+
+## Writing Guidelines
+
+- **Filenames**: lowercase with hyphens, e.g. `machine-learning.md`
+- **One topic per page** — link related content across pages
+- **Update, don't duplicate** — if a page already exists, update it rather than creating a new one
+- **Always update the index** `knowledge/index.md` after any change
+- **Distill, don't copy** — capture the key points, not the entire source
+- **Use full paths when referencing knowledge pages in conversations**, e.g. `[Title](knowledge/<category>/<slug>.md)`. Use relative paths only for inter-page links
+- **Include links when answering questions based on knowledge pages** so users can dig deeper
--- a/docs/en/skills/skill-creator.mdx
+++ b/docs/en/skills/skill-creator.mdx
@@ -0,0 +1,180 @@
+---
+title: skill-creator - Skill Creator
+description: Create, install, and update skills — standardises SKILL.md format and directory structure
+---
+
+`skill-creator` is a "meta-skill" that helps the Agent create, install, and update other skills, ensuring every skill follows a consistent `SKILL.md` format and directory layout.
+
+## When It Triggers
+
+- The user wants to install a skill from a URL or remote repository
+- The user wants to create a brand-new skill from scratch
+- An existing skill needs upgrading or restructuring
+
+## What Is a Skill?
+
+A skill is a reusable instruction set plus optional scripts and assets. It injects domain expertise into the Agent so it can handle specific tasks like a specialist.
+
+A skill typically contains:
+
+1. **Specialised workflow** — step-by-step instructions for a category of tasks
+2. **Tool usage** — how to call a particular API or process a particular file format
+3. **Domain knowledge** — team conventions, business rules, data schemas, etc.
+4. **Attached resources** — scripts, reference docs, templates, etc.
+
+<Note>
+**Core principle: less is more.** Only write what the Agent wouldn't figure out on its own. For every line you add, ask yourself: is it worth the tokens?
+</Note>
+
+## Directory Structure
+
+```
+skill-name/
+├── SKILL.md            # Required: skill definition
+│   ├── YAML frontmatter (name / description are mandatory)
+│   └── Markdown body (instructions + examples)
+└── Optional resources
+    ├── scripts/        # Executable scripts (Python / Bash, etc.)
+    ├── references/     # Large reference docs the Agent reads on demand
+    └── assets/         # Templates, icons, etc. used directly in output
+```
+
+## SKILL.md Specification
+
+Frontmatter fields in the SKILL.md header:
+
+| Field | Description |
+| --- | --- |
+| `name` | Skill name — lowercase with hyphens, must match the directory name |
+| `description` | **The most important field.** Clearly state what the skill does and when to use it. The Agent reads this to decide whether to invoke it. All trigger-related descriptions go here, not in the body |
+| `metadata.cowagent.requires.bins` | System CLI tools that must be installed |
+| `metadata.cowagent.requires.env` | Required environment variables (all must be present) |
+| `metadata.cowagent.requires.anyEnv` | Multiple API keys — at least one must be set |
+| `metadata.cowagent.requires.anyBins` | Multiple tools — at least one must be installed |
+| `metadata.cowagent.always` | Set to `true` to always load, skipping dependency checks |
+| `metadata.cowagent.emoji` | Display emoji (optional) |
+| `metadata.cowagent.os` | OS restriction, e.g. `["darwin", "linux"]` |
+
+<Note>
+The `category` field does not need to be set manually — the system automatically sets it to `skill`.
+</Note>
+
+Two ways to declare API key dependencies:
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      env: ["MYAPI_KEY"]            # Must be present
+```
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"]   # At least one
+```
+
+**Skills are auto-enabled/disabled based on dependencies**: they activate when their declared requirements are met (every entry in `env` / `bins`, at least one entry in `anyEnv` / `anyBins`) and deactivate otherwise — no need for manual `/skill enable`.
+
+## Resource Directories
+
+| Directory | What goes here | What does NOT go here |
+| --- | --- | --- |
+| `scripts/` | Code that needs to run repeatedly, or scripts that produce deterministic results | Demo-only code snippets |
+| `references/` | Documents **over 500 lines** that genuinely won't fit in SKILL.md (e.g. a full DB schema) | General API docs, tutorials, examples |
+| `assets/` | Files that appear in the final output (templates, icons, boilerplate, etc.) | Explanatory documentation |
+
+<Warning>
+**In principle, everything goes in `SKILL.md`** — only split into resource directories when it truly won't fit.
+
+Do not add `README.md`, `CHANGELOG.md`, or `INSTALLATION_GUIDE.md` to a skill — put everything in `SKILL.md`. Resource directories should only contain scripts that actually run or assets that are actually used.
+</Warning>
+
+## Installing External Skills
+
+After installation, the skill lands in `<workspace>/skills/<name>/`.
+
+| Source | How to install |
+| --- | --- |
+| URL (single file) | curl / web_fetch |
+| URL (zip archive) | Download and extract |
+| Local SKILL.md | Read directly |
+| Local zip archive | Extract |
+
+Installation steps:
+
+1. Locate the `SKILL.md` (may be at the root or in a subdirectory of the archive)
+2. Read the `name` from the frontmatter
+3. Copy the **entire skill directory** (including `SKILL.md`, `scripts/`, `assets/`, etc.) to `<workspace>/skills/<name>/`
+4. If the archive contains an `INSTALL.md` or a similar setup guide or script, follow it — but the final result must still reside under `<workspace>/skills/<name>/`
+
+## Creating a Skill from Scratch
+
+Recommended order:
+
+1. **Clarify requirements** — ask the user for a few concrete use cases (don't ask too many at once)
+2. **Plan the structure** — does this skill need scripts? Reference docs? Template assets?
+3. **Scaffold** — use the init script:
+
+   ```bash
+   scripts/init_skill.py <skill-name> --path <workspace>/skills [--resources scripts,references,assets] [--examples]
+   ```
+
+4. **Fill in content** — write SKILL.md, add scripts and resources. Always test scripts after writing them
+5. **Validate** (optional):
+
+   ```bash
+   scripts/quick_validate.py <workspace>/skills/<skill-name>
+   ```
+
+6. **Iterate** — keep improving based on real-world usage feedback
+
+## Naming Conventions
+
+- Use only lowercase letters, digits, and hyphens. Normalise user-given names, e.g. `Plan Mode` → `plan-mode`
+- Maximum 64 characters
+- Keep it short and self-explanatory; prefer verb-led names
+- Use tool names as prefixes when appropriate, e.g. `gh-address-comments`, `linear-address-issue`
+- The directory name and the `name` field must match exactly
+
+## Three-Level Loading
+
+Skills are not loaded into context all at once — they use a three-level progressive loading mechanism:
+
+1. **Metadata** (`name` + `description`) — always in context (~100 words). The Agent uses this to decide whether to invoke the skill
+2. **SKILL.md body** — loaded only when the skill is activated; keep it under 500 lines
+3. **Resource files** — read on demand by the Agent
+
+For skills with multiple variants (e.g. multi-cloud deployment), organise like this:
+
+```
+cloud-deploy/
+├── SKILL.md             # Main workflow and provider selection logic
+└── references/
+    ├── aws.md
+    ├── gcp.md
+    └── azure.md
+```
+
+When the user picks AWS, the Agent only reads `aws.md` — no need to load all three providers.
+
+## Common Design Patterns
+
+**Step-by-step**: numbered steps with corresponding scripts.
+
+```markdown
+1. Analyse form structure (run analyze_form.py)
+2. Generate field mappings (edit fields.json)
+3. Auto-fill the form (run fill_form.py)
+```
+
+**Branching**: different flows based on user intent.
+
+```markdown
+1. Determine operation type:
+   **Creating new content?** → follow the "Create" workflow
+   **Editing existing content?** → follow the "Edit" workflow
+```
+
+**Template-based**: when output format has strict requirements, include a template in SKILL.md for the Agent to follow.
--- a/docs/en/tools/vision.mdx
+++ b/docs/en/tools/vision.mdx
@@ -27,7 +27,7 @@ If the current provider fails, the tool automatically tries the next one until i
 | Claude | Main model | Anthropic native image format |
 | Gemini | Main model | inlineData format |
 | Doubao | Main model | doubao-seed-2-0 series natively supported |
-| Kimi (Moonshot) | Main model | kimi-k2.5 natively supported |
+| Kimi (Moonshot) | Main model | kimi-k2.6, kimi-k2.5 natively supported |
 | ZhipuAI | glm-5v-turbo | Always uses dedicated vision model |
 | MiniMax | MiniMax-Text-01 | Always uses dedicated vision model |

--- a/docs/guide/manual-install.mdx
+++ b/docs/guide/manual-install.mdx
@@ -139,7 +139,8 @@ sudo docker logs -f chatgpt-on-wechat
    ```json
    {
      "channel_type": "web",
-      "model": "MiniMax-M2.7",
+      "model": "deepseek-v4-flash",
+      "deepseek_api_key": "",
      "agent": true,
      "agent_workspace": "~/cow",
      "agent_max_context_tokens": 40000,
@@ -152,8 +153,9 @@ sudo docker logs -f chatgpt-on-wechat
    ```yaml
    environment:
      CHANNEL_TYPE: 'web'
-      MODEL: 'MiniMax-M2.7'
-      MINIMAX_API_KEY: 'your-api-key'
+      MODEL: 'deepseek-v4-flash'
+      DEEPSEEK_API_KEY: 'your-api-key'
+      DEEPSEEK_API_BASE: 'https://api.deepseek.com/v1'
      AGENT: 'True'
      AGENT_MAX_CONTEXT_TOKENS: 40000
      AGENT_MAX_CONTEXT_TURNS: 30
@@ -165,7 +167,7 @@ sudo docker logs -f chatgpt-on-wechat
 | 参数 | 环境变量 | 说明 | 默认值 |
 | --- | --- | --- | --- |
 | `channel_type` | `CHANNEL_TYPE` | 接入渠道类型 | `web` |
-| `model` | `MODEL` | 模型名称 | `MiniMax-M2.5` |
+| `model` | `MODEL` | 模型名称 | `deepseek-v4-flash` |
 | `agent` | `AGENT` | 是否启用 Agent 模式 | `true` |
 | `agent_workspace` | - | Agent 工作空间路径 | `~/cow` |
 | `agent_max_context_tokens` | `AGENT_MAX_CONTEXT_TOKENS` | 最大上下文 tokens | `40000` |
--- a/docs/intro/architecture.mdx
+++ b/docs/intro/architecture.mdx
@@ -9,7 +9,7 @@ CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理，采

 CowAgent 的整体架构由以下核心模块组成：

-<img src="https://cdn.link-ai.tech/doc/68ef7b212c6f791e0e74314b912149f9-sz_5847990.png" alt="CowAgent Architecture" />
+<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-zh.jpg" alt="CowAgent Architecture" />

 | 模块 | 说明 |
 | --- | --- |
@@ -70,7 +70,7 @@ Agent 的工作空间默认位于 `~/cow` 目录，用于存储系统提示词
  "agent_max_context_tokens": 40000,
  "agent_max_context_turns": 30,
  "agent_max_steps": 15,
-  "enable_thinking": true
+  "enable_thinking": false
 }
 ```

@@ -81,5 +81,5 @@ Agent 的工作空间默认位于 `~/cow` 目录，用于存储系统提示词
 | `agent_max_context_tokens` | 最大上下文 token 数 | `50000` |
 | `agent_max_context_turns` | 最大上下文记忆轮次 | `20` |
 | `agent_max_steps` | 单次任务最大决策步数 | `20` |
-| `enable_thinking` | 是否启用深度思考，开启后 Web 端展示推理过程，关闭可加速响应 | `true` |
+| `enable_thinking` | 是否启用深度思考模式 | `false` |
 | `knowledge` | 是否启用个人知识库 | `true` |
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@@ -28,7 +28,7 @@
 - ✅ **ツールシステム**: ファイル読み書き、ターミナル実行、ブラウザ操作、スケジュールタスク、メッセージ送信などの組み込みツールを提供。Agentが自律的に呼び出して複雑なタスクを完了します。
 - ✅ **CLIシステム**: ターミナルコマンドとチャットコマンドを提供し、プロセス管理、Skillインストール、設定変更などの操作をサポートします。
 - ✅ **マルチモーダルメッセージ**: テキスト、画像、音声、ファイルなど、さまざまなメッセージタイプの解析・処理・生成・送信に対応しています。
- ✅ **複数モデル対応**: OpenAI、Claude、Gemini、DeepSeek、MiniMax、GLM、Qwen、Kimi、Doubaoなど、主要なモデルプロバイダーに対応しています。
+- ✅ **複数モデル対応**: DeepSeek、MiniMax、Claude、Gemini、OpenAI、GLM、Qwen、Doubao、Kimiなど、主要なモデルプロバイダーに対応しています。
 - ✅ **マルチプラットフォームデプロイ**: ローカルPCやサーバー上で実行でき、WeChat、Web、Feishu、DingTalk、WeChat公式アカウント、WeComアプリケーションに統合可能です。

 ## 免責事項
@@ -164,15 +164,15 @@ sudo docker logs -f chatgpt-on-wechat

 | プロバイダー | 推奨モデル |
 | --- | --- |
+| DeepSeek | `deepseek-v4-flash` |
 | MiniMax | `MiniMax-M2.7` |
-| GLM | `glm-5-turbo` |
-| Kimi | `kimi-k2.5` |
-| Doubao | `doubao-seed-2-0-code-preview-260215` |
-| Qwen | `qwen3.6-plus` |
 | Claude | `claude-sonnet-4-6` |
 | Gemini | `gemini-3.1-pro-preview` |
 | OpenAI | `gpt-5.4` |
-| DeepSeek | `deepseek-chat` |
+| GLM | `glm-5.1` |
+| Qwen | `qwen3.6-plus` |
+| Doubao | `doubao-seed-2-0-code-preview-260215` |
+| Kimi | `kimi-k2.6` |

 各モデルの詳細設定については、[モデルドキュメント](https://docs.cowagent.ai/en/models/index)を参照してください。

--- a/docs/ja/cli/general.mdx
+++ b/docs/ja/cli/general.mdx
@@ -44,17 +44,18 @@ description: ステータスの確認、設定管理、コンテキスト制御
 **設定項目を変更：**

 ```text
-/config model deepseek-chat
+/config model deepseek-v4-flash
 ```

 **変更可能な設定項目：**

 | 項目 | 説明 | 例 |
 | --- | --- | --- |
-| `model` | AI モデル名 | `deepseek-chat` |
+| `model` | AI モデル名 | `deepseek-v4-flash` |
 | `agent_max_context_tokens` | 最大コンテキストトークン数 | `40000` |
 | `agent_max_context_turns` | 最大コンテキスト記憶ターン数 | `30` |
 | `agent_max_steps` | タスクごとの最大判断ステップ数 | `15` |
+| `enable_thinking` | ディープシンキングモードの有効化 | `true` / `false` |

 <Note>
  `model` を変更すると、システムが対応するモデル API を自動的にマッチングします。設定は `config.json` に永続的に保存されます。
--- a/docs/ja/guide/manual-install.mdx
+++ b/docs/ja/guide/manual-install.mdx
@@ -121,7 +121,8 @@ sudo docker logs -f chatgpt-on-wechat
 ```json
 {
  "channel_type": "web",
-  "model": "MiniMax-M2.5",
+  "model": "deepseek-v4-flash",
+  "deepseek_api_key": "",
  "agent": true,
  "agent_workspace": "~/cow",
  "agent_max_context_tokens": 40000,
@@ -133,7 +134,7 @@ sudo docker logs -f chatgpt-on-wechat
 | パラメータ | 説明 | デフォルト値 |
 | --- | --- | --- |
 | `channel_type` | チャネルタイプ | `web` |
-| `model` | モデル名 | `MiniMax-M2.5` |
+| `model` | モデル名 | `deepseek-v4-flash` |
 | `agent` | Agent モードを有効化 | `true` |
 | `agent_workspace` | Agent のワークスペースパス | `~/cow` |
 | `agent_max_context_tokens` | 最大コンテキストトークン数 | `40000` |
--- a/docs/ja/intro/architecture.mdx
+++ b/docs/ja/intro/architecture.mdx
@@ -9,7 +9,7 @@ CowAgent 2.0 は、シンプルなチャットボットから、自律的な思

 CowAgent のアーキテクチャは以下のコアモジュールで構成されています：

-<img src="https://cdn.link-ai.tech/doc/68ef7b212c6f791e0e74314b912149f9-sz_5847990.png" alt="CowAgent Architecture" />
+<img src="https://cdn.link-ai.tech/doc/cow-agent-arch-en.jpg.jpg" alt="CowAgent Architecture" />

 | モジュール | 説明 |
 | --- | --- |
--- a/docs/ja/memory/index.mdx
+++ b/docs/ja/memory/index.mdx
@@ -5,6 +5,8 @@ description: CowAgent の長期記憶システム — ファイル永続化、

 長期記憶はワークスペースのファイルに保存され、セッション間で永続化されます。Agent は会話中に検索ツールを通じて過去の記憶をオンデマンドで読み込み、コンテキストのトリミング時に会話の要約を自動的に長期記憶に書き込みます。

+<img src="https://cdn.link-ai.tech/doc/memory-architecture-en.jpg" alt="Memory Architecture" />
+
 ## 記憶の種類

 ### コア記憶（MEMORY.md）
@@ -30,20 +32,25 @@ Agent は以下のメカニズムにより、会話内容を長期記憶に自

 すべての記憶書き込みはバックグラウンドスレッドで非同期に実行され（LLM の要約 + ファイル書き込み）、通常の会話応答をブロックしません。

-## 初回起動
+## 関連ファイル

-初回起動時に、Agent はユーザーに主要な情報を積極的に尋ね、ワークスペース（デフォルト `~/cow`）に保存します：
+ワークスペース（デフォルト `~/cow`）内の記憶関連ファイル：

 | ファイル | 説明 |
 | --- | --- |
-| `system.md` | Agent のシステムプロンプトと動作設定 |
-| `user.md` | ユーザーの身元情報と好み |
+| `AGENT.md` | Agent のパーソナリティと動作設定 |
+| `USER.md` | ユーザーの身元情報と好み |
+| `RULE.md` | カスタムルールと制約 |
 | `MEMORY.md` | コア記憶（長期） |
 | `memory/YYYY-MM-DD.md` | 日次記憶（オンデマンドで作成） |
 | `memory/dreams/YYYY-MM-DD.md` | 夢日記（Deep Dream で自動生成） |

+## Web コンソール
+
+Web コンソールの記憶管理ページで、記憶ファイルと夢日記を閲覧できます。タブ切り替えに対応：
+
 <Frame>
-  <img src="https://cdn.link-ai.tech/doc/20260203000455.png" width="800" />
+  <img src="https://cdn.link-ai.tech/doc/20260414171014.png" width="800" />
 </Frame>

 ## 設定
--- a/docs/ja/models/claude.mdx
+++ b/docs/ja/models/claude.mdx
@@ -12,6 +12,6 @@ description: Claudeモデルの設定

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `claude-sonnet-4-6`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest`などから選択可能。[公式モデル一覧](https://docs.anthropic.com/en/docs/about-claude/models/overview)を参照 |
+| `model` | `claude-sonnet-4-6`、`claude-opus-4-7`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest`などから選択可能。[公式モデル一覧](https://docs.anthropic.com/en/docs/about-claude/models/overview)を参照 |
 | `claude_api_key` | [Claude Console](https://console.anthropic.com/settings/keys)で作成 |
 | `claude_api_base` | 任意。デフォルトは`https://api.anthropic.com/v1`。サードパーティプロキシを使用する場合に変更 |
--- a/docs/ja/models/coding-plan.mdx
+++ b/docs/ja/models/coding-plan.mdx
@@ -102,18 +102,18 @@ description: Coding Planモデルの設定

 ```json
 {
-  "bot_type": "openai",
+  "bot_type": "moonshot",
  "model": "kimi-for-coding",
-  "open_ai_api_base": "https://api.kimi.com/coding/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
+  "moonshot_base_url": "https://api.kimi.com/coding/v1",
+  "moonshot_api_key": "YOUR_API_KEY"
 }
 ```

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `kimi-for-coding` |
-| `open_ai_api_base` | `https://api.kimi.com/coding/v1` |
-| `open_ai_api_key` | Coding Plan専用キー（従量課金とは共有不可） |
+| `model` | `kimi-for-coding`（自動更新モデル）、または`kimi-k2.6`などの特定モデルを指定 |
+| `moonshot_base_url` | `https://api.kimi.com/coding/v1` |
+| `moonshot_api_key` | Coding Plan専用キー（従量課金とは共有不可） |

 参考: [キー & ドキュメント](https://www.kimi.com/code/docs/)

--- a/docs/ja/models/custom.mdx
+++ b/docs/ja/models/custom.mdx
@@ -0,0 +1,62 @@
+---
+title: カスタム
+description: サードパーティAPIやローカルモデル向けのカスタムプロバイダー設定
+---
+
+OpenAI互換プロトコルでアクセスするモデルサービスに適用します：
+
+- **サードパーティAPIプロキシ**：統一APIベースで複数モデルを呼び出し
+- **ローカルモデル**：Ollama、vLLM、LocalAIなどでローカルにデプロイされたモデル
+- **プライベートデプロイ**：組織内でホストされたモデルサービス
+
+<Note>
+  `openai` プロバイダーとの違い：カスタムプロバイダーでは `/config model` でモデルを切り替えてもプロバイダータイプは自動切り替えされず、カスタムAPIアドレスが常に保持されます。
+</Note>
+
+## 設定方法
+
+### サードパーティAPIプロキシ
+
+```json
+{
+  "bot_type": "custom",
+  "model": "deepseek-v4-flash",
+  "custom_api_key": "YOUR_API_KEY",
+  "custom_api_base": "https://{your-proxy.com}/v1"
+}
+```
+
+| パラメータ | 説明 |
+| --- | --- |
+| `bot_type` | `custom` に設定必須 |
+| `model` | モデル名、プロキシサービスがサポートする任意のモデル名 |
+| `custom_api_key` | プロキシサービスが提供するAPIキー |
+| `custom_api_base` | APIアドレス、OpenAI互換プロトコルが必要 |
+
+### ローカルモデル
+
+ローカルモデルは通常APIキー不要で、APIベースのみ設定します：
+
+```json
+{
+  "bot_type": "custom",
+  "model": "qwen3.5:27b",
+  "custom_api_base": "http://localhost:11434/v1"
+}
+```
+
+一般的なローカルデプロイツールとデフォルトアドレス：
+
+| ツール | デフォルトAPIベース |
+| --- | --- |
+| [Ollama](https://ollama.com) | `http://localhost:11434/v1` |
+| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
+| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
+
+## モデル切り替え
+
+カスタムプロバイダーではモデル切り替え時に `model` のみ変更され、`bot_type` やAPIアドレスは変わりません：
+
+```
+/config model qwen3.5:27b
+```
--- a/docs/ja/models/deepseek.mdx
+++ b/docs/ja/models/deepseek.mdx
@@ -7,22 +7,55 @@ description: DeepSeekモデルの設定

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "deepseek_api_key": "YOUR_API_KEY"
 }
 ```

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `deepseek-chat`（DeepSeek-V3.2、非思考モード）、`deepseek-reasoner`（DeepSeek-R1、思考モード） |
-| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys)で作成 |
+| `model` | `deepseek-v4-flash`（デフォルト）、`deepseek-v4-pro` をサポート |
+| `deepseek_api_key` | [DeepSeek Platform](https://platform.deepseek.com/api_keys) で作成 |
 | `deepseek_api_base` | オプション、デフォルトは `https://api.deepseek.com/v1`。サードパーティプロキシに変更可能 |

+## モデルの選び方
+
+| モデル | 適用シーン |
+| --- | --- |
+| `deepseek-v4-flash` | デフォルト推奨、高速・低コスト |
+| `deepseek-v4-pro` | 複雑なタスクでより強力 |
+
+## 思考モード
+
+V4シリーズ（`deepseek-v4-flash` / `deepseek-v4-pro`）は明示的な「思考モード」をサポートします。最終回答の前に思考内容（`reasoning_content`）を出力することで、回答品質を高めます。
+
+### スイッチ
+
+グローバル設定 `enable_thinking` で制御します：
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`：すべてのチャネルで思考モードがオン。Webコンソールでは思考過程を表示し、IMチャネル（WeChat / WeCom / DingTalk / Feishu）では表示されないものの、回答品質の向上というメリットを得られます。
+- `false`：思考オフ、応答が速く、初回トークンの遅延も低くなります。
+
+### 注意事項
+
+- **サンプリングパラメータ**：思考モード時は `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` がサーバ側で無視されます（エラーにはなりません）。CowAgentは自動的に送信をスキップします。
+- **マルチターンのツール呼び出し**：履歴にツール呼び出しが含まれる場合、DeepSeekはすべてのassistantメッセージに `reasoning_content` を返送するよう要求します。CowAgentが自動でラウンドトリップ処理を行うため、セッション途中で思考スイッチを切り替えてもエラーになりません。
+
+<Tip>
+  通常は `deepseek-v4-flash` を使い、難しいタスクでは `deepseek-v4-pro` に切り替え、深い思考が必要な時は `enable_thinking` を有効にしてください。
+</Tip>
+
 方法2：OpenAI互換方式：

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "bot_type": "openai",
  "open_ai_api_key": "YOUR_API_KEY",
  "open_ai_api_base": "https://api.deepseek.com/v1"
--- a/docs/ja/models/glm.mdx
+++ b/docs/ja/models/glm.mdx
@@ -5,14 +5,14 @@ description: 智谱AI GLMモデルの設定

 ```json
 {
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "zhipu_ai_api_key": "YOUR_API_KEY"
 }
 ```

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air`などから選択可能。[モデルコード](https://bigmodel.cn/dev/api/normal-model/glm-4)を参照 |
+| `model` | `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air`などから選択可能。[モデルコード](https://bigmodel.cn/dev/api/normal-model/glm-4)を参照 |
 | `zhipu_ai_api_key` | [智谱AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys)で作成 |

 OpenAI互換の設定もサポートしています:
@@ -20,7 +20,7 @@ OpenAI互換の設定もサポートしています:
 ```json
 {
  "bot_type": "openai",
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/ja/models/index.mdx
+++ b/docs/ja/models/index.mdx
@@ -6,7 +6,7 @@ description: CowAgentがサポートするモデルとおすすめの選択肢
 CowAgentは国内外の主要なLLMをサポートしています。モデルインターフェースはプロジェクトの`models/`ディレクトリに実装されています。

 <Note>
-  Agent モードでは、品質とコストのバランスから以下のモデルをおすすめします: MiniMax-M2.7、glm-5-turbo、kimi-k2.5、qwen3.6-plus、claude-sonnet-4-6、gemini-3.1-pro-preview
+  Agent モードでは、品質とコストのバランスから以下のモデルをおすすめします: deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.1-pro-preview、glm-5.1、qwen3.6-plus、kimi-k2.6
 </Note>

 ## 設定
@@ -18,21 +18,12 @@ CowAgentは国内外の主要なLLMをサポートしています。モデルイ
 ## サポートモデル

 <CardGroup cols={2}>
+  <Card title="DeepSeek" href="/ja/models/deepseek">
+    deepseek-v4-flash、deepseek-v4-pro など
+  </Card>
  <Card title="MiniMax" href="/ja/models/minimax">
    MiniMax-M2.7およびその他のシリーズモデル
  </Card>
-  <Card title="GLM (智谱AI)" href="/ja/models/glm">
-    glm-5-turbo、glm-5およびその他のシリーズモデル
-  </Card>
-  <Card title="Qwen (通义千问)" href="/ja/models/qwen">
-    qwen3.6-plus、qwen3-maxなど
-  </Card>
-  <Card title="Kimi" href="/ja/models/kimi">
-    kimi-k2.5、kimi-k2など
-  </Card>
-  <Card title="Doubao (ByteDance)" href="/ja/models/doubao">
-    doubao-seedシリーズモデル
-  </Card>
  <Card title="Claude" href="/ja/models/claude">
    claude-sonnet-4-6など
  </Card>
@@ -42,8 +33,17 @@ CowAgentは国内外の主要なLLMをサポートしています。モデルイ
  <Card title="OpenAI" href="/ja/models/openai">
    gpt-5.4、gpt-4.1、oシリーズなど
  </Card>
-  <Card title="DeepSeek" href="/ja/models/deepseek">
-    deepseek-chat、deepseek-reasoner
+  <Card title="GLM (智谱AI)" href="/ja/models/glm">
+    glm-5.1、glm-5-turbo、glm-5およびその他のシリーズモデル
+  </Card>
+  <Card title="Qwen (通义千问)" href="/ja/models/qwen">
+    qwen3.6-plus、qwen3-maxなど
+  </Card>
+  <Card title="Doubao (ByteDance)" href="/ja/models/doubao">
+    doubao-seedシリーズモデル
+  </Card>
+  <Card title="Kimi" href="/ja/models/kimi">
+    kimi-k2.6、kimi-k2.5、kimi-k2など
  </Card>
  <Card title="LinkAI" href="/ja/models/linkai">
    統合マルチモデルインターフェース + ナレッジベース
--- a/docs/ja/models/kimi.mdx
+++ b/docs/ja/models/kimi.mdx
@@ -5,14 +5,14 @@ description: Kimi (Moonshot) モデルの設定

 ```json
 {
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "moonshot_api_key": "YOUR_API_KEY"
 }
 ```

 | パラメータ | 説明 |
 | --- | --- |
-| `model` | `kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k`から選択可能 |
+| `model` | `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k`から選択可能 |
 | `moonshot_api_key` | [Moonshot Console](https://platform.moonshot.cn/console/api-keys)で作成 |

 OpenAI互換の設定もサポートしています:
@@ -20,7 +20,7 @@ OpenAI互換の設定もサポートしています:
 ```json
 {
  "bot_type": "openai",
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "open_ai_api_base": "https://api.moonshot.cn/v1",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/ja/models/linkai.mdx
+++ b/docs/ja/models/linkai.mdx
@@ -3,7 +3,7 @@ title: LinkAI
 description: LinkAIプラットフォームで複数モデルに統合アクセス
 ---

-[LinkAI](https://link-ai.tech)プラットフォームでは、OpenAI、Claude、Gemini、DeepSeek、Qwen、Kimiなどのモデルを柔軟に切り替えることができ、ナレッジベース、ワークフロー、プラグイン、その他のAgent機能をサポートしています。
+[LinkAI](https://link-ai.tech)プラットフォームでは、OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimiなどのモデルを柔軟に切り替えることができ、ナレッジベース、ワークフロー、プラグイン、その他のAgent機能をサポートしています。

 ```json
 {
--- a/docs/ja/releases/overview.mdx
+++ b/docs/ja/releases/overview.mdx
@@ -5,6 +5,7 @@ description: CowAgent バージョン履歴

 | バージョン | 日付 | 説明 |
 | --- | --- | --- |
+| [2.0.7](/ja/releases/v2.0.7) | 2026.04.22 | 画像生成スキル（6プロバイダー自動ルーティング）、新モデル（Kimi K2.6、Claude Opus 4.7、GLM 5.1）、ナレッジベースと Web コンソールの改善 |
 | [2.0.6](/ja/releases/v2.0.6) | 2026.04.14 | ナレッジベース、Deep Dream 記憶蒸留、スマートコンテキスト圧縮、Web コンソールアップグレード |
 | [2.0.5](/ja/releases/v2.0.5) | 2026.04.01 | Cow CLI、Skill Hub オープンソース、ブラウザツール、企業微信スキャン作成、その他改善 |
 | [2.0.4](/ja/releases/v2.0.4) | 2026.03.22 | 個人WeChatチャネル追加、新モデルサポート、日本語ドキュメント、スクリプトリファクタリングおよび複数修正 |
--- a/docs/ja/releases/v2.0.7.mdx
+++ b/docs/ja/releases/v2.0.7.mdx
@@ -0,0 +1,65 @@
+---
+title: v2.0.7
+description: CowAgent 2.0.7 - 画像生成スキル（6プロバイダー自動ルーティング）、新モデルサポート、ナレッジベース強化、Web コンソール改善およびバグ修正
+---
+
+## 🎨 画像生成スキル
+
+新しい内蔵スキル `image-generation` を追加。テキストから画像生成、画像編集、複数画像の融合に対応し、6 社の主要プロバイダーをカバー：
+
+- **6 プロバイダー自動ルーティング**：OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (Volcengine Ark) → Qwen (DashScope) → MiniMax → LinkAI — 固定の優先順位で設定済みプロバイダーを自動選択、失敗時は次のプロバイダーへ自動フォールバック
+- **モデル選択不要**：API Key を設定するだけで使用可能、モデルを手動で指定する必要なし。会話で特定モデルを指名することも可能（例：「seedream で猫を描いて」）
+- **柔軟な制御**：`quality`（画質）、`size`（解像度、512/1K〜4K）、`aspect_ratio`（アスペクト比）パラメータ対応、各プロバイダーが自動的に有効な値にマッピング
+- **画像編集**：既存の画像を渡して編集・スタイル変換・複数画像融合が可能（Seedream は最大 14 枚の参照画像をサポート）
+- **スキルレベル設定**：`config.json` の `skill.image-generation.model` でデフォルトモデルを固定可能
+- **画像ライトボックス**：Web コンソールのすべての画像がクリックで拡大プレビュー対応
+
+ドキュメント：[画像生成スキル](https://docs.cowagent.ai/ja/skills/image-generation)
+
+## 🤖 新モデルサポート
+
+- **Kimi K2.6**：`kimi-k2.6` モデルサポートを追加
+- **Claude Opus 4.7**：`claude-opus-4-7` モデルサポートを追加
+- **GLM 5.1**：`glm-5.1` モデルサポートを追加
+- **Kimi Coding Plan**：Kimi Coding Plan モードをサポート
+- **カスタムモデルプロバイダー**：新しいカスタムモデルプロバイダー設定により、追加ベンダーとの統合が容易に
+
+## 💬 Web コンソール改善
+
+- **スマート自動スクロール**：チャットスクロールの動作を改善 — ユーザーが過去のメッセージを閲覧中に強制的に最下部にスクロールしなくなりました
+- **推論コンテンツ制限**：深い思考コンテンツを 4KB に制限し、フロントエンドのラグを防止
+- **モバイル最適化**：セッションサイドバーをモバイルではデフォルトで非表示、オーバーレイタップで閉じることが可能
+- **セッションタイトル修正**：タイトル自動生成のフォールバックロジックと設定変更時の Bridge リセットを修正
+- **画像プレビュー重複排除**：同一メッセージ内での画像の重複レンダリングを修正
+
+## 📚 ナレッジベース強化
+
+- **ネストディレクトリ対応**：ナレッジベースの一覧表示が多階層のネストディレクトリに対応
+- **ルートレベルファイル表示**：ナレッジツリーにルートディレクトリの `index.md`、`log.md` などを表示
+- **空状態統計の修正**：ルートレベルファイルが空状態検出に干渉しなくなりました
+
+## 🌙 夢の記憶改善
+
+- **構造化整理**：夢の記憶ファイルが日付別に自動アーカイブされ、ディレクトリ構造がより整理されました
+- **スケジュールジッター**：毎日の夢トリガーにランダムジッターを追加し、クラスター環境での同時実行の競合を回避
+
+## 🛠 スキルシステム改善
+
+- **スキルマネージャーの更新**：`/skill` コマンド実行後にスキルマネージャーを自動リフレッシュし、状態の同期を確保
+- **インストールソース拡張**：スキルインストールが複数のソース形式（URL、zip、ローカルファイルなど）に対応し、ターゲットディレクトリを自動的に確保
+
+## 🐛 その他の修正
+
+- **Gemini 修正**：Gemini の tool call が結果を返さない問題を修正
+- **Agent リトライ**：空レスポンスのリトライ時に `tool_calls` が破棄されなくなりました
+- **Docker 環境変数同期**：Docker 環境で設定更新後に環境変数が同期されない問題を修正
+- **Python 3.7 互換**：Python 3.7 互換性のために `Literal` のインポートを遅延
+- **モデル切替通知**：モデル切替後に bot_type 変更通知が表示されない問題を修正。Thanks @6vision
+- **設定コマンド**：`/config` で `enable_thinking` の設定が可能に
+- **思考表示**：深い思考の表示がデフォルトで無効に
+
+## 📦 アップグレード
+
+`cow update` または `./run.sh update` でアップグレード、またはコードを手動で pull して再起動。詳細は[アップグレードガイド](https://docs.cowagent.ai/ja/guide/upgrade)を参照。
+
+**リリース日**：2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...2.0.7)
--- a/docs/ja/skills/image-generation.mdx
+++ b/docs/ja/skills/image-generation.mdx
@@ -0,0 +1,158 @@
+---
+title: image-generation - 画像生成
+description: テキストから画像生成 / 画像編集 / 複数画像の融合、複数プロバイダーの自動ルーティングとフォールバック対応
+---
+
+汎用の画像生成・編集スキルです。OpenAI、Gemini、Seedream（Volcengine Ark）、Qwen（DashScope）、MiniMax、LinkAI の 6 社に対応。モデルを手動で選ぶ必要はなく、固定の優先順位に従って、設定済みのプロバイダーを自動的に選択します。
+
+## モデル選択
+
+`image-generation` は「固定優先度 + 自動フォールバック」のストラテジーを採用しています。API Key を設定するだけで使えます：
+
+1. **優先順位**: `OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
+2. **未設定のプロバイダーはスキップ**: API Key が設定されているプロバイダーのみが参加
+3. **失敗時は自動で次へ**: 401、モデル未開通、ネットワークエラーなどの場合、次のプロバイダーを試行
+4. **モデル指定時は前置**: 特定のモデル名を渡すと、そのプロバイダーが最前列に昇格
+
+### 対応モデル
+
+| プロバイダー | モデル / エイリアス | 特徴 |
+| --- | --- | --- |
+| OpenAI | `gpt-image-2`、`gpt-image-1` | 汎用テキスト→画像、高品質、`quality` パラメータ対応 |
+| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` の画像バージョン |
+| Seedream（Volcengine Ark） | `seedream-5.0-lite`、`seedream-4.5` | ネイティブ 2K–4K、最大 14 枚の参照画像を融合 |
+| Qwen（DashScope） | `qwen-image-2.0`、`qwen-image-2.0-pro` | 中国語テキスト描画やテキスト・画像レイアウトに強い |
+| MiniMax | `image-01` | シンプルで高速な画像生成 |
+| LinkAI | 任意のモデル | 汎用プロキシ、フォールバック用 |
+
+<Note>
+デフォルトでは Agent はモデルを選ばず、自動ルーティングを使用します。特定のモデルを使いたい場合は、会話で直接指定してください（例：「seedream で猫を描いて」「gpt-image-2 でポスターを作って」）。下記の「カスタム設定」でデフォルトモデルを固定することもできます。
+</Note>
+
+## カスタム設定
+
+### API Key の設定
+
+**少なくとも 1 つ**のプロバイダーの Key が必要です。複数設定すると自動フォールバックが有効になります。設定方法は 3 通り：
+
+#### 方法 1：既存のモデル Key を自動再利用
+
+Web コンソールや `config.json` で対話モデルの Key（`openai_api_key`、`gemini_api_key` など）を設定済みの場合、起動時にこれらの Key は対応する環境変数に**自動同期**されます。つまり、対話モデルが使えていれば、画像生成も同じ Key で追加設定なしに利用できます。
+
+#### 方法 2：config.json で設定
+
+`config.json` に Key フィールドを直接記述：
+
+```json
+{
+  "openai_api_key": "sk-xxx",
+  "openai_api_base": "https://api.openai.com/v1",
+  "gemini_api_key": "AIza-xxx",
+  "ark_api_key": "xxx",
+  "dashscope_api_key": "sk-xxx",
+  "minimax_api_key": "xxx",
+  "linkai_api_key": "xxx"
+}
+```
+
+変更後は再起動が必要です。各 Key には対応する `*_api_base` フィールドがあり、カスタムエンドポイントを指定できます。
+
+#### 方法 3：会話で直接設定
+
+チャットで API Key を送信すると、Agent が `env_config` ツールで `~/cow/.env` に保存します。**再起動不要**でただちに反映されます。例：
+
+```
+OPENAI_API_KEY を sk-xxx に設定して
+```
+
+または：
+
+```
+ARK_API_KEY を xxx に設定して
+```
+
+### API Key 一覧
+
+| 環境変数 | config.json フィールド | プロバイダー | デフォルト Base URL |
+| --- | --- | --- | --- |
+| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
+| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
+| `ARK_API_KEY` | `ark_api_key` | Volcengine Ark（Seedream） | `https://ark.cn-beijing.volces.com/api/v3` |
+| `DASHSCOPE_API_KEY` | `dashscope_api_key` | Alibaba DashScope（Qwen） | `https://dashscope.aliyuncs.com` |
+| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
+| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
+
+### デフォルトモデルの固定
+
+すべての画像生成を特定のプロバイダーのモデルで固定したい場合、`config.json` に以下を追加：
+
+```json
+"skill": {
+  "image-generation": {
+    "model": "seedream-5.0-lite"
+  }
+}
+```
+
+起動時にこの設定は環境変数 `SKILL_IMAGE_GENERATION_MODEL` に自動変換され、スクリプトはこのモデルのプロバイダーを常に使用します。
+
+## 有効化と無効化
+
+`image-generation` は内蔵スキルで、**API Key に基づいてステータスが自動調整**されます：
+
+- **Key 設定済み**：スキルはアクティブ — Agent は画像生成リクエストを受けると呼び出す
+- **Key 未設定**：スキルはコンテキストに表示される（「設定が必要」とマーク）— Agent は呼び出し失敗の代わりに Key の設定を案内する
+
+手動で制御する場合：
+
+```text
+/skill disable image-generation    # 無効化（Key があっても呼び出されない）
+/skill enable image-generation     # 再有効化
+```
+
+ターミナルでは `cow skill disable image-generation` / `cow skill enable image-generation`。
+
+## パラメータ
+
+| パラメータ | 型 | 必須 | デフォルト | 説明 |
+| --- | --- | --- | --- | --- |
+| `prompt` | string | はい | — | 画像の説明 |
+| `image_url` | string / list | いいえ | null | 編集用の入力画像。ローカルパスまたは URL。複数指定で複数画像融合 |
+| `quality` | string | いいえ | auto | `low` / `medium` / `high` — 一部のプロバイダーのみ対応 |
+| `size` | string | いいえ | auto | `512` / `1K` / `2K` / `3K` / `4K`、またはピクセル値（例: `1024x1024`） |
+| `aspect_ratio` | string | いいえ | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`；Gemini は `1:4` / `4:1` / `1:8` / `8:1` にも対応 |
+
+<Warning>
+**品質が高いほど・解像度が大きいほど、コストが高く、時間がかかります。**
+
+- 日常の会話やプレビューにはデフォルト（`auto`）、または `quality=low` + `size=1K` を使用 — 約 20 秒で生成
+- ポスターやユーザーが高解像度を明示的に要求した場合は `quality=high` + `size=2K/4K` — モデルによって 1〜5 分かかる場合があります
+</Warning>
+
+## 出力
+
+成功時：
+
+```json
+{
+  "model": "doubao-seedream-5-0-260128",
+  "images": [
+    {"url": "/path/to/output.png"}
+  ]
+}
+```
+
+失敗時：`{ "error": "..." }`。エラー後は**直接リトライしないでください** — ほぼ確実に設定の問題です（Key の誤り、API ベース URL の不一致、モデル未開通など）。まず設定を修正してから再試行してください。
+
+## よくある使い方
+
+- **テキスト→画像**：説明からイラスト、ポスター、アイコン、アバター、絵コンテなどを生成
+- **画像→画像**：既存の画像のスタイル変更、要素の入れ替え、装飾やテキストの追加
+- **複数画像の融合**：複数の参照画像を 1 枚に合成（着せ替え、キャラクター集合写真など）
+
+<Note>
+- bash タイムアウトは 600 秒に設定してください。各プロバイダーの HTTP タイムアウトは 300 秒ですが、スクリプトが複数のプロバイダーを順番に試行する場合があります
+- 入力画像は自動的に 4 MB 以下・最長辺 4096 px 以下に圧縮されます
+- Gemini / Seedream / Qwen / MiniMax は `quality` パラメータに対応していません（渡しても無視されます）
+- Seedream のデフォルトは 2K。`seedream-5.0-lite` は 3K まで、`seedream-4.5` は 4K まで対応
+</Note>
--- a/docs/ja/skills/knowledge-wiki.mdx
+++ b/docs/ja/skills/knowledge-wiki.mdx
@@ -0,0 +1,112 @@
+---
+title: knowledge-wiki - ナレッジベース
+description: ローカルの構造化ナレッジベースを管理し、自動でアーカイブ・分類・相互参照を行う
+---
+
+会話で生まれた資料、アイデア、メモをローカルの構造化ナレッジベースに整理し、インデックスとページ間の相互参照を自動で維持します。
+
+`knowledge-wiki` はワークスペース内の `knowledge/` ディレクトリを管理します。Agent の「外部メモリ」のようなものです。`always: true` が設定されているため**常にコンテキストにロード**され、外部依存は不要です。
+
+## いつ起動するか
+
+- 記事、ドキュメント、URL を共有して、後で参照できるように残したいとき
+- 会話の中で長期保存に値する結論が出たとき
+- 以前蓄積したナレッジを調べたいとき
+
+## ディレクトリ構成
+
+```
+knowledge/
+├── index.md           # グローバルインデックス（必ずメンテナンスする）
+├── log.md             # 操作ログ（追記のみ）
+└── <category>/        # カテゴリサブディレクトリ（内容ごとにグループ化）
+    └── <slug>.md      # ナレッジページ（小文字ハイフン区切りのファイル名）
+```
+
+## 3 つの基本操作
+
+### 1. 収録（Ingest）
+
+資料を共有すると、Agent は：
+
+1. 原文を読んで理解し、重要な情報を抽出
+2. どのカテゴリに属するか判断 — まず `index.md` をチェックし、適切なカテゴリがなければ新規作成
+3. `knowledge/<category>/<slug>.md` にナレッジページを生成
+4. インデックス `index.md` とログ `log.md` を更新
+
+### 2. 統合（Synthesize）
+
+会話の中で新しい結論やインサイトが生まれたとき：
+
+1. 適切なカテゴリの下に新しいナレッジページを作成
+2. 関連する既存ページに相互リンクを追加
+3. インデックスとログを更新
+
+### 3. 検索（Query）
+
+以前蓄積したナレッジについて質問されたとき：
+
+1. `index.md` から関連しそうなページを探す
+2. `read` ツールで具体的なページを開く
+3. 必要に応じて `memory_search` で補完検索
+4. 回答にナレッジページへのリンクを含め、ユーザーが原文を確認できるようにする
+
+## ページの書き方
+
+```markdown
+# ページタイトル
+
+> Source: <ソース URL または簡単な説明>
+
+本文。ページ間は相対パスでリンク：
+[関連ページ](../category/related-page.md)
+
+## 要点
+
+- ...
+
+## 関連ページ
+
+- [ページ A](../category/page-a.md) — 関連する理由
+```
+
+<Note>
+- `> Source:` はこのナレッジの出典を記録します。明確な出典がある場合は必ず記載してください
+- 相互参照は重要です：ページを作成・更新したら、関連ページにも逆リンクを追加してください
+- **既に存在するページにのみリンクしてください**。ある概念が独立ページに値する場合は、先にページを作成してからリンクを追加してください
+</Note>
+
+## インデックス形式
+
+`knowledge/index.md` はフラットリスト形式で、カテゴリごとにグループ化し、各ナレッジページを 1 行で表します：
+
+```markdown
+# Knowledge Index
+
+## カテゴリ A
+- [ページタイトル](category-a/page-slug.md) — 一行の要約
+
+## カテゴリ B
+- [ページタイトル](category-b/page-slug.md) — 一行の要約
+```
+
+テーブルや絵文字は使いません。カテゴリ名や構成は柔軟に調整できます。
+
+## ログ形式
+
+`knowledge/log.md` は追記のみ、最新のエントリが一番下：
+
+```markdown
+## [YYYY-MM-DD] ingest | ページタイトル
+## [YYYY-MM-DD] synthesize | ページタイトル
+```
+
+## 執筆ガイドライン
+
+- **ファイル名**は小文字＋ハイフン（例: `machine-learning.md`）
+- **1 ページ 1 トピック** — 関連コンテンツはリンクで繋ぐ
+- **重複ページを作らず、既存ページを更新する**
+- **変更のたびにインデックスを更新する**（`knowledge/index.md`）
+- **要点を抽出し、全文をコピーしない**
+- **会話中にナレッジページを参照する際はフルパスを使用**（例: `[タイトル](knowledge/<category>/<slug>.md)`）。ページ間の相互リンクのみ相対パスを使用
+- **ナレッジページに基づいて回答する際はリンクを含める** — ユーザーが詳細を確認できるように
--- a/docs/ja/skills/skill-creator.mdx
+++ b/docs/ja/skills/skill-creator.mdx
@@ -0,0 +1,180 @@
+---
+title: skill-creator - スキル作成
+description: スキルの作成・インストール・更新、SKILL.md の書き方とディレクトリ構成の標準化
+---
+
+`skill-creator` は「メタスキル」です。Agent が他のスキルを作成・インストール・更新する際に呼び出され、すべてのスキルの `SKILL.md` の書き方とディレクトリ構成を統一します。
+
+## いつ起動するか
+
+- ユーザーが URL やリモートリポジトリからスキルをインストールしたいとき
+- ユーザーが新しいスキルをゼロから作成したいとき
+- 既存のスキルをアップグレード・リファクタリングする必要があるとき
+
+## スキルとは
+
+スキルは「再利用可能な説明書」にオプションのスクリプトやリソースを加えたものです。特定のドメインの専門知識を Agent に注入し、該当タスクをスペシャリストのように処理できるようにします。
+
+スキルには通常、以下が含まれます：
+
+1. **専門ワークフロー** — ある種のタスクの完全な手順
+2. **ツールの使い方** — 特定の API やファイル形式の処理方法
+3. **ドメイン知識** — チームの規約、ビジネスルール、データ構造など
+4. **付属リソース** — スクリプト、参考ドキュメント、テンプレートなど
+
+<Note>
+**基本原則：省けるものは省く。** Agent が自力で推測できない内容だけを書きましょう。1 行追加するたびに「このトークンコストに見合うか？」と自問してください。
+</Note>
+
+## ディレクトリ構成
+
+```
+skill-name/
+├── SKILL.md            # 必須：スキル定義
+│   ├── YAML frontmatter（name / description は必須）
+│   └── Markdown 本文（説明 + 例）
+└── オプションリソース
+    ├── scripts/        # 実行可能スクリプト（Python / Bash など）
+    ├── references/     # 分量が多い参考ドキュメント（Agent が必要時に読む）
+    └── assets/         # テンプレート、アイコンなど（出力に直接使われるもの）
+```
+
+## SKILL.md 仕様
+
+SKILL.md ヘッダーの `frontmatter` フィールド：
+
+| フィールド | 説明 |
+| --- | --- |
+| `name` | スキル名。小文字＋ハイフン、ディレクトリ名と一致させる |
+| `description` | **最も重要なフィールド**。「このスキルが何をするか」「いつ使うべきか」を明記する。Agent はこれを見て呼び出すかどうかを判断する。トリガーに関する記述はすべてここに書き、本文には書かない |
+| `metadata.cowagent.requires.bins` | システムに必要な CLI ツール |
+| `metadata.cowagent.requires.env` | 必要な環境変数（すべて揃っている必要がある） |
+| `metadata.cowagent.requires.anyEnv` | 複数の API Key のうち 1 つあればよい |
+| `metadata.cowagent.requires.anyBins` | 複数のツールのうち 1 つあればよい |
+| `metadata.cowagent.always` | `true` にすると常にロードされ、依存チェックをスキップ |
+| `metadata.cowagent.emoji` | 表示用の絵文字（任意） |
+| `metadata.cowagent.os` | OS 制限、例: `["darwin", "linux"]` |
+
+<Note>
+`category` フィールドは手動で設定する必要はありません。システムが自動的に `skill` に設定します。
+</Note>
+
+API Key 依存の宣言方法は 2 通り：
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      env: ["MYAPI_KEY"]            # 必須
+```
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"]   # いずれか 1 つ
+```
+
+**スキルは依存関係に基づいて自動的に有効/無効になります**：環境変数が揃えば自動有効、不足すれば自動無効。手動で `/skill enable` する必要はありません。
+
+## リソースディレクトリの使い方
+
+| ディレクトリ | 入れるもの | 入れないもの |
+| --- | --- | --- |
+| `scripts/` | 繰り返し実行するコード、確定的な結果が必要なスクリプト | デモ用のコード片 |
+| `references/` | **500 行超**で SKILL.md に収まらない大きなドキュメント（完全な DB スキーマなど） | 一般的な API ドキュメント、チュートリアル |
+| `assets/` | 最終出力に含まれるファイル（テンプレート、アイコン、ボイラープレートなど） | 説明用ドキュメント |
+
+<Warning>
+**原則としてすべての内容を `SKILL.md` に書きます** — リソースディレクトリに分割するのは本当に収まらない場合だけです。
+
+`README.md`、`CHANGELOG.md`、`INSTALLATION_GUIDE.md` などをスキルに追加しないでください。すべて `SKILL.md` に入れましょう。リソースディレクトリには実際に実行するスクリプトや実際に使う素材だけを配置してください。
+</Warning>
+
+## 外部スキルのインストール
+
+インストール後、スキルは `<workspace>/skills/<name>/` に配置されます。
+
+| ソース | インストール方法 |
+| --- | --- |
+| URL（単一ファイル） | curl / web_fetch |
+| URL（zip アーカイブ） | ダウンロードして展開 |
+| ローカル SKILL.md | 直接読み込み |
+| ローカル zip アーカイブ | 展開 |
+
+インストール手順：
+
+1. `SKILL.md` を見つける（アーカイブのルートまたはサブディレクトリにある場合がある）
+2. frontmatter から `name` を読み取る
+3. **スキルディレクトリ全体**（`SKILL.md`、`scripts/`、`assets/` など）を `<workspace>/skills/<name>/` にコピー
+4. アーカイブに `INSTALL.md` などのセットアップ手順やスクリプトがあればそれに従って実行するが、最終的にスキルが `<workspace>/skills/<name>/` に収まっている必要がある
+
+## スキルをゼロから作成
+
+推奨手順：
+
+1. **要件を明確にする** — ユーザーに具体的なユースケースをいくつか挙げてもらう（一度に多く聞きすぎない）
+2. **構成を計画する** — スクリプトは必要か？参考ドキュメントは？テンプレートは？
+3. **スキャフォールド** — 初期化スクリプトを使用：
+
+   ```bash
+   scripts/init_skill.py <skill-name> --path <workspace>/skills [--resources scripts,references,assets] [--examples]
+   ```
+
+4. **内容を埋める** — SKILL.md を書き、スクリプトとリソースを追加。スクリプトは必ず実行テストする
+5. **バリデーション**（任意）：
+
+   ```bash
+   scripts/quick_validate.py <workspace>/skills/<skill-name>
+   ```
+
+6. **イテレーション** — 実際の使用フィードバックに基づいて継続的に改善
+
+## 命名規則
+
+- 小文字、数字、ハイフンのみ使用。ユーザーの入力は正規化する（例: `Plan Mode` → `plan-mode`）
+- 64 文字以内
+- 短く、動詞で始め、一目で何をするか分かるように
+- 必要に応じてツール名をプレフィックスにする（例: `gh-address-comments`、`linear-address-issue`）
+- ディレクトリ名と `name` フィールドは完全に一致させる
+
+## 3 段階ローディング
+
+スキルは一度にすべてコンテキストに読み込まれるわけではなく、3 段階で必要に応じてロードされます：
+
+1. **メタ情報**（`name` + `description`） — 常にコンテキスト内（約 100 語）。Agent がスキルを使うかどうかの判断に使用
+2. **SKILL.md 本文** — スキルが有効化されたときだけロード。500 行以内を推奨
+3. **リソースファイル** — Agent が必要なときに読み込む
+
+複数のバリエーション（例: マルチクラウドデプロイ）を持つスキルは次のように整理：
+
+```
+cloud-deploy/
+├── SKILL.md             # メインワークフローとプロバイダー選択ロジック
+└── references/
+    ├── aws.md
+    ├── gcp.md
+    └── azure.md
+```
+
+ユーザーが AWS を選んだら、Agent は `aws.md` だけを読みます。3 社分のドキュメントをすべてロードする必要はありません。
+
+## よくあるデザインパターン
+
+**ステップ式**：番号付きの手順と対応スクリプト。
+
+```markdown
+1. フォーム構造を分析（analyze_form.py を実行）
+2. フィールドマッピングを生成（fields.json を編集）
+3. フォームを自動入力（fill_form.py を実行）
+```
+
+**分岐式**：ユーザーの意図に応じて異なるフローへ。
+
+```markdown
+1. 操作タイプを判定：
+   **新規作成？** → 「作成フロー」へ
+   **既存の編集？** → 「編集フロー」へ
+```
+
+**テンプレート式**：出力形式に厳密な要件がある場合、SKILL.md にテンプレートを含め、Agent にそれに従って出力させる。
--- a/docs/ja/tools/vision.mdx
+++ b/docs/ja/tools/vision.mdx
@@ -27,7 +27,7 @@ Vision ツールは多段階の自動選択＋自動フォールバック戦略
 | Claude | メインモデル | Anthropic ネイティブ画像形式 |
 | Gemini | メインモデル | inlineData 形式 |
 | 豆包 (Doubao) | メインモデル | doubao-seed-2-0 シリーズがネイティブ対応 |
-| Kimi (Moonshot) | メインモデル | kimi-k2.5 がネイティブ対応 |
+| Kimi (Moonshot) | メインモデル | kimi-k2.6、kimi-k2.5 がネイティブ対応 |
 | 智谱 AI | glm-5v-turbo | 常にビジョン専用モデルを使用 |
 | MiniMax | MiniMax-Text-01 | 常にビジョン専用モデルを使用 |

--- a/docs/memory/index.mdx
+++ b/docs/memory/index.mdx
@@ -5,6 +5,8 @@ description: CowAgent 的长期记忆系统 — 文件持久化、自动写入

 长期记忆保存在工作空间文件中，跨会话持久存在。Agent 在对话中通过检索工具按需加载历史记忆，也会在上下文裁剪时自动将对话摘要写入长期记忆。

+<img src="https://cdn.link-ai.tech/doc/memory-architecture-zh.jpeg" alt="Memory Architecture" />
+
 ## 记忆类型

 ### 核心记忆（MEMORY.md）
@@ -39,20 +41,25 @@ Agent 通过以下机制自动将对话内容持久化为长期记忆：

 Agent 会在对话中根据需要自动触发记忆检索，将相关历史信息纳入上下文。检索结果按混合评分排序（默认向量权重 0.7、关键词权重 0.3），日级记忆会随时间衰减（半衰期 30 天），核心记忆不衰减。

-## 首次启动
+## 相关文件

-首次启动 Agent 时，Agent 会主动向用户询问关键信息，并记录至工作空间（默认 `~/cow`）中：
+工作空间（默认 `~/cow`）中与记忆相关的文件：

 | 文件 | 说明 |
 | --- | --- |
-| `system.md` | Agent 的系统提示词和行为设定 |
-| `user.md` | 用户身份信息和偏好 |
+| `AGENT.md` | Agent 的人格和行为设定 |
+| `USER.md` | 用户身份信息和偏好 |
+| `RULE.md` | 自定义规则和约束 |
 | `MEMORY.md` | 核心记忆（长期） |
 | `memory/YYYY-MM-DD.md` | 日级记忆（按需创建） |
 | `memory/dreams/YYYY-MM-DD.md` | 梦境日记（Deep Dream 自动生成） |

+## Web 控制台
+
+在 Web 控制台的记忆管理页面中，可浏览记忆文件和梦境日记，支持通过 Tab 切换查看：
+
 <Frame>
-  <img src="https://cdn.link-ai.tech/doc/20260203000455.png" width="800" />
+  <img src="https://cdn.link-ai.tech/doc/20260414171014.png" width="800" />
 </Frame>

 ## 相关配置
--- a/docs/models/claude.mdx
+++ b/docs/models/claude.mdx
@@ -12,6 +12,6 @@ description: Claude 模型配置

 | 参数 | 说明 |
 | --- | --- |
-| `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等，参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
+| `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-7`、`claude-opus-4-6`、`claude-sonnet-4-5`、`claude-sonnet-4-0`、`claude-3-5-sonnet-latest` 等，参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) |
 | `claude_api_key` | 在 [Claude 控制台](https://console.anthropic.com/settings/keys) 创建 |
 | `claude_api_base` | 可选，默认为 `https://api.anthropic.com/v1`，修改可接入第三方代理 |
--- a/docs/models/coding-plan.mdx
+++ b/docs/models/coding-plan.mdx
@@ -99,27 +99,6 @@ description: Coding Plan 模式模型配置

 ---

-## Kimi
-
-```json
-{
-  "bot_type": "openai",
-  "model": "kimi-for-coding",
-  "open_ai_api_base": "https://api.kimi.com/coding/v1",
-  "open_ai_api_key": "YOUR_API_KEY"
-}
-```
-
-| 参数 | 说明 |
-| --- | --- |
-| `model` | `kimi-for-coding` |
-| `open_ai_api_base` | `https://api.kimi.com/coding/v1` |
-| `open_ai_api_key` | Coding Plan 专用 Key（与按量计费接口不通用） |
-
-官方文档：[Key 获取](https://www.kimi.com/code/docs/)
-
---
-
 ## 火山引擎

 ```json
@@ -138,3 +117,24 @@ description: Coding Plan 模式模型配置
 | `open_ai_api_key` | API Key 与普通接口通用 |

 官方文档：[快速开始](https://www.volcengine.com/docs/82379/1928261?lang=zh)
+
+---
+
+## Kimi
+
+```json
+{
+  "bot_type": "moonshot",
+  "model": "kimi-for-coding",
+  "moonshot_base_url": "https://api.kimi.com/coding/v1",
+  "moonshot_api_key": "YOUR_API_KEY"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `model` | 填写 `kimi-for-coding` 会自动使用最新模型，也可指定具体模型，例如 `kimi-k2.6` |
+| `moonshot_base_url` | `https://api.kimi.com/coding/v1` |
+| `moonshot_api_key` | Coding Plan 专用 Key（与按量计费接口不通用） |
+
+官方文档：[Key 获取](https://www.kimi.com/code/docs/)
--- a/docs/models/custom.mdx
+++ b/docs/models/custom.mdx
@@ -0,0 +1,62 @@
+---
+title: 自定义
+description: 自定义厂商配置，适用于第三方 API 代理和本地模型
+---
+
+适用于通过 OpenAI 兼容协议接入的第三方模型服务或本地部署的模型，例如：
+
+- **第三方 API 代理**：使用统一的 API Base 调用多种模型
+- **本地模型**：通过 Ollama、vLLM、LocalAI 等工具在本地部署的模型
+- **私有化部署**：企业内部部署的模型服务
+
+<Note>
+  与 `openai` 厂商的区别：选择自定义厂商后，通过 `/config model` 切换模型时，不会自动切换厂商类型，始终使用自定义的 API 地址。
+</Note>
+
+## 配置方式
+
+### 第三方 API 代理
+
+```json
+{
+  "bot_type": "custom",
+  "model": "",
+  "custom_api_key": "YOUR_API_KEY",
+  "custom_api_base": "https://{your-proxy.com}/v1"
+}
+```
+
+| 参数 | 说明 |
+| --- | --- |
+| `bot_type` | 必须设为 `custom` |
+| `model` | 模型名称，填写代理服务支持的任意模型名 |
+| `custom_api_key` | API 密钥，由代理服务提供 |
+| `custom_api_base` | API 地址，由代理服务提供，需兼容 OpenAI 协议 |
+
+### 本地模型
+
+本地模型通常不需要 API Key，只需填写 API Base 即可：
+
+```json
+{
+  "bot_type": "custom",
+  "model": "qwen3.5:27b",
+  "custom_api_base": "http://localhost:11434/v1"
+}
+```
+
+常见的本地部署工具及默认地址：
+
+| 工具 | 默认 API Base |
+| --- | --- |
+| [Ollama](https://ollama.com) | `http://localhost:11434/v1` |
+| [vLLM](https://docs.vllm.ai) | `http://localhost:8000/v1` |
+| [LocalAI](https://localai.io) | `http://localhost:8080/v1` |
+
+## 切换模型
+
+自定义厂商下切换模型时，只会修改 `model`，不会改变 `bot_type` 和 API 地址：
+
+```
+/config model qwen3.5:27b
+```
--- a/docs/models/deepseek.mdx
+++ b/docs/models/deepseek.mdx
@@ -7,25 +7,57 @@ description: DeepSeek 模型配置

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "deepseek_api_key": "YOUR_API_KEY"
 }
 ```

 | 参数 | 说明 |
 | --- | --- |
-| `model` | `deepseek-chat`（DeepSeek-V3.2，非思考模式）、`deepseek-reasoner`（DeepSeek-R1，思考模式） |
+| `model` | 支持 `deepseek-v4-flash`（默认）、`deepseek-v4-pro` |
 | `deepseek_api_key` | 在 [DeepSeek 平台](https://platform.deepseek.com/api_keys) 创建 |
 | `deepseek_api_base` | 可选，默认为 `https://api.deepseek.com/v1`，可修改为第三方代理地址 |

+## 模型选择
+
+| 模型 | 适用场景 |
+| --- | --- |
+| `deepseek-v4-flash` | 默认推荐，速度快、成本低 |
+| `deepseek-v4-pro` | 更智能、复杂任务效果更强 |
+
+## 思考模式
+
+V4 系列（`deepseek-v4-flash` / `deepseek-v4-pro`）支持显式的"思考模式"：模型在输出最终回答前，先输出一段思维链（`reasoning_content`），从而提升答案质量。
+
+### 开关
+
+通过全局配置 `enable_thinking` 控制：
+
+```json
+{
+  "enable_thinking": true
+}
+```
+
+- `true`：所有渠道下模型都会先思考再作答。Web 控制台会展示思考过程，IM 渠道（微信 / 企微 / 钉钉 / 飞书）虽不展示但同样获得更好答案。
+- `false`：关闭思考，响应更快，首字延迟更低。
+
+### 行为说明
+
+- **采样参数**：思考模式下 `temperature`、`top_p`、`presence_penalty`、`frequency_penalty` 会被服务端忽略（不会报错），CowAgent 会自动跳过传入。
+- **多轮工具调用**：当历史中包含工具调用时，DeepSeek 要求所有 assistant 消息必须回传 `reasoning_content`。CowAgent 会自动处理回传逻辑，跨轮次切换思考开关也不会出错。
+
+<Tip>
+  默认使用 `deepseek-v4-flash`；复杂任务可使用 `deepseek-v4-pro`；需要深度思考可开启 `enable_thinking`。
+</Tip>
+
 方式二：OpenAI 兼容方式接入：

 ```json
 {
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "bot_type": "openai",
  "open_ai_api_key": "YOUR_API_KEY",
  "open_ai_api_base": "https://api.deepseek.com/v1"
 }
 ```
-
--- a/docs/models/glm.mdx
+++ b/docs/models/glm.mdx
@@ -5,14 +5,14 @@ description: 智谱AI GLM 模型配置

 ```json
 {
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "zhipu_ai_api_key": "YOUR_API_KEY"
 }
 ```

 | 参数 | 说明 |
 | --- | --- |
-| `model` | 可填 `glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等，参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) |
+| `model` | 可填 `glm-5.1`、`glm-5-turbo`、`glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash`、`glm-4-air` 等，参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) |
 | `zhipu_ai_api_key` | 在 [智谱AI 控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 |

 也支持 OpenAI 兼容方式接入：
@@ -20,7 +20,7 @@ description: 智谱AI GLM 模型配置
 ```json
 {
  "bot_type": "openai",
-  "model": "glm-5-turbo",
+  "model": "glm-5.1",
  "open_ai_api_base": "https://open.bigmodel.cn/api/paas/v4",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/models/index.mdx
+++ b/docs/models/index.mdx
@@ -6,7 +6,7 @@ description: CowAgent 支持的模型及推荐选择
 CowAgent 支持国内外主流厂商的大语言模型，模型接口实现在项目的 `models/` 目录下。

 <Note>
-  Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：MiniMax-M2.7、glm-5-turbo、kimi-k2.5、qwen3.6-plus、claude-sonnet-4-6、gemini-3.1-pro-preview
+  Agent 模式下推荐使用以下模型，可根据效果及成本综合选择：deepseek-v4-flash、MiniMax-M2.7、claude-sonnet-4-6、gemini-3.1-pro-preview、glm-5.1、qwen3.6-plus、kimi-k2.6

  同时支持使用 [LinkAI](https://link-ai.tech) 平台接口，可灵活切换多种模型，并支持知识库、工作流、插件等 Agent 能力。
 </Note>
@@ -23,21 +23,12 @@ CowAgent 支持国内外主流厂商的大语言模型，模型接口实现在
 ## 支持的模型

 <CardGroup cols={2}>
+  <Card title="DeepSeek" href="/models/deepseek">
+    deepseek-v4-flash、deepseek-v4-pro 等
+  </Card>
  <Card title="MiniMax" href="/models/minimax">
    MiniMax-M2.7 等系列模型
  </Card>
-  <Card title="智谱 GLM" href="/models/glm">
-    glm-5-turbo、glm-5 等系列模型
-  </Card>
-  <Card title="通义千问 Qwen" href="/models/qwen">
-    qwen3.6-plus、qwen3-max 等
-  </Card>
-  <Card title="Kimi" href="/models/kimi">
-    kimi-k2.5、kimi-k2 等
-  </Card>
-  <Card title="豆包 Doubao" href="/models/doubao">
-    doubao-seed 系列模型
-  </Card>
  <Card title="Claude" href="/models/claude">
    claude-sonnet-4-6 等
  </Card>
@@ -47,12 +38,24 @@ CowAgent 支持国内外主流厂商的大语言模型，模型接口实现在
  <Card title="OpenAI" href="/models/openai">
    gpt-5.4、gpt-4.1、o 系列等
  </Card>
-  <Card title="DeepSeek" href="/models/deepseek">
-    deepseek-chat、deepseek-reasoner
+  <Card title="智谱 GLM" href="/models/glm">
+    glm-5.1、glm-5-turbo、glm-5 等系列模型
+  </Card>
+  <Card title="通义千问 Qwen" href="/models/qwen">
+    qwen3.6-plus、qwen3-max 等
+  </Card>
+  <Card title="豆包 Doubao" href="/models/doubao">
+    doubao-seed 系列模型
+  </Card>
+  <Card title="Kimi" href="/models/kimi">
+    kimi-k2.6、kimi-k2.5、kimi-k2 等
  </Card>
  <Card title="LinkAI" href="/models/linkai">
    多模型统一接口 + 知识库
  </Card>
+  <Card title="自定义" href="/models/custom">
+    第三方代理、本地模型等
+  </Card>
 </CardGroup>


--- a/docs/models/kimi.mdx
+++ b/docs/models/kimi.mdx
@@ -5,14 +5,14 @@ description: Kimi (Moonshot) 模型配置

 ```json
 {
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "moonshot_api_key": "YOUR_API_KEY"
 }
 ```

 | 参数 | 说明 |
 | --- | --- |
-| `model` | 可填 `kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` |
+| `model` | 可填 `kimi-k2.6`、`kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k`、`moonshot-v1-128k` |
 | `moonshot_api_key` | 在 [Moonshot 控制台](https://platform.moonshot.cn/console/api-keys) 创建 |

 也支持 OpenAI 兼容方式接入：
@@ -20,7 +20,7 @@ description: Kimi (Moonshot) 模型配置
 ```json
 {
  "bot_type": "openai",
-  "model": "kimi-k2.5",
+  "model": "kimi-k2.6",
  "open_ai_api_base": "https://api.moonshot.cn/v1",
  "open_ai_api_key": "YOUR_API_KEY"
 }
--- a/docs/models/linkai.mdx
+++ b/docs/models/linkai.mdx
@@ -3,7 +3,7 @@ title: LinkAI
 description: 通过 LinkAI 平台统一接入多种模型
 ---

-通过 [LinkAI](https://link-ai.tech) 平台可灵活切换 OpenAI、Claude、Gemini、DeepSeek、Qwen、Kimi 等多种模型，并支持知识库、工作流、插件等 Agent 能力。
+通过 [LinkAI](https://link-ai.tech) 平台可灵活切换 OpenAI、Claude、Gemini、DeepSeek、MiniMax、Qwen、Kimi 等多种模型，并支持知识库、工作流、插件等 Agent 能力。

 ```json
 {
--- a/docs/releases/overview.mdx
+++ b/docs/releases/overview.mdx
@@ -5,6 +5,7 @@ description: CowAgent 版本更新历史

 | 版本 | 日期 | 说明 |
 | --- | --- | --- |
+| [2.0.7](/releases/v2.0.7) | 2026.04.22 | 图像生成技能（六厂商自动路由）、新模型支持（Kimi K2.6、Claude Opus 4.7、GLM 5.1）、知识库增强、Web 控制台优化 |
 | [2.0.6](/releases/v2.0.6) | 2026.04.14 | 项目更名、知识库系统、梦境记忆蒸馏、上下文智能压缩、Web 控制台多会话及多项优化 |
 | [2.0.5](/releases/v2.0.5) | 2026.04.01 | Cow CLI、Skill Hub 开源、浏览器工具、企微扫码创建、多项优化和修复 |
 | [2.0.4](/releases/v2.0.4) | 2026.03.22 | 新增个人微信通道、新模型支持、日文文档、脚本重构及多项修复 |
--- a/docs/releases/v2.0.6.mdx
+++ b/docs/releases/v2.0.6.mdx
@@ -12,7 +12,7 @@ description: CowAgent 2.0.6 - 知识库系统、梦境记忆蒸馏、上下文

 ## 📚 知识库系统

-新增个人知识库系统，Agent 可自主构建和维护结构化知识，并在对话中按需检索引用：
+新增个人知识库系统，Agent 可自主构建和维护结构化知识，并在对话中按需检索引用。

 - **索引驱动的自组织结构**：知识库采用 `knowledge/` 目录，按分类自动组织，每个知识页面为独立的 Markdown 文件
 - **自动写入**：向 Agent 发送文件、链接等知识，或在讨论中识别到有价值的知识时，自动创建或更新知识页面
@@ -22,9 +22,10 @@ description: CowAgent 2.0.6 - 知识库系统、梦境记忆蒸馏、上下文

 <img src="https://cdn.link-ai.tech/doc/20260413105435.png" width="750" />

-
 相关文档：[知识库](https://docs.cowagent.ai/knowledge)

+Inspired by Karpathy's [LLM Wiki](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f).
+
 ## 🌙 梦境记忆蒸馏（Deep Dream）

 全新的记忆整理机制，每日自动将分散的对话记忆蒸馏为精炼的长期记忆：
--- a/docs/releases/v2.0.7.mdx
+++ b/docs/releases/v2.0.7.mdx
@@ -0,0 +1,64 @@
+---
+title: v2.0.7
+description: CowAgent 2.0.7 - 图像生成技能（六厂商自动路由）、新模型支持、知识库增强、Web 控制台优化及多项修复
+---
+
+## 🎨 图像生成技能
+
+新增图像生成内置技能，支持文生图、图生图、多图融合，支持 `GPT-Image-2`、`Nano Banana` 等多种模型：
+
+- **自动路由**：支持六种模型厂商自动切换，OpenAI (GPT-Image-2) → Gemini (Nano Banana) → Seedream (火山方舟) → Qwen (百炼) → MiniMax → LinkAI
+- **开箱即用**：配置 API Key 即可使用，无需手动指定模型。也支持在对话中指定特定模型
+- **灵活控制**：支持 `quality`（画质）、`size`（分辨率，512/1K~4K）、`aspect_ratio`（宽高比）等参数，各厂商自动适配有效值
+- **图片编辑**：传入已有图片即可进行编辑、风格迁移、多图融合
+- **Skill 级配置**：支持通过 `config.json` 中的 `skill.image-generation.model` 固定默认模型
+
+相关文档：[图像生成技能](https://docs.cowagent.ai/skills/image-generation)
+
+## 🤖 新模型支持
+
+- **Kimi K2.6**：新增 `kimi-k2.6` 模型支持
+- **Claude Opus 4.7**：新增 `claude-opus-4-7` 模型支持
+- **GLM 5.1**：新增 `glm-5.1` 模型支持
+- **Kimi Coding Plan**：支持 Kimi Coding Plan 模式
+- **自定义模型厂商**：新增[自定义模型](https://docs.cowagent.ai/models/custom)提供方配置，方便接入本地模型及更多厂商
+
+## 📚 知识库增强
+
+- **嵌套目录支持**：知识库列表和展示支持多级嵌套目录
+- **根级文件展示**：知识树中显示根目录下的 `index.md`、`log.md` 等文件
+- **空状态统计修复**：排除根级文件对知识库统计的干扰，正确保持空状态
+
+## 🌙 梦境记忆优化
+
+- **结构化组织**：梦境记忆文件按日期自动归档，目录结构更清晰
+- **定时抖动**：每日定时触发增加随机抖动，避免集群场景下的并发冲突
+
+## 🛠 技能系统改进
+
+- **技能管理刷新**：`/skill` 命令执行后自动加载最新技能，确保状态同步
+- **安装来源扩展**：技能安装支持多种来源格式（URL、zip、本地文件等）
+
+## 💬 Web 控制台优化
+
+- **智能自动滚动**：优化聊天窗口滚动逻辑，用户手动翻阅时不再强制跳到底部。Thanks @colin2060
+- **移动端适配**：侧边栏默认隐藏，支持点击遮罩关闭
+- **图片预览去重**：修复同一消息中图片重复渲染的问题
+- **推理内容截断**：深度思考内容超出 4KB 时自动截断，解决前端卡顿问题
+- **会话标题修复**：修复标题自动生成的回退逻辑
+
+
+## 🐛 其他修复
+
+- **Gemini 修复**：修复 Gemini tool call 不返回结果的问题
+- **Agent 重试**：空响应重试时不再丢弃 tool_calls
+- **Docker 环境变量**：修复 Docker 环境下更新配置后环境变量未同步的问题。Thanks @sunboy0523
+- **Python 3.7 兼容**：延迟导入 `Literal` 以兼容 Python 3.7
+- **模型切换通知**：修复切换模型后 bot_type 变更通知未显示的问题。Thanks @6vision
+- **配置命令增强**：`/config` 支持设置 `enable_thinking`
+
+## 📦 升级方式
+
+源码部署可执行 `cow update` 或 `./run.sh update` 一键升级，或手动拉取代码后重启。详见 [更新升级文档](https://docs.cowagent.ai/guide/upgrade)。
+
+**发布日期**：2026.04.22 | [Full Changelog](https://github.com/zhayujie/CowAgent/compare/2.0.6...2.0.7)
--- a/docs/skills/image-generation.mdx
+++ b/docs/skills/image-generation.mdx
@@ -0,0 +1,160 @@
+---
+title: image-generation - 图像生成
+description: 文生图 / 图生图 / 多图融合，支持多家厂商自动路由与回退
+---
+
+通用的图像生成与编辑技能，支持 OpenAI、Gemini、Seedream（火山方舟）、Qwen（百炼）、MiniMax、LinkAI 共六家厂商。不需要手动选模型，脚本会按固定优先级自动挑选已配置的厂商来出图。
+
+## 模型选择
+
+`image-generation` 采用「固定优先级 + 自动回退」的策略，配好 Key 就能用：
+
+1. **优先级顺序**：`OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI`
+2. **没配 Key 的跳过**：只有设了 API Key 的厂商才会参与
+3. **失败自动切下一家**：遇到 401、模型未开通、网络异常等错误时，会自动试下一个
+4. **指定模型时前置**：如果明确传了某个模型名，对应厂商会被提到最前面先试
+
+### 支持的模型
+
+| 厂商 | 模型 / 别名 | 特点 |
+| --- | --- | --- |
+| OpenAI | `gpt-image-2`、`gpt-image-1` | 通用文生图，高质量、高智能，支持 `quality` 参数控制画质 |
+| Gemini Nano Banana | `nano-banana-2`、`nano-banana-pro`、`nano-banana` | 对应 `gemini-3.1-flash`、`gemini-3-pro`、`gemini-2.5-flash` 的图像版本 |
+| Seedream（火山方舟） | `seedream-5.0-lite`、`seedream-4.5` | 原生 2K–4K，最多 14 张图融合 |
+| Qwen（百炼） | `qwen-image-2.0`、`qwen-image-2.0-pro` | 擅长中文排版和图文融合 |
+| MiniMax | `image-01` | 简单快速的图片生成 |
+| LinkAI | 任意模型 | 通用代理，兜底用 |
+
+<Note>
+默认情况下 Agent 不会主动选模型，而是走自动路由。如果你想用某个特定模型，直接在对话里说就行，比如「用 seedream 画一只猫」或「用 gpt-image-2 生成海报」。也可以通过下面的「自定义配置」固定默认模型。
+</Note>
+
+## 自定义配置
+
+### API Key 配置
+
+至少需要配**一个**厂商的 Key，配多个就能享受自动回退能力。有三种配置方式：
+
+#### 方式一：已有模型 Key 自动复用
+
+如果你在 Web 控制台或 `config.json` 中配置了对话模型的 Key（比如 `openai_api_key`、`gemini_api_key` 等），启动时这些 Key 会被**自动同步**到对应的环境变量。也就是说，只要你的对话模型能用，图像生成就能直接用同一个 Key，不需要额外配置。
+
+#### 方式二：在 config.json 中配置
+
+在 `config.json` 中直接写对应的 Key 字段即可，支持的字段如下：
+
+```json
+{
+  "openai_api_key": "sk-xxx",
+  "openai_api_base": "https://api.openai.com/v1",
+  "gemini_api_key": "AIza-xxx",
+  "ark_api_key": "xxx",
+  "dashscope_api_key": "sk-xxx",
+  "minimax_api_key": "xxx",
+  "linkai_api_key": "xxx"
+}
+```
+
+修改后需要重启生效。每个 Key 还有对应的 `*_api_base` 字段可以自定义接口地址。
+
+#### 方式三：对话中直接配置
+
+在对话里发送 API Key，Agent 会通过 `env_config` 工具自动保存到 `~/cow/.env`，**不需要重启**就能生效。例如：
+
+```
+帮我配置 OPENAI_API_KEY 为 sk-xxx
+```
+
+或者：
+
+```
+设置 ARK_API_KEY 为 xxx
+```
+
+### API Key 一览
+
+| 环境变量 | config.json 字段 | 对应厂商 | 默认 Base URL |
+| --- | --- | --- | --- |
+| `OPENAI_API_KEY` | `openai_api_key` | OpenAI | `https://api.openai.com/v1` |
+| `GEMINI_API_KEY` | `gemini_api_key` | Gemini | `https://generativelanguage.googleapis.com` |
+| `ARK_API_KEY` | `ark_api_key` | 火山方舟（Seedream） | `https://ark.cn-beijing.volces.com/api/v3` |
+| `DASHSCOPE_API_KEY` | `dashscope_api_key` | 阿里百炼（Qwen） | `https://dashscope.aliyuncs.com` |
+| `MINIMAX_API_KEY` | `minimax_api_key` | MiniMax | `https://api.minimaxi.com` |
+| `LINKAI_API_KEY` | `linkai_api_key` | LinkAI | `https://api.link-ai.tech` |
+
+
+### 指定默认模型
+
+如果想让所有图像生成固定走某个厂商的模型，可以在 `config.json` 里加：
+
+```json
+"skill": {
+  "image-generation": {
+    "model": "seedream-5.0-lite"
+  }
+}
+```
+
+启动时这段配置会被自动转成环境变量 `SKILL_IMAGE_GENERATION_MODEL`，脚本读到后会固定使用这个模型所在的厂商进行生成。
+
+
+## 开启和关闭
+
+`image-generation` 是内置技能，**会根据 API Key 自动调整状态**：
+
+- **Key 已配置**：技能正常可用，Agent 收到画图请求时会直接调用
+- **Key 未配置**：技能仍然会出现在上下文中（标记为「需要配置」），Agent 会引导用户去配 Key，而不是直接调用失败
+
+如果想手动控制，也可以用命令：
+
+```text
+/skill disable image-generation    # 手动关闭（即使有 Key 也不会被调用）
+/skill enable image-generation     # 重新开启
+```
+
+终端里对应的命令是 `cow skill disable image-generation` / `cow skill enable image-generation`。
+
+## 参数
+
+| 参数 | 类型 | 必填 | 默认 | 说明 |
+| --- | --- | --- | --- | --- |
+| `prompt` | string | 是 | — | 图像描述 |
+| `image_url` | string / list | 否 | null | 编辑用的输入图，支持本地路径或 URL。传多个就是多图融合 |
+| `quality` | string | 否 | auto | `low` / `medium` / `high`，只有部分厂商支持 |
+| `size` | string | 否 | auto | `512` / `1K` / `2K` / `3K` / `4K`，也可以写像素值如 `1024x1024` |
+| `aspect_ratio` | string | 否 | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9`；Gemini 还支持 `1:4` / `4:1` / `1:8` / `8:1` |
+
+<Warning>
+**质量越高、分辨率越大，花的钱越多、等的时间越长。**
+
+- 日常对话和快速预览直接用默认（`auto`），或者 `quality=low` + `size=1K`，大概 20 秒出图
+- 做海报、用户明确要高清的时候再上 `quality=high` + `size=2K/4K`，可能要等 1～5 分钟，取决于不同模型的速度
+</Warning>
+
+## 输出
+
+成功时返回：
+
+```json
+{
+  "model": "doubao-seedream-5-0-260128",
+  "images": [
+    {"url": "/path/to/output.png"}
+  ]
+}
+```
+
+失败时返回 `{ "error": "..." }`。出错后**不要直接重试**——大概率是配置问题（Key 填错、API 地址不对、模型没开通），让用户修好配置再试。
+
+## 常见用法
+
+- **文生图**：根据描述生成插画、海报、图标、头像、分镜图等
+- **图生图**：在已有图片上改风格、换元素、加装饰、加文字等
+- **多图融合**：把多张参考图合成一张（换装、角色合影等）
+
+<Note>
+- bash 超时建议设 600 秒。单个厂商的 HTTP 超时是 300 秒，但脚本可能依次尝试多个厂商
+- 输入的图片会自动压缩到 4MB 以内、最长边不超过 4096px
+- Gemini / Seedream / Qwen / MiniMax 不支持 `quality` 参数，传了也没用
+- Seedream 默认出 2K 图，`seedream-5.0-lite` 支持到 3K，`seedream-4.5` 支持到 4K
+</Note>
--- a/docs/skills/install.mdx
+++ b/docs/skills/install.mdx
@@ -34,7 +34,7 @@ CowAgent 支持通过统一的 `install` 命令安装来自 **[Cow 技能广场]

 ## 从 LinkAI 安装

-[LinkAI](https://link-ai.tech/console) 上的所有公开资源 (1w+个插件/应用/工作流) ，以及自己创建的资源 (应用/工作流/知识库/数据库/插件) 都可以通过命令一键安装：
+[LinkAI](https://link-ai.tech/console) 上的所有公开资源 (1w+个应用/工作流/插件) ，以及自己创建的资源 (应用/工作流/知识库/数据库/插件) 都可以通过命令一键安装：

 ```text
 /skill install linkai:<code>
--- a/docs/skills/knowledge-wiki.mdx
+++ b/docs/skills/knowledge-wiki.mdx
@@ -0,0 +1,112 @@
+---
+title: knowledge-wiki - 知识库
+description: 维护本地结构化知识库，自动归档、分类和交叉引用
+---
+
+帮你把对话中产生的资料、灵感和零散笔记整理成结构化的本地知识库，自动维护索引和页面之间的交叉引用。
+
+`knowledge-wiki` 在工作空间下维护一个 `knowledge/` 目录，相当于 Agent 的「外脑」。技能设置了 `always: true`，会**常驻上下文**，不需要任何外部依赖。
+
+## 什么时候会触发
+
+- 你分享了一篇文章、一份文档或一个 URL，想要沉淀下来
+- 聊天过程中聊出了值得长期保留的结论
+- 你想查一下之前积累过的知识
+
+## 目录结构
+
+```
+knowledge/
+├── index.md           # 全局索引（必须维护）
+├── log.md             # 操作日志（只追加）
+└── <category>/        # 分类子目录（按内容自由分组）
+    └── <slug>.md      # 知识页（文件名用小写加中划线）
+```
+
+## 三个核心操作
+
+### 1. 收录（Ingest）
+
+你分享了一段资料时，Agent 会：
+
+1. 读懂原文，提取关键信息
+2. 按内容决定放到哪个分类下——先看 `index.md` 里有没有合适的分类，没有就新建一个
+3. 生成知识页 `knowledge/<category>/<slug>.md`
+4. 更新索引 `index.md` 和日志 `log.md`
+
+### 2. 综合（Synthesize）
+
+聊天中产生了新的结论或洞见时：
+
+1. 在合适的分类下创建新知识页
+2. 给相关的已有页面加上互相指向的链接
+3. 更新索引和日志
+
+### 3. 查询（Query）
+
+你问到以前积累的知识时：
+
+1. 先从 `index.md` 里找可能相关的页面
+2. 用 `read` 工具打开具体页面
+3. 需要时再用 `memory_search` 补充检索
+4. 回答里会带上知识页的链接，方便你点过去看原文
+
+## 知识页怎么写
+
+```markdown
+# 页面标题
+
+> Source: <来源 URL 或简要说明>
+
+正文内容。页面之间用相对路径链接：
+[相关页](../category/related-page.md)
+
+## 要点
+
+- ...
+
+## 相关页面
+
+- [页面 A](../category/page-a.md) — 为什么相关
+```
+
+<Note>
+- `> Source:` 用来记录这条知识的来源。有明确来源时一定要写
+- 交叉引用很重要：创建或更新某页时，记得也去关联页面里补上反向链接
+- **只链接已经存在的页面**。如果某个概念值得单独成页，先建好再加链接
+</Note>
+
+## 索引格式
+
+`knowledge/index.md` 采用扁平列表，按分类分组，每个知识页占一行：
+
+```markdown
+# Knowledge Index
+
+## 分类 A
+- [页面标题](category-a/page-slug.md) — 一句话摘要
+
+## 分类 B
+- [页面标题](category-b/page-slug.md) — 一句话摘要
+```
+
+不用表格，不加 emoji。分类怎么起名、怎么组织都可以灵活调整。
+
+## 日志格式
+
+`knowledge/log.md` 只追加、不修改，最新的写在最下面：
+
+```markdown
+## [YYYY-MM-DD] ingest | 页面标题
+## [YYYY-MM-DD] synthesize | 页面标题
+```
+
+## 写作约定
+
+- **文件名**用小写加中划线，比如 `machine-learning.md`
+- **一页只讲一件事**，需要关联的内容通过链接串起来
+- **有了就更新，不要重复建页**
+- **每次改完都要更新索引** `knowledge/index.md`
+- **写精华别抄全文**，抓住要点就行
+- **对话里引用知识页时用完整路径**，比如 `[标题](knowledge/<category>/<slug>.md)`。页面之间互相链接才用相对路径
+- **基于知识页回答问题时附上链接**，方便深入查阅
--- a/docs/skills/skill-creator.mdx
+++ b/docs/skills/skill-creator.mdx
@@ -0,0 +1,180 @@
+---
+title: skill-creator - 技能创建
+description: 创建、安装、更新技能，规范 SKILL.md 写法与目录结构
+---
+
+`skill-creator` 是一个「元技能」，专门用来帮助 Agent 创建、安装和更新其他技能，确保所有技能的 `SKILL.md` 写法和目录结构保持一致。
+
+## 什么时候会触发
+
+- 用户想从 URL 或远程仓库安装一个技能
+- 用户想从头创建一个全新的技能
+- 需要升级或重构已有技能
+
+## 技能是什么
+
+简单来说，技能就是一份「可复用的说明书」加上可选的脚本和资源。它给 Agent 注入了某个领域的专业知识，让 Agent 在遇到对应任务时能像专家一样处理。
+
+一个技能通常包含以下内容：
+
+1. **专项工作流** — 某类任务的完整步骤
+2. **工具用法** — 怎么调某种 API 或处理某种文件
+3. **领域知识** — 团队约定、业务规则、数据结构之类
+4. **附带资源** — 脚本、参考文档、模板等
+
+<Note>
+**核心原则：能省则省**。只写 Agent 自己想不到的内容，每加一行都要问自己：值不值得占这些 token？
+</Note>
+
+## 目录结构
+
+```
+skill-name/
+├── SKILL.md            # 必需：技能定义
+│   ├── YAML frontmatter（必填 name / description）
+│   └── Markdown 正文（说明 + 示例）
+└── 可选资源
+    ├── scripts/        # 可执行脚本（Python / Bash 等）
+    ├── references/     # 内容较多的参考文档，Agent 按需读取
+    └── assets/         # 模板、图标等，会直接用在输出里
+```
+
+## SKILL.md 规范定义
+
+SKILL.md 文件头部的 `frontmatter` 字段：
+
+| 字段 | 说明 |
+| --- | --- |
+| `name` | 技能名，小写加中划线，必须和目录名一致 |
+| `description` | **最关键的字段**。写清楚「这个技能干什么」和「什么情况下该用它」，Agent 看到这段来决定要不要调它。注意：所有触发相关的描述都放在这里，不要写到正文里 |
+| `metadata.cowagent.requires.bins` | 系统里必须装了哪些命令行工具 |
+| `metadata.cowagent.requires.env` | 需要哪些环境变量（全部满足才行） |
+| `metadata.cowagent.requires.anyEnv` | 多个 API Key 满足一个就行 |
+| `metadata.cowagent.requires.anyBins` | 多个工具满足一个就行 |
+| `metadata.cowagent.always` | 设为 `true` 会始终加载，不检查依赖 |
+| `metadata.cowagent.emoji` | 展示用的 emoji（可选） |
+| `metadata.cowagent.os` | 限定系统，如 `["darwin", "linux"]` |
+
+<Note>
+`category` 字段不需要手写，系统会自动设成 `skill`。
+</Note>
+
+声明 API Key 依赖有两种写法：
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      env: ["MYAPI_KEY"]            # 必须有
+```
+
+```yaml
+metadata:
+  cowagent:
+    requires:
+      anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"]   # 有一个就行
+```
+
+**技能会自动按依赖启禁用**：环境变量齐了就自动启用，缺了就自动禁用，不需要手动 `/skill enable`。
+
+## 资源目录怎么用
+
+| 目录 | 放什么 | 不要放 |
+| --- | --- | --- |
+| `scripts/` | 需要反复执行的代码，或需要确定性结果的脚本 | 纯演示用的代码片段 |
+| `references/` | **超过 500 行**、SKILL.md 实在塞不下的大文档（比如完整的数据库 Schema） | 普通 API 文档、示例、教程 |
+| `assets/` | 会出现在最终产物里的文件（模板、图标、样板代码等） | 说明性文档 |
+
+<Warning>
+**原则上所有内容都写在 `SKILL.md` 里**，只有确实放不下才拆到资源目录。
+
+不要给技能加 `README.md`、`CHANGELOG.md`、`INSTALLATION_GUIDE.md` 之类的文件——全部放进 `SKILL.md`。资源目录里只放真正要跑的脚本或真正要用的素材。
+</Warning>
+
+## 安装外部技能
+
+安装后最终落在 `<workspace>/skills/<name>/` 目录。
+
+| 来源 | 怎么装 |
+| --- | --- |
+| URL（单文件） | curl / web_fetch 直接拉 |
+| URL（zip 包） | 下载解压 |
+| 本地 SKILL.md | 直接读 |
+| 本地 zip 包 | 解压 |
+
+安装步骤：
+
+1. 找到 `SKILL.md`（可能在包的根目录或某个子目录里）
+2. 从 frontmatter 里读出 `name`
+3. 把**整个技能目录**（包括 `SKILL.md`、`scripts/`、`assets/` 等）复制到 `<workspace>/skills/<name>/`
+4. 如果包里有 `INSTALL.md` 之类的安装说明，照着执行一遍，但最终结果仍然要落在 `<workspace>/skills/<name>/` 下
+
+## 从头创建技能
+
+推荐按这个顺序来：
+
+1. **搞清楚需求** — 让用户举几个具体的使用场景，一次别问太多
+2. **想好结构** — 这个技能需要脚本吗？需要参考文档吗？需要模板素材吗？
+3. **生成骨架** — 用初始化脚本：
+
+   ```bash
+   scripts/init_skill.py <skill-name> --path <workspace>/skills [--resources scripts,references,assets] [--examples]
+   ```
+
+4. **填充内容** — 写好 SKILL.md、补上脚本和资源。脚本写完一定要实际跑一遍
+5. **格式校验**（可选）：
+
+   ```bash
+   scripts/quick_validate.py <workspace>/skills/<skill-name>
+   ```
+
+6. **迭代完善** — 实际用起来之后根据反馈持续改进
+
+## 命名规则
+
+- 只用小写字母、数字和中划线。用户给的名字需要做标准化处理，比如 `Plan Mode` → `plan-mode`
+- 长度别超过 64 个字符
+- 尽量短、用动词开头、一看就知道干什么
+- 必要时用工具名做前缀，比如 `gh-address-comments`、`linear-address-issue`
+- 目录名和 `name` 字段必须完全一致
+
+## 三级加载机制
+
+技能不会一次性全部塞进上下文，而是分三级按需加载：
+
+1. **元信息**（`name` + `description`）— 常驻上下文，约 100 词。Agent 靠它判断「要不要用这个技能」
+2. **SKILL.md 正文** — 确定要用了才加载，建议控制在 500 行以内
+3. **资源文件** — Agent 需要的时候再读
+
+如果一个技能涉及多个变体（比如多云厂商部署），建议这样组织：
+
+```
+cloud-deploy/
+├── SKILL.md             # 主流程和厂商选择逻辑
+└── references/
+    ├── aws.md
+    ├── gcp.md
+    └── azure.md
+```
+
+用户选了 AWS，Agent 只需要读 `aws.md`，不用把三家的文档全加载进来。
+
+## 常见设计模式
+
+**步骤式**：按编号列出操作步骤和对应脚本。
+
+```markdown
+1. 分析表单结构（运行 analyze_form.py）
+2. 生成字段映射（编辑 fields.json）
+3. 自动填充表单（运行 fill_form.py）
+```
+
+**分支式**：根据用户意图走不同流程。
+
+```markdown
+1. 判断操作类型：
+   **新建内容？** → 走「创建流程」
+   **编辑已有内容？** → 走「编辑流程」
+```
+
+**模板式**：输出格式有严格要求时，在 SKILL.md 里直接给一个样板，让 Agent 照着写。
--- a/docs/tools/index.mdx
+++ b/docs/tools/index.mdx
@@ -50,7 +50,7 @@ description: CowAgent 内置工具系统
  <Card title="web_search - 联网搜索" icon="magnifying-glass" href="/tools/web-search">
    搜索互联网获取实时信息
  </Card>
-  <Card title="vision - 图片分析" icon="eye" href="/tools/vision">
+  <Card title="vision - 图片理解" icon="eye" href="/tools/vision">
    分析图片内容（识别、描述、OCR 文字提取等）
  </Card>
  <Card title="browser - 浏览器" icon="window" href="/tools/browser">
--- a/docs/tools/vision.mdx
+++ b/docs/tools/vision.mdx
@@ -1,5 +1,5 @@
 ---
-title: vision - 图片分析
+title: vision - 图片理解
 description: 分析图片内容（识别、描述、OCR 等）
 ---

@@ -23,7 +23,7 @@ Vision 工具采用多级自动选择 + 自动兜底策略，无需手动配置
 | Claude | 使用主模型 | Anthropic 原生图像格式 |
 | Gemini | 使用主模型 | inlineData 格式 |
 | 豆包 (Doubao) | 使用主模型 | doubao-seed-2-0 系列原生支持 |
-| Kimi (Moonshot) | 使用主模型 | kimi-k2.5 原生支持 |
+| Kimi (Moonshot) | 使用主模型 | kimi-k2.6、kimi-k2.5 原生支持 |
 | 智谱 AI | glm-5v-turbo | 固定使用视觉专用模型 |
 | MiniMax | MiniMax-Text-01 | 固定使用视觉专用模型 |

--- a/models/bot_factory.py
+++ b/models/bot_factory.py
@@ -21,7 +21,7 @@ def create_bot(bot_type):
        from models.deepseek.deepseek_bot import DeepSeekBot
        return DeepSeekBot()

-    elif bot_type in (const.OPENAI, const.CHATGPT):  # OpenAI-compatible API
+    elif bot_type in (const.OPENAI, const.CHATGPT, const.CUSTOM):  # OpenAI-compatible API
        from models.chatgpt.chat_gpt_bot import ChatGPTBot
        return ChatGPTBot()

--- a/models/chatgpt/chat_gpt_bot.py
+++ b/models/chatgpt/chat_gpt_bot.py
@@ -23,10 +23,15 @@ from models.baidu.baidu_wenxin_session import BaiduWenxinSession
 class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
    def __init__(self):
        super().__init__()
-        # set the default api_key
-        openai.api_key = conf().get("open_ai_api_key")
-        if conf().get("open_ai_api_base"):
-            openai.api_base = conf().get("open_ai_api_base")
+        # set the default api_key / api_base based on bot_type
+        if conf().get("bot_type") == "custom":
+            openai.api_key = conf().get("custom_api_key", "")
+            if conf().get("custom_api_base"):
+                openai.api_base = conf().get("custom_api_base")
+        else:
+            openai.api_key = conf().get("open_ai_api_key")
+            if conf().get("open_ai_api_base"):
+                openai.api_base = conf().get("open_ai_api_base")
        proxy = conf().get("proxy")
        if proxy:
            openai.proxy = proxy
@@ -56,9 +61,10 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):

    def get_api_config(self):
        """Get API configuration for OpenAI-compatible base class"""
+        is_custom = conf().get("bot_type") == "custom"
        return {
-            'api_key': conf().get("open_ai_api_key"),
-            'api_base': conf().get("open_ai_api_base"),
+            'api_key': conf().get("custom_api_key") if is_custom else conf().get("open_ai_api_key"),
+            'api_base': conf().get("custom_api_base") if is_custom else conf().get("open_ai_api_base"),
            'model': conf().get("model", "gpt-3.5-turbo"),
            'default_temperature': conf().get("temperature", 0.9),
            'default_top_p': conf().get("top_p", 1.0),
@@ -166,9 +172,10 @@ class ChatGPTBot(Bot, OpenAIImage, OpenAICompatibleBot):
            mime_type = mime_type_map.get(extension, "image/jpeg")
            
            # Get model and API config
+            is_custom = conf().get("bot_type") == "custom"
            model = context.get("gpt_model") or conf().get("model", "gpt-4o")
-            api_key = context.get("openai_api_key") or conf().get("open_ai_api_key")
-            api_base = conf().get("open_ai_api_base")
+            api_key = context.get("openai_api_key") or (conf().get("custom_api_key") if is_custom else conf().get("open_ai_api_key"))
+            api_base = conf().get("custom_api_base") if is_custom else conf().get("open_ai_api_base")
            
            # Build vision request
            messages = [
--- a/models/deepseek/deepseek_bot.py
+++ b/models/deepseek/deepseek_bot.py
@@ -2,9 +2,27 @@

 """
 DeepSeek Bot — fully OpenAI-compatible, uses its own API key / base config.
+
+Supported models:
+- deepseek-chat       (V3, no thinking)
+- deepseek-reasoner   (R1, built-in reasoning, no `thinking` switch)
+- deepseek-v4-flash   (V4, supports thinking mode + tool calls)
+- deepseek-v4-flash   (V4 Flash, default; thinking mode + tool calls)
+- deepseek-v4-pro     (V4 Pro, stronger on complex tasks)
+
+Thinking mode notes (for V4 models):
+- Toggle: ``{"thinking": {"type": "enabled" | "disabled"}}`` (default: enabled)
+- Effort: ``reasoning_effort`` ∈ {"high", "max"} (low/medium → high, xhigh → max)
+- In thinking mode, ``temperature``/``top_p``/``presence_penalty``/``frequency_penalty``
+  are silently ignored by the server; we drop them locally to avoid confusion.
+- ``reasoning_content`` is returned alongside ``content``. For turns that triggered
+  tool calls, ``reasoning_content`` MUST be echoed back in subsequent requests, or
+  the API returns 400.
 """

+import json
 import time
+from typing import Optional

 import requests
 from models.bot import Bot
@@ -25,9 +43,9 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):
        super().__init__()
        self.sessions = SessionManager(
            DeepSeekSession,
-            model=conf().get("model") or const.DEEPSEEK_CHAT,
+            model=conf().get("model") or const.DEEPSEEK_V4_FLASH,
        )
-        conf_model = conf().get("model") or const.DEEPSEEK_CHAT
+        conf_model = conf().get("model") or const.DEEPSEEK_V4_FLASH
        self.args = {
            "model": conf_model,
            "temperature": conf().get("temperature", 0.7),
@@ -56,13 +74,32 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):
        return {
            "api_key": self.api_key,
            "api_base": self.api_base,
-            "model": conf().get("model", const.DEEPSEEK_CHAT),
+            "model": conf().get("model", const.DEEPSEEK_V4_FLASH),
            "default_temperature": conf().get("temperature", 0.7),
            "default_top_p": conf().get("top_p", 1.0),
            "default_frequency_penalty": conf().get("frequency_penalty", 0.0),
            "default_presence_penalty": conf().get("presence_penalty", 0.0),
        }

+    @staticmethod
+    def _model_supports_thinking(model_name: str) -> bool:
+        """V4 series models expose the explicit `thinking` switch."""
+        if not model_name:
+            return False
+        m = model_name.lower()
+        return m.startswith("deepseek-v4")
+
+    @staticmethod
+    def _is_reasoner_model(model_name: str) -> bool:
+        """deepseek-reasoner (R1) always thinks internally; no toggle."""
+        return bool(model_name) and "reasoner" in model_name.lower()
+
+    def _build_headers(self) -> dict:
+        return {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+
    # ---------- simple chat (non-agent mode) ----------

    def reply(self, query, context=None):
@@ -112,13 +149,16 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):

    def reply_text(self, session, args=None, retry_count: int = 0) -> dict:
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": "Bearer " + self.api_key,
-            }
-            body = args.copy()
+            headers = self._build_headers()
+            body = dict(args) if args else dict(self.args)
            body["messages"] = session.messages

+            # Thinking mode ignores temperature/top_p/penalties — strip to avoid noise.
+            model_name = str(body.get("model", ""))
+            if self._model_supports_thinking(model_name) or self._is_reasoner_model(model_name):
+                for k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
+                    body.pop(k, None)
+
            res = requests.post(
                f"{self.api_base}/chat/completions",
                headers=headers,
@@ -158,3 +198,483 @@ class DeepSeekBot(Bot, OpenAICompatibleBot):
            if retry_count < 2:
                return self.reply_text(session, args, retry_count + 1)
            return {"completion_tokens": 0, "content": "我现在有点累了，等会再来吧"}
+
+    # ==================== Agent mode support ====================
+
+    def call_with_tools(self, messages, tools=None, stream: bool = False, **kwargs):
+        """
+        Call DeepSeek API with tool support for agent integration.
+
+        Handles:
+        - Claude → OpenAI message/tool format conversion (with reasoning_content round-trip)
+        - System prompt injection
+        - Streaming SSE with tool_calls + reasoning_content delta
+        - Thinking mode toggle and reasoning_effort for V4 models
+        """
+        try:
+            converted_messages = self._convert_messages_to_openai_format(messages)
+
+            system_prompt = kwargs.pop("system", None)
+            if system_prompt:
+                if not converted_messages or converted_messages[0].get("role") != "system":
+                    converted_messages.insert(0, {"role": "system", "content": system_prompt})
+                else:
+                    converted_messages[0] = {"role": "system", "content": system_prompt}
+
+            converted_tools = None
+            if tools:
+                converted_tools = self._convert_tools_to_openai_format(tools)
+
+            model = kwargs.pop("model", None) or self.args["model"]
+            max_tokens = kwargs.pop("max_tokens", None)
+
+            request_body = {
+                "model": model,
+                "messages": converted_messages,
+                "stream": stream,
+            }
+            if max_tokens is not None:
+                request_body["max_tokens"] = max_tokens
+
+            if converted_tools:
+                request_body["tools"] = converted_tools
+                request_body["tool_choice"] = kwargs.pop("tool_choice", "auto")
+
+            # Thinking mode (V4 only). Honour the toggle propagated by agent_bridge.
+            thinking_param = kwargs.pop("thinking", None)
+            reasoning_effort = kwargs.pop("reasoning_effort", None)
+            thinking_active = False
+
+            if self._model_supports_thinking(model):
+                # Default to enabled per DeepSeek docs unless caller explicitly disables.
+                thinking_param = thinking_param or {"type": "enabled"}
+                request_body["thinking"] = thinking_param
+                thinking_active = thinking_param.get("type") == "enabled"
+                if thinking_active:
+                    # Default to "high"; allow caller override (e.g. "max" for heavy agent loops).
+                    request_body["reasoning_effort"] = reasoning_effort or "high"
+            elif self._is_reasoner_model(model):
+                # R1 thinks unconditionally — no `thinking` field, but reasoning_content still flows.
+                thinking_active = True
+
+            # Strip params silently ignored under thinking mode to keep the wire clean.
+            if thinking_active:
+                for k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
+                    request_body.pop(k, None)
+                    kwargs.pop(k, None)
+            else:
+                # Non-thinking path: forward standard sampling controls.
+                temperature = kwargs.pop("temperature", None)
+                if temperature is not None:
+                    request_body["temperature"] = temperature
+                top_p = kwargs.pop("top_p", None)
+                if top_p is not None:
+                    request_body["top_p"] = top_p
+
+            logger.debug(
+                f"[DEEPSEEK] API call: model={model}, "
+                f"tools={len(converted_tools) if converted_tools else 0}, "
+                f"stream={stream}, thinking={thinking_active}"
+            )
+
+            if stream:
+                return self._handle_stream_response(request_body)
+            else:
+                return self._handle_sync_response(request_body)
+
+        except Exception as e:
+            logger.error(f"[DEEPSEEK] call_with_tools error: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+
+            def error_generator():
+                yield {"error": True, "message": str(e), "status_code": 500}
+            return error_generator()
+
+    # -------------------- streaming --------------------
+
+    def _handle_stream_response(self, request_body: dict):
+        """Stream SSE chunks from DeepSeek and yield OpenAI-format deltas (with reasoning_content)."""
+        try:
+            headers = self._build_headers()
+            url = f"{self.api_base}/chat/completions"
+            response = requests.post(url, headers=headers, json=request_body, stream=True, timeout=180)
+
+            if response.status_code != 200:
+                error_msg = response.text
+                logger.error(f"[DEEPSEEK] API error: status={response.status_code}, msg={error_msg}")
+                yield {"error": True, "message": error_msg, "status_code": response.status_code}
+                return
+
+            current_tool_calls = {}
+            finish_reason = None
+
+            for line in response.iter_lines():
+                if not line:
+                    continue
+
+                line = line.decode("utf-8")
+                if line.startswith("data: "):
+                    data_str = line[6:]
+                elif line.startswith("data:"):
+                    data_str = line[5:]
+                else:
+                    continue
+                if data_str.strip() == "[DONE]":
+                    break
+
+                try:
+                    chunk = json.loads(data_str)
+                except json.JSONDecodeError as e:
+                    logger.warning(f"[DEEPSEEK] JSON decode error: {e}, data: {data_str[:200]}")
+                    continue
+
+                if chunk.get("error"):
+                    error_data = chunk["error"]
+                    error_msg = error_data.get("message", "Unknown error") if isinstance(error_data, dict) else str(error_data)
+                    logger.error(f"[DEEPSEEK] stream error: {error_msg}")
+                    yield {"error": True, "message": error_msg, "status_code": 500}
+                    return
+
+                if not chunk.get("choices"):
+                    continue
+                choice = chunk["choices"][0]
+                delta = choice.get("delta", {})
+
+                if choice.get("finish_reason"):
+                    finish_reason = choice["finish_reason"]
+
+                # Reasoning content (thinking mode). Forward as its own delta so
+                # agent_stream.py can stitch it into a `thinking` block.
+                if delta.get("reasoning_content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": delta["reasoning_content"],
+                            },
+                            "finish_reason": None,
+                        }]
+                    }
+
+                if delta.get("content"):
+                    yield {
+                        "choices": [{
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "content": delta["content"],
+                            },
+                        }]
+                    }
+
+                if "tool_calls" in delta and delta["tool_calls"]:
+                    for tool_call_chunk in delta["tool_calls"]:
+                        index = tool_call_chunk.get("index", 0)
+                        if index not in current_tool_calls:
+                            current_tool_calls[index] = {
+                                "id": tool_call_chunk.get("id", ""),
+                                "name": tool_call_chunk.get("function", {}).get("name", ""),
+                                "arguments": "",
+                            }
+                        if "function" in tool_call_chunk and "arguments" in tool_call_chunk["function"]:
+                            current_tool_calls[index]["arguments"] += tool_call_chunk["function"]["arguments"]
+
+                        yield {
+                            "choices": [{
+                                "index": 0,
+                                "delta": {"tool_calls": [tool_call_chunk]},
+                            }]
+                        }
+
+            yield {
+                "choices": [{
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": finish_reason,
+                }]
+            }
+
+        except requests.exceptions.Timeout:
+            logger.error("[DEEPSEEK] Request timeout")
+            yield {"error": True, "message": "Request timeout", "status_code": 500}
+        except Exception as e:
+            logger.error(f"[DEEPSEEK] stream response error: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            yield {"error": True, "message": str(e), "status_code": 500}
+
+    # -------------------- sync --------------------
+
+    def _handle_sync_response(self, request_body: dict):
+        """Single-shot response. Yields a Claude-format dict for symmetry with stream path."""
+        try:
+            headers = self._build_headers()
+            request_body.pop("stream", None)
+            url = f"{self.api_base}/chat/completions"
+            response = requests.post(url, headers=headers, json=request_body, timeout=180)
+
+            if response.status_code != 200:
+                error_msg = response.text
+                logger.error(f"[DEEPSEEK] API error: status={response.status_code}, msg={error_msg}")
+                yield {"error": True, "message": error_msg, "status_code": response.status_code}
+                return
+
+            result = response.json()
+            message = result["choices"][0]["message"]
+            finish_reason = result["choices"][0]["finish_reason"]
+
+            response_data = {"role": "assistant", "content": []}
+
+            # Surface reasoning as a `thinking` block so the agent layer can persist it
+            # and round-trip it on tool-call turns (required by DeepSeek API).
+            if message.get("reasoning_content"):
+                response_data["content"].append({
+                    "type": "thinking",
+                    "thinking": message["reasoning_content"],
+                })
+
+            if message.get("content"):
+                response_data["content"].append({
+                    "type": "text",
+                    "text": message["content"],
+                })
+
+            if message.get("tool_calls"):
+                for tool_call in message["tool_calls"]:
+                    try:
+                        tool_input = json.loads(tool_call["function"]["arguments"])
+                    except (json.JSONDecodeError, TypeError):
+                        tool_input = {}
+                    response_data["content"].append({
+                        "type": "tool_use",
+                        "id": tool_call["id"],
+                        "name": tool_call["function"]["name"],
+                        "input": tool_input,
+                    })
+
+            if finish_reason == "tool_calls":
+                response_data["stop_reason"] = "tool_use"
+            elif finish_reason == "stop":
+                response_data["stop_reason"] = "end_turn"
+            else:
+                response_data["stop_reason"] = finish_reason
+
+            yield response_data
+
+        except requests.exceptions.Timeout:
+            logger.error("[DEEPSEEK] Request timeout")
+            yield {"error": True, "message": "Request timeout", "status_code": 500}
+        except Exception as e:
+            logger.error(f"[DEEPSEEK] sync response error: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            yield {"error": True, "message": str(e), "status_code": 500}
+
+    # -------------------- format conversion --------------------
+
+    def _convert_messages_to_openai_format(self, messages):
+        """
+        Convert Claude-format messages (content blocks) to OpenAI format.
+
+        User `tool_result` blocks become `role: "tool"` messages (emitted
+        before any remaining user text); assistant `tool_use` blocks become
+        `tool_calls`; `thinking` blocks are joined into `reasoning_content`.
+
+        If any assistant message in `messages` carries a tool call, every
+        assistant message without a captured trace gets `reasoning_content`
+        back-filled as "" (per the original author's note, DeepSeek rejects
+        such histories with 400 otherwise). Returns a new list; [] if empty.
+        """
+        if not messages:
+            return []
+
+        # Determine whether the history contains any tool-call assistant turn.
+        # If so, every assistant message must carry `reasoning_content`.
+        has_tool_call_history = False
+        for msg in messages:
+            if msg.get("role") != "assistant":
+                continue
+            if msg.get("tool_calls"):  # OpenAI-style tool calls already on the message
+                has_tool_call_history = True
+                break
+            content = msg.get("content")
+            if isinstance(content, list) and any(  # Claude-style tool_use content block
+                isinstance(b, dict) and b.get("type") == "tool_use" for b in content
+            ):
+                has_tool_call_history = True
+                break
+
+        converted = []
+
+        for msg in messages:
+            role = msg.get("role")
+            content = msg.get("content")
+
+            # Pass-through path for non-list content (e.g. plain string).
+            # Back-fill `reasoning_content` on assistant messages whenever the
+            # history contains any tool-call turn.
+            if not isinstance(content, list):
+                if (
+                    role == "assistant"
+                    and isinstance(msg, dict)
+                    and has_tool_call_history
+                    and "reasoning_content" not in msg
+                ):
+                    patched = dict(msg)  # shallow copy: the caller's message is left untouched
+                    patched["reasoning_content"] = ""
+                    converted.append(patched)
+                else:
+                    converted.append(msg)  # forwarded as the same object, unchanged
+                continue
+
+            if role == "user":
+                has_tool_result = any(
+                    isinstance(b, dict) and b.get("type") == "tool_result" for b in content
+                )
+                if has_tool_result:
+                    text_parts = []
+                    tool_results = []
+
+                    for block in content:
+                        if not isinstance(block, dict):
+                            continue  # non-dict blocks are dropped
+                        if block.get("type") == "text":
+                            text_parts.append(block.get("text", ""))
+                        elif block.get("type") == "tool_result":
+                            tool_call_id = block.get("tool_use_id") or ""
+                            result_content = block.get("content", "")
+                            if not isinstance(result_content, str):
+                                # Structured results are serialized to a JSON string.
+                                result_content = json.dumps(result_content, ensure_ascii=False)
+                            tool_results.append({
+                                "role": "tool",
+                                "tool_call_id": tool_call_id,
+                                "content": result_content,
+                            })
+
+                    converted.extend(tool_results)  # tool messages go before any leftover user text
+
+                    if text_parts:
+                        converted.append({"role": "user", "content": "\n".join(text_parts)})
+                else:
+                    converted.append(msg)  # list content without tool_result: forwarded unchanged
+
+            elif role == "assistant":
+                openai_msg = {"role": "assistant"}
+                text_parts = []
+                tool_calls = []
+                reasoning_parts = []
+
+                for block in content:
+                    if not isinstance(block, dict):
+                        continue
+                    btype = block.get("type")
+                    if btype == "text":
+                        text_parts.append(block.get("text", ""))
+                    elif btype == "tool_use":
+                        tool_calls.append({
+                            "id": block.get("id"),
+                            "type": "function",
+                            "function": {
+                                "name": block.get("name"),
+                                "arguments": json.dumps(block.get("input", {})),
+                            },
+                        })
+                    elif btype == "thinking":
+                        reasoning_parts.append(block.get("thinking", ""))
+
+                if text_parts:
+                    openai_msg["content"] = "\n".join(text_parts)
+                elif not tool_calls:
+                    openai_msg["content"] = ""  # neither text nor tool calls: empty string content
+
+                if tool_calls:
+                    openai_msg["tool_calls"] = tool_calls
+                    if not text_parts:
+                        openai_msg["content"] = None  # tool-call-only turn: explicit null content
+
+                # Round-trip reasoning_content: required for every assistant
+                # message once the history contains any tool-call turn (see
+                # outer comment). Use empty string as fallback when the trace
+                # was not captured — DeepSeek validates field presence, not
+                # value; non-thinking backends silently ignore it.
+                if reasoning_parts:
+                    openai_msg["reasoning_content"] = "\n".join(reasoning_parts)
+                elif has_tool_call_history:
+                    openai_msg["reasoning_content"] = ""
+
+                converted.append(openai_msg)
+            else:
+                converted.append(msg)  # any other role with list content: forwarded unchanged
+
+        return converted
+
+    def _convert_tools_to_openai_format(self, tools):
+        """
+        Translate Claude-style tool specs into OpenAI function specs.
+
+        Input  (Claude): {name, description, input_schema}
+        Output (OpenAI): {type: "function", function: {name, description, parameters}}
+        Entries already tagged ``type == "function"`` are passed through as-is.
+        Returns ``None`` when ``tools`` is empty or ``None``.
+        """
+        if not tools:
+            return None
+
+        def _as_function_spec(spec):
+            # Already OpenAI-shaped: keep the very same object.
+            if "type" in spec and spec["type"] == "function":
+                return spec
+            function_body = {
+                "name": spec.get("name"),
+                "description": spec.get("description"),
+                "parameters": spec.get("input_schema", {}),
+            }
+            return {"type": "function", "function": function_body}
+
+        return [_as_function_spec(spec) for spec in tools]
+
+    # -------------------- vision --------------------
+
+    def call_vision(self, image_url: str, question: str,
+                    model: Optional[str] = None,
+                    max_tokens: int = 1000) -> dict:
+        """Analyse an image via DeepSeek's OpenAI-compatible /chat/completions endpoint.
+
+        Returns ``{"model", "content", "usage"}`` or ``{"error": True, "message": str}``.
+        """
+        try:
+            vision_model = model or self.args.get("model", const.DEEPSEEK_V4_FLASH)
+            user_content = [
+                {"type": "text", "text": question},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ]
+            payload = {
+                "model": vision_model,
+                "max_tokens": max_tokens,
+                "messages": [{"role": "user", "content": user_content}],
+            }
+            resp = requests.post(
+                f"{self.api_base}/chat/completions",
+                headers=self._build_headers(), json=payload, timeout=60,
+            )
+            if resp.status_code != 200:
+                return {"error": True, "message": f"HTTP {resp.status_code}: {resp.text[:300]}"}
+            data = resp.json()
+            if "error" in data:
+                err = data["error"]  # tolerate non-dict error payloads as well
+                detail = err.get("message", str(err)) if isinstance(err, dict) else str(err)
+                return {"error": True, "message": detail}
+            choices = data.get("choices") or [{}]  # key may be present but empty/null
+            content = (choices[0].get("message") or {}).get("content") or ""  # null-safe
+            usage = data.get("usage") or {}
+            token_keys = ("prompt_tokens", "completion_tokens", "total_tokens")
+            return {
+                "model": vision_model,
+                "content": content,
+                "usage": {key: usage.get(key, 0) for key in token_keys},
+            }
+        except Exception as e:
+            logger.error(f"[DEEPSEEK] call_vision error: {e}")
+            return {"error": True, "message": str(e)}
--- a/models/deepseek/deepseek_session.py
+++ b/models/deepseek/deepseek_session.py
@@ -3,7 +3,7 @@ from common.log import logger


 class DeepSeekSession(Session):
-    def __init__(self, session_id, system_prompt=None, model="deepseek-chat"):
+    def __init__(self, session_id, system_prompt=None, model="deepseek-v4-flash"):
        super().__init__(session_id, system_prompt)
        self.model = model
        self.reset()
--- a/models/gemini/google_gemini_bot.py
+++ b/models/gemini/google_gemini_bot.py
@@ -335,6 +335,18 @@ class GoogleGeminiBot(Bot):
                # Convert role
                gemini_role = "user" if role in ["user", "tool"] else "model"
                
+                # For model messages that carry original Gemini parts (with
+                # thoughtSignature etc.), use them directly instead of
+                # reconstructing from Claude-format tool_use blocks.
+                if gemini_role == "model" and "_gemini_raw_parts" in msg:
+                    raw_parts = msg["_gemini_raw_parts"]
+                    if raw_parts:
+                        payload["contents"].append({
+                            "role": "model",
+                            "parts": raw_parts
+                        })
+                        continue
+                
                # Handle different content formats
                parts = []
                
@@ -398,6 +410,17 @@ class GoogleGeminiBot(Bot):
                            else:
                                logger.warning(f"[Gemini] Skip invalid image block: {str(block)[:200]}")
                            
+                        elif block_type == "tool_use":
+                            # Convert Claude tool_use to Gemini functionCall
+                            fc_name = block.get("name", "unknown")
+                            fc_args = block.get("input") or {}
+                            parts.append({
+                                "functionCall": {
+                                    "name": fc_name,
+                                    "args": fc_args
+                                }
+                            })
+
                        elif block_type == "tool_result":
                            # Convert Claude tool_result to Gemini functionResponse
                            tool_use_id = block.get("tool_use_id")
@@ -413,7 +436,6 @@ class GoogleGeminiBot(Bot):
                                tool_result_data = {"result": tool_content}
                            
                            # Find the tool name from previous messages
-                            # Look for the corresponding tool_call in model's message
                            tool_name = None
                            for prev_msg in reversed(messages):
                                if prev_msg.get("role") == "assistant":
@@ -427,13 +449,14 @@ class GoogleGeminiBot(Bot):
                                    if tool_name:
                                        break
                            
-                            # Gemini functionResponse format
-                            parts.append({
-                                "functionResponse": {
-                                    "name": tool_name or "unknown",
-                                    "response": tool_result_data
-                                }
-                            })
+                            # Gemini functionResponse format (Gemini 3 requires `id`)
+                            fn_response = {
+                                "name": tool_name or "unknown",
+                                "response": tool_result_data
+                            }
+                            if tool_use_id:
+                                fn_response["id"] = tool_use_id
+                            parts.append({"functionResponse": fn_response})
                            
                        elif "text" in block:
                            # Generic text field
@@ -601,10 +624,11 @@ class GoogleGeminiBot(Bot):
                # Check for functionCall (per REST API docs)
                if "functionCall" in part:
                    fc = part["functionCall"]
-                    logger.info(f"[Gemini] Function call detected: {fc.get('name')}")
+                    fc_id = fc.get("id") or f"call_{int(time.time() * 1000000)}"
+                    logger.info(f"[Gemini] Function call detected: {fc.get('name')} (id={fc_id})")
                    
                    tool_calls.append({
-                        "id": f"call_{int(time.time() * 1000000)}",
+                        "id": fc_id,
                        "type": "function",
                        "function": {
                            "name": fc.get("name"),
@@ -648,11 +672,14 @@ class GoogleGeminiBot(Bot):
        """Handle Gemini REST API stream response"""
        try:
            all_tool_calls = []
+            all_raw_parts = []  # Preserve all Gemini parts (incl. thoughtSignature) for round-trip
            has_sent_tool_calls = False
            has_content = False  # Track if any content was sent
            chunk_count = 0
            last_finish_reason = None
            last_safety_ratings = None
+            raw_chunks = []  # Buffer raw chunks for diagnostics on empty response
+            non_text_part_keys = []  # Track non-text/functionCall part keys (e.g. thoughtSignature)
            
            for line in response.iter_lines():
                if not line:
@@ -670,10 +697,16 @@ class GoogleGeminiBot(Bot):
                try:
                    chunk_data = json.loads(line)
                    chunk_count += 1
+                    raw_chunks.append(chunk_data)
                    
                    candidates = chunk_data.get("candidates", [])
                    if not candidates:
-                        logger.debug("[Gemini] No candidates in chunk")
+                        # Could be a chunk with only usageMetadata / promptFeedback
+                        prompt_feedback = chunk_data.get("promptFeedback")
+                        if prompt_feedback:
+                            logger.warning(f"[Gemini] promptFeedback in chunk: {prompt_feedback}")
+                        else:
+                            logger.debug(f"[Gemini] No candidates in chunk: {chunk_data}")
                        continue
                    
                    candidate = candidates[0]
@@ -688,10 +721,16 @@ class GoogleGeminiBot(Bot):
                    parts = content.get("parts", [])
                    
                    if not parts:
-                        logger.debug("[Gemini] No parts in candidate content")
+                        logger.debug(f"[Gemini] No parts in candidate content, candidate={candidate}")
                    
                    # Stream text content
                    for part in parts:
+                        # Track unknown part types for diagnostics
+                        if "text" not in part and "functionCall" not in part:
+                            for k in part.keys():
+                                if k not in non_text_part_keys:
+                                    non_text_part_keys.append(k)
+
                        if "text" in part and part["text"]:
                            has_content = True
                            yield {
@@ -709,23 +748,31 @@ class GoogleGeminiBot(Bot):
                        # Collect function calls
                        if "functionCall" in part:
                            fc = part["functionCall"]
-                            logger.info(f"[Gemini] Function call: {fc.get('name')}")
+                            logger.info(f"[Gemini] Function call: {fc.get('name')} (id={fc.get('id')})")
+                            # Prefer Gemini's native id; fall back to generated one
+                            fc_id = fc.get("id") or f"call_{int(time.time() * 1000000)}_{len(all_tool_calls)}"
                            all_tool_calls.append({
-                                "index": len(all_tool_calls),  # Add index to differentiate multiple tool calls
-                                "id": f"call_{int(time.time() * 1000000)}_{len(all_tool_calls)}",
+                                "index": len(all_tool_calls),
+                                "id": fc_id,
                                "type": "function",
                                "function": {
                                    "name": fc.get("name"),
                                    "arguments": json.dumps(fc.get("args", {}))
                                }
                            })
+
+                    # Preserve all raw parts for round-trip (thoughtSignature, etc.)
+                    all_raw_parts.extend(parts)
                    
                except json.JSONDecodeError as je:
-                    logger.debug(f"[Gemini] JSON decode error: {je}")
+                    logger.debug(f"[Gemini] JSON decode error: {je}, line={line[:500]}")
                    continue
            
            # Send tool calls if any were collected
            if all_tool_calls and not has_sent_tool_calls:
+                delta = {"tool_calls": all_tool_calls}
+                if all_raw_parts:
+                    delta["_gemini_raw_parts"] = all_raw_parts
                yield {
                    "id": f"chatcmpl-{time.time()}",
                    "object": "chat.completion.chunk",
@@ -733,15 +780,44 @@ class GoogleGeminiBot(Bot):
                    "model": model_name,
                    "choices": [{
                        "index": 0,
-                        "delta": {"tool_calls": all_tool_calls},
+                        "delta": delta,
                        "finish_reason": None
                    }]
                }
                has_sent_tool_calls = True
+            elif not has_sent_tool_calls and all_raw_parts:
+                # No tool calls but we have raw parts (e.g. text-only response with
+                # thoughtSignature) — pass them through for round-trip fidelity.
+                yield {
+                    "id": f"chatcmpl-{time.time()}",
+                    "object": "chat.completion.chunk",
+                    "created": int(time.time()),
+                    "model": model_name,
+                    "choices": [{
+                        "index": 0,
+                        "delta": {"_gemini_raw_parts": all_raw_parts},
+                        "finish_reason": None
+                    }]
+                }
            
-            # 如果返回空响应，记录详细警告
+            # If the response is empty, dump the full raw chunks for diagnosis
            if not has_content and not all_tool_calls:
-                logger.warning(f"[Gemini] ⚠️  Empty response detected!")
+                logger.warning(
+                    f"[Gemini] ⚠️  Empty response detected! "
+                    f"chunks={chunk_count}, finish_reason={last_finish_reason}, "
+                    f"non_text_part_keys={non_text_part_keys}"
+                )
+                if last_safety_ratings:
+                    logger.warning(f"[Gemini] safetyRatings: {last_safety_ratings}")
+                # Dump raw chunks (truncate each to avoid huge logs)
+                try:
+                    for i, ch in enumerate(raw_chunks):
+                        ch_str = json.dumps(ch, ensure_ascii=False)
+                        if len(ch_str) > 2000:
+                            ch_str = ch_str[:2000] + f"...[truncated, total {len(ch_str)} chars]"
+                        logger.warning(f"[Gemini] raw chunk[{i}]: {ch_str}")
+                except Exception as dump_err:
+                    logger.warning(f"[Gemini] Failed to dump raw chunks: {dump_err}")
            
            # Final chunk
            yield {
--- a/models/linkai/link_ai_bot.py
+++ b/models/linkai/link_ai_bot.py
@@ -673,6 +673,9 @@ def _handle_linkai_stream_response(self, base_url, headers, body):
                        }
                        return

+                    # Forward SSE JSON as-is so extensions (e.g. delta._gemini_raw_parts
+                    # for Gemini via LinkAI) reach agent_stream and are stored on assistant
+                    # messages for the next request. Standard OpenAI fields are unchanged.
                    yield chunk
                        
    except Exception as e:
@@ -683,7 +686,75 @@ def _handle_linkai_stream_response(self, base_url, headers, body):
            "status_code": 500
        }

+def _linkai_convert_messages_to_openai_format(self, messages):
+    """
+    OpenAI-format conversion that also round-trips ``reasoning_content``.
+
+    The agent layer stores a model's reasoning trace as a Claude-style
+    ``thinking`` content block on the assistant message, which the base
+    converter drops. Thinking-capable models proxied via LinkAI
+    (DeepSeek V4, Kimi K2 thinking, …) need the trace echoed back as a
+    top-level ``reasoning_content`` field, otherwise the next request
+    returns 400.
+
+    Every assistant turn that has ``thinking`` blocks gets them re-emitted.
+    Turns without a trace get an empty string, but only when the history
+    contains at least one tool-call turn.
+    """
+    openai_messages = OpenAICompatibleBot._convert_messages_to_openai_format(self, messages)
+    if not messages:
+        return openai_messages
+
+    def _is_tool_call_turn(entry):
+        # An assistant message counts when it carries OpenAI-style
+        # ``tool_calls`` or a Claude-style ``tool_use`` content block.
+        if entry.get("role") != "assistant":
+            return False
+        if entry.get("tool_calls"):
+            return True
+        blocks = entry.get("content")
+        return isinstance(blocks, list) and any(
+            isinstance(blk, dict) and blk.get("type") == "tool_use" for blk in blocks
+        )
+
+    def _thinking_texts(entry):
+        # Reasoning text of one source message, in block order.
+        blocks = entry.get("content")
+        if not isinstance(blocks, list):
+            return []
+        return [
+            blk.get("thinking", "") for blk in blocks
+            if isinstance(blk, dict) and blk.get("type") == "thinking"
+        ]
+
+    # DeepSeek (proxied via LinkAI) requires `reasoning_content` on EVERY
+    # assistant message once the history contains any tool-call turn — not
+    # just the tool-call turn itself.
+    needs_backfill = any(_is_tool_call_turn(entry) for entry in messages)
+
+    # Pair source assistant turns with converted assistant entries by order.
+    converted_assistants = (
+        out for out in openai_messages if out.get("role") == "assistant"
+    )
+    for entry in messages:
+        if entry.get("role") != "assistant":
+            continue
+        traces = _thinking_texts(entry)
+        target = next(converted_assistants, None)
+        if target is None:
+            break  # converted list has no assistant entry left to patch
+        if traces:
+            target["reasoning_content"] = "\n".join(traces)
+        elif needs_backfill:
+            # Trace lost (proxy stripped it, model switched mid-session,
+            # thinking toggled on after tool calls): send "" as a placeholder.
+            target["reasoning_content"] = ""
+
+    return openai_messages
+
+
 # Attach methods to LinkAIBot class
 LinkAIBot.call_with_tools = _linkai_call_with_tools
 LinkAIBot._handle_linkai_sync_response = _handle_linkai_sync_response
 LinkAIBot._handle_linkai_stream_response = _handle_linkai_stream_response
+LinkAIBot._convert_messages_to_openai_format = _linkai_convert_messages_to_openai_format
--- a/models/moonshot/moonshot_bot.py
+++ b/models/moonshot/moonshot_bot.py
@@ -39,6 +39,29 @@ class MoonshotBot(Bot):
            url = url.rsplit("/chat/completions", 1)[0]
        return url.rstrip("/")

+    @property
+    def _is_kimi_coding_plan(self) -> bool:
+        """True when the configured model or Moonshot base URL points at Kimi Coding Plan."""
+        if str(conf().get("model", "")) == "kimi-for-coding":
+            return True
+        return "api.kimi.com/coding" in str(conf().get("moonshot_base_url", ""))
+
+    @staticmethod
+    def _model_supports_thinking(model_name: str) -> bool:
+        """Tell whether ``model_name`` accepts the ``thinking`` request parameter."""
+        thinking_prefixes = ("kimi-k2", "kimi-k1.5")
+        return model_name.lower().startswith(thinking_prefixes)
+
+    def _build_headers(self) -> dict:
+        """Return JSON + bearer-auth request headers for the Moonshot API.
+
+        Coding Plan requests additionally carry a coding-agent ``User-Agent``.
+        """
+        auth_value = f"Bearer {self.api_key}"
+        base = {"Content-Type": "application/json", "Authorization": auth_value}
+        extra = {"User-Agent": "claude-cli/2.1.39"} if self._is_kimi_coding_plan else {}
+        return {**base, **extra}
+
    def reply(self, query, context=None):
        # acquire reply content
        if context.type == ContextType.TEXT:
@@ -97,12 +120,17 @@ class MoonshotBot(Bot):
        :return: {}
        """
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": "Bearer " + self.api_key
-            }
-            body = args
+            headers = self._build_headers()
+            # Fallback to default args (e.g. when called by session title
+            # generation which passes only the session). Always copy to avoid
+            # mutating the shared self.args across calls.
+            body = dict(args) if args else dict(self.args)
            body["messages"] = session.messages
+            model_name = str(body.get("model", ""))
+            # K2.x / Coding Plan enforce fixed temperature/top_p; strip them.
+            if model_name.startswith("kimi-k2") or model_name == "kimi-for-coding":
+                body.pop("temperature", None)
+                body.pop("top_p", None)
            res = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
@@ -153,7 +181,7 @@ class MoonshotBot(Bot):
                    max_tokens: int = 1000) -> dict:
        """Analyze an image using Moonshot (Kimi) OpenAI-compatible API."""
        try:
-            vision_model = model or self.args.get("model", "kimi-k2.5")
+            vision_model = model or self.args.get("model", "kimi-k2.6")
            payload = {
                "model": vision_model,
                "max_tokens": max_tokens,
@@ -165,10 +193,7 @@ class MoonshotBot(Bot):
                    ],
                }],
            }
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json",
-            }
+            headers = self._build_headers()
            resp = requests.post(f"{self.base_url}/chat/completions",
                                 headers=headers, json=payload, timeout=60)
            if resp.status_code != 200:
@@ -249,7 +274,12 @@ class MoonshotBot(Bot):
                request_body["tools"] = converted_tools
                request_body["tool_choice"] = "auto"

-            request_body["thinking"] = kwargs.get("thinking", {"type": "enabled"})
+            # Kimi Coding Plan has built-in reasoning and ignores the thinking param.
+            # For regular Kimi models, only K2/K1.5 series support the thinking param.
+            # Respect the enable_thinking config passed from agent_bridge.
+            if not self._is_kimi_coding_plan and self._model_supports_thinking(model):
+                thinking = kwargs.get("thinking", {"type": "enabled"})
+                request_body["thinking"] = thinking

            logger.debug(f"[MOONSHOT] API call: model={model}, "
                         f"tools={len(converted_tools) if converted_tools else 0}, stream={stream}")
@@ -273,10 +303,7 @@ class MoonshotBot(Bot):
    def _handle_stream_response(self, request_body: dict):
        """Handle streaming SSE response from Moonshot API and yield OpenAI-format chunks."""
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.api_key}"
-            }
+            headers = self._build_headers()

            url = f"{self.base_url}/chat/completions"
            response = requests.post(url, headers=headers, json=request_body, stream=True, timeout=120)
@@ -295,10 +322,13 @@ class MoonshotBot(Bot):
                    continue

                line = line.decode("utf-8")
-                if not line.startswith("data: "):
+                # Handle both "data: {...}" and "data:{...}" (Kimi Coding Plan omits the space)
+                if line.startswith("data: "):
+                    data_str = line[6:]
+                elif line.startswith("data:"):
+                    data_str = line[5:]
+                else:
                    continue
-
-                data_str = line[6:]  # Remove "data: " prefix
                if data_str.strip() == "[DONE]":
                    break

@@ -319,9 +349,16 @@ class MoonshotBot(Bot):
                if not chunk.get("choices"):
                    continue

-                choice = chunk["choices"][0]
+                choices = chunk["choices"]
+                if not choices:
+                    continue
+                choice = choices[0]
                delta = choice.get("delta", {})

+                # Capture finish_reason early (it may arrive on any chunk type)
+                if choice.get("finish_reason"):
+                    finish_reason = choice["finish_reason"]
+
                if delta.get("reasoning_content"):
                    yield {
                        "choices": [{
@@ -373,10 +410,6 @@ class MoonshotBot(Bot):
                            }]
                        }

-                # Capture finish_reason
-                if choice.get("finish_reason"):
-                    finish_reason = choice["finish_reason"]
-
            # Final chunk with finish_reason
            yield {
                "choices": [{
@@ -400,10 +433,7 @@ class MoonshotBot(Bot):
    def _handle_sync_response(self, request_body: dict):
        """Handle synchronous API response and yield a single result dict."""
        try:
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.api_key}"
-            }
+            headers = self._build_headers()

            request_body.pop("stream", None)
            url = f"{self.base_url}/chat/completions"
@@ -521,6 +551,7 @@ class MoonshotBot(Bot):
                openai_msg = {"role": "assistant"}
                text_parts = []
                tool_calls = []
+                reasoning_parts = []

                for block in content:
                    if not isinstance(block, dict):
@@ -536,6 +567,8 @@ class MoonshotBot(Bot):
                                "arguments": json.dumps(block.get("input", {}))
                            }
                        })
+                    elif block.get("type") == "thinking":
+                        reasoning_parts.append(block.get("thinking", ""))

                if text_parts:
                    openai_msg["content"] = "\n".join(text_parts)
@@ -547,6 +580,12 @@ class MoonshotBot(Bot):
                    if not text_parts:
                        openai_msg["content"] = None

+                # Kimi API requires reasoning_content in assistant messages
+                # when thinking was active for that turn. The presence of
+                # reasoning_parts means thinking was on, so always round-trip it.
+                if reasoning_parts:
+                    openai_msg["reasoning_content"] = "\n".join(reasoning_parts)
+
                converted.append(openai_msg)
            else:
                converted.append(msg)
--- a/plugins/cow_cli/cow_cli.py
+++ b/plugins/cow_cli/cow_cli.py
@@ -174,7 +174,7 @@ class CowCliPlugin(Plugin):
    # status
    # ------------------------------------------------------------------

-    def _cmd_status(self, args: str, e_context: EventContext, session_id: str = "") -> str:
+    def _cmd_status(self, args: str, e_context: EventContext, session_id: str = "", **_) -> str:
        from config import conf

        cfg = conf()
@@ -256,7 +256,7 @@ class CowCliPlugin(Plugin):
    # context
    # ------------------------------------------------------------------

-    def _cmd_context(self, args: str, e_context: EventContext, session_id: str = "") -> str:
+    def _cmd_context(self, args: str, e_context: EventContext, session_id: str = "", **_) -> str:
        session_id = self._get_session_id(e_context, fallback=session_id)
        agent = self._get_agent(session_id)

@@ -316,6 +316,7 @@ class CowCliPlugin(Plugin):
        "agent_max_context_turns",
        "agent_max_steps",
        "knowledge",
+        "enable_thinking",
    }

    _CONFIG_READABLE = _CONFIG_WRITABLE | {"channel_type"}
@@ -357,7 +358,7 @@ class CowCliPlugin(Plugin):
        return f"⚙️ {key}: {val}"

    def _config_set(self, key: str, value_str: str) -> str:
-        from config import conf, load_config
+        from config import conf, load_config, available_setting
        import json as _json

        if key not in self._CONFIG_WRITABLE:
@@ -379,11 +380,14 @@ class CowCliPlugin(Plugin):
                new_val = value_str

        updates = {key: new_val}
+        old_bot_type = conf().get("bot_type", "")

-        if key == "model" and conf().get("bot_type"):
-            resolved = self._resolve_bot_type_for_model(str(new_val))
-            if resolved:
-                updates["bot_type"] = resolved
+        if key == "model" and old_bot_type:
+            from common import const
+            if old_bot_type not in (const.CUSTOM,):
+                resolved = self._resolve_bot_type_for_model(str(new_val))
+                if resolved:
+                    updates["bot_type"] = resolved

        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        config_path = os.path.join(project_root, "config.json")
@@ -396,14 +400,30 @@ class CowCliPlugin(Plugin):
        except Exception as e:
            return f"写入 config.json 失败: {e}"

+        # Sync updated values to environment variables so that load_config()
+        # won't overwrite the new value with a stale env var (common in Docker).
+        # Match env var keys case-insensitively (Docker compose typically uses
+        # upper-case like MODEL, but lower-case is also possible).
+        synced_envs = {}
+        for k, v in updates.items():
+            if k not in available_setting:
+                continue
+            str_val = str(v)
+            k_lower = k.lower()
+            for env_key in list(os.environ):
+                if env_key.lower() == k_lower:
+                    os.environ[env_key] = str_val
+                    synced_envs[env_key] = str_val
+        logger.info(f"[CowCli] config update: {updates}, synced envs: {synced_envs}")
+
        try:
            load_config()
        except Exception as e:
            logger.warning(f"[CowCli] config reload warning: {e}")

        result = f"✅ 配置已更新\n\n  {key}: {old_val} → {new_val}"
-        if "bot_type" in updates and updates["bot_type"] != conf().get("bot_type"):
-            result += f"\n  bot_type: → {updates['bot_type']}"
+        if "bot_type" in updates and updates["bot_type"] != old_bot_type:
+            result += f"\n  bot_type: {old_bot_type} → {updates['bot_type']}"
        return result

    @staticmethod
@@ -520,8 +540,22 @@ class CowCliPlugin(Plugin):
                "  disable <名称>   禁用技能"
            )

+    def _refresh_skill_manager(self):
+        """Best-effort re-scan of skill directories so skills_config.json reflects disk state."""
+        try:
+            from bridge.bridge import Bridge
+            bridge = Bridge()
+            agent_bridge = bridge.get_agent_bridge()
+            for agent in [agent_bridge.default_agent] + list(agent_bridge.agents.values()):  # default agent is tried first
+                if agent and hasattr(agent, 'skill_manager') and agent.skill_manager:
+                    agent.skill_manager.refresh_skills()
+                    break  # only the first agent that has a skill manager is refreshed
+        except Exception as e:  # any failure is logged at debug level and otherwise ignored
+            logger.debug(f"[CowCli] skill refresh skipped: {e}")
+
    def _skill_list_local(self) -> str:
        from cli.utils import load_skills_config, get_skills_dir, get_builtin_skills_dir
+        self._refresh_skill_manager()
        config = load_skills_config()

        if not config:
@@ -885,7 +919,6 @@ class CowCliPlugin(Plugin):
        if agent and agent.memory_manager:
            flush_mgr = agent.memory_manager.flush_manager

-        # Fallback: construct a temporary MemoryFlushManager when agent is not yet initialized
        if not flush_mgr:
            try:
                flush_mgr = self._create_standalone_flush_manager()
@@ -895,24 +928,38 @@ class CowCliPlugin(Plugin):
        if not flush_mgr.llm_model:
            return "⚠️ 未配置 LLM 模型，无法执行记忆蒸馏"

+        # SaaS (e_context is None): run synchronously, return full result
+        if e_context is None:
+            return self._memory_dream_sync(flush_mgr, days)
+
+        # Local channels: run in background, notify via channel.send()
        is_web = self._is_web_channel(e_context)

        def _run():
            try:
                result = flush_mgr.deep_dream(lookback_days=days, force=True)
                if result:
-                    msg = self._build_dream_result(flush_mgr, is_web)
-                    self._notify(e_context, msg)
+                    self._notify(e_context, self._build_dream_result(flush_mgr, is_web))
                else:
                    self._notify(e_context, "💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理")
            except Exception as e:
                logger.warning(f"[CowCli] /memory dream failed: {e}")
                self._notify(e_context, f"❌ 记忆蒸馏失败: {e}")

-        thread = threading.Thread(target=_run, daemon=True)
-        thread.start()
+        threading.Thread(target=_run, daemon=True).start()
        return f"🌙 记忆蒸馏已启动 (整理近 {days} 天的记忆)\n\n整理在后台执行，完成后会通知你。"

+    def _memory_dream_sync(self, flush_mgr, days: int) -> str:
+        """Run deep dream synchronously; return the built result, a skip notice, or a failure message."""
+        try:
+            result = flush_mgr.deep_dream(lookback_days=days, force=True)
+            if result:
+                return self._build_dream_result(flush_mgr, is_web=True)  # result is always built with is_web=True
+            return "💤 记忆蒸馏跳过 — 没有新的记忆内容需要整理"  # deep_dream returned a falsy value
+        except Exception as e:  # never raises to the caller; the error text becomes the reply
+            logger.warning(f"[CowCli] /memory dream sync failed: {e}")
+            return f"❌ 记忆蒸馏失败: {e}"
+
    @staticmethod
    def _notify(e_context, text: str):
        """Push a notification message back to the chat channel."""
--- a/run.sh
+++ b/run.sh
@@ -309,26 +309,27 @@ select_model() {
    echo -e "${CYAN}${BOLD}=========================================${NC}"
    echo -e "${CYAN}${BOLD}   Select AI Model${NC}"
    echo -e "${CYAN}${BOLD}=========================================${NC}"
-    echo -e "${YELLOW}1) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)${NC}"
-    echo -e "${YELLOW}2) Zhipu AI (glm-5-turbo, glm-5, etc.)${NC}"
-    echo -e "${YELLOW}3) Kimi (kimi-k2.5, kimi-k2, etc.)${NC}"
-    echo -e "${YELLOW}4) Doubao (doubao-seed-2-0-code-preview-260215, etc.)${NC}"
-    echo -e "${YELLOW}5) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)${NC}"
-    echo -e "${YELLOW}6) Claude (claude-sonnet-4-6, claude-opus-4-6, etc.)${NC}"
-    echo -e "${YELLOW}7) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)${NC}"
-    echo -e "${YELLOW}8) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)${NC}"
-    echo -e "${YELLOW}9) LinkAI (access multiple models via one API)${NC}"
+    echo -e "${YELLOW}1) DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)${NC}"
+    echo -e "${YELLOW}2) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)${NC}"
+    echo -e "${YELLOW}3) Claude (claude-sonnet-4-6, claude-opus-4-7, claude-opus-4-6, etc.)${NC}"
+    echo -e "${YELLOW}4) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)${NC}"
+    echo -e "${YELLOW}5) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)${NC}"
+    echo -e "${YELLOW}6) Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)${NC}"
+    echo -e "${YELLOW}7) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)${NC}"
+    echo -e "${YELLOW}8) Doubao (doubao-seed-2-0-code-preview-260215, etc.)${NC}"
+    echo -e "${YELLOW}9) Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)${NC}"
+    echo -e "${YELLOW}10) LinkAI (access multiple models via one API)${NC}"
    echo ""
    
    while true; do
-        read -p "Enter your choice [press Enter for default: 1 - MiniMax]: " model_choice
+        read -p "Enter your choice [press Enter for default: 1 - DeepSeek]: " model_choice
        model_choice=${model_choice:-1}
        case "$model_choice" in
-            1|2|3|4|5|6|7|8|9)
+            1|2|3|4|5|6|7|8|9|10)
                break
                ;;
            *)
-                echo -e "${RED}Invalid choice. Please enter 1-9.${NC}"
+                echo -e "${RED}Invalid choice. Please enter 1-10.${NC}"
                ;;
        esac
    done
@@ -356,25 +357,26 @@ read_api_base() {
 # Configure model
 configure_model() {
    case "$model_choice" in
-        1) read_model_config "MiniMax" "MiniMax-M2.7" "MINIMAX_KEY" ;;
-        2) read_model_config "Zhipu AI" "glm-5-turbo" "ZHIPU_KEY" ;;
-        3) read_model_config "Kimi (Moonshot)" "kimi-k2.5" "MOONSHOT_KEY" ;;
-        4) read_model_config "Doubao (Volcengine Ark)" "doubao-seed-2-0-code-preview-260215" "ARK_KEY" ;;
-        5) read_model_config "Qwen (DashScope)" "qwen3.6-plus" "DASHSCOPE_KEY" ;;
-        6)
+        1) read_model_config "DeepSeek" "deepseek-v4-flash" "DEEPSEEK_KEY" ;;
+        2) read_model_config "MiniMax" "MiniMax-M2.7" "MINIMAX_KEY" ;;
+        3)
            read_model_config "Claude" "claude-sonnet-4-6" "CLAUDE_KEY"
            read_api_base "CLAUDE_BASE" "https://api.anthropic.com/v1"
            ;;
-        7)
+        4)
            read_model_config "Gemini" "gemini-3.1-pro-preview" "GEMINI_KEY"
            read_api_base "GEMINI_BASE" "https://generativelanguage.googleapis.com"
            ;;
-        8)
+        5)
            read_model_config "OpenAI GPT" "gpt-5.4" "OPENAI_KEY"
            read_api_base "OPENAI_BASE" "https://api.openai.com/v1"
            ;;
-        9)
-            read_model_config "LinkAI" "MiniMax-M2.7" "LINKAI_KEY"
+        6) read_model_config "Zhipu AI" "glm-5.1" "ZHIPU_KEY" ;;
+        7) read_model_config "Qwen (DashScope)" "qwen3.6-plus" "DASHSCOPE_KEY" ;;
+        8) read_model_config "Doubao (Volcengine Ark)" "doubao-seed-2-0-code-preview-260215" "ARK_KEY" ;;
+        9) read_model_config "Kimi (Moonshot)" "kimi-k2.6" "MOONSHOT_KEY" ;;
+        10)
+            read_model_config "LinkAI" "deepseek-v4-flash" "LINKAI_KEY"
            USE_LINKAI="true"
            ;;
    esac
@@ -511,6 +513,8 @@ create_config_file() {
    ARK_KEY="${ARK_KEY:-}" \
    DASHSCOPE_KEY="${DASHSCOPE_KEY:-}" \
    MINIMAX_KEY="${MINIMAX_KEY:-}" \
+    DEEPSEEK_KEY="${DEEPSEEK_KEY:-}" \
+    DEEPSEEK_BASE="${DEEPSEEK_BASE:-https://api.deepseek.com/v1}" \
    USE_LINKAI="${USE_LINKAI:-false}" \
    LINKAI_KEY="${LINKAI_KEY:-}" \
    FEISHU_APP_ID="${FEISHU_APP_ID:-}" \
@@ -545,6 +549,8 @@ base = {
    'ark_api_key': e('ARK_KEY', ''),
    'dashscope_api_key': e('DASHSCOPE_KEY', ''),
    'minimax_api_key': e('MINIMAX_KEY', ''),
+    'deepseek_api_key': e('DEEPSEEK_KEY', ''),
+    'deepseek_api_base': e('DEEPSEEK_BASE'),
    'voice_to_text': 'openai',
    'text_to_voice': 'openai',
    'voice_reply_voice': False,
--- a/scripts/run.ps1
+++ b/scripts/run.ps1
@@ -169,36 +169,38 @@ function Install-Dependencies {

 # ── model selection ──────────────────────────────────────────────
 $ModelChoices = @{
-    "1" = @{ Provider = "MiniMax";                  Default = "MiniMax-M2.7";                           Key = "MINIMAX_KEY" }
-    "2" = @{ Provider = "Zhipu AI";                 Default = "glm-5-turbo";                            Key = "ZHIPU_KEY" }
-    "3" = @{ Provider = "Kimi (Moonshot)";          Default = "kimi-k2.5";                              Key = "MOONSHOT_KEY" }
-    "4" = @{ Provider = "Doubao (Volcengine Ark)";  Default = "doubao-seed-2-0-code-preview-260215";    Key = "ARK_KEY" }
-    "5" = @{ Provider = "Qwen (DashScope)";         Default = "qwen3.6-plus";                           Key = "DASHSCOPE_KEY" }
-    "6" = @{ Provider = "Claude";                   Default = "claude-sonnet-4-6";                      Key = "CLAUDE_KEY";  Base = "https://api.anthropic.com/v1" }
-    "7" = @{ Provider = "Gemini";                   Default = "gemini-3.1-pro-preview";                 Key = "GEMINI_KEY";  Base = "https://generativelanguage.googleapis.com" }
-    "8" = @{ Provider = "OpenAI GPT";               Default = "gpt-5.4";                                Key = "OPENAI_KEY";  Base = "https://api.openai.com/v1" }
-    "9" = @{ Provider = "LinkAI";                   Default = "MiniMax-M2.7";                           Key = "LINKAI_KEY" }
+    "1" = @{ Provider = "DeepSeek";                 Default = "deepseek-v4-flash";                      Key = "DEEPSEEK_KEY" }
+    "2" = @{ Provider = "MiniMax";                  Default = "MiniMax-M2.7";                           Key = "MINIMAX_KEY" }
+    "3" = @{ Provider = "Zhipu AI";                 Default = "glm-5.1";                                Key = "ZHIPU_KEY" }
+    "4" = @{ Provider = "Kimi (Moonshot)";          Default = "kimi-k2.6";                              Key = "MOONSHOT_KEY" }
+    "5" = @{ Provider = "Doubao (Volcengine Ark)";  Default = "doubao-seed-2-0-code-preview-260215";    Key = "ARK_KEY" }
+    "6" = @{ Provider = "Qwen (DashScope)";         Default = "qwen3.6-plus";                           Key = "DASHSCOPE_KEY" }
+    "7" = @{ Provider = "Claude";                   Default = "claude-sonnet-4-6";                      Key = "CLAUDE_KEY";  Base = "https://api.anthropic.com/v1" }
+    "8" = @{ Provider = "Gemini";                   Default = "gemini-3.1-pro-preview";                 Key = "GEMINI_KEY";  Base = "https://generativelanguage.googleapis.com" }
+    "9" = @{ Provider = "OpenAI GPT";               Default = "gpt-5.4";                                Key = "OPENAI_KEY";  Base = "https://api.openai.com/v1" }
+    "10" = @{ Provider = "LinkAI";                  Default = "deepseek-v4-flash";                      Key = "LINKAI_KEY" }
 }

 function Select-Model {
    Write-Info "========================================="
    Write-Info "   Select AI Model"
    Write-Info "========================================="
-    Write-Host "1) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)"
-    Write-Host "2) Zhipu AI (glm-5-turbo, glm-5, etc.)"
-    Write-Host "3) Kimi (kimi-k2.5, kimi-k2, etc.)"
-    Write-Host "4) Doubao (doubao-seed-2-0-code-preview-260215, etc.)"
-    Write-Host "5) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)"
-    Write-Host "6) Claude (claude-sonnet-4-6, claude-opus-4-6, etc.)"
-    Write-Host "7) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)"
-    Write-Host "8) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)"
-    Write-Host "9) LinkAI (access multiple models via one API)"
+    Write-Host "1) DeepSeek (deepseek-v4-flash, deepseek-v4-pro, etc.)"
+    Write-Host "2) MiniMax (MiniMax-M2.7, MiniMax-M2.5, etc.)"
+    Write-Host "3) Zhipu AI (glm-5.1, glm-5-turbo, glm-5, etc.)"
+    Write-Host "4) Kimi (kimi-k2.6, kimi-k2.5, kimi-k2, etc.)"
+    Write-Host "5) Doubao (doubao-seed-2-0-code-preview-260215, etc.)"
+    Write-Host "6) Qwen (qwen3.6-plus, qwen3.5-plus, qwen3-max, qwq-plus, etc.)"
+    Write-Host "7) Claude (claude-sonnet-4-6, claude-opus-4-6, etc.)"
+    Write-Host "8) Gemini (gemini-3.1-flash-lite-preview, gemini-3.1-pro-preview, etc.)"
+    Write-Host "9) OpenAI GPT (gpt-5.4, gpt-5.2, gpt-4.1, etc.)"
+    Write-Host "10) LinkAI (access multiple models via one API)"
    Write-Host ""

    do {
-        $choice = Read-Host "Enter your choice [default: 1 - MiniMax]"
+        $choice = Read-Host "Enter your choice [default: 1 - DeepSeek]"
        if (-not $choice) { $choice = "1" }
-    } while ($choice -notmatch '^[1-9]$')
+    } while ($choice -notmatch '^([1-9]|10)$')

    $m = $ModelChoices[$choice]
    Write-Cow "Configuring $($m.Provider)..."
@@ -208,7 +210,7 @@ function Select-Model {
    if (-not $model) { $model = $m.Default }
    $script:ModelName = $model
    $script:KeyName   = $m.Key
-    $script:UseLinkai = ($choice -eq "9")
+    $script:UseLinkai = ($choice -eq "10")

    if ($m.Base) {
        $base = Read-Host "Enter API Base URL [default: $($m.Base)]"
@@ -302,6 +304,8 @@ function New-ConfigFile {
        ark_api_key               = ""
        dashscope_api_key         = ""
        minimax_api_key           = ""
+        deepseek_api_key          = ""
+        deepseek_api_base         = "https://api.deepseek.com/v1"
        voice_to_text             = "openai"
        text_to_voice             = "openai"
        voice_reply_voice         = $false
@@ -326,6 +330,7 @@ function New-ConfigFile {
        ARK_KEY      = "ark_api_key"
        DASHSCOPE_KEY = "dashscope_api_key"
        MINIMAX_KEY  = "minimax_api_key"
+        DEEPSEEK_KEY = "deepseek_api_key"
        LINKAI_KEY   = "linkai_api_key"
    }
    if ($keyMap.ContainsKey($KeyName)) {
@@ -334,9 +339,9 @@ function New-ConfigFile {

    # Set API base if provided
    $baseMap = @{
-        "6" = "claude_api_base"
-        "7" = "gemini_api_base"
-        "8" = "open_ai_api_base"
+        "7" = "claude_api_base"
+        "8" = "gemini_api_base"
+        "9" = "open_ai_api_base"
    }
    if ($ApiBase -and $baseMap.ContainsKey($ModelChoice)) {
        $config[$baseMap[$ModelChoice]] = $ApiBase
--- a/skills/image-generation/SKILL.md
+++ b/skills/image-generation/SKILL.md
@@ -0,0 +1,117 @@
+---
+name: image-generation
+description: Generate or edit images from text prompts. Use when the user asks to create, draw, design, or edit an image, illustration, photo, icon, poster, or any visual content.
+metadata:
+  cowagent:
+    requires:
+      anyEnv:
+        - OPENAI_API_KEY
+        - GEMINI_API_KEY
+        - ARK_API_KEY
+        - DASHSCOPE_API_KEY
+        - MINIMAX_API_KEY
+        - LINKAI_API_KEY
+---
+
+# Image Generation
+
+Generate and edit images using AI models. The script automatically picks a backend based on which API keys are configured — **you don't need to specify a model unless the user explicitly names one**.
+
+Supported models (passed via `model` only when the user asks for a specific one):
+
+- **OpenAI** — `gpt-image-2`, `gpt-image-1`
+- **Gemini Nano Banana** — `nano-banana-2`, `nano-banana-pro`, `nano-banana`
+- **Seedream (Volcengine Ark)** — `seedream-5.0-lite`, `seedream-4.5`
+- **Qwen (DashScope)** — `qwen-image-2.0`, `qwen-image-2.0-pro`
+- **MiniMax** — `image-01`
+
+## Usage
+
+Run `scripts/generate.py` with a JSON argument. The path is relative to this skill's `base_dir`.
+
+```bash
+python <base_dir>/scripts/generate.py '<json_args>'
+```
+
+**Set bash timeout to at least 600 seconds**, as image generation can take 30–200s per provider, and the script may try multiple providers sequentially.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `prompt` | string | yes | — | Image description |
+| `image_url` | string / list | no | null | Input image(s) for editing: local file path or URL. Multi-image fusion is supported (pass a list) |
+| `quality` | string | no | auto | `low` / `medium` / `high` (only some backends honour this) |
+| `size` | string | no | auto | `512` / `1K` / `2K` / `3K` / `4K`, or pixel value (`1024x1024`) |
+| `aspect_ratio` | string | no | null | `1:1` / `3:2` / `2:3` / `16:9` / `9:16` / `21:9` (some backends also support extreme ratios like `1:4` / `8:1`) |
+
+**Higher `quality` and larger `size` cost more and run slower.** Unless the user explicitly specifies a quality, `low` or `medium` is sufficient; only use `high` when the user asks for it.
+
+### Example — generate
+
+```bash
+python <base_dir>/scripts/generate.py '{"prompt": "A corgi astronaut floating in space"}'
+```
+
+With aspect ratio:
+
+```bash
+python <base_dir>/scripts/generate.py '{"prompt": "Isometric miniature city of Shanghai at sunset", "size": "2K", "aspect_ratio": "16:9"}'
+```
+
+### Important: Editing vs Generating
+
+When the user asks to **edit, modify, or improve an existing image**, pass the original image via `image_url`. Prefer **local file paths** directly — the script handles file reading internally. Without `image_url`, the script generates a brand-new image instead of editing.
+
+### Example — edit (image-to-image)
+
+```bash
+python <base_dir>/scripts/generate.py '{"prompt": "Add a Santa hat to the dog", "image_url": "/path/to/dog.png"}'
+```
+
+Multi-image fusion — pass a list:
+
+```bash
+python <base_dir>/scripts/generate.py '{"prompt": "Combine these characters into a group photo", "image_url": ["/path/a.png", "/path/b.png"]}'
+```
+
+### Output
+
+Prints JSON to stdout:
+
+```json
+{
+  "model": "doubao-seedream-5-0-260128",
+  "images": [
+    {"url": "/path/to/output.png"}
+  ]
+}
+```
+
+After success, display the image to the user. You can either embed it in markdown (`![description](/path/to/output.png)`) or use the `send` tool.
+
+On error:
+
+```json
+{
+  "error": "error message"
+}
+```
+
+### Setup
+
+The script needs **at least one** of these API keys (set via `env_config` or `config.json`):
+
+`OPENAI_API_KEY` / `GEMINI_API_KEY` / `ARK_API_KEY` / `DASHSCOPE_API_KEY` / `MINIMAX_API_KEY` / `LINKAI_API_KEY`
+
+Each also has an optional `*_API_BASE` for custom endpoints. The script automatically picks the first configured backend and falls back to the next if it fails — no need to specify a model.
+
+### Error Handling
+
+If the script returns an error after trying all configured backends, **do NOT retry with the same parameters** — the failure is almost always a configuration issue (wrong API key, unsupported API base). Tell the user to fix it via `env_config`, then retry.
+
+### Notes
+
+- HTTP timeout is 300s — high-resolution generation can take over 200s.
+- Omit `quality` / `size` to let the model pick automatically (`auto`).
+- Input images for editing are auto-compressed to ≤ 4MB / longest edge ≤ 4096px.
--- a/skills/image-generation/scripts/generate.py
+++ b/skills/image-generation/scripts/generate.py
--- a/skills/knowledge-wiki/SKILL.md
+++ b/skills/knowledge-wiki/SKILL.md
@@ -40,6 +40,8 @@ Maintain a persistent, structured knowledge base in the `knowledge/` directory.
 ```markdown
 # Page Title

+> Source: <URL or description of the original material>
+
 Content here. Cross-reference related pages with markdown links:
 [Related Page](../category/related-page.md)

@@ -53,6 +55,8 @@ Content here. Cross-reference related pages with markdown links:
 - [Page B](../category/page-b.md) — how it relates
 ```

+The `> Source:` line records where the knowledge came from (URL, document name, conversation, etc.). Always include it when the material originates from a specific source.
+
 Cross-references build a knowledge graph. When creating or updating a page, link to related pages and update those pages to link back. **Only link to pages that already exist** — if a concept deserves its own page, create it first, then add the link.

 ## Index Format (`knowledge/index.md`)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
zhayujie	5c65196e44	feat(web): hint API base version path in config placeholder	2026-04-26 17:10:24 +08:00
zhayujie	f5798bfe90	fix: remove unnecessary API Base URL in run scripts	2026-04-26 16:29:08 +08:00
zhayujie	0e556b3468	feat: switch default model to deepseek-v4-flash	2026-04-26 15:54:50 +08:00
zhayujie	31820f56e7	fix(deepseek): back-fill reasoning_content for all assistant turns	2026-04-24 16:39:48 +08:00
zhayujie	fd88828abd	fix(models): unify enable_thinking for deepseek-v4	2026-04-24 15:29:43 +08:00
zhayujie	ae11159918	feat(models): unify enable_thinking for deepseek-v4 and other thinking models	2026-04-24 15:22:45 +08:00
zhayujie	472a8605c0	feat(models): support deepseek-v4-pro and deepseek-v4-flash	2026-04-24 11:35:38 +08:00
zhayujie	e1760ba211	feat: release 2.0.7 version	2026-04-23 18:13:53 +08:00
zhayujie	ce4c0a0aa4	feat: release 2.0.7	2026-04-23 17:18:19 +08:00
zhayujie	64511593c4	feat: release 2.0.7	2026-04-23 17:16:17 +08:00
zhayujie	b0e00dfceb	feat: support glm-5.1	2026-04-23 16:43:05 +08:00
zhayujie	fc465b463d	feat: support kimi coding plan by temporary solution	2026-04-23 16:24:37 +08:00
zhayujie	68ce2e5232	feat(skill): multi-provider image generation with auto-fallback - Add Gemini, Seedream (Volcengine Ark), Qwen (DashScope), MiniMax providers to image-generation skill with universal sequential fallback: OpenAI → Gemini → Seedream → Qwen → MiniMax → LinkAI - Each provider filters unsupported size tiers to valid values (e.g. Seedream 1K→2K, Qwen 3K→2K, Gemini 3K→2K) - Pinned model only tries its native provider; auto-routing uses each provider's default model - Support skill-namespaced config (config.skill.image-generation.model → SKILL_IMAGE_GENERATION_MODEL env var) - Add image lightbox (click-to-enlarge) in web console - Add docs for built-in skills (skill-creator, knowledge-wiki, image-generation) under docs/skills/	2026-04-23 12:39:39 +08:00
zhayujie	81e8bb62ae	feat(skill): support gpt-image-2 in image generation skill	2026-04-22 20:39:49 +08:00
zhayujie	2c13e1b923	feat(models): support kimi-k2.6	2026-04-22 12:01:40 +08:00
zhayujie	a0748c2e3b	fix(web): cap reasoning content to 4KB across stream/storage/display	2026-04-21 20:31:38 +08:00
zhayujie	40599bb751	fix(web): smart auto-scroll for chat #2775	2026-04-20 21:43:21 +08:00
zhayujie	f3c64ceea7	fix: refresh skill manager on /skill	2026-04-19 19:50:16 +08:00
zhayujie	15c60de709	fix: improve skill installation to support multiple source formats and ensure target directory	2026-04-19 19:05:51 +08:00
zhayujie	6dd316547f	fix(web): fix session title generation fallback and reset Bridge on config change	2026-04-19 18:43:48 +08:00
zhayujie	54c7676a44	docs: update architecture diagram	2026-04-18 23:08:36 +08:00
zhayujie	d25b8966ce	fix(web): prevent duplicate image previews	2026-04-18 22:32:34 +08:00
zhayujie	14a119c48c	fix(gemini): solving the problem of tool call not returnings	2026-04-18 21:18:27 +08:00
zhayujie	c82515a927	fix(agent): don't drop tool_calls from empty-response retry	2026-04-18 20:50:40 +08:00
zhayujie	26e630c2dd	feat(cli): /config support set enable_thinking	2026-04-17 16:09:43 +08:00
zhayujie	13370d2056	fix: thinking display is disabled by default	2026-04-17 15:31:59 +08:00
zhayujie	35282db9e0	feat(models): support claude-opus-4-7	2026-04-16 23:24:16 +08:00
zhayujie	426fb88ce7	fix(knowledge): exclude root-level files from knowledge stats to preserve empty state	2026-04-16 22:55:46 +08:00
zhayujie	2384bd0e10	fix: update CI workflows for repo rename and add latest tag	2026-04-16 21:57:20 +08:00
zhayujie	ba3f66d3d1	feat: show root-level files (index.md, log.md) in knowledge tree	2026-04-16 21:47:44 +08:00
zhayujie	7293a0f670	fix: modify repo name in github workflow	2026-04-16 21:38:58 +08:00
zhayujie	9e86d46267	fix: sync env vars when updating config in docker env	2026-04-16 21:32:07 +08:00
zhayujie	848430f062	feat(knowledge): support nested directories in knowledge base listing and display	2026-04-16 12:28:18 +08:00
zhayujie	abd21335c4	Merge pull request #2772 from 6vision/master fix: bot_type change notification never shown after model switch	2026-04-16 10:43:41 +08:00
6vision	8fa95f058a	fix: bot_type change notification never shown after model switch Made-with: Cursor	2026-04-15 21:48:50 +08:00
zhayujie	d4e5ecd497	fix: compatible with Python 3.7 by deferring Literal import in truncate.py	2026-04-15 12:29:09 +08:00
zhayujie	3830f76729	feat: add custom model provider	2026-04-15 12:26:05 +08:00
zhayujie	83f778fec9	feat(dream): structured organization of dream memories	2026-04-15 11:27:46 +08:00
zhayujie	cabd24605f	fix: add random jitter to daily dream schedule	2026-04-15 00:33:33 +08:00
zhayujie	ae20ba1148	Merge branch 'master' of github.com:zhayujie/chatgpt-on-wechat	2026-04-14 22:58:59 +08:00
zhayujie	3a50b64977	feat: web multi session interface	2026-04-14 22:58:25 +08:00
zhayujie	8692e74536	fix(web): hide session panel by default on mobile and support overlay dismiss	2026-04-14 21:09:01 +08:00
zhayujie	1c18bd9889	docs(memory): update long-term memory docs	2026-04-14 17:14:28 +08:00
@@ -1 +1 @@
-2.0.6
+2.0.7