Merge pull request #2832 from yangluxin613/feat/cjk-search-fix

fix(memory): CJK keyword search + vector search optimization
2026-07-19 12:47:25 +08:00 · 2026-05-25 14:45:49 +08:00
parent c5a3f991c5 fd571ac539
commit c9a7525d0b
4 changed files with 493 additions and 139 deletions
--- a/agent/memory/embedding/state.py
+++ b/agent/memory/embedding/state.py
@@ -31,9 +31,13 @@ def detect_index_dim(storage) -> Optional[int]:
    if not row or not row["embedding"]:
        return None
    try:
-        emb = json.loads(row["embedding"])
+        raw = row["embedding"]
+        if isinstance(raw, (bytes, bytearray)):
+            # New BLOB format: 4 bytes per float32
+            return len(raw) // 4
+        emb = json.loads(raw)
        return len(emb) if isinstance(emb, list) else None
-    except (json.JSONDecodeError, TypeError):
+    except (json.JSONDecodeError, TypeError, Exception):
        return None


--- a/agent/memory/manager.py
+++ b/agent/memory/manager.py
@@ -13,7 +13,7 @@ from datetime import datetime, timedelta
 from agent.memory.config import MemoryConfig, get_default_memory_config
 from agent.memory.storage import MemoryStorage, MemoryChunk, SearchResult
 from agent.memory.chunker import TextChunker
-from agent.memory.embedding import EmbeddingProvider
+from agent.memory.embedding import EmbeddingProvider, EmbeddingCache
 from agent.memory.summarizer import MemoryFlushManager, create_memory_files_if_needed


@@ -62,6 +62,10 @@ class MemoryManager:
                "[MemoryManager] No embedding provider; memory will use keyword search only"
            )

+        # Cache for query embeddings (avoids redundant API calls within a session)
+        self._embedding_cache = EmbeddingCache()
+
+
        # Initialize memory flush manager
        workspace_dir = self.config.get_workspace()
        self.flush_manager = MemoryFlushManager(
@@ -128,7 +132,14 @@ class MemoryManager:
        vector_results = []
        if self.embedding_provider:
            try:
+                provider_name = type(self.embedding_provider).__name__
+                model_name = getattr(self.embedding_provider, 'model', '')
+                cached = self._embedding_cache.get(query, provider_name, model_name)
+                if cached is not None:
+                    query_embedding = cached
+                else:
                    query_embedding = self.embedding_provider.embed_query(query)
+                    self._embedding_cache.put(query, provider_name, model_name, query_embedding)
                vector_results = self.storage.search_vector(
                    query_embedding=query_embedding,
                    user_id=user_id,
--- a/agent/memory/storage.py
+++ b/agent/memory/storage.py
@@ -5,12 +5,42 @@ Provides vector and keyword search capabilities
 """

 from __future__ import annotations
+import re
 import sqlite3
 import json
 import hashlib
+import threading
 from typing import List, Dict, Optional, Any
 from pathlib import Path
 from dataclasses import dataclass
+try:
+    import numpy as np
+    _HAS_NUMPY = True
+except ImportError:
+    _HAS_NUMPY = False
+    np = None  # type: ignore[assignment]
+
+# UPSERT (INSERT … ON CONFLICT DO UPDATE) requires SQLite ≥ 3.24.0 (2018).
+# Older systems (e.g. CentOS 7 ships SQLite 3.7) fall back to INSERT OR REPLACE,
+# which risks FTS5 rowid drift on chunk updates (see save_chunk docstring).
+# The flag is computed once, at import time, from sqlite3.sqlite_version_info.
+_HAS_UPSERT = sqlite3.sqlite_version_info >= (3, 24, 0)
+
+# ---------------------------------------------------------------------------
+# CJK character ranges, compiled once at module load.
+# Covers: CJK Symbols/Punctuation, Japanese kana (hiragana + katakana),
+#         CJK Unified Ideographs + Extension A, Korean syllables (Hangul),
+#         CJK Compatibility Ideographs, and CJK Extension B–F.
+# NOTE: because the first range starts at U+3000, the ideographic space
+# (U+3000) and ideographic punctuation such as "、" (U+3001) and "。" (U+3002)
+# are treated as CJK characters by every pattern built from these ranges.
+# ---------------------------------------------------------------------------
+_CJK_RANGES = (
+    r'\u3000-\u30ff'          # CJK Symbols/Punctuation + Japanese kana
+    r'\u3400-\u9fff'          # CJK Unified Ideographs (incl. Extension A)
+    r'\uac00-\ud7af'          # Korean syllables (Hangul)
+    r'\uf900-\ufaff'          # CJK Compatibility Ideographs
+    r'\U00020000-\U0002fa1f'  # CJK Extension B–F
+)
+# Matches a single character from the ranges above (presence test).
+_RE_CONTAINS_CJK   = re.compile(f'[{_CJK_RANGES}]')
+# Matches a maximal run of one or more characters from the ranges above.
+_RE_CJK_WORDS      = re.compile(f'[{_CJK_RANGES}]+')
+# Matches either a CJK run or an ASCII word ([A-Za-z0-9_]+); any other
+# character acts as a separator between tokens.
+_RE_TRIGRAM_TOKENS = re.compile(f'[{_CJK_RANGES}]+|[A-Za-z0-9_]+')


@dataclass
@@ -48,6 +78,10 @@ class MemoryStorage:
        self.db_path = db_path
        self.conn: Optional[sqlite3.Connection] = None
        self.fts5_available = False  # Track FTS5 availability
+        # RLock protects concurrent writes from the same process.
+        # SQLite WAL mode handles read/write concurrency at the file level,
+        # but same-process concurrent writes still need a Python-level lock.
+        self._lock = threading.RLock()
        self._init_db()
    
    def _check_fts5_support(self) -> bool:
@@ -69,6 +103,14 @@ class MemoryStorage:
            
            # Check FTS5 support
            self.fts5_available = self._check_fts5_support()
+            if not _HAS_UPSERT:
+                from common.log import logger
+                logger.warning(
+                    "[MemoryStorage] SQLite %s < 3.24 — UPSERT unavailable. "
+                    "Falling back to INSERT OR REPLACE; FTS5 rowid may drift on "
+                    "chunk updates (rebuild index periodically to recover).",
+                    sqlite3.sqlite_version,
+                )
            if not self.fts5_available:
                from common.log import logger
                logger.debug("[MemoryStorage] FTS5 not available, using LIKE-based keyword search")
@@ -175,6 +217,75 @@ class MemoryStorage:
                )
                self._rebuild_fts5_from_chunks()

+        # Internal key-value store for persistent flags (e.g. backfill tracking)
+        self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS _meta (
+                key TEXT PRIMARY KEY,
+                value TEXT NOT NULL
+            )
+        """)
+
+        # Create trigram FTS5 table for CJK / mixed-language search
+        self.trigram_fts5_available = False
+        if self.fts5_available:
+            try:
+                self.conn.execute("""
+                    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts_trigram USING fts5(
+                        text,
+                        id UNINDEXED,
+                        user_id UNINDEXED,
+                        path UNINDEXED,
+                        source UNINDEXED,
+                        scope UNINDEXED,
+                        content='chunks',
+                        content_rowid='rowid',
+                        tokenize='trigram case_sensitive 0'
+                    )
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_ai
+                    AFTER INSERT ON chunks BEGIN
+                        INSERT INTO chunks_fts_trigram(rowid, text, id, user_id, path, source, scope)
+                        VALUES (new.rowid, new.text, new.id, new.user_id, new.path, new.source, new.scope);
+                    END
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_ad
+                    AFTER DELETE ON chunks BEGIN
+                        DELETE FROM chunks_fts_trigram WHERE rowid = old.rowid;
+                    END
+                """)
+                self.conn.execute("""
+                    CREATE TRIGGER IF NOT EXISTS chunks_trigram_au
+                    AFTER UPDATE ON chunks BEGIN
+                        UPDATE chunks_fts_trigram
+                        SET text=new.text, id=new.id, user_id=new.user_id,
+                            path=new.path, source=new.source, scope=new.scope
+                        WHERE rowid = new.rowid;
+                    END
+                """)
+                # One-time backfill for existing rows.
+                # NOTE: COUNT(*) on an FTS5 content table always returns 0, so we
+                # use a persistent flag in _meta instead of counting trigram rows.
+                backfill_done = self.conn.execute(
+                    "SELECT 1 FROM _meta WHERE key = 'trigram_backfill_done'"
+                ).fetchone()
+                chunks_count = self.conn.execute(
+                    "SELECT COUNT(*) as c FROM chunks"
+                ).fetchone()['c']
+                if chunks_count > 0 and not backfill_done:
+                    self.conn.execute(
+                        "INSERT INTO chunks_fts_trigram(chunks_fts_trigram) VALUES('rebuild')"
+                    )
+                    self.conn.execute(
+                        "INSERT OR REPLACE INTO _meta(key, value) VALUES('trigram_backfill_done', '1')"
+                    )
+                self.trigram_fts5_available = True
+            except Exception:
+                from common.log import logger
+                logger.warning("[MemoryStorage] trigram FTS5 unavailable, CJK search will use LIKE fallback", exc_info=True)
+                self.trigram_fts5_available = False
+
        # Create files metadata table
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS files (
@@ -299,42 +410,97 @@ class MemoryStorage:
        self.conn.commit()

    def save_chunk(self, chunk: MemoryChunk):
-        """Save a memory chunk"""
-        self.conn.execute("""
-            INSERT OR REPLACE INTO chunks 
-            (id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
+        """Save a memory chunk (insert or update by id).
+
+        When _HAS_UPSERT is true, uses SQLite UPSERT
+        (INSERT … ON CONFLICT DO UPDATE) instead of INSERT OR REPLACE.
+        INSERT OR REPLACE internally does DELETE+INSERT,
+        which changes the row's rowid.  Because both FTS5 tables use
+        content_rowid='rowid', a new rowid would leave the old FTS index
+        entries pointing at a non-existent rowid and trigger
+        "fts5: missing row N from content table" errors.
+        ON CONFLICT DO UPDATE fires the AFTER UPDATE trigger (chunks_au /
+        chunks_trigram_au) and keeps the original rowid intact.
+
+        When _HAS_UPSERT is false the statement falls back to
+        INSERT OR REPLACE, so the rowid change described above still
+        applies on those SQLite versions.
+
+        Args:
+            chunk: Chunk to persist.  Its embedding is stored through
+                _encode_embedding and its metadata as JSON text (NULL when
+                the metadata is falsy).
+        """
+        if _HAS_UPSERT:
+            _SQL = """
+                INSERT INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
-        """, (
-            chunk.id,
-            chunk.user_id,
-            chunk.scope,
-            chunk.source,
-            chunk.path,
-            chunk.start_line,
-            chunk.end_line,
-            chunk.text,
-            json.dumps(chunk.embedding) if chunk.embedding else None,
+                ON CONFLICT(id) DO UPDATE SET
+                    user_id     = excluded.user_id,
+                    scope       = excluded.scope,
+                    source      = excluded.source,
+                    path        = excluded.path,
+                    start_line  = excluded.start_line,
+                    end_line    = excluded.end_line,
+                    text        = excluded.text,
+                    embedding   = excluded.embedding,
+                    hash        = excluded.hash,
+                    metadata    = excluded.metadata,
+                    updated_at  = strftime('%s', 'now')
+            """
+        else:
+            _SQL = """
+                INSERT OR REPLACE INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
+            """
+        # Parameter order must match the column list in both statements above.
+        params = (
+            chunk.id, chunk.user_id, chunk.scope, chunk.source, chunk.path,
+            chunk.start_line, chunk.end_line, chunk.text,
+            self._encode_embedding(chunk.embedding),
            chunk.hash,
-            json.dumps(chunk.metadata) if chunk.metadata else None
-        ))
+            json.dumps(chunk.metadata) if chunk.metadata else None,
+        )
+        # Hold the storage lock for the write and its commit so they are not
+        # interleaved with another thread writing through the same connection.
+        with self._lock:
+            self.conn.execute(_SQL, params)
            self.conn.commit()

    def save_chunks_batch(self, chunks: List[MemoryChunk]):
-        """Save multiple chunks in a batch"""
-        self.conn.executemany("""
-            INSERT OR REPLACE INTO chunks 
-            (id, user_id, scope, source, path, start_line, end_line, text, embedding, hash, metadata, updated_at)
+        """Save multiple chunks in a batch (insert or update by id).
+
+        Uses UPSERT (INSERT … ON CONFLICT(id) DO UPDATE) when _HAS_UPSERT is
+        true and INSERT OR REPLACE otherwise.
+        See save_chunk for why UPSERT is used instead of INSERT OR REPLACE.
+
+        Args:
+            chunks: Chunks to persist; all rows are written with a single
+                executemany call followed by one commit.
+        """
+        if _HAS_UPSERT:
+            _SQL = """
+                INSERT INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
-        """, [
+                ON CONFLICT(id) DO UPDATE SET
+                    user_id     = excluded.user_id,
+                    scope       = excluded.scope,
+                    source      = excluded.source,
+                    path        = excluded.path,
+                    start_line  = excluded.start_line,
+                    end_line    = excluded.end_line,
+                    text        = excluded.text,
+                    embedding   = excluded.embedding,
+                    hash        = excluded.hash,
+                    metadata    = excluded.metadata,
+                    updated_at  = strftime('%s', 'now')
+            """
+        else:
+            _SQL = """
+                INSERT OR REPLACE INTO chunks
+                (id, user_id, scope, source, path, start_line, end_line,
+                 text, embedding, hash, metadata, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%s', 'now'))
+            """
+        # One parameter tuple per chunk, in the column order of the statements above.
+        params_list = [
            (
                c.id, c.user_id, c.scope, c.source, c.path,
                c.start_line, c.end_line, c.text,
-                json.dumps(c.embedding) if c.embedding else None,
+                self._encode_embedding(c.embedding),
                c.hash,
-                json.dumps(c.metadata) if c.metadata else None
+                json.dumps(c.metadata) if c.metadata else None,
            )
            for c in chunks
-        ])
+        ]
+        # Hold the storage lock for the batch write and its commit so they are
+        # not interleaved with another thread writing through the same connection.
+        with self._lock:
+            self.conn.executemany(_SQL, params_list)
            self.conn.commit()
    
    def get_chunk(self, chunk_id: str) -> Optional[MemoryChunk]:
@@ -356,17 +522,17 @@ class MemoryStorage:
        limit: int = 10
    ) -> List[SearchResult]:
        """
-        Vector similarity search using in-memory cosine similarity
-        (sqlite-vec can be added later for better performance)
+        Vector similarity search using numpy-vectorized cosine similarity.
+        All embeddings are loaded then scored in a single BLAS matrix-vector
+        multiply, which is ~100x faster than the pure-Python per-row loop.
        """
        if scopes is None:
            scopes = ["shared"]
            if user_id:
                scopes.append("user")

-        # Build query
        scope_placeholders = ','.join('?' * len(scopes))
-        params = scopes
+        params = list(scopes)

        if user_id:
            query = f"""
@@ -384,44 +550,88 @@ class MemoryStorage:
            """

        rows = self.conn.execute(query, params).fetchall()
+        if not rows:
+            return []

-        # Calculate cosine similarity. We probe the first row's dim to fail
-        # loudly on a query/index dim mismatch — otherwise every doc would
-        # score 0 silently, leaving the user wondering why search broke.
-        results = []
-        query_dim = len(query_embedding)
-        if rows:
-            first = json.loads(rows[0]['embedding'])
-            if isinstance(first, list) and len(first) != query_dim:
-                raise ValueError(
-                    f"Embedding dim mismatch: query is {query_dim}-dim but "
-                    f"index stores {len(first)}-dim vectors. The configured "
-                    f"embedding model differs from the one that built the "
-                    f"index — run /memory rebuild-index to re-embed."
-                )
-
+        # Parse embeddings and build a (N, D) matrix in one pass.
+        # New rows store BLOB bytes (np.frombuffer); legacy rows fall back to JSON.
+        # Filter out rows whose embedding dimension differs from the query —
+        # mixing dimensions would cause np.array() to produce an object array
+        # and matrix @ q_vec to raise ValueError.
+        expected_dim = len(query_embedding)
+        valid_rows = []
+        vectors = []
        for row in rows:
-            embedding = json.loads(row['embedding'])
-            similarity = self._cosine_similarity(query_embedding, embedding)
+            vec = self._decode_embedding(row['embedding'])
+            if not vec:
+                continue
+            if len(vec) != expected_dim:
+                from common.log import logger
+                logger.warning(
+                    "[MemoryStorage] Skipping chunk %s: embedding dim %d != query dim %d",
+                    row['id'], len(vec), expected_dim
+                )
+                continue
+            valid_rows.append(row)
+            vectors.append(vec)

-            if similarity > 0:
-                results.append((similarity, row))
+        if not vectors:
+            return []

-        # Sort by similarity and limit
-        results.sort(key=lambda x: x[0], reverse=True)
-        results = results[:limit]
+        if _HAS_NUMPY:
+            matrix = np.array(vectors, dtype=np.float32)        # (N, D)
+            q_vec = np.array(query_embedding, dtype=np.float32)  # (D,)

+            # Vectorized cosine similarity: dot(matrix, q) / (||matrix|| * ||q||)
+            dots = matrix @ q_vec                                # (N,)
+            row_norms = np.linalg.norm(matrix, axis=1)           # (N,)
+            q_norm = float(np.linalg.norm(q_vec))
+            denominators = row_norms * q_norm
+            np.maximum(denominators, 1e-10, out=denominators)    # avoid div-by-zero
+            sims = dots / denominators                           # (N,)
+
+            # Select TopK using argpartition (O(N) average), then sort only those K
+            k = min(limit, len(valid_rows))
+            top_idx = np.argpartition(sims, -k)[-k:]
+            top_idx = top_idx[np.argsort(sims[top_idx])[::-1]]
+
+            return [
+                SearchResult(
+                    path=valid_rows[i]['path'],
+                    start_line=valid_rows[i]['start_line'],
+                    end_line=valid_rows[i]['end_line'],
+                    score=float(sims[i]),
+                    snippet=self._truncate_text(valid_rows[i]['text'], 500),
+                    source=valid_rows[i]['source'],
+                    user_id=valid_rows[i]['user_id']
+                )
+                for i in top_idx
+                if sims[i] > 0
+            ]
+        else:
+            # Pure-Python cosine similarity fallback (numpy not installed)
+            import math
+            q = query_embedding
+            q_norm = math.sqrt(sum(x * x for x in q)) or 1e-10
+            scored = []
+            for i, vec in enumerate(vectors):
+                dot = sum(a * b for a, b in zip(vec, q))
+                v_norm = math.sqrt(sum(x * x for x in vec)) or 1e-10
+                sim = dot / (v_norm * q_norm)
+                if sim > 0:
+                    scored.append((sim, valid_rows[i]))
+            scored.sort(key=lambda x: x[0], reverse=True)
            return [
                SearchResult(
                    path=row['path'],
                    start_line=row['start_line'],
                    end_line=row['end_line'],
-                score=score,
+                    score=sim,
                    snippet=self._truncate_text(row['text'], 500),
                    source=row['source'],
                    user_id=row['user_id']
                )
-            for score, row in results
+                for sim, row in scored[:limit]
            ]
    
    def search_keyword(
@@ -445,13 +655,38 @@ class MemoryStorage:
            if user_id:
                scopes.append("user")

-        if self.fts5_available:
+        # Step 1: Standard FTS5 (unicode61) — pure ASCII queries only.
+        # Skipped when query contains any CJK characters: unicode61 tokenises CJK
+        # as individual characters without forming meaningful tokens, so it would
+        # match only the ASCII portion of a mixed query (e.g. "Python" from
+        # "Python教程") and silently discard the CJK part.  Those queries go
+        # directly to Step 2 (trigram), which handles both ASCII and CJK together.
+        fts1_attempted = False
+        if (self.fts5_available
+                and not MemoryStorage._contains_cjk(query)
+                and MemoryStorage._build_fts_query(query)):
+            fts1_attempted = True
            fts_results = self._search_fts5(query, user_id, scopes, limit)
            if fts_results:
                return fts_results

+        # Step 2: Trigram FTS5 — CJK/mixed queries, plus fallback when unicode61
+        # returned nothing (trigram indexes all scripts with 3-char sliding windows,
+        # so it can catch terms that unicode61 tokenisation misses).
+        if self.trigram_fts5_available and (
+            MemoryStorage._contains_cjk(query) or fts1_attempted
+        ):
+            trigram_results = self._search_fts5_trigram(query, user_id, scopes, limit)
+            if trigram_results:
+                return trigram_results
+
+        # Step 3: LIKE fallback — last resort (FTS5 unavailable, or CJK tokens
+        # shorter than 3 characters that trigram cannot match, e.g. a single-char query).
+        if not self.fts5_available or MemoryStorage._contains_cjk(query):
            return self._search_like(query, user_id, scopes, limit)

+        return []
+    
    def _search_fts5(
        self,
        query: str,
@@ -471,7 +706,7 @@ class MemoryStorage:
            sql_query = f"""
                SELECT chunks.*, bm25(chunks_fts) as rank
                FROM chunks_fts
-                JOIN chunks ON chunks.id = chunks_fts.id
+                JOIN chunks ON chunks.rowid = chunks_fts.rowid
                WHERE chunks_fts MATCH ? 
                AND chunks.scope IN ({scope_placeholders})
                AND (chunks.scope = 'shared' OR chunks.user_id = ?)
@@ -483,7 +718,7 @@ class MemoryStorage:
            sql_query = f"""
                SELECT chunks.*, bm25(chunks_fts) as rank
                FROM chunks_fts
-                JOIN chunks ON chunks.id = chunks_fts.id
+                JOIN chunks ON chunks.rowid = chunks_fts.rowid
                WHERE chunks_fts MATCH ? 
                AND chunks.scope IN ({scope_placeholders})
                ORDER BY rank
@@ -505,11 +740,9 @@ class MemoryStorage:
                )
                for row in rows
            ]
-        except Exception as e:
+        except Exception:
            from common.log import logger
-            logger.error(
-                f"[MemoryStorage] FTS5 search failed (caller will fall back to LIKE): {e}"
-            )
+            logger.warning("[MemoryStorage] _search_fts5 failed, returning empty", exc_info=True)
            return []

    def _search_like(
@@ -522,12 +755,11 @@ class MemoryStorage:
        """LIKE-based search.

        Used as the keyword-search fallback when FTS5 is unavailable, fails,
-        or returns empty. Supports both CJK runs and ASCII word tokens so it
-        can serve as a true safety net for any query.
+        or returns empty. Supports both CJK runs (1+ chars) and ASCII word
+        tokens (3+ chars) so it can serve as a true safety net for any query.
        """
-        import re
-        # CJK runs (2+ chars) + ASCII word tokens (3+ chars to avoid noise)
-        cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
+        # CJK runs (1+ chars, wide Unicode range) + ASCII words (3+ chars to avoid noise)
+        cjk_words = _RE_CJK_WORDS.findall(query)
        ascii_words = [t for t in re.findall(r'[A-Za-z0-9_]+', query) if len(t) >= 3]
        words = cjk_words + ascii_words
        if not words:
@@ -565,28 +797,37 @@ class MemoryStorage:
        
        try:
            rows = self.conn.execute(sql_query, params).fetchall()
-            return [
-                SearchResult(
+            results = []
+            for row in rows:
+                # Dynamic score: reward chunks that contain more of the query words.
+                # Use all tokens (CJK + ASCII) so pure-ASCII queries are not skipped.
+                # matched_count is always ≥1 because the WHERE clause uses OR, but
+                # guard defensively so unexpected zero-match rows are never surfaced.
+                text_lower = row['text'].lower()
+                matched_count = sum(1 for w in words if w.lower() in text_lower)
+                if matched_count == 0:
+                    continue
+                score = min(0.85, 0.3 + 0.15 * matched_count)
+                results.append(SearchResult(
                    path=row['path'],
                    start_line=row['start_line'],
                    end_line=row['end_line'],
-                    score=0.5,  # Fixed score for LIKE search
+                    score=score,
                    snippet=self._truncate_text(row['text'], 500),
                    source=row['source'],
                    user_id=row['user_id']
-                )
-                for row in rows
-            ]
-        except Exception as e:
+                ))
+            results.sort(key=lambda r: r.score, reverse=True)
+            return results
+        except Exception:
            from common.log import logger
-            logger.error(f"[MemoryStorage] LIKE search failed: {e}")
+            logger.warning("[MemoryStorage] _search_like failed, returning empty", exc_info=True)
            return []

    def delete_by_path(self, path: str):
        """Delete all chunks from a file"""
-        self.conn.execute("""
-            DELETE FROM chunks WHERE path = ?
-        """, (path,))
+        # Hold the storage lock for the delete and its commit so they are not
+        # interleaved with another thread writing through the same connection.
+        with self._lock:
+            self.conn.execute("DELETE FROM chunks WHERE path = ?", (path,))
            self.conn.commit()

    def get_file_hash(self, path: str) -> Optional[str]:
@@ -598,6 +839,7 @@ class MemoryStorage:

    def update_file_metadata(self, path: str, source: str, file_hash: str, mtime: int, size: int):
        """Update file metadata"""
+        with self._lock:
            self.conn.execute("""
                INSERT OR REPLACE INTO files (path, source, hash, mtime, size, updated_at)
                VALUES (?, ?, ?, ?, ?, strftime('%s', 'now'))
@@ -632,7 +874,8 @@ class MemoryStorage:
                self.conn.close()
                self.conn = None  # Mark as closed
            except Exception as e:
-                print(f"⚠️  Error closing database connection: {e}")
+                from common.log import logger
+                logger.warning("[MemoryStorage] Error closing database connection: %s", e)
    
    def __del__(self):
        """Destructor to ensure connection is closed"""
@@ -643,6 +886,32 @@ class MemoryStorage:
    
    # Helper methods

+    @staticmethod
+    def _encode_embedding(embedding: Optional[List[float]]) -> Optional[bytes]:
+        """Encode an embedding as a float32 BLOB (4 bytes per component).
+
+        Uses numpy when it is installed and struct.pack otherwise; both
+        write float32 values in the platform's native byte order.
+
+        Args:
+            embedding: Vector to store, or None.
+
+        Returns:
+            The packed bytes, or None when there is nothing to store.  An
+            empty vector also yields None, so the column holds NULL rather
+            than a zero-length BLOB (the JSON-based code it replaces stored
+            NULL for any falsy embedding).
+        """
+        # len() instead of truthiness so array-like inputs are handled too.
+        if embedding is None or len(embedding) == 0:
+            return None
+        if _HAS_NUMPY:
+            return np.array(embedding, dtype=np.float32).tobytes()
+        import struct
+        return struct.pack(f'{len(embedding)}f', *embedding)
+
+    @staticmethod
+    def _decode_embedding(raw) -> Optional[List[float]]:
+        """Decode an embedding from BLOB bytes or a legacy JSON string.
+
+        Args:
+            raw: Value read from the embedding column: float32 bytes
+                (new format), JSON text (legacy format) or None.
+
+        Returns:
+            The vector as a list of floats, or None when the column is
+            NULL or empty.
+
+        Raises:
+            json.JSONDecodeError: If a non-empty legacy value is not valid JSON.
+        """
+        # Empty values ('' or b'') mean "no embedding", exactly like NULL.
+        # Without this guard json.loads('') raises and an empty BLOB decodes
+        # to [], whereas the JSON-only code this replaces returned None for
+        # every falsy column value.
+        if raw is None or len(raw) == 0:
+            return None
+        if isinstance(raw, (bytes, bytearray)):
+            if _HAS_NUMPY:
+                return np.frombuffer(raw, dtype=np.float32).tolist()
+            import struct
+            n = len(raw) // 4  # 4 bytes per float32 component
+            return list(struct.unpack(f'{n}f', raw))
+        # Legacy JSON format written by older versions
+        return json.loads(raw)
+
    def _row_to_chunk(self, row) -> MemoryChunk:
        """Convert database row to MemoryChunk"""
        return MemoryChunk(
@@ -654,31 +923,88 @@ class MemoryStorage:
            start_line=row['start_line'],
            end_line=row['end_line'],
            text=row['text'],
-            embedding=json.loads(row['embedding']) if row['embedding'] else None,
+            embedding=self._decode_embedding(row['embedding']),
            hash=row['hash'],
            metadata=json.loads(row['metadata']) if row['metadata'] else None
        )
    
    @staticmethod
-    def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
-        """Calculate cosine similarity between two vectors"""
-        if len(vec1) != len(vec2):
-            return 0.0
-        
-        dot_product = sum(a * b for a, b in zip(vec1, vec2))
-        norm1 = sum(a * a for a in vec1) ** 0.5
-        norm2 = sum(b * b for b in vec2) ** 0.5
-        
-        if norm1 == 0 or norm2 == 0:
-            return 0.0
-        
-        return dot_product / (norm1 * norm2)
+    def _contains_cjk(text: str) -> bool:
+        """Return True when *text* has at least one character matched by _RE_CONTAINS_CJK."""
+        return _RE_CONTAINS_CJK.search(text) is not None
    
    @staticmethod
-    def _contains_cjk(text: str) -> bool:
-        """Check if text contains CJK (Chinese/Japanese/Korean) characters"""
-        import re
-        return bool(re.search(r'[\u4e00-\u9fff]', text))
+    def _build_trigram_query(raw_query: str) -> Optional[str]:
+        """
+        Turn a raw user query into an FTS5 MATCH expression for the trigram table.
+
+        Every CJK run and every ASCII word found by _RE_TRIGRAM_TOKENS becomes
+        one double-quoted phrase; the phrases are combined with AND, so a chunk
+        must contain all of them.  Tokens of any length are kept, including
+        single characters.  Returns None when the query yields no token.
+        """
+        phrases = []
+        for term in _RE_TRIGRAM_TOKENS.findall(raw_query):
+            if not term:
+                continue
+            # FTS5 escapes a double quote inside a quoted phrase by doubling it.
+            phrases.append('"' + term.replace('"', '""') + '"')
+        return ' AND '.join(phrases) if phrases else None
+
+    def _search_fts5_trigram(
+        self,
+        query: str,
+        user_id: Optional[str],
+        scopes: List[str],
+        limit: int
+    ) -> List[SearchResult]:
+        """Search the trigram FTS5 table and rank the hits with BM25.
+
+        Args:
+            query: Raw user query; converted by _build_trigram_query.
+            user_id: When truthy, non-shared chunks must belong to this user.
+            scopes: Scope values a chunk may have.
+            limit: Maximum number of rows requested from SQLite.
+
+        Returns:
+            Matching chunks as SearchResult objects in the order returned by
+            ORDER BY rank.  Returns an empty list when the query yields no
+            MATCH expression or when the statement fails; failures are
+            logged as warnings, not raised.
+        """
+        match_expr = self._build_trigram_query(query)
+        if not match_expr:
+            return []
+
+        scope_placeholders = ','.join(['?'] * len(scopes))
+        # Bind order follows the placeholders: MATCH, scopes, [user_id], limit.
+        bindings = [match_expr, *scopes]
+
+        if user_id:
+            sql = f"""
+                SELECT chunks.*, bm25(chunks_fts_trigram) as rank
+                FROM chunks_fts_trigram
+                JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
+                WHERE chunks_fts_trigram MATCH ?
+                AND chunks.scope IN ({scope_placeholders})
+                AND (chunks.scope = 'shared' OR chunks.user_id = ?)
+                ORDER BY rank
+                LIMIT ?
+            """
+            bindings += [user_id, limit]
+        else:
+            sql = f"""
+                SELECT chunks.*, bm25(chunks_fts_trigram) as rank
+                FROM chunks_fts_trigram
+                JOIN chunks ON chunks.rowid = chunks_fts_trigram.rowid
+                WHERE chunks_fts_trigram MATCH ?
+                AND chunks.scope IN ({scope_placeholders})
+                ORDER BY rank
+                LIMIT ?
+            """
+            bindings.append(limit)
+
+        try:
+            hits = []
+            for row in self.conn.execute(sql, bindings).fetchall():
+                hits.append(SearchResult(
+                    path=row['path'],
+                    start_line=row['start_line'],
+                    end_line=row['end_line'],
+                    score=self._bm25_rank_to_score(row['rank']),
+                    snippet=self._truncate_text(row['text'], 500),
+                    source=row['source'],
+                    user_id=row['user_id']
+                ))
+            return hits
+        except Exception:
+            # Best-effort search: log the failure and report "no results".
+            from common.log import logger
+            logger.warning("[MemoryStorage] _search_fts5_trigram failed, returning empty", exc_info=True)
+            return []

    @staticmethod
    def _build_fts_query(raw_query: str) -> Optional[str]:
@@ -688,7 +1014,6 @@ class MemoryStorage:
        Works best for English and word-based languages.
        For CJK characters, LIKE search will be used as fallback.
        """
-        import re
        # Extract words (primarily English words and numbers)
        tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
        if not tokens:
@@ -701,9 +1026,22 @@ class MemoryStorage:
    
    @staticmethod
    def _bm25_rank_to_score(rank: float) -> float:
-        """Convert BM25 rank to 0-1 score"""
-        normalized = max(0, rank) if rank is not None else 999
-        return 1 / (1 + normalized)
+        """Convert SQLite BM25 rank to a relevance score in [0.3, 0.99).
+
+        SQLite's bm25() returns a non-positive float (0 or negative).
+        More negative = more relevant.  max(0, rank) would clip every
+        negative value to 0, making every score 1/(1+0) = 1.0 and
+        destroying all ranking information.
+
+        abs(rank) / (1 + abs(rank)) maps the absolute relevance magnitude
+        to [0, 1): larger |rank| (stronger match) → fraction closer to 1.
+        The fraction is then scaled by 0.69 and offset by 0.3, so any
+        non-None rank scores at least 0.3 and stays below 0.99.
+
+        Returns:
+            0.0 when rank is None, otherwise
+            0.3 + 0.69 * abs(rank) / (1 + abs(rank)).
+        """
+        if rank is None:
+            return 0.0
+        # Add a floor of 0.3 so any FTS5 match always exceeds typical
+        # min_score thresholds (default 0.1).  Small-corpus ranks close to
+        # 0 would otherwise produce score≈0 and be filtered out downstream.
+        return 0.3 + 0.69 * (abs(rank) / (1.0 + abs(rank)))
    
    @staticmethod
    def _truncate_text(text: str, max_chars: int) -> str:
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+numpy>=1.24
 aiohttp>=3.8.6,<3.10
 requests>=2.28.2
 chardet>=5.1.0