feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions
--- a/api/app/features/memory/evidence.py
+++ b/api/app/features/memory/evidence.py
@@ -11,12 +11,14 @@ Celery 使用 sync + 向量 chunks；`HybridRetriever` 使用 async + 向量 chu

 from __future__ import annotations

+from concurrent.futures import ThreadPoolExecutor
 from typing import TYPE_CHECKING

 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import Session

 from app.core.config import settings
+from app.core.db import get_sync_db
 from app.core.logging import get_logger
 from app.features.memory.repo import (
    list_summaries_for_evidence_async,
@@ -88,7 +90,7 @@ def _stories_to_dicts(story_rows) -> list[dict]:
 def fetch_evidence_metadata_sync(
    session: Session, user_id: str, q: str, top_k: int
 ) -> dict:
-    """非 chunk 证据：摘要、事实、时间线、故事（sync）。"""
+    """非 chunk 证据：摘要、事实、时间线、故事（sync）。保留 session 入参供单连接路径使用。"""
    facts = search_facts_for_user_sync(session, user_id, q, top_k)
    events = search_timeline_events_for_user_sync(session, user_id, q, top_k)
    relevant_summaries = list_summaries_for_evidence_sync(
@@ -105,6 +107,49 @@ def fetch_evidence_metadata_sync(
    }


+def fetch_evidence_metadata_parallel_sync(user_id: str, q: str, top_k: int) -> dict:
+    """
+    Fetch non-chunk evidence (facts, timeline events, summaries, stories) in parallel.
+
+    Issues the same four repository queries as fetch_evidence_metadata_sync, but each
+    one runs on its own sync session obtained from get_sync_db() and is submitted to
+    a four-worker thread pool, so the round trips overlap instead of running serially.
+
+    ORM rows are converted to dicts inside the session that loaded them, so the
+    conversion never touches objects whose session has already been closed.
+
+    NOTE(review): every query uses an independent session, so presumably none of
+    them sees uncommitted changes held by a caller's session -- confirm acceptable.
+
+    Args:
+        user_id: User whose evidence is searched.
+        q: Query text forwarded to every lookup.
+        top_k: Per-category result limit.
+
+    Returns:
+        Dict with keys "relevant_facts", "timeline_hints", "relevant_summaries"
+        and "relevant_stories".
+    """
+
+    def _facts():
+        with get_sync_db() as session:
+            rows = search_facts_for_user_sync(session, user_id, q, top_k)
+            # Serialize before the session exits; rows may be unusable afterwards.
+            return _facts_to_dicts(rows)
+
+    def _events():
+        with get_sync_db() as session:
+            rows = search_timeline_events_for_user_sync(session, user_id, q, top_k)
+            # Serialize before the session exits; rows may be unusable afterwards.
+            return _timeline_to_dicts(rows)
+
+    def _summaries():
+        with get_sync_db() as session:
+            # Returned as-is, exactly like the single-session variant does.
+            return list_summaries_for_evidence_sync(
+                session, user_id=user_id, q=q, limit=top_k
+            )
+
+    def _stories():
+        with get_sync_db() as session:
+            rows = list_recent_stories_for_evidence_sync(
+                session, user_id, query=q, limit=top_k
+            )
+            # Serialize before the session exits; rows may be unusable afterwards.
+            return _stories_to_dicts(rows)
+
+    with ThreadPoolExecutor(max_workers=4) as pool:
+        # Submit everything first so all four queries are in flight before blocking.
+        f_facts = pool.submit(_facts)
+        f_events = pool.submit(_events)
+        f_summaries = pool.submit(_summaries)
+        f_stories = pool.submit(_stories)
+        # .result() re-raises any exception raised inside the worker.
+        relevant_facts = f_facts.result()
+        timeline_hints = f_events.result()
+        relevant_summaries = f_summaries.result()
+        relevant_stories = f_stories.result()
+
+    return {
+        "relevant_facts": relevant_facts,
+        "timeline_hints": timeline_hints,
+        "relevant_summaries": relevant_summaries,
+        "relevant_stories": relevant_stories,
+    }
+
+
 async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
 ) -> dict:
@@ -255,7 +300,7 @@ def retrieve_evidence_bundle_sync(
            "retrieve_evidence_bundle_sync no_embedding_provider user_id={}",
            user_id,
        )
-    meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
+    meta = fetch_evidence_metadata_parallel_sync(user_id, q, top_k)
    return {
        "relevant_chunks": relevant_chunks,
        **meta,