feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge the baseline excerpt and the library chapter separately; add build_memoir_compare_summary for gate, nine-dimension, and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
Kevin
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions

View File

@@ -11,12 +11,14 @@ Celery 使用 sync + 向量 chunks`HybridRetriever` 使用 async + 向量 chu
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from app.core.config import settings
from app.core.db import get_sync_db
from app.core.logging import get_logger
from app.features.memory.repo import (
list_summaries_for_evidence_async,
@@ -88,7 +90,7 @@ def _stories_to_dicts(story_rows) -> list[dict]:
def fetch_evidence_metadata_sync(
session: Session, user_id: str, q: str, top_k: int
) -> dict:
"""非 chunk 证据摘要、事实、时间线、故事sync"""
"""非 chunk 证据摘要、事实、时间线、故事sync保留 session 入参供单连接路径使用。"""
facts = search_facts_for_user_sync(session, user_id, q, top_k)
events = search_timeline_events_for_user_sync(session, user_id, q, top_k)
relevant_summaries = list_summaries_for_evidence_sync(
@@ -105,6 +107,49 @@ def fetch_evidence_metadata_sync(
}
def fetch_evidence_metadata_parallel_sync(user_id: str, q: str, top_k: int) -> dict:
    """Fetch non-chunk evidence with four parallel queries, one sync Session each.

    Intended to be semantically equivalent to ``fetch_evidence_metadata_sync``:
    facts, timeline events, summaries and stories are looked up for ``user_id``
    using query text ``q`` and limit ``top_k``. Each lookup opens its own
    session via ``get_sync_db()`` and runs on a worker thread, so the four
    round trips overlap instead of running back to back.

    ORM rows are converted to plain dicts inside the worker, while the session
    that loaded them is still open. Converting after the ``with`` block exits
    would touch instances whose session has already been released, which can
    fail if the session expires or detaches objects on exit.

    Args:
        user_id: Owner of the evidence to search.
        q: Query text passed to every lookup.
        top_k: Maximum number of rows requested from each lookup.

    Returns:
        A dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``.

    Raises:
        Whatever exception a worker raised; it is re-raised by ``.result()``.
    """

    def _facts() -> list[dict]:
        with get_sync_db() as session:
            rows = search_facts_for_user_sync(session, user_id, q, top_k)
            # Serialize before the session is released.
            return _facts_to_dicts(rows)

    def _events() -> list[dict]:
        with get_sync_db() as session:
            rows = search_timeline_events_for_user_sync(session, user_id, q, top_k)
            return _timeline_to_dicts(rows)

    def _summaries():
        with get_sync_db() as session:
            # Returned as-is, matching the original, which applied no conversion
            # to the summaries result.
            return list_summaries_for_evidence_sync(
                session, user_id=user_id, q=q, limit=top_k
            )

    def _stories() -> list[dict]:
        with get_sync_db() as session:
            rows = list_recent_stories_for_evidence_sync(
                session, user_id, query=q, limit=top_k
            )
            return _stories_to_dicts(rows)

    # One worker per lookup; submit all four before waiting on any result so
    # the queries actually overlap.
    with ThreadPoolExecutor(max_workers=4) as pool:
        f_facts = pool.submit(_facts)
        f_events = pool.submit(_events)
        f_summaries = pool.submit(_summaries)
        f_stories = pool.submit(_stories)
        return {
            "relevant_facts": f_facts.result(),
            "timeline_hints": f_events.result(),
            "relevant_summaries": f_summaries.result(),
            "relevant_stories": f_stories.result(),
        }
async def fetch_evidence_metadata_async(
db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
@@ -255,7 +300,7 @@ def retrieve_evidence_bundle_sync(
"retrieve_evidence_bundle_sync no_embedding_provider user_id={}",
user_id,
)
meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
meta = fetch_evidence_metadata_parallel_sync(user_id, q, top_k)
return {
"relevant_chunks": relevant_chunks,
**meta,