From ac49bc7f2356559ef791979d1b378463d28ab3f0 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 10 Apr 2026 10:23:43 +0800 Subject: [PATCH] feat(eval): memoir A/B chapter judging and eval-web parity with dialogue - Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas. - Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error. - MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings. - app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS. - Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014. - Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples. --- api/.env.example | 18 +- api/.env.production | 13 +- api/.env.staging | 13 +- api/README.md | 7 +- .../0014_memory_evidence_query_indexes.py | 82 ++ api/app/agents/chat/orchestrator.py | 2 +- api/app/agents/memoir/batch_phase1_prep.py | 10 +- api/app/agents/memoir/orchestrator.py | 14 +- api/app/core/agent_logging.py | 60 +- api/app/core/celery_log_context.py | 33 + api/app/core/config.py | 33 +- api/app/core/llm_call.py | 66 +- api/app/core/log_events.py | 131 +++ api/app/core/logging.py | 106 ++- api/app/core/memoir_pipeline_progress.py | 294 ++++++ .../conversation/ws/connection_manager.py | 4 +- api/app/features/conversation/ws/pipeline.py | 2 +- api/app/features/conversation/ws/router.py | 18 +- .../features/evaluation/eval_trace_format.py | 31 +- .../evaluation/judge_manual_service.py | 481 ++++++++-- api/app/features/evaluation/judge_schemas.py | 207 +++-- api/app/features/evaluation/judge_service.py | 21 +- .../evaluation/memoir_compare_summary.py | 150 ++++ api/app/features/evaluation/router.py | 74 ++ api/app/features/evaluation/schemas.py | 20 + .../features/memoir/story_pipeline_sync.py | 131 ++- api/app/features/memory/enrichment.py | 235 ++--- api/app/features/memory/evidence.py | 49 +- api/app/features/memory/llm_schemas.py | 12 + api/app/features/memory/service.py | 172 +++- api/app/features/story/post_commit.py | 73 +- api/app/main.py | 2 +- api/app/tasks/celery_app.py | 99 +- api/app/tasks/chapter_compose_tasks.py | 75 +- api/app/tasks/chapter_cover_tasks.py | 18 +- api/app/tasks/memoir_quality_pass_tasks.py | 78 +- api/app/tasks/memoir_tasks.py | 346 ++++++- api/app/tasks/memory_compaction_tasks.py | 31 +- api/app/tasks/memory_enrichment_tasks.py | 114 ++- api/app/tasks/story_image_tasks.py | 101 ++- api/app/tasks/story_title_tasks.py | 134 +++ api/development.sh | 2 +- api/docker-compose.yml | 2 +- api/docs/internal-eval.md | 2 +- api/docs/memory-retrieval.md | 9 +- .../test_memoir_pipeline_run_router.py | 101 +++ api/tests/test_agent_logging.py | 73 ++ api/tests/test_judge_schemas.py | 18 + api/tests/test_log_events.py | 62 ++ .../test_memoir_pipeline_optimization.py | 2 +- api/tests/test_memoir_pipeline_progress.py | 105 +++ api/tests/test_memory_enrichment_baseline.py | 116 +++ api/tests/test_story_route_oral_invariant.py | 14 +- app-eval-web/src/components/ScoreCard.tsx | 842 ++++++++++++++++-- app-eval-web/src/eval.css | 72 ++ app-eval-web/src/pages/MemoirPage.tsx | 358 ++++++-- app-eval-web/src/pages/PlaygroundPage.tsx | 55 ++ app-eval-web/src/utils/evalMemoirJudgePref.ts | 66 ++ app-eval-web/vite.config.ts | 10 + 59 files changed, 4773 insertions(+), 696 deletions(-) create mode 100644 api/alembic/versions/0014_memory_evidence_query_indexes.py create mode 100644 api/app/core/celery_log_context.py create mode 100644 api/app/core/log_events.py create mode 100644 api/app/core/memoir_pipeline_progress.py create mode 100644 api/app/features/evaluation/memoir_compare_summary.py create mode 100644 api/app/tasks/story_title_tasks.py create mode 100644 api/tests/evaluation/test_memoir_pipeline_run_router.py create mode 100644 api/tests/test_agent_logging.py create mode 100644 api/tests/test_log_events.py create mode 100644 api/tests/test_memoir_pipeline_progress.py create mode 100644 api/tests/test_memory_enrichment_baseline.py create mode 100644 app-eval-web/src/utils/evalMemoirJudgePref.ts diff --git a/api/.env.example b/api/.env.example index 8e9cb73..47a9c92 100644 --- a/api/.env.example +++ b/api/.env.example @@ -17,19 +17,26 @@ # ============================================================================= # Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL) # ============================================================================= +# 生产/预发:保持 INFO,避免 DEBUG 把全文 prompt/响应打进日志。排查 Agent 耗时可仅开 LOG_AGENT_VERBOSE。 LOG_LEVEL=INFO -# Agent 单行 INFO 摘要(耗时、路由、段落规模);与 LOG_LEVEL 独立,便于生产短暂排查 +# Agent 单行 INFO 摘要(耗时、sha、字符数);与 LOG_LEVEL 独立,生产可短时设为 1 # LOG_AGENT_VERBOSE=0 -# DEBUG 下 prompt/响应预览最大字符数 +# DEBUG 下 prompt/响应预览最大字符数(Settings 默认 4096);0=不截断全文(慎用) # AGENT_LOG_MAX_CHARS=4096 +# DEBUG 下 *.prompt:preview=截断预览 | hash_only=仅 sha12+长度,无正文 +# AGENT_LOG_PROMPT_MODE=preview +# DEBUG 下同一 label 连续相同 prompt 则跳过重复行(减模板重复) +# AGENT_LOG_PROMPT_DEDUP=0 # DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文 # AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1 -# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过) +# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过;短时 DEBUG 可设 2500–8000) # AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0 # AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000 -# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏) +# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO;否则 Celery 与 httpx 默认 WARNING;需原始框架行时设为 INFO) # CELERY_LOG_LEVEL= # HTTPX_LOG_LEVEL= +# 聚合用 JSONL(空=不写);与 stderr 并存,loguru serialize=True、按 20MB 切割、保留 7 天 +# LOG_JSON_FILE=/var/log/life-echo/app.jsonl # ============================================================================= # LLM / DeepSeek @@ -111,6 +118,9 @@ DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo REDIS_URL=redis://localhost:6379/0 REDIS_SESSION_TTL=86400 +# Celery:ingest 后 Memory LLM 富化任务投递队列(须被 worker 消费;见 README) +# CELERY_MEMORY_ENRICHMENT_QUEUE=memory_idle + # ============================================================================= # Memory compaction(近重复 memory chunk 软排除;Celery + Redis 防抖) # 模板统一默认开启;须同时运行 celery worker 与 celery-beat(docker-compose 已含 beat,负责 memory_compaction_sweep)。 diff --git a/api/.env.production b/api/.env.production index 592cec6..e67c503 100644 --- a/api/.env.production +++ b/api/.env.production @@ -15,14 +15,18 @@ # ============================================================================= # Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL) # ============================================================================= +# 生产默认 INFO;勿长期 DEBUG。排障 Agent 耗时可短时 LOG_AGENT_VERBOSE=1。 LOG_LEVEL=INFO -# Agent 单行 INFO 摘要(耗时、路由、段落规模);与 LOG_LEVEL 独立,便于生产短暂排查 +# Agent 单行 INFO 摘要;与 LOG_LEVEL 独立,便于生产短暂排查 # LOG_AGENT_VERBOSE=0 -# DEBUG 下 prompt/响应预览最大字符数 +# DEBUG 下预览上限(默认 4096);0=全文 # AGENT_LOG_MAX_CHARS=4096 +# DEBUG 下 *.prompt:preview | hash_only +# AGENT_LOG_PROMPT_MODE=preview +# AGENT_LOG_PROMPT_DEDUP=0 # DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文 # AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1 -# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过) +# DEBUG 下超长单段 *.prompt:先跳过前 N 字符再预览 # AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0 # AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000 # 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏) @@ -103,6 +107,9 @@ DATABASE_URL=postgresql://postgres:postgres@postgres:5432/life_echo REDIS_URL=redis://redis:6379/0 REDIS_SESSION_TTL=86400 +# Celery:ingest 后 Memory LLM 富化任务投递队列(须被 worker 消费;见 README) +# CELERY_MEMORY_ENRICHMENT_QUEUE=memory_idle + # ============================================================================= # Memory compaction(近重复 memory chunk 软排除;Celery + Redis 防抖) # 与 .env.example / .env.development 一致默认开启;需 running:celery worker + celery-beat(见 docker-compose.yml)。 diff --git a/api/.env.staging b/api/.env.staging index 891e683..1274ebb 100644 --- a/api/.env.staging +++ b/api/.env.staging @@ -8,14 +8,18 @@ # ============================================================================= # Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL) # ============================================================================= +# 预发默认 INFO;勿长期 DEBUG。查慢路径可短时 LOG_AGENT_VERBOSE=1。 LOG_LEVEL=INFO -# Agent 单行 INFO 摘要(耗时、路由、段落规模);与 LOG_LEVEL 独立 +# Agent 单行 INFO 摘要;与 LOG_LEVEL 独立 # LOG_AGENT_VERBOSE=0 -# DEBUG 下 prompt/响应预览最大字符数 +# DEBUG 下预览上限(默认 4096);0=全文 # AGENT_LOG_MAX_CHARS=4096 +# DEBUG 下 *.prompt:preview | hash_only +# AGENT_LOG_PROMPT_MODE=preview +# AGENT_LOG_PROMPT_DEDUP=0 # DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文 # AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1 -# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过) +# DEBUG 下超长单段 *.prompt:先跳过前 N 字符再预览 # AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0 # AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000 # 第三方 stdlib logging(空=自动) @@ -48,6 +52,9 @@ DATABASE_URL=postgresql://postgres:postgres@postgres:5432/life_echo REDIS_URL=redis://redis:6379/0 REDIS_SESSION_TTL=86400 +# Celery:ingest 后 Memory LLM 富化任务投递队列(须被 worker 消费;见 README) +# CELERY_MEMORY_ENRICHMENT_QUEUE=memory_idle + # ============================================================================= # Memory compaction(近重复 memory chunk 软排除;Celery + Redis 防抖) # 与 example / development / production 一致默认开启;预发须跑 worker + celery-beat。 diff --git a/api/README.md b/api/README.md index fd01a08..c152d08 100644 --- a/api/README.md +++ b/api/README.md @@ -17,6 +17,7 @@ Life Echo API 是一个智能对话系统,通过 WebSocket 实时连接,使 - **JSON 模式**:结构化抽取/路由/叙事 JSON 使用 `app/core/langchain_llm.py` 的 `bind_json_object_mode`(与 [DeepSeek JSON Output](https://api-docs.deepseek.com/guides/json_mode) 一致);详见 [`docs/llm-json-mode.md`](docs/llm-json-mode.md)。适配器说明见 [`app/adapters/llm/deepseek.py`](app/adapters/llm/deepseek.py)。 - **记忆检索**:异步与 Celery 均使用 **向量(pgvector)** chunks,见 [`docs/memory-retrieval.md`](docs/memory-retrieval.md)。 - **Memory compaction**:`.env.example` / [`.env.development`](.env.development) / [`.env.staging`](.env.staging) / [`.env.production`](.env.production) 均默认 `MEMORY_COMPACTION_ENABLED=true`。须运行 **Celery worker** 与 **celery-beat**([`docker-compose.yml`](docker-compose.yml) 已包含 `celery-beat`,用于定期 `memory_compaction_sweep`)。 +- **Memory LLM enrichment(单次 LLM:会话摘要 + 事实)**:任务路由到 **`memory_idle`** 队列(`CELERY_MEMORY_ENRICHMENT_QUEUE`,默认 `memory_idle`)。本地与 compose 内 worker 已使用 `-Q celery,memory_idle`;生产可单独起低并发 worker 只消费 `memory_idle`,与主队列隔离。 ## 技术栈 @@ -157,11 +158,11 @@ export REDIS_URL=redis://localhost:6379/0 uvicorn main:app --reload --host 0.0.0.0 --port 8000 # 5. 启动 Celery Worker(终端 2) -# macOS 使用 solo 池避免 fork 崩溃问题 -celery -A tasks.celery_app worker --loglevel=info --pool=solo +# macOS 使用 solo 池避免 fork 崩溃问题;须同时消费 memory_idle(Memory 富化) +celery -A app.tasks.celery_app worker --loglevel=info --pool=solo -Q celery,memory_idle # Linux/生产环境可以使用 prefork 池 -# celery -A tasks.celery_app worker --loglevel=info --concurrency=4 +# celery -A app.tasks.celery_app worker --loglevel=info --concurrency=4 -Q celery,memory_idle ``` ### 验证服务 diff --git a/api/alembic/versions/0014_memory_evidence_query_indexes.py b/api/alembic/versions/0014_memory_evidence_query_indexes.py new file mode 100644 index 0000000..d6ef0c1 --- /dev/null +++ b/api/alembic/versions/0014_memory_evidence_query_indexes.py @@ -0,0 +1,82 @@ +"""pg_trgm + composite indexes for memory evidence ILIKE and filters. + +Revision ID: 0014_memory_evidence_indexes +Revises: 0013_playground_judge +""" + +from typing import Sequence, Union + +from alembic import op + +revision: str = "0014_memory_evidence_indexes" +down_revision: Union[str, None] = "0013_playground_judge" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm") + + op.create_index( + "ix_memory_facts_user_status", + "memory_facts", + ["user_id", "status"], + unique=False, + ) + + op.execute( + "CREATE INDEX IF NOT EXISTS ix_memory_facts_subject_trgm " + "ON memory_facts USING gin (subject gin_trgm_ops)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_memory_facts_predicate_trgm " + "ON memory_facts USING gin (predicate gin_trgm_ops)" + ) + + op.execute( + "CREATE INDEX IF NOT EXISTS ix_timeline_events_title_trgm " + "ON timeline_events USING gin (title gin_trgm_ops)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_timeline_events_description_trgm " + "ON timeline_events USING gin (description gin_trgm_ops)" + ) + + op.execute( + "CREATE INDEX IF NOT EXISTS ix_memory_summaries_content_trgm " + "ON memory_summaries USING gin (content gin_trgm_ops)" + ) + + op.create_index( + "ix_stories_user_status", + "stories", + ["user_id", "status"], + unique=False, + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_stories_title_trgm " + "ON stories USING gin (title gin_trgm_ops)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_stories_summary_trgm " + "ON stories USING gin (summary gin_trgm_ops)" + ) + + op.execute( + "CREATE INDEX IF NOT EXISTS ix_memory_chunks_embedding_hnsw " + "ON memory_chunks USING hnsw (embedding vector_cosine_ops) " + "WITH (m = 16, ef_construction = 64)" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_memory_chunks_embedding_hnsw") + op.execute("DROP INDEX IF EXISTS ix_stories_summary_trgm") + op.execute("DROP INDEX IF EXISTS ix_stories_title_trgm") + op.drop_index("ix_stories_user_status", table_name="stories") + op.execute("DROP INDEX IF EXISTS ix_memory_summaries_content_trgm") + op.execute("DROP INDEX IF EXISTS ix_timeline_events_description_trgm") + op.execute("DROP INDEX IF EXISTS ix_timeline_events_title_trgm") + op.execute("DROP INDEX IF EXISTS ix_memory_facts_predicate_trgm") + op.execute("DROP INDEX IF EXISTS ix_memory_facts_subject_trgm") + op.drop_index("ix_memory_facts_user_status", table_name="memory_facts") diff --git a/api/app/agents/chat/orchestrator.py b/api/app/agents/chat/orchestrator.py index 371f2a5..a91e0ba 100644 --- a/api/app/agents/chat/orchestrator.py +++ b/api/app/agents/chat/orchestrator.py @@ -216,7 +216,7 @@ class ChatOrchestrator: messages=responses, skip_tts=False, memory_retrieval_trace=None ) except Exception as e: - logger.error(f"资料收集处理失败: {e}", exc_info=True) + logger.exception("资料收集处理失败: {}", e) return AgentChatTurn( messages=["不好意思刚才没接住,你再说一遍好吗?"], skip_tts=False, diff --git a/api/app/agents/memoir/batch_phase1_prep.py b/api/app/agents/memoir/batch_phase1_prep.py index c26ae21..b2aa8dc 100644 --- a/api/app/agents/memoir/batch_phase1_prep.py +++ b/api/app/agents/memoir/batch_phase1_prep.py @@ -6,7 +6,7 @@ from __future__ import annotations import math from dataclasses import dataclass -from typing import Any, Dict, List +from typing import Any, Callable, Dict, List from app.agents.memoir.prompts import get_batch_memoir_phase1_prep_prompt from app.agents.memoir.schemas import BatchPhase1LLMOutput @@ -135,7 +135,7 @@ def _run_batch_phase1_prep_chunk_with_bisect( if merged.keys() != expected: raise ValueError( "batch phase1 chunked bisect merge: segment ids do not match input" - ) + ) from None return merged @@ -145,6 +145,7 @@ def run_batch_phase1_prep_chunked( llm: Any, *, chunk_size: int, + on_chunk: Callable[[int, int], None] | None = None, ) -> Dict[str, BatchPhase1SegmentRow]: """ 将 segments 按 chunk_size 切片多次调用 Phase1 批处理 LLM,合并 by_id。 @@ -161,13 +162,16 @@ def run_batch_phase1_prep_chunked( chunk_idx = i // chunk_size + 1 sub = segments[i : i + chunk_size] logger.info( - "event=batch_phase1_chunk chunk_idx={}/{} segment_count={} batch_path=chunked", + "event=batch_phase1_chunk chunk_idx={}/{} segment_count={} batch_path=chunked " + "msg=Phase1 批处理分块调用", chunk_idx, total_chunks, len(sub), ) part = _run_batch_phase1_prep_chunk_with_bisect(sub, state, llm) merged.update(part) + if on_chunk is not None: + on_chunk(chunk_idx, total_chunks) expected = {str(s.id) for s in segments} if merged.keys() != expected: missing = expected - merged.keys() diff --git a/api/app/agents/memoir/orchestrator.py b/api/app/agents/memoir/orchestrator.py index 325b92a..9e20101 100644 --- a/api/app/agents/memoir/orchestrator.py +++ b/api/app/agents/memoir/orchestrator.py @@ -8,7 +8,7 @@ from __future__ import annotations import time from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Set, Tuple from app.agents.memoir.batch_phase1_prep import ( STAGE_ALLOWED_SLOTS, @@ -63,6 +63,7 @@ class MemoirOrchestrator: get_or_create_state: Callable[[], MemoirStateSchema], update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema], llm_fast: Any | None = None, + on_phase1_chunk: Optional[Callable[[int, int], None]] = None, ) -> PreparedMemoirBatches: """ 遍历 segments:Extraction → slot 更新 → Classification → 按 category 分桶。 @@ -89,15 +90,19 @@ class MemoirOrchestrator: state=state, classify_extract_llm=classify_extract_llm, update_slot=update_slot, + on_phase1_chunk=on_phase1_chunk, ) logger.info( - "event=phase1_batch_path_used segment_count={}", + "event=phase1_batch_path_used segment_count={} " + "msg=Phase1 批处理 LLM 路径已使用", len(segments), ) return result except Exception as e: logger.warning( - "MemoirOrchestrator.prepare_batches batch LLM 失败,回退逐段: {}", + "event=phase1_batch_path_fallback segment_count={} exc={} " + "msg=Phase1 批处理失败,回退逐段", + len(segments), e, ) @@ -172,6 +177,7 @@ class MemoirOrchestrator: state: MemoirStateSchema, classify_extract_llm: Any, update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema], + on_phase1_chunk: Optional[Callable[[int, int], None]] = None, ) -> PreparedMemoirBatches: category_to_segments: Dict[str, List[Segment]] = {} segment_skip_story_ids: Set[str] = set() @@ -182,6 +188,7 @@ class MemoirOrchestrator: state, classify_extract_llm, chunk_size=int(settings.memoir_phase1_batch_llm_chunk_size), + on_chunk=on_phase1_chunk, ) for segment in segments: @@ -294,6 +301,7 @@ class MemoirOrchestrator: llm_fast=llm_fast, get_or_create_state=get_or_create_state, update_slot=update_slot, + on_phase1_chunk=None, ) state = prepared.state chapters_to_enqueue: Set[str] = set() diff --git a/api/app/core/agent_logging.py b/api/app/core/agent_logging.py index ead6444..e2021e0 100644 --- a/api/app/core/agent_logging.py +++ b/api/app/core/agent_logging.py @@ -1,28 +1,45 @@ """ Agent / LLM 诊断日志:耗时、输入输出规模、截断预览。 -- **详情**(完整 prompt 预览等):仅在 ``LOG_LEVEL`` 为 ``TRACE`` / ``DEBUG`` 时通过 ``logger.debug`` 输出。 +- **详情**(prompt 预览 / hash / 响应预览):仅在 ``LOG_LEVEL`` 为 ``TRACE`` / ``DEBUG`` 时通过 ``logger.debug`` 输出。 - **摘要**(单行:耗时、字符数、operation 名):当 ``LOG_AGENT_VERBOSE=1`` 时通过 ``logger.info`` 输出, 便于生产环境在不把全局日志调到 DEBUG 的情况下排查 Agent 性能与路径。 -敏感内容:DEBUG 下会记录用户相关文本;``AGENT_LOG_MAX_CHARS=0`` 时记录全文,生产环境请勿长期开启 DEBUG。 +生产/预发建议 ``LOG_LEVEL=INFO``;需看 Agent 耗时与规模时可设 ``LOG_AGENT_VERBOSE=1``,无需长期 DEBUG。 + +敏感内容:DEBUG 下会记录用户相关文本;``AGENT_LOG_MAX_CHARS=0`` 时预览不截断(完整输出,慎用)。 配置(节选):``AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY``(默认 true)省略聊天 System 正文,仅打 len+sha12; ``AGENT_LOG_JSON_PROMPT_PREFIX_CHARS`` + ``AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT`` 在 DEBUG 下跳过 -超长单段 prompt 的前缀再预览。 +超长单段 ``*.prompt`` 的前缀再预览; +``AGENT_LOG_PROMPT_MODE=hash_only`` 时 ``*.prompt`` 仅输出 sha12 + 长度,无正文; +``AGENT_LOG_PROMPT_DEDUP=1`` 时同一 label 连续相同全文则跳过重复行。 """ from __future__ import annotations +import hashlib +import threading import time from contextlib import contextmanager from typing import Any, Iterator from app.core.config import settings +_dedup_lock = threading.Lock() +_last_prompt_sha256_by_label: dict[str, str] = {} + + +def _payload_sha256_hex(text: str) -> str: + return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest() + + +def _payload_sha12(text: str) -> str: + return _payload_sha256_hex(text)[:12] + def agent_verbose_enabled() -> bool: - """是否输出含完整 prompt 预览等 DEBUG 级详情。""" + """是否输出含 prompt/response 等 DEBUG 级详情。""" raw = (settings.log_level or "INFO").strip().upper() return raw in ("TRACE", "DEBUG") @@ -97,26 +114,55 @@ def log_agent_payload( *, max_chars: int | None = None, ) -> None: - """在 DEBUG 下记录文本长度与截断预览。""" + """在 DEBUG 下记录文本长度与截断预览(*.prompt 可 hash_only / 去重)。""" if not agent_verbose_enabled(): return raw = text or "" total_len = len(raw) + digest = _payload_sha256_hex(raw) + sha12 = digest[:12] + + is_prompt = label.endswith(".prompt") + if is_prompt and settings.agent_log_prompt_dedup: + with _dedup_lock: + if _last_prompt_sha256_by_label.get(label) == digest: + logger.debug( + "agent_payload_skipped label={} reason=same_as_previous sha12={} total_len={}", + label, + sha12, + total_len, + ) + return + _last_prompt_sha256_by_label[label] = digest + preview_source = raw extra_note = "" if ( - label.endswith(".prompt") + is_prompt and settings.agent_log_json_prompt_prefix_chars > 0 and total_len > settings.agent_log_json_prompt_prefix_only_if_len_gt ): skip = settings.agent_log_json_prompt_prefix_chars preview_source = raw[skip:] extra_note = f" skipped_prefix_chars={skip}" + + mode = (settings.agent_log_prompt_mode or "preview").strip().lower() + if is_prompt and mode == "hash_only": + logger.debug( + "agent_payload label={} total_len={} sha12={} mode=hash_only{}", + label, + total_len, + sha12, + extra_note, + ) + return + preview = truncate_for_log(preview_source, max_chars=max_chars) logger.debug( - "agent_payload {} total_len={}{} preview={}", + "agent_payload {} total_len={}{} sha12={} preview={}", label, total_len, extra_note, + sha12, preview, ) diff --git a/api/app/core/celery_log_context.py b/api/app/core/celery_log_context.py new file mode 100644 index 0000000..bb85d00 --- /dev/null +++ b/api/app/core/celery_log_context.py @@ -0,0 +1,33 @@ +"""Celery worker:任务生命周期内通过 ContextVar 注入 loguru extra(user_id、correlation_id 等)。""" + +from __future__ import annotations + +from contextvars import ContextVar +from typing import Mapping + +_ctx: ContextVar[dict[str, str] | None] = ContextVar("celery_log_extras", default=None) + + +def set_celery_log_extras(extras: Mapping[str, str] | None) -> None: + """在 ``task_prerun`` 中调用;值会并入后续 loguru 记录的 ``extra``(不覆盖已有非空 bind)。""" + if not extras: + _ctx.set(None) + return + cleaned: dict[str, str] = {} + for k, v in extras.items(): + if v is None: + continue + s = str(v).strip() + if s: + cleaned[str(k).strip()] = s + _ctx.set(cleaned or None) + + +def clear_celery_log_extras() -> None: + """在 ``task_postrun`` 中调用,避免泄漏到同进程下一任务。""" + _ctx.set(None) + + +def get_celery_log_extras() -> dict[str, str]: + v = _ctx.get() + return dict(v) if v else {} diff --git a/api/app/core/config.py b/api/app/core/config.py index 83a6fb5..21fdd50 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -42,6 +42,8 @@ class Settings(BaseSettings): ) # 非 production 且为 True 时,在 main/internal_main 连接 Redis 后清空 Celery 队列(不 FLUSHDB,不影响会话键) celery_purge_broker_on_startup: bool = False + # Memory LLM 富化任务路由队列;可与主 worker 分离(见 README / docker-compose) + celery_memory_enrichment_queue: str = "memory_idle" # ── Auth / JWT ──────────────────────────────────────────── secret_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32)) @@ -112,6 +114,8 @@ class Settings(BaseSettings): memoir_phase1_batch_llm_max_tokens: int = Field(default=4096, ge=512, le=32_768) #: Phase1 批处理 LLM:单次请求最多包含的 segment 数(多块合并,避免 completion 顶满截断) memoir_phase1_batch_llm_chunk_size: int = Field(default=24, ge=1, le=500) + #: 回忆录流水线细粒度进度 Redis 快照 TTL(memoir_pipeline_run:*) + memoir_pipeline_run_ttl_seconds: int = Field(default=172_800, ge=3600, le=2_592_000) # Memoir agents:`invoke_json_object` / `llm_json_call` 的 max_tokens(原硬编码迁至配置) memoir_extraction_max_tokens: int = Field(default=1024, ge=64, le=8192) memoir_classification_max_tokens: int = Field(default=256, ge=32, le=4096) @@ -188,9 +192,11 @@ class Settings(BaseSettings): agent_log_prompt_mode: str = Field(default="preview") # AGENT_LOG_PROMPT_DEDUP:DEBUG 下同一 label 连续相同全文时第二条起跳过(减重复模板噪音) agent_log_prompt_dedup: bool = False - # 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING) + # 第三方 stdlib logging(空=自动:DEBUG/TRACE 时 Celery→INFO;否则 Celery 与 httpx 默认 WARNING) celery_log_level: str = "" httpx_log_level: str = "" + # 非空时额外写入 JSONL(serialize=True),便于 Loki/ELK;与 stderr 彩色控制台并存 + log_json_file: str = "" @field_validator("celery_purge_broker_on_startup", mode="before") @classmethod @@ -405,6 +411,31 @@ class Settings(BaseSettings): eval_judge_compare_prompt_overhead_chars: int = Field( default=14_000, ge=500, le=500_000 ) + # 回忆录音评:章节 LLM 并发上限(仅评审请求;准备阶段仍串行访问 DB) + eval_judge_memoir_chapter_concurrency: int = Field( + default=4, + ge=1, + le=32, + ) + # 回忆录评审 prompt 内粗截断(汉字计字符);万字级章节请保持 body ≥ 正文峰值 + eval_judge_memoir_body_max_chars: int = Field( + default=36_000, + ge=8_000, + le=500_000, + description="【当前回忆录正文】注入评审 prompt 前的最大字符", + ) + eval_judge_memoir_evidence_max_chars: int = Field( + default=32_000, + ge=8_000, + le=500_000, + description="对话证据 / 结构化证据 / 参考基线各块的最大字符(与 eval_trace_format 对齐)", + ) + # json_object 完成预算;MemoirJudgeOutput 字段多,需预留足量 token + eval_judge_memoir_completion_max_tokens: int = Field( + default=3072, + ge=512, + le=16_384, + ) # 候选对话回放:与生产访谈类似的温度 eval_candidate_temperature: float = 0.7 # 门禁:受保护 session 合成份数下跌超过该阈值视为回归(0–100 分制) diff --git a/api/app/core/llm_call.py b/api/app/core/llm_call.py index b14a0ca..8116a05 100644 --- a/api/app/core/llm_call.py +++ b/api/app/core/llm_call.py @@ -15,6 +15,13 @@ from typing import Any, Callable, Literal, TypeVar from pydantic import BaseModel, ValidationError +try: + from openai import ( + ContentFilterFinishReasonError as _OpenAIContentFilterFinishReasonError, + ) +except ImportError: # 兼容性:旧版 SDK 无此类 + _OpenAIContentFilterFinishReasonError = None + from app.core.agent_logging import agent_verbose_enabled, log_agent_payload from app.core.json_utils import extract_json_payload from app.core.langchain_llm import ( @@ -61,6 +68,57 @@ def _prompt_sha12(prompt: str) -> str: return hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()[:12] +def _iter_exception_chain(exc: BaseException): + """包含自身与 ``__cause__`` / ``__context__`` 链,去重防环。""" + seen: set[int] = set() + cur: BaseException | None = exc + while cur is not None and id(cur) not in seen: + yield cur + seen.add(id(cur)) + cur = cur.__cause__ or cur.__context__ + + +def _is_content_filter_refusal(exc: BaseException) -> bool: + """OpenAI / Azure 等内容审核拦截:无模型 JSON 可解析,属可预期失败,不宜打 ERROR 堆栈。""" + for e in _iter_exception_chain(exc): + if _OpenAIContentFilterFinishReasonError is not None and isinstance( + e, + _OpenAIContentFilterFinishReasonError, + ): + return True + msg = str(e).lower() + if "content filter" in msg and ( + "reject" in msg or "blocked" in msg or "filter" in msg + ): + return True + return False + + +_LLM_MSG_CONTENT_FILTER = ( + "模型输出被服务商内容安全策略拦截(content filter),通常与提示或上下文中触发了合规扫描有关;" + "可尝试更换模型、缩短送入模型的正文/证据节选,或在服务商控制台调整内容过滤策略。" +) + + +def _format_llm_invoke_error_message(exc: BaseException) -> str: + if _is_content_filter_refusal(exc): + return _LLM_MSG_CONTENT_FILTER + return str(exc) + + +def _log_invoke_failure(*, agent: str, exc: BaseException, sync: bool) -> None: + if _is_content_filter_refusal(exc): + logger.info( + "event=llm_content_filter_blocked agent={} sync={} detail={}", + agent, + sync, + str(exc)[:500], + ) + return + tag = "llm_json_call" if sync else "allm_json_call" + logger.bind(agent=agent).exception("{} invoke error: {}", tag, exc) + + def _invoke_raw_sync( llm: Any, prompt: str, @@ -272,7 +330,7 @@ def llm_json_call( return fallback_factory() raise except Exception as e: - logger.bind(agent=agent).exception("llm_json_call invoke error: {}", e) + _log_invoke_failure(agent=agent, exc=e, sync=True) used_fb = fallback_factory is not None _emit_meta( agent=agent, @@ -295,7 +353,7 @@ def llm_json_call( return fallback_factory() raise LLMCallError( "invoke", - str(e), + _format_llm_invoke_error_message(e), raw_content=raw[:4096] if raw else None, ) from e @@ -366,7 +424,7 @@ async def allm_json_call( return fallback_factory() raise except Exception as e: - logger.bind(agent=agent).exception("allm_json_call invoke error: {}", e) + _log_invoke_failure(agent=agent, exc=e, sync=False) used_fb = fallback_factory is not None _emit_meta( agent=agent, @@ -389,7 +447,7 @@ async def allm_json_call( return fallback_factory() raise LLMCallError( "invoke", - str(e), + _format_llm_invoke_error_message(e), raw_content=raw[:4096] if raw else None, ) from e diff --git a/api/app/core/log_events.py b/api/app/core/log_events.py new file mode 100644 index 0000000..788d5a4 --- /dev/null +++ b/api/app/core/log_events.py @@ -0,0 +1,131 @@ +"""结构化日志辅助:统一 ``event=`` 行格式与 Celery prerun 可提取的上下文字段。""" + +from __future__ import annotations + +from typing import Any + + +def format_log_event(event: str, **fields: Any) -> str: + """ + 生成单行 ``event=...`` 日志正文:``event`` 固定首位;``msg`` 固定末位(若提供);其余键按字母序。 + + ``None`` 与空字符串会跳过;浮点数默认保留一位小数(适用于 ``duration_ms``)。 + """ + parts: list[str] = [f"event={event}"] + keys = sorted(k for k in fields if k != "msg") + ordered = list(keys) + if "msg" in fields: + ordered.append("msg") + for k in ordered: + v = fields[k] + if v is None: + continue + if isinstance(v, float): + parts.append(f"{k}={v:.1f}") + elif isinstance(v, bool): + parts.append(f"{k}={str(v).lower()}") + else: + s = str(v).strip() + if not s: + continue + parts.append(f"{k}={s}") + return " ".join(parts) + + +def correlation_bind_kwargs( + *, + user_id: str | None = None, + memoir_correlation_id: str | None = None, + correlation_id: str | None = None, + **more: str | None, +) -> dict[str, str]: + """供 ``logger.bind(**...)``:``memoir_correlation_id`` 会以 ``correlation_id`` 写入(统一检索键)。""" + out: dict[str, str] = {} + uid = (user_id or "").strip() + if uid: + out["user_id"] = uid + cid = (correlation_id or memoir_correlation_id or "").strip() + if cid: + out["correlation_id"] = cid + for k, v in more.items(): + if v is None: + continue + s = str(v).strip() + if s: + out[str(k)] = s + return out + + +# bind=True 任务的 positional 与字段名映射(kwargs 优先,缺位再填) +_TASK_POSITIONAL_FIELDS: dict[str, tuple[str, ...]] = { + "app.tasks.memory_enrichment_tasks.enrich_memory_source": ("user_id", "source_id"), + "app.tasks.memory_compaction_tasks.memory_compaction_run": ("user_id",), + "app.tasks.chapter_compose_tasks.recompose_chapter": ("chapter_id",), + "app.tasks.memoir_quality_pass_tasks.memoir_quality_pass": ("user_id",), + "app.tasks.memoir_tasks.process_memoir_phase2": ("user_id", "chapter_category"), + "app.tasks.memoir_tasks.process_memoir_phase1": ("user_id",), + "app.tasks.memoir_tasks.generate_chapter_content": ("user_id", "stage"), + "app.tasks.chapter_cover_tasks.generate_chapter_cover": ("chapter_id",), + "app.tasks.story_image_tasks.generate_story_image": ("story_id",), + "app.tasks.story_title_tasks.generate_story_title_after_create": ( + "story_id", + "chapter_category", + "oral_scope", + "user_id", + ), +} + +_KW_KEYS_COPY: tuple[str, ...] = ( + "user_id", + "source_id", + "chapter_id", + "story_id", + "chapter_category", + "stage", + "oral_scope", +) + + +def celery_prerun_extras( + task_name: str | None, + args: tuple[Any, ...], + kwargs: dict[str, Any] | None, +) -> dict[str, str]: + """ + 从 Celery ``task_prerun`` 的 args/kwargs 提取 ``user_id``、``correlation_id`` 等, + 供 ``set_celery_log_extras`` 与任务体内 loguru 记录关联。 + """ + out: dict[str, str] = {} + kw = dict(kwargs or {}) + + mcid = kw.get("memoir_correlation_id") + if mcid is not None: + s = str(mcid).strip() + if s: + out["correlation_id"] = s + + for key in _KW_KEYS_COPY: + if key not in kw: + continue + val = kw[key] + if val is None: + continue + s = str(val).strip() + if s: + out[key] = s + + name = (task_name or "").strip() + fields = _TASK_POSITIONAL_FIELDS.get(name) + if fields and args: + for i, field in enumerate(fields): + if i >= len(args): + break + if field in out: + continue + val = args[i] + if val is None: + continue + s = str(val).strip() + if s: + out[field] = s + return out diff --git a/api/app/core/logging.py b/api/app/core/logging.py index a2f01c7..18175bd 100644 --- a/api/app/core/logging.py +++ b/api/app/core/logging.py @@ -6,16 +6,26 @@ loguru 统一日志配置 + InterceptHandler 拦截第三方库的标准库 logg 直接走 loguru sink;占位符用 **``{}``**(勿用 ``%s``,否则不会插值)。 **禁止**用 ``import logging`` 取业务 logger(适配器层与第三方 SDK 除外)。 - **第三方**(uvicorn、celery、httpx、langchain 等):仍用标准库 ``logging``,经 ``InterceptHandler`` 汇入 loguru。 + 默认将 ``celery*``、``httpx``/``httpcore`` 调到 WARNING,避免刷屏;任务边界见 ``app.tasks.celery_app`` 中 ``event=celery_task_*``。 级别: -- INFO:面向运维的稳定摘要。 -- DEBUG:可含完整上下文、用户内容;仅受控环境长期开启。 +- INFO:面向运维的稳定摘要(生产/预发推荐长期保持)。 +- DEBUG:可含 prompt/响应预览或哈希;会显著增噪与体积,仅短时排障;可与 ``AGENT_LOG_MAX_CHARS`` / ``AGENT_LOG_PROMPT_MODE`` 配合。 由 ``Settings.log_level`` 控制 sink(``LOG_LEVEL``);``LOG_LEVEL=DEBUG`` 时业务 ``logger.debug`` 可见。 +不打开全局 DEBUG 也可设 ``LOG_AGENT_VERBOSE=1`` 查看 Agent 单行耗时与规模(见 ``app.core.agent_logging``)。 **实践说明**:开发/终端用「人类可读」单行格式;若上生产聚合(ELK、Loki、CloudWatch),建议**另加** JSON sink(``serialize=True`` 或自定义 ``format``)与现有 stderr 并存,便于检索与关联,而不是在控制台格式里硬塞结构化字段。 -Agent / LLM 诊断见 ``app.core.agent_logging``;``LOG_AGENT_VERBOSE``、``AGENT_LOG_MAX_CHARS`` 见配置说明。 +**字段约定(可读性)**: + +- 机读键用英文 ``snake_case``:优先 ``event=...``,其余 ``key=value`` 空格分隔;与人相关的说明用 ``msg=中文短句``(可含空格),放在行尾或紧邻 ``event`` 后。 +- **HTTP**:``request_id`` 由中间件 ``contextualize``;业务处可 ``logger.bind(**correlation_bind_kwargs(user_id=..., memoir_correlation_id=...))``(见 ``app.core.log_events``)。 +- **Celery**:``task_prerun`` 会通过 ``app.core.celery_log_context`` 注入 ``user_id`` / ``correlation_id`` / ``task_id`` 等到 loguru ``extra``(不覆盖已有 ``bind``);``task_postrun`` 清除,避免串任务。 +- **耗时**:业务里程碑的结束行带 ``duration_ms``(``perf_counter`` × 1000);LLM 细粒度见 ``app.core.agent_logging`` 的 ``agent_span`` / ``LOG_AGENT_VERBOSE``。 +- **级别**:INFO=里程碑与任务起止;DEBUG=体积与路径;WARNING=可恢复失败与降级。 + +Agent / LLM 诊断见 ``app.core.agent_logging``;``LOG_AGENT_VERBOSE``、``AGENT_LOG_MAX_CHARS``、``AGENT_LOG_PROMPT_MODE``、``AGENT_LOG_PROMPT_DEDUP`` 见 ``api/.env.example`` 与 ``Settings``。 """ from __future__ import annotations @@ -28,6 +38,11 @@ from typing import TYPE_CHECKING, Any from loguru import logger from app.core.config import settings +from app.core.log_events import ( + celery_prerun_extras, + correlation_bind_kwargs, + format_log_event, +) if TYPE_CHECKING: from loguru import Logger @@ -94,33 +109,66 @@ def _stdlib_emit_display(log_record: logging.LogRecord) -> tuple[str, int]: def _stderr_format(record: Any) -> str: - """控制台 sink 格式:无有效 request_id 时不占一列 ``-``,减少 Celery/Worker 噪声。""" + """控制台 sink:request_id / correlation_id / user_id 有值时才显示对应列。""" rid = str(record["extra"].get("request_id") or "").strip() - rid_part = "{extra[request_id]} | " if rid and rid != "-" else "" + rid_part = "rid={extra[request_id]} | " if rid and rid != "-" else "" + cid = str(record["extra"].get("correlation_id") or "").strip() + cid_part = "corr={extra[correlation_id]} | " if cid else "" + uid = str(record["extra"].get("user_id") or "").strip() + uid_part = "uid={extra[user_id]} | " if uid else "" return ( "{time:YYYY-MM-DD HH:mm:ss.SSS} | " "{level.name: <8} | " "{extra[module]}:{function}:{line} | " - f"{rid_part}" + f"{rid_part}{cid_part}{uid_part}" "{message}\n{exception}" ) +def _merge_celery_worker_extra(record: Any) -> None: + """把 ContextVar 中的 Celery 上下文字段并入本条 loguru 记录(不覆盖已有非空 extra)。""" + try: + from app.core.celery_log_context import get_celery_log_extras + + ctx = get_celery_log_extras() + if not ctx: + return + except Exception: + return + ex = record["extra"] + for k, v in ctx.items(): + if not v: + continue + cur = ex.get(k) + if cur is None or str(cur).strip() in ("", "-"): + ex[k] = v + + def _apply_third_party_log_levels() -> None: - """在全局 sink 为 DEBUG/TRACE 时压低 Celery/httpx 噪声;可通过 CELERY_LOG_LEVEL / HTTPX_LOG_LEVEL 覆盖。""" + """压低 Celery/httpx 框架日志噪声。 + + 根 logger 为 NOTSET 时,子 logger 若也为 NOTSET,有效级别会变成 0(NOTSET),INFO 会全部通过, + 因此这里**必须**写死默认级别,不能依赖 NOTSET「继承」。 + + 默认(未设 CELERY_LOG_LEVEL / HTTPX_LOG_LEVEL): + - ``LOG_LEVEL`` 为 TRACE/DEBUG:Celery→INFO,httpx/httpcore→WARNING + - 否则:Celery 与 httpx/httpcore→WARNING(保留业务 loguru 与 ``event=celery_task_*`` 摘要) + + 需要框架原始行时,设置 ``CELERY_LOG_LEVEL=INFO``、``HTTPX_LOG_LEVEL=INFO`` 等。 + """ sink = _sink_min_level() verbose = sink in ("TRACE", "DEBUG") + # 无效环境变量时的回退:与「未设置变量」分支一致,禁止 NOTSET + default_celery = logging.INFO if verbose else logging.WARNING + default_httpx = logging.WARNING + raw_c = (settings.celery_log_level or "").strip() if raw_c: parsed = _parse_stdlib_level(raw_c) - cel_level = ( - parsed - if parsed is not None - else (logging.INFO if verbose else logging.NOTSET) - ) + cel_level = parsed if parsed is not None else default_celery else: - cel_level = logging.INFO if verbose else logging.NOTSET + cel_level = default_celery for name in ("celery", "celery.worker"): logging.getLogger(name).setLevel(cel_level) @@ -128,13 +176,9 @@ def _apply_third_party_log_levels() -> None: raw_h = (settings.httpx_log_level or "").strip() if raw_h: parsed = _parse_stdlib_level(raw_h) - httpx_level = ( - parsed - if parsed is not None - else (logging.WARNING if verbose else logging.NOTSET) - ) + httpx_level = parsed if parsed is not None else default_httpx else: - httpx_level = logging.WARNING if verbose else logging.NOTSET + httpx_level = default_httpx for name in ("httpx", "httpcore"): logging.getLogger(name).setLevel(httpx_level) @@ -175,6 +219,7 @@ def setup_logging() -> None: Celery 需 ``worker_hijack_root_logger=False``,否则会覆盖根 logger。 """ + global logger logger.remove() logger.add( @@ -185,7 +230,20 @@ def setup_logging() -> None: diagnose=False, ) + json_path = (settings.log_json_file or "").strip() + if json_path: + logger.add( + json_path, + level=_sink_min_level(), + serialize=True, + rotation="20 MB", + retention="7 days", + encoding="utf-8", + enqueue=True, + ) + logger.configure(extra={"request_id": "-", "module": "-"}) + logger = logger.patch(_merge_celery_worker_extra) # 仅 root 挂 InterceptHandler,避免子 logger 与 root 各处理一次导致重复行 root = logging.getLogger() @@ -201,4 +259,12 @@ def get_logger(name: str) -> Logger: # 供 middleware 等使用 ``contextualize`` 的同一 loguru 实例(与 get_logger 同源) -__all__ = ["logger", "setup_logging", "get_logger", "InterceptHandler"] +__all__ = [ + "logger", + "setup_logging", + "get_logger", + "InterceptHandler", + "format_log_event", + "correlation_bind_kwargs", + "celery_prerun_extras", +] diff --git a/api/app/core/memoir_pipeline_progress.py b/api/app/core/memoir_pipeline_progress.py new file mode 100644 index 0000000..232302d --- /dev/null +++ b/api/app/core/memoir_pipeline_progress.py @@ -0,0 +1,294 @@ +""" +回忆录流水线细粒度进度:Redis JSON 快照,以 memoir_correlation_id 为聚合根。 +供 Celery worker(同步 Redis)与 internal eval API 读取。 +""" + +from __future__ import annotations + +import json +import threading +from datetime import datetime, timezone +from typing import Any + +import redis + +from app.core.config import settings +from app.core.logging import get_logger + +logger = get_logger(__name__) + +_lock = threading.Lock() +_client: redis.Redis | None = None + + +def _redis() -> redis.Redis: + global _client + if _client is None: + with _lock: + if _client is None: + _client = redis.from_url(settings.redis_url, decode_responses=True) + return _client + + +def _run_key(correlation_id: str) -> str: + return f"memoir_pipeline_run:{correlation_id}" + + +def _phase1_index_key(phase1_task_id: str) -> str: + return f"memoir_pipeline_run:by_phase1_task:{phase1_task_id}" + + +def _ttl() -> int: + return int(settings.memoir_pipeline_run_ttl_seconds) + + +def _empty_fanout() -> dict[str, Any]: + return { + "story_images": [], + "recompose_chapters": [], + "memory_enrichment": [], + "quality_pass": None, + "compaction": None, + } + + +def _default_doc(correlation_id: str) -> dict[str, Any]: + return { + "memoir_correlation_id": correlation_id, + "user_id": None, + "started_at_utc": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "phase1": None, + "phase2": [], + "fanout": _empty_fanout(), + } + + +def _merge_phase2_list( + existing: list[dict[str, Any]], updates: list[dict[str, Any]] +) -> list[dict[str, Any]]: + by_tid: dict[str, dict[str, Any]] = {} + for x in existing: + tid = str(x.get("task_id") or "").strip() + if tid: + by_tid[tid] = dict(x) + for u in updates: + tid = str(u.get("task_id") or "").strip() + if not tid: + continue + if tid in by_tid: + merged = {**by_tid[tid], **u} + by_tid[tid] = merged + else: + by_tid[tid] = dict(u) + return list(by_tid.values()) + + +def _fanout_list_merge_key(items: list[dict], patch_items: list[dict], id_key: str) -> None: + by_id: dict[str, dict[str, Any]] = {} + for x in items: + k = str(x.get(id_key) or "").strip() + if k: + by_id[k] = dict(x) + for u in patch_items: + k = str(u.get(id_key) or "").strip() + if not k: + continue + if k in by_id: + by_id[k] = {**by_id[k], **u} + else: + by_id[k] = dict(u) + items.clear() + items.extend(by_id.values()) + + +def _merge_fanout(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]: + out = dict(base) + for k, v in patch.items(): + if k in ("story_images", "recompose_chapters", "memory_enrichment") and isinstance( + v, list + ): + id_key = ( + "story_id" + if k == "story_images" + else "chapter_id" + if k == "recompose_chapters" + else "source_id" + ) + existing = list(out.get(k) or []) + _fanout_list_merge_key(existing, v, id_key) + out[k] = existing + elif k == "quality_pass" and isinstance(v, dict): + out[k] = {**(out.get(k) or {}), **v} if out.get(k) else dict(v) + elif k == "compaction" and isinstance(v, dict): + out[k] = {**(out.get(k) or {}), **v} if out.get(k) else dict(v) + else: + out[k] = v + return out + + +def _merge_doc(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]: + out = dict(base) + for k, v in patch.items(): + if k == "phase2" and isinstance(v, list): + out["phase2"] = _merge_phase2_list(list(out.get("phase2") or []), v) + elif k == "fanout" and isinstance(v, dict): + out["fanout"] = _merge_fanout( + dict(out.get("fanout") or _empty_fanout()), v + ) + elif k == "phase1" and isinstance(v, dict): + cur = dict(out.get("phase1") or {}) + for pk, pv in v.items(): + if pk == "detail" and isinstance(pv, dict) and isinstance( + cur.get("detail"), dict + ): + cur["detail"] = {**cur["detail"], **pv} + else: + cur[pk] = pv + out["phase1"] = cur + elif isinstance(v, dict) and isinstance(out.get(k), dict): + out[k] = {**out[k], **v} + else: + out[k] = v + return out + + +def merge_pipeline_run(correlation_id: str, patch: dict[str, Any]) -> None: + """合并补丁到流水线快照(不存在则创建最小文档)。""" + cid = (correlation_id or "").strip() + if not cid: + return + try: + r = _redis() + key = _run_key(cid) + raw = r.get(key) + if raw: + doc = json.loads(raw) + else: + doc = _default_doc(cid) + doc = _merge_doc(doc, patch) + r.setex(key, _ttl(), json.dumps(doc, ensure_ascii=False)) + except Exception as e: + logger.warning( + "memoir_pipeline_progress merge failed correlation_id={} err={}", + cid, + e, + ) + + +def init_pipeline_run_from_phase1( + user_id: str, + correlation_id: str, + phase1_task_id: str, + *, + segment_count: int, +) -> None: + cid = (correlation_id or "").strip() + uid = (user_id or "").strip() + tid = (phase1_task_id or "").strip() + if not cid or not uid or not tid: + return + try: + r = _redis() + now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + doc = { + "memoir_correlation_id": cid, + "user_id": uid, + "started_at_utc": now, + "phase1": { + "task_id": tid, + "status": "running", + "step": "started", + "detail": {"segment_count": int(segment_count)}, + }, + "phase2": [], + "fanout": _empty_fanout(), + } + ttl = _ttl() + r.setex(_run_key(cid), ttl, json.dumps(doc, ensure_ascii=False)) + r.setex(_phase1_index_key(tid), ttl, cid) + except Exception as e: + logger.warning( + "memoir_pipeline_progress init failed correlation_id={} err={}", + cid, + e, + ) + + +def get_pipeline_run_snapshot(correlation_id: str) -> dict[str, Any] | None: + cid = (correlation_id or "").strip() + if not cid: + return None + try: + raw = _redis().get(_run_key(cid)) + if not raw: + return None + return json.loads(raw) + except Exception as e: + logger.warning( + "memoir_pipeline_progress get failed correlation_id={} err={}", + cid, + e, + ) + return None + + +def resolve_correlation_id_for_phase1_task(phase1_task_id: str) -> str | None: + tid = (phase1_task_id or "").strip() + if not tid: + return None + try: + cid = _redis().get(_phase1_index_key(tid)) + return (cid or "").strip() or None + except Exception as e: + logger.warning( + "memoir_pipeline_progress resolve phase1_task={} err={}", + tid, + e, + ) + return None + + +def get_pipeline_run_for_eval( + user_id: str, + *, + memoir_correlation_id: str | None = None, + phase1_task_id: str | None = None, +) -> dict[str, Any] | None: + """Internal eval:校验 user_id 与快照一致后返回。""" + uid = (user_id or "").strip() + if not uid: + return None + cid = (memoir_correlation_id or "").strip() + if not cid and phase1_task_id: + cid = resolve_correlation_id_for_phase1_task(phase1_task_id) or "" + if not cid: + return None + snap = get_pipeline_run_snapshot(cid) + if not snap: + return None + if str(snap.get("user_id") or "").strip() != uid: + return None + return snap + + +def merge_fanout_item( + correlation_id: str | None, + *, + list_name: str, + id_field: str, + item_id: str, + task_id: str, + status: str, + extra: dict[str, Any] | None = None, +) -> None: + cid = (correlation_id or "").strip() + if not cid: + return + item: dict[str, Any] = { + id_field: item_id, + "task_id": task_id, + "status": status, + } + if extra: + item.update(extra) + merge_pipeline_run(cid, {"fanout": {list_name: [item]}}) diff --git a/api/app/features/conversation/ws/connection_manager.py b/api/app/features/conversation/ws/connection_manager.py index d43fef7..43ddd05 100644 --- a/api/app/features/conversation/ws/connection_manager.py +++ b/api/app/features/conversation/ws/connection_manager.py @@ -32,7 +32,9 @@ class ConnectionManager: try: await websocket.send_json(message) except (RuntimeError, Exception) as e: - logger.warning(f"发送消息失败 (conversation_id={conversation_id}): {e}") + logger.warning( + "发送消息失败 (conversation_id={}): {}", conversation_id, e + ) if conversation_id in self.active_connections: del self.active_connections[conversation_id] diff --git a/api/app/features/conversation/ws/pipeline.py b/api/app/features/conversation/ws/pipeline.py index a25a74d..85dd847 100644 --- a/api/app/features/conversation/ws/pipeline.py +++ b/api/app/features/conversation/ws/pipeline.py @@ -873,4 +873,4 @@ async def process_conversation_segments( len(segment_ids), ) except Exception as e: - logger.error(f"提交 Celery 任务失败: {e}") + logger.error("提交 Celery 任务失败: {}", e) diff --git a/api/app/features/conversation/ws/router.py b/api/app/features/conversation/ws/router.py index 5d85dc2..8253441 100644 --- a/api/app/features/conversation/ws/router.py +++ b/api/app/features/conversation/ws/router.py @@ -190,7 +190,7 @@ async def websocket_endpoint( if i < ng - 1: await asyncio.sleep(0.5) except Exception as e: - logger.error(f"发送资料收集开场白失败: {e}", exc_info=True) + logger.exception("发送资料收集开场白失败: {}", e) else: try: state = memoir_state @@ -239,7 +239,7 @@ async def websocket_endpoint( if i < no - 1: await asyncio.sleep(0.5) except Exception as e: - logger.error(f"发送空对话开场白失败: {e}", exc_info=True) + logger.exception("发送空对话开场白失败: {}", e) while True: try: @@ -597,7 +597,7 @@ async def websocket_endpoint( ) except Exception as e: - logger.error(f"处理音频消息失败: {e}", exc_info=True) + logger.exception("处理音频消息失败: {}", e) await manager.send_message( conversation_id, { @@ -638,7 +638,7 @@ async def websocket_endpoint( }, ) except Exception as e: - logger.error(f"仅转写失败: {e}", exc_info=True) + logger.exception("仅转写失败: {}", e) await manager.send_message( conversation_id, { @@ -703,7 +703,7 @@ async def websocket_endpoint( ) break else: - logger.error(f"处理消息时发生 RuntimeError: {e}", exc_info=True) + logger.exception("处理消息时发生 RuntimeError: {}", e) if conversation_id in manager.active_connections: try: await manager.send_message( @@ -717,7 +717,7 @@ async def websocket_endpoint( }, ) except Exception as send_error: - logger.warning(f"发送错误消息失败: {send_error}") + logger.warning("发送错误消息失败: {}", send_error) break except WebSocketDisconnect as disc: logger.info( @@ -727,7 +727,7 @@ async def websocket_endpoint( ) break except Exception as e: - logger.error(f"处理消息时发生错误: {e}", exc_info=True) + logger.exception("处理消息时发生错误: {}", e) if conversation_id in manager.active_connections: try: await manager.send_message( @@ -739,7 +739,7 @@ async def websocket_endpoint( }, ) except Exception as send_error: - logger.warning(f"发送错误消息失败: {send_error}") + logger.warning("发送错误消息失败: {}", send_error) break except WebSocketDisconnect as disc: @@ -751,7 +751,7 @@ async def websocket_endpoint( await manager.disconnect(conversation_id) cleanup_segment_states(conversation_id) except Exception as e: - logger.error(f"WebSocket 端点发生错误: {e}", exc_info=True) + logger.exception("WebSocket 端点发生错误: {}", e) await manager.disconnect(conversation_id) cleanup_segment_states(conversation_id) finally: diff --git a/api/app/features/evaluation/eval_trace_format.py b/api/app/features/evaluation/eval_trace_format.py index cdb11ed..2e0fa26 100644 --- a/api/app/features/evaluation/eval_trace_format.py +++ b/api/app/features/evaluation/eval_trace_format.py @@ -2,6 +2,7 @@ from __future__ import annotations +from app.core.config import settings from app.features.conversation.models import Segment from app.features.evaluation.eval_trace_schemas import ( ChapterEvidenceBundle, @@ -16,9 +17,10 @@ from app.features.memory.models import ( TimelineEvent, ) -# 与 judge_service._MEMOIR_EVIDENCE_MAX 对齐:访谈与结构化证据分预算,避免总长失控 -_MEMOIR_TRANSCRIPT_CAP = 12_000 -_MEMOIR_STRUCTURED_CAP = 12_000 + +def _memoir_evidence_char_cap() -> int: + """与 ``Settings.eval_judge_memoir_evidence_max_chars`` 对齐。""" + return max(1000, int(settings.eval_judge_memoir_evidence_max_chars)) def _approx_tokens(chars: int) -> int: @@ -75,11 +77,12 @@ def build_structured_evidence_text( facts: list[MemoryFact], events: list[TimelineEvent], summaries: list[MemorySummary], - max_chars: int = _MEMOIR_STRUCTURED_CAP, + max_chars: int | None = None, ) -> tuple[str, bool, list[str]]: """ 结构化记忆证据块;返回 (text, truncated, dropped_section_tags)。 """ + cap = max_chars if max_chars is not None else _memoir_evidence_char_cap() parts: list[str] = [] dropped: list[str] = [] used = 0 @@ -90,7 +93,7 @@ def build_structured_evidence_text( block = f"{title}\n{body}".strip() if not block: return - if used + len(block) + 2 > max_chars: + if used + len(block) + 2 > cap: truncated = True dropped.append(title.strip("【】").split("·")[0].strip()) return @@ -172,23 +175,22 @@ def format_chapter_for_judge( events: list[TimelineEvent], summaries: list[MemorySummary], ) -> FormattedMemoirEvidence: - t_cap = _MEMOIR_TRANSCRIPT_CAP - s_cap = _MEMOIR_STRUCTURED_CAP + ev_cap = _memoir_evidence_char_cap() dropped: list[str] = [] truncated = False t_in = transcript.strip() - if len(t_in) > t_cap: + if len(t_in) > ev_cap: truncated = True dropped.append("source_transcript_tail") - t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)" + t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)" struct, s_trunc, s_drop = build_structured_evidence_text( chunks=chunks, facts=facts, events=events, summaries=summaries, - max_chars=s_cap, + max_chars=ev_cap, ) if s_trunc: truncated = True @@ -228,23 +230,22 @@ def format_story_for_judge( events: list[TimelineEvent], summaries: list[MemorySummary], ) -> FormattedMemoirEvidence: - t_cap = _MEMOIR_TRANSCRIPT_CAP - s_cap = _MEMOIR_STRUCTURED_CAP + ev_cap = _memoir_evidence_char_cap() dropped: list[str] = [] truncated = False t_in = transcript.strip() - if len(t_in) > t_cap: + if len(t_in) > ev_cap: truncated = True dropped.append("source_transcript_tail") - t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)" + t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)" struct, s_trunc, s_drop = build_structured_evidence_text( chunks=chunks, facts=facts, events=events, summaries=summaries, - max_chars=s_cap, + max_chars=ev_cap, ) if s_trunc: truncated = True diff --git a/api/app/features/evaluation/judge_manual_service.py b/api/app/features/evaluation/judge_manual_service.py index 0af17c4..271a403 100644 --- a/api/app/features/evaluation/judge_manual_service.py +++ b/api/app/features/evaluation/judge_manual_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import copy import re from collections.abc import AsyncIterator @@ -10,19 +11,20 @@ from typing import Any from sqlalchemy.ext.asyncio import AsyncSession +from app.core.config import settings from app.core.dependencies import ( EvalJudgeProvider, build_eval_judge_llm_spec, ) from app.core.logging import get_logger from app.features.conversation import repo as conversation_repo +from app.features.evaluation.conversation_compare_summary import ( + build_conversation_compare_summary, +) from app.features.evaluation.errors import ( EvaluationBadRequestError, EvaluationNotFoundError, ) -from app.features.evaluation.conversation_compare_summary import ( - build_conversation_compare_summary, -) from app.features.evaluation.eval_trace_service import EvalTraceService from app.features.evaluation.judge_schemas import ConversationJudgeOutput from app.features.evaluation.judge_service import ( @@ -30,6 +32,9 @@ from app.features.evaluation.judge_service import ( eval_judge_compare_transcript_each_max_chars_for_context, eval_judge_conversation_transcript_max_chars_for_context, ) +from app.features.evaluation.memoir_compare_summary import ( + build_memoir_compare_summary, +) from app.features.evaluation.schemas import MemoirSectionBaselineOut from app.features.evaluation.session_catalog_service import SessionCatalogService from app.features.evaluation.transcript_for_judge import ( @@ -45,9 +50,8 @@ from app.features.story.repo import get_stories_for_user logger = get_logger(__name__) -_MAX_JUDGE_MARKDOWN_CHARS = 20_000 _MAX_EVAL_CHAPTERS = 30 -_MAX_EVAL_STORIES = 40 +_MAX_EVAL_STORIES = 40 # memoir_snapshot 等仍限幅 _PRIOR_TRANSCRIPT_MAX_CHARS = 8000 _JUDGE_CONFIG_HINT = ( @@ -117,11 +121,16 @@ async def _iter_turn_judgments_for_turns( prior_blocks.append(format_eval_turn_block(idx, u, reply)) -def _clip_md_for_judge(text: str, max_chars: int = _MAX_JUDGE_MARKDOWN_CHARS) -> str: +def _clip_md_for_judge(text: str, max_chars: int | None = None) -> str: + cap = ( + max_chars + if max_chars is not None + else max(1000, int(settings.eval_judge_memoir_body_max_chars)) + ) s = (text or "").strip() - if len(s) <= max_chars: + if len(s) <= cap: return s - return f"{s[:max_chars]}\n\n…(已截断供评审)" + return f"{s[:cap]}\n\n…(已截断供评审)" async def _conversation_transcript_for_manual( @@ -675,7 +684,7 @@ class EvalJudgeManualService: if not uid: raise EvaluationBadRequestError("user_id is required") - judge, _resolved = _make_eval_judge(judge_provider, judge_model) + judge, resolved_model = _make_eval_judge(judge_provider, judge_model) if not judge: raise EvaluationBadRequestError(_JUDGE_CONFIG_HINT) baselines = list(baseline_sections or []) @@ -692,96 +701,408 @@ class EvalJudgeManualService: ) chapter_results: list[dict[str, Any]] = [] + errors: list[str] = [] try: chapters = await get_chapters_for_memoir_list( uid, self._db, active_only=True, is_new_only=None ) - for i, ch in enumerate(chapters[:_MAX_EVAL_CHAPTERS]): - body = (ch.canonical_markdown or "").strip() - if not body: - continue - bl = _baseline_for_chapter_title(baselines, str(ch.title or ""), i) - baseline_excerpt = "" - if bl and (bl.body or "").strip(): - baseline_excerpt = _clip_md_for_judge(bl.body, max_chars=6000) - md = f"# 章节:{ch.title}\n\n" - if baseline_excerpt: - md += f"## 导出基线(节选)\n\n{baseline_excerpt}\n\n" - md += f"## 当前成稿\n\n{_clip_md_for_judge(body)}" + except Exception as e: + logger.exception("manual memoir: chapter list failed user_id={}", uid) + errors.append(f"加载章节列表失败:{e}") + chapters = [] + + def _nonempty_chapters(cols: list[Any]) -> int: + return sum( + 1 for x in cols if (getattr(x, "canonical_markdown", None) or "").strip() + ) + + conc = max(1, min(32, int(settings.eval_judge_memoir_chapter_concurrency))) + logger.info( + "event=eval_memoir_judge_start user_id={} judge_provider={} judge_model={} " + "chapters_total={} chapters_nonempty={} chapter_concurrency={}", + uid, + judge_provider, + resolved_model or "", + len(chapters), + _nonempty_chapters(chapters[:_MAX_EVAL_CHAPTERS]), + conc, + ) + + prepared: list[dict[str, Any]] = [] + enum_idx = 0 + for i, ch in enumerate(chapters[:_MAX_EVAL_CHAPTERS]): + body = (ch.canonical_markdown or "").strip() + if not body: + continue + bl = _baseline_for_chapter_title(baselines, str(ch.title or ""), i) + baseline_excerpt = "" + if bl and (bl.body or "").strip(): + baseline_excerpt = _clip_md_for_judge( + bl.body, + max_chars=max( + 1000, int(settings.eval_judge_memoir_evidence_max_chars) + ), + ) + md = f"# 章节:{ch.title}\n\n{_clip_md_for_judge(body)}" + try: cb = await trace_svc.build_chapter_bundle(uid, ch) formatted, cb2 = await trace_svc.format_chapter_bundle(cb) fm = formatted.format_meta - cj = await judge.judge_memoir( - memoir_markdown=md, - source_transcript=formatted.source_transcript, - structured_evidence=formatted.structured_evidence, - reference_memoir_markdown=baseline_excerpt, - evidence_notes=_chapter_evidence_notes( - cb2.lineage_tier, - formatted.evidence_summary, - fm.truncated, - fm.dropped_sections, - ), - ) - chapter_results.append( + prepared.append( { - "id": ch.id, - "title": ch.title, - "order_index": ch.order_index, - "baseline_title": bl.title if bl else None, - "lineage_tier": cb2.lineage_tier, - "evidence_summary": formatted.evidence_summary, - "evidence_trace": cb2.model_dump(), - "format_meta": fm.model_dump(), - "judge": cj.model_dump() if cj else None, + "enum_idx": enum_idx, + "ch": ch, + "bl": bl, + "md": md, + "baseline_excerpt": baseline_excerpt, + "formatted": formatted, + "cb2": cb2, + "fm": fm, } ) - except Exception as e: - logger.warning("manual memoir chapter judges failed: {}", e) + enum_idx += 1 + except Exception as e: + logger.exception( + "manual memoir: chapter prepare failed user_id={} chapter_id={}", + uid, + ch.id, + ) + label = str(ch.title or ch.id) + errors.append(f"章节「{label}」证据打包失败:{e}") + + sem = asyncio.Semaphore(conc) + + async def _judge_one(payload: dict[str, Any]) -> dict[str, Any]: + async with sem: + ch = payload["ch"] + formatted = payload["formatted"] + cb2 = payload["cb2"] + fm = payload["fm"] + baseline_excerpt = payload["baseline_excerpt"] + md = payload["md"] + bl = payload["bl"] + ch_label = str(ch.title or ch.id) + row_errs: list[str] = [] + try: + cj_res = await judge.judge_memoir_result( + memoir_markdown=md, + source_transcript=formatted.source_transcript, + structured_evidence=formatted.structured_evidence, + reference_memoir_markdown=baseline_excerpt, + evidence_notes=_chapter_evidence_notes( + cb2.lineage_tier, + formatted.evidence_summary, + fm.truncated, + fm.dropped_sections, + ), + ) + except Exception as e: + logger.exception( + "manual memoir: chapter judge failed user_id={} chapter_id={}", + uid, + ch.id, + ) + return { + "enum_idx": payload["enum_idx"], + "order_index": ch.order_index, + "row": None, + "errors": [ + *row_errs, + f"章节「{ch_label}」评审失败:{e}", + ], + } + cj = cj_res.output + row: dict[str, Any] = { + "id": ch.id, + "title": ch.title, + "order_index": ch.order_index, + "baseline_title": bl.title if bl else None, + "lineage_tier": cb2.lineage_tier, + "evidence_summary": formatted.evidence_summary, + "evidence_trace": cb2.model_dump(), + "format_meta": fm.model_dump(), + "judge": cj.model_dump() if cj else None, + } + if cj_res.error: + row["judge_error"] = cj_res.error + row_errs.append(f"章节「{ch_label}」LLM 评审失败:{cj_res.error}") + logger.info( + "event=eval_memoir_chapter_judge_failed user_id={} chapter_id={} msg={}", + uid, + ch.id, + cj_res.error, + ) + elif not cj: + row["judge_error"] = "empty_output" + row_errs.append(f"章节「{ch_label}」评审返回空结果") + logger.info( + "event=eval_memoir_chapter_judge_empty user_id={} chapter_id={}", + uid, + ch.id, + ) + return { + "enum_idx": payload["enum_idx"], + "order_index": ch.order_index, + "row": row, + "errors": row_errs, + } + + judged = await asyncio.gather(*[_judge_one(p) for p in prepared]) + judged.sort( + key=lambda r: ( + r["order_index"] + if r["order_index"] is not None + else 10**9, + r["enum_idx"], + ) + ) + for r in judged: + errors.extend(r["errors"]) + if r["row"] is not None: + chapter_results.append(r["row"]) story_results: list[dict[str, Any]] = [] - try: - stories = await get_stories_for_user(self._db, uid, status="active") - for st in stories[:_MAX_EVAL_STORIES]: - body = (st.canonical_markdown or "").strip() - if not body: - continue - md = f"# 故事:{st.title}\n\n{_clip_md_for_judge(body)}" - sb = await trace_svc.build_story_bundle(uid, str(st.id)) - formatted, sb2 = await trace_svc.format_story_bundle(sb) - fm = formatted.format_meta - sj = await judge.judge_memoir( - memoir_markdown=md, - source_transcript=formatted.source_transcript, - structured_evidence=formatted.structured_evidence, - evidence_notes=_chapter_evidence_notes( - sb2.lineage_tier, - formatted.evidence_summary, - fm.truncated, - fm.dropped_sections, - ), - ) - story_results.append( - { - "id": st.id, - "title": st.title, - "stage": st.stage, - "lineage_tier": sb2.lineage_tier, - "evidence_summary": formatted.evidence_summary, - "evidence_trace": sb2.model_dump(), - "format_meta": fm.model_dump(), - "judge": sj.model_dump() if sj else None, - } - ) - except Exception as e: - logger.warning("manual memoir story judges failed: {}", e) + + warnings: list[str] = [] + if not chapter_results and not errors: + warnings.append( + "未发现可评分的回忆录章节。请确认该用户存在 active 章节且 " + "canonical_markdown 非空;需要与导出对照时请加载带章节的 user export 作为基线。" + ) + + logger.info( + "event=eval_memoir_judge_done user_id={} chapter_rows={} story_rows={} " + "errors={} warnings={}", + uid, + len(chapter_results), + 0, + len(errors), + len(warnings), + ) return { "user_id": uid, + "judge_provider": judge_provider, + "judge_model": resolved_model or "", "chapter_results": chapter_results, "story_results": story_results, + "errors": errors, + "warnings": warnings, } + async def iter_memoir_chapter_judge_sse( + self, + user_id: str, + baseline_sections: list[MemoirSectionBaselineOut] | None, + *, + judge_provider: EvalJudgeProvider = "zhipu", + judge_model: str | None = None, + ) -> AsyncIterator[dict[str, Any]]: + """Streaming SSE: one event per chapter judge result, concurrent LLM calls.""" + uid = (user_id or "").strip() + if not uid: + yield {"event": "error", "phase": "validate", "message": "user_id is required"} + return + + judge, resolved_model = _make_eval_judge(judge_provider, judge_model) + if not judge: + yield {"event": "error", "phase": "config", "message": _JUDGE_CONFIG_HINT} + return + + baselines = list(baseline_sections or []) + trace_svc = EvalTraceService(self._db) + + def _chapter_evidence_notes( + lineage_tier: str, evidence_summary: str, truncated: bool, dropped: list[str] + ) -> str: + drops = ",".join(dropped[:12]) if dropped else "" + return ( + "严格按文档打分;真实性、事实覆盖率、可追溯性以本章节绑定的证据闭包为准。" + f" lineage_tier={lineage_tier};evidence_summary={evidence_summary};" + f" prompt_truncated={truncated};dropped_sections={drops or 'none'}" + ) + + try: + chapters = await get_chapters_for_memoir_list( + uid, self._db, active_only=True, is_new_only=None + ) + except Exception as e: + logger.exception("manual memoir stream: chapter list failed user_id={}", uid) + yield {"event": "error", "phase": "load", "message": f"加载章节列表失败:{e}"} + return + + yield { + "event": "meta", + "user_id": uid, + "judge_provider": judge_provider, + "judge_model": resolved_model or "", + "total_chapters": len(chapters), + } + + prepared: list[dict[str, Any]] = [] + for i, ch in enumerate(chapters[:_MAX_EVAL_CHAPTERS]): + body = (ch.canonical_markdown or "").strip() + if not body: + continue + bl = _baseline_for_chapter_title(baselines, str(ch.title or ""), i) + baseline_excerpt = "" + if bl and (bl.body or "").strip(): + baseline_excerpt = _clip_md_for_judge( + bl.body, + max_chars=max( + 1000, int(settings.eval_judge_memoir_evidence_max_chars) + ), + ) + md = f"# 章节:{ch.title}\n\n{_clip_md_for_judge(body)}" + try: + cb = await trace_svc.build_chapter_bundle(uid, ch) + formatted, cb2 = await trace_svc.format_chapter_bundle(cb) + fm = formatted.format_meta + prepared.append({ + "ch": ch, "bl": bl, "md": md, + "baseline_excerpt": baseline_excerpt, + "formatted": formatted, "cb2": cb2, "fm": fm, + }) + except Exception as e: + logger.exception( + "manual memoir stream: chapter prepare failed user_id={} chapter_id={}", + uid, ch.id, + ) + yield { + "event": "chapter_error", + "chapter_id": ch.id, + "title": ch.title, + "message": f"证据打包失败:{e}", + } + + if not prepared: + yield { + "event": "warning", + "message": "未发现可评分的回忆录章节(成稿为空或无 active 章节)。", + } + + yield {"event": "chapters_prepared", "count": len(prepared)} + + conc = max(1, min(32, int(settings.eval_judge_memoir_chapter_concurrency))) + sem = asyncio.Semaphore(conc) + result_queue: asyncio.Queue[dict[str, Any] | None] = asyncio.Queue() + + async def _judge_one(idx: int, payload: dict[str, Any]) -> None: + async with sem: + ch = payload["ch"] + formatted = payload["formatted"] + cb2 = payload["cb2"] + fm = payload["fm"] + baseline_excerpt = payload["baseline_excerpt"] + md = payload["md"] + bl = payload["bl"] + ev_notes = _chapter_evidence_notes( + cb2.lineage_tier, + formatted.evidence_summary, + fm.truncated, + fm.dropped_sections, + ) + + baseline_judge_obj = None + baseline_judge_dict = None + baseline_error: str | None = None + if baseline_excerpt: + try: + bl_md = f"# 章节:{bl.title if bl else ch.title}\n\n{baseline_excerpt}" + bl_res = await judge.judge_memoir_result( + memoir_markdown=bl_md, + source_transcript=formatted.source_transcript, + structured_evidence=formatted.structured_evidence, + evidence_notes=ev_notes, + ) + baseline_judge_obj = bl_res.output + baseline_judge_dict = ( + baseline_judge_obj.model_dump() + if baseline_judge_obj + else None + ) + if bl_res.error: + baseline_error = bl_res.error + except Exception as exc: + logger.warning( + "memoir stream: baseline judge failed ch={} err={}", + ch.id, exc, + ) + baseline_error = str(exc) + + try: + cj_res = await judge.judge_memoir_result( + memoir_markdown=md, + source_transcript=formatted.source_transcript, + structured_evidence=formatted.structured_evidence, + reference_memoir_markdown=baseline_excerpt, + evidence_notes=ev_notes, + ) + except Exception as e: + logger.exception( + "manual memoir stream: chapter judge failed user_id={} chapter_id={}", + uid, ch.id, + ) + await result_queue.put({ + "event": "chapter_error", + "chapter_id": ch.id, + "title": ch.title, + "message": f"评审失败:{e}", + }) + return + cj = cj_res.output + compare_summary = build_memoir_compare_summary( + baseline_judge=baseline_judge_obj, + chapter_judge=cj, + ) + row: dict[str, Any] = { + "id": ch.id, + "title": ch.title, + "order_index": ch.order_index, + "baseline_title": bl.title if bl else None, + "lineage_tier": cb2.lineage_tier, + "evidence_summary": formatted.evidence_summary, + "evidence_trace": cb2.model_dump(), + "format_meta": fm.model_dump(), + "baseline_judge": baseline_judge_dict, + "judge": cj.model_dump() if cj else None, + "compare_summary": compare_summary, + } + if baseline_error: + row["baseline_judge_error"] = baseline_error + if cj_res.error: + row["judge_error"] = cj_res.error + if not cj and not cj_res.error: + row["judge_error"] = "empty_output" + await result_queue.put({ + "event": "chapter_judge", + "index": idx, + "chapter": row, + "ok": cj is not None, + }) + + tasks = [asyncio.create_task(_judge_one(i, p)) for i, p in enumerate(prepared)] + + finished = 0 + total = len(tasks) + while finished < total: + item = await result_queue.get() + if item is not None: + yield item + finished_now = sum(1 for t in tasks if t.done()) + if finished_now > finished: + finished = finished_now + + for t in tasks: + await t + + while not result_queue.empty(): + item = result_queue.get_nowait() + if item is not None: + yield item + + yield {"event": "done"} + async def memoir_snapshot(self, user_id: str) -> dict[str, Any]: uid = (user_id or "").strip() if not uid: diff --git a/api/app/features/evaluation/judge_schemas.py b/api/app/features/evaluation/judge_schemas.py index 0ce95a8..615e655 100644 --- a/api/app/features/evaluation/judge_schemas.py +++ b/api/app/features/evaluation/judge_schemas.py @@ -45,6 +45,14 @@ def _is_judge_list_placeholder_empty(s: str) -> bool: return False +def _safe_int_bounds(value: Any, *, default: int, ge: int, le: int) -> int: + try: + v = int(value) + except (TypeError, ValueError): + return default + return max(ge, min(le, v)) + + def _coerce_judge_str_list(value: Any) -> list[Any]: """将评审 JSON 中的 list[str] 字段从 str / null 规范为列表(兼容 GLM-5 等输出的非数组形态)。""" if value is None: @@ -176,57 +184,87 @@ class TurnJudgeOutput(BaseModel): ConversationJudgeOutput = TurnJudgeOutput +# 评审 LLM 常把细项打成「略超满分」的浮点;先钳制再校验,避免整 JSON 丢弃。 +_MEMOIR_LEAF_SCORE_BOUNDS: dict[str, tuple[float, float]] = { + "mem_fidelity": (0, 9), + "mem_factual_accuracy": (0, 5), + "mem_factual_coverage": (0, 5), + "mem_traceability": (0, 4), + "info_slot_coverage": (0, 6), + "info_sufficiency": (0, 4), + "info_density": (0, 4), + "narr_structure": (0, 6), + "narr_paragraphs": (0, 5), + "narr_pacing": (0, 3), + "lang_fluency": (0, 3), + "lang_conciseness": (0, 3), + "lang_literary": (0, 4), + "lang_controlled_expansion": (0, 4), + "lang_detail": (0, 2), + "lang_style": (0, 2), + "emo_authenticity": (0, 5), + "emo_depth": (0, 4), + "char_understanding": (0, 4), + "char_consistency": (0, 3), + "char_integration": (0, 2), + "coh_timeline": (0, 2), + "coh_cross_chapter": (0, 2), + "rich_analogy": (0, 3), + "rich_diversity": (0, 2), + "pub_editorial_cost": (0, 2), + "pub_completeness": (0, 2), +} + + class MemoirJudgeOutput(BaseModel): - """成稿回忆录评分(总分 100,子项上限见 rubric)。""" + """成稿回忆录评分(总分 100,子项上限见 rubric)。 + + 产品优先保留 **文字**(对照说明、改进建议):细项分值允许模型乱写,入模时先放宽到 + ``0–100``,再在 ``mode=\"after\"`` 中按 rubric 上限钳制并重算 total,避免因分数校验丢整段 JSON。 + """ model_config = ConfigDict(extra="ignore") - # 一、真实性与覆盖(小计最高 23;由原 25 收紧) - mem_fidelity: float = Field(ge=0, le=9, description="记忆忠实度") - mem_factual_accuracy: float = Field(ge=0, le=5, description="事实准确性") - mem_factual_coverage: float = Field(ge=0, le=5, description="事实覆盖率") - mem_traceability: float = Field(ge=0, le=4, description="记忆可追溯性") + # 细项:校验放宽到 0–100;真实满分仍以 rubric 为准,由 after 钳制 + mem_fidelity: float = Field(default=0, ge=0, le=100, description="记忆忠实度") + mem_factual_accuracy: float = Field(default=0, ge=0, le=100, description="事实准确性") + mem_factual_coverage: float = Field(default=0, ge=0, le=100, description="事实覆盖率") + mem_traceability: float = Field(default=0, ge=0, le=100, description="记忆可追溯性") - # 二、信息质量(小计最高 14;由原 15 收紧) - info_slot_coverage: float = Field(ge=0, le=6, description="槽位覆盖度") - info_sufficiency: float = Field(ge=0, le=4, description="信息充分性") - info_density: float = Field(ge=0, le=4, description="信息密度") + info_slot_coverage: float = Field(default=0, ge=0, le=100, description="槽位覆盖度") + info_sufficiency: float = Field(default=0, ge=0, le=100, description="信息充分性") + info_density: float = Field(default=0, ge=0, le=100, description="信息密度") - # 三、叙事结构(小计最高 14;由原 15 收紧) - narr_structure: float = Field(ge=0, le=6, description="故事结构") - narr_paragraphs: float = Field(ge=0, le=5, description="段落组织") - narr_pacing: float = Field(ge=0, le=3, description="节奏控制") + narr_structure: float = Field(default=0, ge=0, le=100, description="故事结构") + narr_paragraphs: float = Field(default=0, ge=0, le=100, description="段落组织") + narr_pacing: float = Field(default=0, ge=0, le=100, description="节奏控制") - # 四、语言与文笔(小计最高 18;由原 20 及六项上限一并收紧) - lang_fluency: float = Field(ge=0, le=3, description="语言流畅度") - lang_conciseness: float = Field(ge=0, le=3, description="表达精炼度") - lang_literary: float = Field(ge=0, le=4, description="文笔质量") - lang_controlled_expansion: float = Field(ge=0, le=4, description="控制性扩写能力") - lang_detail: float = Field(ge=0, le=2, description="细节还原与强化") - lang_style: float = Field(ge=0, le=2, description="风格一致性") + lang_fluency: float = Field(default=0, ge=0, le=100, description="语言流畅度") + lang_conciseness: float = Field(default=0, ge=0, le=100, description="表达精炼度") + lang_literary: float = Field(default=0, ge=0, le=100, description="文笔质量") + lang_controlled_expansion: float = Field( + default=0, ge=0, le=100, description="控制性扩写能力" + ) + lang_detail: float = Field(default=0, ge=0, le=100, description="细节还原与强化") + lang_style: float = Field(default=0, ge=0, le=100, description="风格一致性") - # 五、情感表达(小计最高 9;由原 10 收紧) - emo_authenticity: float = Field(ge=0, le=5, description="情感真实度") - emo_depth: float = Field(ge=0, le=4, description="情感深度") + emo_authenticity: float = Field(default=0, ge=0, le=100, description="情感真实度") + emo_depth: float = Field(default=0, ge=0, le=100, description="情感深度") - # 六、人物建模(小计最高 9;由原 10 收紧) - char_understanding: float = Field(ge=0, le=4, description="人物理解") - char_consistency: float = Field(ge=0, le=3, description="人物一致性") - char_integration: float = Field(ge=0, le=2, description="人物融入度") + char_understanding: float = Field(default=0, ge=0, le=100, description="人物理解") + char_consistency: float = Field(default=0, ge=0, le=100, description="人物一致性") + char_integration: float = Field(default=0, ge=0, le=100, description="人物融入度") - # 七、连贯性(小计最高 4;由原 5 收紧) - coh_timeline: float = Field(ge=0, le=2, description="时间线一致性") - coh_cross_chapter: float = Field(ge=0, le=2, description="跨章节关联") + coh_timeline: float = Field(default=0, ge=0, le=100, description="时间线一致性") + coh_cross_chapter: float = Field(default=0, ge=0, le=100, description="跨章节关联") - # 八、表达丰富度(小计最高 5) - rich_analogy: float = Field(ge=0, le=3, description="类比与引用") - rich_diversity: float = Field(ge=0, le=2, description="表达多样性") + rich_analogy: float = Field(default=0, ge=0, le=100, description="类比与引用") + rich_diversity: float = Field(default=0, ge=0, le=100, description="表达多样性") - # 九、出版就绪度(小计最高 4;由原 5 收紧) - pub_editorial_cost: float = Field(ge=0, le=2, description="编辑成本") - pub_completeness: float = Field(ge=0, le=2, description="完整度") + pub_editorial_cost: float = Field(default=0, ge=0, le=100, description="编辑成本") + pub_completeness: float = Field(default=0, ge=0, le=100, description="完整度") - total_score: float = Field(ge=0, le=100) + total_score: float = Field(default=0, ge=0, le=100) rationale: str = "" major_strengths: list[str] = Field(default_factory=list) @@ -235,24 +273,80 @@ class MemoirJudgeOutput(BaseModel): evidence_refs: list[JudgeEvidenceRef] = Field(default_factory=list) confidence: float = Field(default=0.75, ge=0.0, le=1.0) - authenticity_score: float = Field(default=0, ge=0, le=23) - information_score: float = Field(default=0, ge=0, le=14) - narrative_score: float = Field(default=0, ge=0, le=14) - language_score: float = Field(default=0, ge=0, le=18) - emotion_score: float = Field(default=0, ge=0, le=9) - character_score: float = Field(default=0, ge=0, le=9) - coherence_score: float = Field(default=0, ge=0, le=4) - richness_score: float = Field(default=0, ge=0, le=5) - publish_ready_score: float = Field(default=0, ge=0, le=4) + authenticity_score: float = Field(default=0, ge=0, le=100) + information_score: float = Field(default=0, ge=0, le=100) + narrative_score: float = Field(default=0, ge=0, le=100) + language_score: float = Field(default=0, ge=0, le=100) + emotion_score: float = Field(default=0, ge=0, le=100) + character_score: float = Field(default=0, ge=0, le=100) + coherence_score: float = Field(default=0, ge=0, le=100) + richness_score: float = Field(default=0, ge=0, le=100) + publish_ready_score: float = Field(default=0, ge=0, le=100) @model_validator(mode="before") @classmethod - def _coerce_null_lists(cls, data: Any) -> Any: - if isinstance(data, dict): - for key in ("major_strengths", "major_issues", "insufficient_evidence"): - data[key] = _coerce_judge_str_list(data.get(key)) - if data.get("evidence_refs") is None: - data["evidence_refs"] = [] + def _coerce_memoir_judge_input(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + data["rationale"] = "" if data.get("rationale") is None else str(data["rationale"]) + for key in ("major_strengths", "major_issues", "insufficient_evidence"): + data[key] = _coerce_judge_str_list(data.get(key)) + raw_refs = data.get("evidence_refs") + if not isinstance(raw_refs, list): + data["evidence_refs"] = [] + else: + clean: list[dict[str, Any]] = [] + for item in raw_refs: + if not isinstance(item, dict): + continue + clean.append( + { + "dimension": str(item.get("dimension", ""))[:200], + "turn_index": _safe_int_bounds( + item.get("turn_index"), default=-1, ge=-1, le=500_000 + ), + "snippet": str(item.get("snippet", ""))[:400], + } + ) + data["evidence_refs"] = clean + + def _loose_score(v: Any) -> float: + if v is None: + return 0.0 + try: + x = float(v) + except (TypeError, ValueError): + return 0.0 + if x != x or x in (float("inf"), float("-inf")): + return 0.0 + return max(0.0, min(100.0, x)) + + for fname in _MEMOIR_LEAF_SCORE_BOUNDS: + data[fname] = _loose_score(data.get(fname)) + _agg_keys = ( + "authenticity_score", + "information_score", + "narrative_score", + "language_score", + "emotion_score", + "character_score", + "coherence_score", + "richness_score", + "publish_ready_score", + "total_score", + ) + for fname in _agg_keys: + if fname not in data or data[fname] is None: + continue + data[fname] = _loose_score(data[fname]) + if "confidence" in data and data["confidence"] is not None: + try: + c = float(data["confidence"]) + if c != c: + raise ValueError + data["confidence"] = max(0.0, min(1.0, c)) + except (TypeError, ValueError): + del data["confidence"] return data @model_validator(mode="after") @@ -283,6 +377,13 @@ class MemoirJudgeOutput(BaseModel): refs = list(self.evidence_refs)[:12] object.__setattr__(self, "evidence_refs", refs) + for fname, (lo, hi) in _MEMOIR_LEAF_SCORE_BOUNDS.items(): + try: + raw = float(getattr(self, fname)) + except (TypeError, ValueError): + raw = 0.0 + object.__setattr__(self, fname, max(lo, min(hi, raw))) + authenticity = ( self.mem_fidelity + self.mem_factual_accuracy diff --git a/api/app/features/evaluation/judge_service.py b/api/app/features/evaluation/judge_service.py index bc03aab..7b61645 100644 --- a/api/app/features/evaluation/judge_service.py +++ b/api/app/features/evaluation/judge_service.py @@ -30,10 +30,7 @@ TJudgeOutput = TypeVar( _TURN_MAX = 768 _CONV_JUDGE_JSON_MAX = 2048 _CONV_HEADER = "【完整对话】(每轮以 `[Turn k]` 开头)\n\n" -_MEMOIR_MAX = 12000 -_MEMOIR_JSON_MAX = 1536 _COMPARE_STREAM_MAX = 6144 -_MEMOIR_EVIDENCE_MAX = 12000 def _eval_judge_prompt_char_pool_for_context(context_window_tokens: int) -> int: @@ -150,10 +147,12 @@ def _build_memoir_judge_prompt( "若存在 `lineage_tier=fallback` 或证据不足,须保守打分并写 `insufficient_evidence`。", "", ] + ev_cap = max(1, int(settings.eval_judge_memoir_evidence_max_chars)) + body_cap = max(1, int(settings.eval_judge_memoir_body_max_chars)) if notes: sections.extend(["【评审说明】", notes[:1200], ""]) if source: - sections.extend(["【原始访谈/对话证据】", source[:_MEMOIR_EVIDENCE_MAX], ""]) + sections.extend(["【原始访谈/对话证据】", source[:ev_cap], ""]) else: sections.extend( [ @@ -164,7 +163,7 @@ def _build_memoir_judge_prompt( ) if struct: sections.extend( - ["【结构化记忆证据】", struct[:_MEMOIR_EVIDENCE_MAX], ""] + ["【结构化记忆证据】", struct[:ev_cap], ""] ) else: sections.extend( @@ -175,8 +174,8 @@ def _build_memoir_judge_prompt( ] ) if reference: - sections.extend(["【参考基线/导出成稿】", reference[:_MEMOIR_EVIDENCE_MAX], ""]) - sections.extend(["【当前回忆录正文】", memoir_markdown[:_MEMOIR_MAX]]) + sections.extend(["【参考基线/导出成稿】", reference[:ev_cap], ""]) + sections.extend(["【当前回忆录正文】", memoir_markdown[:body_cap]]) return "\n".join(sections) @@ -391,11 +390,15 @@ class EvalJudgeService: self._llm, prompt, MemoirJudgeOutput, - max_tokens=_MEMOIR_JSON_MAX, + max_tokens=max(512, int(settings.eval_judge_memoir_completion_max_tokens)), agent="EvalJudgeService.judge_memoir", ) return JudgeCallResult(output=out) except LLMCallError as e: error = _judge_error_message(e) - logger.warning("memoir judge failed: {}", error) + # 回忆录评审在 INFO 也要可见(eval-web 排障);非异常路径、不刷堆栈 + logger.info( + "event=eval_memoir_judge_llm_call_failed agent=EvalJudgeService.judge_memoir msg={}", + error, + ) return JudgeCallResult(output=None, error=error) diff --git a/api/app/features/evaluation/memoir_compare_summary.py b/api/app/features/evaluation/memoir_compare_summary.py new file mode 100644 index 0000000..7eec251 --- /dev/null +++ b/api/app/features/evaluation/memoir_compare_summary.py @@ -0,0 +1,150 @@ +"""Structured A/B compare summary for internal eval memoir chapter judging. + +Mirrors `conversation_compare_summary.py`: for each chapter, take the +baseline judge and the new-chapter judge, compute group-level and leaf-level +deltas, and produce a gate verdict. +""" + +from __future__ import annotations + +from typing import Any + +from app.features.evaluation.judge_schemas import MemoirJudgeOutput + +_GROUP_KEYS: tuple[tuple[str, str, float], ...] = ( + ("authenticity_score", "记忆与真实度", 23), + ("information_score", "信息呈现", 14), + ("narrative_score", "叙事结构", 14), + ("language_score", "语言表达", 18), + ("emotion_score", "情感", 9), + ("character_score", "人物", 9), + ("coherence_score", "连贯一致", 4), + ("richness_score", "丰富度", 5), + ("publish_ready_score", "出版就绪", 4), +) + +_LEAF_KEYS: tuple[tuple[str, str, float], ...] = ( + ("mem_fidelity", "记忆忠实度", 9), + ("mem_factual_accuracy", "事实准确性", 5), + ("mem_factual_coverage", "事实覆盖率", 5), + ("mem_traceability", "记忆可追溯性", 4), + ("info_slot_coverage", "槽位覆盖度", 6), + ("info_sufficiency", "信息充分性", 4), + ("info_density", "信息密度", 4), + ("narr_structure", "故事结构", 6), + ("narr_paragraphs", "段落组织", 5), + ("narr_pacing", "节奏控制", 3), + ("lang_fluency", "语言流畅度", 3), + ("lang_conciseness", "表达精炼度", 3), + ("lang_literary", "文笔质量", 4), + ("lang_controlled_expansion", "控制性扩写能力", 4), + ("lang_detail", "细节还原与强化", 2), + ("lang_style", "风格一致性", 2), + ("emo_authenticity", "情感真实度", 5), + ("emo_depth", "情感深度", 4), + ("char_understanding", "人物理解", 4), + ("char_consistency", "人物一致性", 3), + ("char_integration", "人物融入度", 2), + ("coh_timeline", "时间线一致性", 2), + ("coh_cross_chapter", "跨章节关联", 2), + ("rich_analogy", "类比与引用", 3), + ("rich_diversity", "表达多样性", 2), + ("pub_editorial_cost", "编辑成本", 2), + ("pub_completeness", "完整度", 2), +) + + +def _round(x: float) -> float: + return round(float(x), 2) + + +def build_memoir_compare_summary( + *, + baseline_judge: MemoirJudgeOutput | None, + chapter_judge: MemoirJudgeOutput | None, +) -> dict[str, Any]: + if not chapter_judge: + return { + "mode": "single", + "gate": { + "status": "insufficient_data", + "reasons": ["缺少新稿评分,无法进行 A/B 对比。"], + }, + } + if not baseline_judge: + return { + "mode": "single", + "chapter_total": _round(chapter_judge.total_score), + "gate": { + "status": "single_side_only", + "reasons": ["缺少基线评分,仅有新稿单侧分数。"], + }, + } + + group_deltas = { + key: { + "label": label, + "max": mx, + "baseline": _round(getattr(baseline_judge, key)), + "chapter": _round(getattr(chapter_judge, key)), + "delta": _round(getattr(chapter_judge, key) - getattr(baseline_judge, key)), + } + for key, label, mx in _GROUP_KEYS + } + leaf_deltas = { + key: { + "label": label, + "max": mx, + "baseline": _round(getattr(baseline_judge, key)), + "chapter": _round(getattr(chapter_judge, key)), + "delta": _round(getattr(chapter_judge, key) - getattr(baseline_judge, key)), + } + for key, label, mx in _LEAF_KEYS + } + + total_delta = _round(chapter_judge.total_score - baseline_judge.total_score) + key_regressions = [ + v["label"] for v in leaf_deltas.values() if float(v["delta"]) <= -0.5 + ] + key_gains = [ + v["label"] for v in leaf_deltas.values() if float(v["delta"]) >= 0.5 + ] + + parity_passed = total_delta >= -2.0 and len(key_regressions) <= 3 + surpass_passed = total_delta >= 2.0 and len(key_regressions) <= 1 + + if surpass_passed: + status = "surpass" + elif parity_passed: + status = "parity" + else: + status = "regressed" + + reasons: list[str] = [] + if total_delta >= 2.0: + reasons.append("总分显著超过基线。") + elif total_delta >= -2.0: + reasons.append("总分基本追平基线。") + else: + reasons.append("总分明显落后基线。") + if key_regressions: + reasons.append(f"回落项:{'、'.join(key_regressions[:6])}。") + if key_gains: + reasons.append(f"提升项:{'、'.join(key_gains[:6])}。") + + return { + "mode": "ab", + "baseline_total": _round(baseline_judge.total_score), + "chapter_total": _round(chapter_judge.total_score), + "total_delta": total_delta, + "group_deltas": group_deltas, + "leaf_deltas": leaf_deltas, + "key_regressions": key_regressions, + "key_gains": key_gains, + "gate": { + "status": status, + "parity_passed": parity_passed, + "surpass_passed": surpass_passed, + "reasons": reasons, + }, + } diff --git a/api/app/features/evaluation/router.py b/api/app/features/evaluation/router.py index de59201..0dacfbf 100644 --- a/api/app/features/evaluation/router.py +++ b/api/app/features/evaluation/router.py @@ -10,6 +10,7 @@ from fastapi.responses import StreamingResponse from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_async_db +from app.core.memoir_pipeline_progress import get_pipeline_run_for_eval from app.features.evaluation.admin_service import EvaluationAdminService from app.features.evaluation.deps import ( get_eval_judge_manual_service, @@ -37,6 +38,7 @@ from app.features.evaluation.schemas import ( ManualJudgeMemoirBody, ManualJudgeMemoirOut, MemoirPhase1ReadyOut, + MemoirPipelineRunOut, MemoirSectionBaselineOut, MemoirSubmitOut, PlaygroundConversationJudgeOut, @@ -166,6 +168,42 @@ async def get_playground_conversation_judge( ) +@router.get( + "/users/{user_id}/memoir-pipeline-run", + response_model=MemoirPipelineRunOut, +) +async def get_memoir_pipeline_run( + user_id: str, + _auth: InternalEvalAuth, + phase1_task_id: Annotated[ + str | None, + Query(description="Phase1 Celery task id(与 memoir-submit 返回一致)"), + ] = None, + memoir_correlation_id: Annotated[ + str | None, + Query(description="流水线聚合根 ID(与日志 memoir_correlation_id 一致)"), + ] = None, +): + if not phase1_task_id and not memoir_correlation_id: + raise HTTPException( + status_code=400, + detail="provide phase1_task_id or memoir_correlation_id", + ) + if phase1_task_id and memoir_correlation_id: + raise HTTPException( + status_code=400, + detail="provide only one of phase1_task_id or memoir_correlation_id", + ) + snap = get_pipeline_run_for_eval( + user_id.strip(), + memoir_correlation_id=memoir_correlation_id, + phase1_task_id=phase1_task_id, + ) + if not snap: + raise HTTPException(status_code=404, detail="pipeline snapshot not found") + return MemoirPipelineRunOut.model_validate(snap) + + @router.get( "/sessions/{conversation_id}/memoir-phase1-ready", response_model=MemoirPhase1ReadyOut, @@ -412,6 +450,42 @@ async def judge_memoir_chapters_manual( return ManualJudgeMemoirOut.model_validate(payload) +@router.post("/judge/memoir-chapters-stream") +async def judge_memoir_chapters_stream( + body: ManualJudgeMemoirBody, + _auth: InternalEvalAuth, + judge_svc: Annotated[ + EvalJudgeManualService, Depends(get_eval_judge_manual_service) + ], +): + async def event_iter(): + try: + async for evt in judge_svc.iter_memoir_chapter_judge_sse( + body.user_id, + body.baseline_sections, + judge_provider=body.judge_provider, + judge_model=body.judge_model, + ): + yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n" + except Exception as e: + err = json.dumps( + {"event": "error", "phase": "server", "message": str(e)}, + ensure_ascii=False, + ) + yield f"data: {err}\n\n" + yield f"data: {json.dumps({'event': 'done'}, ensure_ascii=False)}\n\n" + + return StreamingResponse( + event_iter(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + @router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut) async def get_user_memoir_snapshot( user_id: str, diff --git a/api/app/features/evaluation/schemas.py b/api/app/features/evaluation/schemas.py index 7a82a86..b3bbe79 100644 --- a/api/app/features/evaluation/schemas.py +++ b/api/app/features/evaluation/schemas.py @@ -134,6 +134,19 @@ class MemoirSubmitOut(BaseModel): elapsed_ms: int | None = Field(default=None, ge=0) +class MemoirPipelineRunOut(BaseModel): + """Redis 流水线快照(memoir_pipeline_run:*);字段随迭代扩展。""" + + model_config = ConfigDict(extra="allow") + + memoir_correlation_id: str + user_id: str | None = None + started_at_utc: str | None = None + phase1: dict[str, Any] | None = None + phase2: list[Any] = Field(default_factory=list) + fanout: dict[str, Any] = Field(default_factory=dict) + + class ManualJudgeConversationBody(BaseModel): conversation_id: str """与当前评测台选中的 MD 一致,供基准 transcript / 整体打分。""" @@ -202,8 +215,15 @@ class ManualJudgeMemoirBody(BaseModel): class ManualJudgeMemoirOut(BaseModel): user_id: str + judge_provider: EvalJudgeProviderLiteral = "zhipu" + judge_model: str = "" + """本次请求实际解析后的模型 id(与 `build_eval_judge_llm_spec` 一致)。""" chapter_results: list[dict[str, Any]] = Field(default_factory=list) story_results: list[dict[str, Any]] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) + """单条章节/故事评审或列表加载失败时的可读原因(HTTP 仍为 200)。""" + warnings: list[str] = Field(default_factory=list) + """无失败但未评到任何条目时的提示(例如成稿均为空)。""" class MemoirChapterSnapOut(BaseModel): diff --git a/api/app/features/memoir/story_pipeline_sync.py b/api/app/features/memoir/story_pipeline_sync.py index 64db4a9..c841650 100644 --- a/api/app/features/memoir/story_pipeline_sync.py +++ b/api/app/features/memoir/story_pipeline_sync.py @@ -10,9 +10,10 @@ import json import re import time import uuid +from concurrent.futures import ThreadPoolExecutor from typing import Any -from sqlalchemy import select +from sqlalchemy import func, select from sqlalchemy.orm import Session, joinedload from app.agents.memoir.narrative_agent import NarrativeAgent @@ -403,11 +404,13 @@ def _gate_narrative_fidelity( llm: Any, *, existing_canonical: str | None = None, + fidelity_llm: Any | None = None, ) -> tuple[str, str]: """返回 (文本, fallback 原因);忠实度不通过时第二项为 fidelity_failed。""" from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent - if not settings.memoir_fidelity_check_enabled or not llm: + check_llm = fidelity_llm if fidelity_llm is not None else llm + if not settings.memoir_fidelity_check_enabled or not check_llm: return narrative_raw, "none" agent = FidelityCheckAgent() ex = (existing_canonical or "").strip() or None @@ -415,7 +418,7 @@ def _gate_narrative_fidelity( if agent.passes( oral_text=oral_text, narrative_json=narrative_raw, - llm=llm, + llm=check_llm, existing_canonical_markdown=ex, is_append=is_append, ): @@ -562,14 +565,23 @@ def _merge_fallback_type(gate_ft: str, apply_ft: str) -> str: def _story_meta_for_route( session: Session, candidates: list ) -> dict[str, dict[str, int]]: - meta: dict[str, dict[str, int]] = {} - for s in candidates: - sid = str(s.id) - meta[sid] = { + if not candidates: + return {} + sids = [str(s.id) for s in candidates] + stmt = ( + select(StoryVersion.story_id, func.count(StoryVersion.id)) + .where(StoryVersion.story_id.in_(sids)) + .group_by(StoryVersion.story_id) + ) + rows = session.execute(stmt).all() + counts: dict[str, int] = {str(r[0]): int(r[1] or 0) for r in rows} + return { + str(s.id): { "char_count": len((s.canonical_markdown or "").strip()), - "version_count": count_story_versions_sync(session, sid), + "version_count": counts.get(str(s.id), 0), } - return meta + for s in candidates + } def _ensure_chapter_record( @@ -613,7 +625,6 @@ def _ensure_chapter_record( ) chapter.is_new = True session.flush() - refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id)) return chapter @@ -710,6 +721,7 @@ def _execute_narrative_unit( background_voice: str = "default", occupation: str = "", memoir_correlation_id: str | None = None, + fidelity_llm: Any | None = None, ) -> tuple[str | None, bool]: """ Unified narrative unit executor: generate narrative, apply fidelity/safety, @@ -744,6 +756,7 @@ def _execute_narrative_unit( raw_gen, llm, existing_canonical=existing_for_narrative or None, + fidelity_llm=fidelity_llm, ) narrative_raw, fb_apply = _apply_narrative_fallbacks( narrative_raw, @@ -792,16 +805,7 @@ def _execute_narrative_unit( sid_log = target_story_id is_append = True else: - story_title = _maybe_generate_title( - narrative_agent, - chapter_category=chapter_category, - md=md, - slot_snippets=slot_snippets, - user_profile=user_profile, - user_birth_year=user_birth_year, - llm=llm, - oral_scope=oral_norm, - ) + story_title = _placeholder_title(chapter_category) st = create_story_with_version_sync( session, user_id=user_id, @@ -809,6 +813,21 @@ def _execute_narrative_unit( canonical_markdown=md, stage=chapter_category, ) + try: + from app.tasks.story_title_tasks import generate_story_title_after_create + + generate_story_title_after_create.delay( + str(st.id), + chapter_category, + oral_norm, + user_id, + ) + except Exception as exc: + logger.warning( + "event=story_title_enqueue_failed story_id={} err={}", + st.id, + exc, + ) ensure_chapter_story_link_sync( session, chapter_id=str(chapter.id), story_id=str(st.id) ) @@ -874,6 +893,7 @@ def _run_batch_plan_writes( background_voice: str = "default", occupation: str = "", memoir_correlation_id: str | None = None, + fidelity_llm: Any | None = None, ) -> set[str]: dispatch_ids: set[str] = set() for unit in plan.units: @@ -917,6 +937,7 @@ def _run_batch_plan_writes( background_voice=background_voice, occupation=occupation, memoir_correlation_id=memoir_correlation_id, + fidelity_llm=fidelity_llm, ) if sid: dispatch_ids.add(sid) @@ -936,6 +957,7 @@ def run_story_pipeline_for_category_batch( background_voice: str = "default", occupation: str = "", memoir_correlation_id: str | None = None, + llm_fast: Any | None = None, ) -> tuple[Chapter | None, bool, set[str]]: """ 返回 (chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)。 @@ -944,6 +966,8 @@ def run_story_pipeline_for_category_batch( narrative_agent = NarrativeAgent() route_agent = StoryRouteAgent() dispatch_ids: set[str] = set() + llm_route = llm_fast if llm_fast is not None else llm + llm_fidelity = llm_fast if llm_fast is not None else llm segment_texts = [seg.user_input_text or "" for seg in category_segments] combined_text = "\n\n".join(segment_texts) @@ -955,25 +979,40 @@ def run_story_pipeline_for_category_batch( top_k = int(settings.evidence_top_k_large_batch) emb = get_embedding_provider() embedding_available = emb.is_available() - _t0 = time.perf_counter() - try: - evidence = retrieve_evidence_sync( - session, - user_id, - combined_text, - top_k=top_k, - embedding_provider=emb, - ) - except Exception as e: - logger.warning("Evidence 检索跳过: {}", e) - evidence = { - "relevant_chunks": [], - "relevant_summaries": [], - "relevant_facts": [], - "timeline_hints": [], - "relevant_stories": [], - } - pipeline_phase_timings["evidence"] = time.perf_counter() - _t0 + + def _oral_job() -> tuple[str, float]: + t_oral = time.perf_counter() + out = normalize_oral_for_memoir(combined_text, llm=llm) + return out, time.perf_counter() - t_oral + + _t_parallel = time.perf_counter() + with ThreadPoolExecutor(max_workers=1) as pool: + oral_future = pool.submit(_oral_job) + _t_ev = time.perf_counter() + try: + evidence = retrieve_evidence_sync( + session, + user_id, + combined_text, + top_k=top_k, + embedding_provider=emb, + ) + except Exception as e: + logger.warning("Evidence 检索跳过: {}", e) + evidence = { + "relevant_chunks": [], + "relevant_summaries": [], + "relevant_facts": [], + "timeline_hints": [], + "relevant_stories": [], + } + ev_elapsed = time.perf_counter() - _t_ev + oral_for_memoir, oral_elapsed = oral_future.result() + pipeline_phase_timings["evidence"] = ev_elapsed + pipeline_phase_timings["oral_normalize"] = oral_elapsed + pipeline_phase_timings["evidence_oral_parallel_wall"] = ( + time.perf_counter() - _t_parallel + ) logger.info( "memoir_evidence_retrieved user_id={} chunks={} facts={} summaries={} stories={} vector_ok={}", @@ -986,9 +1025,6 @@ def run_story_pipeline_for_category_batch( ) evidence_text = format_evidence_chunks_for_prompt(evidence) - _t0 = time.perf_counter() - oral_for_memoir = normalize_oral_for_memoir(combined_text, llm=llm) - pipeline_phase_timings["oral_normalize"] = time.perf_counter() - _t0 ct_raw = (combined_text or "").strip() om_norm = (oral_for_memoir or "").strip() if ct_raw != om_norm: @@ -997,7 +1033,6 @@ def run_story_pipeline_for_category_batch( len(ct_raw), len(om_norm), ) - new_content_input = format_narrative_user_content(oral_for_memoir, evidence_text) logger.info( "event=memoir_story_pipeline_start memoir_correlation_id={} user_id={} " "chapter_category={} segment_count={}", @@ -1042,7 +1077,7 @@ def run_story_pipeline_for_category_batch( _t0 = time.perf_counter() use_batch_plan = ( - llm + llm_route and len(category_segments) >= 2 and len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS ) @@ -1054,7 +1089,7 @@ def run_story_pipeline_for_category_batch( chapter_title=title, segments=segs, candidate_stories=candidates, - llm=llm, + llm=llm_route, valid_story_ids=valid_ids, story_meta=story_meta, ) @@ -1091,6 +1126,7 @@ def run_story_pipeline_for_category_batch( background_voice=background_voice, occupation=occupation, memoir_correlation_id=memoir_correlation_id, + fidelity_llm=llm_fidelity, ) else: route = route_agent.decide( @@ -1098,12 +1134,12 @@ def run_story_pipeline_for_category_batch( chapter_title=title, batch_transcript=route_transcript, candidate_stories=candidates, - llm=llm, + llm=llm_route, valid_story_ids=valid_ids, story_meta=story_meta, ) - decision_source = "fallback_no_llm" if not llm else "single_decide" + decision_source = "fallback_no_llm" if not llm_route else "single_decide" target_story_id, existing_for_narrative, decision_source = _resolve_append_target( session, route_decision=route.decision, @@ -1141,6 +1177,7 @@ def run_story_pipeline_for_category_batch( background_voice=background_voice, occupation=occupation, memoir_correlation_id=memoir_correlation_id, + fidelity_llm=llm_fidelity, ) if sid: dispatch_ids.add(sid) diff --git a/api/app/features/memory/enrichment.py b/api/app/features/memory/enrichment.py index ad9e795..e639ef2 100644 --- a/api/app/features/memory/enrichment.py +++ b/api/app/features/memory/enrichment.py @@ -1,7 +1,8 @@ """ -Transcript ingest 之后的记忆富化:摘要、事实、时间线。 +Transcript ingest 之后的记忆富化:单次 LLM 调用产出会话摘要 + 结构化事实。 由 Celery(sync)与 MemoryService.ingest(async)调用;失败仅打日志,不阻断主流程。 +不再维护 ingest 路径上的 rolling 摘要与 timeline 物化。 """ from __future__ import annotations @@ -12,38 +13,25 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import Session +from app.core.langchain_llm import ainvoke_json_object, invoke_json_object from app.core.logging import get_logger from app.features.memory.enrichment_pipeline import ( dedupe_key, normalize_object_json, normalize_subject, ) -from app.features.memory.extractor import ( - extract_facts_from_transcript_async, - extract_facts_from_transcript_sync, +from app.features.memory.llm_schemas import ( + EnrichmentPayload, + enrichment_payload_to_fact_dicts, + parse_json_payload, ) -from app.features.memory.models import MemoryChunk, MemorySource, MemorySummary +from app.features.memory.models import MemoryChunk, MemorySource from app.features.memory.repo import ( create_memory_fact, create_memory_fact_sync, create_memory_summary, create_memory_summary_sync, - create_timeline_event, - create_timeline_event_sync, - delete_timeline_events_by_memory_source, - delete_timeline_events_by_memory_source_sync, list_chunks_for_source_sync, - upsert_rolling_summary_sync, -) -from app.features.memory.summarizer import ( - generate_rolling_summary_async, - generate_rolling_summary_sync, - generate_session_summary_async, - generate_session_summary_sync, -) -from app.features.memory.timeline import ( - build_timeline_events_from_facts_async, - build_timeline_events_from_facts_sync, ) from app.features.user.models import User @@ -65,6 +53,77 @@ def _resolve_llm_sync() -> Any | None: return None +def _max_enrichment_chars() -> int: + from app.core.config import settings + + return settings.memory_enrichment_max_chars + + +def _enrichment_prompt(numbered_blocks: str, narrator_label: str) -> str: + """合并会话摘要说明与事实抽取规则,供单次 JSON 输出。""" + text = numbered_blocks.strip()[: _max_enrichment_chars()] + return ( + "你是回忆录记忆分析助手。用户正在口述人生回忆,所有内容默认是**过去发生的事**," + "而非当前或未来计划(除非原文明确说「现在」「打算」「准备将要」等)。\n\n" + "请从下列口述内容中完成两件事:\n" + "1) 用 2~8 句中文概括要点(不编造、不评价)\n" + "2) 抽取结构化事实列表(见下方规则)\n\n" + "## 事实抽取规则\n" + "1. subject 必须用明确的人名或固定称谓:\n" + f" - 叙述者本人统一用「{narrator_label}」\n" + " - 其他人用全名或稳定专名(如「王伟」),禁止用「他」「她」「我」「我们大伙」等代词作 subject;" + "若代词在上下文中可唯一解析为某人,则 subject 写该人姓名/专名\n" + "2. 事件、职务变动、地点迁移等一律按**过去回忆**理解;travel/调动/命令类表述勿写成「即将要做」" + "除非原文明确为未来时态\n" + "3. 若可推断大约年代或人生阶段,将 approximate_era 写入 object_json(与 value 等字段并存)," + '例如 "1990年代"、"2001年"、"退休后"、"30岁前后"\n' + "4. fact_type: person|event|relation|place|milestone\n" + "5. predicate:简短中文谓语(如「出生地」「担任职务」「调往」)\n" + "6. object_json:字符串或对象;可含 value、approximate_era 等\n" + "7. confidence 0..1;source_chunk_id 必须等于某段 [chunk_id=...] 中的 id\n\n" + '只输出 JSON:{"summary":"...","facts":[...]},无事实则 "facts":[]。\n\n' + f"{text}" + ) + + +def _run_enrichment_llm_sync( + llm: Any, numbered: str, narrator_label: str +) -> EnrichmentPayload | None: + if not llm or not (numbered or "").strip(): + return None + prompt = _enrichment_prompt(numbered, narrator_label) + try: + raw = invoke_json_object( + llm, + prompt, + max_tokens=8192, + agent="memory.enrichment_sync", + ) + return parse_json_payload(raw, EnrichmentPayload) + except (TypeError, ValueError) as e: + logger.warning("enrichment LLM sync 解析失败: {}", e) + return None + + +async def _run_enrichment_llm_async( + llm: Any, numbered: str, narrator_label: str +) -> EnrichmentPayload | None: + if not llm or not (numbered or "").strip(): + return None + prompt = _enrichment_prompt(numbered, narrator_label) + try: + raw = await ainvoke_json_object( + llm, + prompt, + max_tokens=8192, + agent="memory.enrichment_async", + ) + return parse_json_payload(raw, EnrichmentPayload) + except (TypeError, ValueError) as e: + logger.warning("enrichment LLM async 解析失败: {}", e) + return None + + def enrich_memory_after_ingest_sync( session: Session, user_id: str, @@ -88,13 +147,18 @@ def enrich_memory_after_ingest_sync( return src_row = session.get(MemorySource, source_id) lineage_snapshot = _lineage_snapshot_from_source(src_row) - chunk_texts = [c.content for c in chunks] chunk_ids = [c.id for c in chunks] + chunk_texts = [c.content for c in chunks] numbered = "\n\n".join( f"[chunk_id={cid}]\n{txt}" for cid, txt in zip(chunk_ids, chunk_texts) ) + narrator_label = (narrator_name or "").strip() or "叙述者" - session_summary_text = generate_session_summary_sync(llm, chunk_texts) + payload = _run_enrichment_llm_sync(llm, numbered, narrator_label) + if payload is None: + return + + session_summary_text = str(payload.summary or "").strip() if session_summary_text: create_memory_summary_sync( session, @@ -104,34 +168,8 @@ def enrich_memory_after_ingest_sync( source_chunk_ids=chunk_ids, ) - existing_rolling = ( - session.execute( - select(MemorySummary) - .where( - MemorySummary.user_id == user_id, - MemorySummary.summary_type == "rolling", - ) - .order_by(MemorySummary.updated_at.desc()) - .limit(1) - ) - .unique() - .scalar_one_or_none() - ) - existing_text = existing_rolling.content if existing_rolling else None - rolling_text = generate_rolling_summary_sync(llm, existing_text, chunk_texts) - if rolling_text: - upsert_rolling_summary_sync( - session, - user_id=user_id, - content=rolling_text, - source_chunk_ids=chunk_ids, - ) - - raw_facts = extract_facts_from_transcript_sync( - llm, numbered, narrator_name=narrator_name - ) + raw_facts = enrichment_payload_to_fact_dicts(payload) seen: set[tuple] = set() - inserted: list[dict] = [] for f in raw_facts: key = dedupe_key(f, narrator_name=narrator_name) if key in seen: @@ -140,7 +178,7 @@ def enrich_memory_after_ingest_sync( scid = f.get("source_chunk_id") if scid and scid not in chunk_ids: scid = chunk_ids[0] if chunk_ids else None - row = create_memory_fact_sync( + create_memory_fact_sync( session, user_id=user_id, fact_type=f.get("fact_type") or "event", @@ -152,33 +190,6 @@ def enrich_memory_after_ingest_sync( status="confirmed", lineage_json=lineage_snapshot, ) - inserted.append( - { - "id": row.id, - "fact_type": row.fact_type, - "subject": row.subject, - "predicate": row.predicate, - "object_json": row.object_json, - } - ) - - if inserted: - delete_timeline_events_by_memory_source_sync( - session, user_id=user_id, memory_source_id=source_id - ) - events = build_timeline_events_from_facts_sync(llm, inserted) - for ev in events: - create_timeline_event_sync( - session, - user_id=user_id, - event_year=ev.get("event_year"), - event_date=ev.get("event_date"), - title=ev["title"], - description=ev.get("description"), - source_fact_ids=ev.get("source_fact_ids") or None, - memory_source_id=source_id, - lineage_json=lineage_snapshot, - ) async def enrich_memory_after_ingest_async( @@ -210,13 +221,18 @@ async def enrich_memory_after_ingest_async( return src_row = await db.get(MemorySource, source_id) lineage_snapshot = _lineage_snapshot_from_source(src_row) - chunk_texts = [c.content for c in chunks] chunk_ids = [c.id for c in chunks] + chunk_texts = [c.content for c in chunks] numbered = "\n\n".join( f"[chunk_id={cid}]\n{txt}" for cid, txt in zip(chunk_ids, chunk_texts) ) + narrator_label = (narrator_name or "").strip() or "叙述者" - session_summary_text = await generate_session_summary_async(llm, chunk_texts) + payload = await _run_enrichment_llm_async(llm, numbered, narrator_label) + if payload is None: + return + + session_summary_text = str(payload.summary or "").strip() if session_summary_text: await create_memory_summary( db, @@ -226,38 +242,8 @@ async def enrich_memory_after_ingest_async( source_chunk_ids=chunk_ids, ) - roll_stmt = ( - select(MemorySummary) - .where( - MemorySummary.user_id == user_id, - MemorySummary.summary_type == "rolling", - ) - .order_by(MemorySummary.updated_at.desc()) - .limit(1) - ) - r_result = await db.execute(roll_stmt) - existing_row = r_result.unique().scalar_one_or_none() - existing_text = existing_row.content if existing_row else None - - rolling_text = await generate_rolling_summary_async(llm, existing_text, chunk_texts) - if rolling_text: - if existing_row: - existing_row.content = rolling_text - existing_row.source_chunk_ids = chunk_ids - else: - await create_memory_summary( - db, - user_id=user_id, - summary_type="rolling", - content=rolling_text, - source_chunk_ids=chunk_ids, - ) - - raw_facts = await extract_facts_from_transcript_async( - llm, numbered, narrator_name=narrator_name - ) + raw_facts = enrichment_payload_to_fact_dicts(payload) seen: set[tuple] = set() - inserted: list[dict] = [] for f in raw_facts: key = dedupe_key(f, narrator_name=narrator_name) if key in seen: @@ -266,7 +252,7 @@ async def enrich_memory_after_ingest_async( scid = f.get("source_chunk_id") if scid and scid not in chunk_ids: scid = chunk_ids[0] if chunk_ids else None - row = await create_memory_fact( + await create_memory_fact( db, user_id=user_id, fact_type=f.get("fact_type") or "event", @@ -278,30 +264,3 @@ async def enrich_memory_after_ingest_async( status="confirmed", lineage_json=lineage_snapshot, ) - inserted.append( - { - "id": row.id, - "fact_type": row.fact_type, - "subject": row.subject, - "predicate": row.predicate, - "object_json": row.object_json, - } - ) - - if inserted: - await delete_timeline_events_by_memory_source( - db, user_id=user_id, memory_source_id=source_id - ) - events = await build_timeline_events_from_facts_async(llm, inserted) - for ev in events: - await create_timeline_event( - db, - user_id=user_id, - event_year=ev.get("event_year"), - event_date=ev.get("event_date"), - title=ev["title"], - description=ev.get("description"), - source_fact_ids=ev.get("source_fact_ids") or None, - memory_source_id=source_id, - lineage_json=lineage_snapshot, - ) diff --git a/api/app/features/memory/evidence.py b/api/app/features/memory/evidence.py index 0ae25a1..447f459 100644 --- a/api/app/features/memory/evidence.py +++ b/api/app/features/memory/evidence.py @@ -11,12 +11,14 @@ Celery 使用 sync + 向量 chunks;`HybridRetriever` 使用 async + 向量 chu from __future__ import annotations +from concurrent.futures import ThreadPoolExecutor from typing import TYPE_CHECKING from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import Session from app.core.config import settings +from app.core.db import get_sync_db from app.core.logging import get_logger from app.features.memory.repo import ( list_summaries_for_evidence_async, @@ -88,7 +90,7 @@ def _stories_to_dicts(story_rows) -> list[dict]: def fetch_evidence_metadata_sync( session: Session, user_id: str, q: str, top_k: int ) -> dict: - """非 chunk 证据:摘要、事实、时间线、故事(sync)。""" + """非 chunk 证据:摘要、事实、时间线、故事(sync)。保留 session 入参供单连接路径使用。""" facts = search_facts_for_user_sync(session, user_id, q, top_k) events = search_timeline_events_for_user_sync(session, user_id, q, top_k) relevant_summaries = list_summaries_for_evidence_sync( @@ -105,6 +107,49 @@ def fetch_evidence_metadata_sync( } +def fetch_evidence_metadata_parallel_sync(user_id: str, q: str, top_k: int) -> dict: + """ + 与 fetch_evidence_metadata_sync 等价语义;四路查询各用独立 sync Session 并行,降低总 RTT。 + """ + + def _facts(): + with get_sync_db() as session: + return search_facts_for_user_sync(session, user_id, q, top_k) + + def _events(): + with get_sync_db() as session: + return search_timeline_events_for_user_sync(session, user_id, q, top_k) + + def _summaries(): + with get_sync_db() as session: + return list_summaries_for_evidence_sync( + session, user_id=user_id, q=q, limit=top_k + ) + + def _stories(): + with get_sync_db() as session: + return list_recent_stories_for_evidence_sync( + session, user_id, query=q, limit=top_k + ) + + with ThreadPoolExecutor(max_workers=4) as pool: + f_facts = pool.submit(_facts) + f_events = pool.submit(_events) + f_summaries = pool.submit(_summaries) + f_stories = pool.submit(_stories) + facts = f_facts.result() + events = f_events.result() + relevant_summaries = f_summaries.result() + story_rows = f_stories.result() + + return { + "relevant_facts": _facts_to_dicts(facts), + "timeline_hints": _timeline_to_dicts(events), + "relevant_summaries": relevant_summaries, + "relevant_stories": _stories_to_dicts(story_rows), + } + + async def fetch_evidence_metadata_async( db: AsyncSession, user_id: str, q: str, top_k: int ) -> dict: @@ -255,7 +300,7 @@ def retrieve_evidence_bundle_sync( "retrieve_evidence_bundle_sync no_embedding_provider user_id={}", user_id, ) - meta = fetch_evidence_metadata_sync(session, user_id, q, top_k) + meta = fetch_evidence_metadata_parallel_sync(user_id, q, top_k) return { "relevant_chunks": relevant_chunks, **meta, diff --git a/api/app/features/memory/llm_schemas.py b/api/app/features/memory/llm_schemas.py index 5673181..67b57b8 100644 --- a/api/app/features/memory/llm_schemas.py +++ b/api/app/features/memory/llm_schemas.py @@ -31,6 +31,13 @@ class FactsExtractionPayload(BaseModel): facts: list[ExtractedFactItem] = Field(default_factory=list) +class EnrichmentPayload(BaseModel): + """单轮记忆富化:会话摘要 + 结构化事实(ingest 后一次 LLM 调用)。""" + + summary: str = "" + facts: list[ExtractedFactItem] = Field(default_factory=list) + + class SessionSummaryPayload(BaseModel): summary: str = "" @@ -85,6 +92,11 @@ def facts_payload_to_dicts(payload: FactsExtractionPayload) -> list[dict]: return out +def enrichment_payload_to_fact_dicts(payload: EnrichmentPayload) -> list[dict]: + """将 EnrichmentPayload.facts 转为与 extract_facts 一致的字典列表。""" + return facts_payload_to_dicts(FactsExtractionPayload(facts=list(payload.facts))) + + def timeline_payload_to_dicts(payload: TimelineEventsPayload) -> list[dict]: out: list[dict] = [] for ev in payload.events: diff --git a/api/app/features/memory/service.py b/api/app/features/memory/service.py index 1d5ae4f..43a44f1 100644 --- a/api/app/features/memory/service.py +++ b/api/app/features/memory/service.py @@ -2,7 +2,7 @@ MemoryService — conversation / memoir 的统一门面。 - ingest_transcript: transcript -> memory_sources, chunks, embedding -- ingest 后可选:LLM 富化(session/rolling 摘要、事实、时间线) +- ingest 成功后:向 ``memory_idle`` 队列派发 LLM 富化(见 ``schedule_memory_enrichment``),不阻塞请求 - retrieve: 委托 HybridRetriever 返回 evidence bundle(向量 chunks) Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`,与异步路径对齐见 @@ -12,6 +12,9 @@ Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`,与异步 from sqlalchemy.ext.asyncio import AsyncSession from app.core.logging import get_logger +from app.features.conversation.lineage_schemas import ( + primary_user_message_id_from_lineage, +) from app.features.memory.chunker import chunk_transcript from app.features.memory.repo import ( create_chunk, @@ -21,7 +24,6 @@ from app.features.memory.repo import ( set_memory_fact_status, update_chunk_embedding, ) -from app.features.conversation.lineage_schemas import primary_user_message_id_from_lineage from app.features.memory.schemas import EvidenceBundle from app.ports.embedding import EmbeddingProvider @@ -95,29 +97,23 @@ class MemoryService: vectors_written += 1 await update_chunk_embedding(self._db, chunk_id, emb) - enrichment_ok: bool | None = None - try: - from app.core.dependencies import get_llm_provider_fast - from app.features.memory.enrichment import enrich_memory_after_ingest_async - - if settings.memory_enrichment_enabled: - llm = get_llm_provider_fast().langchain_llm - await enrich_memory_after_ingest_async( - self._db, user_id, source.id, llm - ) - enrichment_ok = True - except Exception as e: - if settings.memory_enrichment_enabled: - enrichment_ok = False - logger.warning( - "memory enrichment 跳过: {} exc_type={}", e, type(e).__name__ - ) - await self._db.commit() emb_ok = self._embedding.is_available() if self._embedding else False + enrichment_task_id: str | None = None + try: + from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment + + enrichment_task_id = schedule_memory_enrichment( + user_id, source.id, memoir_correlation_id=None + ) + except Exception as e: + logger.warning( + "memory enrichment 派发跳过: {} exc_type={}", e, type(e).__name__ + ) + logger.info( "event=memory_ingest_done user_id={} conversation_id={} source_id={} " - "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_ok={}", + "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={}", user_id, conversation_id, source.id, @@ -125,7 +121,7 @@ class MemoryService: vectors_written, emb_ok, settings.memory_enrichment_enabled, - enrichment_ok, + enrichment_task_id, ) return source.id @@ -275,7 +271,6 @@ def ingest_transcript_sync( vectors_written = 0 embedding_available = False - enrichment_ok: bool | None = None try: embedding_provider = get_embedding_provider() @@ -290,7 +285,7 @@ def ingest_transcript_sync( embedding_provider = None # 向量写入在 SAVEPOINT 内;失败仅回滚本段,source/chunks 主体仍可由外层提交。 - # enrichment 已迁移到独立异步任务 (memory_enrichment_tasks.enrich_memory_source)。 + # LLM enrichment 在 commit 后由 schedule_memory_enrichment 入 memory_idle 队列。 try: with session.begin_nested(): if chunk_records and embedding_provider is not None: @@ -309,14 +304,15 @@ def ingest_transcript_sync( session.commit() + enrichment_task_id: str | None = None if settings.memory_enrichment_enabled: try: - from app.tasks.memory_enrichment_tasks import enrich_memory_source + from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment - enrich_memory_source.delay(user_id, source.id) - enrichment_ok = True + enrichment_task_id = schedule_memory_enrichment( + user_id, source.id, memoir_correlation_id=None + ) except Exception as e: - enrichment_ok = False logger.warning( "memory enrichment 任务派发失败: {} exc_type={}", e, @@ -325,7 +321,7 @@ def ingest_transcript_sync( logger.info( "event=memory_ingest_done user_id={} conversation_id={} source_id={} " - "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_ok={} sync=1", + "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={} sync=1", user_id, conversation_id, source.id, @@ -333,6 +329,122 @@ def ingest_transcript_sync( vectors_written, embedding_available, settings.memory_enrichment_enabled, - enrichment_ok, + enrichment_task_id, ) return source.id + + +def ingest_transcripts_batch_sync( + session, + user_id: str, + items: list[tuple[str, str, dict | None]], +) -> list[str]: + """ + Phase1 批量:多段 transcript 在同一会话内建 source/chunks,并单次 embed_texts_sync(在适配器 batch 限制内)。 + + 不 commit;不派发 enrichment(由调用方 commit 后 ``schedule_enrichment_for_sources``)。 + items: (conversation_id, transcript, lineage_json) + 返回与有效 items 顺序一致的 source_id 列表。 + """ + from app.core.dependencies import get_embedding_provider + from app.features.memory.chunker import chunk_transcript + from app.features.memory.repo import ( + create_chunk_sync, + create_source_sync, + update_chunk_embedding_sync, + ) + + source_ids: list[str] = [] + all_chunk_records: list[tuple[str, str]] = [] + + for conversation_id, transcript, lineage_json in items: + text = (transcript or "").strip() + if not text: + continue + primary_mid = ( + primary_user_message_id_from_lineage(lineage_json) + if lineage_json + else None + ) + source = create_source_sync( + session, + user_id=user_id, + source_type="transcript", + raw_text=text, + conversation_id=conversation_id or None, + lineage_json=lineage_json, + primary_user_message_id=primary_mid, + ) + session.flush() + + chunks_text = chunk_transcript(text) + for i, content in enumerate(chunks_text): + chunk = create_chunk_sync( + session, + source_id=source.id, + user_id=user_id, + content=content, + chunk_index=i, + ) + session.flush() + all_chunk_records.append((chunk.id, content)) + source_ids.append(source.id) + + embedding_provider = None + try: + embedding_provider = get_embedding_provider() + except Exception as e: + logger.warning( + "memory embedding provider 不可用(batch sync): {} exc_type={}", + e, + type(e).__name__, + ) + + vectors_written = 0 + try: + with session.begin_nested(): + if all_chunk_records and embedding_provider is not None: + texts = [content for _, content in all_chunk_records] + embeddings = embedding_provider.embed_texts_sync(texts) + for (chunk_id, _), emb in zip(all_chunk_records, embeddings): + if emb: + vectors_written += 1 + update_chunk_embedding_sync(session, chunk_id, emb) + except Exception as e: + logger.warning( + "memory embedding 跳过(batch sync): {} exc_type={}", + e, + type(e).__name__, + ) + + emb_ok = ( + embedding_provider.is_available() + if embedding_provider is not None + else False + ) + logger.info( + "event=memory_ingest_batch_done user_id={} sources={} chunks={} " + "vectors_written={} embedding_available={}", + user_id, + len(source_ids), + len(all_chunk_records), + vectors_written, + emb_ok, + ) + return source_ids + + +def schedule_enrichment_for_sources( + user_id: str, + source_ids: list[str], + *, + memoir_correlation_id: str | None = None, +) -> None: + """After successful ingest commit, enqueue LLM enrichment for each source (memory_idle queue).""" + from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment + + for sid in source_ids: + if sid: + schedule_memory_enrichment( + user_id, sid, memoir_correlation_id=memoir_correlation_id + ) diff --git a/api/app/features/story/post_commit.py b/api/app/features/story/post_commit.py index 00ba729..d5367a2 100644 --- a/api/app/features/story/post_commit.py +++ b/api/app/features/story/post_commit.py @@ -15,6 +15,7 @@ import redis from app.core.config import settings from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import merge_pipeline_run from app.core.memory_compaction_schedule import schedule_memory_compaction_run logger = get_logger(__name__) @@ -92,8 +93,26 @@ def enqueue_story_post_commit_effects( exc, ) try: - cast(Any, gen_story_image_task).delay(sid) + img_ar = cast(Any, gen_story_image_task).delay( + sid, memoir_correlation_id=memoir_correlation_id + ) result.enqueued_story_image_count += 1 + tid_img = getattr(img_ar, "id", None) + if memoir_correlation_id and tid_img: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "story_images": [ + { + "story_id": sid, + "task_id": str(tid_img), + "status": "enqueued", + } + ] + }, + }, + ) except Exception as exc: logger.warning( "generate_story_image.delay failed story={} trigger={}: {}", @@ -115,10 +134,31 @@ def enqueue_story_post_commit_effects( cd = int(settings.recompose_chapter_delay_seconds) for cid in sorted(chapter_ids): try: - cast(Any, recompose_chapter_task).apply_async( - args=[cid], countdown=max(0, cd) + rkwargs: dict[str, Any] = {} + if memoir_correlation_id: + rkwargs["memoir_correlation_id"] = memoir_correlation_id + rec_ar = cast(Any, recompose_chapter_task).apply_async( + args=[cid], + kwargs=rkwargs, + countdown=max(0, cd), ) result.enqueued_chapter_recompose_count += 1 + tid_rec = getattr(rec_ar, "id", None) + if memoir_correlation_id and tid_rec: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "recompose_chapters": [ + { + "chapter_id": cid, + "task_id": str(tid_rec), + "status": "enqueued", + } + ] + }, + }, + ) except Exception as exc: logger.warning( "recompose_chapter.apply_async failed chapter={} trigger={}: {}", @@ -140,6 +180,18 @@ def enqueue_story_post_commit_effects( ctx.update(compaction_extra) schedule_memory_compaction_run(user_id, ctx) result.compaction_scheduled = True + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "compaction": { + "status": "scheduled", + "note": "debounce", + }, + }, + }, + ) except Exception as exc: logger.warning( "schedule_memory_compaction_run failed user_id={} trigger={}: {}", @@ -156,12 +208,25 @@ def enqueue_story_post_commit_effects( ) cd = int(settings.memoir_quality_pass_delay_seconds) - cast(Any, quality_pass_task).apply_async( + qp_ar = cast(Any, quality_pass_task).apply_async( args=[user_id, sorted(story_ids), sorted(chapter_ids)], kwargs={"memoir_correlation_id": memoir_correlation_id}, countdown=max(0, cd), ) result.quality_pass_scheduled = True + tid_qp = getattr(qp_ar, "id", None) + if memoir_correlation_id and tid_qp: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": { + "task_id": str(tid_qp), + "status": "enqueued", + }, + }, + }, + ) except Exception as exc: logger.warning( "memoir_quality_pass enqueue failed user_id={} trigger={}: {}", diff --git a/api/app/main.py b/api/app/main.py index 9c8cbf3..a9ae9dd 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -121,7 +121,7 @@ async def startup_event(): await asyncio.to_thread(_init_wechat_pay_client) logger.info("微信支付客户端已预初始化") except Exception as e: - logger.warning(f"微信支付预初始化失败(首次下单时再初始化): {e}") + logger.warning("微信支付预初始化失败(首次下单时再初始化): {}", e) @app.on_event("shutdown") diff --git a/api/app/tasks/celery_app.py b/api/app/tasks/celery_app.py index 445246c..ce5bece 100644 --- a/api/app/tasks/celery_app.py +++ b/api/app/tasks/celery_app.py @@ -5,14 +5,21 @@ Worker 启动时需聚合注册所有 feature 的 model,否则 User 等 relati 与 main.py / Alembic 一致:下方 import 仅用于注册 ORM model。 """ -from app.core.logging import setup_logging +from __future__ import annotations + +from typing import Any + +from app.core.logging import get_logger, setup_logging # 与 app.main 一致:先配置 loguru + InterceptHandler,再加载会打日志的依赖 setup_logging() from celery import Celery +from celery.signals import task_failure, task_postrun, task_prerun, task_success +from app.core.celery_log_context import clear_celery_log_extras, set_celery_log_extras from app.core.config import settings +from app.core.log_events import celery_prerun_extras from app.features.asset import models as _asset_models # noqa: F401 - register Asset from app.features.auth import models as _auth_models # noqa: F401 from app.features.conversation import models as _conv_models # noqa: F401 @@ -32,6 +39,7 @@ celery_app = Celery( backend=REDIS_URL, include=[ "app.tasks.memoir_tasks", + "app.tasks.story_title_tasks", "app.tasks.story_image_tasks", "app.tasks.chapter_cover_tasks", "app.tasks.chapter_compose_tasks", @@ -63,12 +71,99 @@ celery_app.conf.update( # 任务重试设置 task_acks_late=True, # 任务完成后再确认 task_reject_on_worker_lost=True, # worker 丢失时拒绝任务 - # 不设置自定义队列路由,使用 Celery 默认队列 + task_routes={ + "app.tasks.memory_enrichment_tasks.enrich_memory_source": { + "queue": settings.celery_memory_enrichment_queue, + }, + }, ) +celery_app.conf.task_annotations = { + "app.tasks.memory_enrichment_tasks.enrich_memory_source": { + "soft_time_limit": 660, + "time_limit": 960, + }, +} + celery_app.conf.beat_schedule = { "memory-compaction-sweep": { "task": "app.tasks.memory_compaction_tasks.memory_compaction_sweep", "schedule": 6 * 3600.0, }, } + +_celery_lifecycle_log = get_logger(__name__) + + +def _summarize_task_return(retval: object) -> str: + if retval is None: + return "None" + if isinstance(retval, dict): + keys = list(retval.keys())[:14] + return "dict:" + ",".join(str(k) for k in keys) + text = repr(retval) + if len(text) > 180: + return text[:180] + "..." + return text + + +@task_prerun.connect +def _log_task_prerun( + task_id: str | None = None, + task: object | None = None, + args: tuple[Any, ...] | None = None, + kwargs: dict[str, Any] | None = None, + **_: object, +) -> None: + name = getattr(task, "name", None) or "?" + extras = celery_prerun_extras(name, tuple(args or ()), dict(kwargs or {})) + if task_id: + extras["task_id"] = str(task_id).strip() + set_celery_log_extras(extras if extras else None) + _celery_lifecycle_log.info( + "event=celery_task_start task={} task_id={} msg=Celery 任务已开始", + name, + task_id, + ) + + +@task_success.connect +def _log_task_success(sender: object | None = None, result: object | None = None, **_: object) -> None: + """仅成功路径;失败见 ``task_failure``(避免 ``task_postrun`` 在异常态仍触发)。""" + name = getattr(sender, "name", None) if sender is not None else None + name = name or "?" + task_id: str | None = None + if sender is not None: + req = getattr(sender, "request", None) + if req is not None: + task_id = getattr(req, "id", None) + _celery_lifecycle_log.info( + "event=celery_task_ok task={} task_id={} result={} msg=Celery 任务已成功结束", + name, + task_id, + _summarize_task_return(result), + ) + + +@task_failure.connect +def _log_task_failure( + task_id: str | None = None, + task: object | None = None, + exception: BaseException | None = None, + **kwargs: object, +) -> None: + name = getattr(task, "name", None) or "?" + et = type(exception).__name__ if isinstance(exception, BaseException) else "?" + _celery_lifecycle_log.warning( + "event=celery_task_failed task={} task_id={} exc_type={} exc={} msg=Celery 任务失败", + name, + task_id, + et, + exception, + ) + + +@task_postrun.connect +def _clear_worker_log_context(**kwargs: object) -> None: + """任务体结束后清除 ContextVar,避免同一 worker 进程串上下文。""" + clear_celery_log_extras() diff --git a/api/app/tasks/chapter_compose_tasks.py b/api/app/tasks/chapter_compose_tasks.py index a3f3efc..98f71c8 100644 --- a/api/app/tasks/chapter_compose_tasks.py +++ b/api/app/tasks/chapter_compose_tasks.py @@ -1,5 +1,6 @@ """Celery:story 变更后重组关联章节的 canonical_markdown(物化视图)。""" +import time from datetime import datetime, timezone from celery import shared_task @@ -11,6 +12,7 @@ from app.core.chapter_pipeline_lock import ( from app.core.config import settings from app.core.db import get_sync_db from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import merge_fanout_item from app.core.memoir_pipeline_trace import new_memoir_correlation_id from app.core.memory_compaction_schedule import schedule_memory_compaction_run from app.features.memoir import repo as memoir_repo @@ -20,40 +22,88 @@ logger = get_logger(__name__) @shared_task(bind=True, max_retries=8, default_retry_delay=30) -def recompose_chapter(self, chapter_id: str) -> dict: +def recompose_chapter( + self, chapter_id: str, memoir_correlation_id: str | None = None +) -> dict: """ 按章节物化 canonical_markdown:仅当 markdown_compose_dirty 为 True 时执行; 与 pipeline 共用章节级 Redis 锁,拿不到锁则跳过(依赖后续触发重试)。 """ lock_ttl = int(settings.chapter_pipeline_lock_ttl_seconds) + tid = str(self.request.id) + t0 = time.perf_counter() + merge_fanout_item( + memoir_correlation_id, + list_name="recompose_chapters", + id_field="chapter_id", + item_id=chapter_id, + task_id=tid, + status="running", + ) user_id: str | None = None composed = False with get_sync_db() as session: chapter = session.get(Chapter, chapter_id) if not chapter: - logger.info("recompose_chapter: chapter_id={} status=not_found", chapter_id) + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=recompose_chapter status=not_found chapter_id={} duration_ms={:.1f} " + "msg=章节重组跳过(章节不存在)", + chapter_id, + ms, + ) + merge_fanout_item( + memoir_correlation_id, + list_name="recompose_chapters", + id_field="chapter_id", + item_id=chapter_id, + task_id=tid, + status="not_found", + ) return {"status": "not_found"} if chapter.markdown_compose_dirty is not True: + ms = (time.perf_counter() - t0) * 1000 logger.info( - "recompose_chapter: chapter_id={} status=skip_not_dirty", + "event=recompose_chapter status=skip_not_dirty chapter_id={} duration_ms={:.1f} " + "msg=章节重组跳过(无需重组)", chapter_id, + ms, + ) + merge_fanout_item( + memoir_correlation_id, + list_name="recompose_chapters", + id_field="chapter_id", + item_id=chapter_id, + task_id=tid, + status="skip_not_dirty", ) return {"status": "skip_not_dirty"} uid = str(chapter.user_id) stage = str(chapter.category) lock_handle = acquire_chapter_pipeline_lock(uid, stage, ttl_seconds=lock_ttl) if lock_handle is None: + ms = (time.perf_counter() - t0) * 1000 logger.info( "event=recompose_chapter status=lock_busy_retry " - "chapter_id={} user_id={} stage={} retry_on_lock={}", + "chapter_id={} user_id={} stage={} retry_on_lock={} duration_ms={:.1f} " + "msg=章节重组等待锁或重试", chapter_id, uid, stage, settings.memoir_recompose_retry_on_lock_contention, + ms, ) if settings.memoir_recompose_retry_on_lock_contention: countdown = max(15, min(120, lock_ttl // 4)) raise self.retry(countdown=countdown) + merge_fanout_item( + memoir_correlation_id, + list_name="recompose_chapters", + id_field="chapter_id", + item_id=chapter_id, + task_id=tid, + status="skip_lock_contention", + ) return {"status": "skip_lock_contention"} try: composed = memoir_repo.compose_chapter_from_story_links_sync( @@ -81,9 +131,22 @@ def recompose_chapter(self, chapter_id: str) -> dict: "recomposed_chapter_ids": [chapter_id], }, ) + ms = (time.perf_counter() - t0) * 1000 + st = "composed" if composed else "empty" logger.info( - "recompose_chapter: chapter_id={} status={}", + "event=recompose_chapter status={} chapter_id={} user_id={} duration_ms={:.1f} " + "msg=章节物化重组完成", + st, chapter_id, - "composed" if composed else "empty", + user_id or "-", + ms, + ) + merge_fanout_item( + memoir_correlation_id, + list_name="recompose_chapters", + id_field="chapter_id", + item_id=chapter_id, + task_id=tid, + status="composed" if composed else "empty", ) return {"status": "composed" if composed else "empty", "chapter_id": chapter_id} diff --git a/api/app/tasks/chapter_cover_tasks.py b/api/app/tasks/chapter_cover_tasks.py index da39018..5dde8ce 100644 --- a/api/app/tasks/chapter_cover_tasks.py +++ b/api/app/tasks/chapter_cover_tasks.py @@ -6,6 +6,7 @@ Chapter 封面生成 Celery 任务。 """ import hashlib +import time import uuid from datetime import datetime, timedelta, timezone @@ -153,6 +154,11 @@ def generate_chapter_cover(self, chapter_id: str): 从 chapter_cover_intents 原子认领 intent,或创建新 intent 后生成, 写入 assets 并绑定到 chapters.cover_asset_id。 """ + t0 = time.perf_counter() + logger.info( + "event=chapter_cover_task_start chapter_id={} msg=章节封面生成任务开始", + chapter_id, + ) lock_key = f"lock:chapter-images:{chapter_id}" lock_handle = acquire_redis_lock( lock_key, ttl_seconds=CHAPTER_COVER_LOCK_TTL_SECONDS @@ -282,10 +288,13 @@ def generate_chapter_cover(self, chapter_id: str): db.commit() + ms = (time.perf_counter() - t0) * 1000 logger.info( - "generate_chapter_cover: chapter={}, asset={}", + "event=chapter_cover_task_done chapter_id={} asset_id={} duration_ms={:.1f} " + "msg=章节封面生成完成", chapter_id, asset_id, + ms, ) logger.debug( "generate_chapter_cover: chapter={} asset={} url={} cos_key={} prompt_final={}", @@ -310,8 +319,13 @@ def generate_chapter_cover(self, chapter_id: str): intent_db.error = str(exc) intent_db.updated_at = datetime.now(timezone.utc) db.commit() + ms = (time.perf_counter() - t0) * 1000 logger.warning( - "generate_chapter_cover failed: chapter={}, error={}", chapter_id, exc + "event=chapter_cover_task_failed chapter_id={} duration_ms={:.1f} error={} " + "msg=章节封面生成失败", + chapter_id, + ms, + exc, ) raise self.retry(exc=exc) from exc finally: diff --git a/api/app/tasks/memoir_quality_pass_tasks.py b/api/app/tasks/memoir_quality_pass_tasks.py index 884a74d..f7c983e 100644 --- a/api/app/tasks/memoir_quality_pass_tasks.py +++ b/api/app/tasks/memoir_quality_pass_tasks.py @@ -20,10 +20,10 @@ from app.core.config import settings from app.core.db import get_sync_db from app.core.dependencies import get_llm_provider from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import merge_pipeline_run from app.features.memoir.models import Chapter from app.features.memoir.repo import mark_chapter_dirty_sync from app.features.story.models import Story -from app.features.story.sync_write import append_story_version_sync logger = get_logger(__name__) @@ -43,7 +43,6 @@ def _polish_story_title( chapter_category: str, ) -> bool: """Re-generate title if current title is a placeholder. Returns True if updated.""" - from app.agents.stage_constants import CHAPTER_CATEGORIES from app.features.memoir.story_pipeline_sync import _placeholder_title current = (story.title or "").strip() @@ -85,23 +84,54 @@ def memoir_quality_pass( Post-draft quality pass: polish titles, recheck fidelity on flagged stories. Runs asynchronously after the fast draft is committed and visible. """ + qptid = str(self.request.id) if not settings.memoir_quality_pass_enabled: + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": {"task_id": qptid, "status": "disabled"}, + }, + }, + ) return {"status": "disabled"} t0 = time.perf_counter() logger.info( "event=quality_pass_start user_id={} stories={} chapters={} " - "memoir_correlation_id={}", + "memoir_correlation_id={} msg=成稿质量巡检开始", user_id, len(story_ids), len(chapter_ids), memoir_correlation_id or "", ) + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": {"task_id": qptid, "status": "running"}, + }, + }, + ) try: llm = _get_llm() if not llm: logger.warning("event=quality_pass_no_llm user_id={}", user_id) + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": { + "task_id": qptid, + "status": "no_llm", + }, + }, + }, + ) return {"status": "no_llm"} titles_polished = 0 @@ -137,13 +167,15 @@ def memoir_quality_pass( db.commit() elapsed = time.perf_counter() - t0 + duration_ms = elapsed * 1000 logger.info( "event=quality_pass_done user_id={} titles_polished={} " - "chapters_dirtied={} seconds={:.3f} memoir_correlation_id={}", + "chapters_dirtied={} duration_ms={:.1f} memoir_correlation_id={} " + "msg=成稿质量巡检完成", user_id, titles_polished, len(chapters_dirtied), - elapsed, + duration_ms, memoir_correlation_id or "", ) @@ -154,7 +186,12 @@ def memoir_quality_pass( for ch_id in sorted(chapters_dirtied): try: - recompose_chapter_task.apply_async(args=[ch_id], countdown=2) + rckw: dict = {} + if memoir_correlation_id: + rckw["memoir_correlation_id"] = memoir_correlation_id + recompose_chapter_task.apply_async( + args=[ch_id], kwargs=rckw, countdown=2 + ) except Exception as exc: logger.warning( "quality_pass recompose enqueue failed chapter={}: {}", @@ -162,6 +199,22 @@ def memoir_quality_pass( exc, ) + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": { + "task_id": qptid, + "status": "success", + "detail": { + "titles_polished": titles_polished, + "chapters_dirtied": len(chapters_dirtied), + }, + }, + }, + }, + ) return { "status": "success", "titles_polished": titles_polished, @@ -174,4 +227,17 @@ def memoir_quality_pass( logger.error( "event=quality_pass_failed user_id={} exc={}", user_id, e ) + if memoir_correlation_id: + merge_pipeline_run( + memoir_correlation_id, + { + "fanout": { + "quality_pass": { + "task_id": qptid, + "status": "failure", + "detail": {"error": str(e)}, + }, + }, + }, + ) raise self.retry(exc=e) from e diff --git a/api/app/tasks/memoir_tasks.py b/api/app/tasks/memoir_tasks.py index afe88e9..ccde6db 100644 --- a/api/app/tasks/memoir_tasks.py +++ b/api/app/tasks/memoir_tasks.py @@ -29,6 +29,10 @@ from app.core.config import settings from app.core.db import get_sync_db from app.core.dependencies import get_llm_provider, get_llm_provider_fast from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import ( + init_pipeline_run_from_phase1, + merge_pipeline_run, +) from app.core.memoir_pipeline_trace import ( effective_correlation_id, new_memoir_correlation_id, @@ -61,6 +65,10 @@ from app.features.memoir.state_service import ( from app.features.memoir.story_pipeline_sync import ( run_story_pipeline_for_category_batch, ) +from app.features.memory.service import ( + ingest_transcripts_batch_sync, + schedule_enrichment_for_sources, +) from app.features.user.models import User from app.tasks.celery_app import celery_app @@ -177,7 +185,7 @@ def _update_task_status_sync( logger.debug("任务状态已更新: task_id={} status={}", task_id, status) except Exception as e: - logger.error(f"更新任务状态失败: {e}") + logger.error("event=memoir_task_status_update_failed msg=更新任务状态失败 exc={}", e) def _merge_chapter_image_assets( @@ -300,19 +308,20 @@ def _phase2_immediate_task_id(user_id: str, chapter_category: str) -> str: def _schedule_phase2_timeout( user_id: str, chapter_category: str, memoir_correlation_id: str | None = None -) -> None: - """Reset countdown for Phase 2 narrative for one category.""" +) -> str | None: + """Reset countdown for Phase 2 narrative for one category。返回 Celery task_id。""" _revoke_phase2_timeout(user_id, chapter_category) countdown = float(max(1.0, settings.memoir_narrative_batch_max_wait_seconds)) p2_kwargs: dict = {} if memoir_correlation_id: p2_kwargs["memoir_correlation_id"] = memoir_correlation_id + timeout_tid = _phase2_timeout_task_id(user_id, chapter_category) celery_app.send_task( "app.tasks.memoir_tasks.process_memoir_phase2", args=[user_id, chapter_category], kwargs=p2_kwargs, countdown=countdown, - task_id=_phase2_timeout_task_id(user_id, chapter_category), + task_id=timeout_tid, ) logger.info( "event=phase2_timeout_scheduled user_id={} chapter_category={} countdown={} " @@ -322,11 +331,12 @@ def _schedule_phase2_timeout( countdown, memoir_correlation_id or "", ) + return timeout_tid def _dispatch_phase2_immediate( user_id: str, chapter_category: str, memoir_correlation_id: str | None = None -) -> None: +) -> str | None: _revoke_phase2_timeout(user_id, chapter_category) p2_kwargs: dict = {} if memoir_correlation_id: @@ -335,17 +345,22 @@ def _dispatch_phase2_immediate( "args": [user_id, chapter_category], "kwargs": p2_kwargs, } + fixed_tid: str | None = None if settings.memoir_phase2_singleflight_immediate: - send_kw["task_id"] = _phase2_immediate_task_id(user_id, chapter_category) - celery_app.send_task("app.tasks.memoir_tasks.process_memoir_phase2", **send_kw) + fixed_tid = _phase2_immediate_task_id(user_id, chapter_category) + send_kw["task_id"] = fixed_tid + ar = celery_app.send_task("app.tasks.memoir_tasks.process_memoir_phase2", **send_kw) + out_tid = fixed_tid or getattr(ar, "id", None) logger.info( "event=phase2_dispatched_immediate user_id={} chapter_category={} " - "memoir_correlation_id={} task_id_mode={}", + "memoir_correlation_id={} task_id_mode={} celery_task_id={}", user_id, chapter_category, memoir_correlation_id or "", "singleflight" if settings.memoir_phase2_singleflight_immediate else "unique", + out_tid or "", ) + return out_tid def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None: @@ -370,18 +385,34 @@ def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None: for chapter_category in cats: _revoke_phase2_timeout(user_id, chapter_category) flush_cid = new_memoir_correlation_id() - celery_app.send_task( + ar = celery_app.send_task( "app.tasks.memoir_tasks.process_memoir_phase2", args=[user_id, chapter_category], kwargs={"memoir_correlation_id": flush_cid}, ) + p2tid = getattr(ar, "id", None) logger.info( "event=phase2_dispatched_flush user_id={} chapter_category={} " - "memoir_correlation_id={}", + "memoir_correlation_id={} celery_task_id={}", user_id, chapter_category, flush_cid, + p2tid or "", ) + if p2tid and flush_cid: + merge_pipeline_run( + flush_cid, + { + "user_id": user_id, + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(p2tid), + "status": "enqueued", + } + ], + }, + ) except Exception as e: logger.error( "event=phase2_flush_failed user_id={} exc_type={} exc={}", @@ -406,12 +437,25 @@ def process_memoir_phase2( phase2_t0 = time.perf_counter() logger.info( "event=memoir_phase2_start user_id={} task_id={} chapter_category={} " - "memoir_correlation_id={}", + "memoir_correlation_id={} msg=回忆录第二阶段叙事任务开始", user_id, task_id, chapter_category, cid, ) + merge_pipeline_run( + cid, + { + "user_id": user_id, + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(task_id), + "status": "running", + } + ], + }, + ) try: with get_sync_db() as db: user_convs = select(Conversation.id).where( @@ -431,14 +475,30 @@ def process_memoir_phase2( category_segments = list(db.execute(stmt).scalars().all()) if not category_segments: + ms = (time.perf_counter() - phase2_t0) * 1000 logger.info( - "event=memoir_phase2_noop user_id={} chapter_category={}", + "event=memoir_phase2_noop user_id={} chapter_category={} " + "duration_ms={:.1f} msg=第二阶段无待叙事片段", user_id, chapter_category, + ms, + ) + merge_pipeline_run( + cid, + { + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(task_id), + "status": "noop", + } + ], + }, ) return {"status": "noop"} llm = _get_llm() + llm_fast = _get_llm_fast() or llm user_obj = db.get(User, user_id) user_profile = "" user_birth_year = None @@ -477,6 +537,19 @@ def process_memoir_phase2( # 锁内再查一次,避免等待锁期间状态已变 category_segments = list(db.execute(stmt).scalars().all()) if not category_segments: + merge_pipeline_run( + cid, + { + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(task_id), + "status": "noop", + "detail": {"reason": "empty_after_lock"}, + } + ], + }, + ) return {"status": "noop"} state = get_or_create_state_sync(user_id, db) @@ -493,6 +566,7 @@ def process_memoir_phase2( background_voice=background_voice, occupation=user_occupation, memoir_correlation_id=cid, + llm_fast=llm_fast, ) pipeline_elapsed = time.perf_counter() - pipeline_t0 story_dispatch_ids |= disp @@ -564,11 +638,13 @@ def process_memoir_phase2( ) phase2_elapsed = time.perf_counter() - phase2_t0 + duration_ms = phase2_elapsed * 1000 logger.info( "event=memoir_phase2_done user_id={} task_id={} chapter_category={} " "segment_count={} memoir_correlation_id={} " "lock_seconds={:.3f} pipeline_seconds={:.3f} " - "phase2_total_seconds={:.3f}", + "phase2_total_seconds={:.3f} duration_ms={:.1f} " + "msg=回忆录第二阶段叙事完成", user_id, task_id, chapter_category, @@ -577,6 +653,20 @@ def process_memoir_phase2( lock_elapsed, pipeline_elapsed, phase2_elapsed, + duration_ms, + ) + merge_pipeline_run( + cid, + { + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(task_id), + "status": "success", + "detail": {"segments": len(category_segments)}, + } + ], + }, ) return { "status": "success", @@ -590,11 +680,25 @@ def process_memoir_phase2( raise except Exception as e: logger.error( - "event=memoir_phase2_failed user_id={} chapter_category={} exc={}", + "event=memoir_phase2_failed user_id={} chapter_category={} exc={} " + "msg=回忆录第二阶段失败", user_id, chapter_category, e, ) + merge_pipeline_run( + cid, + { + "phase2": [ + { + "chapter_category": chapter_category, + "task_id": str(task_id), + "status": "failure", + "detail": {"error": str(e)}, + } + ], + }, + ) raise self.retry(exc=e) from e @@ -608,13 +712,19 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): memoir_correlation_id = new_memoir_correlation_id() logger.info( "event=memoir_phase1_start user_id={} task_id={} segments={} " - "memoir_correlation_id={}", + "memoir_correlation_id={} msg=回忆录第一阶段抽取与分类开始", user_id, task_id, len(segment_ids), memoir_correlation_id, ) _update_task_status_sync(user_id, task_id, "running") + init_pipeline_run_from_phase1( + user_id, + memoir_correlation_id, + task_id, + segment_count=len(segment_ids), + ) phase1_t0 = time.perf_counter() try: @@ -629,6 +739,16 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): if not segments: logger.warning("event=memoir_phase1_no_segments ids={}", segment_ids) + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "status": "success", + "step": "no_segments", + "detail": {"processed": 0}, + }, + }, + ) _update_task_status_sync( user_id, task_id, @@ -637,42 +757,66 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): ) return {"status": "no_segments"} + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "step": "memory_ingest", + "detail": {"candidates": len(segments)}, + }, + }, + ) ingest_t0 = time.perf_counter() + ingest_items: list[tuple[str, str, dict | None]] = [] + non_empty_segments: list = [] for seg in segments: - conv_id = getattr(seg, "conversation_id", None) or "" text = (seg.user_input_text or "").strip() if not text: continue - try: - from app.features.memory.service import ingest_transcript_sync + conv_id = getattr(seg, "conversation_id", None) or "" + ln = getattr(seg, "lineage_json", None) + lineage_payload = ln if isinstance(ln, dict) else None + ingest_items.append((conv_id, text, lineage_payload)) + non_empty_segments.append(seg) - ln = getattr(seg, "lineage_json", None) - lineage_payload = ln if isinstance(ln, dict) else None - source_id = ingest_transcript_sync( - db, - user_id, - conv_id, - text, - lineage_json=lineage_payload, - ) - logger.info( - "event=memory_transcript_ingested user_id={} task_id={} " - "source_id={} conversation_id={} segment_id={} transcript_chars={}", - user_id, - task_id, - source_id, - conv_id, - seg.id, - len(text), + ingested_source_ids: list[str] = [] + if ingest_items: + try: + ingested_source_ids = ingest_transcripts_batch_sync( + db, user_id, ingest_items ) + for seg, sid in zip( + non_empty_segments, ingested_source_ids, strict=True + ): + logger.info( + "event=memory_transcript_ingested user_id={} task_id={} " + "source_id={} conversation_id={} segment_id={} transcript_chars={}", + user_id, + task_id, + sid, + getattr(seg, "conversation_id", None) or "", + seg.id, + len((seg.user_input_text or "").strip()), + ) except Exception as e: logger.warning( - "Memory ingest 跳过 segment_id={}: {} exc_type={}", - getattr(seg, "id", ""), + "Memory batch ingest 失败: {} exc_type={}", e, type(e).__name__, ) ingest_elapsed = time.perf_counter() - ingest_t0 + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "step": "prepare_batches", + "detail": { + "memory_ingest_seconds": round(ingest_elapsed, 3), + "ingested_sources": len(ingested_source_ids), + }, + }, + }, + ) llm = _get_llm() llm_fast = _get_llm_fast() or llm @@ -684,6 +828,13 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): prep_t0 = time.perf_counter() memoir_orchestrator = MemoirOrchestrator() + + def _phase1_chunk_cb(idx: int, total: int) -> None: + merge_pipeline_run( + memoir_correlation_id, + {"phase1": {"detail": {"prepare_batches_chunk": [idx, total]}}}, + ) + prepared = memoir_orchestrator.prepare_batches( segments=list(segments), llm=llm, @@ -698,8 +849,18 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): db, memoir_batch=True, ), + on_phase1_chunk=_phase1_chunk_cb, ) prep_elapsed = time.perf_counter() - prep_t0 + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "step": "persist_topics", + "detail": {"prepare_batches_seconds": round(prep_elapsed, 3)}, + }, + }, + ) skip_ids = prepared.segment_skip_story_ids missing_cat = [ @@ -749,13 +910,72 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): db.commit() + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "step": "dispatch_phase2", + "detail": { + "phase2_immediate": list(phase2_immediate), + "phase2_timeout": list(phase2_timeout), + }, + }, + }, + ) + + if ingested_source_ids: + schedule_enrichment_for_sources( + user_id, + ingested_source_ids, + memoir_correlation_id=memoir_correlation_id, + ) + for cc in phase2_immediate: - _dispatch_phase2_immediate(user_id, cc, memoir_correlation_id) + p2tid = _dispatch_phase2_immediate(user_id, cc, memoir_correlation_id) + if p2tid: + merge_pipeline_run( + memoir_correlation_id, + { + "phase2": [ + { + "chapter_category": cc, + "task_id": str(p2tid), + "status": "enqueued", + } + ], + }, + ) for cc in phase2_timeout: - _schedule_phase2_timeout(user_id, cc, memoir_correlation_id) + p2tid = _schedule_phase2_timeout(user_id, cc, memoir_correlation_id) + if p2tid: + merge_pipeline_run( + memoir_correlation_id, + { + "phase2": [ + { + "chapter_category": cc, + "task_id": str(p2tid), + "status": "scheduled_timeout", + } + ], + }, + ) categories_processed = sorted(prepared.category_to_segments.keys()) phase1_elapsed = time.perf_counter() - phase1_t0 + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "status": "success", + "step": "completed", + "detail": { + "processed": len(segments), + "phase1_total_seconds": round(phase1_elapsed, 3), + }, + }, + }, + ) _update_task_status_sync( user_id, task_id, @@ -766,11 +986,13 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): "phase2_watch_categories": sorted(categories_for_phase2), }, ) + duration_ms = phase1_elapsed * 1000 logger.info( "event=memoir_phase1_done user_id={} task_id={} segment_count={} " "categories={} memoir_correlation_id={} " "memory_ingest_seconds={:.3f} prepare_batches_seconds={:.3f} " - "phase1_total_seconds={:.3f}", + "phase1_total_seconds={:.3f} duration_ms={:.1f} " + "msg=回忆录第一阶段完成", user_id, task_id, len(segments), @@ -779,6 +1001,7 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): ingest_elapsed, prep_elapsed, phase1_elapsed, + duration_ms, ) return { "status": "success", @@ -789,7 +1012,21 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): except Retry: raise except Exception as e: - logger.error("event=memoir_phase1_failed user_id={} exc={}", user_id, e) + logger.error( + "event=memoir_phase1_failed user_id={} exc={} msg=回忆录第一阶段失败", + user_id, + e, + ) + merge_pipeline_run( + memoir_correlation_id, + { + "phase1": { + "status": "failure", + "step": "error", + "detail": {"error": str(e)}, + }, + }, + ) _update_task_status_sync(user_id, task_id, "failure", {"error": str(e)}) raise self.retry(exc=e) from e @@ -810,8 +1047,10 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): """ stage = normalize_chapter_category(stage, fallback="summary") cid = effective_correlation_id(explicit=None, celery_task_id=str(self.request.id)) + gen_t0 = time.perf_counter() logger.info( - "event=generate_chapter_content_start user_id={} stage={} memoir_correlation_id={}", + "event=generate_chapter_content_start user_id={} stage={} memoir_correlation_id={} " + "msg=实时章节生成任务开始", user_id, stage, cid, @@ -820,6 +1059,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): try: with get_sync_db() as db: llm = _get_llm() + llm_fast = _get_llm_fast() or llm user_obj = db.get(User, user_id) user_profile = "" user_birth_year = None @@ -854,6 +1094,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): background_voice=background_voice, occupation=user_occupation, memoir_correlation_id=cid, + llm_fast=llm_fast, ) db.flush() if chapter is None: @@ -886,10 +1127,27 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): need_quality_pass=True, memoir_correlation_id=cid, ) + ms = (time.perf_counter() - gen_t0) * 1000 + logger.info( + "event=generate_chapter_content_done user_id={} stage={} " + "memoir_correlation_id={} duration_ms={:.1f} msg=实时章节生成完成", + user_id, + stage, + cid, + ms, + ) return {"status": "success"} except Retry: raise except Exception as e: - logger.error(f"章节生成失败: {e}") + ms = (time.perf_counter() - gen_t0) * 1000 + logger.error( + "event=generate_chapter_content_failed user_id={} stage={} duration_ms={:.1f} " + "exc={} msg=实时章节生成失败", + user_id, + stage, + ms, + e, + ) raise self.retry(exc=e) from e diff --git a/api/app/tasks/memory_compaction_tasks.py b/api/app/tasks/memory_compaction_tasks.py index 76c35fc..1ebf3db 100644 --- a/api/app/tasks/memory_compaction_tasks.py +++ b/api/app/tasks/memory_compaction_tasks.py @@ -28,6 +28,7 @@ logger = get_logger(__name__) @shared_task def memory_compaction_sweep() -> dict[str, Any]: """Beat:为近期有记忆写入的用户调度 compaction(debounce 仍由 schedule 合并)。""" + t0 = time.perf_counter() if not settings.memory_compaction_enabled: return {"skipped": True, "reason": "disabled"} hours = int(settings.memory_compaction_sweep_recent_hours) @@ -36,8 +37,13 @@ def memory_compaction_sweep() -> dict[str, Any]: ctx_base: dict[str, Any] = {"trigger_source": "beat", "sweep_hours": hours} for uid in user_ids: schedule_memory_compaction_run(uid, dict(ctx_base)) + ms = (time.perf_counter() - t0) * 1000 logger.info( - "memory_compaction_sweep hours={} scheduled_users={}", hours, len(user_ids) + "event=memory_compaction_sweep_done hours={} scheduled_users={} duration_ms={:.1f} " + "msg=记忆压缩定时扫描已调度", + hours, + len(user_ids), + ms, ) return {"scheduled": len(user_ids), "user_ids": user_ids} @@ -46,6 +52,7 @@ def memory_compaction_sweep() -> dict[str, Any]: def memory_compaction_run( self, user_id: str, context: dict[str, Any] | None = None ) -> dict[str, Any]: + run_t0 = time.perf_counter() if not settings.memory_compaction_enabled: return {"skipped": True, "reason": "disabled"} @@ -61,9 +68,12 @@ def memory_compaction_run( ttl_seconds=settings.memory_compaction_lock_ttl_seconds, ) if lock is None: + ms = (time.perf_counter() - run_t0) * 1000 logger.info( - "memory_compaction_skipped user_id={} skipped_reason=lock_not_acquired", + "event=memory_compaction_skipped user_id={} reason=lock_not_acquired " + "duration_ms={:.1f} msg=记忆压缩跳过(未拿到锁)", user_id, + ms, ) out = {"skipped": True, "reason": "lock_not_acquired"} finalize_memory_compaction_run( @@ -89,10 +99,23 @@ def memory_compaction_run( observed_deadline_ts=deadline, context=ctx, ) + ms = (time.perf_counter() - run_t0) * 1000 + logger.info( + "event=memory_compaction_done user_id={} duration_ms={:.1f} msg=记忆压缩运行完成", + user_id, + ms, + ) return out except Exception as exc: - logger.warning("memory_compaction_run failed user_id={} err={}", user_id, exc) + ms = (time.perf_counter() - run_t0) * 1000 + logger.warning( + "event=memory_compaction_failed user_id={} duration_ms={:.1f} err={} " + "msg=记忆压缩运行失败", + user_id, + ms, + exc, + ) release_scheduler_gate(user_id) - raise self.retry(exc=exc) + raise self.retry(exc=exc) from exc finally: release_redis_lock(lock) diff --git a/api/app/tasks/memory_enrichment_tasks.py b/api/app/tasks/memory_enrichment_tasks.py index f08ed45..2619e64 100644 --- a/api/app/tasks/memory_enrichment_tasks.py +++ b/api/app/tasks/memory_enrichment_tasks.py @@ -1,45 +1,145 @@ """ Memory enrichment Celery task — runs asynchronously after ingest to generate -summaries, facts, and timeline events without blocking the memoir hot path. +summaries, facts, and timeline events without blocking ingest or memoir pipeline. + +Tasks are routed to ``settings.celery_memory_enrichment_queue`` (default ``memory_idle``); +run workers with ``-Q celery,memory_idle`` or a dedicated low-priority worker for that queue. """ +import time + from celery import shared_task -from sqlalchemy.orm import Session from app.core.config import settings from app.core.db import get_sync_db from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import merge_fanout_item logger = get_logger(__name__) -@shared_task(bind=True, max_retries=2, default_retry_delay=30) -def enrich_memory_source(self, user_id: str, source_id: str): +def schedule_memory_enrichment( + user_id: str, + source_id: str, + *, + memoir_correlation_id: str | None = None, +) -> str | None: """ - Post-ingest enrichment: session summary, rolling summary, facts, timeline. + Enqueue post-ingest LLM enrichment on the memory idle queue. + + When ``memoir_correlation_id`` is set, records ``fanout.memory_enrichment`` as enqueued + for eval / pipeline progress (same as the former Phase1 loop). + """ + if not settings.memory_enrichment_enabled: + return None + uid = (user_id or "").strip() + sid = (source_id or "").strip() + if not uid or not sid: + return None + q = (settings.celery_memory_enrichment_queue or "").strip() or "memory_idle" + try: + ar = enrich_memory_source.apply_async( + args=[uid, sid], + kwargs={"memoir_correlation_id": memoir_correlation_id}, + queue=q, + ) + enr_id = getattr(ar, "id", None) + if not enr_id: + return None + cid = (memoir_correlation_id or "").strip() + if cid: + merge_fanout_item( + cid, + list_name="memory_enrichment", + id_field="source_id", + item_id=sid, + task_id=str(enr_id), + status="enqueued", + ) + return str(enr_id) + except Exception as e: + logger.warning( + "event=memory_enrichment_schedule_failed user_id={} source_id={} exc={} exc_type={}", + uid, + sid, + e, + type(e).__name__, + ) + return None + + +@shared_task(bind=True, max_retries=2, default_retry_delay=30) +def enrich_memory_source( + self, + user_id: str, + source_id: str, + memoir_correlation_id: str | None = None, +): + """ + Post-ingest enrichment: one LLM call → session summary + structured facts. Runs outside the memoir Phase1 hot path so narrative generation isn't blocked. """ if not settings.memory_enrichment_enabled: return {"status": "disabled"} + tid = str(self.request.id) + t0 = time.perf_counter() + logger.info( + "event=memory_enrichment_start user_id={} source_id={} task_id={} " + "msg=开始记忆富化(会话摘要+事实)", + user_id, + source_id, + tid, + ) + merge_fanout_item( + memoir_correlation_id, + list_name="memory_enrichment", + id_field="source_id", + item_id=source_id, + task_id=tid, + status="running", + ) try: with get_sync_db() as db: from app.features.memory.enrichment import enrich_memory_after_ingest_sync enrich_memory_after_ingest_sync(db, user_id, source_id, llm=None) db.commit() + ms = (time.perf_counter() - t0) * 1000 logger.info( - "event=memory_enrichment_done user_id={} source_id={}", + "event=memory_enrichment_done user_id={} source_id={} duration_ms={:.1f} " + "msg=记忆富化完成", user_id, source_id, + ms, + ) + merge_fanout_item( + memoir_correlation_id, + list_name="memory_enrichment", + id_field="source_id", + item_id=source_id, + task_id=tid, + status="success", ) return {"status": "success", "source_id": source_id} except Exception as e: + ms = (time.perf_counter() - t0) * 1000 logger.warning( - "event=memory_enrichment_failed user_id={} source_id={} exc={} exc_type={}", + "event=memory_enrichment_failed user_id={} source_id={} duration_ms={:.1f} " + "exc={} exc_type={} msg=记忆富化失败", user_id, source_id, + ms, e, type(e).__name__, ) + merge_fanout_item( + memoir_correlation_id, + list_name="memory_enrichment", + id_field="source_id", + item_id=source_id, + task_id=tid, + status="failure", + extra={"error": str(e)}, + ) raise self.retry(exc=e) from e diff --git a/api/app/tasks/story_image_tasks.py b/api/app/tasks/story_image_tasks.py index cf0cebd..8982b13 100644 --- a/api/app/tasks/story_image_tasks.py +++ b/api/app/tasks/story_image_tasks.py @@ -6,6 +6,7 @@ Story 主插图生成 Celery 任务。 """ import hashlib +import time import uuid from datetime import datetime, timedelta, timezone @@ -17,6 +18,7 @@ from app.agents.image_prompt import get_image_prompt_orchestrator from app.core.db import get_sync_db from app.core.dependencies import get_image_generator from app.core.logging import get_logger +from app.core.memoir_pipeline_progress import merge_fanout_item from app.core.redis_lock import acquire_redis_lock, release_redis_lock from app.features.asset.models import Asset from app.features.memoir.asset_resolver import strip_asset_image_refs_from_markdown @@ -149,15 +151,32 @@ def _claim_story_image_intent_sync(db, story_id: str, claim_token: str): @shared_task(bind=True, max_retries=3, default_retry_delay=30) -def generate_story_image(self, story_id: str): +def generate_story_image( + self, story_id: str, memoir_correlation_id: str | None = None +): """ 为 story 生成主插图。 从 story_image_intents 原子认领 primary intent,生成后写入 assets 并更新 intent。 """ + celery_tid = str(self.request.id) + t0 = time.perf_counter() + logger.info( + "event=story_image_task_start story_id={} task_id={} msg=故事主图生成任务开始", + story_id, + celery_tid, + ) lock_key = f"lock:story-image:{story_id}" lock_handle = acquire_redis_lock(lock_key, ttl_seconds=STORY_IMAGE_LOCK_TTL_SECONDS) if lock_handle is None: logger.debug("generate_story_image: story={}, reason=locked", story_id) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="locked", + ) return {"status": "locked"} claim_token = uuid.uuid4().hex @@ -171,6 +190,14 @@ def generate_story_image(self, story_id: str): "generate_story_image: story={}, reason=no_claimable_intent", story_id, ) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="no_intent", + ) return {"status": "no_intent"} intent, story = row @@ -197,8 +224,25 @@ def generate_story_image(self, story_id: str): len(plain), min_body, ) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="skipped_body_too_short", + ) return {"status": "skipped_body_too_short"} + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="running", + ) + image_generator = get_image_generator() storage = TencentCosStorageService.from_env() @@ -247,6 +291,14 @@ def generate_story_image(self, story_id: str): getattr(intent_db, "status", None), getattr(intent_db, "claim_token", None), ) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="superseded_or_cancelled", + ) return {"status": "superseded_or_cancelled"} asset = Asset( @@ -286,11 +338,27 @@ def generate_story_image(self, story_id: str): url, asset_id, ) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="success_stale", + ) return {"status": "success_stale", "asset_id": asset_id} ver = db.get(StoryVersion, target_vid) if not ver: db.commit() + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="success_no_snapshot", + ) return {"status": "success_no_snapshot", "asset_id": asset_id} base_md = strip_asset_image_refs_from_markdown(ver.markdown_snapshot or "") @@ -326,10 +394,13 @@ def generate_story_image(self, story_id: str): _enqueue_chapter_effects_after_image_backfill(story_id) + ms = (time.perf_counter() - t0) * 1000 logger.info( - "generate_story_image: story={}, asset={}", + "event=story_image_task_done story_id={} asset_id={} duration_ms={:.1f} " + "msg=故事主图生成完成", story_id, asset_id, + ms, ) logger.debug( "generate_story_image: story={} asset={} url={} cos_key={} prompt_final={}", @@ -339,6 +410,14 @@ def generate_story_image(self, story_id: str): cos_key, prompt_final, ) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="success", + ) return {"status": "success", "asset_id": asset_id} except Exception as exc: if intent is not None: @@ -355,7 +434,23 @@ def generate_story_image(self, story_id: str): intent_db.error = str(exc) intent_db.updated_at = datetime.now(timezone.utc) db.commit() - logger.warning("generate_story_image failed: story={}, error={}", story_id, exc) + merge_fanout_item( + memoir_correlation_id, + list_name="story_images", + id_field="story_id", + item_id=story_id, + task_id=celery_tid, + status="failure", + extra={"error": str(exc)}, + ) + ms = (time.perf_counter() - t0) * 1000 + logger.warning( + "event=story_image_task_failed story_id={} duration_ms={:.1f} error={} " + "msg=故事主图生成失败", + story_id, + ms, + exc, + ) raise self.retry(exc=exc) from exc finally: release_redis_lock(lock_handle) diff --git a/api/app/tasks/story_title_tasks.py b/api/app/tasks/story_title_tasks.py new file mode 100644 index 0000000..ec1166d --- /dev/null +++ b/api/app/tasks/story_title_tasks.py @@ -0,0 +1,134 @@ +"""Async story title refinement after new story create (placeholder first).""" + +import time + +from celery import shared_task + +from app.core.db import get_sync_db +from app.core.dependencies import get_llm_provider +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +@shared_task(bind=True, max_retries=2, default_retry_delay=15) +def generate_story_title_after_create( + self, + story_id: str, + chapter_category: str, + oral_scope: str, + user_id: str, +): + """Replace placeholder title with LLM title when body is long enough.""" + from app.agents.chat.prompts_profile import format_user_profile_context + from app.agents.memoir.narrative_agent import NarrativeAgent + from app.features.memoir.state_service import get_or_create_state_sync + from app.features.memoir.story_pipeline_sync import ( + _maybe_generate_title, + _placeholder_title, + _slot_snippets_for_narrative, + ) + from app.features.story.models import Story + from app.features.user.models import User + + t0 = time.perf_counter() + logger.info( + "event=story_title_task_start story_id={} user_id={} chapter_category={} " + "msg=故事标题精修任务开始", + story_id, + user_id, + chapter_category, + ) + try: + with get_sync_db() as db: + st = db.get(Story, story_id) + if not st or str(st.user_id) != str(user_id): + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=story_title_task_skip story_id={} reason=not_found duration_ms={:.1f} " + "msg=标题精修跳过(故事不存在或无权限)", + story_id, + ms, + ) + return {"status": "skip_not_found"} + expected_ph = _placeholder_title(chapter_category) + if (st.title or "").strip() and (st.title or "").strip() != expected_ph: + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=story_title_task_skip story_id={} reason=user_modified duration_ms={:.1f} " + "msg=标题精修跳过(用户已改标题)", + story_id, + ms, + ) + return {"status": "skip_user_modified"} + + llm = getattr(get_llm_provider(), "langchain_llm", None) + if not llm: + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=story_title_task_skip story_id={} reason=no_llm duration_ms={:.1f} " + "msg=标题精修跳过(无 LLM)", + story_id, + ms, + ) + return {"status": "skip_no_llm"} + + user_obj = db.get(User, user_id) + user_profile = "" + birth_year = None + if user_obj: + birth_year = user_obj.birth_year + user_profile = format_user_profile_context( + birth_year=user_obj.birth_year, + birth_place=user_obj.birth_place, + grew_up_place=user_obj.grew_up_place, + occupation=user_obj.occupation, + ) + + state = get_or_create_state_sync(user_id, db) + slot_snippets = _slot_snippets_for_narrative( + state=state, + chapter_category=chapter_category, + user_id=user_id, + ) + md = (st.canonical_markdown or "").strip() + new_title = _maybe_generate_title( + NarrativeAgent(), + chapter_category=chapter_category, + md=md, + slot_snippets=slot_snippets, + user_profile=user_profile, + user_birth_year=birth_year, + llm=llm, + oral_scope=oral_scope or "", + ) + if not new_title.strip() or new_title.strip() == expected_ph: + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=story_title_task_skip story_id={} reason=placeholder duration_ms={:.1f} " + "msg=标题精修跳过(仍为占位)", + story_id, + ms, + ) + return {"status": "skip_placeholder"} + st.title = new_title + db.commit() + ms = (time.perf_counter() - t0) * 1000 + logger.info( + "event=story_title_task_done story_id={} user_id={} duration_ms={:.1f} " + "msg=故事标题精修完成", + story_id, + user_id, + ms, + ) + return {"status": "ok", "title": new_title} + except Exception as exc: + ms = (time.perf_counter() - t0) * 1000 + logger.warning( + "event=generate_story_title_after_create_failed story_id={} duration_ms={:.1f} err={} " + "msg=故事标题精修失败", + story_id, + ms, + exc, + ) + raise self.retry(exc=exc) from exc diff --git a/api/development.sh b/api/development.sh index eca1b62..0dda676 100755 --- a/api/development.sh +++ b/api/development.sh @@ -519,7 +519,7 @@ start_services() { ;; esac - "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" & + "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" -Q celery,memory_idle & CELERY_PID=$! ensure_background_process_alive "Celery" "${CELERY_PID}" print_ok "Celery 已启动 (PID: ${CELERY_PID})" diff --git a/api/docker-compose.yml b/api/docker-compose.yml index a6178e8..b3e6368 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -95,7 +95,7 @@ services: dockerfile: Dockerfile image: life-echo-api:latest container_name: life-echo-celery-worker - command: uv run celery -A app.tasks.celery_app worker --loglevel=info --concurrency=4 + command: uv run celery -A app.tasks.celery_app worker --loglevel=info --concurrency=4 -Q celery,memory_idle env_file: - .env environment: diff --git a/api/docs/internal-eval.md b/api/docs/internal-eval.md index e0e315b..f214143 100644 --- a/api/docs/internal-eval.md +++ b/api/docs/internal-eval.md @@ -49,7 +49,7 @@ uv run uvicorn app.internal_main:internal_app --host 0.0.0.0 --port 8001 Celery worker 与主站共用(`celery_app` 已 `include` 回忆录等任务;**不再**包含已下线的 `evaluation_tasks` 实验批量跑批)。需 Phase1 / 叙事推进时请启动 worker: ```bash -uv run celery -A app.tasks.celery_app worker -l info +uv run celery -A app.tasks.celery_app worker -l info -Q celery,memory_idle ``` ## 前端(`app-eval-web`) diff --git a/api/docs/memory-retrieval.md b/api/docs/memory-retrieval.md index a0ffd2c..99bba9e 100644 --- a/api/docs/memory-retrieval.md +++ b/api/docs/memory-retrieval.md @@ -21,14 +21,17 @@ ## 富化(ingest 后 LLM) -- `memory_enrichment_enabled`(默认 `true`):`ingest_transcript` / `ingest_transcript_sync` 后执行摘要、事实、时间线;`false` 时跳过。 +- `memory_enrichment_enabled`(默认 `true`):ingest 成功并 **commit** 后,通过 `schedule_memory_enrichment` 将任务投递到 **`CELERY_MEMORY_ENRICHMENT_QUEUE`**(默认 `memory_idle`),在 worker 上 **单次 LLM 调用**产出 **会话摘要(`MemorySummary` session)+ 结构化事实(`MemoryFact`)**;`false` 时不投递。 +- ingest 路径 **不再**维护滚动摘要(rolling)与 **时间线表**(`timeline_events`)的物化;检索中的 `timeline_hints` 依赖既有数据(若有)或为空;空 query 下「浏览」模式若开启 `memory_evidence_empty_query_include_rolling`,仅当库内仍有历史 rolling 行时才会出现。 +- 异步 `MemoryService.ingest` 与同步 `ingest_transcript_sync` 均 **不**在请求/任务热路径内内联 LLM 富化;回忆录 Phase1 在 DB commit 后调用 `schedule_enrichment_for_sources`(与 `memoir_correlation_id` 观测一致)。 +- Worker 须消费该队列(例如 `-Q celery,memory_idle`),否则任务会堆积。 - `memory_enrichment_max_chars`:截断送入 LLM 的文本长度。 -- 同一 `memory_source_id` 的时间线在重跑富化前会先删后插入,避免重复事件。 - Ingest 写入 **embedding**(best-effort);历史 FTS 列 `content_tsv` 已由迁移 `0007_drop_chunk_content_tsv` 删除。 +- 叙事阶段 `retrieve_evidence_sync` **不等待**富化完成;证据随富化渐进变丰富。 ## Celery 任务中的顺序 -`process_memoir_segments`(`app/tasks/memoir_tasks.py`)在**同一任务**内先执行 `ingest_transcript_sync`(并 `commit`),再执行 `MemoirOrchestrator` 与 `run_story_pipeline_for_category_batch`。因此 `retrieve_evidence_sync` 能看到**本批刚写入**的 memory chunks(无竞态),前提是 embedding API 已成功写入向量。 +`process_memoir_segments`(`app/tasks/memoir_tasks.py`)在**同一任务**内先执行批量 ingest(`ingest_transcripts_batch_sync` 并 `commit`),再富化入队与 `MemoirOrchestrator`、派发 Phase2。Phase2 内 `retrieve_evidence_sync` 能看到**本批刚写入**的 memory chunks(无竞态),前提是 embedding API 已成功写入向量;富化 Summary/Facts 可能稍后才就绪。 章节分类上,若模型返回 **none** 或命中零散档案启发式,Story 侧会统一落入 **`summary` 章节**并继续叙事落库,与「本批 transcript 已进 memory」一致,避免误以为内容被丢弃。 diff --git a/api/tests/evaluation/test_memoir_pipeline_run_router.py b/api/tests/evaluation/test_memoir_pipeline_run_router.py new file mode 100644 index 0000000..4155a75 --- /dev/null +++ b/api/tests/evaluation/test_memoir_pipeline_run_router.py @@ -0,0 +1,101 @@ +"""GET /users/{user_id}/memoir-pipeline-run(快照读取)。""" + +import pytest +from httpx import ASGITransport, AsyncClient + +from app.features.evaluation.internal_auth import get_internal_eval_principal + + +@pytest.mark.asyncio +async def test_memoir_pipeline_run_ok_by_phase1_task( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from fastapi import FastAPI + + monkeypatch.setattr( + "app.core.config.settings.internal_eval_api_key", + "secret", + raising=False, + ) + from app.features.evaluation.router import router + + def _fake_eval(user_id: str, **kwargs: object): + assert user_id == "u1" + assert kwargs.get("phase1_task_id") == "tid-z" + return { + "memoir_correlation_id": "cid-z", + "user_id": "u1", + "started_at_utc": "2026-04-09T00:00:00Z", + "phase1": {"task_id": "tid-z", "status": "running", "step": "started"}, + "phase2": [], + "fanout": { + "story_images": [], + "recompose_chapters": [], + "memory_enrichment": [], + "quality_pass": None, + "compaction": None, + }, + } + + monkeypatch.setattr( + "app.features.evaluation.router.get_pipeline_run_for_eval", + _fake_eval, + ) + + app = FastAPI() + app.include_router(router, prefix="/internal/api/evaluation") + + async def _override_auth(): + from app.features.evaluation.internal_auth import InternalEvalPrincipal + + return InternalEvalPrincipal() + + app.dependency_overrides[get_internal_eval_principal] = _override_auth + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://t") as client: + r = await client.get( + "/internal/api/evaluation/users/u1/memoir-pipeline-run", + headers={"X-Internal-Eval-Key": "secret"}, + params={"phase1_task_id": "tid-z"}, + ) + assert r.status_code == 200 + body = r.json() + assert body["memoir_correlation_id"] == "cid-z" + assert body["phase1"]["task_id"] == "tid-z" + + +@pytest.mark.asyncio +async def test_memoir_pipeline_run_400_both_ids( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from fastapi import FastAPI + + monkeypatch.setattr( + "app.core.config.settings.internal_eval_api_key", + "secret", + raising=False, + ) + from app.features.evaluation.router import router + + app = FastAPI() + app.include_router(router, prefix="/internal/api/evaluation") + + async def _override_auth(): + from app.features.evaluation.internal_auth import InternalEvalPrincipal + + return InternalEvalPrincipal() + + app.dependency_overrides[get_internal_eval_principal] = _override_auth + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://t") as client: + r = await client.get( + "/internal/api/evaluation/users/u1/memoir-pipeline-run", + headers={"X-Internal-Eval-Key": "secret"}, + params={ + "phase1_task_id": "a", + "memoir_correlation_id": "b", + }, + ) + assert r.status_code == 400 diff --git a/api/tests/test_agent_logging.py b/api/tests/test_agent_logging.py new file mode 100644 index 0000000..f97b235 --- /dev/null +++ b/api/tests/test_agent_logging.py @@ -0,0 +1,73 @@ +"""agent_logging: DEBUG 下载荷、hash_only、去重。""" + +from __future__ import annotations + +import app.core.agent_logging as agent_logging + + +class _StubLogger: + def __init__(self) -> None: + self.debug_calls: list[tuple[str, tuple[object, ...]]] = [] + + def debug(self, msg: str, *args: object, **kwargs: object) -> None: + self.debug_calls.append((msg, args)) + + +def _clear_dedup() -> None: + with agent_logging._dedup_lock: + agent_logging._last_prompt_sha256_by_label.clear() + + +def test_log_agent_payload_skips_when_not_verbose(monkeypatch: object) -> None: + monkeypatch.setattr("app.core.config.settings.log_level", "INFO") + log = _StubLogger() + agent_logging.log_agent_payload(log, "x.prompt", "hello") + assert log.debug_calls == [] + + +def test_log_agent_payload_preview_includes_sha12(monkeypatch: object) -> None: + monkeypatch.setattr("app.core.config.settings.log_level", "DEBUG") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_mode", "preview") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_dedup", False) + monkeypatch.setattr("app.core.config.settings.agent_log_max_chars", 100) + _clear_dedup() + log = _StubLogger() + agent_logging.log_agent_payload(log, "Unit.prompt", "hello world") + assert len(log.debug_calls) == 1 + msg, args = log.debug_calls[0] + assert "agent_payload" in msg + assert "sha12=" in msg + assert args[0] == "Unit.prompt" + assert args[4] == "hello world" + + +def test_log_agent_payload_hash_only_no_preview(monkeypatch: object) -> None: + monkeypatch.setattr("app.core.config.settings.log_level", "DEBUG") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_mode", "hash_only") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_dedup", False) + _clear_dedup() + log = _StubLogger() + body = "x" * 500 + agent_logging.log_agent_payload(log, "Unit.prompt", body) + assert len(log.debug_calls) == 1 + msg, args = log.debug_calls[0] + assert "mode=hash_only" in msg + assert args[0] == "Unit.prompt" + assert args[1] == 500 + assert isinstance(args[2], str) and len(args[2]) == 12 + + +def test_log_agent_payload_dedup_second_call_skipped(monkeypatch: object) -> None: + monkeypatch.setattr("app.core.config.settings.log_level", "DEBUG") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_mode", "preview") + monkeypatch.setattr("app.core.config.settings.agent_log_prompt_dedup", True) + monkeypatch.setattr("app.core.config.settings.agent_log_max_chars", 200) + _clear_dedup() + log = _StubLogger() + agent_logging.log_agent_payload(log, "DedupLabel.prompt", "same text") + agent_logging.log_agent_payload(log, "DedupLabel.prompt", "same text") + assert len(log.debug_calls) == 2 + assert "agent_payload_skipped" in log.debug_calls[1][0] + skip_args = log.debug_calls[1][1] + assert skip_args[0] == "DedupLabel.prompt" + assert skip_args[2] == len("same text") diff --git a/api/tests/test_judge_schemas.py b/api/tests/test_judge_schemas.py index b3be4ad..5fbdfc0 100644 --- a/api/tests/test_judge_schemas.py +++ b/api/tests/test_judge_schemas.py @@ -138,6 +138,24 @@ def test_memoir_judge_coerces_string_lists_from_llm() -> None: assert m.insufficient_evidence == [] +def test_memoir_judge_clamps_leaf_scores_over_max_from_llm() -> None: + """细项略超满分(如 rich_diversity=2.5)时钳制到 rubric 上限,避免 validation 整单失败。""" + leaves = _full_memoir_leaves_max() + leaves["rich_diversity"] = 2.5 + m = MemoirJudgeOutput.model_validate( + { + **leaves, + "total_score": 100.0, + "rationale": "", + "major_strengths": [], + "major_issues": [], + "insufficient_evidence": [], + "evidence_refs": [], + } + ) + assert m.rich_diversity == 2.0 + + def test_conversation_judge_meta_fields_default() -> None: leaves = { "emotion_carry": 10, diff --git a/api/tests/test_log_events.py b/api/tests/test_log_events.py new file mode 100644 index 0000000..693c962 --- /dev/null +++ b/api/tests/test_log_events.py @@ -0,0 +1,62 @@ +"""log_events:format_log_event 与 celery_prerun_extras。""" + +from __future__ import annotations + +from app.core.log_events import ( + celery_prerun_extras, + correlation_bind_kwargs, + format_log_event, +) + + +def test_format_log_event_msg_last() -> None: + s = format_log_event( + "demo", + z_last=1, + a_first="x", + msg="你好 世界", + ) + assert s.startswith("event=demo ") + assert s.endswith(" msg=你好 世界") + assert "a_first=x" in s + assert "z_last=1" in s + + +def test_format_log_event_skips_empty() -> None: + s = format_log_event("x", empty="", none_val=None, ok=5) + assert "empty=" not in s + assert "none_val=" not in s + assert "ok=5" in s + + +def test_format_log_event_float() -> None: + s = format_log_event("t", duration_ms=12.3456) + assert "duration_ms=12.3" in s + + +def test_correlation_bind_kwargs() -> None: + d = correlation_bind_kwargs( + user_id="u1", + memoir_correlation_id="c1", + ) + assert d == {"user_id": "u1", "correlation_id": "c1"} + + +def test_celery_prerun_extras_from_kwargs() -> None: + ex = celery_prerun_extras( + "app.tasks.memory_enrichment_tasks.enrich_memory_source", + ("uid", "sid"), + {"memoir_correlation_id": "mc"}, + ) + assert ex["user_id"] == "uid" + assert ex["source_id"] == "sid" + assert ex["correlation_id"] == "mc" + + +def test_celery_prerun_extras_positional_only() -> None: + ex = celery_prerun_extras( + "app.tasks.chapter_compose_tasks.recompose_chapter", + ("chap-1",), + {}, + ) + assert ex == {"chapter_id": "chap-1"} diff --git a/api/tests/test_memoir_pipeline_optimization.py b/api/tests/test_memoir_pipeline_optimization.py index a24f0c0..793359b 100644 --- a/api/tests/test_memoir_pipeline_optimization.py +++ b/api/tests/test_memoir_pipeline_optimization.py @@ -144,7 +144,7 @@ def test_ingest_transcript_sync_no_longer_calls_enrichment_inline() -> None: source = inspect.getsource(ingest_transcript_sync) assert "enrich_memory_after_ingest_sync" not in source - assert "enrich_memory_source" in source + assert "schedule_memory_enrichment" in source # --------------------------------------------------------------------------- diff --git a/api/tests/test_memoir_pipeline_progress.py b/api/tests/test_memoir_pipeline_progress.py new file mode 100644 index 0000000..202ca9d --- /dev/null +++ b/api/tests/test_memoir_pipeline_progress.py @@ -0,0 +1,105 @@ +"""memoir_pipeline_progress:合并与读取逻辑(假 Redis 客户端)。""" + +import json + +import pytest + +import app.core.memoir_pipeline_progress as mpp + + +class _FakeRedis: + def __init__(self) -> None: + self.store: dict[str, str] = {} + + def get(self, key: str) -> str | None: + return self.store.get(key) + + def setex(self, key: str, _ttl: int, value: str) -> None: + self.store[key] = value + + +@pytest.fixture +def fake_redis(monkeypatch: pytest.MonkeyPatch) -> _FakeRedis: + fr = _FakeRedis() + monkeypatch.setattr(mpp, "_client", fr) + return fr + + +def test_merge_pipeline_run_creates_doc(fake_redis: _FakeRedis) -> None: + mpp.merge_pipeline_run("cid-1", {"phase1": {"step": "memory_ingest"}}) + raw = fake_redis.store.get("memoir_pipeline_run:cid-1") + assert raw + doc = json.loads(raw) + assert doc["memoir_correlation_id"] == "cid-1" + assert doc["phase1"]["step"] == "memory_ingest" + + +def test_merge_phase2_merges_by_task_id(fake_redis: _FakeRedis) -> None: + mpp.merge_pipeline_run( + "cid-2", + { + "phase2": [ + {"chapter_category": "a", "task_id": "t1", "status": "enqueued"}, + ], + }, + ) + mpp.merge_pipeline_run( + "cid-2", + {"phase2": [{"task_id": "t1", "status": "running"}]}, + ) + raw = fake_redis.store["memoir_pipeline_run:cid-2"] + doc = json.loads(raw) + assert len(doc["phase2"]) == 1 + assert doc["phase2"][0]["task_id"] == "t1" + assert doc["phase2"][0]["status"] == "running" + assert doc["phase2"][0]["chapter_category"] == "a" + + +def test_merge_fanout_lists_merge_by_id(fake_redis: _FakeRedis) -> None: + mpp.merge_pipeline_run( + "cid-3", + { + "fanout": { + "story_images": [ + {"story_id": "s1", "task_id": "img1", "status": "enqueued"}, + ], + }, + }, + ) + mpp.merge_pipeline_run( + "cid-3", + { + "fanout": { + "story_images": [ + {"story_id": "s1", "status": "success"}, + ], + }, + }, + ) + doc = json.loads(fake_redis.store["memoir_pipeline_run:cid-3"]) + assert len(doc["fanout"]["story_images"]) == 1 + assert doc["fanout"]["story_images"][0]["task_id"] == "img1" + assert doc["fanout"]["story_images"][0]["status"] == "success" + + +def test_init_and_index_resolve(fake_redis: _FakeRedis) -> None: + mpp.init_pipeline_run_from_phase1( + "user-a", "cid-4", "p1tid", segment_count=3 + ) + cid = mpp.resolve_correlation_id_for_phase1_task("p1tid") + assert cid == "cid-4" + snap = mpp.get_pipeline_run_for_eval( + "user-a", phase1_task_id="p1tid" + ) + assert snap is not None + assert snap["user_id"] == "user-a" + assert snap["phase1"]["task_id"] == "p1tid" + + +def test_get_pipeline_run_for_eval_user_mismatch(fake_redis: _FakeRedis) -> None: + mpp.init_pipeline_run_from_phase1( + "user-a", "cid-5", "p1b", segment_count=1 + ) + assert ( + mpp.get_pipeline_run_for_eval("other", phase1_task_id="p1b") is None + ) diff --git a/api/tests/test_memory_enrichment_baseline.py b/api/tests/test_memory_enrichment_baseline.py new file mode 100644 index 0000000..45f86b7 --- /dev/null +++ b/api/tests/test_memory_enrichment_baseline.py @@ -0,0 +1,116 @@ +"""Baseline memory enrichment: single LLM call → session summary + facts.""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from app.features.memory.enrichment import enrich_memory_after_ingest_sync +from app.features.memory.llm_schemas import EnrichmentPayload, parse_json_payload +from app.features.memory.models import MemorySource +from app.features.user.models import User + + +def test_enrichment_payload_roundtrip() -> None: + raw = ( + '{"summary":"要点摘要",' + '"facts":[{"fact_type":"event","subject":"王伟","predicate":"去",' + '"object_json":{"value":"北京","approximate_era":"1990年代"},' + '"confidence":0.85,"source_chunk_id":"ch-1"}]}' + ) + p = parse_json_payload(raw, EnrichmentPayload) + assert p is not None + assert p.summary == "要点摘要" + assert len(p.facts) == 1 + assert p.facts[0].subject == "王伟" + + +def test_enrich_memory_after_ingest_sync_single_llm_call(monkeypatch: pytest.MonkeyPatch) -> None: + from app.features.memory import enrichment as mod + + monkeypatch.setattr("app.core.config.settings.memory_enrichment_enabled", True) + + invoke_count = {"n": 0} + + def fake_invoke(llm, prompt, max_tokens, agent): + invoke_count["n"] += 1 + assert agent == "memory.enrichment_sync" + return ( + '{"summary":"本轮要点",' + '"facts":[{"fact_type":"event","subject":"王伟","predicate":"住",' + '"object_json":{"value":"上海"},"confidence":0.8,"source_chunk_id":"ch1"}]}' + ) + + monkeypatch.setattr(mod, "invoke_json_object", fake_invoke) + monkeypatch.setattr( + mod, + "list_chunks_for_source_sync", + lambda s, sid: [SimpleNamespace(id="ch1", content="王伟住在上海。")], + ) + + summaries: list[dict] = [] + facts: list[dict] = [] + + def capture_summary(session, **kwargs): + summaries.append(kwargs) + + def capture_fact(session, **kwargs): + facts.append(kwargs) + + monkeypatch.setattr(mod, "create_memory_summary_sync", capture_summary) + monkeypatch.setattr(mod, "create_memory_fact_sync", capture_fact) + + class FakeSession: + def get(self, model, key): + if model is User and key == "u1": + return SimpleNamespace(nickname="老王") + if model is MemorySource and key == "src-1": + return SimpleNamespace(lineage_json=None) + return None + + enrich_memory_after_ingest_sync(FakeSession(), "u1", "src-1", llm=object()) + + assert invoke_count["n"] == 1 + assert len(summaries) == 1 + assert summaries[0]["summary_type"] == "session" + assert summaries[0]["content"] == "本轮要点" + assert summaries[0]["source_chunk_ids"] == ["ch1"] + assert len(facts) == 1 + assert facts[0]["predicate"] == "住" + assert facts[0]["status"] == "confirmed" + + +def test_enrich_memory_skips_when_parse_returns_none(monkeypatch: pytest.MonkeyPatch) -> None: + from app.features.memory import enrichment as mod + + monkeypatch.setattr("app.core.config.settings.memory_enrichment_enabled", True) + monkeypatch.setattr(mod, "invoke_json_object", lambda *a, **k: "{not json") + monkeypatch.setattr( + mod, + "list_chunks_for_source_sync", + lambda s, sid: [SimpleNamespace(id="c1", content="x")], + ) + called = {"summary": False, "fact": False} + + monkeypatch.setattr( + mod, + "create_memory_summary_sync", + lambda *a, **k: called.update(summary=True), + ) + monkeypatch.setattr( + mod, + "create_memory_fact_sync", + lambda *a, **k: called.update(fact=True), + ) + + class FakeSession: + def get(self, model, key): + if model is User and key == "u": + return None + if model is MemorySource and key == "s": + return SimpleNamespace(lineage_json=None) + return None + + enrich_memory_after_ingest_sync(FakeSession(), "u", "s", llm=object()) + assert called == {"summary": False, "fact": False} diff --git a/api/tests/test_story_route_oral_invariant.py b/api/tests/test_story_route_oral_invariant.py index a32df9a..8b4b58c 100644 --- a/api/tests/test_story_route_oral_invariant.py +++ b/api/tests/test_story_route_oral_invariant.py @@ -90,13 +90,18 @@ def test_single_segment_decide_receives_only_combined_text_not_evidence() -> Non patch( "app.features.memoir.story_pipeline_sync.MemoirImageSettings", ) as mis, + patch( + "app.tasks.story_title_tasks.generate_story_title_after_create.delay", + ), + patch( + "app.features.memoir.story_pipeline_sync.refresh_chapter_evidence_snapshot_with_retry_sync", + ), ): route_agent_mock.plan_batch.return_value = None route_agent_mock.decide.side_effect = decide_capture na = MagicMock() nac.return_value = na - na.generate_title.return_value = "章节标题" na.generate_narrative.return_value = '{"paragraphs": [{"content": "叙事正文段落足够长用于测试合并逻辑避免触发过短回退"}]}' mock_story = MagicMock() @@ -200,13 +205,18 @@ def test_decide_receives_only_same_stage_story_candidates() -> None: patch( "app.features.memoir.story_pipeline_sync.MemoirImageSettings", ) as mis, + patch( + "app.tasks.story_title_tasks.generate_story_title_after_create.delay", + ), + patch( + "app.features.memoir.story_pipeline_sync.refresh_chapter_evidence_snapshot_with_retry_sync", + ), ): route_agent_mock.plan_batch.return_value = None route_agent_mock.decide.side_effect = decide_capture na = MagicMock() nac.return_value = na - na.generate_title.return_value = "章节标题" na.generate_narrative.return_value = '{"paragraphs": [{"content": "叙事正文段落足够长用于测试合并逻辑避免触发过短回退"}]}' mock_story = MagicMock() diff --git a/app-eval-web/src/components/ScoreCard.tsx b/app-eval-web/src/components/ScoreCard.tsx index 6e74d03..d12be9a 100644 --- a/app-eval-web/src/components/ScoreCard.tsx +++ b/app-eval-web/src/components/ScoreCard.tsx @@ -90,6 +90,368 @@ function DialogueLineageTurnsTable({ dlg }: { dlg: Record }) { ); } +/** 与后端 judge_schemas._MEMOIR_LEAF_SCORE_BOUNDS 一致(满分制总分 100) */ +const MEMOIR_SCORE_GROUPS: { + title: string; + items: { key: string; label: string; max: number }[]; +}[] = [ + { + title: "记忆与真实度", + items: [ + { key: "mem_fidelity", label: "记忆忠实度", max: 9 }, + { key: "mem_factual_accuracy", label: "事实准确性", max: 5 }, + { key: "mem_factual_coverage", label: "事实覆盖率", max: 5 }, + { key: "mem_traceability", label: "记忆可追溯性", max: 4 }, + ], + }, + { + title: "信息呈现", + items: [ + { key: "info_slot_coverage", label: "槽位覆盖度", max: 6 }, + { key: "info_sufficiency", label: "信息充分性", max: 4 }, + { key: "info_density", label: "信息密度", max: 4 }, + ], + }, + { + title: "叙事结构", + items: [ + { key: "narr_structure", label: "故事结构", max: 6 }, + { key: "narr_paragraphs", label: "段落组织", max: 5 }, + { key: "narr_pacing", label: "节奏控制", max: 3 }, + ], + }, + { + title: "语言表达", + items: [ + { key: "lang_fluency", label: "语言流畅度", max: 3 }, + { key: "lang_conciseness", label: "表达精炼度", max: 3 }, + { key: "lang_literary", label: "文笔质量", max: 4 }, + { + key: "lang_controlled_expansion", + label: "控制性扩写能力", + max: 4, + }, + { key: "lang_detail", label: "细节还原与强化", max: 2 }, + { key: "lang_style", label: "风格一致性", max: 2 }, + ], + }, + { + title: "情感", + items: [ + { key: "emo_authenticity", label: "情感真实度", max: 5 }, + { key: "emo_depth", label: "情感深度", max: 4 }, + ], + }, + { + title: "人物", + items: [ + { key: "char_understanding", label: "人物理解", max: 4 }, + { key: "char_consistency", label: "人物一致性", max: 3 }, + { key: "char_integration", label: "人物融入度", max: 2 }, + ], + }, + { + title: "连贯一致", + items: [ + { key: "coh_timeline", label: "时间线一致性", max: 2 }, + { key: "coh_cross_chapter", label: "跨章节关联", max: 2 }, + ], + }, + { + title: "丰富度", + items: [ + { key: "rich_analogy", label: "类比与引用", max: 3 }, + { key: "rich_diversity", label: "表达多样性", max: 2 }, + ], + }, + { + title: "出版就绪", + items: [ + { key: "pub_editorial_cost", label: "编辑成本", max: 2 }, + { key: "pub_completeness", label: "完整度", max: 2 }, + ], + }, +]; + +function coercStrList(v: unknown): string[] { + if (!Array.isArray(v)) return []; + return v.map((x) => String(x).trim()).filter(Boolean); +} + +function readMemoirLeafScore( + judge: Record, + key: string, +): number | null { + const v = judge[key]; + if (typeof v !== "number" || Number.isNaN(v)) return null; + return v; +} + +/** 回忆录章节评分卡:对齐对话 ScoreCard(总分 + 分项 / 满分 + 总评与对比建议) */ +export function MemoirScoreCard({ + judge, + emptyHint, +}: { + judge: unknown; + emptyHint?: string; +}) { + if (!isRecord(judge)) { + return ( +

+ {emptyHint ?? "暂无评分结果"} +

+ ); + } + + const total = judge.total_score; + const rationale = + typeof judge.rationale === "string" ? judge.rationale.trim() : ""; + const strengths = coercStrList(judge.major_strengths); + const issues = coercStrList(judge.major_issues); + const insuff = coercStrList(judge.insufficient_evidence); + const confRaw = judge.confidence; + const confidence = + typeof confRaw === "number" && !Number.isNaN(confRaw) + ? Math.max(0, Math.min(1, confRaw)) + : null; + + const refsRaw = judge.evidence_refs; + const refs = Array.isArray(refsRaw) + ? refsRaw.filter((x): x is Record => isRecord(x)) + : []; + + const anyText = + rationale.length > 0 || + strengths.length > 0 || + issues.length > 0 || + insuff.length > 0; + + return ( +
+
+ 本章成稿总分 + + {typeof total === "number" ? total.toFixed(1) : "—"} + + / 100 +
+

+ 下列分项为模型对照基线节选给出的得分;「总评摘要」「对比与改进要点」为文字对照与修改建议。 +

+ + {MEMOIR_SCORE_GROUPS.map((group) => { + let sub = 0; + let subMax = 0; + const rows = []; + for (const { key, label, max } of group.items) { + subMax += max; + const sc = readMemoirLeafScore(judge, key); + if (sc != null) sub += sc; + rows.push( +
  • + {label} + + {sc != null ? `${sc.toFixed(1)} / ${max}` : `— / ${max}`} + +
  • , + ); + } + return ( +
    +
    + {group.title} + + {" "} + · 小计 {sub.toFixed(1)} / {subMax} + +
    +
      + {rows} +
    +
    + ); + })} + + {rationale ? ( +
    + 总评摘要(对照说明) +

    + {rationale} +

    +
    + ) : null} + + {anyText && !rationale ? ( +

    + (本段未返回总评段落,以下为要点列表。) +

    + ) : null} + + {strengths.length || issues.length || insuff.length ? ( +
    +
    + 对比与改进要点 +
    + {strengths.length ? ( + <> +
    + 亮点 +
    +
      + {strengths.map((t, idx) => ( +
    • {t}
    • + ))} +
    + + ) : null} + {issues.length ? ( + <> +
    + 主要问题与提升方向 +
    +
      + {issues.map((t, idx) => ( +
    • {t}
    • + ))} +
    + + ) : null} + {insuff.length ? ( + <> +
    + 证据不足说明 +
    +
      + {insuff.map((t, idx) => ( +
    • {t}
    • + ))} +
    + + ) : null} +
    + ) : !rationale ? ( +

    + 未返回文字对比或建议(请查看原始 JSON 中的 judge 字段)。 +

    + ) : null} + + {refs.length ? ( +
    + 引用摘录(evidence_refs) +
      + {refs.map((r, idx) => { + const dim = + typeof r.dimension === "string" ? r.dimension.trim() : ""; + const snip = + typeof r.snippet === "string" ? r.snippet.trim() : ""; + const ti = r.turn_index; + const tiLabel = + typeof ti === "number" && ti >= 0 ? `turn #${ti}` : null; + return ( +
    • + {dim ? ( + {dim} + ) : ( + (维度未填) + )} + {tiLabel ? ( + + {tiLabel} + + ) : null} + {snip ? ( +
      + {snip} +
      + ) : null} +
    • + ); + })} +
    +
    + ) : null} + + {confidence != null ? ( +

    + 模型置信度:{confidence.toFixed(2)} +

    + ) : null} +
    + ); +} + +/** evidence_summary + evidence_trace:默认折叠,仅供排障 / 审计 */ +function MemoirEvidenceTechBlock({ + evidenceSummary, + trace, + formatMeta, +}: { + evidenceSummary: string | null | undefined; + trace: unknown; + formatMeta: unknown; +}) { + const summary = String(evidenceSummary ?? "").trim(); + const truncated = isRecord(formatMeta) && formatMeta.truncated === true; + const hasSummary = summary.length > 0; + const hasTrace = + trace != null && isRecord(trace) && Object.keys(trace).length > 0; + if (!hasSummary && !hasTrace) return null; + return ( +
    + + 证据包与溯源(技术详情,默认折叠) + +
    + {hasSummary ? ( +

    + {summary} + {truncated ? " · 已截断" : ""} +

    + ) : null} + {hasTrace ? : null} +
    +
    + ); +} + /** 章节 / 故事行内:折叠展示 evidence_trace 中的 id 列表(内审计) */ function EvidenceTraceFold({ trace }: { trace: unknown }) { if (trace == null || !isRecord(trace)) return null; @@ -314,8 +676,201 @@ export function ConversationCompareSummary({ summary }: { summary: unknown }) { ); } -/** 手工评审 API 用 chapter_results/story_results;自动化 run 的 judge_bundle_json 用 chapters/stories。 */ -function pickMemoirChapterList(data: Record): unknown[] { +/** Memoir chapter A/B compare summary (mirrors ConversationCompareSummary) */ +export function MemoirCompareSummary({ + summary, +}: { + summary: unknown; +}) { + if (!isRecord(summary)) return null; + const gate = isRecord(summary.gate) ? summary.gate : null; + const groupDeltas = isRecord(summary.group_deltas) + ? summary.group_deltas + : null; + const leafDeltas = isRecord(summary.leaf_deltas) + ? summary.leaf_deltas + : null; + const totalDelta = + typeof summary.total_delta === "number" ? summary.total_delta : null; + const baselineTotal = + typeof summary.baseline_total === "number" + ? summary.baseline_total + : null; + const chapterTotal = + typeof summary.chapter_total === "number" + ? summary.chapter_total + : null; + const gateStatus = typeof gate?.status === "string" ? gate.status : ""; + const reasons = Array.isArray(gate?.reasons) + ? gate.reasons.map((x) => String(x)).filter(Boolean) + : []; + const groupRows = groupDeltas + ? Object.entries(groupDeltas).filter(([, raw]) => isRecord(raw)) + : []; + + const keyRegressions = Array.isArray(summary.key_regressions) + ? summary.key_regressions.map(String).filter(Boolean) + : []; + const keyGains = Array.isArray(summary.key_gains) + ? summary.key_gains.map(String).filter(Boolean) + : []; + + return ( +
    +
    + A/B 对比结论 + + {gateStatus === "surpass" + ? "超过基线" + : gateStatus === "parity" + ? "基本追平" + : gateStatus === "regressed" + ? "仍落后基线" + : "待判定"} + +
    + {baselineTotal != null && + chapterTotal != null && + totalDelta != null ? ( +

    + 基线 {baselineTotal.toFixed(1)} 分,新稿{" "} + {chapterTotal.toFixed(1)} 分,差值{" "} + {formatSigned(totalDelta)} +

    + ) : null} + {groupRows.length ? ( + <> +
    + 九维差值 +
    +
      + {groupRows.map(([key, raw]) => { + const row = raw as Record; + const label = + typeof row.label === "string" ? row.label : key; + const delta = + typeof row.delta === "number" ? row.delta : null; + const bl = + typeof row.baseline === "number" ? row.baseline : null; + const ch = + typeof row.chapter === "number" ? row.chapter : null; + const mx = + typeof row.max === "number" ? row.max : null; + if (delta == null) return null; + return ( +
    • + {label} + + {bl != null ? `${bl.toFixed(1)}` : "—"}{" → "} + {ch != null ? `${ch.toFixed(1)}` : "—"} + {mx != null ? ` / ${mx}` : ""} + {" "} + 0.3 + ? "var(--success-text)" + : delta < -0.3 + ? "var(--danger-text)" + : undefined, + }} + > + ({formatSigned(delta)}) + + +
    • + ); + })} +
    + + ) : null} + {keyGains.length ? ( +

    + 提升项: + {keyGains.join("、")} +

    + ) : null} + {keyRegressions.length ? ( +

    + 回落项: + {keyRegressions.join("、")} +

    + ) : null} + {reasons.length ? ( +
      + {reasons.map((reason, idx) => ( +
    • {reason}
    • + ))} +
    + ) : null} + {leafDeltas && Object.keys(leafDeltas).length ? ( +
    + 全部细项差值 +
      + {Object.entries(leafDeltas) + .filter(([, raw]) => isRecord(raw)) + .map(([key, raw]) => { + const row = raw as Record; + const label = + typeof row.label === "string" ? row.label : key; + const delta = + typeof row.delta === "number" ? row.delta : null; + const bl = + typeof row.baseline === "number" ? row.baseline : null; + const ch = + typeof row.chapter === "number" ? row.chapter : null; + const mx = + typeof row.max === "number" ? row.max : null; + if (delta == null) return null; + return ( +
    • + {label} + + {bl != null ? `${bl.toFixed(1)}` : "—"}{" → "} + {ch != null ? `${ch.toFixed(1)}` : "—"} + {mx != null ? ` / ${mx}` : ""} + {" "} + 0.1 + ? "var(--success-text)" + : (delta ?? 0) < -0.1 + ? "var(--danger-text)" + : undefined, + }} + > + ({formatSigned(delta)}) + + +
    • + ); + })} +
    +
    + ) : null} +
    + ); +} + +function pickMemoirChapterList( + data: Record, +): unknown[] { const manual = data.chapter_results; const exec = data.chapters; if (Array.isArray(manual) && manual.length > 0) return manual; @@ -323,7 +878,9 @@ function pickMemoirChapterList(data: Record): unknown[] { return Array.isArray(manual) ? manual : []; } -function pickMemoirStoryList(data: Record): unknown[] { +function pickMemoirStoryList( + data: Record, +): unknown[] { const manual = data.story_results; const exec = data.stories; if (Array.isArray(manual) && manual.length > 0) return manual; @@ -331,13 +888,28 @@ function pickMemoirStoryList(data: Record): unknown[] { return Array.isArray(manual) ? manual : []; } -/** 回忆录评审结果:列表章节分 + 原始 JSON */ +function MemoirJudgeError({ error }: { error: unknown }) { + if (typeof error !== "string" || !error) return null; + return ( +
    + LLM:{error} +
    + ); +} + +/** 回忆录评审结果:每章 A/B 双列评分卡 + 对比结论 */ export function MemoirScoreSummary({ data, showRawJson = true, }: { data: unknown; - /** 为 false 时仅渲染结构化章节/故事块(供外层再贴完整 JSON) */ showRawJson?: boolean; }) { if (!isRecord(data)) { @@ -345,97 +917,195 @@ export function MemoirScoreSummary({ } const chapters = pickMemoirChapterList(data); const stories = pickMemoirStoryList(data); + const apiErrors = Array.isArray(data.errors) + ? data.errors.filter((x): x is string => typeof x === "string") + : []; + const apiWarnings = Array.isArray(data.warnings) + ? data.warnings.filter((x): x is string => typeof x === "string") + : []; + const jp = + typeof data.judge_provider === "string" ? data.judge_provider : null; + const jmRaw = + typeof data.judge_model === "string" ? data.judge_model : ""; + const jm = jmRaw.trim() ? jmRaw.trim() : null; + let judgeMeta: string | null = null; + if (jp === "zhipu") judgeMeta = jm ? `智谱 · ${jm}` : "智谱"; + else if (jp === "deepseek") + judgeMeta = jm ? `DeepSeek · ${jm}` : "DeepSeek"; return (
    + {apiErrors.length ? ( +
    +
    评分错误
    +
      + {apiErrors.map((t, i) => ( +
    • {t}
    • + ))} +
    +
    + ) : null} + {apiWarnings.length ? ( +
    + 提示 +
      + {apiWarnings.map((t, i) => ( +
    • {t}
    • + ))} +
    +
    + ) : null} + {judgeMeta ? ( +

    + 本次评分模型:{judgeMeta} +

    + ) : null} {Array.isArray(chapters) && chapters.length > 0 ? (

    章节

    -
      - {chapters.map((c, i) => ( -
    • - {isRecord(c) ? ( - <> - - {String(c.title ?? c.chapter_title ?? `章节 ${i + 1}`)} - - {typeof c.lineage_tier === "string" ? ( - - {c.lineage_tier} - - ) : null} - {(() => { - const j = isRecord(c.judge) ? c.judge : null; - const sc = - j && typeof j.total_score === "number" - ? j.total_score - : null; - return sc != null ? ( - - {sc.toFixed(1)} 分 - - ) : null; - })()} - {typeof c.evidence_summary === "string" && - c.evidence_summary ? ( -
      - {c.evidence_summary} - {c.format_meta && - isRecord(c.format_meta) && - c.format_meta.truncated === true ? ( - · 已截断 - ) : null} -
      - ) : null} - - - ) : ( - {JSON.stringify(c)} - )} -
    • - ))} -
    + {chapters.map((c, i) => + isRecord(c) ? ( +
    +
    + + {String( + c.title ?? c.chapter_title ?? `章节 ${i + 1}`, + )} + + {typeof c.lineage_tier === "string" ? ( + + {c.lineage_tier} + + ) : null} +
    +
    +
    +
    + A · 基线 + {typeof c.baseline_title === "string" && + c.baseline_title + ? ` — ${c.baseline_title}` + : ""} +
    + + +
    +
    +
    + B · 新稿 +
    + + +
    +
    + + +
    + ) : ( + {JSON.stringify(c)} + ), + )}
    ) : null} {Array.isArray(stories) && stories.length > 0 ? (

    故事

    -
      - {stories.map((s, i) => ( -
    • - {isRecord(s) ? ( - <> - {String(s.title ?? `故事 ${i + 1}`)} - {typeof s.lineage_tier === "string" - ? ` · ${s.lineage_tier}` - : ""} - {(() => { - const j = isRecord(s.judge) ? s.judge : null; - const sc = - j && typeof j.total_score === "number" - ? j.total_score - : null; - return sc != null ? ` · ${sc.toFixed(1)} 分` : ""; - })()} - {typeof s.evidence_summary === "string" && s.evidence_summary ? ( -
      - {s.evidence_summary} -
      - ) : null} - - - ) : null} -
    • - ))} -
    + {stories.map((s, i) => + isRecord(s) ? ( +
    + + {String(s.title ?? `故事 ${i + 1}`)} + +
    +
    +
    + A · 基线 +
    + + +
    +
    +
    + B · 新稿 +
    + + +
    +
    + + +
    + ) : null, + )}
    ) : null} {showRawJson ? : null} diff --git a/app-eval-web/src/eval.css b/app-eval-web/src/eval.css index fe3e512..20a3ff5 100644 --- a/app-eval-web/src/eval.css +++ b/app-eval-web/src/eval.css @@ -390,6 +390,66 @@ code { color: var(--text); } +/* ── Phase tag (inline badge in meta bar) ── */ +.eval-memoir-phase-tag { + display: inline-block; + margin-left: var(--s-3); + padding: 0.15em 0.55em; + font-size: var(--text-xs); + font-weight: 600; + border-radius: var(--r-sm); + vertical-align: middle; +} +.eval-memoir-phase-tag--active { + background: var(--accent-muted); + color: var(--accent); + animation: eval-memoir-pulse 1.4s ease-in-out infinite; +} +.eval-memoir-phase-tag--done { + background: var(--success-bg); + color: var(--success-text); +} +.eval-memoir-phase-tag--error { + background: var(--danger-bg); + color: var(--danger-text); +} +@keyframes eval-memoir-pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: .55; } +} + +/* ── Progress bar ── */ +.eval-memoir-progress { + height: 4px; + margin: 0 0 var(--s-3); + border-radius: 2px; + background: var(--bg-muted); + overflow: hidden; +} +.eval-memoir-progress__bar { + height: 100%; + background: var(--accent); + border-radius: 2px; + transition: width 0.35s ease; +} + +/* ── Danger button ── */ +.eval-btn--danger { + background: var(--danger-bg); + border-color: var(--danger-border); + color: var(--danger-text); + font-weight: 600; +} +.eval-btn--danger:hover:not(:disabled) { + background: oklch(0.94 0.04 18); +} + +/* ── Raw JSON details toggle ── */ +.eval-memoir-raw-detail summary { + font-size: var(--text-sm); + user-select: none; +} + .eval-memoir-compare { display: grid; grid-template-columns: 1fr 1fr; @@ -1892,6 +1952,18 @@ code { margin-bottom: var(--s-2); } +.eval-memoir-chapter-block { + border: 1px solid var(--border); + border-radius: var(--r-lg); + padding: var(--s-4); + margin-bottom: var(--s-4); + background: var(--bg-elevated); +} + +.eval-memoir-compare-section { + margin-top: var(--s-3); +} + /* Diff table */ .eval-diff-wrap { diff --git a/app-eval-web/src/pages/MemoirPage.tsx b/app-eval-web/src/pages/MemoirPage.tsx index 2d121c3..59d448f 100644 --- a/app-eval-web/src/pages/MemoirPage.tsx +++ b/app-eval-web/src/pages/MemoirPage.tsx @@ -1,18 +1,29 @@ -import { useCallback, useEffect, useMemo, useState } from "react"; -import { api } from "../api"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { api, apiBase, apiKey } from "../api"; import { DEFAULT_USER_EXPORT_FIXTURE } from "../config"; import { usePushNotice } from "../context/NoticeContext"; import { CopyTextButton } from "../components/CopyTextButton"; import { MemoirCompareView } from "../components/MemoirCompareView"; import { MemoirScoreSummary } from "../components/ScoreCard"; +import { JsonPreview } from "../components/JsonPreview"; import type { FixtureDetailResponse } from "../types"; import { getMemoirUserIdFromHash } from "../utils/hashRouteParams"; import { parseMemoirSnapshot } from "../utils/memoirSnapshot"; import { - loadEvalJudgeProvider, - saveEvalJudgeProvider, - type EvalJudgeProvider, -} from "../utils/evalJudgePref"; + loadMemoirJudgeBackend, + memoirBackendToJudgeBody, + saveMemoirJudgeBackend, + type MemoirJudgeBackend, +} from "../utils/evalMemoirJudgePref"; + +type ChapterRow = Record; +type JudgePhase = + | "" + | "connecting" + | "preparing" + | "judging" + | "done" + | "error"; export default function MemoirPage() { const pushNotice = usePushNotice(); @@ -24,11 +35,20 @@ export default function MemoirPage() { const [evalUserId, setEvalUserId] = useState(""); const [memoirSnapshot, setMemoirSnapshot] = useState(null); const [memoirSnapBusy, setMemoirSnapBusy] = useState(false); - const [memoirJudgeBusy, setMemoirJudgeBusy] = useState(false); - const [manualMemoirJudge, setManualMemoirJudge] = useState(null); - const [judgeProvider, setJudgeProvider] = useState(() => - loadEvalJudgeProvider(), - ); + const [memoirJudgeBackend, setMemoirJudgeBackend] = + useState(() => loadMemoirJudgeBackend()); + + const [judgePhase, setJudgePhase] = useState(""); + const [judgeProvider, setJudgeProvider] = useState(""); + const [judgeModel, setJudgeModel] = useState(""); + const [chapterRows, setChapterRows] = useState([]); + const [chapterErrors, setChapterErrors] = useState([]); + const [, setTotalChapters] = useState(0); + const [preparedCount, setPreparedCount] = useState(0); + const [finishedCount, setFinishedCount] = useState(0); + + const abortRef = useRef(null); + const judging = judgePhase === "connecting" || judgePhase === "preparing" || judgePhase === "judging"; useEffect(() => { void (async () => { @@ -61,8 +81,6 @@ export default function MemoirPage() { ); if (r.ok && r.data?.turns) { setFixtureMemoirSections(r.data.memoir_sections ?? []); - // 勿用 source_user_id 预填快照用户:那是导出时的生产用户,与 Playground 临时 eval 用户不同, - // 预填会导致「刷新数据库快照」始终查错人、JSON 为空。 } else { setFixtureMemoirSections([]); } @@ -83,7 +101,7 @@ export default function MemoirPage() { ); if (r.ok) { setMemoirSnapshot(r.data); - pushNotice("已刷新库中快照(章节对照 + 故事条数见下方)", "success"); + pushNotice("已刷新库中快照", "success"); return true; } pushNotice(r.error ?? "加载失败", "error"); @@ -108,68 +126,191 @@ export default function MemoirPage() { return () => window.removeEventListener("hashchange", syncFromHash); }, [fetchMemoirSnapshotByUser]); - async function runMemoirSnapshot() { - await fetchMemoirSnapshotByUser(evalUserId); + function resetJudgeState() { + setJudgePhase(""); + setJudgeProvider(""); + setJudgeModel(""); + setChapterRows([]); + setChapterErrors([]); + setTotalChapters(0); + setPreparedCount(0); + setFinishedCount(0); } - async function runJudgeMemoir() { + async function runJudgeMemoirStream() { const uid = evalUserId.trim(); if (!uid) { pushNotice("请填写用户 ID", "error"); return; } - setMemoirJudgeBusy(true); + + abortRef.current?.abort(); + const ac = new AbortController(); + abortRef.current = ac; + + resetJudgeState(); + setJudgePhase("connecting"); + + const { judge_provider, judge_model } = + memoirBackendToJudgeBody(memoirJudgeBackend); + try { - const r = await api( - "/internal/api/evaluation/judge/memoir-chapters", - { - method: "POST", - body: JSON.stringify({ - user_id: uid, - baseline_sections: fixtureMemoirSections.length - ? fixtureMemoirSections - : null, - judge_provider: judgeProvider, - judge_model: null, - }), + const url = `${apiBase}/internal/api/evaluation/judge/memoir-chapters-stream`; + const res = await fetch(url, { + method: "POST", + headers: { + "X-Internal-Eval-Key": apiKey, + "Content-Type": "application/json", }, - ); - if (r.ok) { - pushNotice("回忆录自动评分完成", "success"); - setManualMemoirJudge(r.data); - } else { - pushNotice(r.error ?? "评分失败", "error"); + body: JSON.stringify({ + user_id: uid, + baseline_sections: fixtureMemoirSections.length + ? fixtureMemoirSections + : null, + judge_provider, + judge_model, + }), + signal: ac.signal, + }); + if (!res.ok) { + const t = await res.text(); + pushNotice(`评分流 HTTP ${res.status}:${t.slice(0, 300)}`, "error"); + setJudgePhase("error"); + return; } - } finally { - setMemoirJudgeBusy(false); + + const reader = res.body?.getReader(); + if (!reader) { + pushNotice("无法读取流式响应", "error"); + setJudgePhase("error"); + return; + } + setJudgePhase("preparing"); + const decoder = new TextDecoder(); + let buf = ""; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += decoder.decode(value, { stream: true }); + const parts = buf.split("\n\n"); + buf = parts.pop() ?? ""; + for (const raw of parts) { + for (const line of raw.split("\n")) { + if (!line.startsWith("data: ")) continue; + let evt: Record; + try { + evt = JSON.parse(line.slice(6)) as Record; + } catch { + continue; + } + switch (evt.event) { + case "meta": + setJudgeProvider(String(evt.judge_provider ?? "")); + setJudgeModel(String(evt.judge_model ?? "")); + setTotalChapters( + typeof evt.total_chapters === "number" + ? evt.total_chapters + : 0, + ); + break; + case "chapters_prepared": + setPreparedCount( + typeof evt.count === "number" ? evt.count : 0, + ); + setJudgePhase("judging"); + break; + case "chapter_judge": + setFinishedCount((n) => n + 1); + if (evt.chapter && typeof evt.chapter === "object") { + setChapterRows((prev) => [ + ...prev, + evt.chapter as ChapterRow, + ]); + } + break; + case "chapter_error": + setFinishedCount((n) => n + 1); + setChapterErrors((prev) => [ + ...prev, + String(evt.message ?? "unknown error"), + ]); + break; + case "warning": + pushNotice(String(evt.message ?? ""), "info"); + break; + case "error": + setChapterErrors((prev) => [ + ...prev, + String(evt.message ?? "server error"), + ]); + pushNotice(String(evt.message ?? "评分服务错误"), "error"); + setJudgePhase("error"); + break; + case "done": + setJudgePhase("done"); + break; + } + } + } + } + setJudgePhase((p) => (p === "done" || p === "error" ? p : "done")); + } catch (e: unknown) { + if (e instanceof Error && e.name === "AbortError") return; + pushNotice( + `评分流异常:${e instanceof Error ? e.message : "network error"}`, + "error", + ); + setJudgePhase("error"); } } + function handleStop() { + abortRef.current?.abort(); + setJudgePhase((p) => (p === "done" ? p : "error")); + } + const storyCount = useMemo(() => { const p = parseMemoirSnapshot(memoirSnapshot); return p?.stories.length ?? 0; }, [memoirSnapshot]); + const phaseLabel: Record = { + "": "", + connecting: "连接评分服务…", + preparing: "准备章节证据包…", + judging: `评分中 ${finishedCount}/${preparedCount}`, + done: `完成 · ${chapterRows.length} 章`, + error: "评分中断", + }; + + const assembledResult = useMemo(() => { + if (!chapterRows.length && !chapterErrors.length) return null; + return { + user_id: evalUserId, + judge_provider: judgeProvider, + judge_model: judgeModel, + chapter_results: chapterRows, + story_results: [], + errors: chapterErrors, + warnings: [] as string[], + }; + }, [chapterRows, chapterErrors, evalUserId, judgeProvider, judgeModel]); + return (
    -

    Memoir · 章节对照

    -

    - 将导出 MD 中的基线小节与库中章节按标题相似度逐行并排对照;未配对的库中章节单独列出。 - 故事(stories)在侧栏「Stories」页查看。Playground 重放请填评测临时用户 - UUID;勿用导出 source_user_id。 -

    +

    Memoir · 章节评分

    + {/* ── Toolbar ── */}
    +
    setEvalUserId(e.target.value)} autoComplete="off" placeholder="UUID" - aria-label="评测用户 ID" - /> - +
    +
    +
    - + {!judging ? ( + + ) : ( + + )}
    + {/* ── Status bar ── */}
    - 基线小节 {fixtureMemoirSections.length} 段 + 基线 {fixtureMemoirSections.length} 段 {memoirSnapshot ? ( - <> - {" "} - · 库中故事 {storyCount} 条 - + <> · 库中故事 {storyCount} 条 ) : ( - <> · 刷新快照后显示配对与库内章节 + <> · 刷新快照后显示对照 )} + {judgePhase && judgePhase !== "done" && judgePhase !== "error" ? ( + + {phaseLabel[judgePhase]} + + ) : null} + {judgePhase === "done" ? ( + + {phaseLabel.done} + + ) : null} + {judgePhase === "error" ? ( + + {phaseLabel.error} + + ) : null}
    + {/* ── Progress bar ── */} + {judging && preparedCount > 0 ? ( +
    +
    +
    + ) : null} + + {/* ── Compare view ── */}

    基线 ↔ 库中章节

    - 左右一行为一对;配对为启发式(标题包含/相等),仅供参考。 + 左右一行为一对;按标题启发式配对,仅供参考。

    + {/* ── Chapter results (streaming) ── */}
    -

    评分结果

    - {manualMemoirJudge ? ( - +

    章节评分结果

    + + {assembledResult ? ( + ) : (

    - 运行自动评分后显示摘要与明细。 + {judging + ? "评分进行中,结果将逐条显示…" + : "点击「开始评分」后结果将流式展示。"}

    )} + + {assembledResult ? ( +
    + + 原始 JSON + + +
    + ) : null}
    ); diff --git a/app-eval-web/src/pages/PlaygroundPage.tsx b/app-eval-web/src/pages/PlaygroundPage.tsx index 308cf22..011dc2b 100644 --- a/app-eval-web/src/pages/PlaygroundPage.tsx +++ b/app-eval-web/src/pages/PlaygroundPage.tsx @@ -71,12 +71,19 @@ type MemoirPhase1PollBody = { durations_ms?: Record; }; +type PipelinePollOptions = { + userId: string; + phase1TaskId: string; + onPipeline: (snap: Record) => void; +}; + /** 轮询直到 Phase1 写入 topic_category 或超时 / 中止 */ async function waitUntilMemoirPhase1Ready( conversationId: string, segmentIds: string[], signal: AbortSignal, onPoll?: (body: MemoirPhase1PollBody) => void, + pipeline?: PipelinePollOptions, ): Promise<{ ok: true } | { ok: false; error: string }> { if (!segmentIds.length) return { ok: true }; const deadline = Date.now() + MEMOIR_PHASE1_WAIT_MAX_MS; @@ -92,6 +99,13 @@ async function waitUntilMemoirPhase1Ready( error: r.error ?? "memoir-phase1-ready 请求失败", }; if (r.data && onPoll) onPoll(r.data); + if (pipeline?.userId && pipeline.phase1TaskId) { + const pr = await api>( + `/internal/api/evaluation/users/${encodeURIComponent(pipeline.userId)}/memoir-pipeline-run?phase1_task_id=${encodeURIComponent(pipeline.phase1TaskId)}`, + { signal }, + ); + if (pr.ok && pr.data) pipeline.onPipeline(pr.data); + } if (r.data?.ready) return { ok: true }; try { await delay(MEMOIR_PHASE1_POLL_MS, signal); @@ -188,6 +202,10 @@ export default function PlaygroundPage() { const [phase1WaitServerMs, setPhase1WaitServerMs] = useState( null, ); + const [pipelineRunSnap, setPipelineRunSnap] = useState | null>(null); const replayUtterances = useMemo( () => utterancesForReplayFromTurns(fixtureTurns), @@ -634,6 +652,7 @@ export default function PlaygroundPage() { setMemoirSubmitBusy(true); setLastMemoirSubmitElapsedMs(null); setPhase1WaitServerMs(null); + setPipelineRunSnap(null); try { type SubmitOut = { conversation_id: string; @@ -668,6 +687,8 @@ export default function PlaygroundPage() { `已提交 ${ids.length} 段至记忆管线(Celery ${r.data.celery_task_id ? "task 已排队" : "未返回 task id"})`, "success", ); + const uidWait = (r.data.user_id || evalUserId).trim(); + const phase1Tid = r.data.celery_task_id?.trim() ?? ""; if (waitAfterMemoirSubmit) { const mem = await waitUntilMemoirPhase1Ready( cid, @@ -678,6 +699,13 @@ export default function PlaygroundPage() { setPhase1WaitServerMs(body.elapsed_ms_since_submit); } }, + uidWait && phase1Tid + ? { + userId: uidWait, + phase1TaskId: phase1Tid, + onPipeline: (snap) => setPipelineRunSnap(snap), + } + : undefined, ); if (!mem.ok) { if (mem.error === "aborted") { @@ -688,6 +716,14 @@ export default function PlaygroundPage() { return; } pushNotice("记忆 Phase1 已全部就绪(topic_category 已写入)", "success"); + } else if (uidWait && phase1Tid) { + void (async () => { + const pr = await api>( + `/internal/api/evaluation/users/${encodeURIComponent(uidWait)}/memoir-pipeline-run?phase1_task_id=${encodeURIComponent(phase1Tid)}`, + { signal }, + ); + if (pr.ok && pr.data) setPipelineRunSnap(pr.data); + })(); } setWizardStep(3); } finally { @@ -1213,6 +1249,25 @@ export default function PlaygroundPage() { : ""}

    )} + {pipelineRunSnap ? ( +
    + 流水线细粒度进度(Redis / Celery) +
    +              {JSON.stringify(pipelineRunSnap, null, 2)}
    +            
    +
    + ) : null}
    { "/internal": { target: proxyTarget, changeOrigin: true, + /** 后端未启动时默认 Vite 控制台几乎无输出;这里打出直连目标便于对照 API 终端。 */ + configure: (proxy) => { + proxy.on("error", (err) => { + console.error( + `[vite proxy] /internal → ${proxyTarget} failed:`, + (err as Error).message, + "(请在该地址启动 Internal Eval API,默认 api 目录下 :7999)", + ); + }); + }, }, }, },