feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验

- DB: segments 用户输入文本(Alembic 0002)
- Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent,叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints;Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
This commit is contained in:
Kevin
2026-03-26 12:13:36 +08:00
parent 49b089354c
commit a3f61fcc0f
94 changed files with 3332 additions and 672 deletions

View File

@@ -4,6 +4,7 @@ Celery 用:按批次将 transcript 写入 Story并物化 Chapter canonical_
from __future__ import annotations
import json
import uuid
from typing import Any
@@ -29,7 +30,10 @@ from app.features.memoir.helpers import _chapter_markdown
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import Chapter
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
from app.features.memoir.repo import compose_chapter_from_story_links_sync
from app.features.memoir.repo import (
compose_chapter_from_story_links_sync,
reorder_chapter_story_links_by_life_order_sync,
)
from app.features.memory.repo import retrieve_evidence_sync
from app.features.story.models import Story
from app.features.story.sync_write import (
@@ -42,6 +46,28 @@ from app.features.story.sync_write import (
logger = get_logger(__name__)
def _gate_narrative_fidelity(oral_text: str, narrative_raw: str, llm: Any) -> str:
    """Run the narrative fidelity check; fall back to the oral text on failure.

    Args:
        oral_text: Oral source text the narrative was generated from.
        narrative_raw: Narrative produced upstream; handed to the checker as
            ``narrative_json``.
        llm: LLM handle forwarded to the checker. A falsy value skips the check.

    Returns:
        ``narrative_raw`` unchanged when the check is disabled in settings,
        ``llm`` is falsy, the check passes, or the oral text is blank.
        Otherwise a JSON string holding a single paragraph whose content is
        the stripped oral text, limited to its first 15000 characters.
    """
    from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent

    # Gate is active only when the feature flag is on AND an LLM is supplied.
    if not (settings.memoir_fidelity_check_enabled and llm):
        return narrative_raw

    check_passed = FidelityCheckAgent().passes(
        oral_text=oral_text,
        narrative_json=narrative_raw,
        llm=llm,
    )
    if check_passed:
        return narrative_raw

    oral = (oral_text or "").strip()
    # The warning is emitted even when the oral text is blank and no
    # replacement can be built.
    logger.warning("event=fidelity_gate_fallback oral_len={}", len(oral))
    if oral:
        fallback = {"paragraphs": [{"content": oral[:15000]}]}
        return json.dumps(fallback, ensure_ascii=False)
    return narrative_raw
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
"""模型输出相对口述明显过短时回退为口述原文防「1999」类压缩"""
o = (oral or "").strip()
@@ -68,7 +94,7 @@ def _is_json_narrative(text: str) -> bool:
def _ordered_text_for_segment_ids(
category_segments: list, segment_ids: list[str]
) -> str:
id_to_text = {seg.id: (seg.transcript_text or "") for seg in category_segments}
id_to_text = {seg.id: (seg.user_input_text or "") for seg in category_segments}
return "\n\n".join(id_to_text.get(sid, "") for sid in segment_ids)
@@ -80,12 +106,28 @@ def _apply_narrative_fallbacks(
*,
chapter_category: str,
) -> str:
# 整篇合并JSON输出异常缩水回退为旧文 + 本段口述,避免覆盖丢失
if existing_for_narrative and _is_json_narrative(narrative_raw):
merged_md = narrative_to_markdown(narrative_raw).strip()
ex = (existing_for_narrative or "").strip()
if ex and len(ex) > 400 and len(merged_md) < len(ex) * 0.35:
logger.warning(
"event=narrative_fallback reason=merge_shrink action=append_oral "
"chapter_category={}",
chapter_category,
)
return f"{ex}\n\n{combined_unit_text.strip()}"
if (
existing_for_narrative
and not _is_json_narrative(narrative_raw)
and len(narrative_raw) < len(existing_for_narrative) * 0.8
):
logger.warning("叙事长度异常: 回退为原文追加")
logger.warning(
"event=narrative_fallback reason=length_anomaly action=append_raw "
"chapter_category={}",
chapter_category,
)
return f"{existing_for_narrative}\n\n{combined_unit_text}"
if (
@@ -95,7 +137,8 @@ def _apply_narrative_fallbacks(
and len(narrative_raw) < len(existing_chapter_md) * 0.8
):
logger.warning(
"章节级长度异常: 回退为 transcript 追加, category=%s",
"event=narrative_fallback reason=chapter_length_anomaly action=append_transcript "
"chapter_category={}",
chapter_category,
)
return f"{existing_chapter_md}\n\n{combined_unit_text}"
@@ -104,7 +147,8 @@ def _apply_narrative_fallbacks(
oral = (combined_unit_text or "").strip()
if oral and _should_fallback_to_transcript(md_check, oral):
logger.warning(
"叙事相对口述过短,回退为口述原文 category=%s oral_len=%s md_len=%s",
"event=narrative_fallback reason=body_too_short_vs_oral "
"chapter_category={} oral_len={} md_len={}",
chapter_category,
len(oral),
len(md_check),
@@ -196,6 +240,7 @@ def _run_batch_plan_writes(
birth_year=user_birth_year,
llm=llm,
)
narrative_raw = _gate_narrative_fidelity(unit_text, narrative_raw, llm)
narrative_raw = _apply_narrative_fallbacks(
narrative_raw,
unit_text,
@@ -259,14 +304,14 @@ def run_story_pipeline_for_category_batch(
route_agent = StoryRouteAgent()
dispatch_ids: set[str] = set()
segment_texts = [seg.transcript_text or "" for seg in category_segments]
segment_texts = [seg.user_input_text or "" for seg in category_segments]
combined_text = "\n\n".join(segment_texts)
source_ids = [seg.id for seg in category_segments]
try:
evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
except Exception as e:
logger.warning("Evidence 检索跳过: %s", e)
logger.warning("Evidence 检索跳过: {}", e)
evidence = {
"relevant_chunks": [],
"relevant_summaries": [],
@@ -332,7 +377,7 @@ def run_story_pipeline_for_category_batch(
)
plan: StoryBatchPlan | None = None
if use_batch_plan:
segs = [(seg.id, seg.transcript_text or "") for seg in category_segments]
segs = [(seg.id, seg.user_input_text or "") for seg in category_segments]
plan = route_agent.plan_batch(
chapter_category=chapter_category,
chapter_title=title,
@@ -394,6 +439,7 @@ def run_story_pipeline_for_category_batch(
birth_year=user_birth_year,
llm=llm,
)
narrative_raw = _gate_narrative_fidelity(combined_text, narrative_raw, llm)
narrative_raw = _apply_narrative_fallbacks(
narrative_raw,
@@ -440,6 +486,7 @@ def run_story_pipeline_for_category_batch(
session, chapter_id=chapter.id, story_id=st.id
)
reorder_chapter_story_links_by_life_order_sync(session, chapter.id)
compose_chapter_from_story_links_sync(session, chapter.id)
session.flush()