feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路

Chat 访谈
- 新增 persona 系统（default / warm_listener / curious_guide）与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策（brief / standard / expanded），融合信息密度启发式
- 输入净稿（input_normalize）：编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入：按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一（oral_normalize）：segment 原文保留，story 管线取派生净稿作叙事输入
- segment 入队批次门闸：累计字数 + 最长等待秒数，减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005：清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
2026-03-31 23:55:26 +08:00
parent 42ae2a5e91
commit 69a673e6c6
44 changed files with 2998 additions and 259 deletions
--- a/api/app/tasks/memoir_tasks.py
+++ b/api/app/tasks/memoir_tasks.py
@@ -12,6 +12,7 @@ from celery import shared_task
 from sqlalchemy import select
 from sqlalchemy.orm import Session

+from app.agents.chat.background_voice import infer_background_voice
 from app.agents.chat.prompts_profile import format_user_profile_context
 from app.agents.memoir import MemoirOrchestrator
 from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
@@ -312,6 +313,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
            user_obj = db.get(User, user_id)
            user_profile = ""
            user_birth_year = None
+            background_voice = "default"
            if user_obj:
                user_birth_year = user_obj.birth_year
                user_profile = format_user_profile_context(
@@ -320,6 +322,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                    grew_up_place=user_obj.grew_up_place,
                    occupation=user_obj.occupation,
                )
+                background_voice = infer_background_voice(user_obj.occupation)

            story_dispatch_ids: Set[str] = set()

@@ -349,6 +352,26 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                    )
                    raise self.retry(countdown=10)
                try:
+                    batch_ids = {str(s.id) for s in category_segments}
+                    skip_ids = prepared.segment_skip_story_ids
+                    in_skip = batch_ids & skip_ids
+                    if in_skip:
+                        logger.info(
+                            "event=memoir_skip_story_signal chapter_category={} "
+                            "segment_ids_in_skip_set={}",
+                            chapter_category,
+                            sorted(in_skip),
+                        )
+
+                    if batch_ids and batch_ids <= skip_ids:
+                        logger.info(
+                            "event=story_pipeline_skipped reason=no_substantive_after_none "
+                            "chapter_category={} segment_ids={}",
+                            chapter_category,
+                            sorted(batch_ids),
+                        )
+                        continue
+
                    chapter, needs_cover, disp = run_story_pipeline_for_category_batch(
                        db,
                        user_id=user_id,
@@ -358,6 +381,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                        user_profile=user_profile,
                        user_birth_year=user_birth_year,
                        llm=llm,
+                        background_voice=background_voice,
                    )
                    story_dispatch_ids |= disp
                    db.flush()
@@ -487,6 +511,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
            user_obj = db.get(User, user_id)
            user_profile = ""
            user_birth_year = None
+            background_voice = "default"
            if user_obj:
                user_birth_year = user_obj.birth_year
                user_profile = format_user_profile_context(
@@ -495,6 +520,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
                    grew_up_place=user_obj.grew_up_place,
                    occupation=user_obj.occupation,
                )
+                background_voice = infer_background_voice(user_obj.occupation)

            class _Seg:
                def __init__(self, text: str):
@@ -511,6 +537,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
                user_profile=user_profile,
                user_birth_year=user_birth_year,
                llm=llm,
+                background_voice=background_voice,
            )
            db.commit()
            db.refresh(chapter)