feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路

Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选用 rules 或 llm 方式归一用户口语,归一后的文本再送入模型与记忆检索
- 记忆证据注入:按用户发言检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
This commit is contained in:
Kevin
2026-03-31 23:55:26 +08:00
parent 42ae2a5e91
commit 69a673e6c6
44 changed files with 2998 additions and 259 deletions

View File

@@ -12,6 +12,7 @@ from celery import shared_task
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.agents.chat.background_voice import infer_background_voice
from app.agents.chat.prompts_profile import format_user_profile_context
from app.agents.memoir import MemoirOrchestrator
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
@@ -312,6 +313,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
user_obj = db.get(User, user_id)
user_profile = ""
user_birth_year = None
background_voice = "default"
if user_obj:
user_birth_year = user_obj.birth_year
user_profile = format_user_profile_context(
@@ -320,6 +322,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
background_voice = infer_background_voice(user_obj.occupation)
story_dispatch_ids: Set[str] = set()
@@ -349,6 +352,26 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
)
raise self.retry(countdown=10)
try:
batch_ids = {str(s.id) for s in category_segments}
skip_ids = prepared.segment_skip_story_ids
in_skip = batch_ids & skip_ids
if in_skip:
logger.info(
"event=memoir_skip_story_signal chapter_category={} "
"segment_ids_in_skip_set={}",
chapter_category,
sorted(in_skip),
)
if batch_ids and batch_ids <= skip_ids:
logger.info(
"event=story_pipeline_skipped reason=no_substantive_after_none "
"chapter_category={} segment_ids={}",
chapter_category,
sorted(batch_ids),
)
continue
chapter, needs_cover, disp = run_story_pipeline_for_category_batch(
db,
user_id=user_id,
@@ -358,6 +381,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
background_voice=background_voice,
)
story_dispatch_ids |= disp
db.flush()
@@ -487,6 +511,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
user_obj = db.get(User, user_id)
user_profile = ""
user_birth_year = None
background_voice = "default"
if user_obj:
user_birth_year = user_obj.birth_year
user_profile = format_user_profile_context(
@@ -495,6 +520,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
background_voice = infer_background_voice(user_obj.occupation)
class _Seg:
def __init__(self, text: str):
@@ -511,6 +537,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
background_voice=background_voice,
)
db.commit()
db.refresh(chapter)