feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
This commit is contained in:
@@ -32,6 +32,10 @@ from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
||||
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
||||
from app.features.memoir.models import Chapter
|
||||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||||
from app.features.memoir.oral_normalize import (
|
||||
apply_oral_normalization_rules,
|
||||
normalize_oral_for_memoir,
|
||||
)
|
||||
from app.features.memoir.repo import (
|
||||
mark_chapter_dirty_sync,
|
||||
reorder_chapter_story_links_by_life_order_sync,
|
||||
@@ -49,6 +53,23 @@ from app.features.story.sync_write import (
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
|
||||
"""批量路由 plan_batch:每段仅做规则归一,避免 N 次 LLM。"""
|
||||
out: list[tuple[str, str]] = []
|
||||
for seg in category_segments:
|
||||
raw = seg.user_input_text or ""
|
||||
if (
|
||||
settings.memoir_oral_normalize_enabled
|
||||
and (settings.memoir_oral_normalize_mode or "rules").strip().lower()
|
||||
!= "off"
|
||||
):
|
||||
t = apply_oral_normalization_rules(raw)
|
||||
else:
|
||||
t = raw
|
||||
out.append((str(seg.id), t))
|
||||
return out
|
||||
|
||||
|
||||
def _fidelity_fallback_json(oral: str, existing_canonical: str | None) -> str:
|
||||
"""忠实度未通过时的安全回退:续写场景保留旧文 + 本段口述,避免只剩一句。"""
|
||||
o = (oral or "").strip()[:15000]
|
||||
@@ -102,7 +123,7 @@ def _gate_narrative_fidelity(
|
||||
|
||||
|
||||
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
|
||||
"""模型输出相对口述明显过短时回退为口述原文(防「1999」类压缩)。"""
|
||||
"""模型输出相对口述极度过短时才回退(仅防极端压缩如「1999」)。"""
|
||||
o = (oral or "").strip()
|
||||
if not o:
|
||||
return False
|
||||
@@ -165,7 +186,7 @@ def _apply_narrative_fallbacks(
|
||||
if existing_for_narrative and _is_json_narrative(narrative_raw):
|
||||
merged_md = narrative_to_markdown(narrative_raw).strip()
|
||||
ex = (existing_for_narrative or "").strip()
|
||||
if ex and len(ex) > 400 and len(merged_md) < len(ex) * 0.35:
|
||||
if ex and len(ex) > 400 and len(merged_md) < len(ex) * 0.25:
|
||||
logger.warning(
|
||||
"event=narrative_fallback reason=merge_shrink action=append_oral "
|
||||
"chapter_category={}",
|
||||
@@ -176,7 +197,7 @@ def _apply_narrative_fallbacks(
|
||||
if (
|
||||
existing_for_narrative
|
||||
and not _is_json_narrative(narrative_raw)
|
||||
and len(narrative_raw) < len(existing_for_narrative) * 0.8
|
||||
and len(narrative_raw) < len(existing_for_narrative) * 0.5
|
||||
):
|
||||
logger.warning(
|
||||
"event=narrative_fallback reason=length_anomaly action=append_raw "
|
||||
@@ -290,6 +311,7 @@ def _run_batch_plan_writes(
|
||||
user_birth_year: int | None,
|
||||
llm: Any,
|
||||
narrative_agent: NarrativeAgent,
|
||||
background_voice: str = "default",
|
||||
) -> set[str]:
|
||||
dispatch_ids: set[str] = set()
|
||||
max_chars = int(settings.story_append_max_canonical_chars)
|
||||
@@ -297,7 +319,16 @@ def _run_batch_plan_writes(
|
||||
for unit in plan.units:
|
||||
t0 = time.perf_counter()
|
||||
unit_text = _ordered_text_for_segment_ids(category_segments, unit.segment_ids)
|
||||
new_content_input = format_narrative_user_content(unit_text, evidence_text)
|
||||
oral_unit = normalize_oral_for_memoir(unit_text, llm=llm)
|
||||
ut_raw = (unit_text or "").strip()
|
||||
ut_norm = (oral_unit or "").strip()
|
||||
if ut_raw != ut_norm:
|
||||
logger.info(
|
||||
"event=oral_normalized context=batch_unit raw_len={} norm_len={}",
|
||||
len(ut_raw),
|
||||
len(ut_norm),
|
||||
)
|
||||
new_content_input = format_narrative_user_content(oral_unit, evidence_text)
|
||||
|
||||
target_story_id: str | None = None
|
||||
existing_for_narrative = ""
|
||||
@@ -330,6 +361,7 @@ def _run_batch_plan_writes(
|
||||
user_profile=user_profile,
|
||||
birth_year=user_birth_year,
|
||||
llm=llm,
|
||||
background_voice=background_voice,
|
||||
)
|
||||
json_invalid = False
|
||||
s0 = (raw_gen or "").strip()
|
||||
@@ -340,14 +372,14 @@ def _run_batch_plan_writes(
|
||||
json_invalid = True
|
||||
|
||||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||||
unit_text,
|
||||
oral_unit,
|
||||
raw_gen,
|
||||
llm,
|
||||
existing_canonical=existing_for_narrative or None,
|
||||
)
|
||||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||||
narrative_raw,
|
||||
unit_text,
|
||||
oral_unit,
|
||||
existing_for_narrative,
|
||||
chapter_category=chapter_category,
|
||||
)
|
||||
@@ -357,7 +389,7 @@ def _run_batch_plan_writes(
|
||||
|
||||
md = _coalesce_story_markdown(
|
||||
narrative_to_markdown(narrative_raw).strip(),
|
||||
unit_text.strip(),
|
||||
oral_unit.strip(),
|
||||
existing_for_narrative or "",
|
||||
)
|
||||
|
||||
@@ -399,7 +431,7 @@ def _run_batch_plan_writes(
|
||||
"event=story_generated route_type=batch decision_source={} route_decision={} "
|
||||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||||
"story_id={} seconds={:.3f}",
|
||||
"story_id={} seconds={:.3f} oral_normalize_changed={}",
|
||||
decision_source,
|
||||
unit.decision,
|
||||
len(unit.segment_ids),
|
||||
@@ -407,12 +439,13 @@ def _run_batch_plan_writes(
|
||||
_is_json_narrative(raw_gen),
|
||||
fb_gate == "none",
|
||||
fallback_type,
|
||||
len(unit_text.strip()),
|
||||
len(ut_norm),
|
||||
len(md.strip()),
|
||||
chapter_category,
|
||||
is_append,
|
||||
sid_log,
|
||||
elapsed,
|
||||
ut_raw != ut_norm,
|
||||
)
|
||||
return dispatch_ids
|
||||
|
||||
@@ -427,6 +460,7 @@ def run_story_pipeline_for_category_batch(
|
||||
user_profile: str,
|
||||
user_birth_year: int | None,
|
||||
llm: Any,
|
||||
background_voice: str = "default",
|
||||
) -> tuple[Chapter | None, bool, set[str]]:
|
||||
"""
|
||||
返回 (chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)。
|
||||
@@ -456,7 +490,16 @@ def run_story_pipeline_for_category_batch(
|
||||
}
|
||||
|
||||
evidence_text = format_evidence_chunks_for_prompt(evidence)
|
||||
new_content_input = format_narrative_user_content(combined_text, evidence_text)
|
||||
oral_for_memoir = normalize_oral_for_memoir(combined_text, llm=llm)
|
||||
ct_raw = (combined_text or "").strip()
|
||||
om_norm = (oral_for_memoir or "").strip()
|
||||
if ct_raw != om_norm:
|
||||
logger.info(
|
||||
"event=oral_normalized context=category_batch raw_len={} norm_len={}",
|
||||
len(ct_raw),
|
||||
len(om_norm),
|
||||
)
|
||||
new_content_input = format_narrative_user_content(oral_for_memoir, evidence_text)
|
||||
|
||||
stmt_chapter = (
|
||||
select(Chapter)
|
||||
@@ -493,15 +536,14 @@ def run_story_pipeline_for_category_batch(
|
||||
llm=llm,
|
||||
)
|
||||
|
||||
candidates = list_active_stories_for_user_sync(session, user_id)
|
||||
# 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同
|
||||
all_stories = list_active_stories_for_user_sync(session, user_id)
|
||||
candidates = [s for s in all_stories if s.stage == chapter_category]
|
||||
valid_ids = {str(s.id) for s in candidates}
|
||||
story_meta = _story_meta_for_route(session, candidates)
|
||||
|
||||
batch_for_route = (
|
||||
f"{combined_text}\n\n{evidence_text}"
|
||||
if evidence_text.strip()
|
||||
else combined_text
|
||||
)
|
||||
# Story route 仅依据本批用户口述;evidence 只进入叙事/合并,不参与 new/append 判定。
|
||||
route_transcript = oral_for_memoir
|
||||
|
||||
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
|
||||
|
||||
@@ -512,7 +554,7 @@ def run_story_pipeline_for_category_batch(
|
||||
)
|
||||
plan: StoryBatchPlan | None = None
|
||||
if use_batch_plan:
|
||||
segs = [(seg.id, seg.user_input_text or "") for seg in category_segments]
|
||||
segs = _route_segment_texts(category_segments)
|
||||
plan = route_agent.plan_batch(
|
||||
chapter_category=chapter_category,
|
||||
chapter_title=title,
|
||||
@@ -546,12 +588,13 @@ def run_story_pipeline_for_category_batch(
|
||||
user_birth_year=user_birth_year,
|
||||
llm=llm,
|
||||
narrative_agent=narrative_agent,
|
||||
background_voice=background_voice,
|
||||
)
|
||||
else:
|
||||
route = route_agent.decide(
|
||||
chapter_category=chapter_category,
|
||||
chapter_title=title,
|
||||
batch_transcript=batch_for_route,
|
||||
batch_transcript=route_transcript,
|
||||
candidate_stories=candidates,
|
||||
llm=llm,
|
||||
valid_story_ids=valid_ids,
|
||||
@@ -592,6 +635,7 @@ def run_story_pipeline_for_category_batch(
|
||||
user_profile=user_profile,
|
||||
birth_year=user_birth_year,
|
||||
llm=llm,
|
||||
background_voice=background_voice,
|
||||
)
|
||||
json_invalid = False
|
||||
s0 = (raw_gen or "").strip()
|
||||
@@ -602,7 +646,7 @@ def run_story_pipeline_for_category_batch(
|
||||
json_invalid = True
|
||||
|
||||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||||
combined_text,
|
||||
oral_for_memoir,
|
||||
raw_gen,
|
||||
llm,
|
||||
existing_canonical=existing_for_narrative or None,
|
||||
@@ -610,7 +654,7 @@ def run_story_pipeline_for_category_batch(
|
||||
|
||||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||||
narrative_raw,
|
||||
combined_text,
|
||||
oral_for_memoir,
|
||||
existing_for_narrative,
|
||||
chapter_category=chapter_category,
|
||||
)
|
||||
@@ -620,7 +664,7 @@ def run_story_pipeline_for_category_batch(
|
||||
|
||||
md = _coalesce_story_markdown(
|
||||
narrative_to_markdown(narrative_raw).strip(),
|
||||
combined_text.strip(),
|
||||
oral_for_memoir.strip(),
|
||||
existing_for_narrative or "",
|
||||
)
|
||||
|
||||
@@ -664,7 +708,7 @@ def run_story_pipeline_for_category_batch(
|
||||
"event=story_generated route_type=single decision_source={} route_decision={} "
|
||||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||||
"story_id={} seconds={:.3f}",
|
||||
"story_id={} seconds={:.3f} oral_normalize_changed={}",
|
||||
decision_source,
|
||||
route.decision,
|
||||
len(category_segments),
|
||||
@@ -672,12 +716,13 @@ def run_story_pipeline_for_category_batch(
|
||||
_is_json_narrative(raw_gen),
|
||||
fb_gate == "none",
|
||||
fallback_type,
|
||||
len(combined_text.strip()),
|
||||
len(om_norm),
|
||||
len(md.strip()),
|
||||
chapter_category,
|
||||
is_append,
|
||||
sid_log,
|
||||
elapsed,
|
||||
ct_raw != om_norm,
|
||||
)
|
||||
|
||||
reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
|
||||
|
||||
Reference in New Issue
Block a user