feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固

- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置
- 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example
- Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧
- 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置
- 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动
- Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置
- 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
Kevin
2026-04-03 10:12:59 +08:00
parent 6b930808a3
commit 07c6478742
49 changed files with 12258 additions and 57 deletions

View File

@@ -75,6 +75,20 @@ EMBEDDING_MODEL=embedding-3
# CHAT_INPUT_NORMALIZE_MODE=rules
# CHAT_INPUT_NORMALIZE_LLM_MAX_TOKENS=512
# CHAT_INPUT_NORMALIZE_LLM_MAX_INPUT_CHARS=8000
# True:仅 is_from_voice 时走 LLM 纠错;键盘输入仅规则归一
# CHAT_INPUT_NORMALIZE_LLM_VOICE_ONLY=true
# 短时/应答/元话语:本轮跳过阶段 LLM 与记忆向量检索(仍保留访谈主 LLM);关则每轮完整路径
# CHAT_SUBSTANTIVE_HEURISTIC_ENABLED=true
# CHAT_SUBSTANTIVE_MIN_CHARS=12
# CHAT_STAGE_DETECTION_SKIP_LLM_ON_INSUFFICIENT_SIGNAL=true
# CHAT_MEMORY_RETRIEVAL_REQUIRE_SUBSTANTIVE=true
# 资料收集:短时/元话语不跑资料字段抽取 LLM(仍生成追问)
# CHAT_PROFILE_EXTRACT_REQUIRE_SUBSTANTIVE=true
# Memoir Phase1:True 时用一次「批量 JSON」做抽取+分类(单段或多段均可;失败自动回退逐段)。
# False 时始终逐段(与启用本开关前的行为一致,含防抖合并后的多段任务)。
# MEMOIR_PHASE1_BATCH_LLM_ENABLED=false
# MEMOIR_PHASE1_BATCH_LLM_MAX_TOKENS=4096
# =============================================================================
# Database
@@ -119,12 +133,25 @@ REDIS_SESSION_TTL=86400
# =============================================================================
# STORY_IMAGE_ENQUEUE_DEDUP_TTL=300
# RECOMPOSE_CHAPTER_DELAY_SECONDS=8
# CHAPTER_PIPELINE_LOCK_TTL_SECONDS=120
# 与 Phase2 / 章节物化共用;应 ≥ 最长单次叙事+物化耗时
# CHAPTER_PIPELINE_LOCK_TTL_SECONDS=360
# STORY_APPEND_MAX_CANONICAL_CHARS=12000
# STORY_APPEND_MAX_VERSIONS=20
# EVIDENCE_TOP_K_DEFAULT=10
# EVIDENCE_TOP_K_LARGE_BATCH=5
# EVIDENCE_LARGE_BATCH_THRESHOLD=3
#
# Memoir 可靠性(叙事 faithful、标题 slots、证据渗漏、Phase1→2 追踪)
# MEMOIR_FIDELITY_FAIL_OPEN_ON_PARSE_ERROR=false
# MEMOIR_NARRATIVE_EVIDENCE_OVERLAP_MIN_CHARS=14
# MEMOIR_EVIDENCE_SCENE_ANCHOR_CHECK_ENABLED=true
# MEMOIR_TITLE_SLOTS_REQUIRE_BODY_OR_ORAL_MATCH=true
# MEMOIR_TITLE_HAY_GROUNDING_STRICT_PHRASES_ENABLED=true
# MEMOIR_RECOMPOSE_RETRY_ON_LOCK_CONTENTION=true
# MEMOIR_PHASE2_SINGLEFLIGHT_IMMEDIATE=true
#
# Memory:事实检索未命中时是否退回「最近 confirmed 事实」(默认关,易串台)
# MEMORY_FACT_SEARCH_USE_RECENT_FALLBACK=false
# =============================================================================
# Auth