feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固

- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置
- 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example
- Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧
- 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置
- 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动
- Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置
- 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
Kevin
2026-04-03 10:12:59 +08:00
parent 6b930808a3
commit 07c6478742
49 changed files with 12258 additions and 57 deletions

View File

@@ -29,6 +29,10 @@ from app.core.config import settings
from app.core.db import get_sync_db
from app.core.dependencies import get_llm_provider, get_llm_provider_fast
from app.core.logging import get_logger
from app.core.memoir_pipeline_trace import (
effective_correlation_id,
new_memoir_correlation_id,
)
from app.features.conversation.models import Conversation, Segment
from app.tasks.celery_app import celery_app
@@ -241,34 +245,57 @@ def _should_trigger_phase2(
return False
def _schedule_phase2_timeout(user_id: str, chapter_category: str) -> None:
def _phase2_immediate_task_id(user_id: str, chapter_category: str) -> str:
return f"phase2-immediate-{user_id}-{chapter_category}"
def _schedule_phase2_timeout(
    user_id: str, chapter_category: str, memoir_correlation_id: str | None = None
) -> None:
    """Reset countdown for Phase 2 narrative for one category.

    Calls ``_revoke_phase2_timeout`` for the pair first, then sends a
    delayed ``process_memoir_phase2`` task under the id returned by
    ``_phase2_timeout_task_id``.

    Args:
        user_id: Owner of the memoir segments.
        chapter_category: Chapter category the delayed Phase 2 run targets.
        memoir_correlation_id: Optional trace id. It is forwarded to the task
            as a keyword argument only when truthy, so the task's own default
            applies otherwise.
    """
    _revoke_phase2_timeout(user_id, chapter_category)

    # Never schedule with less than one second of delay, whatever the setting.
    countdown = float(max(1.0, settings.memoir_narrative_batch_max_wait_seconds))

    p2_kwargs: dict = {}
    if memoir_correlation_id:
        p2_kwargs["memoir_correlation_id"] = memoir_correlation_id

    celery_app.send_task(
        "app.tasks.memoir_tasks.process_memoir_phase2",
        args=[user_id, chapter_category],
        kwargs=p2_kwargs,
        countdown=countdown,
        task_id=_phase2_timeout_task_id(user_id, chapter_category),
    )
    # Exactly one format string: four placeholders, four arguments.
    logger.info(
        "event=phase2_timeout_scheduled user_id={} chapter_category={} countdown={} "
        "memoir_correlation_id={}",
        user_id,
        chapter_category,
        countdown,
        memoir_correlation_id or "",
    )
def _dispatch_phase2_immediate(
    user_id: str, chapter_category: str, memoir_correlation_id: str | None = None
) -> None:
    """Send ``process_memoir_phase2`` for one category without a countdown.

    Calls ``_revoke_phase2_timeout`` for the pair first, then sends the task
    exactly once.

    Args:
        user_id: Owner of the memoir segments.
        chapter_category: Chapter category the Phase 2 run targets.
        memoir_correlation_id: Optional trace id. It is forwarded to the task
            as a keyword argument only when truthy.
    """
    _revoke_phase2_timeout(user_id, chapter_category)

    p2_kwargs: dict = {}
    if memoir_correlation_id:
        p2_kwargs["memoir_correlation_id"] = memoir_correlation_id

    send_kw: dict = {
        "args": [user_id, chapter_category],
        "kwargs": p2_kwargs,
    }

    # Read the flag once so the logged mode always matches what was sent.
    singleflight = settings.memoir_phase2_singleflight_immediate
    if singleflight:
        # Fixed id per (user, category); without it Celery picks its own id.
        # NOTE(review): reusing a task id may not by itself prevent a second
        # enqueue; confirm de-duplication is enforced elsewhere.
        send_kw["task_id"] = _phase2_immediate_task_id(user_id, chapter_category)

    celery_app.send_task("app.tasks.memoir_tasks.process_memoir_phase2", **send_kw)
    logger.info(
        "event=phase2_dispatched_immediate user_id={} chapter_category={} "
        "memoir_correlation_id={} task_id_mode={}",
        user_id,
        chapter_category,
        memoir_correlation_id or "",
        "singleflight" if singleflight else "unique",
    )
@@ -293,14 +320,18 @@ def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None:
cats = [r[0] for r in db.execute(stmt).all() if r[0]]
for chapter_category in cats:
_revoke_phase2_timeout(user_id, chapter_category)
flush_cid = new_memoir_correlation_id()
celery_app.send_task(
"app.tasks.memoir_tasks.process_memoir_phase2",
args=[user_id, chapter_category],
kwargs={"memoir_correlation_id": flush_cid},
)
logger.info(
"event=phase2_dispatched_flush user_id={} chapter_category={}",
"event=phase2_dispatched_flush user_id={} chapter_category={} "
"memoir_correlation_id={}",
user_id,
chapter_category,
flush_cid,
)
except Exception as e:
logger.error(
@@ -312,14 +343,24 @@ def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None:
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
def process_memoir_phase2(self, user_id: str, chapter_category: str):
def process_memoir_phase2(
self,
user_id: str,
chapter_category: str,
memoir_correlation_id: str | None = None,
):
"""Phase 2叙事 / 路由 / 忠实度 / 标题;按类目加锁,消费未叙事且非 skip 的 segments。"""
task_id = self.request.id
cid = effective_correlation_id(
explicit=memoir_correlation_id, celery_task_id=str(task_id)
)
logger.info(
"event=memoir_phase2_start user_id={} task_id={} chapter_category={}",
"event=memoir_phase2_start user_id={} task_id={} chapter_category={} "
"memoir_correlation_id={}",
user_id,
task_id,
chapter_category,
cid,
)
try:
with get_sync_db() as db:
@@ -398,6 +439,7 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
llm=llm,
background_voice=background_voice,
occupation=user_occupation,
memoir_correlation_id=cid,
)
story_dispatch_ids |= disp
db.flush()
@@ -461,6 +503,7 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
need_compaction=True,
compaction_extra={
"pipeline_run_id": str(task_id),
"memoir_correlation_id": cid,
"story_dispatch_ids": sorted(story_dispatch_ids),
"chapters_to_enqueue": sorted(chapters_to_enqueue),
"chapter_category": chapter_category,
@@ -489,11 +532,12 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
logger.info(
"event=memoir_phase2_done user_id={} task_id={} chapter_category={} "
"segment_count={}",
"segment_count={} memoir_correlation_id={}",
user_id,
task_id,
chapter_category,
len(category_segments),
cid,
)
return {
"status": "success",
@@ -522,11 +566,14 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
按需派发 Phase 2阈值或延迟兜底
"""
task_id = self.request.id
memoir_correlation_id = new_memoir_correlation_id()
logger.info(
"event=memoir_phase1_start user_id={} task_id={} segments={}",
"event=memoir_phase1_start user_id={} task_id={} segments={} "
"memoir_correlation_id={}",
user_id,
task_id,
len(segment_ids),
memoir_correlation_id,
)
_update_task_status_sync(user_id, task_id, "running")
@@ -649,9 +696,9 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
db.commit()
for cc in phase2_immediate:
_dispatch_phase2_immediate(user_id, cc)
_dispatch_phase2_immediate(user_id, cc, memoir_correlation_id)
for cc in phase2_timeout:
_schedule_phase2_timeout(user_id, cc)
_schedule_phase2_timeout(user_id, cc, memoir_correlation_id)
categories_processed = sorted(prepared.category_to_segments.keys())
_update_task_status_sync(
@@ -666,11 +713,12 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
)
logger.info(
"event=memoir_phase1_done user_id={} task_id={} segment_count={} "
"categories={}",
"categories={} memoir_correlation_id={}",
user_id,
task_id,
len(segments),
categories_processed,
memoir_correlation_id,
)
return {
"status": "success",
@@ -701,7 +749,13 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
new_content: 新内容
"""
stage = normalize_chapter_category(stage, fallback="summary")
logger.info(f"生成章节内容: user_id={user_id}, stage={stage}")
cid = effective_correlation_id(explicit=None, celery_task_id=str(self.request.id))
logger.info(
"event=generate_chapter_content_start user_id={} stage={} memoir_correlation_id={}",
user_id,
stage,
cid,
)
try:
with get_sync_db() as db:
@@ -739,6 +793,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
llm=llm,
background_voice=background_voice,
occupation=user_occupation,
memoir_correlation_id=cid,
)
db.flush()
if chapter is None: