feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固
- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置
- 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example
- Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧
- 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置
- 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动
- Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置
- 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
@@ -29,6 +29,10 @@ from app.core.config import settings
|
||||
from app.core.db import get_sync_db
|
||||
from app.core.dependencies import get_llm_provider, get_llm_provider_fast
|
||||
from app.core.logging import get_logger
|
||||
from app.core.memoir_pipeline_trace import (
|
||||
effective_correlation_id,
|
||||
new_memoir_correlation_id,
|
||||
)
|
||||
from app.features.conversation.models import Conversation, Segment
|
||||
|
||||
from app.tasks.celery_app import celery_app
|
||||
@@ -241,34 +245,57 @@ def _should_trigger_phase2(
|
||||
return False
|
||||
|
||||
|
||||
def _schedule_phase2_timeout(user_id: str, chapter_category: str) -> None:
|
||||
def _phase2_immediate_task_id(user_id: str, chapter_category: str) -> str:
|
||||
return f"phase2-immediate-{user_id}-{chapter_category}"
|
||||
|
||||
|
||||
def _schedule_phase2_timeout(
|
||||
user_id: str, chapter_category: str, memoir_correlation_id: str | None = None
|
||||
) -> None:
|
||||
"""Reset countdown for Phase 2 narrative for one category."""
|
||||
_revoke_phase2_timeout(user_id, chapter_category)
|
||||
countdown = float(max(1.0, settings.memoir_narrative_batch_max_wait_seconds))
|
||||
p2_kwargs: dict = {}
|
||||
if memoir_correlation_id:
|
||||
p2_kwargs["memoir_correlation_id"] = memoir_correlation_id
|
||||
celery_app.send_task(
|
||||
"app.tasks.memoir_tasks.process_memoir_phase2",
|
||||
args=[user_id, chapter_category],
|
||||
kwargs=p2_kwargs,
|
||||
countdown=countdown,
|
||||
task_id=_phase2_timeout_task_id(user_id, chapter_category),
|
||||
)
|
||||
logger.info(
|
||||
"event=phase2_timeout_scheduled user_id={} chapter_category={} countdown={}",
|
||||
"event=phase2_timeout_scheduled user_id={} chapter_category={} countdown={} "
|
||||
"memoir_correlation_id={}",
|
||||
user_id,
|
||||
chapter_category,
|
||||
countdown,
|
||||
memoir_correlation_id or "",
|
||||
)
|
||||
|
||||
|
||||
def _dispatch_phase2_immediate(
    user_id: str, chapter_category: str, memoir_correlation_id: str | None = None
) -> None:
    """Send the Phase 2 task for one category right away (no countdown).

    Args:
        user_id: Owner of the segments; first positional arg of the task.
        chapter_category: Category to process; second positional arg.
        memoir_correlation_id: Optional trace id. It is forwarded to the task
            as a keyword argument only when truthy.
    """
    # Presumably cancels the pending timeout task for this (user, category),
    # since the immediate run replaces it -- helper not shown here.
    _revoke_phase2_timeout(user_id, chapter_category)

    p2_kwargs: dict = {}
    if memoir_correlation_id:
        p2_kwargs["memoir_correlation_id"] = memoir_correlation_id

    send_kw: dict = {
        "args": [user_id, chapter_category],
        "kwargs": p2_kwargs,
    }
    singleflight = settings.memoir_phase2_singleflight_immediate
    if singleflight:
        # A fixed id per (user, category) instead of a Celery-generated one.
        # NOTE(review): a repeated task id does not by itself make the broker
        # drop duplicates; presumably the task's own lock handles that -- confirm.
        send_kw["task_id"] = _phase2_immediate_task_id(user_id, chapter_category)

    # Single dispatch: the task must be sent exactly once per call.
    celery_app.send_task("app.tasks.memoir_tasks.process_memoir_phase2", **send_kw)

    # Exactly one format string: its four placeholders line up with the four
    # positional arguments below.
    logger.info(
        "event=phase2_dispatched_immediate user_id={} chapter_category={} "
        "memoir_correlation_id={} task_id_mode={}",
        user_id,
        chapter_category,
        memoir_correlation_id or "",
        "singleflight" if singleflight else "unique",
    )
|
||||
|
||||
|
||||
@@ -293,14 +320,18 @@ def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None:
|
||||
cats = [r[0] for r in db.execute(stmt).all() if r[0]]
|
||||
for chapter_category in cats:
|
||||
_revoke_phase2_timeout(user_id, chapter_category)
|
||||
flush_cid = new_memoir_correlation_id()
|
||||
celery_app.send_task(
|
||||
"app.tasks.memoir_tasks.process_memoir_phase2",
|
||||
args=[user_id, chapter_category],
|
||||
kwargs={"memoir_correlation_id": flush_cid},
|
||||
)
|
||||
logger.info(
|
||||
"event=phase2_dispatched_flush user_id={} chapter_category={}",
|
||||
"event=phase2_dispatched_flush user_id={} chapter_category={} "
|
||||
"memoir_correlation_id={}",
|
||||
user_id,
|
||||
chapter_category,
|
||||
flush_cid,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -312,14 +343,24 @@ def dispatch_pending_memoir_phase2_for_user(user_id: str) -> None:
|
||||
|
||||
|
||||
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
|
||||
def process_memoir_phase2(self, user_id: str, chapter_category: str):
|
||||
def process_memoir_phase2(
|
||||
self,
|
||||
user_id: str,
|
||||
chapter_category: str,
|
||||
memoir_correlation_id: str | None = None,
|
||||
):
|
||||
"""Phase 2:叙事 / 路由 / 忠实度 / 标题;按类目加锁,消费未叙事且非 skip 的 segments。"""
|
||||
task_id = self.request.id
|
||||
cid = effective_correlation_id(
|
||||
explicit=memoir_correlation_id, celery_task_id=str(task_id)
|
||||
)
|
||||
logger.info(
|
||||
"event=memoir_phase2_start user_id={} task_id={} chapter_category={}",
|
||||
"event=memoir_phase2_start user_id={} task_id={} chapter_category={} "
|
||||
"memoir_correlation_id={}",
|
||||
user_id,
|
||||
task_id,
|
||||
chapter_category,
|
||||
cid,
|
||||
)
|
||||
try:
|
||||
with get_sync_db() as db:
|
||||
@@ -398,6 +439,7 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
|
||||
llm=llm,
|
||||
background_voice=background_voice,
|
||||
occupation=user_occupation,
|
||||
memoir_correlation_id=cid,
|
||||
)
|
||||
story_dispatch_ids |= disp
|
||||
db.flush()
|
||||
@@ -461,6 +503,7 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
|
||||
need_compaction=True,
|
||||
compaction_extra={
|
||||
"pipeline_run_id": str(task_id),
|
||||
"memoir_correlation_id": cid,
|
||||
"story_dispatch_ids": sorted(story_dispatch_ids),
|
||||
"chapters_to_enqueue": sorted(chapters_to_enqueue),
|
||||
"chapter_category": chapter_category,
|
||||
@@ -489,11 +532,12 @@ def process_memoir_phase2(self, user_id: str, chapter_category: str):
|
||||
|
||||
logger.info(
|
||||
"event=memoir_phase2_done user_id={} task_id={} chapter_category={} "
|
||||
"segment_count={}",
|
||||
"segment_count={} memoir_correlation_id={}",
|
||||
user_id,
|
||||
task_id,
|
||||
chapter_category,
|
||||
len(category_segments),
|
||||
cid,
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
@@ -522,11 +566,14 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
|
||||
按需派发 Phase 2(阈值或延迟兜底)。
|
||||
"""
|
||||
task_id = self.request.id
|
||||
memoir_correlation_id = new_memoir_correlation_id()
|
||||
logger.info(
|
||||
"event=memoir_phase1_start user_id={} task_id={} segments={}",
|
||||
"event=memoir_phase1_start user_id={} task_id={} segments={} "
|
||||
"memoir_correlation_id={}",
|
||||
user_id,
|
||||
task_id,
|
||||
len(segment_ids),
|
||||
memoir_correlation_id,
|
||||
)
|
||||
_update_task_status_sync(user_id, task_id, "running")
|
||||
|
||||
@@ -649,9 +696,9 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
|
||||
db.commit()
|
||||
|
||||
for cc in phase2_immediate:
|
||||
_dispatch_phase2_immediate(user_id, cc)
|
||||
_dispatch_phase2_immediate(user_id, cc, memoir_correlation_id)
|
||||
for cc in phase2_timeout:
|
||||
_schedule_phase2_timeout(user_id, cc)
|
||||
_schedule_phase2_timeout(user_id, cc, memoir_correlation_id)
|
||||
|
||||
categories_processed = sorted(prepared.category_to_segments.keys())
|
||||
_update_task_status_sync(
|
||||
@@ -666,11 +713,12 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
|
||||
)
|
||||
logger.info(
|
||||
"event=memoir_phase1_done user_id={} task_id={} segment_count={} "
|
||||
"categories={}",
|
||||
"categories={} memoir_correlation_id={}",
|
||||
user_id,
|
||||
task_id,
|
||||
len(segments),
|
||||
categories_processed,
|
||||
memoir_correlation_id,
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
@@ -701,7 +749,13 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
|
||||
new_content: 新内容
|
||||
"""
|
||||
stage = normalize_chapter_category(stage, fallback="summary")
|
||||
logger.info(f"生成章节内容: user_id={user_id}, stage={stage}")
|
||||
cid = effective_correlation_id(explicit=None, celery_task_id=str(self.request.id))
|
||||
logger.info(
|
||||
"event=generate_chapter_content_start user_id={} stage={} memoir_correlation_id={}",
|
||||
user_id,
|
||||
stage,
|
||||
cid,
|
||||
)
|
||||
|
||||
try:
|
||||
with get_sync_db() as db:
|
||||
@@ -739,6 +793,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
|
||||
llm=llm,
|
||||
background_voice=background_voice,
|
||||
occupation=user_occupation,
|
||||
memoir_correlation_id=cid,
|
||||
)
|
||||
db.flush()
|
||||
if chapter is None:
|
||||
|
||||
Reference in New Issue
Block a user