feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固

- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置
- 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example
- Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧
- 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置
- 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动
- Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置
- 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
Kevin
2026-04-03 10:12:59 +08:00
parent 6b930808a3
commit 07c6478742
49 changed files with 12258 additions and 57 deletions

View File

@@ -10,15 +10,22 @@ import time
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Set, Tuple
from app.agents.memoir.batch_phase1_prep import (
STAGE_ALLOWED_SLOTS,
run_batch_phase1_prep,
)
from app.agents.memoir.classification_agent import (
ClassificationAgent,
)
from app.agents.memoir.classification_agent import (
_detect_stage as detect_stage_from_keywords,
)
from app.agents.memoir.classification_agent import _looks_like_fragment_only
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
from app.agents.stage_constants import normalize_chapter_category, normalize_chat_stage
from app.agents.state_schema import MemoirStateSchema
from app.core.agent_logging import agent_span, agent_summary_enabled, log_agent_detail
from app.core.config import settings
from app.core.logging import get_logger
from app.features.conversation.models import Segment
@@ -69,6 +76,26 @@ class MemoirOrchestrator:
segment_chapter_category: Dict[str, str] = {}
classify_extract_llm = llm_fast if llm_fast is not None else llm
# 仅 MEMOIR_PHASE1_BATCH_LLM_ENABLED=true 时走批处理;关则与旧版一致逐段(含多段一批)
use_batch = (
bool(segments)
and classify_extract_llm is not None
and settings.memoir_phase1_batch_llm_enabled
)
if use_batch:
try:
return self._prepare_batches_via_batch_llm(
segments=segments,
state=state,
classify_extract_llm=classify_extract_llm,
update_slot=update_slot,
)
except Exception as e:
logger.warning(
"MemoirOrchestrator.prepare_batches batch LLM 失败,回退逐段: {}",
e,
)
for segment in segments:
text = segment.user_input_text or ""
seg_t0 = time.perf_counter()
@@ -133,6 +160,92 @@ class MemoirOrchestrator:
segment_chapter_category=segment_chapter_category,
)
def _prepare_batches_via_batch_llm(
    self,
    *,
    segments: List[Segment],
    state: MemoirStateSchema,
    classify_extract_llm: Any,
    update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
) -> PreparedMemoirBatches:
    """Prepare memoir batches from one batched extract+classify call.

    Calls ``run_batch_phase1_prep`` once for all ``segments`` and then, per
    segment, applies the returned slots through ``update_slot`` and assigns
    the segment to a chapter category.

    Args:
        segments: Segments to process, in order.
        state: Current memoir state; rebound to whatever ``update_slot``
            returns after each applied slot.
        classify_extract_llm: LLM handle forwarded to
            ``run_batch_phase1_prep``.
        update_slot: Callback invoked as
            ``update_slot(stage, slot_name, snippet, [segment.id])``; its
            return value becomes the new state.

    Returns:
        ``PreparedMemoirBatches`` holding the final state, the
        category -> segments grouping, the ids of segments flagged in
        ``segment_skip_story_ids`` and the per-segment chapter category.

    Raises:
        Exception: Whatever ``run_batch_phase1_prep`` raises, and the lookup
            error (``KeyError`` for a dict) when the batch result has no row
            for one of ``segments``. The lookup error is raised before any
            ``update_slot`` call is made.
    """
    category_to_segments: Dict[str, List[Segment]] = {}
    segment_skip_story_ids: Set[str] = set()
    segment_chapter_category: Dict[str, str] = {}

    by_id = run_batch_phase1_prep(segments, state, classify_extract_llm)

    # Resolve every row up front. The caller falls back to per-segment
    # processing when this method raises, so a missing row must surface
    # before any slot has been applied; otherwise the fallback would start
    # from a half-applied batch and could apply the same slots again.
    rows = [by_id[str(segment.id)] for segment in segments]

    for segment, row in zip(segments, rows):
        text = segment.user_input_text or ""
        seg_t0 = time.perf_counter()
        result_slots = dict(row.slots)

        # Re-read on every iteration: ``state`` is rebound by update_slot.
        fallback_stage = state.current_stage or "childhood"
        if result_slots:
            detected_stage = normalize_chat_stage(
                row.detected_stage, fallback_stage
            )
            # Keep only the slots permitted for the detected stage.
            allowed = STAGE_ALLOWED_SLOTS.get(detected_stage, frozenset())
            result_slots = {
                k: v for k, v in result_slots.items() if k in allowed
            }
        if not result_slots:
            # Nothing extracted (or nothing survived the filter): align the
            # stage with the current stage instead of the model's guess.
            detected_stage = normalize_chat_stage(
                fallback_stage, fallback_stage
            )

        with agent_span(
            logger,
            "MemoirOrchestrator.BatchPhase1Prep.apply",
            segment_id=segment.id,
        ):
            for slot_name, snippet in result_slots.items():
                state = update_slot(
                    detected_stage, slot_name, snippet, [segment.id]
                )

        if _looks_like_fragment_only(text):
            # Fragment-only text is always filed under "summary" and is
            # never treated as an explicit "none" from the model.
            chapter_category = "summary"
            llm_said_none = False
        else:
            raw_cat = (row.chapter_category_raw or "").strip().lower()
            if raw_cat == "none":
                chapter_category = "summary"
                llm_said_none = True
            else:
                chapter_category = normalize_chapter_category(
                    row.chapter_category_raw,
                    "summary",
                )
                llm_said_none = False

        # Flag the segment only when there are no usable slots AND the
        # model explicitly answered "none" for the category.
        if (not result_slots) and llm_said_none:
            segment_skip_story_ids.add(str(segment.id))
        segment_chapter_category[str(segment.id)] = chapter_category

        if agent_summary_enabled():
            logger.info(
                "MemoirOrchestrator.segment(batch) segment_id={} text_len={} "
                "detected_stage={} category={} segment_total_ms={:.2f}",
                segment.id,
                len(text),
                detected_stage,
                chapter_category,
                (time.perf_counter() - seg_t0) * 1000,
            )
        log_agent_detail(
            logger,
            "MemoirOrchestrator.segment_done(batch) segment_id={} slots={}",
            segment.id,
            list(result_slots.keys()),
        )
        category_to_segments.setdefault(chapter_category, []).append(segment)

    return PreparedMemoirBatches(
        state=state,
        category_to_segments=category_to_segments,
        segment_skip_story_ids=segment_skip_story_ids,
        segment_chapter_category=segment_chapter_category,
    )
def run(
self,
*,