feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固
- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置
- 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example
- Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧
- 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置
- 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动
- Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置
- 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
@@ -10,15 +10,22 @@ import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Set, Tuple
|
||||
|
||||
from app.agents.memoir.batch_phase1_prep import (
|
||||
STAGE_ALLOWED_SLOTS,
|
||||
run_batch_phase1_prep,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import (
|
||||
ClassificationAgent,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import (
|
||||
_detect_stage as detect_stage_from_keywords,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import _looks_like_fragment_only
|
||||
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
|
||||
from app.agents.stage_constants import normalize_chapter_category, normalize_chat_stage
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.agent_logging import agent_span, agent_summary_enabled, log_agent_detail
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.models import Segment
|
||||
|
||||
@@ -69,6 +76,26 @@ class MemoirOrchestrator:
|
||||
segment_chapter_category: Dict[str, str] = {}
|
||||
classify_extract_llm = llm_fast if llm_fast is not None else llm
|
||||
|
||||
# 仅 MEMOIR_PHASE1_BATCH_LLM_ENABLED=true 时走批处理;关则与旧版一致逐段(含多段一批)
|
||||
use_batch = (
|
||||
bool(segments)
|
||||
and classify_extract_llm is not None
|
||||
and settings.memoir_phase1_batch_llm_enabled
|
||||
)
|
||||
if use_batch:
|
||||
try:
|
||||
return self._prepare_batches_via_batch_llm(
|
||||
segments=segments,
|
||||
state=state,
|
||||
classify_extract_llm=classify_extract_llm,
|
||||
update_slot=update_slot,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"MemoirOrchestrator.prepare_batches batch LLM 失败,回退逐段: {}",
|
||||
e,
|
||||
)
|
||||
|
||||
for segment in segments:
|
||||
text = segment.user_input_text or ""
|
||||
seg_t0 = time.perf_counter()
|
||||
@@ -133,6 +160,92 @@ class MemoirOrchestrator:
|
||||
segment_chapter_category=segment_chapter_category,
|
||||
)
|
||||
|
||||
def _prepare_batches_via_batch_llm(
    self,
    *,
    segments: List[Segment],
    state: MemoirStateSchema,
    classify_extract_llm: Any,
    update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
) -> PreparedMemoirBatches:
    """Build memoir batches from a single batch-LLM extraction/classification pass.

    ``run_batch_phase1_prep`` is called once for all ``segments``; its result
    is then applied segment by segment:

    * extracted slots are filtered through ``STAGE_ALLOWED_SLOTS`` for the
      detected stage and written via ``update_slot``;
    * when no slot survives, the stage falls back to ``state.current_stage``
      (or ``"childhood"`` when that is empty);
    * the chapter category is ``"summary"`` for fragment-only text and for a
      raw category of ``"none"``, otherwise the normalized raw category;
    * a segment is added to the skip set only when it has no slots AND the
      raw category was ``"none"``.

    Args:
        segments: Segments to prepare, processed in the given order.
        state: Current memoir state; replaced by each ``update_slot`` result.
        classify_extract_llm: LLM handle forwarded to ``run_batch_phase1_prep``.
        update_slot: Callback ``(stage, slot_name, snippet, segment_ids)``
            returning the updated state.

    Returns:
        The final state plus the per-category segment lists, the skip-story
        id set and the per-segment chapter category map.

    Raises:
        KeyError: If the batch result lacks a row for any segment. This is
            raised before any ``update_slot`` call so that a caller falling
            back to another path does not observe half-applied slots.
    """
    category_to_segments: Dict[str, List[Segment]] = {}
    segment_skip_story_ids: Set[str] = set()
    segment_chapter_category: Dict[str, str] = {}

    by_id = run_batch_phase1_prep(segments, state, classify_extract_llm)

    # Resolve every row up front: a missing row must fail the whole batch
    # before the first slot is written, not after some segments were applied.
    missing_ids = [str(seg.id) for seg in segments if str(seg.id) not in by_id]
    if missing_ids:
        raise KeyError(
            "batch phase1 result is missing segment ids: " + ", ".join(missing_ids)
        )

    for segment in segments:
        text = segment.user_input_text or ""
        seg_t0 = time.perf_counter()
        row = by_id[str(segment.id)]

        raw_slots = dict(row.slots)
        # Read inside the loop: ``state`` is rebound by update_slot below.
        fb = state.current_stage or "childhood"

        if raw_slots:
            detected_stage = normalize_chat_stage(row.detected_stage, fb)
        else:
            detected_stage = normalize_chat_stage(fb, fb)

        # Keep only the slots that are valid for the detected stage.
        allowed = STAGE_ALLOWED_SLOTS.get(detected_stage, frozenset())
        result_slots = {k: v for k, v in raw_slots.items() if k in allowed}
        if not result_slots:
            # Nothing usable was extracted: align the stage with the current one.
            detected_stage = normalize_chat_stage(fb, fb)

        with agent_span(
            logger,
            "MemoirOrchestrator.BatchPhase1Prep.apply",
            segment_id=segment.id,
        ):
            for slot_name, snippet in result_slots.items():
                state = update_slot(
                    detected_stage, slot_name, snippet, [segment.id]
                )

        if _looks_like_fragment_only(text):
            # Fragment-only text is always filed under "summary" and is never
            # treated as an explicit "none" answer from the LLM.
            chapter_category = "summary"
            llm_said_none = False
        else:
            raw_cat = (row.chapter_category_raw or "").strip().lower()
            if raw_cat == "none":
                chapter_category = "summary"
                llm_said_none = True
            else:
                chapter_category = normalize_chapter_category(
                    row.chapter_category_raw,
                    "summary",
                )
                llm_said_none = False

        # Skip story generation only when there are no slots and the LLM
        # explicitly classified the segment as "none".
        if (not result_slots) and llm_said_none:
            segment_skip_story_ids.add(str(segment.id))
        segment_chapter_category[str(segment.id)] = chapter_category

        if agent_summary_enabled():
            logger.info(
                "MemoirOrchestrator.segment(batch) segment_id={} text_len={} "
                "detected_stage={} category={} segment_total_ms={:.2f}",
                segment.id,
                len(text),
                detected_stage,
                chapter_category,
                (time.perf_counter() - seg_t0) * 1000,
            )
        log_agent_detail(
            logger,
            "MemoirOrchestrator.segment_done(batch) segment_id={} slots={}",
            segment.id,
            list(result_slots.keys()),
        )
        category_to_segments.setdefault(chapter_category, []).append(segment)

    return PreparedMemoirBatches(
        state=state,
        category_to_segments=category_to_segments,
        segment_skip_story_ids=segment_skip_story_ids,
        segment_chapter_category=segment_chapter_category,
    )
|
||||
|
||||
def run(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user