feat(api): 访谈路径轻量门控、Memoir Phase1 批处理与叙事/记忆管线加固
- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置 - 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example - Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧 - 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置 - 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动 - Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置 - 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
This commit is contained in:
@@ -19,9 +19,13 @@ from app.agents.chat.stage_detection import (
|
||||
detect_primary_life_stage,
|
||||
life_stage_display_name,
|
||||
)
|
||||
from app.agents.chat.utterance_substance import should_run_chat_stage_memory_heavy_work
|
||||
from app.core.config import settings
|
||||
from app.core.dependencies import get_llm_provider
|
||||
from app.features.conversation.input_normalize import normalize_chat_input_for_agent
|
||||
from app.features.conversation.input_normalize import (
|
||||
apply_conversation_input_rules,
|
||||
normalize_chat_input_for_agent,
|
||||
)
|
||||
from app.features.memoir.state_service import get_or_create_state, switch_stage
|
||||
|
||||
|
||||
@@ -58,6 +62,11 @@ async def _fetch_interview_memory_evidence(
|
||||
msg = (user_message or "").strip()
|
||||
if not msg:
|
||||
return ""
|
||||
if (
|
||||
settings.chat_memory_retrieval_require_substantive
|
||||
and not should_run_chat_stage_memory_heavy_work(msg)
|
||||
):
|
||||
return ""
|
||||
try:
|
||||
ms = MemoryService(db, embedding_provider=get_embedding_provider())
|
||||
bundle = await ms.retrieve(user_id, msg, top_k=settings.chat_memory_top_k)
|
||||
@@ -122,9 +131,19 @@ class ChatOrchestrator:
|
||||
missing,
|
||||
len(user_message or ""),
|
||||
)
|
||||
extracted = await self.profile_agent.extract_profile_from_message(
|
||||
user_message, missing, conversation_id=conversation_id
|
||||
)
|
||||
run_extract = True
|
||||
if settings.chat_profile_extract_require_substantive:
|
||||
rules_only = apply_conversation_input_rules(user_message or "")
|
||||
run_extract = should_run_chat_stage_memory_heavy_work(
|
||||
rules_only
|
||||
)
|
||||
extracted = None
|
||||
if run_extract:
|
||||
extracted = (
|
||||
await self.profile_agent.extract_profile_from_message(
|
||||
user_message, missing, conversation_id=conversation_id
|
||||
)
|
||||
)
|
||||
if extracted:
|
||||
await apply_extracted_profile_fn(user, extracted, db)
|
||||
|
||||
@@ -184,12 +203,17 @@ class ChatOrchestrator:
|
||||
normalized_user_message = normalize_chat_input_for_agent(
|
||||
user_message or "",
|
||||
llm=llm_n,
|
||||
is_from_voice=is_from_voice,
|
||||
)
|
||||
state = await get_or_create_state(user_id, db)
|
||||
substantive_turn = should_run_chat_stage_memory_heavy_work(
|
||||
normalized_user_message
|
||||
)
|
||||
detected = await detect_primary_life_stage(
|
||||
normalized_user_message,
|
||||
state.current_stage,
|
||||
self.interview_agent.llm,
|
||||
skip_llm=not substantive_turn,
|
||||
)
|
||||
if detected != state.current_stage:
|
||||
state = await switch_stage(user_id, detected, db)
|
||||
|
||||
@@ -55,15 +55,22 @@ async def detect_primary_life_stage(
|
||||
user_message: str,
|
||||
current_stage: str,
|
||||
llm: Any,
|
||||
*,
|
||||
skip_llm: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
返回合法的人生阶段 key;失败时回退为 current_stage。
|
||||
skip_llm=True 时仅用关键词(短时/元话语等路径,不调阶段 LLM)。
|
||||
"""
|
||||
fb = normalize_chat_stage(current_stage, "childhood")
|
||||
if not settings.chat_stage_detection_enabled:
|
||||
k = keyword_fallback_primary_stage(user_message)
|
||||
return normalize_chat_stage(k, fb) if k else fb
|
||||
|
||||
if skip_llm and settings.chat_stage_detection_skip_llm_on_insufficient_signal:
|
||||
k = keyword_fallback_primary_stage(user_message)
|
||||
return normalize_chat_stage(k, fb) if k else fb
|
||||
|
||||
if not llm:
|
||||
k = keyword_fallback_primary_stage(user_message)
|
||||
return normalize_chat_stage(k, fb) if k else fb
|
||||
|
||||
73
api/app/agents/chat/utterance_substance.py
Normal file
73
api/app/agents/chat/utterance_substance.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
启发式判断访谈「本轮」是否值得跑阶段 LLM / 记忆检索等高成本步骤。
|
||||
|
||||
短答、应答词、元话语(谈整理回忆本身而非人生经历)为 False;长文本或中等长度非常用词为 True。
|
||||
与配置 `chat_substantive_*` 配合;关闭启发式时恒为 True。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Final
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# 极短应答(整句精确匹配)
|
||||
_SHORT_ACK_EXACT: Final[frozenset[str]] = frozenset(
|
||||
{
|
||||
"嗯",
|
||||
"对",
|
||||
"好",
|
||||
"是",
|
||||
"行的",
|
||||
"是的",
|
||||
"没有",
|
||||
"行",
|
||||
"噢",
|
||||
"哦",
|
||||
"好吧",
|
||||
"嗯嗯",
|
||||
"对对",
|
||||
"好嘞",
|
||||
"对的",
|
||||
"没了",
|
||||
"可以",
|
||||
"就这样",
|
||||
"还行",
|
||||
"还好",
|
||||
}
|
||||
)
|
||||
|
||||
# 元话语:谈回忆过程/访谈本身,不足以切换人生阶段或拉记忆证据
|
||||
_META_PROCESS: Final[re.Pattern[str]] = re.compile(
|
||||
r"(回忆|想起).{0,20}(细节|收获|快忘|忘的|很多东西)"
|
||||
r"|(整理|聊聊|谈到).{0,8}(回忆|访谈|记录)"
|
||||
r"|最大的收获",
|
||||
re.UNICODE,
|
||||
)
|
||||
|
||||
|
||||
def should_run_chat_stage_memory_heavy_work(text: str) -> bool:
    """Decide heuristically whether this turn warrants the expensive steps.

    Args:
        text: The user's utterance for the current turn; ``None`` or empty
            input is treated as an empty string.

    Returns:
        ``True`` when the stage-detection LLM and memory retrieval should run;
        ``False`` when the caller should use keyword-only stage fallback and
        skip memory retrieval. Always ``True`` when
        ``settings.chat_substantive_heuristic_enabled`` is off.
    """
    if not settings.chat_substantive_heuristic_enabled:
        return True

    utterance = (text or "").strip()
    # Meta-talk may be fairly long, so it has to be rejected before the
    # "long enough" shortcut below gets a chance to accept it.
    if not utterance or _META_PROCESS.search(utterance):
        return False

    length = len(utterance)
    if length >= int(settings.chat_substantive_min_chars):
        return True
    if utterance in _SHORT_ACK_EXACT:
        return False

    # Very short input made up solely of filler/acknowledgement characters.
    # Inputs of four characters or fewer cannot reach the ``>= 5`` threshold
    # below, so they end up False either way.
    filler_chars = "嗯哦噢对对好好的没行是的不没一下的了呗嘛呀啊"
    if length <= 4 and all(ch in filler_chars for ch in utterance):
        return False

    # Short input that matched no noise rule: five characters is commonly an
    # informative phrase (an earlier ``>= 6`` threshold rejected those).
    return length >= 5
|
||||
114
api/app/agents/memoir/batch_phase1_prep.py
Normal file
114
api/app/agents/memoir/batch_phase1_prep.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""
|
||||
Phase1 批处理:一次 LLM 调用完成多段的抽取 + 章节分类(与逐段循环语义对齐)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from app.agents.memoir.prompts import get_batch_memoir_phase1_prep_prompt
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.config import settings
|
||||
from app.core.json_utils import extract_json_payload
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.models import Segment
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Slot keys that may be filled for each chat life-stage key.
STAGE_ALLOWED_SLOTS: Dict[str, frozenset[str]] = {
    stage: frozenset(slot_keys)
    for stage, slot_keys in (
        ("childhood", ("place", "people", "daily_life", "emotion", "turning_event")),
        ("education", ("school", "city", "motivation", "challenge", "change")),
        ("career", ("job", "environment", "decision", "pressure", "growth")),
        ("family", ("relationship", "conflict", "support", "responsibility", "change")),
        ("belief", ("value", "regret", "pride", "lesson")),
    )
}
|
||||
|
||||
|
||||
def _slots_snapshot(state: MemoirStateSchema) -> dict:
|
||||
snap: dict = {}
|
||||
for stage, buckets in (state.slots or {}).items():
|
||||
snap[stage] = {}
|
||||
for k, v in (buckets or {}).items():
|
||||
if hasattr(v, "snippet"):
|
||||
sn = getattr(v, "snippet", None) or ""
|
||||
elif isinstance(v, dict):
|
||||
sn = (
|
||||
(v.get("snippet") or "")
|
||||
if isinstance(v.get("snippet"), str)
|
||||
else ""
|
||||
)
|
||||
else:
|
||||
sn = ""
|
||||
snap[stage][k] = (sn or "")[:120]
|
||||
return snap
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BatchPhase1SegmentRow:
    """Immutable per-segment result parsed from the batch Phase1 LLM reply."""

    # Life-stage key reported for the segment.
    detected_stage: str
    # Extracted slot snippets, keyed by slot name.
    slots: Dict[str, str]
    # Chapter category string as returned by the model, before normalization.
    chapter_category_raw: str
|
||||
|
||||
|
||||
def _coerce_slot_snippets(slots_raw: Any) -> Dict[str, str]:
    """Normalize one row's ``slots`` payload into ``{slot_key: snippet}``.

    A non-dict payload yields an empty dict. Entries with an empty or
    non-string key are dropped. ``None`` values (JSON ``null``) are dropped as
    well so they are not stringified into the literal snippet ``"None"``.
    Remaining non-string values are converted with ``str``.
    """
    if not isinstance(slots_raw, dict):
        return {}
    return {
        key: value if isinstance(value, str) else str(value)
        for key, value in slots_raw.items()
        if key and isinstance(key, str) and value is not None
    }


def run_batch_phase1_prep(
    segments: List[Segment],
    state: MemoirStateSchema,
    llm: Any,
) -> Dict[str, BatchPhase1SegmentRow]:
    """Run extraction and chapter classification for all segments in one LLM call.

    Args:
        segments: Segments to process, in order; each contributes ``str(id)``
            and its stripped ``user_input_text`` to the prompt.
        state: Current memoir state; supplies ``current_stage`` (defaulting to
            ``"childhood"``) and the slots snapshot shown to the model.
        llm: LLM handle passed to ``invoke_json_object``; must be truthy.

    Returns:
        Mapping of segment id (as ``str``) to its parsed row. Empty when
        ``segments`` is empty.

    Raises:
        ValueError: If ``llm`` is falsy, the reply is not a JSON object, its
            ``segments`` entry is not a list, or the returned ids do not match
            the input ids exactly. ``json.JSONDecodeError`` from parsing is a
            ``ValueError`` subclass. Errors raised by the project helpers
            called here are not caught.
    """
    if not llm:
        raise ValueError("batch phase1 requires llm")
    if not segments:
        return {}

    items = [(str(s.id), (s.user_input_text or "").strip()) for s in segments]
    prompt = get_batch_memoir_phase1_prep_prompt(
        system_current_stage=state.current_stage or "childhood",
        slots_snapshot=_slots_snapshot(state),
        segment_items=items,
    )
    raw = invoke_json_object(
        llm,
        prompt,
        max_tokens=int(settings.memoir_phase1_batch_llm_max_tokens),
        agent="BatchPhase1Prep.run",
    )
    parsed = json.loads(extract_json_payload(raw))
    if not isinstance(parsed, dict):
        # A bare list or scalar would otherwise fail on ``.get`` with an
        # unhelpful AttributeError.
        raise ValueError("batch phase1: response must be a JSON object")
    rows = parsed.get("segments") or []
    if not isinstance(rows, list):
        raise ValueError("batch phase1: segments must be a list")

    fallback_stage = state.current_stage or "childhood"
    by_id: Dict[str, BatchPhase1SegmentRow] = {}
    for row in rows:
        if not isinstance(row, dict):
            continue
        sid = str(row.get("id", "")).strip()
        if not sid:
            continue
        stage = str(row.get("detected_stage", "") or "").strip().lower()
        # Accept "category" as an alternative key when "chapter_category" is absent.
        cat_raw = str(row.get("chapter_category", row.get("category", "")) or "")
        by_id[sid] = BatchPhase1SegmentRow(
            detected_stage=stage or fallback_stage,
            slots=_coerce_slot_snippets(row.get("slots")),
            chapter_category_raw=cat_raw,
        )

    # The reply must cover exactly the requested segments: no gaps, no extras.
    expected = {str(s.id) for s in segments}
    if by_id.keys() != expected:
        missing = expected - by_id.keys()
        extra = by_id.keys() - expected
        logger.warning("batch phase1 id mismatch missing={} extra={}", missing, extra)
        raise ValueError("batch phase1 response segment ids do not match input")
    return by_id
|
||||
@@ -64,15 +64,21 @@ class ExtractionAgent:
|
||||
agent="ExtractionAgent.extract",
|
||||
)
|
||||
parsed = json.loads(extract_json_payload(raw))
|
||||
raw_detected = parsed.get("detected_stage", detected_stage)
|
||||
detected_stage = normalize_chat_stage(
|
||||
str(raw_detected) if raw_detected is not None else None,
|
||||
fallback=current_stage,
|
||||
)
|
||||
raw_slots = parsed.get("slots", {}) or {}
|
||||
extracted_slots = {
|
||||
k: v if isinstance(v, str) else str(v) for k, v in raw_slots.items()
|
||||
}
|
||||
if not extracted_slots:
|
||||
# 无实质 slot 时不推断阶段,避免元话语被标成任意 childhood 等(与服务端护栏一致)
|
||||
detected_stage = normalize_chat_stage(
|
||||
current_stage, fallback=current_stage
|
||||
)
|
||||
else:
|
||||
raw_detected = parsed.get("detected_stage", current_stage)
|
||||
detected_stage = normalize_chat_stage(
|
||||
str(raw_detected) if raw_detected is not None else None,
|
||||
fallback=current_stage,
|
||||
)
|
||||
except (json.JSONDecodeError, Exception) as e:
|
||||
logger.warning("ExtractionAgent LLM 解析失败: {}", e)
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ class FidelityCheckAgent:
|
||||
narrative_json: str,
|
||||
llm: Any,
|
||||
existing_canonical_markdown: str | None = None,
|
||||
is_append: bool = False,
|
||||
) -> bool:
|
||||
if not llm or not settings.memoir_fidelity_check_enabled:
|
||||
return True
|
||||
@@ -65,7 +66,8 @@ class FidelityCheckAgent:
|
||||
- 新增口述中**没有**的具体人名、地名、时间、数字、对话原文
|
||||
- 补全口述未说明的结果或结局(如「最终没考上」)
|
||||
- 把系统摘录/档案里才有的信息写成用户亲口经历
|
||||
- 虚构具体场景细节来「让文章更好看」"""
|
||||
- 虚构具体场景细节来「让文章更好看」
|
||||
- 叙述中新增**具体场合/场景锚点**而口述没有同类表述(如写入「聚餐」「酒席」「那晚」「前一晚」等聚会或时间场合,但口述仅有话题内容而未提及该场合;把摘录里才有的场合写成亲历)"""
|
||||
|
||||
if existing:
|
||||
prompt = f"""你是事实核对员。当前为**续写合并**:生成稿应保留「已有故事正文」中的事实并融入「本轮口述」中的新事实。
|
||||
@@ -126,5 +128,9 @@ class FidelityCheckAgent:
|
||||
)
|
||||
return ok
|
||||
except Exception as e:
|
||||
logger.warning("FidelityCheckAgent 解析失败,放行: {}", e)
|
||||
return True
|
||||
logger.warning("FidelityCheckAgent 解析失败: {}", e)
|
||||
if is_append or settings.memoir_fidelity_fail_open_on_parse_error:
|
||||
logger.info("event=fidelity_parse_fail_open is_append={}", is_append)
|
||||
return True
|
||||
logger.warning("event=fidelity_parse_fail_closed")
|
||||
return False
|
||||
|
||||
@@ -70,15 +70,23 @@ class NarrativeAgent:
|
||||
llm: Any = None,
|
||||
background_voice: str = "default",
|
||||
occupation: str = "",
|
||||
*,
|
||||
fallback_plain_oral: str = "",
|
||||
) -> str:
|
||||
"""将新对话改写为叙述。若无 LLM 则直接拼接。
|
||||
|
||||
若 `existing_content` 非空(append 路径),使用整篇合并提示,输出覆盖全篇的有序段落。
|
||||
|
||||
`fallback_plain_oral`:仅含本段口述(勿传含 evidence 的组装串)。LLM 异常时只回退到
|
||||
口述/旧正文拼接,避免把「本段用户口述+摘录」整包写入 story。
|
||||
"""
|
||||
oral_fb = (fallback_plain_oral or "").strip()
|
||||
if not llm:
|
||||
if existing_content:
|
||||
if oral_fb:
|
||||
return f"{existing_content}\n\n{oral_fb}"
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
return oral_fb or new_content
|
||||
try:
|
||||
merge_mode = bool((existing_content or "").strip())
|
||||
if merge_mode:
|
||||
@@ -115,6 +123,11 @@ class NarrativeAgent:
|
||||
).strip()
|
||||
except Exception as e:
|
||||
logger.warning("NarrativeAgent 生成叙事失败: {}", e)
|
||||
if existing_content:
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
ex = (existing_content or "").strip()
|
||||
if ex and oral_fb:
|
||||
return f"{existing_content}\n\n{oral_fb}"
|
||||
if oral_fb:
|
||||
return oral_fb
|
||||
if ex:
|
||||
return str(existing_content)
|
||||
return ""
|
||||
|
||||
@@ -10,15 +10,22 @@ import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Set, Tuple
|
||||
|
||||
from app.agents.memoir.batch_phase1_prep import (
|
||||
STAGE_ALLOWED_SLOTS,
|
||||
run_batch_phase1_prep,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import (
|
||||
ClassificationAgent,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import (
|
||||
_detect_stage as detect_stage_from_keywords,
|
||||
)
|
||||
from app.agents.memoir.classification_agent import _looks_like_fragment_only
|
||||
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
|
||||
from app.agents.stage_constants import normalize_chapter_category, normalize_chat_stage
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.agent_logging import agent_span, agent_summary_enabled, log_agent_detail
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.models import Segment
|
||||
|
||||
@@ -69,6 +76,26 @@ class MemoirOrchestrator:
|
||||
segment_chapter_category: Dict[str, str] = {}
|
||||
classify_extract_llm = llm_fast if llm_fast is not None else llm
|
||||
|
||||
# 仅 MEMOIR_PHASE1_BATCH_LLM_ENABLED=true 时走批处理;关则与旧版一致逐段(含多段一批)
|
||||
use_batch = (
|
||||
bool(segments)
|
||||
and classify_extract_llm is not None
|
||||
and settings.memoir_phase1_batch_llm_enabled
|
||||
)
|
||||
if use_batch:
|
||||
try:
|
||||
return self._prepare_batches_via_batch_llm(
|
||||
segments=segments,
|
||||
state=state,
|
||||
classify_extract_llm=classify_extract_llm,
|
||||
update_slot=update_slot,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"MemoirOrchestrator.prepare_batches batch LLM 失败,回退逐段: {}",
|
||||
e,
|
||||
)
|
||||
|
||||
for segment in segments:
|
||||
text = segment.user_input_text or ""
|
||||
seg_t0 = time.perf_counter()
|
||||
@@ -133,6 +160,92 @@ class MemoirOrchestrator:
|
||||
segment_chapter_category=segment_chapter_category,
|
||||
)
|
||||
|
||||
def _prepare_batches_via_batch_llm(
    self,
    *,
    segments: List[Segment],
    state: MemoirStateSchema,
    classify_extract_llm: Any,
    update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
) -> PreparedMemoirBatches:
    """Prepare Phase1 batches from a single batch LLM call.

    Calls ``run_batch_phase1_prep`` once for all segments, then for each
    segment applies the allowed slots through ``update_slot``, resolves its
    chapter category, and groups it under that category.

    Args:
        segments: Segments to process, in order.
        state: Current memoir state; replaced by each ``update_slot`` result.
        classify_extract_llm: LLM handle forwarded to the batch call.
        update_slot: Callback invoked as
            ``update_slot(stage, slot_name, snippet, [segment.id])``; its
            return value becomes the new state.

    Returns:
        ``PreparedMemoirBatches`` holding the final state, the category to
        segments grouping, the ids to skip for story writing, and the
        per-segment chapter category.

    Exceptions from ``run_batch_phase1_prep`` are not caught here.
    """
    # NOTE(review): statement nesting was reconstructed from a flattened diff
    # view; confirm that ``log_agent_detail`` is meant to run for every segment
    # rather than only when agent summaries are enabled.
    grouped: Dict[str, List[Segment]] = {}
    skip_story_ids: Set[str] = set()
    category_by_segment: Dict[str, str] = {}

    rows_by_id = run_batch_phase1_prep(segments, state, classify_extract_llm)

    for segment in segments:
        text = segment.user_input_text or ""
        started = time.perf_counter()
        segment_key = str(segment.id)
        row = rows_by_id[segment_key]

        raw_slots = dict(row.slots)
        fallback_stage = state.current_stage or "childhood"

        # Trust the model's stage only when it actually extracted something.
        stage_hint = row.detected_stage if raw_slots else fallback_stage
        detected_stage = normalize_chat_stage(stage_hint, fallback_stage)

        # Drop slot keys that do not belong to the resolved stage. If nothing
        # survives, fall back to the currently tracked stage.
        allowed_keys = STAGE_ALLOWED_SLOTS.get(detected_stage, frozenset())
        kept_slots = {
            name: snippet for name, snippet in raw_slots.items() if name in allowed_keys
        }
        if not kept_slots:
            detected_stage = normalize_chat_stage(fallback_stage, fallback_stage)

        with agent_span(
            logger,
            "MemoirOrchestrator.BatchPhase1Prep.apply",
            segment_id=segment.id,
        ):
            for slot_name, snippet in kept_slots.items():
                state = update_slot(detected_stage, slot_name, snippet, [segment.id])

        # Fragment-only text is filed under "summary" without consulting the
        # model's category; an explicit "none" also maps to "summary" but is
        # remembered for the skip decision below.
        chapter_category = "summary"
        llm_said_none = False
        if not _looks_like_fragment_only(text):
            if (row.chapter_category_raw or "").strip().lower() == "none":
                llm_said_none = True
            else:
                chapter_category = normalize_chapter_category(
                    row.chapter_category_raw,
                    "summary",
                )

        # Skip story writing only when nothing was extracted AND the model
        # explicitly said the segment belongs to no chapter.
        if llm_said_none and not kept_slots:
            skip_story_ids.add(segment_key)
        category_by_segment[segment_key] = chapter_category

        if agent_summary_enabled():
            logger.info(
                "MemoirOrchestrator.segment(batch) segment_id={} text_len={} "
                "detected_stage={} category={} segment_total_ms={:.2f}",
                segment.id,
                len(text),
                detected_stage,
                chapter_category,
                (time.perf_counter() - started) * 1000,
            )
        log_agent_detail(
            logger,
            "MemoirOrchestrator.segment_done(batch) segment_id={} slots={}",
            segment.id,
            list(kept_slots.keys()),
        )
        grouped.setdefault(chapter_category, []).append(segment)

    return PreparedMemoirBatches(
        state=state,
        category_to_segments=grouped,
        segment_skip_story_ids=skip_story_ids,
        segment_chapter_category=category_by_segment,
    )
|
||||
|
||||
def run(
|
||||
self,
|
||||
*,
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
回忆录整理 Agent 提示词模板
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
@@ -16,7 +18,7 @@ from app.features.memory.evidence_format import (
|
||||
def _memoir_fidelity_core_rules() -> str:
|
||||
"""事实边界 1–4 条(与文体第 5 条拆分,供 story 叙事与标题等复用)。"""
|
||||
return """## 事实边界(必须遵守,优先于文采)
|
||||
1. **正文只能展开「本段用户口述」区块中的内容**。若输入中有「相关记忆摘录」等参考区,其中信息**不得**写成本人本轮亲口经历的细节;最多用一两句作主题衔接,且不得引入摘录里才有的具体人名、地点、时间、对话、数字。
|
||||
1. **正文只能展开「本段用户口述」区块中的内容**。若输入中有「相关记忆摘录」等参考区,其中信息**不得**写成本人本轮亲口经历的细节;最多用一两句作主题衔接,且不得引入摘录里才有的具体人名、地点、时间、对话、数字。**若口述未提及具体场合**(如聚餐、酒席、当晚、前一晚等),不得借用摘录中的场合描写写成本轮亲历。
|
||||
2. **禁止编造**:不得新增用户未提及的具体人物姓名、对话原文、地点、时间、事件经过、因果、数字;不得推断性心理描写或「典型年代场景」填充。**口述未明确结果、结局或对方最终决定时**,不得用常识补全为确定断言(例如未清楚表达落选、未通过、被拒绝等,则不得写「未能被选中」「最终没有录用」等);只写已明确的过程与事实,不确定处宁可略写或使用中性表述。
|
||||
3. **禁止为凑字数扩写**:材料短则输出短;段落数量与长度随材料而定。
|
||||
4. 允许:去除口语赘词与寒暄、调整语序、合并重复指代、把口语改为书面语;**不得**用虚构细节「让文章更好看」。
|
||||
@@ -165,11 +167,63 @@ def get_state_extraction_prompt(
|
||||
|
||||
要求:
|
||||
1. **先忽略话语中的语气词、填充词、寒暄、与AI的交互指令等无关内容**,只关注涉及人生经历的实质信息
|
||||
2. **detected_stage 必须根据用户话语的实际内容判断**,不要默认沿用系统当前阶段。用户可能在聊不同阶段的事情
|
||||
2. **仅当 slots 非空时**:detected_stage 必须根据用户话语的实际内容判断;用户可能在聊与系统当前阶段不同的人生阶段
|
||||
3. slots 的 key 必须属于 detected_stage 对应的 slot 列表
|
||||
4. slots 只填写确实提到的、与人生经历相关的实质内容
|
||||
5. **snippet 应是提炼后的核心信息**,去除语气词和冗余表达,50 字以内
|
||||
6. 如果用户话语中没有任何与人生经历相关的实质内容(如纯粹的寒暄、指令、语气词),slots 为空对象
|
||||
6. 如果用户话语中没有任何与人生经历相关的实质内容(如纯粹的寒暄、元话语「整理回忆」、指令、语气词),**slots 必须为空对象**,且 **detected_stage 必须恰好等于系统当前跟踪的阶段**(「不明确」时不得另猜阶段)
|
||||
"""
|
||||
|
||||
|
||||
def get_batch_memoir_phase1_prep_prompt(
    *,
    system_current_stage: str,
    slots_snapshot: dict,
    segment_items: list[tuple[str, str]],
) -> str:
    """Build the Phase1 batch prompt: extraction plus chapter classification.

    Args:
        system_current_stage: Chat stage key the system currently tracks; it
            is interpolated into the prompt twice.
        slots_snapshot: Summary of already-filled slots, embedded as indented
            JSON with non-ASCII characters kept as-is.
        segment_items: ``(segment_id, user_text)`` pairs, expected in
            chronological order; each becomes one entry in the task list.

    Returns:
        The prompt text asking for a single JSON object with a ``segments``
        array covering every listed id.
    """
    segment_listing = "\n".join(
        f"- id={sid}\n 文本:{text}" for sid, text in segment_items
    )
    slots_json = json.dumps(slots_snapshot, ensure_ascii=False, indent=2)

    return f"""你是回忆录访谈助手。下面有多段用户口述(按时间顺序),请**逐段**完成:
1)信息抽取(slots、detected_stage)——规则与单段抽取相同;
2)章节分类(chapter_category)——规则与单段分类相同。

系统当前跟踪的人生阶段(chat stage key):{system_current_stage}
当前各阶段已占用的 slots 摘要(仅作语境,勿编造未出现的细节):
{slots_json}

detected_stage 仅允许:childhood | education | career | family | belief
slots 的 key 必须属于该 detected_stage 对应集合:
- childhood: place, people, daily_life, emotion, turning_event
- education: school, city, motivation, challenge, change
- career: job, environment, decision, pressure, growth
- family: relationship, conflict, support, responsibility, change
- belief: value, regret, pride, lesson

chapter_category 仅允许:childhood | education | career_early | career_achievement | career_challenge | family | beliefs | summary | **none**
(不足以成篇的档案点/纯寒暄 → **none**;与单段分类一致。)

逐段任务(按下列列表顺序,**segments 数组须覆盖每一行 id,且顺序一致**):
{segment_listing}

**JSON 输出**:只输出一个合法 JSON 对象,不要 markdown。格式:
{{
"segments": [
{{
"id": "<与输入相同的 segment id>",
"detected_stage": "childhood|education|career|family|belief",
"slots": {{ "slot_key": "snippet 50 字以内" }},
"chapter_category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"
}}
]
}}

与单段抽取一致:**仅当 slots 非空时** detected_stage 才按内容推断;若本段无人生经历实质、slots 为空,则 detected_stage 必须等于系统当前跟踪阶段 {system_current_stage}。
"""
|
||||
|
||||
|
||||
@@ -220,7 +274,8 @@ def get_creative_title_prompt(
|
||||
要求:
|
||||
1. 格式:「时间标注 · 标题正文」(时间标注可用年龄、年代或阶段,须与上列信息一致;勿编造未出现的年份)。
|
||||
2. 标题正文 **12–18 字**,须概括用户口述或 slots 中已出现的主题/事实;可以用书面化的概括与凝练表达,但**禁止虚构**口述中不存在的人、事、地、物。
|
||||
3. 语言凝练、有回忆录感,不需要平白直叙也不需要堆砌辞藻。
|
||||
3. **标题中的具体事实**(职务升迁链、部队番号驻地、战役名、生死去向等)必须能在正文摘录或其它已给出的 slots 中找到**逐字**依据;不得仅凭阶段名或年龄提示臆补未出现的履历词。
|
||||
4. 语言凝练、有回忆录感,不需要平白直叙也不需要堆砌辞藻。
|
||||
|
||||
只输出标题这一行文字,不要加引号或书名号。
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user