Files
life-echo/api/app/agents/chat/interview_state_hints.py
Kevin 71fbd39e32 feat(api)!: memory single chain — async MemoryService, strict eval closure
Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor
2026-04-30 14:11:50 +08:00

437 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""
from __future__ import annotations
import re
from collections.abc import Iterable
from langchain_core.messages import AIMessage, BaseMessage
from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
from app.agents.state_schema import (
KnownFact,
MemoirStateSchema,
PersonaThread,
narrative_coverage_state,
)
# 与 `apply_duplicate_question_guard` 中整段替换句一致;用于判定是否需触发二次生成。
DUPLICATE_QUESTION_GUARD_FALLBACK_ZH = "这一段我记住了。"
_QUESTION_SPLIT_RE = re.compile(r"[?]+")
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。!?!?])")
_PUNCT_RE = re.compile(r"[\s、“”()《》【】\\[\\],.!?:;\"'`~·…-]+")
# 「我演罗密欧」等扮演亲历,但排除「我演示…」类口癖
_AUTOBIO_IYAN_NOT_DEMO_RE = re.compile(r"我演(?!示)")
# (trait label, marker substrings) pairs. `build_runtime_interview_state` walks this
# table and emits one PersonaThread per trait, using the first non-empty marker that
# occurs in the current turn's haystack.
# NOTE(review): several markers below are empty strings; they look like characters
# lost when this copy of the file was extracted — confirm against the repository.
# Empty markers are skipped by the `if marker` check in the consumer.
_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
(
"家庭责任感",
("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持"),
),
("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "", "不管")),
("动手创造", ("自己动手", "", "", "", "", "", "", "生火", "")),
("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
("好胜争先", ("比赛", "", "", "第一", "不服输", "较劲")),
)
# (keyword, sensory description) pairs consumed by `extract_scene_cues`.
# NOTE(review): four keywords are empty strings; they look like characters lost when
# this copy of the file was extracted — confirm against the repository.
_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
    ("田野", "田野的泥土和青草气息"),
    ("河里", "河水的凉意"),
    ("海边", "海风和咸咸的空气"),
    ("溜冰", "冰面上咔嚓咔嚓的声响"),
    ("游泳", "一头扎进水里的畅快"),
    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
    ("", "火堆噼啪响、烟气里混着食物焦香"),
    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
    ("", "追着跑、手心攥紧怕跑掉的紧张"),
    ("", "雪花落在脸上化成水珠的凉"),
    ("", "风灌进领子里的感觉"),
    ("下雨", "雨点打在屋顶上的声音"),
    ("自行车", "骑车下坡风呼呼吹过耳朵"),
    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
    ("学校", "教室里粉笔灰飘在阳光里的样子"),
    ("考试", "翻卷子时纸张沙沙响"),
    ("工厂", "机器轰鸣、油污和铁锈的气味"),
    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
)


def extract_scene_cues(user_message: str) -> list[str]:
    """Return up to three sensory descriptions whose keyword occurs in the message.

    Args:
        user_message: Raw user text; ``None`` or whitespace-only yields ``[]``.

    Returns:
        Descriptions from ``_SCENE_CUE_WORDS`` in table order, capped at three.
    """
    msg = (user_message or "").strip()
    if not msg:
        return []
    # An empty keyword is a substring of every message, so it must never count as a
    # hit; the marker loops elsewhere in this module apply the same guard.
    cues = [
        description
        for keyword, description in _SCENE_CUE_WORDS
        if keyword and keyword in msg
    ]
    return cues[:3]
# Slot name -> question fragments that ask for that slot. `apply_duplicate_question_guard`
# looks up the fragments for every known fact's `slot_name` and treats a generated
# question containing one of them (after normalisation) as a repeat.
_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
"place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
"people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
"daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
"emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
"turning_event": ("印象最深的事", "难忘的事", "转折"),
"school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
"city": ("在哪个城市", "去了哪里读书", "在哪读书"),
"motivation": ("为什么想学", "为什么选这个", "动力是什么"),
"challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
"change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
"job": ("做什么工作", "具体做什么", "工作内容是什么"),
"environment": ("工作环境", "在哪工作", "什么单位"),
"decision": ("为什么做这个决定", "怎么决定的"),
"pressure": ("压力大吗", "最难的时候", "最大的压力"),
"growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
"relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
"conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
"support": ("谁支持你", "谁帮过你", "怎么支持你的"),
"responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
"value": ("你最看重什么", "信念是什么", "原则是什么"),
"regret": ("最大的遗憾", "后悔过吗"),
"pride": ("最骄傲的事", "最自豪的事"),
"lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
}
def _normalize_text(text: str) -> str:
    """Trim and lower-case *text*, then delete every ``_PUNCT_RE`` match."""
    trimmed = (text or "").strip()
    lowered = trimmed.lower()
    return _PUNCT_RE.sub("", lowered)
def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
out: list[str] = []
seen: set[str] = set()
for raw in reversed([str(x).strip() for x in items if str(x).strip()]):
key = _normalize_text(raw)
if not key or key in seen:
continue
seen.add(key)
out.append(raw)
if len(out) >= limit:
break
out.reverse()
return out
def _merge_known_facts(
    existing: Iterable[KnownFact],
    additions: Iterable[KnownFact],
    *,
    limit: int = 24,
) -> list[KnownFact]:
    """Merge two fact sequences, de-duplicating on (stage, slot, normalised text).

    A later fact with the same key replaces the earlier one but keeps the earlier
    one's position; only the last ``limit`` entries are returned.
    """
    by_key: dict[tuple[str, str, str], KnownFact] = {}
    for fact in [*existing, *additions]:
        stage_key = (fact.stage or "").strip()
        slot_key = (fact.slot_name or "").strip()
        text_key = _normalize_text(f"{fact.label}:{fact.value}")
        if text_key:
            by_key[(stage_key, slot_key, text_key)] = fact
    return list(by_key.values())[-limit:]
def _merge_persona_threads(
    existing: Iterable[PersonaThread],
    additions: Iterable[PersonaThread],
    *,
    limit: int = 12,
) -> list[PersonaThread]:
    """Merge two thread sequences, de-duplicating on normalised (trait, evidence).

    Threads whose trait normalises to an empty string are dropped. A later thread
    with the same key replaces the earlier one in place; only the last ``limit``
    entries are returned.
    """
    by_key: dict[tuple[str, str], PersonaThread] = {}
    for thread in [*existing, *additions]:
        trait_key = _normalize_text(thread.trait)
        evidence_key = _normalize_text(thread.evidence)
        if trait_key:
            by_key[(trait_key, evidence_key)] = thread
    return list(by_key.values())[-limit:]
def _trim_sentence(text: str, *, limit: int = 80) -> str:
s = re.sub(r"\s+", " ", (text or "").strip())
if len(s) <= limit:
return s
return s[: limit - 1].rstrip() + ""
def build_runtime_interview_state(
    state: MemoirStateSchema,
    *,
    user_message: str,
    active_stage: str,
    birth_year: int | None = None,
    birth_place: str = "",
    grew_up_place: str = "",
    occupation: str = "",
) -> MemoirStateSchema:
    """Merge current-turn hints into a prompt-only state view.

    Profile values and the trimmed user message become ``KnownFact`` entries. Each
    trait in ``_TRAIT_HINTS`` whose marker occurs in the haystack (the message, the
    values of the last eight known facts and up to four filled slot values of the
    active stage) becomes a ``PersonaThread``.

    Args:
        state: Current memoir state; its ``known_facts`` and ``persona_threads``
            are the base of the merge.
        user_message: Text of the current user turn.
        active_stage: Stage key attached to the current-turn fact and the threads.
        birth_year: Optional profile birth year; falsy values are skipped.
        birth_place: Optional profile birth place.
        grew_up_place: Optional profile place of upbringing.
        occupation: Optional profile occupation.

    Returns:
        The result of ``state.model_copy(update=...)`` carrying the merged
        ``known_facts`` and ``persona_threads``.
    """
    additions: list[KnownFact] = []
    if birth_year:
        additions.append(
            KnownFact(label="出生年份", value=f"{birth_year}", source="profile")
        )

    # (label, raw value, stage, slot) for the free-text profile fields.
    profile_fields = (
        ("出生地", birth_place, "childhood", "place"),
        ("成长地", grew_up_place, "childhood", "place"),
        ("职业背景", occupation, "career", "job"),
    )
    for label, raw_value, stage, slot_name in profile_fields:
        # Strip before testing: a whitespace-only value is truthy but would produce
        # a fact with an empty value.
        value = (raw_value or "").strip()
        if value:
            additions.append(
                KnownFact(
                    label=label,
                    value=value,
                    source="profile",
                    stage=stage,
                    slot_name=slot_name,
                )
            )

    msg = _trim_sentence(user_message, limit=120)
    if msg:
        additions.append(
            KnownFact(
                label="本轮新信息",
                value=msg,
                source="current_turn",
                stage=active_stage,
            )
        )

    narrative_state = narrative_coverage_state(state)
    haystack = " ".join(
        [msg]
        + [fact.value for fact in state.known_facts[-8:]]
        + list(narrative_state.filled_slots_for_stage(active_stage).values())[:4]
    )

    persona_additions: list[PersonaThread] = []
    for trait, markers in _TRAIT_HINTS:
        # Only the first non-empty marker found in the haystack counts per trait.
        hit = next((m for m in markers if m and m in haystack), None)
        if hit is None:
            continue
        # The marker itself is the evidence when it came from this turn's message;
        # otherwise the (trimmed) haystack is used.
        evidence_source = hit if hit in msg else haystack
        persona_additions.append(
            PersonaThread(
                trait=trait,
                evidence=_trim_sentence(evidence_source, limit=70),
                source="heuristic",
                stage=active_stage,
            )
        )

    return state.model_copy(
        update={
            "known_facts": _merge_known_facts(state.known_facts, additions),
            "persona_threads": _merge_persona_threads(
                state.persona_threads, persona_additions
            ),
        }
    )
def extract_recent_questions(
    messages: Iterable[BaseMessage], *, limit: int = 4
) -> list[str]:
    """Collect the most recent questions asked in ``AIMessage`` entries.

    Each message's text is split on question-mark runs; every piece that was
    followed by a question mark is recorded (trimmed to 50 characters). The list is
    de-duplicated and capped at ``limit`` via ``_dedupe_keep_last``.

    Args:
        messages: Conversation messages; anything that is not an ``AIMessage`` is
            ignored.
        limit: Maximum number of questions to return.
    """
    questions: list[str] = []
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        text = str(getattr(msg, "content", "") or "").strip()
        if not text:
            continue
        # Fold the full-width question mark into the ASCII one so both forms are
        # split points whichever variants the shared pattern lists.
        pieces = _QUESTION_SPLIT_RE.split(text.replace("?", "?"))
        # split() returns one more piece than there are question-mark runs: the last
        # piece is whatever follows the final question mark (or the whole text when
        # there is none), so it is not a question and is skipped. This replaces a
        # membership test against an empty string, which was always true.
        for piece in pieces[:-1]:
            piece = piece.strip()
            if piece:
                # NOTE(review): a suffix was concatenated here, but the literal is
                # empty in this copy (possibly a lost question mark) — confirm.
                questions.append(_trim_sentence(piece, limit=50))
    return _dedupe_keep_last(questions, limit=limit)
def update_recent_questions(
    existing: Iterable[str],
    generated_segments: Iterable[str],
    *,
    limit: int = 4,
) -> list[str]:
    """Append the last question of each generated segment to the recent list.

    Segments without a question mark contribute nothing. The combined list is
    de-duplicated and capped at ``limit`` via ``_dedupe_keep_last``.

    Args:
        existing: Previously recorded questions, oldest first.
        generated_segments: Newly generated reply segments.
        limit: Maximum number of questions to keep.
    """
    fresh: list[str] = list(existing)
    for seg in generated_segments:
        text = str(seg or "").strip()
        if not text:
            continue
        # Fold the full-width question mark into the ASCII one so both forms are
        # split points whichever variants the shared pattern lists.
        pieces = _QUESTION_SPLIT_RE.split(text.replace("?", "?"))
        # Only pieces followed by a question mark are questions; the final piece is
        # the trailing remainder (or the whole text when no question mark exists).
        asked = [p.strip() for p in pieces[:-1] if p.strip()]
        if not asked:
            continue
        # NOTE(review): a suffix was concatenated here, but the literal is empty in
        # this copy (possibly a lost question mark) — confirm.
        fresh.append(_trim_sentence(asked[-1], limit=50))
    return _dedupe_keep_last(fresh, limit=limit)
def apply_duplicate_question_guard(
    segments: Iterable[str],
    *,
    state: MemoirStateSchema,
    recent_questions: Iterable[str],
) -> tuple[list[str], bool]:
    """Downgrade obvious repeated-fact questions into acknowledgment-only text.

    A segment counts as repeated when it contains a question mark and, after
    normalisation, either overlaps a recent question (substring in either
    direction) or contains a known pattern. Known patterns are the
    ``_SLOT_REPEAT_PATTERNS`` entries for every known fact's slot, plus the value of
    facts labelled "本轮新信息". A repeated segment is replaced by its first
    sentence without a question mark, or by the fallback sentence if none exists.

    Args:
        segments: Generated reply segments; blank ones are dropped.
        state: Memoir state whose ``known_facts`` drive the pattern list.
        recent_questions: Questions already asked recently.

    Returns:
        ``(cleaned_segments, touched)``; ``touched`` is True when at least one
        segment was replaced. An empty result is replaced by the fallback sentence
        without setting ``touched``.
    """
    # The previous checks tested membership of an empty string, which is always
    # true; both question-mark forms are tested explicitly instead.
    question_marks = ("?", "?")
    # NOTE(review): the terminator and suffix literals were empty strings in the
    # reviewed copy; these values are reconstructed from the surrounding logic
    # ("ensure the replacement ends with sentence punctuation") — confirm.
    terminators = ("。", "!", "?", ".", "!", "?")

    recent_norms = {norm for norm in map(_normalize_text, recent_questions) if norm}

    # Normalise each pattern once instead of once per segment.
    pattern_norms: list[str] = []
    for fact in state.known_facts:
        raw_patterns = list(_SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ()))
        if fact.label == "本轮新信息":
            raw_patterns.append(fact.value)
        pattern_norms.extend(
            norm for norm in map(_normalize_text, raw_patterns) if norm
        )

    cleaned: list[str] = []
    touched = False
    for seg in segments:
        text = str(seg or "").strip()
        if not text:
            continue
        text_norm = _normalize_text(text)
        has_question = any(mark in text for mark in question_marks)
        repeated = False
        if has_question and text_norm:
            repeated = any(
                q in text_norm or text_norm in q for q in recent_norms
            ) or any(pattern in text_norm for pattern in pattern_norms)
        if not repeated:
            cleaned.append(text)
            continue
        sentences = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
        kept = [
            s for s in sentences if not any(mark in s for mark in question_marks)
        ]
        replacement = kept[0] if kept else DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
        if not replacement.endswith(terminators):
            replacement += "。"
        cleaned.append(replacement)
        touched = True
    if not cleaned:
        cleaned = [DUPLICATE_QUESTION_GUARD_FALLBACK_ZH]
    return cleaned, touched
def segments_are_only_duplicate_guard_fallback(segments: Iterable[str]) -> bool:
    """Tell whether the reply is just the fallback acknowledgment.

    True when exactly one non-blank segment remains and it equals
    ``DUPLICATE_QUESTION_GUARD_FALLBACK_ZH``, i.e. there is no substantive follow-up
    and another model call is appropriate.
    """
    non_blank: list[str] = []
    for segment in segments:
        stripped = str(segment or "").strip()
        if stripped:
            non_blank.append(stripped)
    if len(non_blank) != 1:
        return False
    return non_blank[0] == DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
# High-confidence substrings that, in an assistant-visible reply, clearly claim
# "I personally had this life experience". Deliberately conservative: better to miss
# a case than to flag generic empathy by mistake.
_AUTOBIOGRAPHICAL_MARKERS_ZH: tuple[str, ...] = (
"我小时候",
"我小学",
"我中学",
"我初中",
"我高中",
"我大学",
"我上学那",
"我念书",
"我读书那",
"我暗恋",
"我当时暗恋",
"我爸妈",
"我父亲",
"我母亲",
"我爹",
"我妈",
"我爷爷",
"我奶奶",
"我外公",
"我外婆",
"我前任",
"我老公",
"我老婆",
"我丈夫",
"我妻子",
"我男友",
"我女友",
"我对象",
"我儿子",
"我女儿",
"我孩子",
"我以前也",
"我当时也",
"我那时候也",
"我也经历过",
"我也有过",
"我也演过",
"我也上台",
"我演过",
"我饰演",
"我演出",
"我演的是",
"我演的",
"我扮演",
"感觉我熟",
"这我熟",
)
# Neutral follow-up that `apply_autobiographical_boundary_guard` substitutes for a
# flagged segment, and returns on its own when no segment survives.
AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH = (
"你刚说的这段很有画面,我想多听你讲讲那时候你心里是什么感觉。"
)
def _segment_has_autobiographical_claim_zh(text: str) -> bool:
s = (text or "").strip()
if not s:
return False
if _AUTOBIO_IYAN_NOT_DEMO_RE.search(s):
return True
return any(m and m in s for m in _AUTOBIOGRAPHICAL_MARKERS_ZH)
def apply_autobiographical_boundary_guard(
    segments: Iterable[str],
) -> tuple[list[str], bool]:
    """Replace segments with assistant-autobiography claims by a neutral follow-up.

    Blank segments are dropped. Returns ``(cleaned_segments, touched)``, where
    ``touched`` is True when a segment was replaced or when nothing survived and the
    fallback sentence is returned on its own.
    """
    result: list[str] = []
    replaced_any = False
    for raw in segments:
        candidate = str(raw or "").strip()
        if not candidate:
            continue
        flagged = _segment_has_autobiographical_claim_zh(candidate)
        replaced_any = replaced_any or flagged
        result.append(AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH if flagged else candidate)
    if result:
        return result, replaced_any
    return [AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH], True
def stage_slot_hint_lines(stage: str) -> list[str]:
    """Build one ``"<stage display name>:<slot key>"`` line per slot of *stage*.

    The display name falls back to *stage* itself when it has no entry in
    ``STAGE_DISPLAY_ZH``; a stage missing from ``STAGE_SLOT_KEYS`` yields ``[]``.
    """
    label = STAGE_DISPLAY_ZH.get(stage, stage)
    lines: list[str] = []
    for slot_key in STAGE_SLOT_KEYS.get(stage, ()):
        lines.append(f"{label}:{slot_key}")
    return lines