2026-04-08 21:36:12 +08:00
|
|
|
|
"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
from collections.abc import Iterable
|
|
|
|
|
|
|
2026-04-10 15:33:28 +08:00
|
|
|
|
# Same sentence that `apply_duplicate_question_guard` substitutes for a whole
# segment; used to decide whether a second generation pass should be triggered.
DUPLICATE_QUESTION_GUARD_FALLBACK_ZH = "这一段我记住了。"
|
|
|
|
|
|
|
2026-04-08 21:36:12 +08:00
|
|
|
|
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
|
|
|
|
|
|
|
|
|
|
|
|
from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
|
|
|
|
|
|
from app.agents.state_schema import KnownFact, MemoirStateSchema, PersonaThread
|
|
|
|
|
|
|
|
|
|
|
|
# Splits text on runs of question marks; both the full-width (U+FF1F) and the
# ASCII form are listed because Chinese replies normally use the former.
_QUESTION_SPLIT_RE = re.compile(r"[??]+")

# Zero-width split point immediately after a sentence terminator
# (full-width and ASCII variants).
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。!?!?])")

# Whitespace plus CJK/ASCII punctuation removed before comparing text.
# The square brackets are escaped with a single backslash: in a raw string
# `\\[\\]` would be "literal backslash, '[', literal backslash" followed by a
# `]` that closes the class early, leaving a pattern that never matches.
_PUNCT_RE = re.compile(r"[\s,。!?;:、“”‘’()()《》【】\[\],.!?:;\"'`~·…-]+")

# First-person role-play claims such as "我演罗密欧", while excluding the
# filler phrase "我演示…".
_AUTOBIO_IYAN_NOT_DEMO_RE = re.compile(r"我演(?!示)")
|
2026-04-08 21:36:12 +08:00
|
|
|
|
|
|
|
|
|
|
# Heuristic persona traits. Each entry pairs a trait label with the marker
# substrings that `build_runtime_interview_state` searches for; the first
# marker found for a trait produces one PersonaThread for that trait.
_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
    ("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
    ("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
    ("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
    (
        "家庭责任感",
        ("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持"),
    ),
    ("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
    ("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "野", "不管")),
    ("动手创造", ("自己动手", "搭", "做", "造", "修", "拆", "烤", "生火", "种")),
    ("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
    ("好胜争先", ("比赛", "赢", "比", "第一", "不服输", "较劲")),
)
|
|
|
|
|
|
|
|
|
|
|
|
# (keyword, sensory description) pairs scanned in order by
# `extract_scene_cues`; a keyword found in the user message contributes its
# description. Overlapping keywords (e.g. "烤红薯" and "烤") can both match.
_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
    ("田野", "田野的泥土和青草气息"),
    ("河里", "河水的凉意"),
    ("海边", "海风和咸咸的空气"),
    ("溜冰", "冰面上咔嚓咔嚓的声响"),
    ("游泳", "一头扎进水里的畅快"),
    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
    ("烤", "火堆噼啪响、烟气里混着食物焦香"),
    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
    ("捉", "追着跑、手心攥紧怕跑掉的紧张"),
    ("雪", "雪花落在脸上化成水珠的凉"),
    ("风", "风灌进领子里的感觉"),
    ("下雨", "雨点打在屋顶上的声音"),
    ("自行车", "骑车下坡风呼呼吹过耳朵"),
    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
    ("学校", "教室里粉笔灰飘在阳光里的样子"),
    ("考试", "翻卷子时纸张沙沙响"),
    ("工厂", "机器轰鸣、油污和铁锈的气味"),
    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_scene_cues(user_message: str) -> list[str]:
    """Return at most three scene descriptions whose keyword occurs in the message.

    Args:
        user_message: Raw user text; ``None`` or blank input yields ``[]``.

    Returns:
        Descriptions from ``_SCENE_CUE_WORDS`` in table order, capped at three.
    """
    text = (user_message or "").strip()
    if not text:
        return []
    matched = [
        description
        for keyword, description in _SCENE_CUE_WORDS
        if keyword in text
    ]
    # Only the first three hits (in table order) are kept.
    return matched[:3]
|
|
|
|
|
|
|
2026-04-09 15:32:35 +08:00
|
|
|
|
|
2026-04-08 21:36:12 +08:00
|
|
|
|
# Slot name -> question phrasings that would re-ask for that slot.
# `apply_duplicate_question_guard` collects the phrasings of every slot that
# already has a known fact and treats a question containing one as a repeat.
_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
    "place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
    "people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
    "daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
    "emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
    "turning_event": ("印象最深的事", "难忘的事", "转折"),
    "school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
    "city": ("在哪个城市", "去了哪里读书", "在哪读书"),
    "motivation": ("为什么想学", "为什么选这个", "动力是什么"),
    "challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
    "change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
    "job": ("做什么工作", "具体做什么", "工作内容是什么"),
    "environment": ("工作环境", "在哪工作", "什么单位"),
    "decision": ("为什么做这个决定", "怎么决定的"),
    "pressure": ("压力大吗", "最难的时候", "最大的压力"),
    "growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
    "relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
    "conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
    "support": ("谁支持你", "谁帮过你", "怎么支持你的"),
    "responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
    "value": ("你最看重什么", "信念是什么", "原则是什么"),
    "regret": ("最大的遗憾", "后悔过吗"),
    "pride": ("最骄傲的事", "最自豪的事"),
    "lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_text(text: str) -> str:
    """Return a comparison key: stripped, lower-cased, with ``_PUNCT_RE`` matches removed.

    ``None`` or empty input yields an empty string.
    """
    lowered = (text or "").strip().lower()
    return _PUNCT_RE.sub("", lowered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
    """Drop duplicate entries, keeping the most recent occurrence of each.

    Entries are stringified and stripped; blanks are ignored. Two entries are
    duplicates when ``_normalize_text`` maps them to the same key.

    Args:
        items: Candidate strings, oldest first.
        limit: Maximum number of entries to return. A non-positive limit
            yields an empty list (previously one entry leaked through because
            the size check ran only after the first append).

    Returns:
        Up to ``limit`` of the newest distinct entries, in their original
        (oldest-to-newest) relative order.
    """
    if limit <= 0:
        return []
    stripped = [s for s in (str(x).strip() for x in items) if s]
    newest_first: list[str] = []
    seen: set[str] = set()
    # Walk from the newest entry backwards so the last occurrence wins.
    for raw in reversed(stripped):
        key = _normalize_text(raw)
        if not key or key in seen:
            continue
        seen.add(key)
        newest_first.append(raw)
        if len(newest_first) >= limit:
            break
    newest_first.reverse()
    return newest_first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _merge_known_facts(
    existing: Iterable[KnownFact],
    additions: Iterable[KnownFact],
    *,
    limit: int = 24,
) -> list[KnownFact]:
    """Merge two fact sequences, de-duplicating and capping the result.

    Facts are keyed by ``(stage, slot_name, normalized "label:value")``. A
    later fact with the same key replaces the earlier one but keeps the
    earlier one's position. Facts whose normalized ``label:value`` is empty
    are dropped.

    Args:
        existing: Facts already on the state.
        additions: Facts to merge in; they win over ``existing`` on key clash.
        limit: Maximum number of facts returned (the last ``limit`` entries).
            A non-positive limit returns an empty list; previously
            ``limit=0`` sliced ``[-0:]`` and returned everything.

    Returns:
        The merged facts, at most ``limit`` of them.
    """
    if limit <= 0:
        return []
    merged: dict[tuple[str, str, str], KnownFact] = {}
    for item in (*existing, *additions):
        identity = _normalize_text(f"{item.label}:{item.value}")
        if not identity:
            continue
        key = (
            (item.stage or "").strip(),
            (item.slot_name or "").strip(),
            identity,
        )
        merged[key] = item
    return list(merged.values())[-limit:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _merge_persona_threads(
    existing: Iterable[PersonaThread],
    additions: Iterable[PersonaThread],
    *,
    limit: int = 12,
) -> list[PersonaThread]:
    """Merge two persona-thread sequences, de-duplicating and capping the result.

    Threads are keyed by ``(normalized trait, normalized evidence)``. A later
    thread with the same key replaces the earlier one but keeps its position.
    Threads whose normalized trait is empty are dropped.

    Args:
        existing: Threads already on the state.
        additions: Threads to merge in; they win over ``existing`` on key clash.
        limit: Maximum number of threads returned (the last ``limit`` entries).
            A non-positive limit returns an empty list; previously
            ``limit=0`` sliced ``[-0:]`` and returned everything.

    Returns:
        The merged threads, at most ``limit`` of them.
    """
    if limit <= 0:
        return []
    merged: dict[tuple[str, str], PersonaThread] = {}
    for item in (*existing, *additions):
        trait_key = _normalize_text(item.trait)
        evidence_key = _normalize_text(item.evidence)
        if not trait_key:
            continue
        merged[(trait_key, evidence_key)] = item
    return list(merged.values())[-limit:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _trim_sentence(text: str, *, limit: int = 80) -> str:
    """Collapse whitespace runs and cap the text at ``limit`` characters.

    Over-long text is cut to ``limit - 1`` characters, right-stripped, and
    terminated with an ellipsis ("…"). ``None`` is treated as an empty string.
    """
    flat = re.sub(r"\s+", " ", (text or "").strip())
    if len(flat) > limit:
        head = flat[: limit - 1].rstrip()
        return head + "…"
    return flat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_runtime_interview_state(
    state: MemoirStateSchema,
    *,
    user_message: str,
    active_stage: str,
    birth_year: int | None = None,
    birth_place: str = "",
    grew_up_place: str = "",
    occupation: str = "",
) -> MemoirStateSchema:
    """Merge current-turn hints into a prompt-only state view.

    Builds extra ``KnownFact`` entries from the profile fields and the current
    user message, derives heuristic ``PersonaThread`` entries by looking for
    ``_TRAIT_HINTS`` markers in recent text, and returns
    ``state.model_copy(update=...)`` with both lists merged in. This function
    does not assign to ``state``.

    Args:
        state: Current interview state; its ``known_facts``,
            ``persona_threads`` and ``filled_slots_for_stage`` are read.
        user_message: The user's message for this turn (trimmed to 120 chars).
        active_stage: Stage key attached to the current-turn fact and to any
            heuristic persona threads.
        birth_year: Profile birth year; falsy values are skipped.
        birth_place: Profile birth place; blank or whitespace-only is skipped.
        grew_up_place: Profile growing-up place; blank or whitespace-only is
            skipped.
        occupation: Profile occupation; blank or whitespace-only is skipped.

    Returns:
        A copy of ``state`` with merged ``known_facts`` and ``persona_threads``.
    """
    additions: list[KnownFact] = []
    if birth_year:
        additions.append(
            KnownFact(
                label="出生年份",
                value=f"{birth_year}年",
                source="profile",
            )
        )

    # (label, raw value, stage, slot_name) for the free-text profile fields.
    profile_fields = (
        ("出生地", birth_place, "childhood", "place"),
        ("成长地", grew_up_place, "childhood", "place"),
        ("职业背景", occupation, "career", "job"),
    )
    for label, raw, stage, slot_name in profile_fields:
        # Strip before testing so whitespace-only input does not become a
        # fact with an empty value.
        value = (raw or "").strip()
        if not value:
            continue
        additions.append(
            KnownFact(
                label=label,
                value=value,
                source="profile",
                stage=stage,
                slot_name=slot_name,
            )
        )

    msg = _trim_sentence(user_message, limit=120)
    if msg:
        additions.append(
            KnownFact(
                label="本轮新信息",
                value=msg,
                source="current_turn",
                stage=active_stage,
            )
        )

    # Text searched for trait markers: this turn's message, the values of the
    # last eight known facts, and up to four filled slots of the active stage.
    recent_fact_values = [fact.value for fact in state.known_facts[-8:]]
    slot_values = list(state.filled_slots_for_stage(active_stage).values())[:4]
    haystack = " ".join([msg] + recent_fact_values + slot_values)

    persona_additions: list[PersonaThread] = []
    for trait, markers in _TRAIT_HINTS:
        # Only the first matching marker per trait produces a thread.
        hit = next((m for m in markers if m and m in haystack), None)
        if hit is None:
            continue
        # NOTE(review): when the marker occurs in the current message the
        # evidence is the marker itself, not the message - confirm intended.
        evidence_source = hit if hit in msg else haystack
        persona_additions.append(
            PersonaThread(
                trait=trait,
                evidence=_trim_sentence(evidence_source, limit=70),
                source="heuristic",
                stage=active_stage,
            )
        )

    return state.model_copy(
        update={
            "known_facts": _merge_known_facts(state.known_facts, additions),
            "persona_threads": _merge_persona_threads(
                state.persona_threads, persona_additions
            ),
        }
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 15:32:35 +08:00
|
|
|
|
def extract_recent_questions(
    messages: Iterable[BaseMessage], *, limit: int = 4
) -> list[str]:
    """Collect the questions asked in ``AIMessage`` entries, de-duplicated.

    Each assistant message is split on runs of question marks. A piece counts
    as a question only if a question mark actually followed it, so remarks
    after the final question mark (or messages with no question mark at all)
    are not recorded. Each question is re-terminated with "?" and trimmed to
    50 characters.

    Args:
        messages: Conversation history; non-``AIMessage`` entries are skipped.
        limit: Maximum number of questions returned.

    Returns:
        Up to ``limit`` of the most recent distinct questions, oldest first.
    """
    questions: list[str] = []
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        text = str(getattr(msg, "content", "") or "").strip()
        if not text:
            continue
        # split() always returns the remainder after the last question mark
        # as its final element; that remainder is not a question.
        *asked, _remainder = _QUESTION_SPLIT_RE.split(text)
        for piece in asked:
            piece = piece.strip()
            if piece:
                questions.append(_trim_sentence(piece + "?", limit=50))
    if not questions:
        return []
    return _dedupe_keep_last(questions, limit=limit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_recent_questions(
    existing: Iterable[str],
    generated_segments: Iterable[str],
    *,
    limit: int = 4,
) -> list[str]:
    """Append the last question of each generated segment to the recent list.

    A segment contributes at most one entry: the last non-blank piece that is
    actually followed by a question mark. Text after the final question mark
    is ignored (previously it was recorded as if it were a question), and
    segments without any question mark contribute nothing. Entries are
    re-terminated with "?" and trimmed to 50 characters.

    Args:
        existing: Previously recorded questions, oldest first.
        generated_segments: Newly generated assistant segments.
        limit: Maximum number of questions returned.

    Returns:
        Up to ``limit`` of the most recent distinct questions, oldest first.
    """
    fresh: list[str] = list(existing)
    for seg in generated_segments:
        text = str(seg or "").strip()
        if not text:
            continue
        # Drop split()'s final element: it is the remainder after the last
        # question mark (or the whole text when there is no question mark).
        asked = [
            piece.strip()
            for piece in _QUESTION_SPLIT_RE.split(text)[:-1]
            if piece.strip()
        ]
        if not asked:
            continue
        fresh.append(_trim_sentence(asked[-1] + "?", limit=50))
    if not fresh:
        return []
    return _dedupe_keep_last(fresh, limit=limit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def apply_duplicate_question_guard(
    segments: Iterable[str],
    *,
    state: MemoirStateSchema,
    recent_questions: Iterable[str],
) -> tuple[list[str], bool]:
    """Downgrade obvious repeated-fact questions into acknowledgment-only text.

    A segment is treated as a repeated question when it contains a question
    mark (full-width or ASCII) and its normalized text either overlaps a
    recent question (one contains the other) or contains a known repeat
    pattern. Repeat patterns are the ``_SLOT_REPEAT_PATTERNS`` entries for
    every slot that already has a known fact, plus the value of any fact
    labelled "本轮新信息".

    A repeated segment is replaced by its first sentence that has no question
    mark, or by ``DUPLICATE_QUESTION_GUARD_FALLBACK_ZH`` when there is none.

    Args:
        segments: Generated assistant segments; blank ones are dropped.
        state: Interview state whose ``known_facts`` are consulted.
        recent_questions: Questions asked recently.

    Returns:
        ``(cleaned_segments, touched)``. ``touched`` is True only when at
        least one segment was replaced; if no non-blank segment exists the
        fallback sentence is returned with ``touched`` left False.
    """
    # Chinese text normally uses the full-width forms, so check both.
    question_marks = ("?", "?")
    closing_marks = ("。", "!", "!", "…")

    recent_norms = {
        norm for norm in map(_normalize_text, recent_questions) if norm
    }

    # Normalize every repeat pattern once instead of once per segment.
    known_norms: set[str] = set()
    for fact in state.known_facts:
        patterns = list(_SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ()))
        if fact.label == "本轮新信息":
            patterns.append(fact.value)
        for pattern in patterns:
            pat_norm = _normalize_text(pattern)
            if pat_norm:
                known_norms.add(pat_norm)

    cleaned: list[str] = []
    touched = False
    for seg in segments:
        text = str(seg or "").strip()
        if not text:
            continue
        text_norm = _normalize_text(text)
        is_question = any(mark in text for mark in question_marks)
        repeated = bool(is_question and text_norm) and (
            any(q in text_norm or text_norm in q for q in recent_norms)
            or any(pat in text_norm for pat in known_norms)
        )
        if not repeated:
            cleaned.append(text)
            continue

        sentences = [
            s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()
        ]
        statements = [
            s for s in sentences
            if not any(mark in s for mark in question_marks)
        ]
        replacement = (
            statements[0] if statements else DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
        )
        if not replacement.endswith(closing_marks):
            replacement += "。"
        cleaned.append(replacement)
        touched = True

    if not cleaned:
        cleaned = [DUPLICATE_QUESTION_GUARD_FALLBACK_ZH]
    return cleaned, touched
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 15:33:28 +08:00
|
|
|
|
def segments_are_only_duplicate_guard_fallback(segments: Iterable[str]) -> bool:
    """Tell whether the reply is nothing but the duplicate-guard fallback.

    True when exactly one non-blank segment remains and it equals
    ``DUPLICATE_QUESTION_GUARD_FALLBACK_ZH`` - i.e. a bare acknowledgment with
    no substantive follow-up, which makes the turn a candidate for another
    model call.
    """
    non_blank: list[str] = []
    for seg in segments:
        stripped = str(seg or "").strip()
        if stripped:
            non_blank.append(stripped)
    if len(non_blank) != 1:
        return False
    return non_blank[0] == DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-22 16:56:28 +08:00
|
|
|
|
# High-confidence substrings that, in an assistant-visible reply, plainly
# claim "I personally lived through this". Deliberately conservative: better
# to miss a case than to flag generic empathy.
_AUTOBIOGRAPHICAL_MARKERS_ZH: tuple[str, ...] = (
    "我小时候",
    "我小学",
    "我中学",
    "我初中",
    "我高中",
    "我大学",
    "我上学那",
    "我念书",
    "我读书那",
    "我暗恋",
    "我当时暗恋",
    "我爸妈",
    "我父亲",
    "我母亲",
    "我爹",
    "我妈",
    "我爷爷",
    "我奶奶",
    "我外公",
    "我外婆",
    "我前任",
    "我老公",
    "我老婆",
    "我丈夫",
    "我妻子",
    "我男友",
    "我女友",
    "我对象",
    "我儿子",
    "我女儿",
    "我孩子",
    "我以前也",
    "我当时也",
    "我那时候也",
    "我也经历过",
    "我也有过",
    "我也演过",
    "我也上台",
    "我演过",
    "我饰演",
    "我演出",
    "我演的是",
    "我演的",
    "我扮演",
    "感觉我熟",
    "这我熟",
)
|
|
|
|
|
|
|
|
|
|
|
|
# Neutral follow-up that `apply_autobiographical_boundary_guard` substitutes
# for a segment flagged as an assistant autobiographical claim.
AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH = (
    "你刚说的这段很有画面,我想多听你讲讲那时候你心里是什么感觉。"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _segment_has_autobiographical_claim_zh(text: str) -> bool:
    """Report whether a segment contains an assistant autobiographical claim.

    Blank or ``None`` input is never a claim. Otherwise the segment is flagged
    when ``_AUTOBIO_IYAN_NOT_DEMO_RE`` matches or when any non-empty entry of
    ``_AUTOBIOGRAPHICAL_MARKERS_ZH`` occurs in it.
    """
    stripped = (text or "").strip()
    if not stripped:
        return False
    if _AUTOBIO_IYAN_NOT_DEMO_RE.search(stripped) is not None:
        return True
    for marker in _AUTOBIOGRAPHICAL_MARKERS_ZH:
        if marker and marker in stripped:
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def apply_autobiographical_boundary_guard(
    segments: Iterable[str],
) -> tuple[list[str], bool]:
    """Replace segments that read as the assistant's own life story.

    Every non-blank segment flagged by
    ``_segment_has_autobiographical_claim_zh`` is swapped for
    ``AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH`` so the assistant does not
    overstep its identity; other segments pass through stripped.

    Returns:
        ``(cleaned_segments, touched)``. ``touched`` is True when any segment
        was replaced, and also when no non-blank segment exists and the
        fallback is returned on its own.
    """
    texts = [t for t in (str(seg or "").strip() for seg in segments) if t]
    if not texts:
        return [AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH], True
    flags = [_segment_has_autobiographical_claim_zh(t) for t in texts]
    cleaned = [
        AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH if flagged else t
        for t, flagged in zip(texts, flags)
    ]
    return cleaned, any(flags)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 21:36:12 +08:00
|
|
|
|
def stage_slot_hint_lines(stage: str) -> list[str]:
    """Return one "<stage display name>:<slot key>" line per slot of the stage.

    The display name comes from ``STAGE_DISPLAY_ZH`` (falling back to the raw
    stage key); slot keys come from ``STAGE_SLOT_KEYS``. A stage with no
    registered slots yields an empty list.
    """
    display_name = STAGE_DISPLAY_ZH.get(stage, stage)
    lines: list[str] = []
    for key in STAGE_SLOT_KEYS.get(stage, ()):
        lines.append(f"{display_name}:{key}")
    return lines
|