Files
life-echo/api/app/agents/chat/interview_state_hints.py
yangshilin e1341c6d18 feat:
1. 建立问题库大纲,对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性
2026-04-09 15:32:35 +08:00

335 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""
from __future__ import annotations
import re
from collections.abc import Iterable
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
from app.agents.state_schema import KnownFact, MemoirStateSchema, PersonaThread
_QUESTION_SPLIT_RE = re.compile(r"[?]+")
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。!?!?])")
_PUNCT_RE = re.compile(r"[\s、“”()《》【】\\[\\],.!?:;\"'`~·…-]+")
# Heuristic persona-trait table. Each entry pairs a trait label with the marker
# substrings that build_runtime_interview_state looks for in the current
# message, recent known facts and filled slots; the first marker found yields
# one PersonaThread for that trait.
# NOTE(review): several markers below are empty strings. The consumer skips
# falsy markers, so they are inert, but they look like characters that were
# lost from the file — confirm against the original and restore them.
_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
(
"家庭责任感",
("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持"),
),
("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "", "不管")),
("动手创造", ("自己动手", "", "", "", "", "", "", "生火", "")),
("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
("好胜争先", ("比赛", "", "", "第一", "不服输", "较劲")),
)
# Keyword -> sensory description table used by extract_scene_cues.
# NOTE(review): the entries whose keyword is an empty string look like keywords
# that were lost from the file. They are kept as placeholders and ignored by
# extract_scene_cues — confirm against the original and restore them.
_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
    ("田野", "田野的泥土和青草气息"),
    ("河里", "河水的凉意"),
    ("海边", "海风和咸咸的空气"),
    ("溜冰", "冰面上咔嚓咔嚓的声响"),
    ("游泳", "一头扎进水里的畅快"),
    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
    ("", "火堆噼啪响、烟气里混着食物焦香"),
    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
    ("", "追着跑、手心攥紧怕跑掉的紧张"),
    ("", "雪花落在脸上化成水珠的凉"),
    ("", "风灌进领子里的感觉"),
    ("下雨", "雨点打在屋顶上的声音"),
    ("自行车", "骑车下坡风呼呼吹过耳朵"),
    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
    ("学校", "教室里粉笔灰飘在阳光里的样子"),
    ("考试", "翻卷子时纸张沙沙响"),
    ("工厂", "机器轰鸣、油污和铁锈的气味"),
    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
)


def extract_scene_cues(user_message: str) -> list[str]:
    """Return up to three sensory descriptions triggered by *user_message*.

    Args:
        user_message: Raw user text; ``None`` or blank input yields ``[]``.

    Returns:
        Descriptions from ``_SCENE_CUE_WORDS`` whose keyword occurs in the
        message, in table order, capped at three.
    """
    msg = (user_message or "").strip()
    if not msg:
        return []
    # An empty keyword is a substring of every string, so it must be skipped
    # explicitly or its description would be attached to every message.
    cues = [
        description
        for keyword, description in _SCENE_CUE_WORDS
        if keyword and keyword in msg
    ]
    return cues[:3]
# Maps a slot name to question phrasings that ask for that slot's content.
# apply_duplicate_question_guard collects the phrasings for every slot that
# already has a known fact and treats a question containing one of them
# (after text normalization) as a repeat.
_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
"place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
"people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
"daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
"emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
"turning_event": ("印象最深的事", "难忘的事", "转折"),
"school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
"city": ("在哪个城市", "去了哪里读书", "在哪读书"),
"motivation": ("为什么想学", "为什么选这个", "动力是什么"),
"challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
"change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
"job": ("做什么工作", "具体做什么", "工作内容是什么"),
"environment": ("工作环境", "在哪工作", "什么单位"),
"decision": ("为什么做这个决定", "怎么决定的"),
"pressure": ("压力大吗", "最难的时候", "最大的压力"),
"growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
"relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
"conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
"support": ("谁支持你", "谁帮过你", "怎么支持你的"),
"responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
"value": ("你最看重什么", "信念是什么", "原则是什么"),
"regret": ("最大的遗憾", "后悔过吗"),
"pride": ("最骄傲的事", "最自豪的事"),
"lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
}
def _normalize_text(text: str) -> str:
    """Return *text* trimmed, lower-cased and with ``_PUNCT_RE`` matches removed.

    A falsy *text* (``None`` or ``""``) is treated as the empty string.
    """
    lowered = (text or "").strip().lower()
    return _PUNCT_RE.sub("", lowered)
def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
    """Drop duplicates from *items*, keeping the latest copy of each.

    Entries are stringified and stripped; blank ones are ignored. Two entries
    count as duplicates when their ``_normalize_text`` forms are equal. At most
    *limit* of the most recent unique entries are returned, in their original
    relative order.
    """
    candidates = [
        text for text in (str(entry).strip() for entry in items) if text
    ]
    picked: list[str] = []
    seen_keys: set[str] = set()
    # Walk newest-to-oldest so the most recent occurrence of a duplicate wins.
    for candidate in candidates[::-1]:
        norm = _normalize_text(candidate)
        if norm and norm not in seen_keys:
            seen_keys.add(norm)
            picked.append(candidate)
            if len(picked) >= limit:
                break
    # Restore chronological order.
    return picked[::-1]
def _merge_known_facts(
    existing: Iterable[KnownFact],
    additions: Iterable[KnownFact],
    *,
    limit: int = 24,
) -> list[KnownFact]:
    """Combine two fact sequences, de-duplicated, keeping the last *limit*.

    Facts are keyed by (stripped stage, stripped slot name, normalized
    ``label:value``). A later fact with the same key replaces the earlier one
    but keeps the earlier one's position. Facts whose normalized content is
    empty are dropped.
    """
    by_key: dict[tuple[str, str, str], KnownFact] = {}
    for fact in [*existing, *additions]:
        stage_key = (fact.stage or "").strip()
        slot_key = (fact.slot_name or "").strip()
        content_key = _normalize_text(f"{fact.label}:{fact.value}")
        if content_key:
            by_key[(stage_key, slot_key, content_key)] = fact
    return list(by_key.values())[-limit:]
def _merge_persona_threads(
    existing: Iterable[PersonaThread],
    additions: Iterable[PersonaThread],
    *,
    limit: int = 12,
) -> list[PersonaThread]:
    """Combine two persona-thread sequences, de-duplicated, keeping the last *limit*.

    Threads are keyed by (normalized trait, normalized evidence). A later
    thread with the same key replaces the earlier one but keeps its position.
    Threads whose normalized trait is empty are dropped.
    """
    by_key: dict[tuple[str, str], PersonaThread] = {}
    for thread in [*existing, *additions]:
        trait_key = _normalize_text(thread.trait)
        evidence_key = _normalize_text(thread.evidence)
        if trait_key:
            by_key[(trait_key, evidence_key)] = thread
    return list(by_key.values())[-limit:]
def _trim_sentence(text: str, *, limit: int = 80) -> str:
s = re.sub(r"\s+", " ", (text or "").strip())
if len(s) <= limit:
return s
return s[: limit - 1].rstrip() + ""
def build_runtime_interview_state(
    state: MemoirStateSchema,
    *,
    user_message: str,
    active_stage: str,
    birth_year: int | None = None,
    birth_place: str = "",
    grew_up_place: str = "",
    occupation: str = "",
) -> MemoirStateSchema:
    """Merge current-turn hints into a prompt-only state view.

    Profile fields and the trimmed user message become ``KnownFact`` entries;
    trait markers from ``_TRAIT_HINTS`` found in the message, the last eight
    known facts and up to four filled slots of *active_stage* become
    ``PersonaThread`` entries. Both are merged with what *state* already holds
    and the result of ``state.model_copy(update=...)`` is returned.
    """
    fresh_facts: list[KnownFact] = []
    if birth_year:
        # The birth year is not tied to a stage or slot.
        fresh_facts.append(
            KnownFact(label="出生年份", value=f"{birth_year}", source="profile")
        )

    # (label, raw value, stage, slot name) for the slot-bound profile fields.
    slotted_profile = (
        ("出生地", birth_place, "childhood", "place"),
        ("成长地", grew_up_place, "childhood", "place"),
        ("职业背景", occupation, "career", "job"),
    )
    for label, raw_value, stage, slot_name in slotted_profile:
        if raw_value:
            fresh_facts.append(
                KnownFact(
                    label=label,
                    value=raw_value.strip(),
                    source="profile",
                    stage=stage,
                    slot_name=slot_name,
                )
            )

    turn_text = _trim_sentence(user_message, limit=120)
    if turn_text:
        fresh_facts.append(
            KnownFact(
                label="本轮新信息",
                value=turn_text,
                source="current_turn",
                stage=active_stage,
            )
        )

    # Text searched for trait markers: this turn plus recent context.
    recent_fact_values = [fact.value for fact in state.known_facts[-8:]]
    slot_values = list(state.filled_slots_for_stage(active_stage).values())[:4]
    haystack = " ".join([turn_text, *recent_fact_values, *slot_values])

    fresh_threads: list[PersonaThread] = []
    for trait, markers in _TRAIT_HINTS:
        # Only the first non-empty marker that occurs counts for a trait.
        hit = next((m for m in markers if m and m in haystack), None)
        if hit is None:
            continue
        # Prefer the marker itself as evidence when the user just said it;
        # otherwise fall back to the (trimmed) surrounding context.
        evidence_source = hit if hit in turn_text else haystack
        fresh_threads.append(
            PersonaThread(
                trait=trait,
                evidence=_trim_sentence(evidence_source, limit=70),
                source="heuristic",
                stage=active_stage,
            )
        )

    merged_facts = _merge_known_facts(state.known_facts, fresh_facts)
    merged_threads = _merge_persona_threads(state.persona_threads, fresh_threads)
    return state.model_copy(
        update={
            "known_facts": merged_facts,
            "persona_threads": merged_threads,
        }
    )
def extract_recent_questions(
    messages: Iterable[BaseMessage], *, limit: int = 4
) -> list[str]:
    """Collect the most recent questions asked in AI messages.

    Args:
        messages: Conversation messages; only ``AIMessage`` instances are read.
        limit: Maximum number of questions to return.

    Returns:
        Up to *limit* de-duplicated question snippets, oldest first. Each
        snippet is a piece of an AI message split on question marks, with
        "?" appended and trimmed to 50 characters.
    """
    question_marks = ("?", "?")
    questions: list[str] = []
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        text = str(getattr(msg, "content", "") or "").strip()
        # Messages without any question mark ask nothing. The check depends on
        # the whole message only, so it is done once instead of once per piece.
        if not text or not any(mark in text for mark in question_marks):
            continue
        for part in _QUESTION_SPLIT_RE.split(text):
            # Drop any mark the splitter left attached so that each stored
            # question ends in exactly one.
            part = part.strip().strip("??").strip()
            if part:
                questions.append(_trim_sentence(part + "?", limit=50))
    return _dedupe_keep_last(questions, limit=limit)
def update_recent_questions(
    existing: Iterable[str],
    generated_segments: Iterable[str],
    *,
    limit: int = 4,
) -> list[str]:
    """Append the questions found in newly generated segments to *existing*.

    Args:
        existing: Previously recorded questions.
        generated_segments: Freshly generated reply segments.
        limit: Maximum number of questions to keep.

    Returns:
        Up to *limit* de-duplicated questions, oldest first. For every segment
        that contains a question mark, the last piece produced by splitting on
        question marks is recorded with "?" appended, trimmed to 50 characters.
    """
    question_marks = ("?", "?")
    fresh: list[str] = list(existing)
    for seg in generated_segments:
        text = str(seg or "").strip()
        # Segments without a question mark are statements, not questions.
        if not text or not any(mark in text for mark in question_marks):
            continue
        parts = [p.strip() for p in _QUESTION_SPLIT_RE.split(text) if p.strip()]
        if not parts:
            continue
        # Drop any mark the splitter left attached so that the stored question
        # ends in exactly one.
        question = parts[-1].strip("??").strip()
        if question:
            fresh.append(_trim_sentence(question + "?", limit=50))
    return _dedupe_keep_last(fresh, limit=limit)
def apply_duplicate_question_guard(
    segments: Iterable[str],
    *,
    state: MemoirStateSchema,
    recent_questions: Iterable[str],
) -> tuple[list[str], bool]:
    """Downgrade obvious repeated-fact questions into acknowledgment-only text.

    Only segments that contain a question mark are examined. Such a segment is
    a repeat when its normalized text overlaps a recent question, or when it
    contains a repeat pattern: the ``_SLOT_REPEAT_PATTERNS`` phrasings of any
    slot that already has a known fact, or the text of a current-turn fact.
    A repeat is replaced by its first non-question sentence, or by a fixed
    acknowledgment when it has none.

    Args:
        segments: Candidate reply segments; blank ones are dropped.
        state: State whose ``known_facts`` provide the repeat patterns.
        recent_questions: Questions asked recently.

    Returns:
        ``(cleaned_segments, touched)`` where *touched* tells whether any
        segment was replaced. ``cleaned_segments`` is never empty.
    """
    question_marks = ("?", "?")
    sentence_endings = ("。", "!", "!", "…", ".")
    fallback = "这一段我记住了。"

    recent_norms = {norm for norm in map(_normalize_text, recent_questions) if norm}

    # Normalize the repeat patterns once instead of once per segment.
    pattern_norms: list[str] = []
    for fact in state.known_facts:
        raw_patterns = list(_SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ()))
        if fact.label == "本轮新信息":
            raw_patterns.append(fact.value)
        pattern_norms.extend(
            norm for norm in map(_normalize_text, raw_patterns) if norm
        )

    cleaned: list[str] = []
    touched = False
    for seg in segments:
        text = str(seg or "").strip()
        if not text:
            continue
        text_norm = _normalize_text(text)
        # Statements are passed through; only questions can be repeats.
        is_question = any(mark in text for mark in question_marks)
        repeated = bool(is_question and text_norm) and (
            any(q in text_norm or text_norm in q for q in recent_norms)
            or any(pat in text_norm for pat in pattern_norms)
        )
        if not repeated:
            cleaned.append(text)
            continue

        sentences = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
        # Keep only the non-question part of the segment as acknowledgment.
        kept = [
            s for s in sentences if not any(mark in s for mark in question_marks)
        ]
        replacement = kept[0] if kept else fallback
        if not replacement.endswith(sentence_endings):
            replacement += "。"
        cleaned.append(replacement)
        touched = True

    if not cleaned:
        cleaned = [fallback]
    return cleaned, touched
def stage_slot_hint_lines(stage: str) -> list[str]:
    """Return one ``"<stage display name>:<slot key>"`` line per slot of *stage*.

    The slot keys come from ``STAGE_SLOT_KEYS`` (none for an unknown stage) and
    the display name from ``STAGE_DISPLAY_ZH``, falling back to *stage* itself.
    """
    slot_keys = STAGE_SLOT_KEYS.get(stage, ())
    display_name = STAGE_DISPLAY_ZH.get(stage, stage)
    lines: list[str] = []
    for slot_key in slot_keys:
        lines.append(f"{display_name}:{slot_key}")
    return lines