# api/app/agents/chat/interview_state_hints.py

"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""

from __future__ import annotations

import re
from collections.abc import Iterable

# Must stay identical to the whole-segment replacement sentence used by
# `apply_duplicate_question_guard`; it is compared against to decide whether a
# second generation pass needs to be triggered.
DUPLICATE_QUESTION_GUARD_FALLBACK_ZH = "这一段我记住了。"

from langchain_core.messages import AIMessage, BaseMessage, HumanMessage

from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
from app.agents.state_schema import KnownFact, MemoirStateSchema, PersonaThread

_QUESTION_SPLIT_RE = re.compile(r"[?？]+")
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。！？!?])")
_PUNCT_RE = re.compile(r"[\s，。！？；：、“”‘’（）()《》【】\\[\\],.!?:;\"'`~·…-]+")
# 「我演罗密欧」等扮演亲历，但排除「我演示…」类口癖
_AUTOBIO_IYAN_NOT_DEMO_RE = re.compile(r"我演(?!示)")

# Heuristic persona table: each entry is (trait label, marker substrings).
# `build_runtime_interview_state` records a trait once when any of its markers
# occurs as a plain substring of the text it scans; markers are tried in tuple
# order and the first hit wins.
# NOTE(review): several markers are single characters (e.g. "做", "比", "野"), so
# substring matching will fire very often — confirm that is intended.
_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
    ("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
    ("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
    ("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
    (
        "家庭责任感",
        ("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持"),
    ),
    ("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
    ("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "野", "不管")),
    ("动手创造", ("自己动手", "搭", "做", "造", "修", "拆", "烤", "生火", "种")),
    ("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
    ("好胜争先", ("比赛", "赢", "比", "第一", "不服输", "较劲")),
)

# (keyword, sensory description) pairs consumed by `extract_scene_cues`: every
# keyword that occurs as a substring of the user message contributes its
# description, in the order listed here.
# NOTE(review): "烤红薯" also contains "烤", so a message mentioning it matches
# both entries — confirm whether the shorter keyword should be suppressed.
_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
    ("田野", "田野的泥土和青草气息"),
    ("河里", "河水的凉意"),
    ("海边", "海风和咸咸的空气"),
    ("溜冰", "冰面上咔嚓咔嚓的声响"),
    ("游泳", "一头扎进水里的畅快"),
    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
    ("烤", "火堆噼啪响、烟气里混着食物焦香"),
    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
    ("捉", "追着跑、手心攥紧怕跑掉的紧张"),
    ("雪", "雪花落在脸上化成水珠的凉"),
    ("风", "风灌进领子里的感觉"),
    ("下雨", "雨点打在屋顶上的声音"),
    ("自行车", "骑车下坡风呼呼吹过耳朵"),
    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
    ("学校", "教室里粉笔灰飘在阳光里的样子"),
    ("考试", "翻卷子时纸张沙沙响"),
    ("工厂", "机器轰鸣、油污和铁锈的气味"),
    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
)


def extract_scene_cues(user_message: str) -> list[str]:
    """Return at most three sensory descriptions triggered by ``user_message``.

    A description is selected when its keyword from ``_SCENE_CUE_WORDS`` occurs
    as a substring of the stripped message; results follow the table order.
    A ``None`` or blank message yields an empty list.
    """
    text = (user_message or "").strip()
    if not text:
        return []
    matched = [desc for kw, desc in _SCENE_CUE_WORDS if kw in text]
    # Only the first three hits (in table order) are surfaced.
    return matched[:3]


# Slot name -> typical question phrasings for that slot.
# `apply_duplicate_question_guard` looks these up by the `slot_name` of each
# known fact and treats a question segment containing one of the phrasings
# (after normalization) as a repeated question.
_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
    "place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
    "people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
    "daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
    "emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
    "turning_event": ("印象最深的事", "难忘的事", "转折"),
    "school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
    "city": ("在哪个城市", "去了哪里读书", "在哪读书"),
    "motivation": ("为什么想学", "为什么选这个", "动力是什么"),
    "challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
    "change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
    "job": ("做什么工作", "具体做什么", "工作内容是什么"),
    "environment": ("工作环境", "在哪工作", "什么单位"),
    "decision": ("为什么做这个决定", "怎么决定的"),
    "pressure": ("压力大吗", "最难的时候", "最大的压力"),
    "growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
    "relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
    "conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
    "support": ("谁支持你", "谁帮过你", "怎么支持你的"),
    "responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
    "value": ("你最看重什么", "信念是什么", "原则是什么"),
    "regret": ("最大的遗憾", "后悔过吗"),
    "pride": ("最骄傲的事", "最自豪的事"),
    "lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
}


def _normalize_text(text: str) -> str:
    """Return ``text`` stripped, lower-cased and with every ``_PUNCT_RE`` match removed.

    ``None`` / empty input is treated as the empty string.
    """
    lowered = (text or "").strip().lower()
    return _PUNCT_RE.sub("", lowered)


def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
    """Drop duplicates (compared via ``_normalize_text``), keeping the latest copy.

    Items are stringified and stripped; blank ones are ignored. The input is
    scanned from the end so that, for duplicates, the most recent occurrence
    survives. Scanning stops once ``limit`` entries were collected, and the
    result is returned in the original (oldest-to-newest) order.
    """
    texts = [t for t in (str(x).strip() for x in items) if t]
    newest_first: list[str] = []
    seen_keys: set[str] = set()
    for text in texts[::-1]:
        key = _normalize_text(text)
        if key and key not in seen_keys:
            seen_keys.add(key)
            newest_first.append(text)
            if len(newest_first) >= limit:
                break
    return newest_first[::-1]


def _merge_known_facts(
    existing: Iterable[KnownFact],
    additions: Iterable[KnownFact],
    *,
    limit: int = 24,
) -> list[KnownFact]:
    """Combine two fact sequences, de-duplicated by (stage, slot, normalized text).

    ``existing`` is processed before ``additions``; when two facts share a key
    the later object replaces the earlier one but keeps the earlier position
    (plain ``dict`` insertion order). Facts whose normalized ``label:value``
    text is empty are skipped. The slice ``[-limit:]`` of the result is returned.
    """
    by_key: dict[tuple[str, str, str], KnownFact] = {}
    for fact in [*existing, *additions]:
        stage = (fact.stage or "").strip()
        slot = (fact.slot_name or "").strip()
        fingerprint = _normalize_text(f"{fact.label}:{fact.value}")
        if fingerprint:
            by_key[(stage, slot, fingerprint)] = fact
    return list(by_key.values())[-limit:]


def _merge_persona_threads(
    existing: Iterable[PersonaThread],
    additions: Iterable[PersonaThread],
    *,
    limit: int = 12,
) -> list[PersonaThread]:
    """Combine two thread sequences, de-duplicated by normalized (trait, evidence).

    ``existing`` is processed before ``additions``; a later thread with the same
    key replaces the earlier object in place. Threads whose normalized trait is
    empty are skipped. The slice ``[-limit:]`` of the result is returned.
    """
    by_key: dict[tuple[str, str], PersonaThread] = {}
    for thread in [*existing, *additions]:
        trait_key = _normalize_text(thread.trait)
        evidence_key = _normalize_text(thread.evidence)
        if trait_key:
            by_key[(trait_key, evidence_key)] = thread
    return list(by_key.values())[-limit:]


def _trim_sentence(text: str, *, limit: int = 80) -> str:
    s = re.sub(r"\s+", " ", (text or "").strip())
    if len(s) <= limit:
        return s
    return s[: limit - 1].rstrip() + "…"


def build_runtime_interview_state(
    state: MemoirStateSchema,
    *,
    user_message: str,
    active_stage: str,
    birth_year: int | None = None,
    birth_place: str = "",
    grew_up_place: str = "",
    occupation: str = "",
) -> MemoirStateSchema:
    """Merge current-turn hints into a prompt-only state view.

    Profile fields that are truthy become ``KnownFact`` entries with
    ``source="profile"``; the whitespace-collapsed user message (max 120 chars)
    becomes a ``"本轮新信息"`` fact for ``active_stage``. Persona threads are
    derived heuristically from ``_TRAIT_HINTS``. The result is whatever
    ``state.model_copy(update=...)`` returns with the merged ``known_facts`` and
    ``persona_threads``.
    """
    new_facts: list[KnownFact] = []
    if birth_year:
        new_facts.append(
            KnownFact(label="出生年份", value=f"{birth_year}年", source="profile")
        )

    # (label, raw value, stage, slot) for the string-valued profile fields.
    profile_fields = (
        ("出生地", birth_place, "childhood", "place"),
        ("成长地", grew_up_place, "childhood", "place"),
        ("职业背景", occupation, "career", "job"),
    )
    for label, raw, stage, slot in profile_fields:
        if raw:
            new_facts.append(
                KnownFact(
                    label=label,
                    value=raw.strip(),
                    source="profile",
                    stage=stage,
                    slot_name=slot,
                )
            )

    turn_text = _trim_sentence(user_message, limit=120)
    if turn_text:
        new_facts.append(
            KnownFact(
                label="本轮新信息",
                value=turn_text,
                source="current_turn",
                stage=active_stage,
            )
        )

    # Text scanned for trait markers: this turn, the last 8 stored fact values,
    # and up to 4 filled slot values of the active stage.
    recent_values = [fact.value for fact in state.known_facts[-8:]]
    slot_values = list(state.filled_slots_for_stage(active_stage).values())[:4]
    haystack = " ".join([turn_text, *recent_values, *slot_values])

    new_threads: list[PersonaThread] = []
    for trait, markers in _TRAIT_HINTS:
        hit = next((m for m in markers if m and m in haystack), None)
        if hit is None:
            continue
        # Evidence is the marker itself when it came from this turn's message,
        # otherwise the (trimmed) scanned text.
        evidence_source = hit if hit in turn_text else haystack
        new_threads.append(
            PersonaThread(
                trait=trait,
                evidence=_trim_sentence(evidence_source, limit=70),
                source="heuristic",
                stage=active_stage,
            )
        )

    merged_facts = _merge_known_facts(state.known_facts, new_facts)
    merged_threads = _merge_persona_threads(state.persona_threads, new_threads)
    return state.model_copy(
        update={"known_facts": merged_facts, "persona_threads": merged_threads}
    )


def extract_recent_questions(
    messages: Iterable[BaseMessage], *, limit: int = 4
) -> list[str]:
    """Collect the most recent questions asked in ``AIMessage`` entries.

    Each AI message's text is split on runs of question marks. Every non-blank
    piece that was actually followed by a question mark is recorded (with a
    trailing "？", trimmed to 50 characters). Non-AI messages and blank content
    are ignored.

    Returns:
        Up to ``limit`` distinct questions, oldest first, as produced by
        ``_dedupe_keep_last``.
    """
    questions: list[str] = []
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        text = str(getattr(msg, "content", "") or "").strip()
        if not text:
            continue
        # `re.split` always returns the text after the last delimiter as its
        # final element ("" when the text ends with a question mark, the whole
        # text when there is none). That tail is never a question, so drop it
        # instead of recording a trailing statement as "…。？".
        for piece in _QUESTION_SPLIT_RE.split(text)[:-1]:
            piece = piece.strip()
            if piece:
                questions.append(_trim_sentence(piece + "？", limit=50))
    return _dedupe_keep_last(questions, limit=limit)


def update_recent_questions(
    existing: Iterable[str],
    generated_segments: Iterable[str],
    *,
    limit: int = 4,
) -> list[str]:
    """Append the last question of each generated segment to ``existing``.

    For every non-blank segment, the text is split on runs of question marks
    and the last non-blank piece that was actually followed by a question mark
    is recorded (with a trailing "？", trimmed to 50 characters). Segments
    without a question contribute nothing.

    Returns:
        Up to ``limit`` distinct questions, oldest first, as produced by
        ``_dedupe_keep_last``.
    """
    merged: list[str] = list(existing)
    for seg in generated_segments:
        text = str(seg or "").strip()
        if not text:
            continue
        # The final element of `re.split` is the text AFTER the last question
        # mark (or the whole text when there is none), i.e. never a question;
        # excluding it keeps a trailing statement from being stored as the
        # segment's question.
        asked = [
            piece.strip()
            for piece in _QUESTION_SPLIT_RE.split(text)[:-1]
            if piece.strip()
        ]
        if asked:
            merged.append(_trim_sentence(asked[-1] + "？", limit=50))
    return _dedupe_keep_last(merged, limit=limit)


def apply_duplicate_question_guard(
    segments: Iterable[str],
    *,
    state: MemoirStateSchema,
    recent_questions: Iterable[str],
) -> tuple[list[str], bool]:
    """Downgrade obvious repeated-fact questions into acknowledgment-only text.

    A non-blank segment containing a question mark counts as repeated when its
    normalized text contains (or is contained in) a normalized recent question,
    or contains a normalized known pattern. Known patterns are the
    ``_SLOT_REPEAT_PATTERNS`` entries for the ``slot_name`` of every known fact,
    plus the value of each fact labelled "本轮新信息".

    A repeated segment is replaced by its first sentence that has no question
    mark, or by ``DUPLICATE_QUESTION_GUARD_FALLBACK_ZH`` when there is none.

    Returns:
        ``(cleaned_segments, touched)`` where ``touched`` is True when at least
        one segment was replaced. If no non-blank segment was supplied the list
        holds only the fallback sentence (``touched`` stays False).
    """
    recent_norms = {norm for norm in map(_normalize_text, recent_questions) if norm}

    # Patterns do not depend on the segment, so normalize them once up front
    # rather than once per segment.
    pattern_norms: set[str] = set()
    for fact in state.known_facts:
        raw_patterns = list(_SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ()))
        if fact.label == "本轮新信息":
            raw_patterns.append(fact.value)
        for raw in raw_patterns:
            norm = _normalize_text(raw)
            if norm:
                pattern_norms.add(norm)

    # Endings after which no extra "。" is appended. ASCII "!" and "." are
    # included so a kept sentence such as "好!" does not become "好!。".
    terminators = ("。", "！", "…", "!", ".")

    cleaned: list[str] = []
    touched = False
    for seg in segments:
        text = str(seg or "").strip()
        if not text:
            continue
        text_norm = _normalize_text(text)
        is_question = "？" in text or "?" in text
        repeated = bool(is_question and text_norm) and (
            any(q in text_norm or text_norm in q for q in recent_norms)
            or any(pat in text_norm for pat in pattern_norms)
        )
        if not repeated:
            cleaned.append(text)
            continue

        sentences = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
        statements = [s for s in sentences if "？" not in s and "?" not in s]
        replacement = (
            statements[0] if statements else DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
        )
        if not replacement.endswith(terminators):
            replacement += "。"
        cleaned.append(replacement)
        touched = True

    if not cleaned:
        cleaned = [DUPLICATE_QUESTION_GUARD_FALLBACK_ZH]
    return cleaned, touched


def segments_are_only_duplicate_guard_fallback(segments: Iterable[str]) -> bool:
    """Tell whether the reply is nothing but the bare fallback acknowledgment.

    True when, after stripping and discarding blank entries, exactly one segment
    remains and it equals ``DUPLICATE_QUESTION_GUARD_FALLBACK_ZH`` — i.e. there
    is no substantive follow-up and another model call is worthwhile.
    """
    non_blank: list[str] = []
    for seg in segments:
        stripped = str(seg or "").strip()
        if stripped:
            non_blank.append(stripped)
    return non_blank == [DUPLICATE_QUESTION_GUARD_FALLBACK_ZH]


# High-confidence substrings which, in an assistant-visible reply, clearly claim
# "I personally had this life experience". Deliberately conservative: better to
# let some cases slip through than to wrongly flag generic empathy.
_AUTOBIOGRAPHICAL_MARKERS_ZH: tuple[str, ...] = (
    "我小时候",
    "我小学",
    "我中学",
    "我初中",
    "我高中",
    "我大学",
    "我上学那",
    "我念书",
    "我读书那",
    "我暗恋",
    "我当时暗恋",
    "我爸妈",
    "我父亲",
    "我母亲",
    "我爹",
    "我妈",
    "我爷爷",
    "我奶奶",
    "我外公",
    "我外婆",
    "我前任",
    "我老公",
    "我老婆",
    "我丈夫",
    "我妻子",
    "我男友",
    "我女友",
    "我对象",
    "我儿子",
    "我女儿",
    "我孩子",
    "我以前也",
    "我当时也",
    "我那时候也",
    "我也经历过",
    "我也有过",
    "我也演过",
    "我也上台",
    "我演过",
    "我饰演",
    "我演出",
    "我演的是",
    "我演的",
    "我扮演",
    "感觉我熟",
    "这我熟",
)

# Neutral follow-up sentence that `apply_autobiographical_boundary_guard`
# substitutes for a segment containing an autobiographical claim.
AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH = (
    "你刚说的这段很有画面，我想多听你讲讲那时候你心里是什么感觉。"
)


def _segment_has_autobiographical_claim_zh(text: str) -> bool:
    """Report whether ``text`` contains a first-person life-experience claim.

    True when the stripped text matches ``_AUTOBIO_IYAN_NOT_DEMO_RE`` or
    contains any entry of ``_AUTOBIOGRAPHICAL_MARKERS_ZH``; blank or ``None``
    input is never flagged.
    """
    stripped = (text or "").strip()
    if not stripped:
        return False
    if _AUTOBIO_IYAN_NOT_DEMO_RE.search(stripped) is not None:
        return True
    for marker in _AUTOBIOGRAPHICAL_MARKERS_ZH:
        if marker and marker in stripped:
            return True
    return False


def apply_autobiographical_boundary_guard(
    segments: Iterable[str],
) -> tuple[list[str], bool]:
    """Replace segments with an assistant "autobiography" by a neutral follow-up.

    Blank segments are dropped. A segment flagged by
    ``_segment_has_autobiographical_claim_zh`` is replaced by
    ``AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH`` so the assistant does not overstep
    its identity; other segments pass through stripped.

    Returns:
        ``(cleaned_segments, touched)``. ``touched`` is True when any segment
        was flagged, or when no non-blank segment was supplied (in which case
        the list holds only the fallback sentence).
    """
    cleaned: list[str] = []
    touched = False
    for seg in segments:
        text = str(seg or "").strip()
        if not text:
            continue
        if not _segment_has_autobiographical_claim_zh(text):
            cleaned.append(text)
            continue
        touched = True
        # Several offending segments would otherwise yield the very same
        # sentence several times in one reply; emit it only once.
        if AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH not in cleaned:
            cleaned.append(AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH)
    if not cleaned:
        return [AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH], True
    return cleaned, touched


def stage_slot_hint_lines(stage: str) -> list[str]:
    """Return one ``"<stage display name>:<slot key>"`` line per slot of ``stage``.

    The display name comes from ``STAGE_DISPLAY_ZH`` (falling back to ``stage``
    itself) and the slot keys from ``STAGE_SLOT_KEYS`` (falling back to none).
    """
    display_name = STAGE_DISPLAY_ZH.get(stage, stage)
    lines: list[str] = []
    for slot_key in STAGE_SLOT_KEYS.get(stage, ()):
        lines.append(f"{display_name}:{slot_key}")
    return lines
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""
 								from __future__ import annotations
 								import re
 								from collections.abc import Iterable
-												fix(chat): 重复追问被拦截时再多问一次模型

防重复问句会把整段回复削成「这一段我记住了。」只剩一句套话时，用带纠偏说明的 system 再调一次 LLM，尽量避免用户只看到干巴巴_ack。仍只重试一次，并打日志与 meta 标记 duplicate_question_guard_llm_retry。

											
										
										
											2026-04-10 15:33:28 +08:00
+								# 与 `apply_duplicate_question_guard` 中整段替换句一致；用于判定是否需触发二次生成。
 								DUPLICATE_QUESTION_GUARD_FALLBACK_ZH = "这一段我记住了。"
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
 								from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
 								from app.agents.state_schema import KnownFact, MemoirStateSchema, PersonaThread
 								_QUESTION_SPLIT_RE = re.compile(r"[?？]+")
 								_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。！？!?])")
 								_PUNCT_RE = re.compile(r"[\s，。！？；：、“”‘’（）()《》【】\\[\\],.!?:;\"'`~·…-]+")
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								# 「我演罗密欧」等扮演亲历，但排除「我演示…」类口癖
 								_AUTOBIO_IYAN_NOT_DEMO_RE = re.compile(r"我演(?!示)")
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
 								_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
 								    ("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
 								    ("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
 								    ("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
 								    ("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
-												feat:
1. 建立问题库大纲，对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性

											
										
										
											2026-04-09 15:32:35 +08:00
+								    (
 								        "家庭责任感",
 								        ("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持"),
 								    ),
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								    ("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
 								    ("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "野", "不管")),
 								    ("动手创造", ("自己动手", "搭", "做", "造", "修", "拆", "烤", "生火", "种")),
 								    ("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
 								    ("好胜争先", ("比赛", "赢", "比", "第一", "不服输", "较劲")),
 								)
 								_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
 								    ("田野", "田野的泥土和青草气息"),
 								    ("河里", "河水的凉意"),
 								    ("海边", "海风和咸咸的空气"),
 								    ("溜冰", "冰面上咔嚓咔嚓的声响"),
 								    ("游泳", "一头扎进水里的畅快"),
 								    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
 								    ("烤", "火堆噼啪响、烟气里混着食物焦香"),
 								    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
 								    ("捉", "追着跑、手心攥紧怕跑掉的紧张"),
 								    ("雪", "雪花落在脸上化成水珠的凉"),
 								    ("风", "风灌进领子里的感觉"),
 								    ("下雨", "雨点打在屋顶上的声音"),
 								    ("自行车", "骑车下坡风呼呼吹过耳朵"),
 								    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
 								    ("学校", "教室里粉笔灰飘在阳光里的样子"),
 								    ("考试", "翻卷子时纸张沙沙响"),
 								    ("工厂", "机器轰鸣、油污和铁锈的气味"),
 								    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
 								)
 								def extract_scene_cues(user_message: str) -> list[str]:
 								    msg = (user_message or "").strip()
 								    if not msg:
 								        return []
 								    cues: list[str] = []
 								    for keyword, description in _SCENE_CUE_WORDS:
 								        if keyword in msg:
 								            cues.append(description)
 								    return cues[:3]
-												feat:
1. 建立问题库大纲，对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性

											
										
										
											2026-04-09 15:32:35 +08:00
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
 								    "place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
 								    "people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
 								    "daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
 								    "emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
 								    "turning_event": ("印象最深的事", "难忘的事", "转折"),
 								    "school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
 								    "city": ("在哪个城市", "去了哪里读书", "在哪读书"),
 								    "motivation": ("为什么想学", "为什么选这个", "动力是什么"),
 								    "challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
 								    "change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
 								    "job": ("做什么工作", "具体做什么", "工作内容是什么"),
 								    "environment": ("工作环境", "在哪工作", "什么单位"),
 								    "decision": ("为什么做这个决定", "怎么决定的"),
 								    "pressure": ("压力大吗", "最难的时候", "最大的压力"),
 								    "growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
 								    "relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
 								    "conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
 								    "support": ("谁支持你", "谁帮过你", "怎么支持你的"),
 								    "responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
 								    "value": ("你最看重什么", "信念是什么", "原则是什么"),
 								    "regret": ("最大的遗憾", "后悔过吗"),
 								    "pride": ("最骄傲的事", "最自豪的事"),
 								    "lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
 								}
 								def _normalize_text(text: str) -> str:
 								    return _PUNCT_RE.sub("", (text or "").strip().lower())
 								def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
 								    out: list[str] = []
 								    seen: set[str] = set()
 								    for raw in reversed([str(x).strip() for x in items if str(x).strip()]):
 								        key = _normalize_text(raw)
 								        if not key or key in seen:
 								            continue
 								        seen.add(key)
 								        out.append(raw)
 								        if len(out) >= limit:
 								            break
 								    out.reverse()
 								    return out
 								def _merge_known_facts(
 								    existing: Iterable[KnownFact],
 								    additions: Iterable[KnownFact],
 								    *,
 								    limit: int = 24,
 								) -> list[KnownFact]:
 								    merged: dict[tuple[str, str, str], KnownFact] = {}
 								    for item in list(existing) + list(additions):
 								        key = (
 								            (item.stage or "").strip(),
 								            (item.slot_name or "").strip(),
 								            _normalize_text(f"{item.label}:{item.value}"),
 								        )
 								        if not key[2]:
 								            continue
 								        merged[key] = item
 								    values = list(merged.values())[-limit:]
 								    return values
 								def _merge_persona_threads(
 								    existing: Iterable[PersonaThread],
 								    additions: Iterable[PersonaThread],
 								    *,
 								    limit: int = 12,
 								) -> list[PersonaThread]:
 								    merged: dict[tuple[str, str], PersonaThread] = {}
 								    for item in list(existing) + list(additions):
 								        key = (_normalize_text(item.trait), _normalize_text(item.evidence))
 								        if not key[0]:
 								            continue
 								        merged[key] = item
 								    values = list(merged.values())[-limit:]
 								    return values
 								def _trim_sentence(text: str, *, limit: int = 80) -> str:
 								    s = re.sub(r"\s+", " ", (text or "").strip())
 								    if len(s) <= limit:
 								        return s
 								    return s[: limit - 1].rstrip() + "…"
 								def build_runtime_interview_state(
 								    state: MemoirStateSchema,
 								    *,
 								    user_message: str,
 								    active_stage: str,
 								    birth_year: int | None = None,
 								    birth_place: str = "",
 								    grew_up_place: str = "",
 								    occupation: str = "",
 								) -> MemoirStateSchema:
 								    """Merge current-turn hints into a prompt-only state view."""
 								    additions: list[KnownFact] = []
 								    if birth_year:
 								        additions.append(
 								            KnownFact(
 								                label="出生年份",
 								                value=f"{birth_year}年",
 								                source="profile",
 								            )
 								        )
 								    if birth_place:
 								        additions.append(
 								            KnownFact(
 								                label="出生地",
 								                value=birth_place.strip(),
 								                source="profile",
 								                stage="childhood",
 								                slot_name="place",
 								            )
 								        )
 								    if grew_up_place:
 								        additions.append(
 								            KnownFact(
 								                label="成长地",
 								                value=grew_up_place.strip(),
 								                source="profile",
 								                stage="childhood",
 								                slot_name="place",
 								            )
 								        )
 								    if occupation:
 								        additions.append(
 								            KnownFact(
 								                label="职业背景",
 								                value=occupation.strip(),
 								                source="profile",
 								                stage="career",
 								                slot_name="job",
 								            )
 								        )
 								    msg = _trim_sentence(user_message, limit=120)
 								    if msg:
 								        additions.append(
 								            KnownFact(
 								                label="本轮新信息",
 								                value=msg,
 								                source="current_turn",
 								                stage=active_stage,
 								            )
 								        )
 								    persona_additions: list[PersonaThread] = []
 								    haystack = " ".join(
 								        [msg]
 								        + [fact.value for fact in state.known_facts[-8:]]
 								        + list(state.filled_slots_for_stage(active_stage).values())[:4]
 								    )
 								    for trait, markers in _TRAIT_HINTS:
 								        for marker in markers:
 								            if marker and marker in haystack:
 								                persona_additions.append(
 								                    PersonaThread(
 								                        trait=trait,
-												feat:
1. 建立问题库大纲，对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性

											
										
										
											2026-04-09 15:32:35 +08:00
+								                        evidence=_trim_sentence(
 								                            marker if marker in msg else haystack, limit=70
 								                        ),
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								                        source="heuristic",
 								                        stage=active_stage,
 								                    )
 								                )
 								                break
 								    return state.model_copy(
 								        update={
 								            "known_facts": _merge_known_facts(state.known_facts, additions),
 								            "persona_threads": _merge_persona_threads(
 								                state.persona_threads, persona_additions
 								            ),
 								        }
 								    )
-												feat:
1. 建立问题库大纲，对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性

											
										
										
											2026-04-09 15:32:35 +08:00
+								def extract_recent_questions(
 								    messages: Iterable[BaseMessage], *, limit: int = 4
 								) -> list[str]:
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								    questions: list[str] = []
 								    for msg in messages:
 								        if not isinstance(msg, AIMessage):
 								            continue
 								        text = str(getattr(msg, "content", "") or "").strip()
 								        if not text:
 								            continue
 								        for part in _QUESTION_SPLIT_RE.split(text):
 								            part = part.strip()
 								            if not part:
 								                continue
 								            if any(w in text for w in ("？", "?")):
 								                questions.append(_trim_sentence(part + "？", limit=50))
 								    return _dedupe_keep_last(questions, limit=limit)
 								def update_recent_questions(
 								    existing: Iterable[str],
 								    generated_segments: Iterable[str],
 								    *,
 								    limit: int = 4,
 								) -> list[str]:
 								    fresh: list[str] = list(existing)
 								    for seg in generated_segments:
 								        text = str(seg or "").strip()
 								        if not text or ("？" not in text and "?" not in text):
 								            continue
 								        parts = [p.strip() for p in _QUESTION_SPLIT_RE.split(text) if p.strip()]
 								        if not parts:
 								            continue
 								        fresh.append(_trim_sentence(parts[-1] + "？", limit=50))
 								    return _dedupe_keep_last(fresh, limit=limit)
 								def apply_duplicate_question_guard(
 								    segments: Iterable[str],
 								    *,
 								    state: MemoirStateSchema,
 								    recent_questions: Iterable[str],
 								) -> tuple[list[str], bool]:
 								    """Downgrade obvious repeated-fact questions into acknowledgment-only text."""
 								    recent_norms = {_normalize_text(q) for q in recent_questions if _normalize_text(q)}
 								    known_patterns: list[str] = []
 								    for fact in state.known_facts:
 								        slot_patterns = _SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ())
 								        known_patterns.extend(slot_patterns)
 								        if fact.label == "本轮新信息":
 								            known_patterns.append(fact.value)
 								    cleaned: list[str] = []
 								    touched = False
 								    for seg in segments:
 								        text = str(seg or "").strip()
 								        if not text:
 								            continue
 								        text_norm = _normalize_text(text)
 								        repeated = False
 								        if ("？" in text or "?" in text) and text_norm:
 								            if any(q and (q in text_norm or text_norm in q) for q in recent_norms):
 								                repeated = True
 								            if not repeated:
 								                for pattern in known_patterns:
 								                    pat_norm = _normalize_text(pattern)
 								                    if pat_norm and pat_norm in text_norm:
 								                        repeated = True
 								                        break
 								        if repeated:
 								            sentences = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
 								            kept = [s for s in sentences if "？" not in s and "?" not in s]
-												fix(chat): 重复追问被拦截时再多问一次模型

防重复问句会把整段回复削成「这一段我记住了。」只剩一句套话时，用带纠偏说明的 system 再调一次 LLM，尽量避免用户只看到干巴巴_ack。仍只重试一次，并打日志与 meta 标记 duplicate_question_guard_llm_retry。

											
										
										
											2026-04-10 15:33:28 +08:00
+								            replacement = kept[0] if kept else DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								            if not replacement.endswith(("。", "！", "…")):
 								                replacement += "。"
 								            cleaned.append(replacement)
 								            touched = True
 								        else:
 								            cleaned.append(text)
 								    if not cleaned:
-												fix(chat): 重复追问被拦截时再多问一次模型

防重复问句会把整段回复削成「这一段我记住了。」只剩一句套话时，用带纠偏说明的 system 再调一次 LLM，尽量避免用户只看到干巴巴_ack。仍只重试一次，并打日志与 meta 标记 duplicate_question_guard_llm_retry。

											
										
										
											2026-04-10 15:33:28 +08:00
+								        cleaned = [DUPLICATE_QUESTION_GUARD_FALLBACK_ZH]
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								    return cleaned, touched
-												fix(chat): 重复追问被拦截时再多问一次模型

防重复问句会把整段回复削成「这一段我记住了。」只剩一句套话时，用带纠偏说明的 system 再调一次 LLM，尽量避免用户只看到干巴巴_ack。仍只重试一次，并打日志与 meta 标记 duplicate_question_guard_llm_retry。

											
										
										
											2026-04-10 15:33:28 +08:00
+								def segments_are_only_duplicate_guard_fallback(segments: Iterable[str]) -> bool:
 								    """是否为「仅兜底_ack、无实质承接」——适合再打一枪模型。"""
 								    parts = [str(s or "").strip() for s in segments if str(s or "").strip()]
 								    return len(parts) == 1 and parts[0] == DUPLICATE_QUESTION_GUARD_FALLBACK_ZH
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								# 助手可见回复中，明显声称「我本人有过某种人生经历」的高置信子串（偏保守、宁可漏网不误伤泛化共情）。
 								_AUTOBIOGRAPHICAL_MARKERS_ZH: tuple[str, ...] = (
 								    "我小时候",
 								    "我小学",
 								    "我中学",
 								    "我初中",
 								    "我高中",
 								    "我大学",
 								    "我上学那",
 								    "我念书",
 								    "我读书那",
 								    "我暗恋",
 								    "我当时暗恋",
 								    "我爸妈",
 								    "我父亲",
 								    "我母亲",
 								    "我爹",
 								    "我妈",
 								    "我爷爷",
 								    "我奶奶",
 								    "我外公",
 								    "我外婆",
 								    "我前任",
 								    "我老公",
 								    "我老婆",
 								    "我丈夫",
 								    "我妻子",
 								    "我男友",
 								    "我女友",
 								    "我对象",
 								    "我儿子",
 								    "我女儿",
 								    "我孩子",
 								    "我以前也",
 								    "我当时也",
 								    "我那时候也",
 								    "我也经历过",
 								    "我也有过",
 								    "我也演过",
 								    "我也上台",
 								    "我演过",
 								    "我饰演",
 								    "我演出",
 								    "我演的是",
 								    "我演的",
 								    "我扮演",
 								    "感觉我熟",
 								    "这我熟",
 								)
 								AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH = (
 								    "你刚说的这段很有画面，我想多听你讲讲那时候你心里是什么感觉。"
 								)
 								def _segment_has_autobiographical_claim_zh(text: str) -> bool:
 								    s = (text or "").strip()
 								    if not s:
 								        return False
 								    if _AUTOBIO_IYAN_NOT_DEMO_RE.search(s):
 								        return True
 								    return any(m and m in s for m in _AUTOBIOGRAPHICAL_MARKERS_ZH)
 								def apply_autobiographical_boundary_guard(
 								    segments: Iterable[str],
 								) -> tuple[list[str], bool]:
 								    """将明显带有「助手自传式经历」的段落替换为中性承接，避免身份越界。"""
 								    cleaned: list[str] = []
 								    touched = False
 								    for seg in segments:
 								        text = str(seg or "").strip()
 								        if not text:
 								            continue
 								        if _segment_has_autobiographical_claim_zh(text):
 								            cleaned.append(AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH)
 								            touched = True
 								        else:
 								            cleaned.append(text)
 								    if not cleaned:
 								        cleaned = [AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH]
 								        touched = True
 								    return cleaned, touched
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								def stage_slot_hint_lines(stage: str) -> list[str]:
 								    keys = STAGE_SLOT_KEYS.get(stage, ())
 								    stage_zh = STAGE_DISPLAY_ZH.get(stage, stage)
 								    return [f"{stage_zh}:{key}" for key in keys]