refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步
2026-04-08 21:36:12 +08:00
parent 2a0c80987d
commit 064ad2161d
64 changed files with 3412 additions and 3068 deletions
--- a/api/app/agents/chat/interview_state_hints.py
+++ b/api/app/agents/chat/interview_state_hints.py
@@ -0,0 +1,327 @@
+"""Interview quality helpers: known facts, persona threads, and anti-repeat guard."""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Iterable
+
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
+
+from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_SLOT_KEYS
+from app.agents.state_schema import KnownFact, MemoirStateSchema, PersonaThread
+
+_QUESTION_SPLIT_RE = re.compile(r"[?？]+")
+_SENTENCE_SPLIT_RE = re.compile(r"(?<=[。！？!?])")
+_PUNCT_RE = re.compile(r"[\s，。！？；：、“”‘’（）()《》【】\\[\\],.!?:;\"'`~·…-]+")
+
+# Keyword heuristics for persona traits: each entry is (trait label, marker
+# substrings). build_runtime_interview_state walks the entries in order and
+# records a trait at most once, at the first marker found in the turn's text.
+# NOTE(review): several markers are single characters ("做", "比", "搭", "野")
+# and will match inside unrelated words — confirm this breadth is intended.
+_TRAIT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
+    ("执着坚持", ("坚持", "执着", "咬牙", "熬过", "顶住", "训练", "反复")),
+    ("规划目标感", ("计划", "规划", "目标", "打算", "一步步", "安排", "准备")),
+    ("求真较真", ("弄明白", "搞清楚", "想通", "为什么", "较真", "求证")),
+    ("行动力", ("决定", "创业", "开始做", "尝试", "报名", "跑去", "去做")),
+    ("家庭责任感", ("家里", "父母", "妈妈", "爸爸", "妻子", "丈夫", "孩子", "照顾", "支持")),
+    ("即时反馈驱动", ("反馈", "看到结果", "成就感", "立刻", "马上见效")),
+    ("自由天性", ("自由", "无拘无束", "满世界跑", "疯玩", "野", "不管")),
+    ("动手创造", ("自己动手", "搭", "做", "造", "修", "拆", "烤", "生火", "种")),
+    ("重感情念旧", ("想起来", "怀念", "舍不得", "还记得", "那时候", "小时候")),
+    ("好胜争先", ("比赛", "赢", "比", "第一", "不服输", "较劲")),
+)
+
+# (keyword, sensory description) pairs scanned in order by extract_scene_cues,
+# which keeps at most the first three matches, so earlier rows take priority.
+# NOTE(review): matching is plain substring, so a message containing "烤红薯"
+# also triggers the generic "烤" row and both descriptions are returned.
+_SCENE_CUE_WORDS: tuple[tuple[str, str], ...] = (
+    ("田野", "田野的泥土和青草气息"),
+    ("河里", "河水的凉意"),
+    ("海边", "海风和咸咸的空气"),
+    ("溜冰", "冰面上咔嚓咔嚓的声响"),
+    ("游泳", "一头扎进水里的畅快"),
+    ("烤红薯", "红薯外焦里糯、掰开冒热气的香味"),
+    ("烤", "火堆噼啪响、烟气里混着食物焦香"),
+    ("打水漂", "石片在水面跳跃、一圈一圈涟漪散开"),
+    ("捉", "追着跑、手心攥紧怕跑掉的紧张"),
+    ("雪", "雪花落在脸上化成水珠的凉"),
+    ("风", "风灌进领子里的感觉"),
+    ("下雨", "雨点打在屋顶上的声音"),
+    ("自行车", "骑车下坡风呼呼吹过耳朵"),
+    ("火车", "绿皮车厢里混着泡面和橘子皮的味道"),
+    ("学校", "教室里粉笔灰飘在阳光里的样子"),
+    ("考试", "翻卷子时纸张沙沙响"),
+    ("工厂", "机器轰鸣、油污和铁锈的气味"),
+    ("做饭", "锅铲碰锅底的声响、油花溅起来的滋滋声"),
+)
+
+
+def extract_scene_cues(user_message: str) -> list[str]:
+    msg = (user_message or "").strip()
+    if not msg:
+        return []
+    cues: list[str] = []
+    for keyword, description in _SCENE_CUE_WORDS:
+        if keyword in msg:
+            cues.append(description)
+    return cues[:3]
+
+# Slot name -> question phrasings that ask for that slot. For every known fact
+# that carries a slot name, apply_duplicate_question_guard treats a generated
+# question containing one of that slot's phrasings as a repeat.
+_SLOT_REPEAT_PATTERNS: dict[str, tuple[str, ...]] = {
+    "place": ("哪里长大", "家乡", "老家", "在哪长大", "什么地方长大"),
+    "people": ("谁对你影响", "家里都有谁", "小时候和谁", "身边有什么人"),
+    "daily_life": ("平时怎么过", "日常都做什么", "小时候都玩什么"),
+    "emotion": ("那时候什么感觉", "当时什么感受", "小时候开心吗"),
+    "turning_event": ("印象最深的事", "难忘的事", "转折"),
+    "school": ("什么学校", "在哪上学", "读的什么学校", "上什么学校"),
+    "city": ("在哪个城市", "去了哪里读书", "在哪读书"),
+    "motivation": ("为什么想学", "为什么选这个", "动力是什么"),
+    "challenge": ("遇到什么困难", "最大的难处", "辛苦吗"),
+    "change": ("后来有什么变化", "这件事怎么改变你", "之后有什么不同"),
+    "job": ("做什么工作", "具体做什么", "工作内容是什么"),
+    "environment": ("工作环境", "在哪工作", "什么单位"),
+    "decision": ("为什么做这个决定", "怎么决定的"),
+    "pressure": ("压力大吗", "最难的时候", "最大的压力"),
+    "growth": ("学到了什么", "成长在哪里", "后来有什么提升"),
+    "relationship": ("和家人关系怎么样", "和伴侣关系怎么样"),
+    "conflict": ("有什么矛盾", "怎么吵起来的", "冲突"),
+    "support": ("谁支持你", "谁帮过你", "怎么支持你的"),
+    "responsibility": ("承担什么责任", "家里靠谁", "你主要负责什么"),
+    "value": ("你最看重什么", "信念是什么", "原则是什么"),
+    "regret": ("最大的遗憾", "后悔过吗"),
+    "pride": ("最骄傲的事", "最自豪的事"),
+    "lesson": ("学到了什么道理", "最大的感悟", "给你什么启发"),
+}
+
+
+def _normalize_text(text: str) -> str:
+    return _PUNCT_RE.sub("", (text or "").strip().lower())
+
+
+def _dedupe_keep_last(items: Iterable[str], *, limit: int) -> list[str]:
+    out: list[str] = []
+    seen: set[str] = set()
+    for raw in reversed([str(x).strip() for x in items if str(x).strip()]):
+        key = _normalize_text(raw)
+        if not key or key in seen:
+            continue
+        seen.add(key)
+        out.append(raw)
+        if len(out) >= limit:
+            break
+    out.reverse()
+    return out
+
+
+def _merge_known_facts(
+    existing: Iterable[KnownFact],
+    additions: Iterable[KnownFact],
+    *,
+    limit: int = 24,
+) -> list[KnownFact]:
+    merged: dict[tuple[str, str, str], KnownFact] = {}
+    for item in list(existing) + list(additions):
+        key = (
+            (item.stage or "").strip(),
+            (item.slot_name or "").strip(),
+            _normalize_text(f"{item.label}:{item.value}"),
+        )
+        if not key[2]:
+            continue
+        merged[key] = item
+    values = list(merged.values())[-limit:]
+    return values
+
+
+def _merge_persona_threads(
+    existing: Iterable[PersonaThread],
+    additions: Iterable[PersonaThread],
+    *,
+    limit: int = 12,
+) -> list[PersonaThread]:
+    merged: dict[tuple[str, str], PersonaThread] = {}
+    for item in list(existing) + list(additions):
+        key = (_normalize_text(item.trait), _normalize_text(item.evidence))
+        if not key[0]:
+            continue
+        merged[key] = item
+    values = list(merged.values())[-limit:]
+    return values
+
+
+def _trim_sentence(text: str, *, limit: int = 80) -> str:
+    s = re.sub(r"\s+", " ", (text or "").strip())
+    if len(s) <= limit:
+        return s
+    return s[: limit - 1].rstrip() + "…"
+
+
+def build_runtime_interview_state(
+    state: MemoirStateSchema,
+    *,
+    user_message: str,
+    active_stage: str,
+    birth_year: int | None = None,
+    birth_place: str = "",
+    grew_up_place: str = "",
+    occupation: str = "",
+) -> MemoirStateSchema:
+    """Merge current-turn hints into a prompt-only state view."""
+    additions: list[KnownFact] = []
+    if birth_year:
+        additions.append(
+            KnownFact(
+                label="出生年份",
+                value=f"{birth_year}年",
+                source="profile",
+            )
+        )
+    if birth_place:
+        additions.append(
+            KnownFact(
+                label="出生地",
+                value=birth_place.strip(),
+                source="profile",
+                stage="childhood",
+                slot_name="place",
+            )
+        )
+    if grew_up_place:
+        additions.append(
+            KnownFact(
+                label="成长地",
+                value=grew_up_place.strip(),
+                source="profile",
+                stage="childhood",
+                slot_name="place",
+            )
+        )
+    if occupation:
+        additions.append(
+            KnownFact(
+                label="职业背景",
+                value=occupation.strip(),
+                source="profile",
+                stage="career",
+                slot_name="job",
+            )
+        )
+
+    msg = _trim_sentence(user_message, limit=120)
+    if msg:
+        additions.append(
+            KnownFact(
+                label="本轮新信息",
+                value=msg,
+                source="current_turn",
+                stage=active_stage,
+            )
+        )
+
+    persona_additions: list[PersonaThread] = []
+    haystack = " ".join(
+        [msg]
+        + [fact.value for fact in state.known_facts[-8:]]
+        + list(state.filled_slots_for_stage(active_stage).values())[:4]
+    )
+    for trait, markers in _TRAIT_HINTS:
+        for marker in markers:
+            if marker and marker in haystack:
+                persona_additions.append(
+                    PersonaThread(
+                        trait=trait,
+                        evidence=_trim_sentence(marker if marker in msg else haystack, limit=70),
+                        source="heuristic",
+                        stage=active_stage,
+                    )
+                )
+                break
+
+    return state.model_copy(
+        update={
+            "known_facts": _merge_known_facts(state.known_facts, additions),
+            "persona_threads": _merge_persona_threads(
+                state.persona_threads, persona_additions
+            ),
+        }
+    )
+
+
+def extract_recent_questions(messages: Iterable[BaseMessage], *, limit: int = 4) -> list[str]:
+    questions: list[str] = []
+    for msg in messages:
+        if not isinstance(msg, AIMessage):
+            continue
+        text = str(getattr(msg, "content", "") or "").strip()
+        if not text:
+            continue
+        for part in _QUESTION_SPLIT_RE.split(text):
+            part = part.strip()
+            if not part:
+                continue
+            if any(w in text for w in ("？", "?")):
+                questions.append(_trim_sentence(part + "？", limit=50))
+    return _dedupe_keep_last(questions, limit=limit)
+
+
+def update_recent_questions(
+    existing: Iterable[str],
+    generated_segments: Iterable[str],
+    *,
+    limit: int = 4,
+) -> list[str]:
+    fresh: list[str] = list(existing)
+    for seg in generated_segments:
+        text = str(seg or "").strip()
+        if not text or ("？" not in text and "?" not in text):
+            continue
+        parts = [p.strip() for p in _QUESTION_SPLIT_RE.split(text) if p.strip()]
+        if not parts:
+            continue
+        fresh.append(_trim_sentence(parts[-1] + "？", limit=50))
+    return _dedupe_keep_last(fresh, limit=limit)
+
+
+def apply_duplicate_question_guard(
+    segments: Iterable[str],
+    *,
+    state: MemoirStateSchema,
+    recent_questions: Iterable[str],
+) -> tuple[list[str], bool]:
+    """Downgrade obvious repeated-fact questions into acknowledgment-only text."""
+    recent_norms = {_normalize_text(q) for q in recent_questions if _normalize_text(q)}
+    known_patterns: list[str] = []
+    for fact in state.known_facts:
+        slot_patterns = _SLOT_REPEAT_PATTERNS.get(fact.slot_name or "", ())
+        known_patterns.extend(slot_patterns)
+        if fact.label == "本轮新信息":
+            known_patterns.append(fact.value)
+    cleaned: list[str] = []
+    touched = False
+    for seg in segments:
+        text = str(seg or "").strip()
+        if not text:
+            continue
+        text_norm = _normalize_text(text)
+        repeated = False
+        if ("？" in text or "?" in text) and text_norm:
+            if any(q and (q in text_norm or text_norm in q) for q in recent_norms):
+                repeated = True
+            if not repeated:
+                for pattern in known_patterns:
+                    pat_norm = _normalize_text(pattern)
+                    if pat_norm and pat_norm in text_norm:
+                        repeated = True
+                        break
+        if repeated:
+            sentences = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
+            kept = [s for s in sentences if "？" not in s and "?" not in s]
+            replacement = kept[0] if kept else "这一段我记住了。"
+            if not replacement.endswith(("。", "！", "…")):
+                replacement += "。"
+            cleaned.append(replacement)
+            touched = True
+        else:
+            cleaned.append(text)
+    if not cleaned:
+        cleaned = ["这一段我记住了。"]
+    return cleaned, touched
+
+
+def stage_slot_hint_lines(stage: str) -> list[str]:
+    keys = STAGE_SLOT_KEYS.get(stage, ())
+    stage_zh = STAGE_DISPLAY_ZH.get(stage, stage)
+    return [f"{stage_zh}:{key}" for key in keys]
+