From 2fded6fbd927f1a259b3d70f70bf073ac5d76292 Mon Sep 17 00:00:00 2001 From: Kevin Date: Mon, 6 Apr 2026 22:22:50 +0800 Subject: [PATCH] refactor(chat): AI-native prompts, remove interview heuristics - Drop interview_reply_length and utterance_substance; always run stage LLM and memory retrieval when enabled; trim Settings fields and .env.example. - Replace guided/opening prompts with compact fact blocks plus unified behavior guidance; slim background_voice and persona to tone hints. - InterviewAgent uses fixed chat_interview max_tokens/chars/segments. Also includes stacked work: profile followup/extract path, evaluation rubric and judge schema updates, transcript SPLIT handling in execution service, user export markdown split tests, and golden case fixture. --- api/.env.example | 16 +- api/app/agents/chat/background_voice.py | 19 +- api/app/agents/chat/interview_agent.py | 61 +-- api/app/agents/chat/interview_reply_length.py | 311 --------------- api/app/agents/chat/occupation_context.py | 8 +- api/app/agents/chat/orchestrator.py | 123 +++--- api/app/agents/chat/output_rules.py | 4 +- api/app/agents/chat/personas.py | 44 +-- api/app/agents/chat/profile_agent.py | 7 +- api/app/agents/chat/prompt_context.py | 10 +- api/app/agents/chat/prompts_conversation.py | 368 ++++++------------ api/app/agents/chat/prompts_profile.py | 20 +- api/app/agents/chat/stage_detection.py | 7 +- api/app/agents/chat/utterance_substance.py | 73 ---- api/app/core/config.py | 27 +- .../features/evaluation/execution_service.py | 13 +- .../importers/user_export_markdown.py | 9 +- api/app/features/evaluation/judge_schemas.py | 12 +- .../evaluation/rubrics/conversation_v1.py | 18 +- .../fixtures/evaluation_golden_cases.json | 39 ++ api/tests/test_background_voice.py | 15 +- api/tests/test_chat_stage_detection_gates.py | 16 +- api/tests/test_experience_regressions.py | 129 +----- api/tests/test_interview_prompts.py | 149 ++----- api/tests/test_interview_reply_length.py | 207 ---------- api/tests/test_user_export_markdown_split.py | 24 ++ api/tests/test_utterance_substance.py | 46 --- 27 files changed, 426 insertions(+), 1349 deletions(-) delete mode 100644 api/app/agents/chat/interview_reply_length.py delete mode 100644 api/app/agents/chat/utterance_substance.py create mode 100644 api/tests/fixtures/evaluation_golden_cases.json delete mode 100644 api/tests/test_interview_reply_length.py create mode 100644 api/tests/test_user_export_markdown_split.py delete mode 100644 api/tests/test_utterance_substance.py diff --git a/api/.env.example b/api/.env.example index 072e87c..8e9cb73 100644 --- a/api/.env.example +++ b/api/.env.example @@ -53,11 +53,10 @@ EMBEDDING_MODEL=embedding-3 # CHAT_STAGE_DETECTION_MAX_TOKENS=128 # 访谈性格(InterviewAgent):default | warm_listener | curious_guide # CHAT_INTERVIEW_PERSONA=default -# 访谈回复长度档位(brief/standard/expanded)联动:极短输入 / 默认 / 长段+新细节 -# CHAT_INTERVIEW_BRIEF_MAX_TOKENS=240 -# CHAT_INTERVIEW_BRIEF_MAX_CHARS_PER_SEGMENT=180 -# CHAT_INTERVIEW_EXPANDED_MAX_TOKENS=400 -# CHAT_INTERVIEW_EXPANDED_MAX_CHARS_PER_SEGMENT=300 +# 访谈主回复:统一 max_tokens / 单段字数(代码截断),不再分 brief/expanded 档 +# CHAT_INTERVIEW_MAX_TOKENS=380 +# CHAT_INTERVIEW_MAX_CHARS_PER_SEGMENT=260 +# CHAT_INTERVIEW_MAX_SEGMENTS=2 # 访谈:是否按本轮用户话检索记忆并注入提示词(关则不调 retrieve) # CHAT_MEMORY_RETRIEVAL_ENABLED=true # CHAT_MEMORY_TOP_K=8 @@ -82,13 +81,6 @@ EMBEDDING_MODEL=embedding-3 # CHAT_INPUT_NORMALIZE_LLM_MAX_INPUT_CHARS=8000 # True:仅 is_from_voice 时走 LLM 纠错;键盘输入仅规则归一 # CHAT_INPUT_NORMALIZE_LLM_VOICE_ONLY=true -# 短时/应答/元话语:本轮跳过阶段 LLM 与记忆向量检索(仍保留访谈主 LLM);关则每轮完整路径 -# CHAT_SUBSTANTIVE_HEURISTIC_ENABLED=true -# CHAT_SUBSTANTIVE_MIN_CHARS=12 -# CHAT_STAGE_DETECTION_SKIP_LLM_ON_INSUFFICIENT_SIGNAL=true -# CHAT_MEMORY_RETRIEVAL_REQUIRE_SUBSTANTIVE=true -# 资料收集:短时/元话语不跑资料字段抽取 LLM(仍生成追问) -# CHAT_PROFILE_EXTRACT_REQUIRE_SUBSTANTIVE=true # Memoir Phase1:True 时用一次「批量 JSON」做抽取+分类(单段或多段均可;失败自动回退逐段)。 # False 时始终逐段(与启用本开关前的行为一致,含防抖合并后的多段任务)。 diff --git a/api/app/agents/chat/background_voice.py b/api/app/agents/chat/background_voice.py index 01bc672..c4df3b4 100644 --- a/api/app/agents/chat/background_voice.py +++ b/api/app/agents/chat/background_voice.py @@ -84,26 +84,19 @@ def normalize_background_voice(voice: str | None) -> BackgroundVoice: return infer_background_voice(s) -def get_background_voice_chat_block(voice: str | None) -> str: - """注入访谈 guided/opening 的「背景语气」段落;default 返回空串。""" +def get_background_voice_tone_hint(voice: str | None) -> str: + """一句背景语气提示,融入主 system prompt;default 返回空串。""" v = normalize_background_voice(voice) if v == "default": return "" if v == "military": return ( - "## 背景语气:军队语境(仅语气,不编造事实)\n" - "称呼得体、句子简洁利落、条理清楚;避免网络梗与油滑套话。\n" - "先简短接住对方,再**最多一个**具体问题;不写命令式、不做思想政治表态。\n" - "涉及纪律、集体、任务等措辞,**仅当用户口述已出现相关事实时**自然呼应,禁止堆砌军事化辞藻或虚构经历。\n" - "用户已退役/转业,以回忆军旅岁月为基调,不要预设其仍在服役。" + "语气简洁利落、得体;称呼自然;不写命令式、不堆砌军事辞藻;" + "仅当用户口述已出现相关事实时才呼应军旅语境,不编造经历。" ) - # cadre return ( - "## 背景语气:干部/机关语境(仅语气,不编造事实)\n" - "稳重、有分寸,敬语适度;句子可略完整,但仍控制总字数,避免官样文章与排比空话。\n" - "先回应对方内容,再**最多一个**具体问题;不写公文套话、不做政治评价。\n" - "涉及职务与组织时,**不得编造**用户未提及的职级、单位与荣誉。\n" - "用户已退休,以回顾和怀念工作岁月为基调,不要预设其仍在岗。" + "语气稳重有分寸、敬语适度;避免官样排比与公文套话;" + "不得编造用户未提及的职级、单位与荣誉。" ) diff --git a/api/app/agents/chat/interview_agent.py b/api/app/agents/chat/interview_agent.py index 1faffa7..96f5a48 100644 --- a/api/app/agents/chat/interview_agent.py +++ b/api/app/agents/chat/interview_agent.py @@ -13,7 +13,6 @@ from app.agents.chat.helpers import format_history_string, get_history_with_wind from app.agents.chat.personas import normalize_interview_persona from app.agents.chat.prompt_context import ChatPromptContext from app.agents.chat.stage_detection import keyword_fallback_primary_stage -from app.agents.chat.interview_reply_length import compute_reply_plan from app.agents.chat.prompts_conversation import ( SLOT_NAME_MAP, get_opening_prompt, @@ -67,23 +66,6 @@ class InterviewAgent: """关键词回退:与 stage_detection 一致(多阶段打分)。""" return keyword_fallback_primary_stage(user_message) - def _estimate_same_topic_turns( - self, history_messages: List[Any], current_filled_slots: dict - ) -> int: - """估算同一话题的连续轮数(保守:宁可多陪聊几轮再换)。""" - n_pairs = len(history_messages) // 2 - if n_pairs <= 1: - return n_pairs - recent_window = min(n_pairs, 5) - recent = history_messages[-(recent_window * 2) :] - nonempty_user_turns = 0 - for i in range(0, len(recent), 2): - msg = recent[i] - text = msg.content if hasattr(msg, "content") else str(msg) - if len(text.strip()) > 5: - nonempty_user_turns += 1 - return nonempty_user_turns - def _resolve_text_for_model( self, user_message: str, @@ -137,27 +119,21 @@ class InterviewAgent: max_chars=settings.chat_history_max_chars, ) conversation_turn_total = hw.turn_total - same_topic_turns = self._estimate_same_topic_turns(hw.window, filled_slots) all_stages_coverage = memoir_state.all_stages_coverage() persona = normalize_interview_persona(settings.chat_interview_persona) - reply_plan = compute_reply_plan( - text_for_model, - background_voice=background_voice, - settings=settings, - ) + max_segments = int(settings.chat_interview_max_segments) + max_tokens = int(settings.chat_interview_max_tokens) + max_chars = int(settings.chat_interview_max_chars_per_segment) + ctx = ChatPromptContext( current_stage=memoir_state.current_stage, empty_slots=empty_slots, filled_slots=filled_slots, - user_message=text_for_model, - conversation_turn_total=conversation_turn_total, - same_topic_turns=same_topic_turns, all_stages_coverage=all_stages_coverage, detected_user_stage=du, user_profile_context=user_profile_context, persona=persona, memory_evidence_text=memory_evidence_text, - reply_length_mode=reply_plan.mode.value, background_voice=background_voice, occupation=occupation, ) @@ -181,7 +157,7 @@ class InterviewAgent: omit_system_body=settings.agent_log_omit_system_message_body, ), ) - chat_llm = self.llm.bind(max_tokens=reply_plan.max_tokens) + chat_llm = self.llm.bind(max_tokens=max_tokens) prompt_chars = _message_contents_char_count(messages) llm_t0 = time.perf_counter() with agent_span( @@ -212,26 +188,25 @@ class InterviewAgent: ) raw_list = segments_from_llm_response( response_text, - max_segments=reply_plan.max_segments, + max_segments=max_segments, ) if not raw_list: raw_list = [response_text.strip()] out = truncate_chat_segments( raw_list, - max_segments=reply_plan.max_segments, - max_chars_per_segment=reply_plan.max_chars_per_segment, + max_segments=max_segments, + max_chars_per_segment=max_chars, ) if not out: - out = [response_text.strip()[: reply_plan.max_chars_per_segment]] + out = [response_text.strip()[:max_chars]] out = nonempty_segments_or_fallback(out, fallback=_FALLBACK_REPLY) log_agent_summary( logger, "InterviewAgent.generate_response segments={} conversation_id={} " - "reply_length_mode={} max_tokens={}", + "max_tokens={}", len(out), conversation_id, - reply_plan.mode.value, - reply_plan.max_tokens, + max_tokens, ) return AgentChatTurn(messages=out, skip_tts=False) except Exception as e: @@ -314,15 +289,11 @@ class InterviewAgent: raw_list = segments_from_llm_response(response_text, max_segments=2) if not raw_list: raw_list = [response_text.strip()] - open_plan = compute_reply_plan( - "x" * 50, - background_voice=background_voice, - settings=settings, - ) + max_chars = int(settings.chat_interview_max_chars_per_segment) out = truncate_chat_segments( raw_list, max_segments=2, - max_chars_per_segment=open_plan.max_chars_per_segment, + max_chars_per_segment=max_chars, ) log_agent_summary( logger, @@ -330,11 +301,7 @@ class InterviewAgent: len(out), conversation_id, ) - segments = ( - out - if out - else [response_text.strip()[: open_plan.max_chars_per_segment]] - ) + segments = out if out else [response_text.strip()[:max_chars]] return nonempty_segments_or_fallback( segments, fallback="你好呀~ 又见面了,最近有没有什么事想跟我说说?", diff --git a/api/app/agents/chat/interview_reply_length.py b/api/app/agents/chat/interview_reply_length.py deleted file mode 100644 index ed196d8..0000000 --- a/api/app/agents/chat/interview_reply_length.py +++ /dev/null @@ -1,311 +0,0 @@ -""" -访谈回复长度:由用户本轮文本 + 启发式(新细节 / 闲聊 / 信息密度)决定档位, -与 max_tokens、max_chars_per_segment 联动;单一 ReplyPlan 供 prompt 与截断共用。 -""" - -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import TYPE_CHECKING - -from app.agents.chat.background_voice import normalize_background_voice - -if TYPE_CHECKING: - from app.core.config import Settings - - -class ReplyLengthMode(str, Enum): - """brief:极短;standard:默认;expanded:值得展开承接时稍长。""" - - brief = "brief" - standard = "standard" - expanded = "expanded" - - -# 用户本轮字符数分桶(strip 后按 len,中文友好) -_LEN_BRIEF_MAX = 20 -_LEN_MID_EXPAND_MIN = 40 -_LEN_LONG_MIN = 80 - - -def heuristic_likely_new_detail(user_message: str) -> bool: - """ - 轻量启发:本轮是否很可能补充了新人名、新关系或新情节(追问触发与长度共用)。 - """ - m = (user_message or "").strip() - if len(m) < 2: - return False - needles = ( - "叫", - "名字", - "名叫", - "同桌", - "初恋", - "现实里", - "戏里", - "饰演", - "演我", - "第一次", - "认识", - "没想到", - "猜猜", - ) - return any(n in m for n in needles) - - -def heuristic_information_rich(user_message: str) -> bool: - """ - 轻量启发:短句也可能信息密度高(新转折、重大事件、时间锚点),用于避免误压成 brief。 - """ - m = (user_message or "").strip() - if len(m) < 2: - return False - needles = ( - "突然", - "那年", - "后来", - "记得", - "第一次", - "没想到", - "离开", - "去世", - "走了", - "结婚", - "离婚", - "生病", - "辍学", - "退学", - "下岗", - "破产", - "我爸", - "我妈", - "爷爷", - "奶奶", - ) - return any(n in m for n in needles) - - -def heuristic_likely_emotional(user_message: str) -> bool: - """ - 轻量启发:用户本轮是否在表达较强情绪(需要更多承接空间、不应被压成 brief)。 - """ - m = (user_message or "").strip() - if len(m) < 4: - return False - needles = ( - "想哭", - "哭了", - "难过", - "伤心", - "心酸", - "感动", - "激动", - "害怕", - "委屈", - "后悔", - "对不起", - "愧疚", - "感激", - "谢谢你", - "想念", - "想他", - "想她", - "舍不得", - "不容易", - "太难了", - "崩溃", - "绝望", - "幸福", - "骄傲", - "自豪", - ) - return any(n in m for n in needles) - - -def heuristic_likely_chit_chat(user_message: str) -> bool: - """ - 轻量启发:本轮是否偏闲聊(放宽长句里纯寒暄/天气类)。 - """ - m = (user_message or "").strip() - if len(m) > 200: - return False - - needles_short = ( - "天气", - "谢谢", - "哈哈", - "呵呵", - "在吗", - "吃了吗", - "早上好", - "晚安", - "闲聊", - "逗你", - ) - if len(m) > 48: - head = m[:100] - if any(n in head for n in needles_short): - if not heuristic_information_rich(m) and not heuristic_likely_new_detail(m): - return True - return False - - if any(n in m for n in needles_short): - return True - if len(m) <= 8 and m in ("嗯", "好", "行的", "谢谢", "哈哈", "可以", "没事"): - return True - return False - - -@dataclass(frozen=True) -class ReplyPlan: - """单一计划:prompt 展示档位与数值上限一致(含背景语气微调)。""" - - mode: ReplyLengthMode - max_tokens: int - max_chars_per_segment: int - max_segments: int - likely_new_detail: bool - likely_chit_chat: bool - information_rich: bool - - -def compute_reply_plan( - user_message: str, - *, - background_voice: str | None, - settings: "Settings", -) -> ReplyPlan: - """ - 信息量与情绪优先,字数次之: - - 短输入且无新信息、无情绪 → brief - - 短输入但有新细节/高密度/强情绪 → standard - - 中段(40-79)有实质/情绪 → expanded(给足承接空间) - - 中段无实质 → standard - - 长输入:闲聊为主 → standard;有展开价值 → expanded - """ - norm = (user_message or "").strip() - n = max(0, len(norm)) - max_segments = int(settings.chat_interview_max_segments) - - likely_new = heuristic_likely_new_detail(norm) - likely_chit = heuristic_likely_chit_chat(norm) - info_rich = heuristic_information_rich(norm) - emotional = heuristic_likely_emotional(norm) - substantive = likely_new or info_rich or emotional - - def _mk(m: ReplyLengthMode) -> ReplyPlan: - return _plan_from_mode( - m, - max_segments=max_segments, - settings=settings, - background_voice=background_voice, - likely_new=likely_new, - likely_chit=likely_chit, - info_rich=info_rich, - ) - - if likely_chit and not substantive: - return _mk( - ReplyLengthMode.brief if n <= _LEN_BRIEF_MAX else ReplyLengthMode.standard - ) - - if n <= _LEN_BRIEF_MAX: - return _mk(ReplyLengthMode.standard if substantive else ReplyLengthMode.brief) - - if n < _LEN_MID_EXPAND_MIN: - return _mk(ReplyLengthMode.standard) - - if n < _LEN_LONG_MIN: - return _mk( - ReplyLengthMode.expanded if substantive else ReplyLengthMode.standard - ) - - return _mk(ReplyLengthMode.expanded if substantive else ReplyLengthMode.standard) - - -def _plan_from_mode( - mode: ReplyLengthMode, - *, - max_segments: int, - settings: "Settings", - background_voice: str | None, - likely_new: bool, - likely_chit: bool, - info_rich: bool, -) -> ReplyPlan: - if mode == ReplyLengthMode.brief: - base = ReplyPlan( - mode=mode, - max_tokens=int(settings.chat_interview_brief_max_tokens), - max_chars_per_segment=int( - settings.chat_interview_brief_max_chars_per_segment - ), - max_segments=max_segments, - likely_new_detail=likely_new, - likely_chit_chat=likely_chit, - information_rich=info_rich, - ) - elif mode == ReplyLengthMode.expanded: - base = ReplyPlan( - mode=mode, - max_tokens=int(settings.chat_interview_expanded_max_tokens), - max_chars_per_segment=int( - settings.chat_interview_expanded_max_chars_per_segment - ), - max_segments=max_segments, - likely_new_detail=likely_new, - likely_chit_chat=likely_chit, - information_rich=info_rich, - ) - else: - base = ReplyPlan( - mode=ReplyLengthMode.standard, - max_tokens=int(settings.chat_interview_max_tokens), - max_chars_per_segment=int(settings.chat_interview_max_chars_per_segment), - max_segments=max_segments, - likely_new_detail=likely_new, - likely_chit_chat=likely_chit, - information_rich=info_rich, - ) - return bump_reply_plan_for_background_voice( - base, background_voice=background_voice, settings=settings - ) - - -def bump_reply_plan_for_background_voice( - plan: ReplyPlan, - *, - background_voice: str | None, - settings: "Settings", -) -> ReplyPlan: - """ - 干部/军队背景时,仅对 standard 档小幅提高 token 与单段字数;**展示档位不变**(仍为 standard)。 - """ - if normalize_background_voice(background_voice) == "default": - return plan - if plan.mode != ReplyLengthMode.standard: - return plan - extra_t = int( - getattr( - settings, - "chat_interview_cadre_military_standard_extra_tokens", - 0, - ) - ) - extra_c = int( - getattr( - settings, - "chat_interview_cadre_military_standard_extra_chars", - 0, - ) - ) - return ReplyPlan( - mode=plan.mode, - max_tokens=plan.max_tokens + extra_t, - max_chars_per_segment=plan.max_chars_per_segment + extra_c, - max_segments=plan.max_segments, - likely_new_detail=plan.likely_new_detail, - likely_chit_chat=plan.likely_chit_chat, - information_rich=plan.information_rich, - ) diff --git a/api/app/agents/chat/occupation_context.py b/api/app/agents/chat/occupation_context.py index 4321dea..d1f51b1 100644 --- a/api/app/agents/chat/occupation_context.py +++ b/api/app/agents/chat/occupation_context.py @@ -6,17 +6,13 @@ from app.agents.chat.background_voice import normalize_background_voice def get_occupation_chat_hint(occupation: str | None, background_voice: str) -> str: - """default 路径的通用职业上下文;cadre/military 已有专属块,返回空串。""" + """一句职业事实(仅 default 路径);cadre/military 语气由 background_voice 覆盖。""" if normalize_background_voice(background_voice) != "default": return "" occ = (occupation or "").strip() if not occ: return "" - return ( - f"## 用户职业背景\n" - f"用户从事过「{occ}」相关工作。聊天时自然贴合这一背景," - f"在用语和追问方向上适度靠近用户的职业经历与知识面,但不要刻意。" - ) + return f"从事过「{occ}」相关工作,聊天可自然贴近其经历,不要刻意。" def get_occupation_narrative_hint(occupation: str | None, background_voice: str) -> str: diff --git a/api/app/agents/chat/orchestrator.py b/api/app/agents/chat/orchestrator.py index 4bf7b1b..ac6496b 100644 --- a/api/app/agents/chat/orchestrator.py +++ b/api/app/agents/chat/orchestrator.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, List, Optional from sqlalchemy.ext.asyncio import AsyncSession from app.agents.chat.agent_turn import AgentChatTurn +from app.agents.chat.helpers import get_history_with_window from app.agents.chat.interview_agent import InterviewAgent from app.agents.chat.profile_agent import ProfileAgent from app.agents.state_schema import MemoirStateSchema @@ -19,13 +20,9 @@ from app.agents.chat.stage_detection import ( detect_primary_life_stage, life_stage_display_name, ) -from app.agents.chat.utterance_substance import should_run_chat_stage_memory_heavy_work from app.core.config import settings from app.core.dependencies import get_llm_provider -from app.features.conversation.input_normalize import ( - apply_conversation_input_rules, - normalize_chat_input_for_agent, -) +from app.features.conversation.input_normalize import normalize_chat_input_for_agent from app.features.memoir.state_service import get_or_create_state, switch_stage @@ -68,15 +65,6 @@ async def _fetch_interview_memory_evidence( "event=chat_memory_retrieval_skip reason=empty user_id={}", user_id ) return "" - if ( - settings.chat_memory_retrieval_require_substantive - and not should_run_chat_stage_memory_heavy_work(msg) - ): - logger.debug( - "event=chat_memory_retrieval_skip reason=not_substantive user_id={}", - user_id, - ) - return "" try: emb = get_embedding_provider() ms = MemoryService(db, embedding_provider=emb) @@ -143,60 +131,79 @@ class ChatOrchestrator: if user: missing = get_missing_profile_fields_fn(user) if missing: - try: - log_agent_detail( - logger, - "ChatOrchestrator route=profile conversation_id={} " - "missing_fields={} user_msg_len={}", + hw_profile = await get_history_with_window( + conversation_id, + max_pairs=settings.chat_history_max_pairs, + max_chars=settings.chat_history_max_chars, + ) + profile_turn_total = hw_profile.turn_total + if profile_turn_total >= settings.chat_profile_max_turns: + logger.info( + "event=chat_profile_cap_skip conversation_id={} " + "turn_total={} cap={} missing_fields={}", conversation_id, + profile_turn_total, + settings.chat_profile_max_turns, missing, - len(user_message or ""), ) - run_extract = True - if settings.chat_profile_extract_require_substantive: - rules_only = apply_conversation_input_rules(user_message or "") - run_extract = should_run_chat_stage_memory_heavy_work( - rules_only + else: + try: + log_agent_detail( + logger, + "ChatOrchestrator route=profile conversation_id={} " + "missing_fields={} user_msg_len={} profile_turn_total={}", + conversation_id, + missing, + len(user_message or ""), + profile_turn_total, ) - extracted = None - if run_extract: + # Profile 阶段每轮都抽取:短确认语也可能带可推断资料,跳过抽取会导致槽位长期不更新 extracted = ( await self.profile_agent.extract_profile_from_message( user_message, missing, conversation_id=conversation_id ) ) - if extracted: - await apply_extracted_profile_fn(user, extracted, db) - - remaining = get_missing_profile_fields_fn(user) - filled = get_filled_profile_fields_fn(user) - interview_stage_hint = "" - if not remaining: - st = await get_or_create_state(user.id, db) - interview_stage_hint = life_stage_display_name(st.current_stage) - responses = await self.profile_agent.generate_profile_followup( - conversation_id=conversation_id, - user_message=user_message, - missing_fields=remaining, - filled_fields=filled, - nickname=user.nickname or "", - interview_stage_hint=interview_stage_hint, - ) - if agent_summary_enabled(): logger.info( - "ChatOrchestrator.process_user_message route=profile " - "duration_ms={:.2f} conversation_id={} response_segments={}", - (time.perf_counter() - t0) * 1000, + "event=chat_profile_extract conversation_id={} " + "extracted_keys={} missing_before={}", conversation_id, - len(responses), + list(extracted.keys()) if extracted else [], + missing, + ) + if extracted: + await apply_extracted_profile_fn(user, extracted, db) + + remaining = get_missing_profile_fields_fn(user) + filled = get_filled_profile_fields_fn(user) + interview_stage_hint = "" + if not remaining: + st = await get_or_create_state(user.id, db) + interview_stage_hint = life_stage_display_name( + st.current_stage + ) + responses = await self.profile_agent.generate_profile_followup( + conversation_id=conversation_id, + user_message=user_message, + missing_fields=remaining, + filled_fields=filled, + nickname=user.nickname or "", + interview_stage_hint=interview_stage_hint, + ) + if agent_summary_enabled(): + logger.info( + "ChatOrchestrator.process_user_message route=profile " + "duration_ms={:.2f} conversation_id={} response_segments={}", + (time.perf_counter() - t0) * 1000, + conversation_id, + len(responses), + ) + return AgentChatTurn(messages=responses, skip_tts=False) + except Exception as e: + logger.error(f"资料收集处理失败: {e}", exc_info=True) + return AgentChatTurn( + messages=["不好意思刚才没接住,你再说一遍好吗?"], + skip_tts=False, ) - return AgentChatTurn(messages=responses, skip_tts=False) - except Exception as e: - logger.error(f"资料收集处理失败: {e}", exc_info=True) - return AgentChatTurn( - messages=["不好意思刚才没接住,你再说一遍好吗?"], - skip_tts=False, - ) # --- 正式访谈模式 --- user_id = user.id if user else None @@ -227,14 +234,10 @@ class ChatOrchestrator: is_from_voice=is_from_voice, ) state = await get_or_create_state(user_id, db) - substantive_turn = should_run_chat_stage_memory_heavy_work( - normalized_user_message - ) detected = await detect_primary_life_stage( normalized_user_message, state.current_stage, self.interview_agent.llm, - skip_llm=not substantive_turn, ) if detected != state.current_stage: state = await switch_stage(user_id, detected, db) diff --git a/api/app/agents/chat/output_rules.py b/api/app/agents/chat/output_rules.py index 8d6a861..f9c0877 100644 --- a/api/app/agents/chat/output_rules.py +++ b/api/app/agents/chat/output_rules.py @@ -8,7 +8,9 @@ def chat_output_rules() -> str: "反引号代码、`[]()` 链接、列表符号或渲染用符号;只输出连贯口语,**可以**在需要分两气泡时使用字面量 " "`[SPLIT]`(仅此一处方括号用法);**禁止**输出括号、括号内的策略/舞台说明(例如「(先接住情绪)」「(共情)」)、" "思考过程或任何元注释——这些只存在于系统指令里,**绝不可**出现在你对用户说的话中;" - "采访腔(「我注意到」「我想了解」);重复确认对方已经说过或能推断出的信息;编造对方没说的细节。" + "采访腔(「我注意到」「我想了解」);重复确认对方已经说过或能推断出的信息;" + "编造对方没说的**具体**事实(人名、时间、地点、事件经过等若用户未提及则不说)。" + "**允许**用「我能想象……」「那时候大概……」等泛泛接话,但不要把这些写成就等于用户亲身经历的事实。" ) diff --git a/api/app/agents/chat/personas.py b/api/app/agents/chat/personas.py index 0e34e97..01158d1 100644 --- a/api/app/agents/chat/personas.py +++ b/api/app/agents/chat/personas.py @@ -1,5 +1,5 @@ """ -访谈 Agent 可配置性格(Persona):仅影响语气与追问倾向,不替代事实边界与槽位约束。 +访谈 Agent 可配置性格(Persona):仅影响语气,不替代事实边界与槽位约束。 """ from __future__ import annotations @@ -20,41 +20,21 @@ def normalize_interview_persona(raw: str | None) -> str: return "default" -def get_interview_persona_block(persona: str) -> str: - """ - 返回注入到访谈 prompt 的「访谈性格」段落(不含 default,由调用方跳过)。 - """ +def get_interview_persona_tone_hint(persona: str) -> str: + """一句访谈性格提示,融入主 system prompt;default 返回空串。""" key = normalize_interview_persona(persona) if key == "default": return "" + if key == "warm_listener": + return "偏倾听与承接,语气柔和、少打断;不审问感,一次最多一个具体问题。" + return "爱把人往一个具体细节里带;短句像微信,一次最多一个具体问题,不重复上文已清楚的事。" - blocks = { - "warm_listener": ( - "## 访谈性格:温柔倾听\n" - "在遵守「回忆录导向与闲聊」的前提下,优先把对话引向可写进回忆录的素材;明显闲聊时先陪聊。\n" - "你更偏倾听与承接,语气柔和、少打断;" - "但一旦用户说出**新的人名、新的关系、或新的情节线**(上文未展开)," - "仍必须按本提示中的「追问触发」规则,在承接后带**一个**具体问题,不能用纯感慨代替。\n" - "禁止审问感、禁止一次抛多个问题。" - ), - "curious_guide": ( - "## 访谈性格:好奇引导\n" - "在遵守「回忆录导向与闲聊」的前提下,追问尽量落在人生故事与未覆盖方向上;明显闲聊时先陪聊。\n" - "你更愿意把人往**一个具体细节**里带:时间、场景、对方反应、你心里一闪而过的念头;" - "每轮**最多一个**具体问题,短句、像微信。\n" - "若本轮触发「追问触发」,优先追问用户刚抛出的新信息,不要为了凑问题去重复上文已清楚的事。" - ), - } - return blocks.get(key, "") + +def get_interview_persona_block(persona: str) -> str: + """兼容旧名:返回空串,请改用 get_interview_persona_tone_hint。""" + return "" def get_opening_persona_line(persona: str) -> str: - """开场白用的一行性格提示(短,避免喧宾夺主)。""" - key = normalize_interview_persona(persona) - if key == "default": - return "" - lines = { - "warm_listener": "语气偏倾听、少打断;但仍须完成「问候 + 一个具体问题」。", - "curious_guide": "语气偏好奇、爱往细节里带一个具体问题;不要一次问很多。", - } - return lines.get(key, "") + """兼容旧名:与访谈轮次共用一句性格提示。""" + return get_interview_persona_tone_hint(persona) diff --git a/api/app/agents/chat/profile_agent.py b/api/app/agents/chat/profile_agent.py index ca8a17f..b03a0b5 100644 --- a/api/app/agents/chat/profile_agent.py +++ b/api/app/agents/chat/profile_agent.py @@ -152,6 +152,12 @@ class ProfileAgent: result["grew_up_place"] = str(parsed.grew_up_place) if parsed.occupation: result["occupation"] = str(parsed.occupation) + bp = result.get("birth_place") + gp = result.get("grew_up_place") + if bp and not gp: + result["grew_up_place"] = bp + elif gp and not bp: + result["birth_place"] = gp return result except Exception as e: logger.error("提取资料信息失败: {}", e) @@ -173,7 +179,6 @@ class ProfileAgent: prompt = get_profile_followup_prompt( missing_fields, filled_fields, - user_message, nickname, interview_stage_hint=interview_stage_hint, ) diff --git a/api/app/agents/chat/prompt_context.py b/api/app/agents/chat/prompt_context.py index 3b71614..32ae21c 100644 --- a/api/app/agents/chat/prompt_context.py +++ b/api/app/agents/chat/prompt_context.py @@ -13,35 +13,27 @@ class ChatPromptContext: current_stage: str empty_slots: List[str] filled_slots: Dict[str, str] - user_message: str - conversation_turn_total: int = 0 - same_topic_turns: int = 0 all_stages_coverage: Optional[Dict[str, Dict]] = None detected_user_stage: str = "" user_profile_context: str = "" persona: str = "default" memory_evidence_text: str = "" - reply_length_mode: str = "standard" background_voice: str = "default" occupation: str = "" def guided_system_prompt(self) -> str: - """`user_message` 仅参与启发式,不出现在返回的系统提示文本中。""" + """用户原话仅以对话历史 + HumanMessage 注入模型。""" from app.agents.chat.prompts_conversation import get_guided_conversation_prompt return get_guided_conversation_prompt( current_stage=self.current_stage, empty_slots=self.empty_slots, filled_slots=self.filled_slots, - user_message=self.user_message, - conversation_turn_total=self.conversation_turn_total, - same_topic_turns=self.same_topic_turns, all_stages_coverage=self.all_stages_coverage, detected_user_stage=self.detected_user_stage, user_profile_context=self.user_profile_context, persona=self.persona, memory_evidence_text=self.memory_evidence_text, - reply_length_mode=self.reply_length_mode, background_voice=self.background_voice, occupation=self.occupation, ) diff --git a/api/app/agents/chat/prompts_conversation.py b/api/app/agents/chat/prompts_conversation.py index 937c444..68d9091 100644 --- a/api/app/agents/chat/prompts_conversation.py +++ b/api/app/agents/chat/prompts_conversation.py @@ -1,22 +1,16 @@ """ -对话 Agent 提示词模板 +对话 Agent 提示词模板(精简:事实块 + 行为指引,由模型自行判断追问/长度/闲聊)。 """ from typing import Dict, List, Optional from app.agents.chat.background_voice import ( - get_background_voice_chat_block, + get_background_voice_tone_hint, normalize_background_voice, ) from app.agents.chat.occupation_context import get_occupation_chat_hint -from app.agents.chat.interview_reply_length import ( - heuristic_likely_chit_chat, - heuristic_likely_emotional, - heuristic_likely_new_detail, -) from app.agents.chat.personas import ( - get_interview_persona_block, - get_opening_persona_line, + get_interview_persona_tone_hint, normalize_interview_persona, ) from app.agents.chat.output_rules import chat_output_rules @@ -49,20 +43,55 @@ SLOT_NAME_MAP = { "lesson": "人生经验", } -STAGE_RELATED_TOPICS = { - "childhood": ["family", "education"], - "education": ["childhood", "career"], - "career": ["education", "family", "belief"], - "family": ["childhood", "career", "belief"], - "belief": ["career", "family"], -} +def _compact_era_hint(current_stage: str, user_profile_context: str) -> str: + """单行时代联想,可选附在进度后。""" + if not user_profile_context: + return "" -def _guided_voice_intro_line(background_voice: str) -> str: - """顶部角色描述(具体「接住」写法集中在 ## 你要做的)。""" + birth_year = None + birth_place = "" + for line in user_profile_context.split("\n"): + if "出生年份" in line: + try: + birth_year = int(line.split(":")[1].strip().replace("年", "")) + except (ValueError, IndexError): + pass + if "出生地" in line or "成长地" in line: + birth_place = line.split(":")[1].strip() if ":" in line else "" + + if not birth_year: + return "" + + age_range = STAGE_ERA_HINTS.get(current_stage, (0, 30)) + era_start = birth_year + age_range[0] + era_end = birth_year + age_range[1] + + era_events = [] + decade_events = { + 1950: "新中国成立初期、土地改革、抗美援朝", + 1960: "大跃进、三年自然灾害、中苏关系变化", + 1970: "文化大革命、知青上山下乡、中美建交", + 1980: "改革开放、恢复高考、个体经济兴起、电视普及", + 1990: "社会主义市场经济、下海潮、香港回归、互联网初期", + 2000: "加入WTO、房地产兴起、手机普及、北京奥运", + 2010: "移动互联网爆发、微信时代、共享经济、双创浪潮", + 2020: "新冠疫情、直播经济、人工智能崛起", + } + + for decade, events in decade_events.items(): + if era_start <= decade + 9 and era_end >= decade: + era_events.append(f"{decade}年代:{events}") + + if not era_events: + return "" + + place_hint = f" {birth_place}" if birth_place else "" return ( - "你是「岁月知己」,像老朋友陪用户聊人生。" - "短句为主,遵守下方「本轮回复长度」档位。" + f"时代联想(口述里一两句带过即可):约 {era_start}-{era_end} 年{place_hint};" + f"可提及 {era_events[0]}" + + (f";{era_events[1]}" if len(era_events) > 1 else "") + + "。" ) @@ -82,7 +111,7 @@ def get_opening_prompt( f"## 当前建议话题({stage_name})\n可以从中选一个来问:{topics_str}" ) task_question = ( - "2. **必须问一个问题**:接着问一个**具体、好回答**的问题,引导用户开始分享;" + "2. 接着问一个**具体、好回答**的问题,引导用户开始分享;" "优先落在上述还未聊透的方向上。不要问太宽泛的「有什么想聊的」。" ) _opening_examples = { @@ -132,44 +161,50 @@ def get_opening_prompt( else: topics_heading = ( f"## 当前阶段({stage_name})\n" - "访谈结构化槽位里,这一阶段的主要问题在素材侧**已有覆盖**。" - "开场要像老朋友重逢:接近况、接续上次聊过的事、或任何用户可能提起的新片段;" - "**禁止**为了凑问题而默认再从「童年在哪长大」等已覆盖模板重头盘问。" + "这一阶段的主要话题在素材侧**已有覆盖**。" + "开场要像老朋友重逢:接近况、接续上次聊过的事、或新片段;" + "**禁止**为了凑问题而从「童年在哪长大」等已覆盖模板重头盘问。" ) task_question = ( - "2. **问候 + 轻巧引子**:用一句温暖的话接上对话;若自然,可以问一个与近况、" - "想续上的回忆、或新冒出来的小事有关的问题。若不适合追问,问候 + 一句开放式引子即可。" + "2. **问候 + 轻巧引子**:温暖接话;若自然可问一个与近况或回忆有关的问题," + "不适合追问时问候 + 开放式引子即可。" ) style_examples = ( "示例(仅供参考风格):\n" '"嘿,又见面啦~ 今天有没有哪件事突然从脑子里冒出来,想跟我说说?"\n或\n' '"在的!上次聊到那儿我还记着,你后来还有想起什么细节吗?"' ) - profile_section = ( - f"\n## 用户基本信息\n{user_profile_context}\n" if user_profile_context else "" - ) + + profile_lines: List[str] = [] + if user_profile_context.strip(): + profile_lines.append(user_profile_context.strip()) + occ = get_occupation_chat_hint(occupation, background_voice) + if occ: + profile_lines.append(occ) + profile_section = "" + if profile_lines: + profile_section = "## 用户信息\n" + "\n".join(profile_lines) + "\n" + persona_key = normalize_interview_persona(persona) - opening_persona = get_opening_persona_line(persona_key) - persona_extra = f"\n## 访谈性格\n{opening_persona}\n" if opening_persona else "" - voice_block = get_background_voice_chat_block(background_voice) - voice_section = f"\n{voice_block}\n" if voice_block else "" - occ_hint = get_occupation_chat_hint(occupation, background_voice) - occ_section = f"\n{occ_hint}\n" if occ_hint else "" + persona_tone = get_interview_persona_tone_hint(persona_key) + voice_tone = get_background_voice_tone_hint(background_voice) + tone_bits = [t for t in (persona_tone, voice_tone) if t] + tone_paragraph = "" + if tone_bits: + tone_paragraph = " " + " ".join(tone_bits) + "\n\n" + bv = normalize_background_voice(background_voice) - if bv == "default": + opening_head = ( + "你是「岁月知己」。用户刚进对话,**还没说话**,请你先开口。" + "**短、像微信**,一两句问候 + 一个具体问题即可,不要排比、不要文学描写。\n\n" + ) + if bv != "default": opening_head = ( "你是「岁月知己」。用户刚进对话,**还没说话**,请你先开口。" - "**短、像微信**,一两句问候 + 一句具体问题即可,不要排比、不要文学描写。" + "**短**;两三句内问候 + 一个具体问题;不要排比、不要文学描写。\n\n" ) - else: - opening_head = ( - "你是「岁月知己」。用户刚进对话,**还没说话**,请你先开口。" - "**短;两三句内完成问候 + 一个具体问题**;不要排比、不要文学描写。" - ) - return f"""{opening_head} -{profile_section} -{topics_heading} -{persona_extra}{voice_section}{occ_section} + + return f"""{opening_head}{tone_paragraph}{profile_section}{topics_heading} ## 任务 1. 简短问候。 {task_question} @@ -184,102 +219,26 @@ def get_opening_prompt( 直接输出(仅自然口语,无 Markdown):""" -def _build_era_context(current_stage: str, user_profile_context: str) -> str: - """根据用户的人生阶段和出生年份,生成对应时代的历史/政治/文化背景提示""" - if not user_profile_context: - return "" - - birth_year = None - birth_place = "" - for line in user_profile_context.split("\n"): - if "出生年份" in line: - try: - birth_year = int(line.split(":")[1].strip().replace("年", "")) - except (ValueError, IndexError): - pass - if "出生地" in line or "成长地" in line: - birth_place = line.split(":")[1].strip() if ":" in line else "" - - if not birth_year: - return "" - - age_range = STAGE_ERA_HINTS.get(current_stage, (0, 30)) - era_start = birth_year + age_range[0] - era_end = birth_year + age_range[1] - - era_events = [] - decade_events = { - 1950: "新中国成立初期、土地改革、抗美援朝", - 1960: "大跃进、三年自然灾害、中苏关系变化", - 1970: "文化大革命、知青上山下乡、中美建交", - 1980: "改革开放、恢复高考、个体经济兴起、电视普及", - 1990: "社会主义市场经济、下海潮、香港回归、互联网初期", - 2000: "加入WTO、房地产兴起、手机普及、北京奥运", - 2010: "移动互联网爆发、微信时代、共享经济、双创浪潮", - 2020: "新冠疫情、直播经济、人工智能崛起", - } - - for decade, events in decade_events.items(): - if era_start <= decade + 9 and era_end >= decade: - era_events.append(f"{decade}年代:{events}") - - if not era_events: - return "" - - place_hint = f" {birth_place}" if birth_place else "" - return ( - f"\n## 时代参考(一两句带过即可,勿长篇)\n" - f"约 {era_start}-{era_end} 年{place_hint};可联想:{era_events[0]}" - + (f";{era_events[1]}" if len(era_events) > 1 else "") - + "\n" - ) - - -def _format_reply_length_section(current_mode: str) -> str: - """仅输出当前档位说明,减少重复 tokens。""" - safe = ( - current_mode - if current_mode in ("brief", "standard", "expanded") - else "standard" - ) - mode_desc = { - "brief": "一两句话,简短温暖;可带一个小问题也可以不带。", - "standard": "承接对方 + 最多一个具体问题;像朋友聊天,不写长段。", - "expanded": "用户本轮内容或情绪较浓——可多一两句承接核心点,再自然追问;仍控制在两段以内。", - } - desc = mode_desc[safe] - return f"""## 本轮回复长度 -**当前档位:{safe}** -{desc} -""" - - def get_guided_conversation_prompt( current_stage: str, empty_slots: List[str], filled_slots: Dict[str, str], - user_message: str, - conversation_turn_total: int = 0, - same_topic_turns: int = 0, all_stages_coverage: Optional[Dict[str, Dict]] = None, detected_user_stage: str = "", user_profile_context: str = "", persona: str = "default", memory_evidence_text: str = "", - reply_length_mode: str = "standard", background_voice: str = "default", occupation: str = "", ) -> str: - """生成状态感知的对话提示词。 - - ``user_message`` 仅用于启发式(新细节/闲聊/情绪),其原文**不会**写入本提示,用户话仅以最终 HumanMessage 传入模型。 - ``conversation_turn_total`` 为 Redis 全量历史的轮次数,不受窗口截断影响。 - """ + """生成状态感知的对话提示词;用户原话仅以 HumanMessage 传入,不写入本 system 文本。""" persona_key = normalize_interview_persona(persona) - persona_block = get_interview_persona_block(persona_key) - likely_new = heuristic_likely_new_detail(user_message) - likely_chit = heuristic_likely_chit_chat(user_message) - reply_length_section = _format_reply_length_section(reply_length_mode) + persona_tone = get_interview_persona_tone_hint(persona_key) + voice_tone = get_background_voice_tone_hint(background_voice) + tone_bits = [t for t in (persona_tone, voice_tone) if t] + tone_line = "" + if tone_bits: + tone_line = " " + " ".join(tone_bits) current_stage_name = STAGE_DISPLAY_ZH.get(current_stage, current_stage) user_stage_name = ( @@ -305,7 +264,6 @@ def get_guided_conversation_prompt( filled_slots_str = "\n".join(filled_info) if filled_info else "刚开始聊" progress_lines: List[str] = [] - uncovered_stages: List[str] = [] if all_stages_coverage: cur_cn = STAGE_DISPLAY_ZH.get(current_stage, current_stage) progress_lines.append(f"当前阶段:{cur_cn}") @@ -318,55 +276,34 @@ def get_guided_conversation_prompt( continue if filled_n == 0: progress_lines.append(f" {sname}:未聊") - uncovered_stages.append(sname) elif filled_n < total_n: progress_lines.append(f" {sname}:{filled_n}/{total_n}") progress_str = "\n".join(progress_lines) if progress_lines else "" - filled_count = len(filled_slots) - should_switch_topic = same_topic_turns >= 5 or ( - filled_count >= 3 and same_topic_turns >= 4 + active_stage = ( + detected_user_stage if user_jumped and detected_user_stage else current_stage ) - should_lighten_mood = ( - conversation_turn_total > 0 and conversation_turn_total % 7 == 0 - ) - should_try_new_stage = filled_count >= 4 and len(empty_slots) <= 1 + era_line = "" + if settings.chat_era_context_enabled: + era_line = _compact_era_hint(active_stage, user_profile_context) - related_stages = STAGE_RELATED_TOPICS.get(current_stage, []) - related_stages_str = "、".join([STAGE_DISPLAY_ZH.get(s, s) for s in related_stages]) - - emotional = heuristic_likely_emotional(user_message) - - tone_section = f"{persona_block}\n" if persona_block else "" - - followup_trigger_block = "## 本轮追问判定\n" - followup_trigger_block += ( - "总体原则见「对话方向」与「你要做的」;以下为仅本轮生效的判定:\n" - ) - if likely_new: - followup_trigger_block += ( - "**【本轮判定】用户补充了新细节 → 承接后须追问 1 句。**\n" - ) - elif emotional: - followup_trigger_block += ( - "**【本轮判定】用户情绪较浓 → 先好好共情承接,不必急着追问。**\n" + if user_jumped: + topic_desc = ( + f"你们原本在聊「{current_stage_name}」," + f"用户自然地聊到了「{user_stage_name}」——跟着他/她的节奏,别硬拉回。" ) else: - followup_trigger_block += ( - "(无特殊判定时按惯例:新线头追问一句,否则可只承接。)\n" - ) + topic_desc = f"你们在聊「{current_stage_name}」这阶段的话题。" - memoir_orientation_lines = [ - "## 对话方向", - "追问与承接**优先服务于人生故事与回忆录素材**,但不要让对方觉得你在走流程。", - "若用户**明显在闲聊**,以陪聊为主,**不要**用回忆录式问题打断。", - "若用户一边回忆一边开玩笑,先接情绪,再轻轻带回一个与经历相关的小问题。", - ] - if likely_chit: - memoir_orientation_lines.append( - "**【本轮偏闲聊】** → 以承接与陪聊为主;若用户自然带回经历,再追问。" - ) - memoir_orientation_block = "\n".join(memoir_orientation_lines) + "\n" + user_info_parts: List[str] = [] + if user_profile_context.strip(): + user_info_parts.append(user_profile_context.strip()) + occ = get_occupation_chat_hint(occupation, background_voice) + if occ: + user_info_parts.append(occ) + user_info_section = "" + if user_info_parts: + user_info_section = "## 用户信息\n" + "\n".join(user_info_parts) + "\n\n" memory_section = "" mem_trim = (memory_evidence_text or "").strip() @@ -374,87 +311,40 @@ def get_guided_conversation_prompt( memory_section = ( "## 相关记忆摘录(仅供衔接,禁止编造)\n" "以下为系统从用户**过往口述**中检索到的摘录,**不是**用户本轮亲口新说的内容。\n" - "承接时可自然用「你之前提过……」「上次你说到……」等口语,不要把摘录里的细节写成本轮用户新告诉你的事实;禁止编造摘录未出现的内容。\n\n" + "承接时可自然用「你之前提过……」等口语,不要把摘录里的细节写成本轮用户新说的;" + "禁止编造摘录未出现的内容。\n\n" f"{mem_trim}\n\n" ) - dynamic_guidance = "" - if user_jumped: - dynamic_guidance += f""" -- **用户正在聊「{user_stage_name}」的话题,跟着他/她的节奏走,不要试图拉回「{current_stage_name}」** -- 顺着用户的思路,帮他/她把这个话题聊深聊透 -- 这是很自然的事情,人回忆往事经常会跳跃,你要做的是陪伴和倾听""" - else: - if should_lighten_mood: - dynamic_guidance += "\n- 聊了一会儿了,可以适当轻松一下,聊点有趣的" - if should_switch_topic and empty_slots_readable: - if likely_new: - dynamic_guidance += f"\n- 若用户本轮**刚补充**新细节,请先就这一点追问一句,再自然转到未聊方向:{empty_slots_str}" - else: - dynamic_guidance += ( - f"\n- 这个话题聊得差不多了,可以自然转到:{empty_slots_str}" - ) - if should_try_new_stage and related_stages: - dynamic_guidance += ( - f"\n- 如果自然的话,可以尝试聊聊相关的话题,比如{related_stages_str}" - ) + progress_block = f"## 进度\n{progress_str}\n" if progress_str else "" + era_block = f"{era_line}\n" if era_line else "" - uncovered_hint = "" - if not user_jumped and uncovered_stages and should_try_new_stage: - uncovered_hint = f"\n- 还没聊到的人生阶段有:{'、'.join(uncovered_stages)},如果聊天中有自然的契机,可以轻轻带一句,但不要刻意" + return f"""你是「岁月知己」,像老朋友陪用户聊人生。短句为主,像微信聊天。{tone_line} - if user_jumped: - topic_desc = f"你们原本在聊「{current_stage_name}」,但用户自然地聊到了「{user_stage_name}」的内容" - else: - topic_desc = f"你们聊到了「{current_stage_name}」这个话题" - - profile_section = "" - if user_profile_context: - profile_section = f"\n## 用户基本信息\n{user_profile_context}\n" - - active_stage = ( - detected_user_stage if user_jumped and detected_user_stage else current_stage - ) - era_context = ( - _build_era_context(active_stage, user_profile_context) - if settings.chat_era_context_enabled - else "" - ) - - voice_block = get_background_voice_chat_block(background_voice) - voice_section = f"\n{voice_block}\n" if voice_block else "" - occ_hint = get_occupation_chat_hint(occupation, background_voice) - occ_section = f"\n{occ_hint}\n" if occ_hint else "" - intro_line = _guided_voice_intro_line(background_voice) - - prompt = f"""{intro_line} {topic_desc} -{reply_length_section} -{profile_section} -{voice_section}{occ_section} -## 本阶段已聊 + +{user_info_section}## 当前对话状态 +已聊: {filled_slots_str} -## 还可聊的方向 -{empty_slots_str} +还可聊的方向:{empty_slots_str} -## 进度 -{progress_str} -{era_context} -{memoir_orientation_block}{memory_section}{followup_trigger_block} -{tone_section} - -## 你要做的 -1. **先接住对方**——一句真诚回应,不要写成总结或讲评。 -2. 用户跳到别的人生阶段,跟着聊,别硬拉回。 -3. **最多追问一个**具体、好答的问题(参照上方「本轮追问判定」);无需追问时,只承接就好。 -4. 用户回「嗯」「对」之类,结合上文理解,承接或换个新角度,不要重复上一轮问过的事。 -5. 可用 [SPLIT] 分成**最多 2 条**消息。 -{dynamic_guidance}{uncovered_hint} +{progress_block}{era_block}{memory_section}## 你要做的 +- **先接住对方**——一句真诚回应,不要写成总结或讲评。 +- 你自己判断该追问还是只承接:有新线头就顺着问一个具体的事;情绪浓就好好接住、不必急着追问;明显闲聊就陪聊;用户只说「嗯」「对」则结合上文承接或换个角度。 +- 可以用「我能想象……」「那时候大概……」轻轻接话,但不可编造具体人名、时间、事件等你不知道的细节。 +- 不要重复上一轮问过的事;用户跳到别的人生阶段,跟着聊,别硬拉回。 +- 追问与承接服务于人生故事素材,但不要让对方觉得在走审问式流程;**最多**抛一个具体问题,也可以不追问。 +- 可用 [SPLIT] 分成**最多 2 条**消息。 ## 不要做的 {chat_output_rules()} 直接输出(仅自然口语,无 Markdown,无任何括号前缀或旁白):""" - return prompt + +__all__ = [ + "SLOT_NAME_MAP", + "get_guided_conversation_prompt", + "get_opening_prompt", +] diff --git a/api/app/agents/chat/prompts_profile.py b/api/app/agents/chat/prompts_profile.py index 14a1e31..ff1998a 100644 --- a/api/app/agents/chat/prompts_profile.py +++ b/api/app/agents/chat/prompts_profile.py @@ -88,13 +88,13 @@ def get_profile_extraction_prompt( 1. birth_year 填整数(四位数),如"65年出生"转为 1965 2. 如果用户在任一轮说过出生地/成长地/职业等,都要提取 3. 只提取明确提到的信息,不要猜测 -4. 如果没有提取到任何信息,返回空对象 {{}}""" +4. 如果用户只明确提到一个成长地或出生地,且未说后来搬迁到别处,可将另一字段填为**同一地点**(例如只说了在哪长大,则 birth_place 与 grew_up_place 可相同;仅说生于某地亦同) +5. 如果没有提取到任何信息,返回空对象 {{}}""" def get_profile_followup_prompt( missing_fields: List[str], filled_fields: Dict[str, str], - user_message: str, nickname: str = "", interview_stage_hint: str = "", ) -> str: @@ -119,9 +119,7 @@ def get_profile_followup_prompt( return f"""你是「岁月知己」。用户的基本信息已经收集完毕: {filled_str} -用户刚才说:"{user_message}" - -请对用户的回答做出温暖的回应,然后自然地过渡到人生故事的访谈。 +用户本轮消息在对话末尾。请对用户的回答做出温暖的回应,然后自然地过渡到人生故事的访谈。 可以说类似「了解了!那我们现在开始聊聊你的人生故事吧」这样的话;{stage_hint} **不要**默认只问童年,除非用户刚才聊的正是童年。 @@ -136,13 +134,17 @@ def get_profile_followup_prompt( ## 还需要了解 {missing_str} -用户刚才说:"{user_message}" +用户本轮原话在历史里(末尾 HumanMessage),勿在脑中丢开。 -请先对用户说的内容做出自然回应,然后**只**询问「还需要了解」中的信息(每次问 1-2 个)。 -语气要像朋友聊天一样自然亲切。 +## 你怎么说 +1. **先接住**:对用户说的内容做自然回应,像朋友在听。 +2. **话题优先**:若用户正在讲一段故事、回忆或情绪,**优先**顺着问一个与**当前话题**相关的具体小问题;不要为凑字段打断叙事。 +3. **资料穿插**:仅当用户本轮主要在确认、闲聊或话题与缺失资料完全无关时,再在末尾**温和插入 0~1 个**「还需要了解」里的问题。 +4. **轮换**:若上一轮你已就某一类资料追问过(见历史里助手发言),本轮**不要再问同一类**;改问其他缺失项,或本轮只承接、不提资料。 +5. 每次最多 **1~2 个**资料相关问点;能用推断就不要重复确认已知地/年。 严格禁止: -- **严禁再次询问「已知信息」中已列出的内容**(例如已知出生年份就绝不要再问哪年出生) +- **严禁再次询问「已知信息」中已列出的内容** - {chat_output_rules()} 回复格式:多条消息用 [SPLIT] 分隔。 diff --git a/api/app/agents/chat/stage_detection.py b/api/app/agents/chat/stage_detection.py index 47d47e0..93e9266 100644 --- a/api/app/agents/chat/stage_detection.py +++ b/api/app/agents/chat/stage_detection.py @@ -59,20 +59,15 @@ async def detect_primary_life_stage( user_message: str, current_stage: str, llm: Any, - *, - skip_llm: bool = False, ) -> str: """ 返回合法的人生阶段 key;失败时回退为 current_stage。 - skip_llm=True 时仅用关键词(短时/元话语等路径,不调阶段 LLM)。 + 每轮在启用时调用阶段检测 LLM(短句亦由模型判断,不用关键词替代)。 """ fb = normalize_chat_stage(current_stage, "childhood") if not settings.chat_stage_detection_enabled: return _keyword_fallback_stage(user_message, fb) - if skip_llm and settings.chat_stage_detection_skip_llm_on_insufficient_signal: - return _keyword_fallback_stage(user_message, fb) - if not llm: return _keyword_fallback_stage(user_message, fb) diff --git a/api/app/agents/chat/utterance_substance.py b/api/app/agents/chat/utterance_substance.py deleted file mode 100644 index 883165b..0000000 --- a/api/app/agents/chat/utterance_substance.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -启发式判断访谈「本轮」是否值得跑阶段 LLM / 记忆检索等高成本步骤。 - -短答、应答词、元话语(谈整理回忆本身而非人生经历)为 False;长文本或中等长度非常用词为 True。 -与配置 `chat_substantive_*` 配合;关闭启发式时恒为 True。 -""" - -from __future__ import annotations - -import re -from typing import Final - -from app.core.config import settings - -# 极短应答(整句精确匹配) -_SHORT_ACK_EXACT: Final[frozenset[str]] = frozenset( - { - "嗯", - "对", - "好", - "是", - "行的", - "是的", - "没有", - "行", - "噢", - "哦", - "好吧", - "嗯嗯", - "对对", - "好嘞", - "对的", - "没了", - "可以", - "就这样", - "还行", - "还好", - } -) - -# 元话语:谈回忆过程/访谈本身,不足以切换人生阶段或拉记忆证据 -_META_PROCESS: Final[re.Pattern[str]] = re.compile( - r"(回忆|想起).{0,20}(细节|收获|快忘|忘的|很多东西)" - r"|(整理|聊聊|谈到).{0,8}(回忆|访谈|记录)" - r"|最大的收获", - re.UNICODE, -) - - -def should_run_chat_stage_memory_heavy_work(text: str) -> bool: - """ - True:值得调用阶段检测 LLM、记忆检索(向量等)。 - False:仅用关键词阶段回退、跳过记忆检索。 - """ - if not settings.chat_substantive_heuristic_enabled: - return True - s = (text or "").strip() - if not s: - return False - # 元话语可略长,须在「达到 min_chars」分支之前判断 - if _META_PROCESS.search(s): - return False - min_chars = int(settings.chat_substantive_min_chars) - if len(s) >= min_chars: - return True - if s in _SHORT_ACK_EXACT: - return False - if len(s) <= 4: - # 极短:多为语气/应答 - if all(ch in "嗯哦噢对对好好的没行是的不没一下的了呗嘛呀啊" for ch in s): - return False - # 偏短但未命中噪音规则:默认走完整路径;5 字常见为有信息短句(旧逻辑用 >=6 会误杀) - return len(s) >= 5 diff --git a/api/app/core/config.py b/api/app/core/config.py index be53825..20dfce8 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -60,21 +60,6 @@ class Settings(BaseSettings): chat_interview_max_tokens: int = 380 chat_interview_max_segments: int = 2 chat_interview_max_chars_per_segment: int = 260 - # 访谈:用户本轮极短输入时的更紧上限(见 interview_reply_length) - chat_interview_brief_max_tokens: int = Field(default=260, ge=64, le=2048) - chat_interview_brief_max_chars_per_segment: int = Field(default=200, ge=32, le=2000) - # 访谈:有新细节/情绪/长段时的展开上限 - chat_interview_expanded_max_tokens: int = Field(default=520, ge=64, le=4096) - chat_interview_expanded_max_chars_per_segment: int = Field( - default=380, ge=32, le=4000 - ) - # 干部/军队推断命中时,standard 档在分桶基础上小幅放宽(brief/expanded 不变) - chat_interview_cadre_military_standard_extra_tokens: int = Field( - default=40, ge=0, le=512 - ) - chat_interview_cadre_military_standard_extra_chars: int = Field( - default=40, ge=0, le=2000 - ) chat_opening_max_tokens: int = 256 chat_profile_followup_max_tokens: int = 280 # Redis 全量历史仅用于 turn 计数;注入 LLM 时截取最近若干轮与字符预算 @@ -84,20 +69,12 @@ class Settings(BaseSettings): # 访谈:每轮用 LLM 判定用户主人生阶段并更新 MemoirState.current_stage;False 时仅用关键词 chat_stage_detection_enabled: bool = True chat_stage_detection_max_tokens: int = 128 - # True:短句/应答/元话语本轮仅用关键词判阶段,不调阶段 LLM(见 utterance_substance) - chat_stage_detection_skip_llm_on_insufficient_signal: bool = True - # strip 后主文低于该长度时启用精细启发式;达到或超过则视为有足够信息走完整路径 - chat_substantive_min_chars: int = Field(default=12, ge=1, le=256) - # False:每轮都跑阶段/记忆高成本路径(忽略短时启发式) - chat_substantive_heuristic_enabled: bool = True # 访谈性格:default | warm_listener | curious_guide(未知值按 default) chat_interview_persona: str = "default" # 访谈:按用户本轮话检索记忆并注入 prompt(关则不调 MemoryService.retrieve) chat_memory_retrieval_enabled: bool = True chat_memory_top_k: int = Field(default=8, ge=1, le=30) chat_memory_evidence_max_chars: int = Field(default=4096, ge=256, le=50_000) - # True:短时/元话语等(见 utterance_substance)本轮不跑向量检索 - chat_memory_retrieval_require_substantive: bool = True # ── Memoir 叙事忠实度检查(FidelityCheckAgent)──────────────── memoir_fidelity_check_enabled: bool = True @@ -118,8 +95,8 @@ class Settings(BaseSettings): ) # True 且 mode=llm:仅语音/ASR 段走 LLM 纠错;键盘输入仅规则归一(省每轮 LLM) chat_input_normalize_llm_voice_only: bool = True - # 资料收集:短时/应答/元话语不调用资料字段抽取 LLM(仍生成 followup) - chat_profile_extract_require_substantive: bool = True + # 资料收集:超过该对话轮次(Redis 全量轮次计数)仍有缺失字段时,强制进入访谈,避免长期问卷感 + chat_profile_max_turns: int = Field(default=8, ge=1, le=500) # Memoir Phase1:多 segment 一批一次 LLM 完成抽取+章节分类(失败回退逐段);单段且关时仍逐段 memoir_phase1_batch_llm_enabled: bool = False diff --git a/api/app/features/evaluation/execution_service.py b/api/app/features/evaluation/execution_service.py index 204012b..fbc0150 100644 --- a/api/app/features/evaluation/execution_service.py +++ b/api/app/features/evaluation/execution_service.py @@ -51,6 +51,11 @@ def _utterances_for_case(case: EvalCase) -> list[str]: return [str(u).strip() for u in raw if str(u).strip()] +def _assistant_text_for_eval_display(raw: str) -> str: + """评审与 transcript 展示:避免字面量 [SPLIT] 干扰 judge 阅读。""" + return (raw or "").replace("[SPLIT]", "\n") + + async def execute_eval_run( db: AsyncSession, *, @@ -127,12 +132,14 @@ async def execute_eval_run( for i, u in enumerate(utterances): if i >= len(replies): break - transcript_parts.append(f"用户: {u}\nAI: {replies[i]}") + transcript_parts.append( + f"用户: {u}\nAI: {_assistant_text_for_eval_display(replies[i])}" + ) prior = "" for idx, u in enumerate(utterances): if idx >= len(replies): break - reply = replies[idx] + reply = _assistant_text_for_eval_display(replies[idx]) lat = latencies[idx] if idx < len(latencies) else None tj = await judge.judge_turn( prior_transcript=prior, @@ -146,7 +153,7 @@ async def execute_eval_run( run_id=run.id, turn_index=idx, user_utterance=u, - assistant_reply=reply, + assistant_reply=replies[idx], duration_ms=lat, judge_scores_json=scores, judge_rationale=rationale, diff --git a/api/app/features/evaluation/importers/user_export_markdown.py b/api/app/features/evaluation/importers/user_export_markdown.py index 92722cb..670590a 100644 --- a/api/app/features/evaluation/importers/user_export_markdown.py +++ b/api/app/features/evaluation/importers/user_export_markdown.py @@ -5,6 +5,11 @@ from __future__ import annotations import re +def _normalize_export_ai_block(body: str) -> str: + """多段助手回复中的 [SPLIT] 在 Markdown 导出中改为换行,便于阅读。""" + return (body or "").replace("[SPLIT]", "\n").strip() + + def extract_user_utterances_from_export_md(text: str) -> list[str]: """匹配 ``**用户:**`` 块之间的正文。""" out: list[str] = [] @@ -41,6 +46,6 @@ def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]: u = (user_m.group(1) or "").strip() if not u or u == "(空)": continue - a = ((ai_m.group(1) if ai_m else "") or "").strip() - out.append((u, a)) + raw_ai = ((ai_m.group(1) if ai_m else "") or "").strip() + out.append((u, _normalize_export_ai_block(raw_ai))) return out diff --git a/api/app/features/evaluation/judge_schemas.py b/api/app/features/evaluation/judge_schemas.py index 05ddac2..5f4eea4 100644 --- a/api/app/features/evaluation/judge_schemas.py +++ b/api/app/features/evaluation/judge_schemas.py @@ -6,14 +6,16 @@ from pydantic import BaseModel, Field class TurnJudgeOutput(BaseModel): - """单轮对话质量(情绪强化 rubric 子集 + 总分)。""" + """单轮对话质量(情绪 + 流畅度/重复抑制 + 总分)。""" total_score: float = Field(ge=0, le=100) emotion_score: float = Field(default=0, ge=0, le=30) - information_score: float = Field(default=0, ge=0, le=25) - structure_score: float = Field(default=0, ge=0, le=15) - question_score: float = Field(default=0, ge=0, le=15) - persona_score: float = Field(default=0, ge=0, le=15) + information_score: float = Field(default=0, ge=0, le=20) + structure_score: float = Field(default=0, ge=0, le=10) + question_score: float = Field(default=0, ge=0, le=10) + persona_score: float = Field(default=0, ge=0, le=10) + repetition_score: float = Field(default=0, ge=0, le=10) + naturalness_score: float = Field(default=0, ge=0, le=10) rationale: str = "" diff --git a/api/app/features/evaluation/rubrics/conversation_v1.py b/api/app/features/evaluation/rubrics/conversation_v1.py index 3adbaf8..f819eaa 100644 --- a/api/app/features/evaluation/rubrics/conversation_v1.py +++ b/api/app/features/evaluation/rubrics/conversation_v1.py @@ -1,20 +1,24 @@ """对话评审 rubric 文本(v1)。""" -TURN_JUDGE_INSTRUCTIONS = """你是「岁月留书」访谈对话质量评审。根据下面维度给本轮 AI 回复打分(0-100 为 total_score,各子分上限已注明,总和应合理)。 +TURN_JUDGE_INSTRUCTIONS = """你是「岁月留书」访谈对话质量评审。根据下面维度给本轮 AI 回复打分(0-100 为 total_score,各子分上限已注明,子分之和应与 total_score 大体一致)。 维度(参考): - 情绪承接与共情(emotion_score,最高 30) -- 信息获取与追问(information_score,最高 25) -- 结构化访谈推进(structure_score,最高 15) -- 提问质量(question_score,最高 15) -- 人物理解与一致性(persona_score,最高 15) +- 信息获取与追问(information_score,最高 20) +- 结构化访谈推进(structure_score,最高 10) +- 提问质量(question_score,最高 10) +- 人物理解与一致性(persona_score,最高 10) +- 重复抑制(repetition_score,最高 10):是否重复了上 1~2 轮已问过的问题或同一资料槽;高度重复则低分 +- 自然流畅(naturalness_score,最高 10):是否像朋友聊天;有无不必要采访腔、总结腔、流程感 输出 JSON:**json** 字段名如下: -total_score, emotion_score, information_score, structure_score, question_score, persona_score, rationale +total_score, emotion_score, information_score, structure_score, question_score, persona_score, repetition_score, naturalness_score, rationale 只输出 JSON。""" -CONV_JUDGE_INSTRUCTIONS = """你是访谈整段对话评审。给定完整 transcript(用户与 AI 多轮),打一个综合 total_score(0-100),并给出 dimension_scores 对象(可为空对象),以及 rationale。 +CONV_JUDGE_INSTRUCTIONS = """你是访谈整段对话评审。给定完整 transcript(用户与 AI 多轮),打一个综合 total_score(0-100)。 + +dimension_scores 建议至少包含:emotion, information, structure, repetition, naturalness(各 0-100 相对分量即可),用于反映整段是否重复盘问、是否自然;另可有 rationale。 只输出 JSON:total_score, dimension_scores, rationale。""" diff --git a/api/tests/fixtures/evaluation_golden_cases.json b/api/tests/fixtures/evaluation_golden_cases.json new file mode 100644 index 0000000..4343711 --- /dev/null +++ b/api/tests/fixtures/evaluation_golden_cases.json @@ -0,0 +1,39 @@ +{ + "description": "金标准用户轮次:导入 regression set 后通过 POST .../cases 创建 case(body.user_utterances 取对应数组)。用于 baseline/candidate 实验对比。", + "cases": [ + { + "title": "implicit_confirm_hometown", + "user_utterances": [ + "你好。", + "我生于1962年,长在湖南长沙,小时候就在那边读的小学。", + "嗯,是的。", + "后来六十年代末家里也没搬,一直在那边。" + ] + }, + { + "title": "emotional_long_memory", + "user_utterances": [ + "今天想说说我们村那棵老槐树。", + "日本鬼子在的时候,为了望风把高过墙头的树都砍了,村里人硬是想办法保住学校里那棵,怕得很又舍不得。", + "我现在一说起来心里还堵得慌,那时候老人们夜里都不敢大声说话。" + ] + }, + { + "title": "childhood_rich_no_job_yet", + "user_utterances": [ + "我生在密云的一个村里,河边小时候常去玩。", + "那条河老早就改过道,我们村边上那段现在叫沙河了,明代就改过。", + "小学是庙改的,院子里有棵楸树,我们班男生都抱不过来。", + "操场北边是沙坨子,我们小孩爱往上爬,一脚深一脚浅的。" + ] + }, + { + "title": "interview_brief_ack", + "user_utterances": [ + "我那年冬天第一次见李老师,她借我一件棉袄穿。", + "嗯对,就是那件蓝布的。", + "后来她调到县城了,我再也没见过她。" + ] + } + ] +} diff --git a/api/tests/test_background_voice.py b/api/tests/test_background_voice.py index 8062362..eedb60b 100644 --- a/api/tests/test_background_voice.py +++ b/api/tests/test_background_voice.py @@ -1,8 +1,8 @@ """职业文本推断 background_voice(干部/军队)。""" from app.agents.chat.background_voice import ( - get_background_voice_chat_block, get_background_voice_narrative_block, + get_background_voice_tone_hint, infer_background_voice, normalize_background_voice, ) @@ -42,10 +42,13 @@ def test_narrative_editor_system_prompt_appends_voice() -> None: assert "背景文体(军队" in mil -def test_cadre_military_blocks_include_retirement_context() -> None: - chat_c = get_background_voice_chat_block("cadre") - chat_m = get_background_voice_chat_block("military") +def test_cadre_military_tone_hints_and_narrative_retirement_context() -> None: + chat_c = get_background_voice_tone_hint("cadre") + chat_m = get_background_voice_tone_hint("military") + assert chat_c and chat_m + assert "稳重" in chat_c or "分寸" in chat_c + assert "简洁" in chat_m or "利落" in chat_m narr_c = get_background_voice_narrative_block("cadre") narr_m = get_background_voice_narrative_block("military") - assert "退休" in chat_c and "退休" in narr_c - assert "退役" in chat_m and "退役" in narr_m + assert "退休" in narr_c + assert "退役" in narr_m diff --git a/api/tests/test_chat_stage_detection_gates.py b/api/tests/test_chat_stage_detection_gates.py index ba9c35c..2ccd8af 100644 --- a/api/tests/test_chat_stage_detection_gates.py +++ b/api/tests/test_chat_stage_detection_gates.py @@ -1,4 +1,4 @@ -"""阶段检测:skip_llm 路径不调 LLM。""" +"""阶段检测:启用时每轮调用 LLM(含短句)。""" from unittest.mock import MagicMock @@ -9,7 +9,9 @@ from app.agents.chat.stage_detection import detect_primary_life_stage @pytest.mark.asyncio -async def test_skip_llm_does_not_call_json_llm(monkeypatch: pytest.MonkeyPatch) -> None: +async def test_short_message_still_calls_stage_llm( + monkeypatch: pytest.MonkeyPatch, +) -> None: called: list[int] = [] async def _fake_allm(*_a: object, **_k: object) -> StageDetectionOutput: @@ -20,11 +22,6 @@ async def test_skip_llm_does_not_call_json_llm(monkeypatch: pytest.MonkeyPatch) "app.agents.chat.stage_detection.settings.chat_stage_detection_enabled", True, ) - monkeypatch.setattr( - "app.agents.chat.stage_detection.settings." - "chat_stage_detection_skip_llm_on_insufficient_signal", - True, - ) monkeypatch.setattr( "app.agents.chat.stage_detection.allm_json_call", _fake_allm, @@ -33,7 +30,6 @@ async def test_skip_llm_does_not_call_json_llm(monkeypatch: pytest.MonkeyPatch) "嗯", "childhood", MagicMock(), - skip_llm=True, ) - assert not called - assert out == "childhood" + assert called == [1] + assert out == "career" diff --git a/api/tests/test_experience_regressions.py b/api/tests/test_experience_regressions.py index 4f186ce..d1eb341 100644 --- a/api/tests/test_experience_regressions.py +++ b/api/tests/test_experience_regressions.py @@ -1,19 +1,9 @@ -"""面向体验的回归测试:保护"聊得下去"与"回忆录有文笔"两个核心目标。 +"""面向体验的回归测试:保护「聊得下去」与回忆录文笔两个核心目标。 -与 test_interview_prompts / test_interview_reply_length 不同,这组测试不验证字面规则, -而是验证体验目标的必要条件是否成立。改 agent 后如果这里挂了,说明体验方向可能在退步。 +与 test_interview_prompts 不同,这组测试不绑定已删除的启发式分档; +访谈侧仅验证 prompt 仍包含关键行为指引。 """ -from types import SimpleNamespace - -import pytest - -from app.agents.chat.interview_reply_length import ( - ReplyLengthMode, - compute_reply_plan, - heuristic_likely_emotional, - heuristic_likely_new_detail, -) from app.agents.chat.prompts_conversation import ( get_guided_conversation_prompt, get_opening_prompt, @@ -26,134 +16,59 @@ from app.agents.memoir.prompts import ( from app.features.memoir import story_pipeline_sync as sps -def _fake_settings(**overrides: object) -> SimpleNamespace: - base = { - "chat_interview_max_tokens": 380, - "chat_interview_max_segments": 2, - "chat_interview_max_chars_per_segment": 260, - "chat_interview_brief_max_tokens": 260, - "chat_interview_brief_max_chars_per_segment": 200, - "chat_interview_expanded_max_tokens": 520, - "chat_interview_expanded_max_chars_per_segment": 380, - } - base.update(overrides) - return SimpleNamespace(**base) - - -# ── 聊天体验回归 ────────────────────────────────────────────────── - - class TestChatExperienceRegressions: - """保护"聊得下去"体验。""" + """保护「聊得下去」体验。""" - def test_emotional_short_message_not_brief(self) -> None: - """用户表达强情绪时不应压成 brief,要给模型足够空间承接情绪。""" - p = compute_reply_plan( - "我妈走了以后,我真的很难过", - background_voice=None, - settings=_fake_settings(), - ) - assert p.mode != ReplyLengthMode.brief - assert heuristic_likely_emotional("我妈走了以后,我真的很难过") is True - - def test_emotional_medium_message_gets_expanded(self) -> None: - """中等长度且有情绪的消息应该给 expanded 档位,让模型有空间好好共情。""" - msg = "那年我奶奶去世的时候,我在外地上学,没来得及见最后一面,到现在想起来还是特别难过" - assert len(msg) >= 40 - p = compute_reply_plan(msg, background_voice=None, settings=_fake_settings()) - assert p.mode == ReplyLengthMode.expanded - - def test_new_detail_triggers_followup_hint_in_prompt(self) -> None: - """用户提到新人名/新关系时,prompt 应明确要求追问(而不是只感慨)。""" + def test_guided_prompt_encourages_flexible_followup(self) -> None: + """模型自主判断追问 vs 承接,不应再出现「本轮判定」硬分支文案。""" p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place", "people"], filled_slots={}, - user_message="那个女生叫小芳,是我同桌", - conversation_turn_total=2, - same_topic_turns=2, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", ) - assert "本轮判定" in p - assert "追问" in p + assert "本轮追问判定" not in p + assert "你自己判断" in p or "该追问" in p - def test_emotional_prompt_prioritizes_empathy(self) -> None: - """用户情绪浓时 prompt 应出现情绪承接优先的提示。""" - p = get_guided_conversation_prompt( - current_stage="family", - empty_slots=["relationship"], - filled_slots={}, - user_message="想起我妈,心酸", - conversation_turn_total=3, - same_topic_turns=1, - all_stages_coverage=None, - detected_user_stage="family", - user_profile_context="", - persona="default", - ) - assert "情绪" in p - - def test_chit_chat_does_not_force_memoir_question(self) -> None: - """闲聊时 prompt 不应强行追问回忆录问题。""" - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place"], - filled_slots={}, - user_message="今天天气真好哈哈", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - ) - assert "偏闲聊" in p - assert "陪聊" in p - - def test_topic_switch_not_triggered_at_3_turns(self) -> None: - """聊了 3 轮同话题不应该就要换——用户可能还想继续。""" + def test_guided_prompt_topic_switch_not_hardcoded_in_prompt(self) -> None: p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place", "people", "emotion"], filled_slots={"daily_life": "放学后去河边玩"}, - user_message="对啊,那条河特别浅", - conversation_turn_total=4, - same_topic_turns=3, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", ) assert "聊得差不多了" not in p - def test_prompt_intro_mentions_empathy_first(self) -> None: - """prompt 开头应强调"先接住对方"而不是"控制字数"。""" + def test_guided_prompt_intro_mentions_connect_first(self) -> None: p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place"], filled_slots={}, - user_message="小时候家里穷", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", ) assert "接住" in p - -# ── 回忆录文风回归 ────────────────────────────────────────────────── + def test_opening_prompt_stays_short_task_shape(self) -> None: + p = get_opening_prompt( + current_stage="childhood", + empty_slots_readable=["成长的地方"], + user_profile_context="", + persona="default", + ) + assert "问候" in p + assert "任务" in p or "具体问题" in p class TestMemoirStyleRegressions: - """保护"回忆录有文笔"体验。""" + """保护「回忆录有文笔」体验。""" def test_title_prompt_allows_literary_expression(self) -> None: - """标题 prompt 不应禁止一切文学性表达——只禁止虚构。""" prompt = get_creative_title_json_prompt( stage="childhood", emotion="warm", @@ -163,7 +78,6 @@ class TestMemoirStyleRegressions: assert "平实" not in prompt.lower() def test_title_prompt_uses_facts_only_not_plain(self) -> None: - """标题 prompt 应该走 facts_only(允许文采),而不是 plain(要求平实)。""" prompt = get_creative_title_json_prompt( stage="childhood", emotion="warm", @@ -172,13 +86,11 @@ class TestMemoirStyleRegressions: assert "优雅" in prompt or "书面语" in prompt or "文采" in prompt def test_narrative_prompt_encourages_literary_quality(self) -> None: - """叙事 prompt 应该鼓励"有温度"的书面语,不只是"清楚记事"。""" sys_prompt = get_narrative_editor_system_prompt() assert "温度" in sys_prompt or "优雅" in sys_prompt assert "画面感" in sys_prompt or "生动" in sys_prompt def test_narrative_json_prompt_allows_emotion_rendering(self) -> None: - """叙事 JSON prompt 应允许情感渲染(不新增事实前提下)。""" prompt = get_narrative_json_prompt( stage="childhood", slots={"turning_event": "爷爷背我过河"}, @@ -187,7 +99,6 @@ class TestMemoirStyleRegressions: assert "文采服务于真实" in prompt or "虚构描写" in prompt def test_merge_shrink_only_on_extreme_loss(self) -> None: - """合并场景只有在极端缩水时才触发 fallback,不因正常重组而退回。""" existing = "这是一段已有的故事正文,讲述了童年在河边的回忆。" * 20 assert len(existing) > 400 half_content = existing[: len(existing) // 2] diff --git a/api/tests/test_interview_prompts.py b/api/tests/test_interview_prompts.py index 9762ab0..cfe21c6 100644 --- a/api/tests/test_interview_prompts.py +++ b/api/tests/test_interview_prompts.py @@ -1,4 +1,4 @@ -"""访谈提示词:追问触发与性格(Persona)拼接回归。""" +"""访谈提示词:精简结构与人格/语气融合回归。""" from langchain_core.messages import AIMessage, HumanMessage, SystemMessage @@ -10,37 +10,51 @@ from app.agents.chat.prompts_conversation import ( ) -def test_guided_prompt_contains_mandatory_followup_when_heuristic_matches(): - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place", "people"], - filled_slots={}, - user_message="厉害吧 那个女生叫娟娟", - conversation_turn_total=1, - same_topic_turns=1, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - ) - assert "本轮追问判定" in p - assert "本轮判定" in p - - def test_guided_prompt_does_not_embed_raw_user_message_in_system_text(): p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place"], filled_slots={}, - user_message="__USER_SECRET_PHRASE_XYZ__", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", ) assert "__USER_SECRET_PHRASE_XYZ__" not in p + # Signature no longer takes user_message; secret would only leak via profile + p2 = get_guided_conversation_prompt( + current_stage="childhood", + empty_slots=["place"], + filled_slots={}, + detected_user_stage="childhood", + user_profile_context="__USER_SECRET_PROFILE__", + persona="default", + ) + assert "__USER_SECRET_PROFILE__" in p2 + + +def test_guided_prompt_mentions_empathy_and_self_judgment(): + p = get_guided_conversation_prompt( + current_stage="childhood", + empty_slots=["place"], + filled_slots={}, + detected_user_stage="childhood", + user_profile_context="", + persona="default", + ) + assert "接住对方" in p + assert "你自己判断" in p or "该追问" in p + + +def test_guided_prompt_persona_tone_warm_listener(): + p = get_guided_conversation_prompt( + current_stage="education", + empty_slots=["school"], + filled_slots={}, + detected_user_stage="education", + user_profile_context="", + persona="warm_listener", + ) + assert "倾听" in p or "柔和" in p def test_guided_prompt_persona_curious_guide(): @@ -48,15 +62,11 @@ def test_guided_prompt_persona_curious_guide(): current_stage="education", empty_slots=["school"], filled_slots={}, - user_message="还行吧", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, detected_user_stage="education", user_profile_context="", persona="curious_guide", ) - assert "好奇引导" in p + assert "细节" in p def test_normalize_interview_persona_unknown_falls_back(): @@ -64,32 +74,11 @@ def test_normalize_interview_persona_unknown_falls_back(): assert normalize_interview_persona("") == "default" -def test_guided_prompt_contains_memoir_orientation(): - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place"], - filled_slots={}, - user_message="后来我就去上班了", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - ) - assert "对话方向" in p - assert "人生故事" in p - - def test_guided_prompt_contains_memory_section_when_evidence(): p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place"], filled_slots={}, - user_message="后来我就去上班了", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", @@ -100,75 +89,17 @@ def test_guided_prompt_contains_memory_section_when_evidence(): assert "1990年生于上海" in p -def test_guided_prompt_chit_chat_hint(): +def test_guided_prompt_military_tone_in_system(): p = get_guided_conversation_prompt( current_stage="childhood", empty_slots=["place"], filled_slots={}, - user_message="今天天气真好哈哈", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - ) - assert "偏闲聊" in p - - -def test_guided_prompt_reply_length_section_explicit_expanded(): - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place"], - filled_slots={}, - user_message="还行吧", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - reply_length_mode="expanded", - ) - assert "本轮回复长度" in p - assert "当前档位:expanded" in p - assert "expanded" in p - - -def test_guided_prompt_reply_length_explicit_brief(): - """档位由 Agent 的 ReplyPlan 传入,prompt 不再自行推导。""" - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place"], - filled_slots={}, - user_message="嗯", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, - detected_user_stage="childhood", - user_profile_context="", - persona="default", - reply_length_mode="brief", - ) - assert "当前档位:brief" in p - - -def test_guided_prompt_background_voice_military() -> None: - p = get_guided_conversation_prompt( - current_stage="childhood", - empty_slots=["place"], - filled_slots={}, - user_message="后来我就去上班了", - conversation_turn_total=0, - same_topic_turns=0, - all_stages_coverage=None, detected_user_stage="childhood", user_profile_context="", persona="default", background_voice="military", ) - assert "背景语气:军队语境" in p - assert "先接住对方" in p + assert "简洁" in p or "利落" in p or "得体" in p def test_opening_prompt_military_has_examples_note() -> None: @@ -180,11 +111,9 @@ def test_opening_prompt_military_has_examples_note() -> None: background_voice="military", ) assert "军队语境" in p - assert "(军队语境:简洁" in p or "军队语境" in p def test_format_history_string_includes_system_for_debug_logs() -> None: - """log_agent_payload 依赖本函数时需包含 System,避免生产上丢失主 system prompt。""" s = format_history_string( [ SystemMessage(content="SYS_INSTRUCTIONS"), diff --git a/api/tests/test_interview_reply_length.py b/api/tests/test_interview_reply_length.py deleted file mode 100644 index f6a5356..0000000 --- a/api/tests/test_interview_reply_length.py +++ /dev/null @@ -1,207 +0,0 @@ -"""访谈回复长度策略:分桶与 InterviewAgent 的 max_tokens / 截断联动。""" - -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from app.agents.chat.helpers import HistoryWithWindow -from app.agents.chat.interview_reply_length import ( - ReplyLengthMode, - bump_reply_plan_for_background_voice, - compute_reply_plan, -) -from app.agents.state_schema import MemoirStateSchema - - -def _fake_settings(**overrides: object) -> SimpleNamespace: - base = { - "chat_interview_max_tokens": 380, - "chat_interview_max_segments": 2, - "chat_interview_max_chars_per_segment": 260, - "chat_interview_brief_max_tokens": 260, - "chat_interview_brief_max_chars_per_segment": 200, - "chat_interview_expanded_max_tokens": 520, - "chat_interview_expanded_max_chars_per_segment": 380, - } - base.update(overrides) - return SimpleNamespace(**base) - - -def test_strategy_brief_when_very_short() -> None: - s = compute_reply_plan( - "x" * 5, - background_voice=None, - settings=_fake_settings(), - ) - assert s.mode == ReplyLengthMode.brief - assert s.max_tokens == 260 - assert s.max_chars_per_segment == 200 - - -def test_strategy_standard_mid_length() -> None: - s = compute_reply_plan( - "x" * 50, - background_voice=None, - settings=_fake_settings(), - ) - assert s.mode == ReplyLengthMode.standard - assert s.max_tokens == 380 - assert s.max_chars_per_segment == 260 - - -def test_strategy_long_chit_stays_standard() -> None: - msg = "今天天气真好哈哈" * 11 - assert len(msg) >= 80 - s = compute_reply_plan( - msg, - background_voice=None, - settings=_fake_settings(), - ) - assert s.mode == ReplyLengthMode.standard - assert s.max_tokens == 380 - - -def test_strategy_long_with_new_detail_expanded() -> None: - base = "第一次认识他" - msg = (base + "x" * 200)[:120] - assert len(msg) == 120 - s = compute_reply_plan( - msg, - background_voice=None, - settings=_fake_settings(), - ) - assert s.mode == ReplyLengthMode.expanded - assert s.max_tokens == 520 - assert s.max_chars_per_segment == 380 - - -def test_strategy_boundary_len_20_brief_len_21_standard() -> None: - a = compute_reply_plan( - "x" * 20, - background_voice=None, - settings=_fake_settings(), - ) - b = compute_reply_plan( - "x" * 21, - background_voice=None, - settings=_fake_settings(), - ) - assert a.mode == ReplyLengthMode.brief - assert b.mode == ReplyLengthMode.standard - - -def test_bump_standard_only_for_cadre_military() -> None: - s0 = compute_reply_plan( - "x" * 50, - background_voice=None, - settings=_fake_settings(), - ) - bumped = bump_reply_plan_for_background_voice( - s0, - background_voice="cadre", - settings=_fake_settings( - chat_interview_cadre_military_standard_extra_tokens=40, - chat_interview_cadre_military_standard_extra_chars=40, - ), - ) - assert bumped.max_tokens == s0.max_tokens + 40 - assert bumped.max_chars_per_segment == s0.max_chars_per_segment + 40 - - brief = compute_reply_plan( - "x" * 5, - background_voice=None, - settings=_fake_settings( - chat_interview_cadre_military_standard_extra_tokens=40, - chat_interview_cadre_military_standard_extra_chars=40, - ), - ) - same = bump_reply_plan_for_background_voice( - brief, - background_voice="military", - settings=_fake_settings( - chat_interview_cadre_military_standard_extra_tokens=40, - chat_interview_cadre_military_standard_extra_chars=40, - ), - ) - assert same.max_tokens == brief.max_tokens - - -def test_plan_short_information_rich_is_standard_not_brief() -> None: - """短句但含高密度锚点(如「那年」「我爸」)→ standard,避免误压成 brief。""" - p = compute_reply_plan( - "那年我爸突然病了", - background_voice=None, - settings=_fake_settings(), - ) - assert p.mode == ReplyLengthMode.standard - assert p.information_rich is True - - -def test_plan_long_chit_stays_standard_not_expanded() -> None: - """长段明显闲聊 → standard,不因字数进入 expanded。""" - msg = "今天天气真好哈哈" * 11 - assert len(msg) >= 80 - p = compute_reply_plan( - msg, - background_voice=None, - settings=_fake_settings(), - ) - assert p.mode == ReplyLengthMode.standard - assert p.likely_chit_chat is True - - -def test_strategy_boundary_len_79_standard_len_80_long_branch() -> None: - a = compute_reply_plan( - "x" * 79, - background_voice=None, - settings=_fake_settings(), - ) - b = compute_reply_plan( - "x" * 80, - background_voice=None, - settings=_fake_settings(), - ) - assert a.mode == ReplyLengthMode.standard - assert b.mode == ReplyLengthMode.standard - - -@pytest.mark.asyncio -async def test_interview_agent_passes_strategy_to_bind_and_truncate() -> None: - """同一套 strategy 用于 llm.bind(max_tokens=) 与 truncate_chat_segments。""" - from app.agents.chat import interview_agent as ia - - mock_llm = MagicMock() - mock_bound = MagicMock() - mock_bound.ainvoke = AsyncMock( - return_value=MagicMock(content="你好,后来呢?[SPLIT]还有吗?") - ) - mock_llm.bind = MagicMock(return_value=mock_bound) - - agent = ia.InterviewAgent() - agent.llm = mock_llm - - state = MemoirStateSchema( - stage_order=["childhood"], - current_stage="childhood", - covered_stages=[], - slots={"childhood": {}}, - ) - - with patch( - "app.agents.chat.interview_agent.get_history_with_window", - new=AsyncMock(return_value=HistoryWithWindow(turn_total=0, window=[])), - ): - turn = await agent.generate_response_with_state( - conversation_id="c1", - user_message="x" * 100 + "第一次认识他", - memoir_state=state, - ) - - mock_llm.bind.assert_called_once() - call_kw = mock_llm.bind.call_args[1] - assert call_kw["max_tokens"] == 520 - - assert len(turn.messages) >= 1 - for seg in turn.messages: - assert len(seg) <= 380 diff --git a/api/tests/test_user_export_markdown_split.py b/api/tests/test_user_export_markdown_split.py new file mode 100644 index 0000000..20f41c3 --- /dev/null +++ b/api/tests/test_user_export_markdown_split.py @@ -0,0 +1,24 @@ +"""导出 Markdown 中 AI 块的 [SPLIT] 规范化。""" + +from app.features.evaluation.importers.user_export_markdown import ( + extract_dialogue_turns_from_export_md, +) + + +def test_extract_dialogue_turns_replaces_split_in_ai(): + md = """ +#### 轮次 1 + +**用户:** +你好。 + +**AI:** +第一段[SPLIT]第二段 +""" + turns = extract_dialogue_turns_from_export_md(md) + assert len(turns) == 1 + u, a = turns[0] + assert u == "你好。" + assert "[SPLIT]" not in a + assert "第一段" in a and "第二段" in a + assert "\n" in a diff --git a/api/tests/test_utterance_substance.py b/api/tests/test_utterance_substance.py deleted file mode 100644 index 77820c9..0000000 --- a/api/tests/test_utterance_substance.py +++ /dev/null @@ -1,46 +0,0 @@ -"""访谈轮次「实质内容」启发式(阶段 LLM / 记忆检索门控)。""" - -import pytest - -from app.agents.chat import utterance_substance as us - - -@pytest.fixture -def heuristic_on(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - "app.agents.chat.utterance_substance.settings.chat_substantive_heuristic_enabled", - True, - ) - monkeypatch.setattr( - "app.agents.chat.utterance_substance.settings.chat_substantive_min_chars", - 12, - ) - - -def test_substantive_long_sentence(heuristic_on: None) -> None: - assert us.should_run_chat_stage_memory_heavy_work( - "我在下乡插队时住在生产队仓库里,印象最深的是冬天的早晨。" - ) - - -def test_non_substantive_ack(heuristic_on: None) -> None: - assert not us.should_run_chat_stage_memory_heavy_work("嗯") - assert not us.should_run_chat_stage_memory_heavy_work("对对") - - -def test_non_substantive_meta_process(heuristic_on: None) -> None: - assert not us.should_run_chat_stage_memory_heavy_work("我回忆起了许多快忘的细节") - - -def test_five_char_short_substantive_not_skipped(heuristic_on: None) -> None: - """五字短句未命中应答/元话语时不应被当成非实质(评审:旧 >=6 会误杀)。""" - assert len("我进了工厂") == 5 - assert us.should_run_chat_stage_memory_heavy_work("我进了工厂") - - -def test_heuristic_disabled_always_true(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - "app.agents.chat.utterance_substance.settings.chat_substantive_heuristic_enabled", - False, - ) - assert us.should_run_chat_stage_memory_heavy_work("嗯")