feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI

数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等环节同步接入线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组装评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;另有若干新增/扩充内容(原提交说明在此处截断,具体见变更文件列表)。
This commit is contained in:
Kevin
2026-04-08 15:37:09 +08:00
parent 6772e1269c
commit 309a051038
109 changed files with 4125 additions and 858 deletions

View File

@@ -3,7 +3,7 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List
from typing import Any, List
@dataclass(frozen=True)
@@ -12,3 +12,4 @@ class AgentChatTurn:
messages: List[str]
skip_tts: bool = False
memory_retrieval_trace: dict[str, Any] | None = None

View File

@@ -12,17 +12,17 @@ from app.agents.chat.agent_turn import AgentChatTurn
from app.agents.chat.helpers import format_history_string, get_history_with_window
from app.agents.chat.personas import normalize_interview_persona
from app.agents.chat.prompt_context import ChatPromptContext
from app.agents.chat.stage_detection import keyword_fallback_primary_stage
from app.agents.chat.prompts_conversation import (
SLOT_NAME_MAP,
get_opening_prompt,
)
from app.agents.state_schema import MemoirStateSchema
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
from app.agents.chat.stage_detection import keyword_fallback_primary_stage
from app.agents.state_schema import MemoirStateSchema
from app.core.agent_logging import (
agent_span,
log_agent_payload,
@@ -92,6 +92,8 @@ class InterviewAgent:
background_voice: str = "default",
normalized_user_message: Optional[str] = None,
occupation: str = "",
profile_birth_year: int | None = None,
profile_era_place: str = "",
) -> AgentChatTurn:
"""生成状态感知的访谈回复,不持久化(由 Orchestrator 负责)"""
if not self.llm:
@@ -136,6 +138,8 @@ class InterviewAgent:
memory_evidence_text=memory_evidence_text,
background_voice=background_voice,
occupation=occupation,
profile_birth_year=profile_birth_year,
profile_era_place=profile_era_place,
)
system_prompt = ctx.guided_system_prompt()
messages: List[Any] = [SystemMessage(content=system_prompt)]

View File

@@ -13,15 +13,15 @@ from app.agents.chat.agent_turn import AgentChatTurn
from app.agents.chat.helpers import get_history_with_window
from app.agents.chat.interview_agent import InterviewAgent
from app.agents.chat.profile_agent import ProfileAgent
from app.agents.state_schema import MemoirStateSchema
from app.core.agent_logging import agent_summary_enabled, log_agent_detail
from app.core.logging import get_logger
from app.agents.chat.stage_detection import (
detect_primary_life_stage,
life_stage_display_name,
)
from app.agents.state_schema import MemoirStateSchema
from app.core.agent_logging import agent_summary_enabled, log_agent_detail
from app.core.config import settings
from app.core.dependencies import get_llm_provider
from app.core.logging import get_logger
from app.features.conversation.input_normalize import normalize_chat_input_for_agent
from app.features.memoir.state_service import get_or_create_state, switch_stage
@@ -48,28 +48,35 @@ async def _fetch_interview_memory_evidence(
db: AsyncSession,
user_id: str,
user_message: str,
) -> str:
"""按本轮用户话检索记忆格式化短文本;失败或未启用时返回空串"""
) -> tuple[str, dict | None]:
"""按本轮用户话检索记忆格式化短文本 + 可入库 trace稳定 id"""
from app.core.dependencies import get_embedding_provider
from app.features.memory.evidence_format import format_evidence_chunks_for_prompt
from app.features.memory.retrieval_trace import (
chat_memory_retrieval_trace_from_bundle,
)
from app.features.memory.service import MemoryService
if not settings.chat_memory_retrieval_enabled:
logger.debug(
"event=chat_memory_retrieval_skip reason=disabled user_id={}", user_id
)
return ""
return "", None
msg = (user_message or "").strip()
if not msg:
logger.debug(
"event=chat_memory_retrieval_skip reason=empty user_id={}", user_id
)
return ""
return "", None
try:
emb = get_embedding_provider()
ms = MemoryService(db, embedding_provider=emb)
bundle = await ms.retrieve(user_id, msg, top_k=settings.chat_memory_top_k)
top_k = settings.chat_memory_top_k
bundle = await ms.retrieve(user_id, msg, top_k=top_k)
bd = bundle.model_dump()
trace = chat_memory_retrieval_trace_from_bundle(
bd, top_k=top_k, query_len=len(msg)
)
text = format_evidence_chunks_for_prompt(bd)
t = (text or "").strip()
if not t:
@@ -77,7 +84,7 @@ async def _fetch_interview_memory_evidence(
"event=memory_evidence_for_prompt user_id={} formatted_chars=0",
user_id,
)
return ""
return "", trace
max_c = settings.chat_memory_evidence_max_chars
if len(t) > max_c:
t = t[: max_c - 3] + "..."
@@ -86,14 +93,14 @@ async def _fetch_interview_memory_evidence(
user_id,
len(t),
)
return t
return t, trace
except Exception as e:
try:
await db.rollback()
except Exception as rollback_error:
logger.warning("访谈记忆检索失败后回滚也失败: {}", rollback_error)
logger.warning("访谈记忆检索失败: {}", e)
return ""
return "", None
class ChatOrchestrator:
@@ -197,12 +204,15 @@ class ChatOrchestrator:
conversation_id,
len(responses),
)
return AgentChatTurn(messages=responses, skip_tts=False)
return AgentChatTurn(
messages=responses, skip_tts=False, memory_retrieval_trace=None
)
except Exception as e:
logger.error(f"资料收集处理失败: {e}", exc_info=True)
return AgentChatTurn(
messages=["不好意思刚才没接住,你再说一遍好吗?"],
skip_tts=False,
memory_retrieval_trace=None,
)
# --- 正式访谈模式 ---
@@ -262,10 +272,17 @@ class ChatOrchestrator:
background_voice = infer_background_voice(user.occupation)
occupation = user.occupation or ""
memory_evidence_text = await _fetch_interview_memory_evidence(
memory_evidence_text, mem_trace = await _fetch_interview_memory_evidence(
db, user_id, normalized_user_message
)
profile_birth_year = user.birth_year if user else None
profile_era_place = ""
if user:
profile_era_place = (
(user.birth_place or user.grew_up_place or "").strip()
)
turn = await self.interview_agent.generate_response_with_state(
conversation_id=conversation_id,
user_message=user_message,
@@ -276,6 +293,8 @@ class ChatOrchestrator:
background_voice=background_voice,
normalized_user_message=normalized_user_message,
occupation=occupation,
profile_birth_year=profile_birth_year,
profile_era_place=profile_era_place,
)
if agent_summary_enabled():
logger.info(
@@ -287,6 +306,12 @@ class ChatOrchestrator:
len(turn.messages),
turn.skip_tts,
)
if mem_trace is not None:
return AgentChatTurn(
messages=turn.messages,
skip_tts=turn.skip_tts,
memory_retrieval_trace=mem_trace,
)
return turn
async def extract_profile_from_message(
@@ -349,6 +374,8 @@ class ChatOrchestrator:
background_voice: str = "default",
normalized_user_message: str | None = None,
occupation: str = "",
profile_birth_year: int | None = None,
profile_era_place: str = "",
) -> AgentChatTurn:
"""委托 InterviewAgent 生成访谈回复(持久化由调用方负责)。"""
return await self.interview_agent.generate_response_with_state(
@@ -361,6 +388,8 @@ class ChatOrchestrator:
background_voice=background_voice,
normalized_user_message=normalized_user_message,
occupation=occupation,
profile_birth_year=profile_birth_year,
profile_era_place=profile_era_place,
)
def detect_user_stage(self, user_message: str) -> str:

View File

@@ -14,17 +14,17 @@ from app.agents.chat.prompts_profile import (
get_profile_followup_prompt,
get_profile_greeting_prompt,
)
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
from app.agents.chat.schemas import ProfileExtractionOutput
from app.core.agent_logging import agent_span, log_agent_payload, log_agent_summary
from app.core.config import settings
from app.core.dependencies import get_llm_provider
from app.core.llm_call import allm_json_call
from app.core.logging import get_logger
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
logger = get_logger(__name__)

View File

@@ -20,6 +20,8 @@ class ChatPromptContext:
memory_evidence_text: str = ""
background_voice: str = "default"
occupation: str = ""
profile_birth_year: int | None = None
profile_era_place: str = ""
def guided_system_prompt(self) -> str:
"""用户原话仅以对话历史 + HumanMessage 注入模型。"""
@@ -36,4 +38,6 @@ class ChatPromptContext:
memory_evidence_text=self.memory_evidence_text,
background_voice=self.background_voice,
occupation=self.occupation,
profile_birth_year=self.profile_birth_year,
profile_era_place=self.profile_era_place,
)

View File

@@ -4,6 +4,13 @@ Chat 模块提示词:用户资料收集 + 对话访谈
from app.agents.chat.output_rules import chat_output_rules
# Conversation prompts对话访谈
from app.agents.chat.prompts_conversation import (
SLOT_NAME_MAP,
get_guided_conversation_prompt,
get_opening_prompt,
)
# Profile prompts用户资料收集
from app.agents.chat.prompts_profile import (
PROFILE_FIELD_NAMES,
@@ -14,13 +21,6 @@ from app.agents.chat.prompts_profile import (
get_profile_greeting_prompt,
)
# Conversation prompts对话访谈
from app.agents.chat.prompts_conversation import (
SLOT_NAME_MAP,
get_guided_conversation_prompt,
get_opening_prompt,
)
__all__ = [
"chat_output_rules",
"PROFILE_FIELD_NAMES",

View File

@@ -9,11 +9,11 @@ from app.agents.chat.background_voice import (
normalize_background_voice,
)
from app.agents.chat.occupation_context import get_occupation_chat_hint
from app.agents.chat.output_rules import chat_output_rules
from app.agents.chat.personas import (
get_interview_persona_tone_hint,
normalize_interview_persona,
)
from app.agents.chat.output_rules import chat_output_rules
from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH, STAGE_ERA_HINTS
from app.core.config import settings
@@ -44,25 +44,18 @@ SLOT_NAME_MAP = {
}
def _compact_era_hint(current_stage: str, user_profile_context: str) -> str:
"""单行时代联想,可选附在进度后。"""
if not user_profile_context:
return ""
birth_year = None
birth_place = ""
for line in user_profile_context.split("\n"):
if "出生年份" in line:
try:
birth_year = int(line.split("")[1].strip().replace("", ""))
except (ValueError, IndexError):
pass
if "出生地" in line or "成长地" in line:
birth_place = line.split("")[1].strip() if "" in line else ""
def _compact_era_hint(
current_stage: str,
*,
birth_year: int | None = None,
era_place: str = "",
) -> str:
"""单行时代联想,可选附在进度后。出生年与地点由调用方从用户资料结构化传入。"""
if not birth_year:
return ""
birth_place = (era_place or "").strip()
age_range = STAGE_ERA_HINTS.get(current_stage, (0, 30))
era_start = birth_year + age_range[0]
era_end = birth_year + age_range[1]
@@ -230,6 +223,8 @@ def get_guided_conversation_prompt(
memory_evidence_text: str = "",
background_voice: str = "default",
occupation: str = "",
profile_birth_year: Optional[int] = None,
profile_era_place: str = "",
) -> str:
"""生成状态感知的对话提示词;用户原话仅以 HumanMessage 传入,不写入本 system 文本。"""
persona_key = normalize_interview_persona(persona)
@@ -285,7 +280,11 @@ def get_guided_conversation_prompt(
)
era_line = ""
if settings.chat_era_context_enabled:
era_line = _compact_era_hint(active_stage, user_profile_context)
era_line = _compact_era_hint(
active_stage,
birth_year=profile_birth_year,
era_place=profile_era_place,
)
if user_jumped:
topic_desc = (

View File

@@ -6,7 +6,6 @@ from typing import Dict, List, Optional
from app.agents.chat.output_rules import chat_output_rules
PROFILE_FIELD_NAMES = {
"birth_year": "出生年份",
"birth_place": "出生地",