WIP: memory system improvements (in progress)

Changes cover the interview/chat prompt layers, reply planner, style profiles,
memory injection, interview meta store, and related tests. The work is not yet finished.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions

View File

@@ -11,12 +11,14 @@ from langchain_core.messages import HumanMessage, SystemMessage
from app.agents.chat.agent_turn import AgentChatTurn
from app.agents.chat.helpers import format_history_string, get_history_with_window
from app.agents.chat.interview_state_hints import (
apply_autobiographical_boundary_guard,
apply_duplicate_question_guard,
extract_recent_questions,
segments_are_only_duplicate_guard_fallback,
update_recent_questions,
)
from app.agents.chat.interview_turn_plan import plan_interview_turn
from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm
from app.agents.chat.personas import normalize_interview_persona
from app.agents.chat.prompt_context import ChatPromptContext
from app.agents.chat.prompts_conversation import (
@@ -135,12 +137,15 @@ class InterviewAgent:
user_profile_context: str = "",
detected_user_stage: Optional[str] = None,
memory_evidence_text: str = "",
memory_anchor_source: str = "",
memory_planner_text: str = "",
background_voice: str = "default",
normalized_user_message: Optional[str] = None,
occupation: str = "",
profile_birth_year: int | None = None,
profile_era_place: str = "",
stage_switched_this_turn: bool = False,
scene_cues_for_planner: Optional[list[str]] = None,
) -> AgentChatTurn:
"""生成状态感知的访谈回复,不持久化(由 Orchestrator 负责)"""
if not self.llm:
@@ -179,7 +184,7 @@ class InterviewAgent:
current_stage=memoir_state.current_stage,
empty_slots=empty_slots,
normalized_user_message=text_for_model,
memory_evidence_text=memory_evidence_text,
memory_evidence_text=(memory_anchor_source or "").strip(),
stage_switched_this_turn=stage_switched_this_turn,
)
logger.info(
@@ -189,6 +194,37 @@ class InterviewAgent:
len(turn_plan.anchor_snippet or ""),
)
reply_planner_raw = ""
baseline_mode = turn_plan.mode
baseline_primary_focus = turn_plan.primary_focus
if settings.chat_reply_planner_llm_enabled:
rq_preview = (
"\n".join(recent_questions[-4:])
if recent_questions
else ""
)
turn_plan, reply_planner_raw = await maybe_refine_turn_plan_with_llm(
self.llm,
plan=turn_plan,
text_for_model=text_for_model,
memory_evidence_text=(memory_planner_text or memory_evidence_text)
or "",
max_tokens=int(settings.chat_reply_planner_max_tokens),
temperature=float(settings.chat_reply_planner_temperature),
scene_cues_for_planner=scene_cues_for_planner or [],
recent_questions_preview=rq_preview,
)
if reply_planner_raw:
logger.info(
"event=reply_planner_applied memory_usage={} reply_shape={} "
"mode={} primary_focus={} focus_source={}",
turn_plan.memory_usage,
turn_plan.reply_shape,
turn_plan.mode,
turn_plan.primary_focus,
turn_plan.focus_source,
)
ctx = ChatPromptContext(
current_stage=memoir_state.current_stage,
empty_slots=empty_slots,
@@ -322,6 +358,7 @@ class InterviewAgent:
recent_questions=rq_base,
)
retry_used = True
out, auto_bio = apply_autobiographical_boundary_guard(out)
updated_recent_questions = update_recent_questions(rq_base, out)
log_agent_summary(
logger,
@@ -338,6 +375,18 @@ class InterviewAgent:
"recent_questions": updated_recent_questions,
"duplicate_question_guard_triggered": deduped,
"duplicate_question_guard_llm_retry": retry_used,
"autobiographical_boundary_guard_triggered": auto_bio,
"reply_planner_llm_used": bool(
settings.chat_reply_planner_llm_enabled
and (reply_planner_raw or "").strip()
),
"reply_planner_raw_preview": (reply_planner_raw or "")[:800],
"focus_planner_baseline_mode": baseline_mode,
"focus_planner_baseline_primary_focus": baseline_primary_focus,
"focus_planner_mode": turn_plan.mode,
"focus_planner_primary_focus": turn_plan.primary_focus,
"focus_planner_focus_source": turn_plan.focus_source,
"focus_planner_focus_summary": (turn_plan.focus_summary or "")[:200],
},
)
except Exception as e: