Files
life-echo/api/app/agents/chat/reply_planner.py
Kevin 3121d1384d WIP: memory system improvements (in progress)
Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor
2026-04-22 16:56:28 +08:00

221 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""访谈 focus planner规则 TurnPlan 之后的可选 LLM 细化JSON判断本轮承接重点并微调记忆引用与回复形状。"""
from __future__ import annotations
import json
from dataclasses import replace
from typing import Any
from app.agents.chat.interview_turn_plan import (
InterviewTurnPlan,
apply_safe_mode_override,
)
from app.core.langchain_llm import ainvoke_json_object
from app.core.logging import get_logger
logger = get_logger(__name__)
# Focus labels accepted for the planner's primary_focus / secondary_focus keys.
_VALID_FOCUS_PRIMARIES: frozenset[str] = frozenset(
    (
        "emotion",
        "relationship",
        "identity",
        "scene",
        "memoir_gap",
        "follow_user",
    )
)
def merge_reply_planner_json_into_turn_plan(
    plan: InterviewTurnPlan,
    raw_json: str,
) -> InterviewTurnPlan:
    """Merge the planner's JSON reply into ``plan`` and return the result.

    Only recognised keys carrying valid values are applied; anything else is
    ignored. ``plan`` itself (the same object) is returned when ``raw_json``
    is blank, fails to decode as JSON, does not decode to a JSON object, or
    yields no applicable change. Otherwise a copy built with
    ``dataclasses.replace`` is returned.

    Recognised keys:
        memory_usage: ``"none"`` or ``"allowed_with_attribution"``. A
            baseline of ``"none"`` is never upgraded.
        reply_shape: ``"flexible"``, ``"ack_only"`` or ``"ack_then_question"``.
        memory_reference_style: string whose stripped length is 2 to 24.
        forbid_first_person_experience: never applied; a ``false`` value is
            only logged at debug level.
        primary_focus: must be one of ``_VALID_FOCUS_PRIMARIES``.
        secondary_focus: one of ``_VALID_FOCUS_PRIMARIES``, or null / blank
            string to clear it.
        focus_summary: non-blank string; stripped and capped at 200
            characters, the last one being an ellipsis when truncated.
        mode_override: handed to ``apply_safe_mode_override``; its result is
            applied only when it is not ``None`` and differs from
            ``plan.mode``.

    Any accepted focus, summary or mode change also sets
    ``focus_source`` to ``"llm"``.

    Args:
        plan: Baseline turn plan to refine.
        raw_json: Raw JSON text returned by the planner model.

    Returns:
        The refined plan, or ``plan`` unchanged.
    """
    if not (raw_json or "").strip():
        return plan
    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError:
        logger.warning("reply_planner json decode failed")
        return plan
    if not isinstance(data, dict):
        return plan

    updates: dict[str, Any] = {}
    touched_focus = False

    memory_usage = data.get("memory_usage")
    if memory_usage in ("none", "allowed_with_attribution"):
        # The model may tighten memory usage but never loosen a "none" baseline.
        escalates = (
            plan.memory_usage == "none"
            and memory_usage == "allowed_with_attribution"
        )
        if not escalates:
            updates["memory_usage"] = memory_usage

    reply_shape = data.get("reply_shape")
    if reply_shape in ("flexible", "ack_only", "ack_then_question"):
        updates["reply_shape"] = reply_shape

    reference_style = data.get("memory_reference_style")
    if isinstance(reference_style, str) and 2 <= len(reference_style.strip()) <= 24:
        updates["memory_reference_style"] = reference_style.strip()

    # forbid_first_person_experience: only true is acceptable, so a false
    # suggestion from the model is ignored (and merely logged).
    if data.get("forbid_first_person_experience") is False:
        logger.debug("reply_planner ignored forbid_first_person_experience=false")

    if "primary_focus" in data:
        primary = data.get("primary_focus")
        if isinstance(primary, str) and primary in _VALID_FOCUS_PRIMARIES:
            updates["primary_focus"] = primary  # type: ignore[assignment]
            touched_focus = True

    if "secondary_focus" in data:
        secondary = data.get("secondary_focus")
        if secondary is None or (isinstance(secondary, str) and not secondary.strip()):
            # Explicit null / blank clears the secondary focus.
            updates["secondary_focus"] = None
            touched_focus = True
        elif isinstance(secondary, str) and secondary in _VALID_FOCUS_PRIMARIES:
            updates["secondary_focus"] = secondary  # type: ignore[assignment]
            touched_focus = True

    summary = data.get("focus_summary")
    if isinstance(summary, str) and summary.strip():
        text = summary.strip()
        if len(text) > 200:
            # Keep 199 characters and spend the 200th on an ellipsis so the
            # truncation is visible. The escape avoids a raw ambiguous
            # Unicode character in the source.
            text = text[:199].rstrip() + "\u2026"
        updates["focus_summary"] = text
        touched_focus = True

    mode_override = data.get("mode_override")
    if mode_override is not None and mode_override != "":
        merged_mode = apply_safe_mode_override(
            plan.mode,
            mode_override if isinstance(mode_override, str) else str(mode_override),
            # Use the freshly accepted primary focus when there is one.
            primary_focus=str(updates.get("primary_focus", plan.primary_focus)),
        )
        if merged_mode is not None and merged_mode != plan.mode:
            updates["mode"] = merged_mode
            touched_focus = True

    if touched_focus:
        updates["focus_source"] = "llm"
    if not updates:
        return plan
    return replace(plan, **updates)
def _build_reply_planner_prompt(
    *,
    plan: InterviewTurnPlan,
    user_message_preview: str,
    memory_evidence_preview: str,
    scene_cues_preview: str,
    recent_questions_preview: str,
) -> str:
    """Render the instruction prompt sent to the focus-planner model.

    Each preview is stripped and capped (user message 800, memory evidence
    1200, scene cues 600, recent questions 400 characters). Blank memory,
    scene and recent-question previews are replaced by a fixed placeholder;
    a blank user message is rendered as an empty line. The current values of
    ``plan`` are embedded as the baseline the model is asked to refine.

    Args:
        plan: Baseline turn plan whose fields are shown to the model.
        user_message_preview: The user's message for this turn.
        memory_evidence_preview: Retrieved-memory text, may be blank.
        scene_cues_preview: Pre-rendered scene cue lines, may be blank.
        recent_questions_preview: Recently asked questions, may be blank.

    Returns:
        The complete prompt text.
    """

    def _clip(text: str, limit: int, fallback: str) -> str:
        # Strip first, then cap the length; blank input yields the fallback.
        stripped = (text or "").strip()
        return stripped[:limit] if stripped else fallback

    user_text = _clip(user_message_preview, 800, "")
    asked_text = _clip(recent_questions_preview, 400, "(无)")
    memory_text = _clip(memory_evidence_preview, 1200, "(本轮无检索记忆预览)")
    scene_text = _clip(scene_cues_preview, 600, "(本轮无场景关键词触发的氛围线索)")

    # Baseline focus shown to the model: primary, plus secondary when present.
    baseline_focus = f"{plan.primary_focus}"
    if plan.secondary_focus:
        baseline_focus = f"{baseline_focus} / 次:{plan.secondary_focus}"

    return f"""你是回忆录访谈的「本轮重点计划器」。只输出**一个 JSON 对象**,不要 markdown不要解释。
## 任务
先判断:用户本轮**最该被接住、最不该被忽略**的是什么(情绪、关系与他人、身份与面子、现场感官、或叙述槽缺口)。再决定如何微调基线。
## 当前规则基线(服务端已算好,须尊重安全边界)
- mode: {plan.mode}
- primary_focus规则先验: {baseline_focus}
- memory_usage: {plan.memory_usage}
- reply_shape: {plan.reply_shape}
- memory_reference_style: {plan.memory_reference_style}
- forbid_first_person_experience: {plan.forbid_first_person_experience}
## 用户本轮话(截断)
{user_text}
## 近期你已问过的问题(截断;避免重复角度)
{asked_text}
## 检索记忆预览(供规划追问角度;**非**正文提纲,勿复述成长摘要)
{memory_text}
## 场景氛围线索(仅关键词映射,**不是用户原话**;可作辅助意象,不得压过用户明确提到的人名、关系与面子)
{scene_text}
## 输出 JSON 字段(仅限下列键;未提及的键不要输出)
- primary_focus: \"emotion\" | \"relationship\" | \"identity\" | \"scene\" | \"memoir_gap\" | \"follow_user\"
- secondary_focus: 同上或 null
- focus_summary: 字符串≤80 字,用**中文**写清**追问角度 / 承接方向**(问什么、先接住哪条张力),**不要**写成回复正文提纲或旧记忆复述
- mode_override: \"emotion_first\" | \"clarify_first\" | \"memoir_push\" | \"follow_user_only\" | null
- memory_usage: \"none\" | \"allowed_with_attribution\"
- reply_shape: \"flexible\" | \"ack_only\" | \"ack_then_question\"
- memory_reference_style: 224 字,用于「你之前提过…」类归因起句
- forbid_first_person_experience: 必须为 true
## 约束
1. **不要编造**用户未说的人、事、时地。
2. 若基线 memory_usage 为 none则输出 memory_usage 必须为 none。
3. 若用户话里同时有**明确他人/称谓/观众/面子/身份自称**与**身体感受或环境**,通常应把 primary_focus 设为 relationship 或 identity而不是 scene。
4. mode_override 仅在确实需要时给出;与基线相同时填 null。不要为了改而改。
5. 若用户在追问「讲讲她的故事/说说他」等,倾向 reply_shape=\"ack_then_question\"(仍最多一个问句)。
6. focus_summary **不得**支配主回复措辞或诱导复述检索细节;若基线 memory_usage 为 none**不得**输出 allowed_with_attribution。
7. focus_summary 用于:**先接住本轮核心张力**、再决定追问槽位;若用户话里含说不清/不确定/暧昧羞涩,倾向 mode_override=\"clarify_first\"(勿强推问卷)。"""
async def maybe_refine_turn_plan_with_llm(
    llm: Any,
    *,
    plan: InterviewTurnPlan,
    text_for_model: str,
    memory_evidence_text: str,
    max_tokens: int,
    temperature: float,
    scene_cues_for_planner: list[str] | None = None,
    recent_questions_preview: str = "",
) -> tuple[InterviewTurnPlan, str]:
    """Optionally refine ``plan`` with one JSON call to the focus planner.

    Builds the planner prompt, invokes ``llm`` (bound to ``temperature``)
    through ``ainvoke_json_object`` and merges the reply into ``plan``.

    Args:
        llm: Model handle exposing ``bind``; ``None`` disables refinement.
        plan: Baseline turn plan.
        text_for_model: The user's message for this turn.
        memory_evidence_text: Retrieved-memory text shown to the planner.
        max_tokens: Token budget forwarded to the planner call.
        temperature: Sampling temperature bound onto ``llm``.
        scene_cues_for_planner: Optional scene cues; only the first eight
            are rendered, one bullet per line.
        recent_questions_preview: Recently asked questions, may be blank.

    Returns:
        ``(refined_plan, raw_reply)`` on success. ``(plan, "")`` when ``llm``
        is ``None``, when the call returns a falsy reply, or when the call or
        merge raises.
    """
    if llm is None:
        return plan, ""

    cue_bullets = [f"- {cue}" for cue in (scene_cues_for_planner or [])[:8]]
    planner_prompt = _build_reply_planner_prompt(
        plan=plan,
        user_message_preview=text_for_model,
        memory_evidence_preview=memory_evidence_text,
        scene_cues_preview="\n".join(cue_bullets),
        recent_questions_preview=recent_questions_preview,
    )

    # Best-effort step: any failure in the call or the merge keeps the baseline.
    try:
        planner_llm = llm.bind(temperature=float(temperature))
        planner_raw = await ainvoke_json_object(
            planner_llm,
            planner_prompt,
            max_tokens=max_tokens,
            agent="ReplyPlanner.interview",
        )
        if planner_raw:
            refined = merge_reply_planner_json_into_turn_plan(plan, planner_raw)
            return refined, planner_raw
        return plan, ""
    except Exception as exc:
        logger.warning("reply_planner llm failed: {}", exc)
        return plan, ""
# Names exported by `from ... import *`: the async refinement entry point and
# the JSON merge function it uses.
__all__ = [
"maybe_refine_turn_plan_with_llm",
"merge_reply_planner_json_into_turn_plan",
]