# life-echo/api/app/features/memory/chat_memory_injection.py

"""
访谈聊天：先检索、后筛选、再限量注入。

检索结果不等于注入内容：主 prompt 只收极短「线索」，TurnPlan 挂钩优先用户原话；
完整 bundle 仅给 reply_planner 做焦点规划（可选）。
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

from app.features.conversation.constants import chat
from app.features.memory.constants import memory
from app.features.memory.evidence_format import (
    dedupe_evidence_chunk_rows,
    format_evidence_chunks_for_chat_prompt,
    format_evidence_chunks_for_prompt,
    format_user_memory_for_chat_display,
)


@dataclass(frozen=True)
class InterviewMemorySlices:
    """Immutable bundle of the three memory products built for one interview turn.

    Instances also record whether retrieval produced anything at all, so a
    caller can tell "nothing retrieved" apart from "retrieved but gated out".
    """

    # Very short snippet destined for the main system Context; may be empty.
    prompt_excerpt: str
    # Input for TurnPlan / extract_anchor_snippet: a single short clue or an
    # empty string. It never replaces the user's own words.
    anchor_source: str
    # Input for the reply_planner JSON: may be longer, but is still truncated
    # to a maximum character count.
    planner_preview: str
    # Whether retrieval returned anything, evaluated before gating.
    had_retrieval: bool


# Phrases that signal the user is brushing the topic off: laughing it away,
# declining to talk, claiming to have forgotten, or denying the premise.
# _should_suppress_memory_injection treats a substring match on any of these
# as a reason to suppress injection, regardless of message length.
_DISMISSIVE_MARKERS: tuple[str, ...] = (
    "哈哈",
    "呵呵",
    "笑死",
    "早就不会",
    "不会了",
    "别提",
    "不想说",
    "算了",
    "没感觉",
    "忘了",
    "不记得",
    "不用了",
    "别问了",
    "别说了",
    "你记错",
    "别扯",
    "打住",
    "翻篇",
    "没那回事",
    "不是那回事",
    "不是那个",
)
# Generic negation / correction words. These are common in ordinary speech, so
# _should_suppress_memory_injection only lets them suppress injection when the
# whole (stripped) message is short (at most 24 characters).
_REJECTION_MARKERS: tuple[str, ...] = (
    "不是",
    "没有",
    "不对",
    "错了",
    "辟谣",
    "过了",
)
# Words suggesting the user is picking an earlier thread back up ("continue",
# "last time", "just now", ...). A long message (72+ characters) is suppressed
# by _should_suppress_memory_injection unless it contains one of these.
_CONTINUATION_MARKERS: tuple[str, ...] = (
    "继续",
    "后来",
    "那次",
    "上次",
    "再说",
    "还有",
    "接着",
    "回到",
    "上回",
    "你刚",
    "刚才",
)


def _should_suppress_memory_injection(user_message: str) -> bool:
    """Decide whether retrieved memory should be kept out of this turn's prompt.

    Suppression is the default stance. Memory is withheld when the stripped
    message is empty, contains a dismissive phrase, is a short negation
    (24 characters or fewer with a rejection word), is very short (18
    characters or fewer), or is long (72 characters or more) without any
    word indicating the user is continuing an earlier thread.

    Args:
        user_message: The user's message for the current turn; falsy values
            are treated as an empty string.

    Returns:
        True if memory injection should be suppressed, False otherwise.
    """
    text = (user_message or "").strip()

    def mentions(markers: tuple[str, ...]) -> bool:
        # Plain substring containment against the stripped message.
        return any(marker in text for marker in markers)

    if not text:
        return True

    length = len(text)
    if mentions(_DISMISSIVE_MARKERS):
        return True
    # A negation inside a short sentence reads as "let's move on".
    if length <= 24 and mentions(_REJECTION_MARKERS):
        return True
    if length <= 18:
        return True
    # A long, fresh narrative: favour the current topic over old memory blocks.
    return length >= 72 and not mentions(_CONTINUATION_MARKERS)


def _clip_with_ellipsis(text: str, max_chars: int) -> str:
    """Return *text* unchanged if it fits, else cut it to end with "…".

    The clipped result is at most ``max_chars`` characters long whenever
    ``max_chars`` is at least 1. The slice bound is clamped at zero so a
    non-positive ``max_chars`` cannot turn into a negative slice index.
    """
    if len(text) <= max_chars:
        return text
    return text[: max(max_chars - 1, 0)].rstrip() + "…"


def _first_chunk_line_for_anchor(evidence: dict, *, max_chars: int = 120) -> str:
    """Pick one short, display-safe line from the retrieval bundle.

    The first ten entries of ``evidence["relevant_chunks"]`` are passed through
    ``dedupe_evidence_chunk_rows`` and scanned in order. The first chunk whose
    stripped content has at least 8 characters contributes its first line,
    formatted by ``format_user_memory_for_chat_display(..., verbatim=True)``.
    If no chunk qualifies, the first entry of
    ``evidence["relevant_summaries"]`` is used instead, provided it is a dict
    with non-empty ``content``.

    Both paths truncate the same way: a result longer than ``max_chars`` is cut
    and terminated with "…".

    Args:
        evidence: Retrieval bundle; the keys read here are ``relevant_chunks``
            and ``relevant_summaries``, and either may be missing or empty.
        max_chars: Upper bound on the length of the returned line.

    Returns:
        The selected line, or "" when nothing usable was found.
    """
    candidates = dedupe_evidence_chunk_rows(
        (evidence.get("relevant_chunks") or [])[:10]
    )
    for chunk in candidates:
        # Rows may be plain dicts or objects exposing a ``content`` attribute.
        if isinstance(chunk, dict):
            content = chunk.get("content", "")
        else:
            content = getattr(chunk, "content", "")
        raw = (content or "").strip()
        if len(raw) < 8:
            # Too short to serve as a meaningful clue.
            continue
        line = raw.splitlines()[0].strip()
        if not line:
            continue
        safe_line = format_user_memory_for_chat_display(line, verbatim=True)
        if not safe_line:
            # Defensive: if formatting leaves nothing, try the next candidate
            # instead of giving up on the whole bundle.
            continue
        return _clip_with_ellipsis(safe_line, max_chars)

    summaries = evidence.get("relevant_summaries") or []
    for summary in summaries[:1]:
        if not isinstance(summary, dict):
            continue
        summary_text = (summary.get("content") or "").strip()
        if summary_text:
            return _clip_with_ellipsis(
                format_user_memory_for_chat_display(summary_text), max_chars
            )
    return ""


def format_minimal_prompt_memory_hint(evidence: dict, *, max_chars: int = 100) -> str:
    """Render a single, very short memory clue as a Context section.

    The clue comes from ``_first_chunk_line_for_anchor``. It is wrapped in a
    fixed instruction header followed by one bullet line, so long passages
    are never restated in the prompt.

    Args:
        evidence: Retrieval bundle forwarded to ``_first_chunk_line_for_anchor``.
        max_chars: Length cap forwarded for the clue itself.

    Returns:
        The formatted section ending in a newline, or "" when no clue exists.
    """
    clue = _first_chunk_line_for_anchor(evidence, max_chars=max_chars)
    if clue:
        preamble = (
            "## 记忆线索（仅用于追问角度，禁止复述成段正文）\n"
            "以下为检索到的**一条**用户过往口述/系统摘要，**不是**助手自己的经历，"
            "也**不是**用户本轮原话；优先用用户本轮原话承接与追问。\n"
            "若需要轻量勾连，只能用「你之前提过」「你说过」「你刚讲到」这类**归因式**半句，"
            "禁止整段展开，禁止写成助手亲历。\n"
        )
        return f"{preamble}- {clue}\n"
    return ""


def build_planner_preview(
    evidence: dict,
    *,
    use_safe_chat_format: bool,
) -> str:
    """Build the longer evidence preview used only by reply_planner.

    Args:
        evidence: Retrieval bundle handed to the chosen formatter.
        use_safe_chat_format: When true, format with
            ``format_evidence_chunks_for_chat_prompt``; otherwise with
            ``format_evidence_chunks_for_prompt``.

    Returns:
        The stripped formatted text, or "" if the formatter produced nothing.
        Text longer than ``min(chat.memory_evidence_max_chars, 2000)`` is cut
        and terminated with "...".
    """
    render = (
        format_evidence_chunks_for_chat_prompt
        if use_safe_chat_format
        else format_evidence_chunks_for_prompt
    )
    preview = (render(evidence) or "").strip()
    if not preview:
        return ""

    # The configured limit applies, but never more than 2000 characters.
    limit = min(int(chat.memory_evidence_max_chars), 2000)
    if len(preview) <= limit:
        return preview
    return preview[: limit - 3] + "..."


def slice_interview_memory(
    evidence: dict[str, Any] | None,
    user_message: str,
) -> InterviewMemorySlices:
    """Turn a retrieval bundle into the three per-turn memory slices.

    The planner preview is built first. If the gate
    (``_should_suppress_memory_injection``) suppresses injection, the prompt
    excerpt and anchor stay empty but the planner still receives the preview.

    Args:
        evidence: Retrieval bundle, or None / empty when nothing was retrieved.
        user_message: The user's message for this turn, used for gating.

    Returns:
        An ``InterviewMemorySlices``. ``had_retrieval`` reflects whether the
        planner preview was non-empty, independent of the gating outcome.
    """
    nothing = InterviewMemorySlices(
        prompt_excerpt="",
        anchor_source="",
        planner_preview="",
        had_retrieval=False,
    )
    if not evidence:
        return nothing

    preview = build_planner_preview(
        evidence,
        use_safe_chat_format=chat.memory_safe_evidence_format_enabled,
    )
    retrieved = bool(preview.strip())

    if _should_suppress_memory_injection(user_message):
        # Gate closed: nothing reaches the prompt, the planner keeps its preview.
        return InterviewMemorySlices(
            prompt_excerpt="",
            anchor_source="",
            planner_preview=preview,
            had_retrieval=retrieved,
        )

    if not retrieved:
        return nothing

    # The anchor for TurnPlan is one short sentence, not the whole memory block.
    return InterviewMemorySlices(
        prompt_excerpt=format_minimal_prompt_memory_hint(evidence),
        anchor_source=_first_chunk_line_for_anchor(evidence, max_chars=160),
        planner_preview=preview,
        had_retrieval=True,
    )


# Public API of this module. The underscore-prefixed helpers
# (_should_suppress_memory_injection, _first_chunk_line_for_anchor) are
# deliberately not exported.
__all__ = [
    "InterviewMemorySlices",
    "slice_interview_memory",
    "format_minimal_prompt_memory_hint",
    "build_planner_preview",
]