# api/app/features/memory/chat_memory_injection.py

"""
访谈聊天：先检索、后筛选、再限量注入。

检索结果不等于注入内容：主 prompt 只收极短「线索」，TurnPlan 挂钩优先用户原话；
完整 bundle 仅给 reply_planner 做焦点规划（可选）。
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

from app.features.conversation.constants import chat
from app.features.memory.constants import memory
from app.features.memory.evidence_format import (
    dedupe_evidence_chunk_rows,
    format_evidence_chunks_for_chat_prompt,
    format_evidence_chunks_for_prompt,
    format_user_memory_for_chat_display,
)


@dataclass(frozen=True)
class InterviewMemorySlices:
    """The three memory products derived for one interview turn, plus a retrieval flag."""

    # Very short fragment that goes into the main system Context (may be empty).
    prompt_excerpt: str
    # For TurnPlan / extract_anchor_snippet: a single short clue, or empty
    # (it does not replace the user's own words).
    anchor_source: str
    # For the reply_planner JSON: may be longer, but is still truncated by max_chars.
    planner_preview: str
    # Whether retrieval was non-empty (evaluated before gating).
    had_retrieval: bool


# Substrings treated as a dismissive / brush-off reply. If any of them occurs
# in the user message, _should_suppress_memory_injection returns True no matter
# how long the message is.
_DISMISSIVE_MARKERS: tuple[str, ...] = (
    "哈哈",
    "呵呵",
    "笑死",
    "早就不会",
    "不会了",
    "别提",
    "不想说",
    "算了",
    "没感觉",
    "忘了",
    "不记得",
    "不用了",
    "别问了",
    "别说了",
    "你记错",
    "别扯",
    "打住",
    "翻篇",
    "没那回事",
    "不是那回事",
    "不是那个",
)
# Negation / correction substrings. They only suppress memory injection when
# the stripped user message is at most 24 characters long (see
# _should_suppress_memory_injection); longer messages containing them are not
# suppressed by this rule.
_REJECTION_MARKERS: tuple[str, ...] = (
    "不是",
    "没有",
    "不对",
    "错了",
    "辟谣",
    "过了",
)
# Substrings signalling that the user is continuing an earlier thread. A message
# of 72+ characters is suppressed by _should_suppress_memory_injection unless it
# contains at least one of these.
_CONTINUATION_MARKERS: tuple[str, ...] = (
    "继续",
    "后来",
    "那次",
    "上次",
    "再说",
    "还有",
    "接着",
    "回到",
    "上回",
    "你刚",
    "刚才",
)


def _should_suppress_memory_injection(user_message: str) -> bool:
    """Decide whether memory should be kept out of the prompt for this turn.

    Suppression is the default stance. The function returns ``True`` when the
    stripped message:

    * is empty or at most 18 characters long,
    * contains any dismissive marker,
    * contains a rejection marker and is at most 24 characters long, or
    * is 72+ characters long and contains no continuation marker.

    Otherwise it returns ``False`` and memory may be injected.
    """
    text = (user_message or "").strip()
    length = len(text)

    def _mentions(markers: tuple[str, ...]) -> bool:
        return any(marker in text for marker in markers)

    # Empty and very short messages are both covered by the length test.
    if length <= 18:
        return True
    if _mentions(_DISMISSIVE_MARKERS):
        return True
    # A negation inside a short sentence reads as "let's move on".
    if length <= 24 and _mentions(_REJECTION_MARKERS):
        return True
    # A long, fresh narrative: favour the current topic over old memory blocks.
    return length >= 72 and not _mentions(_CONTINUATION_MARKERS)


def _first_chunk_line_for_anchor(evidence: dict, *, max_chars: int = 120) -> str:
    """Pick a single short memory line from a retrieval bundle, or ``""``.

    The first ten entries of ``evidence["relevant_chunks"]`` are passed through
    ``dedupe_evidence_chunk_rows`` and scanned in order. Entries may be dicts or
    objects exposing a ``content`` attribute. The first one whose stripped
    content has at least 8 characters wins: its first line is rendered with
    ``format_user_memory_for_chat_display(..., verbatim=True)`` and, when longer
    than ``max_chars``, cut to ``max_chars - 1`` characters plus ``"…"``.

    If no chunk qualifies, only the first entry of
    ``evidence["relevant_summaries"]`` is considered (dicts only); its rendered
    content is hard-cut to ``max_chars`` without an ellipsis.
    """
    candidates = dedupe_evidence_chunk_rows(
        (evidence.get("relevant_chunks") or [])[:10]
    )
    for chunk in candidates:
        if isinstance(chunk, dict):
            body = chunk.get("content", "")
        else:
            body = getattr(chunk, "content", "")
        body = (body or "").strip()
        # Fragments shorter than 8 characters are skipped.
        if len(body) < 8:
            continue
        first_line = body.splitlines()[0].strip()
        if not first_line:
            continue
        rendered = format_user_memory_for_chat_display(first_line, verbatim=True)
        if len(rendered) <= max_chars:
            return rendered
        return rendered[: max_chars - 1].rstrip() + "…"

    # Fallback: at most the first summary is looked at.
    for summary in (evidence.get("relevant_summaries") or [])[:1]:
        if not isinstance(summary, dict):
            continue
        text = (summary.get("content") or "").strip()
        if not text:
            continue
        rendered = format_user_memory_for_chat_display(text)
        if len(rendered) > max_chars:
            return rendered[:max_chars]
        return rendered
    return ""


def format_minimal_prompt_memory_hint(evidence: dict, *, max_chars: int = 100) -> str:
    """Build the one-clue memory section for the main prompt Context.

    Takes the line chosen by ``_first_chunk_line_for_anchor`` (limited to
    ``max_chars``) and wraps it in a fixed header instructing the model to use
    it only as an attributed hint, never as a long restatement. Returns ``""``
    when no line is available.
    """
    clue = _first_chunk_line_for_anchor(evidence, max_chars=max_chars)
    if clue:
        preamble = (
            "## 记忆线索（仅用于追问角度，禁止复述成段正文）\n"
            "以下为检索到的**一条**用户过往口述/系统摘要，**不是**助手自己的经历，"
            "也**不是**用户本轮原话；优先用用户本轮原话承接与追问。\n"
            "若需要轻量勾连，只能用「你之前提过」「你说过」「你刚讲到」这类**归因式**半句，"
            "禁止整段展开，禁止写成助手亲历。\n"
        )
        return f"{preamble}- {clue}\n"
    return ""


def build_planner_preview(
    evidence: dict,
    *,
    use_safe_chat_format: bool,
) -> str:
    """Render the retrieval bundle as bounded text for the reply planner.

    Args:
        evidence: Retrieval bundle handed to the evidence formatter.
        use_safe_chat_format: When true, format with
            ``format_evidence_chunks_for_chat_prompt``; otherwise with
            ``format_evidence_chunks_for_prompt``.

    Returns:
        The stripped formatter output, never longer than
        ``min(chat.memory_evidence_max_chars, 2000)`` characters. Truncated
        text ends with ``"..."`` when the budget has room for the marker.
        Returns ``""`` when the formatter yields nothing.
    """
    formatter = (
        format_evidence_chunks_for_chat_prompt
        if use_safe_chat_format
        else format_evidence_chunks_for_prompt
    )
    text = (formatter(evidence) or "").strip()
    if not text:
        return ""

    # Hard ceiling of 2000 characters regardless of configuration. Clamp at
    # zero so a non-positive setting can never turn into a negative slice bound.
    limit = max(0, min(int(chat.memory_evidence_max_chars), 2000))
    if len(text) <= limit:
        return text

    marker = "..."
    if limit < len(marker):
        # No room for the marker: a plain cut keeps the result within the
        # budget. (Previously a negative bound returned almost the whole text.)
        return text[:limit]
    return text[: limit - len(marker)] + marker


def slice_interview_memory(
    evidence: dict[str, Any] | None,
    user_message: str,
) -> InterviewMemorySlices:
    """Turn a retrieval bundle into the three per-turn memory slices.

    * No evidence: every slice is empty and ``had_retrieval`` is ``False``.
    * Injection suppressed for this ``user_message``: the prompt excerpt and
      anchor stay empty, but the planner preview is still passed through and
      ``had_retrieval`` reports whether that preview is non-empty.
    * Preview empty: every slice is empty and ``had_retrieval`` is ``False``.
    * Otherwise: all three slices are filled and ``had_retrieval`` is ``True``.
    """
    nothing = InterviewMemorySlices(
        prompt_excerpt="",
        anchor_source="",
        planner_preview="",
        had_retrieval=False,
    )
    if not evidence:
        return nothing

    preview = build_planner_preview(
        evidence,
        use_safe_chat_format=chat.memory_safe_evidence_format_enabled,
    )
    has_preview = bool(preview.strip())

    # Gated turns keep the prompt clean but still feed the planner.
    if _should_suppress_memory_injection(user_message):
        return InterviewMemorySlices(
            prompt_excerpt="",
            anchor_source="",
            planner_preview=preview,
            had_retrieval=has_preview,
        )

    if not has_preview:
        return nothing

    excerpt = format_minimal_prompt_memory_hint(evidence)
    # The TurnPlan anchor is one short sentence, not the whole memory block.
    anchor_line = _first_chunk_line_for_anchor(evidence, max_chars=160)
    return InterviewMemorySlices(
        prompt_excerpt=excerpt,
        anchor_source=anchor_line,
        planner_preview=preview,
        had_retrieval=True,
    )


# Names exported by `from <module> import *`; the underscore-prefixed helpers
# in this module are not listed.
__all__ = [
    "InterviewMemorySlices",
    "slice_interview_memory",
    "format_minimal_prompt_memory_hint",
    "build_planner_preview",
]
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								"""
 								访谈聊天：先检索、后筛选、再限量注入。
 								检索结果不等于注入内容：主 prompt 只收极短「线索」，TurnPlan 挂钩优先用户原话；
 								完整 bundle 仅给 reply_planner 做焦点规划（可选）。
 								"""
 								from __future__ import annotations
 								from dataclasses import dataclass
 								from typing import Any
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								from app.features.conversation.constants import chat
 								from app.features.memory.constants import memory
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								from app.features.memory.evidence_format import (
 								    dedupe_evidence_chunk_rows,
 								    format_evidence_chunks_for_chat_prompt,
 								    format_evidence_chunks_for_prompt,
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								    format_user_memory_for_chat_display,
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								)
 								@dataclass(frozen=True)
 								class InterviewMemorySlices:
 								    """访谈轮次三类 memory 产物。"""
 								    # 进主 system Context 的极短片段（可能为空）
 								    prompt_excerpt: str
 								    # 供 TurnPlan / extract_anchor_snippet：仅一条短线索或空（不替代用户原话）
 								    anchor_source: str
 								    # 供 reply_planner JSON：可较长，仍受 max_chars 截断
 								    planner_preview: str
 								    # 检索是否非空（gating 前）
 								    had_retrieval: bool
 								_DISMISSIVE_MARKERS: tuple[str, ...] = (
 								    "哈哈",
 								    "呵呵",
 								    "笑死",
 								    "早就不会",
 								    "不会了",
 								    "别提",
 								    "不想说",
 								    "算了",
 								    "没感觉",
 								    "忘了",
 								    "不记得",
 								    "不用了",
 								    "别问了",
 								    "别说了",
 								    "你记错",
 								    "别扯",
 								    "打住",
 								    "翻篇",
 								    "没那回事",
 								    "不是那回事",
 								    "不是那个",
 								)
 								_REJECTION_MARKERS: tuple[str, ...] = (
 								    "不是",
 								    "没有",
 								    "不对",
 								    "错了",
 								    "辟谣",
 								    "过了",
 								)
 								_CONTINUATION_MARKERS: tuple[str, ...] = (
 								    "继续",
 								    "后来",
 								    "那次",
 								    "上次",
 								    "再说",
 								    "还有",
 								    "接着",
 								    "回到",
 								    "上回",
 								    "你刚",
 								    "刚才",
 								)
 								def _should_suppress_memory_injection(user_message: str) -> bool:
 								    """默认不注入：短句敷衍、否定翻篇、或本轮已足够长且未明显接续旧线。"""
 								    um = (user_message or "").strip()
 								    if not um:
 								        return True
 								    if any(m in um for m in _DISMISSIVE_MARKERS):
 								        return True
 								    if any(m in um for m in _REJECTION_MARKERS):
 								        # 短句里的否定更像翻篇
 								        if len(um) <= 24:
 								            return True
 								    if len(um) <= 18:
 								        return True
 								    if len(um) >= 72 and not any(m in um for m in _CONTINUATION_MARKERS):
 								        # 长段新叙事：优先当前话头，不塞旧记忆块
 								        return True
 								    return False
 								def _first_chunk_line_for_anchor(evidence: dict, *, max_chars: int = 120) -> str:
 								    chunks = evidence.get("relevant_chunks") or []
 								    chunks = dedupe_evidence_chunk_rows(chunks[:10])
 								    for c in chunks:
 								        content = (
 								            c.get("content", "") if isinstance(c, dict) else getattr(c, "content", "")
 								        )
 								        raw = (content or "").strip()
 								        if len(raw) < 8:
 								            continue
 								        line = raw.splitlines()[0].strip() if raw else ""
 								        if not line:
 								            continue
 								        safe_line = format_user_memory_for_chat_display(line, verbatim=True)
 								        if len(safe_line) > max_chars:
 								            return safe_line[: max_chars - 1].rstrip() + "…"
 								        return safe_line
 								    summaries = evidence.get("relevant_summaries") or []
 								    for s in summaries[:1]:
 								        if isinstance(s, dict):
 								            st = (s.get("content") or "").strip()
 								            if st:
 								                safe_st = format_user_memory_for_chat_display(st)
 								                return safe_st[:max_chars] if len(safe_st) > max_chars else safe_st
 								    return ""
 								def format_minimal_prompt_memory_hint(evidence: dict, *, max_chars: int = 100) -> str:
 								    """单条极短线索，供 Context；禁止长段复述。"""
 								    line = _first_chunk_line_for_anchor(evidence, max_chars=max_chars)
 								    if not line:
 								        return ""
 								    return (
 								        "## 记忆线索（仅用于追问角度，禁止复述成段正文）\n"
 								        "以下为检索到的**一条**用户过往口述/系统摘要，**不是**助手自己的经历，"
 								        "也**不是**用户本轮原话；优先用用户本轮原话承接与追问。\n"
 								        "若需要轻量勾连，只能用「你之前提过」「你说过」「你刚讲到」这类**归因式**半句，"
 								        "禁止整段展开，禁止写成助手亲历。\n"
 								        f"- {line}\n"
 								    )
 								def build_planner_preview(
 								    evidence: dict,
 								    *,
 								    use_safe_chat_format: bool,
 								) -> str:
 								    """reply_planner 专用：保留较完整上下文，仍截断。"""
 								    if use_safe_chat_format:
 								        text = format_evidence_chunks_for_chat_prompt(evidence)
 								    else:
 								        text = format_evidence_chunks_for_prompt(evidence)
 								    t = (text or "").strip()
 								    if not t:
 								        return ""
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								    max_c = min(int(chat.memory_evidence_max_chars), 2000)
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								    if len(t) > max_c:
 								        return t[: max_c - 3] + "..."
 								    return t
 								def slice_interview_memory(
 								    evidence: dict[str, Any] | None,
 								    user_message: str,
 								) -> InterviewMemorySlices:
 								    """
 								    检索 bundle → 三类切片。gating 关闭时仍可为 planner 提供 preview。
 								    """
 								    if not evidence:
 								        return InterviewMemorySlices(
 								            prompt_excerpt="",
 								            anchor_source="",
 								            planner_preview="",
 								            had_retrieval=False,
 								        )
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								    use_safe = chat.memory_safe_evidence_format_enabled
-												WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor

											
										
										
											2026-04-22 16:56:28 +08:00
+								    planner_preview = build_planner_preview(evidence, use_safe_chat_format=use_safe)
 								    had = bool(planner_preview.strip())
 								    if _should_suppress_memory_injection(user_message):
 								        return InterviewMemorySlices(
 								            prompt_excerpt="",
 								            anchor_source="",
 								            planner_preview=planner_preview,
 								            had_retrieval=had,
 								        )
 								    if not had:
 								        return InterviewMemorySlices(
 								            prompt_excerpt="",
 								            anchor_source="",
 								            planner_preview="",
 								            had_retrieval=False,
 								        )
 								    prompt_excerpt = format_minimal_prompt_memory_hint(evidence)
 								    anchor = _first_chunk_line_for_anchor(evidence, max_chars=160)
 								    # anchor 给 TurnPlan：带一条短句即可，不拼整段 M 块
 								    return InterviewMemorySlices(
 								        prompt_excerpt=prompt_excerpt,
 								        anchor_source=anchor,
 								        planner_preview=planner_preview,
 								        had_retrieval=True,
 								    )
 								__all__ = [
 								    "InterviewMemorySlices",
 								    "slice_interview_memory",
 								    "format_minimal_prompt_memory_hint",
 								    "build_planner_preview",
 								]