# api/app/core/langchain_llm.py

"""
与 `get_llm_provider().langchain_llm` 配合使用的 LangChain Runnable 约定。

langchain-openai 要求用顶层 `response_format` 绑定 JSON 模式，禁止对 `.bind()` 传入
`model_kwargs={"response_format": ...}`（会错误传入底层 `completions.create`）。
"""

from __future__ import annotations

import hashlib
import time
from typing import Any

from app.core.agent_logging import (
    agent_summary_enabled,
    agent_verbose_enabled,
    log_agent_payload,
)
from app.core.logging import get_logger

logger = get_logger(__name__)


def bind_json_object_mode(llm: Any, *, max_tokens: int) -> Any:
    """Return ``llm.bind(...)`` with JSON-object ``response_format`` and ``max_tokens``.

    ``response_format`` is passed as a top-level keyword to ``bind`` (not
    nested inside ``model_kwargs``), as required by the module-level note.

    Args:
        llm: Object exposing a ``bind(**kwargs)`` method (usually ChatOpenAI).
        max_tokens: Value forwarded unchanged as the ``max_tokens`` keyword.

    Returns:
        Whatever ``llm.bind`` returns.
    """
    bind_kwargs = {
        "response_format": {"type": "json_object"},
        "max_tokens": max_tokens,
    }
    return llm.bind(**bind_kwargs)


def _prompt_sha12(prompt: str) -> str:
    """Return the first 12 hex characters of the SHA-256 of the UTF-8 encoded prompt.

    A falsy prompt (``None`` or ``""``) is hashed as the empty string.
    """
    text = prompt if prompt else ""
    digest = hashlib.sha256(text.encode("utf-8"))
    return digest.hexdigest()[:12]


def invoke_json_object(
    llm: Any,
    prompt: str,
    *,
    max_tokens: int,
    agent: str | None = None,
    retry_empty: bool = True,
) -> str:
    """
    Synchronously invoke ``llm`` in JSON-object mode and return the stripped content.

    The LLM is bound via ``bind_json_object_mode``. When the response content
    is empty and ``retry_empty`` is truthy, the call is repeated once (the
    original note says this mitigates occasional empty output from DeepSeek).

    Args:
        llm: Object accepted by ``bind_json_object_mode``.
        prompt: Prompt passed unchanged to ``invoke``.
        max_tokens: Forwarded to ``bind_json_object_mode``.
        agent: Tag used in log lines; falls back to ``"json_object"``.
        retry_empty: Allow a second attempt after an empty first response.

    Returns:
        The stripped response content, or ``""`` if every attempt was empty.
    """
    runnable = bind_json_object_mode(llm, max_tokens=max_tokens)
    label = agent if agent else "json_object"
    digest = _prompt_sha12(prompt)
    max_tries = 2 if retry_empty else 1
    started = time.perf_counter()
    text = ""
    tries = 0
    while tries < max_tries:
        tries += 1
        reply = runnable.invoke(prompt)
        text = (getattr(reply, "content", None) or "").strip()
        if not text:
            # Only the first empty response announces a retry.
            if tries == 1 and retry_empty:
                logger.warning(
                    "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
                    label,
                    tries - 1,  # the log line reports the zero-based attempt index
                    digest,
                )
            continue
        if tries > 1:
            logger.info(
                "json_object 空内容重试成功 agent={} prompt_sha12={}",
                label,
                digest,
            )
        _log_json_object_done(
            label, digest, prompt, text, tries, started, success=True
        )
        return text
    # Every attempt produced empty content.
    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", label, digest)
    _log_json_object_done(
        label, digest, prompt, text, max_tries, started, success=False
    )
    return ""


async def ainvoke_json_object(
    llm: Any,
    prompt: str,
    *,
    max_tokens: int,
    agent: str | None = None,
    retry_empty: bool = True,
) -> str:
    """
    Asynchronous counterpart of ``invoke_json_object``.

    Binds ``llm`` via ``bind_json_object_mode``, awaits ``ainvoke(prompt)``,
    and returns the stripped content. When the content is empty and
    ``retry_empty`` is truthy, the call is awaited one more time.

    Args:
        llm: Object accepted by ``bind_json_object_mode``.
        prompt: Prompt passed unchanged to ``ainvoke``.
        max_tokens: Forwarded to ``bind_json_object_mode``.
        agent: Tag used in log lines; falls back to ``"json_object"``.
        retry_empty: Allow a second attempt after an empty first response.

    Returns:
        The stripped response content, or ``""`` if every attempt was empty.
    """
    runnable = bind_json_object_mode(llm, max_tokens=max_tokens)
    label = agent if agent else "json_object"
    digest = _prompt_sha12(prompt)
    max_tries = 2 if retry_empty else 1
    started = time.perf_counter()
    text = ""
    tries = 0
    while tries < max_tries:
        tries += 1
        reply = await runnable.ainvoke(prompt)
        text = (getattr(reply, "content", None) or "").strip()
        if not text:
            # Only the first empty response announces a retry.
            if tries == 1 and retry_empty:
                logger.warning(
                    "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
                    label,
                    tries - 1,  # the log line reports the zero-based attempt index
                    digest,
                )
            continue
        if tries > 1:
            logger.info(
                "json_object 空内容重试成功 agent={} prompt_sha12={}",
                label,
                digest,
            )
        _log_json_object_done(
            label, digest, prompt, text, tries, started, success=True
        )
        return text
    # Every attempt produced empty content.
    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", label, digest)
    _log_json_object_done(
        label, digest, prompt, text, max_tries, started, success=False
    )
    return ""


def _log_json_object_done(
    tag: str,
    sha: str,
    prompt: str,
    content: str,
    attempts_used: int,
    t0: float,
    *,
    success: bool,
) -> None:
    """Emit the summary and/or verbose log records for a finished JSON-object call.

    Args:
        tag: Agent tag placed in the summary line and used as payload prefix.
        sha: Short prompt hash placed in the summary line.
        prompt: Prompt text; its length is logged, and the text itself is
            logged in verbose mode.
        content: Response text; handled the same way as ``prompt``.
        attempts_used: Number of attempts reported in the summary line.
        t0: ``time.perf_counter()`` reading taken before the first attempt.
        success: Whether the call produced non-empty content.
    """
    elapsed_ms = (time.perf_counter() - t0) * 1000
    if agent_summary_enabled():
        logger.info(
            "llm_json_object agent={} prompt_sha12={} duration_ms={:.2f} "
            "prompt_char_count={} response_len={} attempts={} success={}",
            tag,
            sha,
            elapsed_ms,
            len(prompt or ""),
            len(content or ""),
            attempts_used,
            success,
        )
    if agent_verbose_enabled():
        # Prompt first, then response, each under "<tag>.<kind>".
        for kind, payload in (("prompt", prompt), ("response", content)):
            log_agent_payload(logger, f"{tag}.{kind}", payload)
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
+								"""
 								与 `get_llm_provider().langchain_llm` 配合使用的 LangChain Runnable 约定。
 								langchain-openai 要求用顶层 `response_format` 绑定 JSON 模式，禁止对 `.bind()` 传入
 								`model_kwargs={"response_format": ...}`（会错误传入底层 `completions.create`）。
 								"""
 								from __future__ import annotations
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								import hashlib
 								import time
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
+								from typing import Any
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								from app.core.agent_logging import (
 								    agent_summary_enabled,
 								    agent_verbose_enabled,
 								    log_agent_payload,
 								)
 								from app.core.logging import get_logger
 								logger = get_logger(__name__)
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
 								def bind_json_object_mode(llm: Any, *, max_tokens: int) -> Any:
 								    """返回绑定 `response_format=json_object` 与 `max_tokens` 的 Runnable（通常为 ChatOpenAI）。"""
 								    return llm.bind(
 								        response_format={"type": "json_object"},
 								        max_tokens=max_tokens,
 								    )
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
 								def _prompt_sha12(prompt: str) -> str:
 								    return hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()[:12]
 								def invoke_json_object(
 								    llm: Any,
 								    prompt: str,
 								    *,
 								    max_tokens: int,
 								    agent: str | None = None,
 								    retry_empty: bool = True,
 								) -> str:
 								    """
 								    同步调用 JSON object 模式；空 content 时可选重试一次（缓解 DeepSeek 偶发空输出）。
 								    仅依赖 bind_json_object_mode，不引用 features。
 								    """
 								    bound = bind_json_object_mode(llm, max_tokens=max_tokens)
 								    tag = agent or "json_object"
 								    sha = _prompt_sha12(prompt)
 								    attempts = 2 if retry_empty else 1
 								    t0 = time.perf_counter()
 								    last_content = ""
 								    for attempt in range(attempts):
 								        response = bound.invoke(prompt)
 								        content = (getattr(response, "content", None) or "").strip()
 								        last_content = content
 								        if content:
 								            if attempt > 0:
 								                logger.info(
 								                    "json_object 空内容重试成功 agent={} prompt_sha12={}",
 								                    tag,
 								                    sha,
 								                )
 								            _log_json_object_done(
 								                tag, sha, prompt, content, attempt + 1, t0, success=True
 								            )
 								            return content
 								        if attempt == 0 and retry_empty:
 								            logger.warning(
 								                "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
 								                tag,
 								                attempt,
 								                sha,
 								            )
 								    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
 								    _log_json_object_done(tag, sha, prompt, last_content, attempts, t0, success=False)
 								    return ""
 								async def ainvoke_json_object(
 								    llm: Any,
 								    prompt: str,
 								    *,
 								    max_tokens: int,
 								    agent: str | None = None,
 								    retry_empty: bool = True,
 								) -> str:
 								    """异步版 `invoke_json_object`。"""
 								    bound = bind_json_object_mode(llm, max_tokens=max_tokens)
 								    tag = agent or "json_object"
 								    sha = _prompt_sha12(prompt)
 								    attempts = 2 if retry_empty else 1
 								    t0 = time.perf_counter()
 								    last_content = ""
 								    for attempt in range(attempts):
 								        response = await bound.ainvoke(prompt)
 								        content = (getattr(response, "content", None) or "").strip()
 								        last_content = content
 								        if content:
 								            if attempt > 0:
 								                logger.info(
 								                    "json_object 空内容重试成功 agent={} prompt_sha12={}",
 								                    tag,
 								                    sha,
 								                )
 								            _log_json_object_done(
 								                tag, sha, prompt, content, attempt + 1, t0, success=True
 								            )
 								            return content
 								        if attempt == 0 and retry_empty:
 								            logger.warning(
 								                "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
 								                tag,
 								                attempt,
 								                sha,
 								            )
 								    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
 								    _log_json_object_done(tag, sha, prompt, last_content, attempts, t0, success=False)
 								    return ""
 								def _log_json_object_done(
 								    tag: str,
 								    sha: str,
 								    prompt: str,
 								    content: str,
 								    attempts_used: int,
 								    t0: float,
 								    *,
 								    success: bool,
 								) -> None:
 								    ms = (time.perf_counter() - t0) * 1000
 								    if agent_summary_enabled():
-												refactor(agents): 抽取阶段常量与对话上下文；快档 LLM；图片 prompt 可禁止回退

访谈与阶段
- 新增 app/agents/stage_constants.py：集中 CHAT_STAGES、章节分类/顺序、阶段到默认 memoir 类别等，与 MemoirState 默认槽位顺序对齐；减少散落在 prompts 内的重复常量。
- 新增 app/agents/chat/prompt_context.py：以 ChatPromptContext 汇总 guided 系统提示所需字段（阶段、槽位、轮次、人设、记忆证据、回复长度模式、背景声线、职业等），统一走 get_guided_conversation_prompt。
- 大幅收敛 app/agents/chat/prompts_conversation.py；调整 prompts.py、stage_prompts.py、stage_detection.py；同步 interview_agent、profile_agent、helpers 与 state_schema，使对话侧构造提示的方式一致、可测。

回忆录流水线
- memoir/prompts.py 删除已迁至 stage_constants / 独立模板的大段常量与图片占位相关逻辑；classification / extraction / fidelity / narrative agents 与 orchest（全量历史仍可用于计数，注入模型时按轮次与字符上限截断）、image_prompt_fallback_disabled。
- dependencies 增加 get_llm_provider_fast（LRU 缓存，可与默认共用密钥与 base_url）。

任务与编排
- memoir_tasks：prepare_batches 注入 llm_fast；开启独立快档模型时打结构化日志。
- chapter_cover_tasks、story_image_tasks：与图片 prompt / JSON 工具路径或策略变更对齐（import 与行为一致）。
- story_pipeline_sync 等小处同步。

其它核心
- langchain_llm、text_normalize 随上述调用链微调。

开发者体验
- .cursor/settings.json：启用 redis-development、postman 插件。

测试
- 新增 test_image_prompt_policy：覆盖「禁止回退」等图片 prompt 策略。
- 更新 test_interview_prompts、test_interview_reply_length、test_experience_regressions、test_json_and_memory_utils，匹配新常量位置、json_utils 与对话/长度行为。

											
										
										
											2026-04-02 12:00:00 +08:00
+								        prompt_chars = len(prompt or "")
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								        logger.info(
 								            "llm_json_object agent={} prompt_sha12={} duration_ms={:.2f} "
-												refactor(agents): 抽取阶段常量与对话上下文；快档 LLM；图片 prompt 可禁止回退

访谈与阶段
- 新增 app/agents/stage_constants.py：集中 CHAT_STAGES、章节分类/顺序、阶段到默认 memoir 类别等，与 MemoirState 默认槽位顺序对齐；减少散落在 prompts 内的重复常量。
- 新增 app/agents/chat/prompt_context.py：以 ChatPromptContext 汇总 guided 系统提示所需字段（阶段、槽位、轮次、人设、记忆证据、回复长度模式、背景声线、职业等），统一走 get_guided_conversation_prompt。
- 大幅收敛 app/agents/chat/prompts_conversation.py；调整 prompts.py、stage_prompts.py、stage_detection.py；同步 interview_agent、profile_agent、helpers 与 state_schema，使对话侧构造提示的方式一致、可测。

回忆录流水线
- memoir/prompts.py 删除已迁至 stage_constants / 独立模板的大段常量与图片占位相关逻辑；classification / extraction / fidelity / narrative agents 与 orchest（全量历史仍可用于计数，注入模型时按轮次与字符上限截断）、image_prompt_fallback_disabled。
- dependencies 增加 get_llm_provider_fast（LRU 缓存，可与默认共用密钥与 base_url）。

任务与编排
- memoir_tasks：prepare_batches 注入 llm_fast；开启独立快档模型时打结构化日志。
- chapter_cover_tasks、story_image_tasks：与图片 prompt / JSON 工具路径或策略变更对齐（import 与行为一致）。
- story_pipeline_sync 等小处同步。

其它核心
- langchain_llm、text_normalize 随上述调用链微调。

开发者体验
- .cursor/settings.json：启用 redis-development、postman 插件。

测试
- 新增 test_image_prompt_policy：覆盖「禁止回退」等图片 prompt 策略。
- 更新 test_interview_prompts、test_interview_reply_length、test_experience_regressions、test_json_and_memory_utils，匹配新常量位置、json_utils 与对话/长度行为。

											
										
										
											2026-04-02 12:00:00 +08:00
+								            "prompt_char_count={} response_len={} attempts={} success={}",
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								            tag,
 								            sha,
 								            ms,
-												refactor(agents): 抽取阶段常量与对话上下文；快档 LLM；图片 prompt 可禁止回退

访谈与阶段
- 新增 app/agents/stage_constants.py：集中 CHAT_STAGES、章节分类/顺序、阶段到默认 memoir 类别等，与 MemoirState 默认槽位顺序对齐；减少散落在 prompts 内的重复常量。
- 新增 app/agents/chat/prompt_context.py：以 ChatPromptContext 汇总 guided 系统提示所需字段（阶段、槽位、轮次、人设、记忆证据、回复长度模式、背景声线、职业等），统一走 get_guided_conversation_prompt。
- 大幅收敛 app/agents/chat/prompts_conversation.py；调整 prompts.py、stage_prompts.py、stage_detection.py；同步 interview_agent、profile_agent、helpers 与 state_schema，使对话侧构造提示的方式一致、可测。

回忆录流水线
- memoir/prompts.py 删除已迁至 stage_constants / 独立模板的大段常量与图片占位相关逻辑；classification / extraction / fidelity / narrative agents 与 orchest（全量历史仍可用于计数，注入模型时按轮次与字符上限截断）、image_prompt_fallback_disabled。
- dependencies 增加 get_llm_provider_fast（LRU 缓存，可与默认共用密钥与 base_url）。

任务与编排
- memoir_tasks：prepare_batches 注入 llm_fast；开启独立快档模型时打结构化日志。
- chapter_cover_tasks、story_image_tasks：与图片 prompt / JSON 工具路径或策略变更对齐（import 与行为一致）。
- story_pipeline_sync 等小处同步。

其它核心
- langchain_llm、text_normalize 随上述调用链微调。

开发者体验
- .cursor/settings.json：启用 redis-development、postman 插件。

测试
- 新增 test_image_prompt_policy：覆盖「禁止回退」等图片 prompt 策略。
- 更新 test_interview_prompts、test_interview_reply_length、test_experience_regressions、test_json_and_memory_utils，匹配新常量位置、json_utils 与对话/长度行为。

											
										
										
											2026-04-02 12:00:00 +08:00
+								            prompt_chars,
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								            len(content or ""),
 								            attempts_used,
 								            success,
 								        )
 								    if agent_verbose_enabled():
 								        log_agent_payload(logger, f"{tag}.prompt", prompt)
 								        log_agent_payload(logger, f"{tag}.response", content)