life-echo/api/app/core/langchain_llm.py

"""
与 `get_llm_provider().langchain_llm` 配合使用的 LangChain Runnable 约定。

langchain-openai 要求用顶层 `response_format` 绑定 JSON 模式，禁止对 `.bind()` 传入
`model_kwargs={"response_format": ...}`（会错误传入底层 `completions.create`）。
"""

from __future__ import annotations

import hashlib
import time
from typing import Any

from app.core.agent_logging import (
    agent_summary_enabled,
    agent_verbose_enabled,
    log_agent_payload,
)
from app.core.logging import get_logger

logger = get_logger(__name__)


def bind_json_object_mode(llm: Any, *, max_tokens: int) -> Any:
    """返回绑定 `response_format=json_object` 与 `max_tokens` 的 Runnable（通常为 ChatOpenAI）。"""
    return llm.bind(
        response_format={"type": "json_object"},
        max_tokens=max_tokens,
    )


def _prompt_sha12(prompt: str) -> str:
    return hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()[:12]


def invoke_json_object(
    llm: Any,
    prompt: str,
    *,
    max_tokens: int,
    agent: str | None = None,
    retry_empty: bool = True,
) -> str:
    """
    同步调用 JSON object 模式；空 content 时可选重试一次（缓解 DeepSeek 偶发空输出）。
    仅依赖 bind_json_object_mode，不引用 features。
    """
    bound = bind_json_object_mode(llm, max_tokens=max_tokens)
    tag = agent or "json_object"
    sha = _prompt_sha12(prompt)
    attempts = 2 if retry_empty else 1
    t0 = time.perf_counter()
    last_content = ""
    for attempt in range(attempts):
        response = bound.invoke(prompt)
        content = (getattr(response, "content", None) or "").strip()
        last_content = content
        if content:
            if attempt > 0:
                logger.info(
                    "json_object 空内容重试成功 agent={} prompt_sha12={}",
                    tag,
                    sha,
                )
            _log_json_object_done(
                tag, sha, prompt, content, attempt + 1, t0, success=True
            )
            return content
        if attempt == 0 and retry_empty:
            logger.warning(
                "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
                tag,
                attempt,
                sha,
            )
    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
    _log_json_object_done(tag, sha, prompt, last_content, attempts, t0, success=False)
    return ""


async def ainvoke_json_object(
    llm: Any,
    prompt: str,
    *,
    max_tokens: int,
    agent: str | None = None,
    retry_empty: bool = True,
) -> str:
    """异步版 `invoke_json_object`。"""
    bound = bind_json_object_mode(llm, max_tokens=max_tokens)
    tag = agent or "json_object"
    sha = _prompt_sha12(prompt)
    attempts = 2 if retry_empty else 1
    t0 = time.perf_counter()
    last_content = ""
    for attempt in range(attempts):
        response = await bound.ainvoke(prompt)
        content = (getattr(response, "content", None) or "").strip()
        last_content = content
        if content:
            if attempt > 0:
                logger.info(
                    "json_object 空内容重试成功 agent={} prompt_sha12={}",
                    tag,
                    sha,
                )
            _log_json_object_done(
                tag, sha, prompt, content, attempt + 1, t0, success=True
            )
            return content
        if attempt == 0 and retry_empty:
            logger.warning(
                "json_object 返回空 content，将重试 agent={} attempt={} prompt_sha12={}",
                tag,
                attempt,
                sha,
            )
    logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
    _log_json_object_done(tag, sha, prompt, last_content, attempts, t0, success=False)
    return ""


def _log_json_object_done(
    tag: str,
    sha: str,
    prompt: str,
    content: str,
    attempts_used: int,
    t0: float,
    *,
    success: bool,
) -> None:
    ms = (time.perf_counter() - t0) * 1000
    if agent_summary_enabled():
        prompt_chars = len(prompt or "")
        logger.info(
            "llm_json_object agent={} prompt_sha12={} duration_ms={:.2f} "
            "prompt_char_count={} response_len={} attempts={} success={}",
            tag,
            sha,
            ms,
            prompt_chars,
            len(content or ""),
            attempts_used,
            success,
        )
    if agent_verbose_enabled():
        log_agent_payload(logger, f"{tag}.prompt", prompt)
        log_agent_payload(logger, f"{tag}.response", content)