feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions
--- a/api/app/core/llm_call.py
+++ b/api/app/core/llm_call.py
@@ -15,6 +15,13 @@ from typing import Any, Callable, Literal, TypeVar

 from pydantic import BaseModel, ValidationError

+try:
+    from openai import (
+        ContentFilterFinishReasonError as _OpenAIContentFilterFinishReasonError,
+    )
+except ImportError:  # 兼容性：旧版 SDK 无此类
+    _OpenAIContentFilterFinishReasonError = None
+
 from app.core.agent_logging import agent_verbose_enabled, log_agent_payload
 from app.core.json_utils import extract_json_payload
 from app.core.langchain_llm import (
@@ -61,6 +68,57 @@ def _prompt_sha12(prompt: str) -> str:
    return hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()[:12]


+def _iter_exception_chain(exc: BaseException):
+    """Yield ``exc`` itself, then walk its ``__cause__`` / ``__context__`` chain; seen ids are tracked to stop on cycles."""
+    seen: set[int] = set()
+    cur: BaseException | None = exc
+    while cur is not None and id(cur) not in seen:
+        yield cur
+        seen.add(id(cur))
+        cur = cur.__cause__ or cur.__context__  # one link per step: explicit cause first, implicit context otherwise
+
+
+def _is_content_filter_refusal(exc: BaseException) -> bool:
+    """Return True for content-moderation blocks (OpenAI / Azure etc.): no model JSON to parse, an expected failure that should not log an ERROR stack trace."""
+    for e in _iter_exception_chain(exc):
+        if _OpenAIContentFilterFinishReasonError is not None and isinstance(  # the class is None when the SDK import failed
+            e,
+            _OpenAIContentFilterFinishReasonError,
+        ):
+            return True
+        msg = str(e).lower()  # fallback: case-insensitive match on the exception text
+        if "content filter" in msg and (
+            "reject" in msg or "blocked" in msg or "filter" in msg  # NOTE(review): "filter" is always present once "content filter" matched, so this group never narrows the match
+        ):
+            return True
+    return False
+
+
+_LLM_MSG_CONTENT_FILTER = (  # fixed message returned by _format_llm_invoke_error_message for content-filter blocks
+    "模型输出被服务商内容安全策略拦截（content filter），通常与提示或上下文中触发了合规扫描有关；"
+    "可尝试更换模型、缩短送入模型的正文/证据节选，或在服务商控制台调整内容过滤策略。"
+)
+
+
+def _format_llm_invoke_error_message(exc: BaseException) -> str:  # error text for a failed invoke
+    if _is_content_filter_refusal(exc):
+        return _LLM_MSG_CONTENT_FILTER  # content-filter blocks get the fixed explanatory message
+    return str(exc)  # every other failure keeps the raw exception text
+
+
+def _log_invoke_failure(*, agent: str, exc: BaseException, sync: bool) -> None:  # log one failed LLM invoke
+    if _is_content_filter_refusal(exc):  # expected failure: INFO line only, no traceback
+        logger.info(
+            "event=llm_content_filter_blocked agent={} sync={} detail={}",
+            agent,
+            sync,
+            str(exc)[:500],  # logged detail is capped at 500 characters
+        )
+        return
+    tag = "llm_json_call" if sync else "allm_json_call"  # names the sync vs. async entry point in the log line
+    logger.bind(agent=agent).exception("{} invoke error: {}", tag, exc)
+
+
 def _invoke_raw_sync(
    llm: Any,
    prompt: str,
@@ -272,7 +330,7 @@ def llm_json_call(
            return fallback_factory()
        raise
    except Exception as e:
-        logger.bind(agent=agent).exception("llm_json_call invoke error: {}", e)
+        _log_invoke_failure(agent=agent, exc=e, sync=True)
        used_fb = fallback_factory is not None
        _emit_meta(
            agent=agent,
@@ -295,7 +353,7 @@ def llm_json_call(
            return fallback_factory()
        raise LLMCallError(
            "invoke",
-            str(e),
+            _format_llm_invoke_error_message(e),
            raw_content=raw[:4096] if raw else None,
        ) from e

@@ -366,7 +424,7 @@ async def allm_json_call(
            return fallback_factory()
        raise
    except Exception as e:
-        logger.bind(agent=agent).exception("allm_json_call invoke error: {}", e)
+        _log_invoke_failure(agent=agent, exc=e, sync=False)
        used_fb = fallback_factory is not None
        _emit_meta(
            agent=agent,
@@ -389,7 +447,7 @@ async def allm_json_call(
            return fallback_factory()
        raise LLMCallError(
            "invoke",
-            str(e),
+            _format_llm_invoke_error_message(e),
            raw_content=raw[:4096] if raw else None,
        ) from e