feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
@@ -15,6 +15,13 @@ from typing import Any, Callable, Literal, TypeVar
|
||||
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
try:
|
||||
from openai import (
|
||||
ContentFilterFinishReasonError as _OpenAIContentFilterFinishReasonError,
|
||||
)
|
||||
except ImportError: # 兼容性:旧版 SDK 无此类
|
||||
_OpenAIContentFilterFinishReasonError = None
|
||||
|
||||
from app.core.agent_logging import agent_verbose_enabled, log_agent_payload
|
||||
from app.core.json_utils import extract_json_payload
|
||||
from app.core.langchain_llm import (
|
||||
@@ -61,6 +68,57 @@ def _prompt_sha12(prompt: str) -> str:
|
||||
return hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()[:12]
|
||||
|
||||
|
||||
def _iter_exception_chain(exc: BaseException):
|
||||
"""包含自身与 ``__cause__`` / ``__context__`` 链,去重防环。"""
|
||||
seen: set[int] = set()
|
||||
cur: BaseException | None = exc
|
||||
while cur is not None and id(cur) not in seen:
|
||||
yield cur
|
||||
seen.add(id(cur))
|
||||
cur = cur.__cause__ or cur.__context__
|
||||
|
||||
|
||||
def _is_content_filter_refusal(exc: BaseException) -> bool:
    """Return True when ``exc`` or anything in its chain is a content-filter block.

    Content-moderation refusals from OpenAI / Azure style providers carry no
    model JSON to parse. They are an expected failure mode, so callers should
    not log them as ERROR with a stack trace.

    Every exception yielded by ``_iter_exception_chain`` is checked for either:

    * being an instance of the OpenAI SDK's ``ContentFilterFinishReasonError``
      (skipped when the installed SDK does not provide that class), or
    * having a message that mentions the content filter, spelled
      ``content filter`` or ``content_filter``, case-insensitively.
    """
    filter_error_cls = _OpenAIContentFilterFinishReasonError
    for e in _iter_exception_chain(exc):
        if filter_error_cls is not None and isinstance(e, filter_error_cls):
            return True
        msg = str(e).lower()
        # The previous check additionally required "reject" / "blocked" /
        # "filter" in the message, but "filter" is always present once
        # "content filter" has matched, so that clause never excluded anything
        # and is dropped. "content_filter" is the spelling providers use for
        # the finish_reason / error code, so it is accepted as well.
        if "content filter" in msg or "content_filter" in msg:
            return True
    return False
|
||||
|
||||
|
||||
# Fixed Chinese-language message used in place of the raw exception text when
# an LLM call is recognised as a content-filter refusal (returned by
# _format_llm_invoke_error_message). It says the model output was blocked by
# the provider's content-safety policy and suggests switching models,
# shortening the text/evidence sent to the model, or adjusting the filter
# policy in the provider console.
_LLM_MSG_CONTENT_FILTER = (
    "模型输出被服务商内容安全策略拦截(content filter),通常与提示或上下文中触发了合规扫描有关;"
    "可尝试更换模型、缩短送入模型的正文/证据节选,或在服务商控制台调整内容过滤策略。"
)
|
||||
|
||||
|
||||
def _format_llm_invoke_error_message(exc: BaseException) -> str:
    """Pick the message text reported for a failed LLM invocation.

    Returns the fixed ``_LLM_MSG_CONTENT_FILTER`` explanation when ``exc`` is
    recognised as a content-filter refusal; otherwise returns ``str(exc)``.
    """
    blocked = _is_content_filter_refusal(exc)
    return _LLM_MSG_CONTENT_FILTER if blocked else str(exc)
|
||||
|
||||
|
||||
def _log_invoke_failure(*, agent: str, exc: BaseException, sync: bool) -> None:
    """Log a failed LLM invocation at a level that matches its cause.

    Args:
        agent: Name of the calling agent, included in the log output.
        exc: The exception raised by the invocation.
        sync: True when the failure came from the synchronous call path; it
            selects the ``llm_json_call`` / ``allm_json_call`` tag.

    Content-filter refusals are logged as a single INFO line with the
    exception text truncated to 500 characters. Every other failure is logged
    through ``logger.exception`` with ``agent`` bound to the record.
    """
    if not _is_content_filter_refusal(exc):
        tag = "llm_json_call" if sync else "allm_json_call"
        logger.bind(agent=agent).exception("{} invoke error: {}", tag, exc)
        return

    # Expected provider-side block: no stack trace, keep the detail short.
    detail = str(exc)[:500]
    logger.info(
        "event=llm_content_filter_blocked agent={} sync={} detail={}",
        agent,
        sync,
        detail,
    )
|
||||
|
||||
|
||||
def _invoke_raw_sync(
|
||||
llm: Any,
|
||||
prompt: str,
|
||||
@@ -272,7 +330,7 @@ def llm_json_call(
|
||||
return fallback_factory()
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.bind(agent=agent).exception("llm_json_call invoke error: {}", e)
|
||||
_log_invoke_failure(agent=agent, exc=e, sync=True)
|
||||
used_fb = fallback_factory is not None
|
||||
_emit_meta(
|
||||
agent=agent,
|
||||
@@ -295,7 +353,7 @@ def llm_json_call(
|
||||
return fallback_factory()
|
||||
raise LLMCallError(
|
||||
"invoke",
|
||||
str(e),
|
||||
_format_llm_invoke_error_message(e),
|
||||
raw_content=raw[:4096] if raw else None,
|
||||
) from e
|
||||
|
||||
@@ -366,7 +424,7 @@ async def allm_json_call(
|
||||
return fallback_factory()
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.bind(agent=agent).exception("allm_json_call invoke error: {}", e)
|
||||
_log_invoke_failure(agent=agent, exc=e, sync=False)
|
||||
used_fb = fallback_factory is not None
|
||||
_emit_meta(
|
||||
agent=agent,
|
||||
@@ -389,7 +447,7 @@ async def allm_json_call(
|
||||
return fallback_factory()
|
||||
raise LLMCallError(
|
||||
"invoke",
|
||||
str(e),
|
||||
_format_llm_invoke_error_message(e),
|
||||
raw_content=raw[:4096] if raw else None,
|
||||
) from e
|
||||
|
||||
|
||||
Reference in New Issue
Block a user