feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-09 13:38:53 +08:00
parent 064ad2161d
commit b0251e5b26
14 changed files with 544 additions and 14 deletions

View File

@@ -110,6 +110,8 @@ class Settings(BaseSettings):
# Memoir Phase1: with multiple segments, a single batched LLM call performs
# extraction + chapter classification (falls back to per-segment processing on
# failure); single-segment input, and the disabled case, still run per segment.
memoir_phase1_batch_llm_enabled: bool = True
memoir_phase1_batch_llm_max_tokens: int = Field(default=4096, ge=512, le=32_768)
#: Phase1 batch LLM: maximum number of segments included in a single request
#: (multiple chunks are merged; avoids the completion hitting the token cap and
#: being truncated).
memoir_phase1_batch_llm_chunk_size: int = Field(default=24, ge=1, le=500)
# Memoir agents: max_tokens for `invoke_json_object` / `llm_json_call`
# (previously hard-coded, now moved into configuration).
memoir_extraction_max_tokens: int = Field(default=1024, ge=64, le=8192)
memoir_classification_max_tokens: int = Field(default=256, ge=32, le=4096)
@@ -172,8 +174,8 @@ class Settings(BaseSettings):
log_level: str = "INFO"
# LOG_AGENT_VERBOSE: when True, additionally emit a one-line INFO summary per
# Agent (elapsed time, size) without requiring global DEBUG.
log_agent_verbose: bool = False
# NOTE(review): agent_log_max_chars is declared twice below (default 0, then
# default 4096). This looks like the removed/added pair of a diff hunk whose
# +/- markers were lost — confirm that only the second declaration is intended.
# AGENT_LOG_MAX_CHARS: at DEBUG, maximum number of characters when logging
# prompt/response previews; 0 = no truncation (full output).
agent_log_max_chars: int = Field(default=0, ge=0, le=50_000_000)
# AGENT_LOG_MAX_CHARS: at DEBUG, maximum number of characters when logging
# prompt/response previews; 0 = no truncation (full output, use with caution).
agent_log_max_chars: int = Field(default=4096, ge=0, le=50_000_000)
# AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY: at DEBUG, interview / profile chat logs
# omit the System message body (only len + sha12 are recorded).
agent_log_omit_system_message_body: bool = True
# AGENT_LOG_JSON_PROMPT_PREFIX_CHARS: at DEBUG, when the total length of a
# *.prompt exceeds the next setting, skip the first N characters before
# previewing (0 = do not skip).
@@ -182,6 +184,10 @@ class Settings(BaseSettings):
agent_log_json_prompt_prefix_only_if_len_gt: int = Field(
default=4000, ge=0, le=2_000_000
)
# AGENT_LOG_PROMPT_MODE: at DEBUG, how *.prompt is logged:
# preview = truncated preview | hash_only = sha12 + length only (no body).
agent_log_prompt_mode: str = Field(default="preview")
# AGENT_LOG_PROMPT_DEDUP: at DEBUG, when the same label logs identical full
# text consecutively, skip from the second entry onward (reduces noise from
# repeated templates).
agent_log_prompt_dedup: bool = False
# Third-party stdlib logging: empty = automatic; when LOG_LEVEL is DEBUG/TRACE,
# Celery -> INFO and httpx/httpcore -> WARNING.
celery_log_level: str = ""
httpx_log_level: str = ""
@@ -216,6 +222,25 @@ class Settings(BaseSettings):
return False
return True
@field_validator("agent_log_prompt_mode", mode="before")
@classmethod
def _normalize_agent_log_prompt_mode(cls, v: object) -> str:
if v is None:
return "preview"
s = str(v).strip().lower()
if s not in ("preview", "hash_only"):
return "preview"
return s
@field_validator("agent_log_prompt_dedup", mode="before")
@classmethod
def _coerce_agent_log_prompt_dedup(cls, v: object) -> bool:
if isinstance(v, bool):
return v
if v is None:
return False
return str(v).strip().lower() in ("1", "true", "yes", "on")
# ── Misc ─────────────────────────────────────────────────
enable_test_subscription: int = 0
enable_test_plan: str = "" # "1" / "true" / "yes" are treated as True