Files
life-echo/api/app/core/config.py
Kevin 59d4b19d7d feat(api): 回忆录管线简化、路由延迟池与相关加固
- Phase1/2:移除 MemoirOrchestrator.run 与 process_memoir_segments 别名;文档改为 process_memoir_phase1。
- 槽位校验集中到 stage_constants(filter_stage_slots),批处理与顺序路径及 state_service 写库一致。
- StoryRoute:no_llm/parse_error/invalid_target 保守 new_story;短篇护栏不覆盖这些 fallback。
- Phase2 低置信单路径可选延迟(StoryPipelineResult.deferred):不写 Chapter/Story,Segment 记录 defer 元数据,冷却内不重复消费;上限后停自动重试,Phase1 同类目新段唤醒池内段。
- Alembic 0017:segments 表 narrative_defer_* 列。
- ProfileAgent:经 LlmGateway/注入 Provider 统一聊天与 JSON,新增测试。
- ImagePromptOrchestrator:LLM 初始化失败可依配置降级或硬失败;补充策略测试。
- 配套单测与 README/本地开发文档表述更新。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-06 13:18:02 +08:00

473 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
统一配置:所有环境变量通过此模块的 Settings 单点读取。
业务代码只允许 import settings禁止散落 os.getenv() / load_dotenv()。
本地开发时由 api/development.sh 在启动前将 .env.development 同步为 .env每次启动覆盖
Docker / 服务端由镜像与 compose 注入进程环境;此处仅固定读取工作目录下的 .env 作为默认值来源。
进程环境变量(容器 environment、export覆盖 .env 同名项。
"""
import secrets
from pydantic import AliasChoices, Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Central application settings.

    Values are read from process environment variables and from the ``.env``
    file in the working directory (see ``model_config``). Variable names are
    matched case-insensitively and unknown variables are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ── Database ──────────────────────────────────────────────
    database_url: str = "postgresql://postgres:postgres@localhost:5432/life_echo"
    # Whether to run Alembic at startup (main.py lifespan); can be turned off
    # for tests or read-only scenarios.
    alembic_run_on_startup: bool = True
    # True: exit the process when the migration fails (recommended for
    # production). False: only log the error and continue (e.g. local run
    # without a DB).
    alembic_startup_fail_fast: bool = False
    alembic_startup_max_retries: int = Field(default=3, ge=1, le=10)
    alembic_startup_retry_base_seconds: float = Field(default=1.0, ge=0.1, le=60.0)

    # ── Redis ─────────────────────────────────────────────────
    redis_url: str = "redis://localhost:6379/0"
    redis_session_ttl: int = 86400

    # ── Runtime / Celery developer experience ─────────────────
    # APP_ENV: defaults to development locally; set to production for the
    # Docker production stack.
    app_environment: str = Field(
        default="development",
        validation_alias=AliasChoices("APP_ENV", "APP_ENVIRONMENT"),
    )
    # When not in production and True, purge the Celery queues after
    # main/internal_main connects to Redis (no FLUSHDB; session keys are
    # unaffected).
    celery_purge_broker_on_startup: bool = False
    # Queue that Memory LLM enrichment tasks are routed to; can be separated
    # from the main worker (see README / docker-compose).
    celery_memory_enrichment_queue: str = "memory_idle"

    # ── Auth / JWT ────────────────────────────────────────────
    # The default is a fresh random value for every Settings() instantiation.
    # NOTE(review): if SECRET_KEY is not set explicitly, each process (and each
    # restart) presumably signs tokens with a different key — confirm that all
    # deployments provide SECRET_KEY.
    secret_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 120
    refresh_token_expire_days: int = 30
    # Local / intranet evaluation: allow POST /api/auth/mock/sms-login to skip
    # SMS; must be enabled explicitly (the route still refuses under
    # production).
    mock_sms_login_enabled: bool = False

    # ── LLM / DeepSeek ───────────────────────────────────────
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://api.deepseek.com"
    # Official new model name (V4-Flash); matching the "non-thinking" behaviour
    # of the deprecated name deepseek-chat additionally requires
    # deepseek_thinking_enabled.
    deepseek_model: str = "deepseek-v4-flash"
    # In the official API V4-Flash has thinking enabled by default; the main
    # path keeps it off by default to match the legacy deepseek-chat.
    deepseek_thinking_enabled: bool = False
    llm_api_key: str = ""
    llm_base_url: str = ""
    llm_model: str = ""
    llm_temperature: float = 0.7
    # Empty string: the fast tier uses the same model as the default;
    # classification / extraction / memory enrichment etc. can be pointed at a
    # lighter model.
    llm_fast_model: str = ""

    # ── Memory embeddings (Zhipu BigModel domestic embedding-3; key is separate from LLM/DeepSeek) ──
    zhipu_api_key: str = ""
    embedding_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    embedding_model: str = "embedding-3"

    # ── Chat interview (token caps + truncation in code, see reply_limits) ──
    chat_interview_max_tokens: int = 512
    chat_interview_max_segments: int = 2
    chat_interview_max_chars_per_segment: int = 380
    chat_opening_max_tokens: int = 380
    chat_profile_followup_max_tokens: int = 280
    # The full history in Redis is only used for turn counting; what is
    # injected into the LLM is limited to the most recent turns and a
    # character budget.
    chat_history_max_pairs: int = Field(default=15, ge=1, le=500)
    chat_history_max_chars: int = Field(default=6000, ge=256, le=500_000)
    chat_era_context_enabled: bool = True
    # Interview: each turn, use the LLM to determine the user's main life
    # stage and update MemoirState.current_stage; when False only keywords are
    # used.
    chat_stage_detection_enabled: bool = True
    chat_stage_detection_max_tokens: int = 128
    # Interview persona: default | warm_listener | curious_guide (unknown
    # values are treated as default).
    chat_interview_persona: str = "default"
    # Sampling temperature for interview/opening LLM calls: slightly higher
    # than the general llm_temperature, favouring colloquial and varied
    # narration and fewer formulaic sentences.
    chat_interview_temperature: float = Field(default=0.93, ge=0.0, le=2.0)
    # Interview: retrieve memories based on the user's current utterance and
    # inject them into the prompt; when off, MemoryService.retrieve is not
    # called.
    chat_memory_retrieval_enabled: bool = True
    chat_memory_top_k: int = Field(default=8, ge=1, le=30)
    chat_memory_evidence_max_chars: int = Field(default=4096, ge=256, le=50_000)
    # Memory injection for interviews uses the chat-specific safe formatting
    # (numbered references + a note de-emphasising the grammatical subject).
    chat_memory_safe_evidence_format_enabled: bool = True
    # True: after the rule-based TurnPlan, run one extra lightweight JSON focus
    # planner pass (this turn's follow-up focus + memory references + reply
    # shape; falls back to the baseline on failure).
    chat_reply_planner_llm_enabled: bool = False
    chat_reply_planner_max_tokens: int = Field(default=256, ge=64, le=1024)
    chat_reply_planner_temperature: float = Field(default=0.2, ge=0.0, le=1.0)

    # ── Memoir narrative fidelity check (FidelityCheckAgent) ──
    memoir_fidelity_check_enabled: bool = True
    memoir_fidelity_check_max_tokens: int = 512
    # Oral-text normalisation applied before narrative / fidelity:
    # off | rules | llm.
    # NOTE(review): the original note also says the segment's original text is
    # "not persisted"; the wording is ambiguous — presumably the stored segment
    # text is left untouched, confirm.
    memoir_oral_normalize_enabled: bool = True
    memoir_oral_normalize_mode: str = "rules"
    memoir_oral_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    memoir_oral_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )
    # Chat: the model consumes the cleaned text (the original text stored for
    # the segment is unchanged); shares the rule layer with memoir but is
    # configured independently.
    chat_input_normalize_enabled: bool = True
    chat_input_normalize_mode: str = "rules"  # off | rules | llm
    chat_input_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    chat_input_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )
    # True and mode=llm: only voice/ASR segments go through LLM correction;
    # keyboard input gets rule-based normalisation only (saves an LLM call per
    # turn).
    chat_input_normalize_llm_voice_only: bool = True
    # Profile collection: if fields are still missing after this many
    # conversation turns (Redis full turn count), force the switch to the
    # interview to avoid a prolonged questionnaire feel.
    chat_profile_max_turns: int = Field(default=8, ge=1, le=500)
    # Memoir Phase1: handle a batch of several segments with one LLM call that
    # does extraction + chapter classification (falls back to per-segment on
    # failure); with a single segment or when disabled, processing stays
    # per-segment.
    memoir_phase1_batch_llm_enabled: bool = True
    memoir_phase1_batch_llm_max_tokens: int = Field(default=4096, ge=512, le=32_768)
    #: Phase1 batch LLM: maximum number of segments per request (multiple
    #: chunks are merged; avoids truncation from hitting the completion limit).
    memoir_phase1_batch_llm_chunk_size: int = Field(default=24, ge=1, le=500)
    #: TTL of the Redis snapshot holding fine-grained memoir pipeline progress
    #: (memoir_pipeline_run:*).
    memoir_pipeline_run_ttl_seconds: int = Field(default=172_800, ge=3600, le=2_592_000)
    # Memoir agents: max_tokens for `invoke_json_object` / `llm_json_call`
    # (formerly hard-coded, moved into configuration).
    memoir_extraction_max_tokens: int = Field(default=1024, ge=64, le=8192)
    memoir_classification_max_tokens: int = Field(default=256, ge=32, le=4096)
    memoir_narrative_max_tokens: int = Field(default=4096, ge=256, le=32_768)
    memoir_narrative_merge_max_tokens: int = Field(default=8192, ge=256, le=64_000)
    memoir_title_max_tokens: int = Field(default=256, ge=32, le=4096)
    memoir_story_route_max_tokens: int = Field(default=1024, ge=64, le=8192)
    memoir_story_batch_plan_max_tokens: int = Field(default=4096, ge=256, le=32_768)
    # Profile extraction (ProfileAgent JSON mode).
    chat_profile_extract_max_tokens: int = Field(default=512, ge=64, le=4096)

    # ── ASR ───────────────────────────────────────────────────
    asr_provider: str = "whisper"
    asr_model_size: str = "small"
    asr_device: str = "auto"
    asr_compute_type: str = "auto"
    asr_model_cache_dir: str = ""

    # ── Tencent SMS ──────────────────────────────────────────
    tencent_sms_secret_id: str = ""
    tencent_sms_secret_key: str = ""
    tencent_sms_sdk_app_id: str = ""
    tencent_sms_sign_name: str = ""
    tencent_sms_template_id: str = ""
    tencent_sms_template_param_count: int = 2

    # ── Tencent ASR / TTS (shared Secret; independent of the SMS and COS keys) ──
    tencent_secret_id: str = ""
    tencent_secret_key: str = ""

    # ── TTS (openai | tencent), independent of ASR: only controls reply-side speech synthesis ──
    enable_tts: bool = True
    tts_provider: str = "tencent"
    openai_api_key: str = ""
    tts_voice_type: int = 502001  # Tencent voice ID, see https://cloud.tencent.com/document/product/1073/92668
    tts_codec: str = "mp3"

    # ── WeChat Pay ───────────────────────────────────────────
    wechat_pay_app_id: str = ""
    wechat_pay_mch_id: str = ""
    wechat_pay_api_v3_key: str = ""
    wechat_pay_private_key_path: str = "certs/apiclient_key.pem"
    wechat_pay_private_key: str = ""  # PEM content; use either this or private_key_path
    wechat_pay_cert_serial_no: str = ""
    wechat_pay_notify_url: str = ""
    wechat_pay_platform_public_key: str = ""
    wechat_pay_platform_public_key_path: str = ""
    wechat_pay_platform_public_key_id: str = ""

    # ── Alipay ───────────────────────────────────────────────
    alipay_app_id: str = ""
    alipay_private_key: str = ""
    alipay_public_key: str = ""
    alipay_notify_url: str = ""
    alipay_sign_type: str = "RSA2"
    alipay_under_development: str = "true"  # "1"/"true"/"yes" mean "under development, unavailable"

    # ── Logging ──────────────────────────────────────────────
    # Env var LOG_LEVEL: minimum level of the loguru sink
    # (TRACE/DEBUG/INFO/…).
    log_level: str = "INFO"
    # LOG_AGENT_VERBOSE: when True, additionally emit a one-line INFO summary
    # per Agent (duration, size) without needing global DEBUG.
    log_agent_verbose: bool = False
    # AGENT_LOG_MAX_CHARS: maximum characters when logging prompt/response
    # previews under DEBUG; 0 = no truncation (full output, use with care).
    agent_log_max_chars: int = Field(default=4096, ge=0, le=50_000_000)
    # AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY: under DEBUG, interview/profile chat
    # logs omit the System message body (only len + sha12).
    agent_log_omit_system_message_body: bool = True
    # AGENT_LOG_JSON_PROMPT_PREFIX_CHARS: under DEBUG, when the total *.prompt
    # length exceeds the next setting, skip the first N characters before
    # previewing (0 = do not skip).
    agent_log_json_prompt_prefix_chars: int = Field(default=0, ge=0, le=500_000)
    # AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT: minimum prompt length that
    # triggers "skip the prefix".
    agent_log_json_prompt_prefix_only_if_len_gt: int = Field(
        default=4000, ge=0, le=2_000_000
    )
    # AGENT_LOG_PROMPT_MODE: how *.prompt is logged under DEBUG:
    # preview = truncated preview | hash_only = sha12 + length only (no body).
    agent_log_prompt_mode: str = Field(default="preview")
    # AGENT_LOG_PROMPT_DEDUP: under DEBUG, when the same label logs identical
    # full text consecutively, skip from the second entry on (less repeated
    # template noise).
    agent_log_prompt_dedup: bool = False
    # Third-party stdlib logging: empty = automatic (Celery → INFO under
    # DEBUG/TRACE, otherwise Celery and httpx default to WARNING).
    celery_log_level: str = ""
    httpx_log_level: str = ""
    # When non-empty, additionally write JSONL (serialize=True) for Loki/ELK;
    # coexists with the coloured stderr console.
    log_json_file: str = ""

    # ── Misc ─────────────────────────────────────────────────
    enable_test_subscription: int = 0
    enable_test_plan: str = ""  # "1" / "true" / "yes" count as True
    enable_docs: bool = True

    # ── Memoir Image ─────────────────────────────────────────
    memoir_image_enabled: bool = False
    # True: when the image LLM prompt fails, do not use the English fallback
    # template (enable only after product and the task-failure flow have been
    # confirmed).
    image_prompt_fallback_disabled: bool = False
    memoir_image_poll_interval: int = 3
    memoir_image_max_attempts: int = 20
    memoir_image_provider: str = "liblib"
    memoir_image_style_default: str = "watercolor"
    memoir_image_size_default: str = "1280x720"
    memoir_image_download_hosts: str = ""
    # Minimum number of asset:// inline images a chapter's canonical_markdown
    # must contain before the chapter cover is generated/shown (>= this value
    # passes; 0 means this condition never blocks).
    memoir_min_inline_images_for_chapter_cover: int = Field(default=1, ge=0, le=100)
    # Minimum Story body length (characters) before a main-image intent is
    # created / an image is requested (0 means no limit).
    story_image_min_body_chars: int = 400
    # Enqueue de-duplication for generate_story_image (Redis SET NX).
    story_image_enqueue_dedup_ttl: int = Field(default=300, ge=30, le=86400)
    # Delay (seconds) before enqueuing the async chapter materialisation task,
    # for peak shaving.
    recompose_chapter_delay_seconds: int = Field(default=8, ge=0, le=600)
    # TTL of the chapter mutex shared with the memoir pipeline; should cover
    # the P95 duration of Phase2 / recompose.
    chapter_pipeline_lock_ttl_seconds: int = Field(default=360, ge=10, le=3600)
    # Hard caps for append: canonical character count and version count
    # (exceeding either forces new_story).
    story_append_max_canonical_chars: int = Field(default=12000, ge=1000, le=500_000)
    story_append_max_versions: int = Field(default=20, ge=1, le=500)
    # StoryRouteAgent: candidate JSON budget (conservative defaults, can be
    # raised).
    story_route_candidate_body_max_chars: int = Field(default=2200, ge=200, le=8000)
    story_route_candidate_total_max_chars: int = Field(
        default=20_000, ge=2000, le=100_000
    )
    story_route_long_body_head_chars: int = Field(default=700, ge=100, le=4000)
    story_route_long_body_tail_chars: int = Field(default=700, ge=100, le=4000)
    story_route_summary_min_chars: int = Field(default=30, ge=0, le=500)
    story_route_index_preview_chars: int = Field(default=140, ge=20, le=500)
    # Childhood / schooling / family: when this batch's oral text is below this
    # character count and the route is "new", prefer appending to the default
    # candidate to reduce fragmentary stories.
    memoir_story_route_append_guardrail_oral_chars: int = Field(
        default=1800, ge=0, le=50_000
    )
    # Evidence retrieval top_k: retrieve less when the batch of units is large.
    evidence_top_k_default: int = Field(default=10, ge=1, le=50)
    evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
    evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
    # Story/Chapter titles are generated by the LLM only once the body reaches
    # this many characters; a placeholder is used before that.
    story_title_min_body_chars: int = Field(default=60, ge=0, le=10_000)
    # Memoir Celery: hold back submission until the accumulated stripped oral
    # character count reaches this value (0 = disabled, submit after debounce
    # only).
    memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
    # Maximum wait (seconds) counted from when the batch's first segment was
    # enqueued; on timeout, submit even if the character count is insufficient.
    memoir_segment_batch_max_wait_seconds: float = Field(
        default=60.0, ge=0.0, le=3600.0
    )
    # Memoir narrative Phase 2 Celery trigger: a single oral entry reaching
    # this stripped character count runs the narrative immediately.
    memoir_narrative_immediate_char_threshold: int = Field(default=50, ge=0, le=50_000)
    # Phase 2 is triggered once the number of not-yet-narrated segments under
    # the same topic_category reaches this value.
    memoir_narrative_batch_min_segments: int = Field(default=3, ge=1, le=500)
    # Same as above, but by accumulated user_input_text character count
    # (approximated after strip by the Segment column length).
    memoir_narrative_batch_min_chars: int = Field(default=80, ge=0, le=500_000)
    # Fallback delayed task (seconds) for when Phase 2 was not triggered after
    # Phase 1 completed; a new Phase 1 revokes the old timer.
    memoir_narrative_batch_max_wait_seconds: float = Field(
        default=120.0, ge=1.0, le=3600.0
    )
    # False: Celery/batch slot updates do not rewrite
    # MemoirState.current_stage (the interview path can still advance it via
    # switch_stage).
    # True: batch processing may align current_stage only when
    # chat_bucket(proposed) == chat_bucket(existing).
    memoir_extraction_updates_current_stage: bool = False
    # True: let the result pass when FidelityCheckAgent JSON/LLM parsing fails
    # (only advised for append scenarios with the existing text as fallback).
    memoir_fidelity_fail_open_on_parse_error: bool = False
    # When the longest common substring between the body and the evidence text
    # reaches this length and is not covered by the oral text / previous body,
    # fall back to the safe body.
    memoir_narrative_evidence_overlap_min_chars: int = Field(default=14, ge=8, le=256)
    # True: enable detection of short "scene anchor" words (e.g. a dinner
    # gathering, that night); fallback happens only when the word also appears
    # in the excerpt and is not covered by the oral text.
    memoir_evidence_scene_anchor_check_enabled: bool = True
    # True: during title generation keep only slot entries that appear in the
    # oral text or the body excerpt (reduces cross-contamination from profile
    # data).
    memoir_title_slots_require_body_or_oral_match: bool = True
    # True: if a high-confidence "career chain" phrase appears in the title it
    # must have verbatim support in the hay (body + oral + passed slots);
    # otherwise degrade to a placeholder.
    memoir_title_hay_grounding_strict_phrases_enabled: bool = True
    # True: Celery retries when chapter materialisation cannot acquire the
    # pipeline lock (avoids dirty state never converging because of repeated
    # skips).
    memoir_recompose_retry_on_lock_contention: bool = True
    # Immediate Phase2 dispatch uses a fixed task_id to reduce duplicate
    # enqueues for the same category; timeout tasks still use separate ids.
    memoir_phase2_singleflight_immediate: bool = True
    # True: when Phase2 routing is low-confidence
    # (no_llm/parse_error/invalid_target) do not write a Story; mark the
    # segment with narrative_deferred_until and retry afterwards.
    memoir_route_defer_enabled: bool = True
    # Low-confidence deferral duration (seconds): these segments are not
    # consumed before expiry, avoiding idle spinning in the background.
    memoir_route_defer_seconds: float = Field(default=120.0, ge=1.0, le=3600.0)
    # Maximum number of automatic deferrals per category; after the cap,
    # segments are only re-activated by new material arriving and are no
    # longer retried automatically.
    memoir_route_defer_max_attempts: int = Field(default=3, ge=1, le=20)
    # True: after the Phase2 first draft, run quality enhancement
    # asynchronously (fidelity recheck, title polishing, LLM normalisation).
    memoir_quality_pass_enabled: bool = True
    memoir_quality_pass_delay_seconds: int = Field(default=5, ge=0, le=300)

    # ── Memory retrieval and enrichment ───────────────────────
    # False: skip LLM enrichment after ingest (summary / facts / timeline).
    memory_enrichment_enabled: bool = True
    memory_enrichment_max_chars: int = Field(default=12000, ge=1000, le=100_000)

    # ── Memory compaction (soft exclusion of near-duplicate chunks; event-triggered + Redis debounce
    #    + per-user lock; needs worker + Beat to run the sweep) ──
    memory_compaction_enabled: bool = True
    memory_compaction_debounce_seconds: int = Field(default=105, ge=10, le=3600)
    memory_compaction_lock_ttl_seconds: int = Field(default=600, ge=60, le=7200)
    memory_compaction_chunk_similarity_threshold: float = Field(
        default=0.92, ge=0.5, le=0.999
    )
    memory_compaction_min_layers_for_exclude: int = Field(default=2, ge=1, le=3)
    memory_compaction_max_chunks_per_run: int = Field(default=200, ge=1, le=10_000)
    memory_compaction_max_excludes_per_run: int = Field(default=50, ge=1, le=1000)
    memory_compaction_max_neighbors_per_chunk: int = Field(default=25, ge=5, le=100)
    memory_compaction_text_jaccard_min: float = Field(default=0.55, ge=0.0, le=1.0)
    memory_compaction_metadata_event_year_window: int = Field(default=1, ge=0, le=50)
    # Beat sweep: scan users that received new chunks within the last N hours
    # and schedule compaction for them.
    memory_compaction_sweep_recent_hours: int = Field(default=24, ge=1, le=168)

    # ── Liblib ───────────────────────────────────────────────
    liblib_access_key: str = ""
    liblib_secret_key: str = ""
    liblib_base_url: str = "https://openapi.liblibai.cloud"
    liblib_template_uuid: str = ""

    # ── Tencent COS ──────────────────────────────────────────
    tencent_cos_secret_id: str = ""
    tencent_cos_secret_key: str = ""
    tencent_cos_region: str = "ap-shanghai"
    tencent_cos_bucket: str = ""
    tencent_cos_base_url: str = ""
    tencent_cos_token: str = ""

    # ── Internal regression evaluation lab (separate entry point, not mounted on the consumer API) ──
    internal_eval_api_key: str = ""
    internal_eval_enable_docs: bool = False
    # Comma-separated; when empty the internal API places no extra restriction
    # on Origin (internal_eval_api_key can still be relied on).
    internal_eval_cors_origins: str = ""
    # Zhipu GLM-5 (judge model): OpenAI-compatible Chat Completions
    # (consistent with langchain-openai).
    eval_judge_api_key: str = ""
    eval_judge_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    eval_judge_model: str = "glm-5"
    eval_judge_temperature: float = 0.3
    # Evaluation judge: DeepSeek (OpenAI-compatible); defaults to
    # deepseek-v4-flash + non-thinking (matches the non-thinking usage on the
    # pricing page; not v4-pro).
    eval_judge_deepseek_model: str = "deepseek-v4-flash"
    # Whether to use thinking mode when only deepseek-v4-flash is specified and
    # the deprecated names are not used to distinguish (used together with
    # eval_judge_deepseek_model).
    eval_judge_deepseek_thinking_enabled: bool = False
    eval_judge_deepseek_context_window_tokens: int = Field(
        default=64_000,
        ge=4096,
        le=2_000_000,
        description="DeepSeek 评审专用上下文预算(用于 transcript 截断;与 GLM 200K 分离)",
    )
    # GLM-5 input context is 200K (https://docs.bigmodel.cn).
    eval_judge_context_window_tokens: int = Field(
        default=200_000, ge=4096, le=2_000_000
    )
    # Reserved for completion tokens (JSON output) and routing error margin.
    eval_judge_completion_reserve_tokens: int = Field(default=4096, ge=256, le=131_072)
    eval_judge_prompt_budget_safety_tokens: int = Field(default=2048, ge=0, le=32_768)
    # Estimated tokens per character for mixed Chinese/English transcripts
    # (slightly below 1.2 leaves more room for Chinese text; raise it again if
    # judge requests get rejected).
    eval_judge_approx_tokens_per_char: float = Field(default=1.0, ge=0.3, le=8.0)
    # Maximum characters of the whole / per-turn excerpted transcript; 0 =
    # derive automatically from eval_judge_context_window_tokens minus the
    # rubric header.
    eval_judge_max_transcript_chars: int = Field(default=0, ge=0, le=2_000_000)
    # Two-transcript comparison flow: cap per conversation; 0 = split the
    # context evenly (after deducting overhead).
    eval_judge_max_compare_transcript_chars_each: int = Field(
        default=0, ge=0, le=2_000_000
    )
    # Character estimate of the comparison prompt's fixed overhead (template +
    # two scoring JSONs); a slightly lower value leaves more total room for
    # transcripts.
    eval_judge_compare_prompt_overhead_chars: int = Field(
        default=10_000, ge=500, le=500_000
    )
    # Memoir judging: concurrency cap for chapter LLM calls (judge requests
    # only; the preparation stage still accesses the DB serially).
    eval_judge_memoir_chapter_concurrency: int = Field(
        default=4,
        ge=1,
        le=32,
    )
    # Coarse truncation inside the memoir judge prompt (Chinese characters are
    # counted as characters); for chapters of around ten thousand characters
    # keep body >= the peak body length.
    eval_judge_memoir_body_max_chars: int = Field(
        default=36_000,
        ge=8_000,
        le=500_000,
        description="【当前回忆录正文】注入评审 prompt 前的最大字符",
    )
    eval_judge_memoir_evidence_max_chars: int = Field(
        default=32_000,
        ge=8_000,
        le=500_000,
        description="对话证据 / 结构化证据 / 参考基线各块的最大字符(与 eval_trace_format 对齐)",
    )
    # Completion budget for json_object: MemoirJudgeOutput has many fields, so
    # reserve enough tokens.
    eval_judge_memoir_completion_max_tokens: int = Field(
        default=3072,
        ge=512,
        le=16_384,
    )
    # Candidate conversation replay: temperature similar to production
    # interviews.
    eval_candidate_temperature: float = 0.7
    # Gate: a drop in a protected session's composite score exceeding this
    # threshold counts as a regression (0-100 scale).
    eval_gate_protected_regression_threshold: float = Field(
        default=2.0, ge=0.0, le=100.0
    )
    # Run LLM scoring and replay (can be turned off when Celery is not
    # running, to exercise only structure/import).
    eval_execution_enabled: bool = True

    # ── Validators ───────────────────────────────────────────
    @field_validator(
        "celery_purge_broker_on_startup",
        "mock_sms_login_enabled",
        "log_agent_verbose",
        "agent_log_prompt_dedup",
        mode="before",
    )
    @classmethod
    def _coerce_opt_in_flag(cls, v: object) -> bool:
        """Leniently coerce a raw value for a flag that defaults to off.

        Real booleans pass through and ``None`` becomes ``False``. Anything
        else is stringified, trimmed and lower-cased; only "1", "true", "yes"
        and "on" yield ``True``. Every other text (including an empty string)
        yields ``False`` instead of raising.
        """
        if isinstance(v, bool):
            return v
        if v is None:
            return False
        return str(v).strip().lower() in ("1", "true", "yes", "on")

    @field_validator("agent_log_omit_system_message_body", mode="before")
    @classmethod
    def _coerce_agent_log_omit_system_message_body(cls, v: object) -> bool:
        """Leniently coerce a raw value for a flag that defaults to on.

        Real booleans pass through and ``None`` becomes ``True``. Otherwise
        only the explicit texts "0", "false", "no" and "off" (trimmed,
        case-insensitive) turn the flag off; any other text keeps it on.
        """
        if isinstance(v, bool):
            return v
        if v is None:
            return True
        return str(v).strip().lower() not in ("0", "false", "no", "off")

    @field_validator("agent_log_prompt_mode", mode="before")
    @classmethod
    def _normalize_agent_log_prompt_mode(cls, v: object) -> str:
        """Normalise the prompt logging mode to "preview" or "hash_only".

        The value is stringified, trimmed and lower-cased; ``None`` and any
        unrecognised value fall back to "preview" rather than raising.
        """
        if v is None:
            return "preview"
        mode = str(v).strip().lower()
        return mode if mode in ("preview", "hash_only") else "preview"
# Shared Settings instance, built once when this module is imported.
settings = Settings()