# Source path: life-echo/api/app/core/config.py (359 lines, 21 KiB, Python).
# NOTE: the repository viewer flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.
# If that is intentional the warning can be ignored; otherwise reveal the
# escaped characters in the viewer and check them.
"""
统一配置:所有环境变量通过此模块的 Settings 单点读取。
业务代码只允许 import settings禁止散落 os.getenv() / load_dotenv()。
本地开发时由 api/development.sh 在启动前将 .env.development 同步为 .env每次启动覆盖
Docker / 服务端由镜像与 compose 注入进程环境;此处仅固定读取工作目录下的 .env 作为默认值来源。
进程环境变量(容器 environment、export覆盖 .env 同名项。
"""
import secrets
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed application settings populated by pydantic-settings.

    Values are looked up in the ``.env`` file of the working directory
    (UTF-8); names are matched case-insensitively and unknown keys are
    ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ── Database ──────────────────────────────────────────────
    database_url: str = "postgresql://postgres:postgres@localhost:5432/life_echo"
    # Whether to run Alembic at startup (main.py lifespan); can be turned off
    # for tests or read-only scenarios.
    alembic_run_on_startup: bool = True
    # True: exit the process if the migration fails (recommended for production).
    # False: only log the error and continue (e.g. local runs without a DB).
    alembic_startup_fail_fast: bool = False
    alembic_startup_max_retries: int = Field(default=3, ge=1, le=10)
    alembic_startup_retry_base_seconds: float = Field(
        default=1.0, ge=0.1, le=60.0
    )

    # ── Redis ─────────────────────────────────────────────────
    redis_url: str = "redis://localhost:6379/0"
    redis_session_ttl: int = 86400

    # ── Auth / JWT ────────────────────────────────────────────
    # NOTE(review): without an explicit SECRET_KEY a fresh random key is
    # generated for every Settings() instance; presumably tokens signed by one
    # process will not verify in another process or after a restart — confirm
    # that SECRET_KEY is always provided in deployed environments.
    secret_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 120
    refresh_token_expire_days: int = 30

    # ── LLM / DeepSeek ───────────────────────────────────────
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://api.deepseek.com"
    deepseek_model: str = "deepseek-chat"
    llm_api_key: str = ""
    llm_base_url: str = ""
    llm_model: str = ""
    llm_temperature: float = 0.7
    # Empty string: the fast tier uses the same model as the default;
    # classification / extraction / memory enrichment etc. can be pointed at a
    # lighter model separately.
    llm_fast_model: str = ""

    # ── Memory embeddings (Zhipu BigModel domestic embedding-3; key is separate
    #    from the LLM/DeepSeek key) ──
    zhipu_api_key: str = ""
    embedding_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    embedding_model: str = "embedding-3"

    # ── Chat interview (token cap + code-side truncation; see reply_limits) ──
    chat_interview_max_tokens: int = 380
    chat_interview_max_segments: int = 2
    chat_interview_max_chars_per_segment: int = 260
    # Interview: tighter caps when the user's input this turn is very short
    # (see interview_reply_length).
    chat_interview_brief_max_tokens: int = Field(default=260, ge=64, le=2048)
    chat_interview_brief_max_chars_per_segment: int = Field(
        default=200, ge=32, le=2000
    )
    # Interview: expanded caps when there are new details / emotion / long passages.
    chat_interview_expanded_max_tokens: int = Field(default=520, ge=64, le=4096)
    chat_interview_expanded_max_chars_per_segment: int = Field(
        default=380, ge=32, le=4000
    )
    # When the cadre/military inference hits: the standard tier is relaxed
    # slightly on top of the bucketed limits (brief/expanded unchanged).
    chat_interview_cadre_military_standard_extra_tokens: int = Field(
        default=40, ge=0, le=512
    )
    chat_interview_cadre_military_standard_extra_chars: int = Field(
        default=40, ge=0, le=2000
    )
    chat_opening_max_tokens: int = 256
    chat_profile_followup_max_tokens: int = 280
    # The full Redis history is only used for turn counting; what is injected
    # into the LLM is cut down to the most recent rounds and a character budget.
    chat_history_max_pairs: int = Field(default=15, ge=1, le=500)
    chat_history_max_chars: int = Field(default=6000, ge=256, le=500_000)
    chat_era_context_enabled: bool = True
    # Interview: each turn, use the LLM to determine the user's main life stage
    # and update MemoirState.current_stage; when False, keywords only.
    chat_stage_detection_enabled: bool = True
    chat_stage_detection_max_tokens: int = 128
    # True: for short sentences / acknowledgements / meta-talk, this turn
    # determines the stage by keywords only and does not call the stage LLM
    # (see utterance_substance).
    chat_stage_detection_skip_llm_on_insufficient_signal: bool = True
    # When the stripped main text is shorter than this length the fine-grained
    # heuristic is applied; at or above it the input is treated as carrying
    # enough information for the full path.
    chat_substantive_min_chars: int = Field(default=12, ge=1, le=256)
    # False: run the expensive stage/memory path every turn (ignoring the
    # short-input heuristic).
    chat_substantive_heuristic_enabled: bool = True
    # Interview persona: default | warm_listener | curious_guide; unknown
    # values are treated as default.
    chat_interview_persona: str = "default"
    # Interview: retrieve memories based on the user's utterance this turn and
    # inject them into the prompt; when off, MemoryService.retrieve is not called.
    chat_memory_retrieval_enabled: bool = True
    chat_memory_top_k: int = Field(default=8, ge=1, le=30)
    chat_memory_evidence_max_chars: int = Field(default=4096, ge=256, le=50_000)
    # True: for short / meta-talk input etc. (see utterance_substance), skip
    # vector retrieval this turn.
    chat_memory_retrieval_require_substantive: bool = True

    # ── Memoir narrative fidelity check (FidelityCheckAgent) ────────────────
    memoir_fidelity_check_enabled: bool = True
    memoir_fidelity_check_max_tokens: int = 512
    # Oral-text normalization, applied before narrative / fidelity:
    # off | rules | llm.
    # NOTE(review): the original comment says "segment 原文不落库" — presumably
    # the persisted segment text is left untouched; confirm.
    memoir_oral_normalize_enabled: bool = True
    memoir_oral_normalize_mode: str = "rules"
    memoir_oral_normalize_llm_max_tokens: int = Field(
        default=512, ge=64, le=4096
    )
    memoir_oral_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )
    # Chat: the model consumes the cleaned text (the original segment text
    # persisted to the DB is not changed); shares the rule layer with memoir
    # but is configured independently.
    chat_input_normalize_enabled: bool = True
    chat_input_normalize_mode: str = "rules"  # off | rules | llm
    chat_input_normalize_llm_max_tokens: int = Field(
        default=512, ge=64, le=4096
    )
    chat_input_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )
    # True and mode=llm: only voice/ASR segments go through LLM correction;
    # keyboard input gets rule-based normalization only (saves an LLM call per turn).
    chat_input_normalize_llm_voice_only: bool = True
    # Profile collection: short / acknowledgement / meta-talk input does not
    # call the profile-field extraction LLM (a follow-up is still generated).
    chat_profile_extract_require_substantive: bool = True
    # Memoir Phase 1: a batch of multiple segments does extraction + chapter
    # classification in a single LLM call (falls back to per-segment on
    # failure); single-segment input and the disabled case still go segment
    # by segment.
    memoir_phase1_batch_llm_enabled: bool = False
    memoir_phase1_batch_llm_max_tokens: int = Field(
        default=4096, ge=512, le=32_768
    )
    # Memoir agents: max_tokens for `invoke_json_object` / `llm_json_call`
    # (previously hard-coded, now moved into configuration).
    memoir_extraction_max_tokens: int = Field(default=1024, ge=64, le=8192)
    memoir_classification_max_tokens: int = Field(default=256, ge=32, le=4096)
    memoir_narrative_max_tokens: int = Field(default=4096, ge=256, le=32_768)
    memoir_narrative_merge_max_tokens: int = Field(
        default=8192, ge=256, le=64_000
    )
    memoir_title_max_tokens: int = Field(default=256, ge=32, le=4096)
    memoir_story_route_max_tokens: int = Field(default=1024, ge=64, le=8192)
    memoir_story_batch_plan_max_tokens: int = Field(
        default=4096, ge=256, le=32_768
    )
    # Profile extraction (ProfileAgent, JSON mode).
    chat_profile_extract_max_tokens: int = Field(default=512, ge=64, le=4096)

    # ── ASR ───────────────────────────────────────────────────
    asr_provider: str = "whisper"
    asr_model_size: str = "small"
    asr_device: str = "auto"
    asr_compute_type: str = "auto"
    asr_model_cache_dir: str = ""

    # ── Tencent SMS ──────────────────────────────────────────
    tencent_sms_secret_id: str = ""
    tencent_sms_secret_key: str = ""
    tencent_sms_sdk_app_id: str = ""
    tencent_sms_sign_name: str = ""
    tencent_sms_template_id: str = ""
    tencent_sms_template_param_count: int = 2

    # ── Tencent ASR / TTS (shared Secret; independent of the SMS and COS keys) ──
    tencent_secret_id: str = ""
    tencent_secret_key: str = ""

    # ── TTS (openai | tencent), independent of ASR: only controls speech
    #    synthesis on the reply side ──
    enable_tts: bool = True
    tts_provider: str = "tencent"
    openai_api_key: str = ""
    # Tencent voice ID; see https://cloud.tencent.com/document/product/1073/92668
    tts_voice_type: int = 502001
    tts_codec: str = "mp3"

    # ── WeChat Pay ───────────────────────────────────────────
    wechat_pay_app_id: str = ""
    wechat_pay_mch_id: str = ""
    wechat_pay_api_v3_key: str = ""
    wechat_pay_private_key_path: str = "certs/apiclient_key.pem"
    # PEM content; provide either this or private_key_path.
    wechat_pay_private_key: str = ""
    wechat_pay_cert_serial_no: str = ""
    wechat_pay_notify_url: str = ""
    wechat_pay_platform_public_key: str = ""
    wechat_pay_platform_public_key_path: str = ""
    wechat_pay_platform_public_key_id: str = ""

    # ── Alipay ───────────────────────────────────────────────
    alipay_app_id: str = ""
    alipay_private_key: str = ""
    alipay_public_key: str = ""
    alipay_notify_url: str = ""
    alipay_sign_type: str = "RSA2"
    # "1"/"true"/"yes" means under development (unavailable).
    alipay_under_development: str = "true"

    # ── Logging ──────────────────────────────────────────────
    # Env var LOG_LEVEL: controls the minimum level of the loguru sink
    # (TRACE/DEBUG/INFO/…).
    log_level: str = "INFO"
    # LOG_AGENT_VERBOSE: when True, additionally emit a one-line INFO summary
    # per Agent (duration, size) without needing global DEBUG.
    log_agent_verbose: bool = False
    # AGENT_LOG_MAX_CHARS: maximum characters when logging prompt/response
    # previews under DEBUG.
    agent_log_max_chars: int = Field(default=4096, ge=256, le=100_000)
    # AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY: under DEBUG, interview/profile chat
    # logs omit the System message body (len + sha12 only).
    agent_log_omit_system_message_body: bool = True
    # AGENT_LOG_JSON_PROMPT_PREFIX_CHARS: under DEBUG, when the total *.prompt
    # length exceeds the next setting, skip the first N characters before
    # previewing (0 = do not skip).
    agent_log_json_prompt_prefix_chars: int = Field(
        default=0, ge=0, le=500_000
    )
    # AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT: minimum prompt length that
    # triggers the prefix skipping.
    agent_log_json_prompt_prefix_only_if_len_gt: int = Field(
        default=4000, ge=0, le=2_000_000
    )
    # Third-party stdlib logging: empty = automatic (when LOG_LEVEL is
    # DEBUG/TRACE: Celery → INFO, httpx/httpcore → WARNING).
    celery_log_level: str = ""
    httpx_log_level: str = ""

    @field_validator("log_agent_verbose", mode="before")
    @classmethod
    def _coerce_log_agent_verbose(cls, v: object) -> bool:
        """Coerce the raw value leniently; anything unrecognized means False.

        Booleans pass through, ``None`` becomes False, and any other value is
        True only if its stripped, lower-cased text is 1/true/yes/on.
        """
        if v is None:
            return False
        if isinstance(v, bool):
            return v
        token = str(v).strip().lower()
        return token in {"1", "true", "yes", "on"}

    @field_validator("agent_log_omit_system_message_body", mode="before")
    @classmethod
    def _coerce_agent_log_omit_system_message_body(cls, v: object) -> bool:
        """Coerce the raw value leniently; anything unrecognized means True.

        Booleans pass through, ``None`` becomes True, and any other value is
        False only if its stripped, lower-cased text is 0/false/no/off.
        """
        if isinstance(v, bool):
            return v
        if v is None:
            return True
        token = str(v).strip().lower()
        return token not in {"0", "false", "no", "off"}

    # ── Misc ─────────────────────────────────────────────────
    enable_test_subscription: int = 0
    # "1" / "true" / "yes" count as True.
    enable_test_plan: str = ""
    enable_docs: bool = True

    # ── Memoir Image ─────────────────────────────────────────
    memoir_image_enabled: bool = False
    # True: when the image LLM prompt fails, do not use the English fallback
    # template (enable only after product and the task-failure flow are confirmed).
    image_prompt_fallback_disabled: bool = False
    memoir_image_poll_interval: int = 3
    memoir_image_max_attempts: int = 20
    memoir_image_provider: str = "liblib"
    memoir_image_style_default: str = "watercolor"
    memoir_image_size_default: str = "1280x720"
    memoir_image_download_hosts: str = ""
    # Minimum Story body length (characters) before a main-image intent is
    # created / image generation is called (0 = no limit).
    story_image_min_body_chars: int = 400
    # generate_story_image enqueue de-duplication (Redis SET NX).
    story_image_enqueue_dedup_ttl: int = Field(default=300, ge=30, le=86400)
    # Delay (seconds) before enqueueing the async chapter-materialization task,
    # to smooth load peaks.
    recompose_chapter_delay_seconds: int = Field(default=8, ge=0, le=600)
    # TTL of the chapter mutex shared with the memoir pipeline; should cover
    # the P95 duration of Phase2 / recompose.
    chapter_pipeline_lock_ttl_seconds: int = Field(
        default=360, ge=10, le=3600
    )
    # Hard limits for append: canonical character count and version count
    # (exceeding them forces new_story).
    story_append_max_canonical_chars: int = Field(
        default=12000, ge=1000, le=500_000
    )
    story_append_max_versions: int = Field(default=20, ge=1, le=500)
    # StoryRouteAgent: candidate JSON budget (conservative defaults; can be raised).
    story_route_candidate_body_max_chars: int = Field(
        default=1600, ge=200, le=8000
    )
    story_route_candidate_total_max_chars: int = Field(
        default=16_000, ge=2000, le=100_000
    )
    story_route_long_body_head_chars: int = Field(default=700, ge=100, le=4000)
    story_route_long_body_tail_chars: int = Field(default=700, ge=100, le=4000)
    story_route_summary_min_chars: int = Field(default=30, ge=0, le=500)
    story_route_index_preview_chars: int = Field(default=80, ge=20, le=500)
    # Evidence retrieval top_k; the retrieval volume is reduced for large
    # batches of units.
    evidence_top_k_default: int = Field(default=10, ge=1, le=50)
    evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
    evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
    # Story/Chapter titles are generated by the LLM only once the body reaches
    # this many characters; before that a placeholder is used.
    story_title_min_body_chars: int = Field(default=60, ge=0, le=10_000)
    # Memoir Celery: hold off submitting while the accumulated stripped oral
    # character count is below this value (0 = disabled; submit after debounce only).
    memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
    # Maximum wait (seconds) counted from when the batch's first segment was
    # enqueued; on timeout, submit even if the character count is insufficient.
    memoir_segment_batch_max_wait_seconds: float = Field(
        default=60.0, ge=0.0, le=3600.0
    )
    # Memoir narrative Phase 2 Celery trigger: a single oral entry reaching
    # this stripped character count runs the narrative immediately.
    memoir_narrative_immediate_char_threshold: int = Field(
        default=50, ge=0, le=50_000
    )
    # Phase 2 is triggered when the number of not-yet-narrated segments under
    # the same topic_category reaches this value.
    memoir_narrative_batch_min_segments: int = Field(default=3, ge=1, le=500)
    # Same as above, for the accumulated user_input_text character count
    # (after strip; approximated by the Segment column length).
    memoir_narrative_batch_min_chars: int = Field(
        default=80, ge=0, le=500_000
    )
    # Fallback delayed task (seconds) for when Phase 2 was not triggered after
    # Phase 1 completed; a new Phase 1 revokes the previous timer.
    memoir_narrative_batch_max_wait_seconds: float = Field(
        default=120.0, ge=1.0, le=3600.0
    )
    # False: Celery/batch slot updates do not rewrite MemoirState.current_stage
    # (the interview path can still advance it via switch_stage).
    # True: batch processing may align current_stage only when
    # chat_bucket(proposed) == chat_bucket(existing).
    memoir_extraction_updates_current_stage: bool = False
    # True: let content through when FidelityCheckAgent JSON/LLM parsing fails
    # (only advisable for append scenarios, with `existing` as the fallback).
    memoir_fidelity_fail_open_on_parse_error: bool = False
    # When the longest common substring between the body and the evidence text
    # reaches this length and is not covered by the oral text / previous body,
    # fall back to the safe body.
    memoir_narrative_evidence_overlap_min_chars: int = Field(
        default=14, ge=8, le=256
    )
    # True: enable detection of short "scene anchor" words (a dinner gathering,
    # that night, etc.); the fallback happens only if the word also appears in
    # the excerpt and is not covered by the oral text.
    memoir_evidence_scene_anchor_check_enabled: bool = True
    # True: during title generation, keep only the slot entries that appear in
    # the oral text or the body excerpt (reduces cross-talk from the profile).
    memoir_title_slots_require_body_or_oral_match: bool = True
    # True: a high-confidence "career chain" phrase in the title must have
    # verbatim support in the hay (body + oral text + passed-in slots);
    # otherwise the title is downgraded to the placeholder.
    memoir_title_hay_grounding_strict_phrases_enabled: bool = True
    # True: Celery retries when chapter materialization cannot obtain the
    # pipeline lock (avoids long-term skipping that keeps `dirty` from converging).
    memoir_recompose_retry_on_lock_contention: bool = True
    # Immediate Phase2 dispatch uses a fixed task_id (reduces duplicate
    # enqueues for the same category); timeout tasks still use independent ids.
    memoir_phase2_singleflight_immediate: bool = True

    # ── Memory retrieval & enrichment ─────────────────────────
    # True: when the query is empty, still return the rolling summary + recent
    # facts/timeline (no chunk vector retrieval).
    memory_evidence_empty_query_include_rolling: bool = False
    # False: skip the post-ingest LLM enrichment (summary / facts / timeline).
    memory_enrichment_enabled: bool = True
    memory_enrichment_max_chars: int = Field(
        default=12000, ge=1000, le=100_000
    )
    # True: when the fact ILIKE search misses, fall back to the "most recent
    # confirmed facts" (tends to introduce irrelevant / contradictory facts;
    # off by default).
    memory_fact_search_use_recent_fallback: bool = False

    # ── Memory compaction (soft exclusion of near-duplicate chunks;
    #    event-triggered + Redis debounce + per-user lock; needs worker + Beat
    #    to run the sweep) ──
    memory_compaction_enabled: bool = True
    memory_compaction_debounce_seconds: int = Field(
        default=105, ge=10, le=3600
    )
    memory_compaction_lock_ttl_seconds: int = Field(
        default=600, ge=60, le=7200
    )
    memory_compaction_chunk_similarity_threshold: float = Field(
        default=0.92, ge=0.5, le=0.999
    )
    memory_compaction_min_layers_for_exclude: int = Field(
        default=2, ge=1, le=3
    )
    memory_compaction_max_chunks_per_run: int = Field(
        default=200, ge=1, le=10_000
    )
    memory_compaction_max_excludes_per_run: int = Field(
        default=50, ge=1, le=1000
    )
    memory_compaction_max_neighbors_per_chunk: int = Field(
        default=25, ge=5, le=100
    )
    memory_compaction_text_jaccard_min: float = Field(
        default=0.55, ge=0.0, le=1.0
    )
    memory_compaction_metadata_event_year_window: int = Field(
        default=1, ge=0, le=50
    )
    # Beat sweep: scan users with new chunks in the last N hours and schedule
    # compaction for them.
    memory_compaction_sweep_recent_hours: int = Field(
        default=24, ge=1, le=168
    )

    # ── Liblib ───────────────────────────────────────────────
    liblib_access_key: str = ""
    liblib_secret_key: str = ""
    liblib_base_url: str = "https://openapi.liblibai.cloud"
    liblib_template_uuid: str = ""

    # ── Tencent COS ──────────────────────────────────────────
    tencent_cos_secret_id: str = ""
    tencent_cos_secret_key: str = ""
    tencent_cos_region: str = "ap-shanghai"
    tencent_cos_bucket: str = ""
    tencent_cos_base_url: str = ""
    tencent_cos_token: str = ""

    # ── Internal regression evaluation lab (separate entry point; not mounted
    #    on the consumer API) ────
    internal_eval_api_key: str = ""
    internal_eval_enable_docs: bool = False
    # Comma-separated; when empty the internal API does not additionally
    # restrict Origin (internal_eval_api_key can still be relied on).
    internal_eval_cors_origins: str = ""
    # GLM / Zhipu judge model (OpenAI-compatible Chat Completions; consistent
    # with langchain-openai).
    eval_judge_api_key: str = ""
    eval_judge_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    eval_judge_model: str = "glm-4-flash"
    eval_judge_temperature: float = 0.3
    # Candidate conversation replay: temperature similar to production interviews.
    eval_candidate_temperature: float = 0.7
    # Gate: a drop in a protected session's composite score greater than this
    # threshold counts as a regression (0–100 scale).
    eval_gate_protected_regression_threshold: float = Field(
        default=2.0, ge=0.0, le=100.0
    )
    # Run LLM scoring and replay (can be turned off when Celery is not running,
    # to exercise structure/import only).
    eval_execution_enabled: bool = True
# Module-level instance created at import time; per the module docstring,
# business code imports this object instead of reading the environment itself.
settings = Settings()