Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
248 lines
13 KiB
Python
248 lines
13 KiB
Python
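"""
Unified configuration: every environment variable is read in one place, through this module's Settings.
Business code may only import `settings`; scattered os.getenv() / load_dotenv() calls are forbidden.

For local development, api/development.sh syncs .env.development to .env before startup (overwritten on every start).
For Docker / server deployments, the image and compose inject the process environment; this module only reads the .env file in the working directory as the source of default values.
Process environment variables (container `environment`, `export`) override same-named entries in .env.
"""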
|
||
|
||
import secrets
|
||
|
||
from pydantic import Field, field_validator
|
||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||
|
||
|
||
class Settings(BaseSettings):
    """Single source of truth for application configuration.

    Values are loaded by pydantic-settings. Per ``model_config`` the ``.env``
    file in the working directory is read as UTF-8, variable names are matched
    case-insensitively, and keys that do not correspond to a field are ignored.
    Each attribute below maps to the environment variable of the same name.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ── Database ──────────────────────────────────────────────
    database_url: str = "postgresql://postgres:postgres@localhost:5432/life_echo"
    # Whether to run Alembic at startup (main.py lifespan); can be turned off
    # for tests or read-only scenarios.
    alembic_run_on_startup: bool = True
    # True: exit the process when the migration fails (recommended for production).
    # False: only log the error and continue (e.g. local runs without a DB).
    alembic_startup_fail_fast: bool = False
    alembic_startup_max_retries: int = Field(default=3, ge=1, le=10)
    alembic_startup_retry_base_seconds: float = Field(default=1.0, ge=0.1, le=60.0)

    # ── Redis ─────────────────────────────────────────────────
    redis_url: str = "redis://localhost:6379/0"
    redis_session_ttl: int = 86400

    # ── Auth / JWT ────────────────────────────────────────────
    # When SECRET_KEY is not provided, a fresh random key is generated each time
    # Settings is instantiated.
    # NOTE(review): a per-process random default means values signed by one
    # process would not verify in another process or after a restart — confirm
    # that every real deployment sets SECRET_KEY explicitly.
    secret_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 120
    refresh_token_expire_days: int = 30

    # ── LLM / DeepSeek ───────────────────────────────────────
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://api.deepseek.com"
    deepseek_model: str = "deepseek-chat"
    llm_api_key: str = ""
    llm_base_url: str = ""
    llm_model: str = ""
    llm_temperature: float = 0.7

    # ── Memory vectors (Zhipu BigModel domestic embedding-3; key is separate from the LLM/DeepSeek keys) ──
    zhipu_api_key: str = ""
    embedding_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    embedding_model: str = "embedding-3"

    # ── Chat interview (token cap + in-code truncation, see reply_limits) ──
    chat_interview_max_tokens: int = 380
    chat_interview_max_segments: int = 2
    chat_interview_max_chars_per_segment: int = 260
    # Interview: tighter caps when the user's input this turn is very short
    # (see interview_reply_length).
    chat_interview_brief_max_tokens: int = Field(default=260, ge=64, le=2048)
    chat_interview_brief_max_chars_per_segment: int = Field(default=200, ge=32, le=2000)
    # Interview: expanded caps when there are new details / emotion / long passages.
    chat_interview_expanded_max_tokens: int = Field(default=520, ge=64, le=4096)
    chat_interview_expanded_max_chars_per_segment: int = Field(
        default=380, ge=32, le=4000
    )
    # When the cadre/military inference matches, the standard tier is relaxed
    # slightly on top of its bucket (brief/expanded are unchanged).
    chat_interview_cadre_military_standard_extra_tokens: int = Field(
        default=40, ge=0, le=512
    )
    chat_interview_cadre_military_standard_extra_chars: int = Field(
        default=40, ge=0, le=2000
    )
    chat_opening_max_tokens: int = 256
    chat_profile_followup_max_tokens: int = 280
    chat_era_context_enabled: bool = True
    # Interview: each turn, use the LLM to determine the user's main life stage
    # and update MemoirState.current_stage; when False only keywords are used.
    chat_stage_detection_enabled: bool = True
    chat_stage_detection_max_tokens: int = 128
    # Interview persona: default | warm_listener | curious_guide
    # (unknown values are treated as default).
    chat_interview_persona: str = "default"
    # Interview: retrieve memories for the user's current utterance and inject
    # them into the prompt (when off, MemoryService.retrieve is not called).
    chat_memory_retrieval_enabled: bool = True
    chat_memory_top_k: int = Field(default=8, ge=1, le=30)
    chat_memory_evidence_max_chars: int = Field(default=4096, ge=256, le=50_000)

    # ── Memoir narrative fidelity check (FidelityCheckAgent) ────────────────
    memoir_fidelity_check_enabled: bool = True
    memoir_fidelity_check_max_tokens: int = 512
    # Oral normalization, applied before narrative / fidelity: off | rules | llm.
    # The original note adds that the segment's original text is "not written
    # to the DB" by this step.
    # NOTE(review): that wording is ambiguous — confirm whether it means the
    # stored original segment is left untouched by normalization.
    memoir_oral_normalize_enabled: bool = True
    memoir_oral_normalize_mode: str = "rules"
    memoir_oral_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    memoir_oral_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )
    # Chat: the model consumes the cleaned text (the original segment text
    # stored in the DB is not changed); shares the rule layer with memoir but
    # is configured independently.
    chat_input_normalize_enabled: bool = True
    chat_input_normalize_mode: str = "rules"  # off | rules | llm
    chat_input_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    chat_input_normalize_llm_max_input_chars: int = Field(
        default=8000, ge=64, le=50_000
    )

    # ── ASR ───────────────────────────────────────────────────
    asr_provider: str = "whisper"
    asr_model_size: str = "small"
    asr_device: str = "auto"
    asr_compute_type: str = "auto"
    asr_model_cache_dir: str = ""

    # ── Tencent SMS ──────────────────────────────────────────
    tencent_sms_secret_id: str = ""
    tencent_sms_secret_key: str = ""
    tencent_sms_sdk_app_id: str = ""
    tencent_sms_sign_name: str = ""
    tencent_sms_template_id: str = ""
    tencent_sms_template_param_count: int = 2

    # ── Tencent ASR / TTS (shared secret; independent of the SMS and COS keys) ──
    tencent_secret_id: str = ""
    tencent_secret_key: str = ""

    # ── TTS (openai | tencent), independent of ASR: only controls speech synthesis for replies ──
    enable_tts: bool = True
    tts_provider: str = "tencent"
    openai_api_key: str = ""
    tts_voice_type: int = 502001  # Tencent voice ID, see https://cloud.tencent.com/document/product/1073/92668
    tts_codec: str = "mp3"

    # ── WeChat Pay ───────────────────────────────────────────
    wechat_pay_app_id: str = ""
    wechat_pay_mch_id: str = ""
    wechat_pay_api_v3_key: str = ""
    wechat_pay_private_key_path: str = "certs/apiclient_key.pem"
    wechat_pay_private_key: str = ""  # PEM content; use either this or private_key_path
    wechat_pay_cert_serial_no: str = ""
    wechat_pay_notify_url: str = ""
    wechat_pay_platform_public_key: str = ""
    wechat_pay_platform_public_key_path: str = ""
    wechat_pay_platform_public_key_id: str = ""

    # ── Alipay ───────────────────────────────────────────────
    alipay_app_id: str = ""
    alipay_private_key: str = ""
    alipay_public_key: str = ""
    alipay_notify_url: str = ""
    alipay_sign_type: str = "RSA2"
    alipay_under_development: str = "true"  # "1"/"true"/"yes" means under development / unavailable

    # ── Logging ──────────────────────────────────────────────
    # Env var LOG_LEVEL; controls the minimum level of the loguru sink
    # (TRACE/DEBUG/INFO/…).
    log_level: str = "INFO"
    # LOG_AGENT_VERBOSE: when True, additionally emit one-line INFO summaries
    # for agents (duration, size) without needing global DEBUG.
    log_agent_verbose: bool = False
    # AGENT_LOG_MAX_CHARS: maximum characters when logging prompt/response
    # previews under DEBUG.
    agent_log_max_chars: int = Field(default=4096, ge=256, le=100_000)
    # Third-party stdlib logging (empty = auto: when LOG_LEVEL is DEBUG/TRACE,
    # Celery→INFO and httpx/httpcore→WARNING).
    celery_log_level: str = ""
    httpx_log_level: str = ""

    @field_validator("log_agent_verbose", mode="before")
    @classmethod
    def _coerce_log_agent_verbose(cls, v: object) -> bool:
        """Coerce the raw ``log_agent_verbose`` input to a bool.

        Runs in ``before`` mode, i.e. on the raw value ahead of pydantic's own
        bool parsing, so unrecognised text becomes ``False`` here instead of
        reaching the standard parser.

        Args:
            v: Raw value for the field (any type).

        Returns:
            ``v`` itself when it is already a bool; ``False`` for ``None``;
            otherwise ``True`` only if the stripped, lower-cased string form of
            ``v`` is one of ``"1"``, ``"true"``, ``"yes"`` or ``"on"``.
        """
        if isinstance(v, bool):
            return v
        if v is None:
            return False
        return str(v).strip().lower() in ("1", "true", "yes", "on")

    # ── Misc ─────────────────────────────────────────────────
    enable_test_subscription: int = 0
    enable_test_plan: str = ""  # "1" / "true" / "yes" means True
    enable_docs: bool = True

    # ── Memoir Image ─────────────────────────────────────────
    memoir_image_enabled: bool = False
    memoir_image_poll_interval: int = 3
    memoir_image_max_attempts: int = 20
    memoir_image_provider: str = "liblib"
    memoir_image_style_default: str = "watercolor"
    memoir_image_size_default: str = "1280x720"
    memoir_image_download_hosts: str = ""
    # Minimum story body length (chars) before a main-image intent is created /
    # an image is requested (0 = no limit).
    story_image_min_body_chars: int = 400
    # Enqueue de-duplication for generate_story_image (Redis SET NX, seconds).
    story_image_enqueue_dedup_ttl: int = Field(default=300, ge=30, le=86400)
    # Delay (seconds) before enqueueing the async chapter materialization task,
    # to smooth load peaks.
    recompose_chapter_delay_seconds: int = Field(default=8, ge=0, le=600)
    # Chapter mutex lock TTL (seconds), consistent with the memoir pipeline.
    chapter_pipeline_lock_ttl_seconds: int = Field(default=120, ge=10, le=3600)
    # Append hard limits: canonical character count and version count
    # (exceeding either forces new_story).
    story_append_max_canonical_chars: int = Field(default=12000, ge=1000, le=500_000)
    story_append_max_versions: int = Field(default=20, ge=1, le=500)
    # Evidence retrieval top_k: retrieve less for large unit batches.
    evidence_top_k_default: int = Field(default=10, ge=1, le=50)
    evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
    evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
    # Fall back only when the narrative output is extremely short relative to
    # the oral input (guards against extreme compression only; 0.3 = triggers
    # only when the model output is under 30% of the oral text).
    memoir_narrative_fallback_body_ratio: float = 0.3
    memoir_narrative_fallback_min_chars: int = 15
    # Memoir Celery: hold off submitting while the accumulated (stripped) oral
    # character count is below this value (0 = disabled; submit after debounce only).
    memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
    # Maximum wait (seconds) counted from when the batch's first segment was
    # enqueued; on timeout the batch is submitted even if the char count is short.
    memoir_segment_batch_max_wait_seconds: float = Field(
        default=60.0, ge=0.0, le=3600.0
    )

    # ── Memory retrieval & enrichment ─────────────────────────
    # True: when the query is empty, still return the rolling summary + recent
    # facts/timeline (no chunk FTS).
    memory_evidence_empty_query_include_rolling: bool = False
    # False: skip the post-ingest LLM enrichment (summary/facts/timeline).
    memory_enrichment_enabled: bool = True
    memory_enrichment_max_chars: int = Field(default=12000, ge=1000, le=100_000)

    # ── Memory compaction (soft-exclude near-duplicate chunks; event-triggered + Redis debounce + per-user lock) ──
    memory_compaction_enabled: bool = False
    memory_compaction_debounce_seconds: int = Field(default=105, ge=10, le=3600)
    memory_compaction_lock_ttl_seconds: int = Field(default=600, ge=60, le=7200)
    memory_compaction_chunk_similarity_threshold: float = Field(
        default=0.92, ge=0.5, le=0.999
    )
    memory_compaction_min_layers_for_exclude: int = Field(default=2, ge=1, le=3)
    memory_compaction_max_chunks_per_run: int = Field(default=200, ge=1, le=10_000)
    memory_compaction_max_excludes_per_run: int = Field(default=50, ge=1, le=1000)
    memory_compaction_max_neighbors_per_chunk: int = Field(default=25, ge=5, le=100)
    memory_compaction_text_jaccard_min: float = Field(default=0.55, ge=0.0, le=1.0)
    memory_compaction_metadata_event_year_window: int = Field(default=1, ge=0, le=50)

    # ── Liblib ───────────────────────────────────────────────
    liblib_access_key: str = ""
    liblib_secret_key: str = ""
    liblib_base_url: str = "https://openapi.liblibai.cloud"
    liblib_template_uuid: str = ""

    # ── Tencent COS ──────────────────────────────────────────
    tencent_cos_secret_id: str = ""
    tencent_cos_secret_key: str = ""
    tencent_cos_region: str = "ap-shanghai"
    tencent_cos_bucket: str = ""
    tencent_cos_base_url: str = ""
    tencent_cos_token: str = ""
|
||
|
||
|
||
# Module-level Settings instance, created once when this module is imported;
# this is the object the rest of the code base is expected to import.
settings = Settings()
|