Files
life-echo/api/app/core/config.py
Kevin 69a673e6c6 feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
2026-03-31 23:55:26 +08:00

248 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
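Unified configuration: every environment variable is read in one place, through this module's Settings.
Business code may only `import settings`; scattered os.getenv() / load_dotenv() calls are forbidden.
For local development, api/development.sh syncs .env.development to .env before startup (overwritten on every start).
For Docker / server deployments, the image and compose inject the process environment; this module only reads the .env in the working directory as the source of default values.
Process environment variables (container `environment`, `export`) override same-named entries in .env.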
"""
import secrets
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    # Application configuration model. Each attribute below is one setting;
    # values come from the process environment or from the ".env" file named in
    # model_config, and fall back to the defaults declared here. Names are
    # matched case-insensitively and unknown entries are ignored (see
    # model_config).

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ── Database ──────────────────────────────────────────────
    database_url: str = "postgresql://postgres:postgres@localhost:5432/life_echo"
    # Whether to run Alembic at startup (main.py lifespan); can be switched off
    # for tests or read-only scenarios.
    alembic_run_on_startup: bool = True
    # True: exit the process when a migration fails (recommended for production).
    # False: only log the error and keep going (e.g. local runs without a DB).
    alembic_startup_fail_fast: bool = False
    alembic_startup_max_retries: int = Field(default=3, ge=1, le=10)
    alembic_startup_retry_base_seconds: float = Field(
        default=1.0,
        ge=0.1,
        le=60.0,
    )

    # ── Redis ─────────────────────────────────────────────────
    redis_url: str = "redis://localhost:6379/0"
    redis_session_ttl: int = 86400

    # ── Auth / JWT ────────────────────────────────────────────
    # NOTE(review): when no value is supplied, the default factory produces a
    # fresh random key on every Settings() instantiation, so separate processes
    # would presumably not share a key — confirm SECRET_KEY is set wherever
    # tokens must survive restarts or be shared between processes.
    secret_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 120
    refresh_token_expire_days: int = 30

    # ── LLM / DeepSeek ───────────────────────────────────────
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://api.deepseek.com"
    deepseek_model: str = "deepseek-chat"
    llm_api_key: str = ""
    llm_base_url: str = ""
    llm_model: str = ""
    llm_temperature: float = 0.7

    # ── Memory embeddings (Zhipu BigModel embedding-3; key is kept separate
    #    from the LLM / DeepSeek keys) ──
    zhipu_api_key: str = ""
    embedding_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    embedding_model: str = "embedding-3"

    # ── Chat interview (token cap + in-code truncation, see reply_limits) ──
    chat_interview_max_tokens: int = 380
    chat_interview_max_segments: int = 2
    chat_interview_max_chars_per_segment: int = 260
    # Interview: tighter caps used when the user's input this turn is very short
    # (see interview_reply_length).
    chat_interview_brief_max_tokens: int = Field(default=260, ge=64, le=2048)
    chat_interview_brief_max_chars_per_segment: int = Field(
        default=200,
        ge=32,
        le=2000,
    )
    # Interview: expanded caps used when there are new details / emotion / long
    # passages.
    chat_interview_expanded_max_tokens: int = Field(default=520, ge=64, le=4096)
    chat_interview_expanded_max_chars_per_segment: int = Field(
        default=380,
        ge=32,
        le=4000,
    )
    # When the cadre/military inference hits, the standard tier is loosened
    # slightly on top of the bucket values (brief / expanded are unchanged).
    chat_interview_cadre_military_standard_extra_tokens: int = Field(
        default=40,
        ge=0,
        le=512,
    )
    chat_interview_cadre_military_standard_extra_chars: int = Field(
        default=40,
        ge=0,
        le=2000,
    )
    chat_opening_max_tokens: int = 256
    chat_profile_followup_max_tokens: int = 280
    chat_era_context_enabled: bool = True
    # Interview: each turn, use the LLM to determine the user's main life stage
    # and update MemoirState.current_stage; when False only keywords are used.
    chat_stage_detection_enabled: bool = True
    chat_stage_detection_max_tokens: int = 128
    # Interview persona: default | warm_listener | curious_guide. Unknown values
    # are meant to be treated as default; no validation is applied in this class.
    chat_interview_persona: str = "default"
    # Interview: retrieve memories by the user's utterance this turn and inject
    # them into the prompt; when off, MemoryService.retrieve is not called.
    chat_memory_retrieval_enabled: bool = True
    chat_memory_top_k: int = Field(default=8, ge=1, le=30)
    chat_memory_evidence_max_chars: int = Field(default=4096, ge=256, le=50_000)

    # ── Memoir narrative fidelity check (FidelityCheckAgent) ────────────────
    memoir_fidelity_check_enabled: bool = True
    memoir_fidelity_check_max_tokens: int = 512
    # Oral normalization, applied before the narrative / fidelity stages.
    # Modes: off | rules | llm.
    # NOTE(review): the original comment also carried a remark about segment raw
    # text and persistence whose punctuation was lost — confirm its meaning.
    memoir_oral_normalize_enabled: bool = True
    memoir_oral_normalize_mode: str = "rules"
    memoir_oral_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    memoir_oral_normalize_llm_max_input_chars: int = Field(
        default=8000,
        ge=64,
        le=50_000,
    )
    # Chat: the model consumes the normalized text (the raw segment text that is
    # stored is not changed); shares the rule layer with memoir but is
    # configured independently.
    chat_input_normalize_enabled: bool = True
    chat_input_normalize_mode: str = "rules"  # off | rules | llm
    chat_input_normalize_llm_max_tokens: int = Field(default=512, ge=64, le=4096)
    chat_input_normalize_llm_max_input_chars: int = Field(
        default=8000,
        ge=64,
        le=50_000,
    )

    # ── ASR ───────────────────────────────────────────────────
    asr_provider: str = "whisper"
    asr_model_size: str = "small"
    asr_device: str = "auto"
    asr_compute_type: str = "auto"
    asr_model_cache_dir: str = ""

    # ── Tencent SMS ──────────────────────────────────────────
    tencent_sms_secret_id: str = ""
    tencent_sms_secret_key: str = ""
    tencent_sms_sdk_app_id: str = ""
    tencent_sms_sign_name: str = ""
    tencent_sms_template_id: str = ""
    tencent_sms_template_param_count: int = 2

    # ── Tencent ASR / TTS (shared Secret; independent from the SMS and COS
    #    keys) ──
    tencent_secret_id: str = ""
    tencent_secret_key: str = ""

    # ── TTS (openai | tencent), independent from ASR: only controls speech
    #    synthesis on the reply side ──
    enable_tts: bool = True
    tts_provider: str = "tencent"
    openai_api_key: str = ""
    # Tencent voice ID, see https://cloud.tencent.com/document/product/1073/92668
    tts_voice_type: int = 502001
    tts_codec: str = "mp3"

    # ── WeChat Pay ───────────────────────────────────────────
    wechat_pay_app_id: str = ""
    wechat_pay_mch_id: str = ""
    wechat_pay_api_v3_key: str = ""
    wechat_pay_private_key_path: str = "certs/apiclient_key.pem"
    # PEM content; use either this or wechat_pay_private_key_path.
    wechat_pay_private_key: str = ""
    wechat_pay_cert_serial_no: str = ""
    wechat_pay_notify_url: str = ""
    wechat_pay_platform_public_key: str = ""
    wechat_pay_platform_public_key_path: str = ""
    wechat_pay_platform_public_key_id: str = ""

    # ── Alipay ───────────────────────────────────────────────
    alipay_app_id: str = ""
    alipay_private_key: str = ""
    alipay_public_key: str = ""
    alipay_notify_url: str = ""
    alipay_sign_type: str = "RSA2"
    # "1" / "true" / "yes" are read as "under development, not available";
    # the string is interpreted by the consumer, not in this class.
    alipay_under_development: str = "true"

    # ── Logging ──────────────────────────────────────────────
    # Env var LOG_LEVEL: minimum level of the loguru sink (TRACE/DEBUG/INFO/…).
    log_level: str = "INFO"
    # LOG_AGENT_VERBOSE: when True, additionally emit one-line INFO summaries
    # for agents (duration, size) without needing global DEBUG.
    log_agent_verbose: bool = False
    # AGENT_LOG_MAX_CHARS: max characters kept when logging prompt / response
    # previews under DEBUG.
    agent_log_max_chars: int = Field(default=4096, ge=256, le=100_000)
    # Third-party stdlib logging: empty = automatic (when LOG_LEVEL is
    # DEBUG/TRACE: Celery→INFO, httpx/httpcore→WARNING).
    celery_log_level: str = ""
    httpx_log_level: str = ""

    @field_validator("log_agent_verbose", mode="before")
    @classmethod
    def _coerce_log_agent_verbose(cls, v: object) -> bool:
        """Coerce the raw ``log_agent_verbose`` input to a bool.

        ``None`` becomes False and real bools pass through untouched. Any other
        value is stringified, trimmed and lower-cased, and counts as True only
        when it is one of "1", "true", "yes" or "on".
        """
        if v is None:
            return False
        if isinstance(v, bool):
            return v
        normalized = str(v).strip().lower()
        return normalized in ("1", "true", "yes", "on")

    # ── Misc ─────────────────────────────────────────────────
    enable_test_subscription: int = 0
    # "1" / "true" / "yes" mean True; the string is interpreted by the consumer.
    enable_test_plan: str = ""
    enable_docs: bool = True

    # ── Memoir Image ─────────────────────────────────────────
    memoir_image_enabled: bool = False
    memoir_image_poll_interval: int = 3
    memoir_image_max_attempts: int = 20
    memoir_image_provider: str = "liblib"
    memoir_image_style_default: str = "watercolor"
    memoir_image_size_default: str = "1280x720"
    memoir_image_download_hosts: str = ""
    # Minimum Story body length (chars) before a main-image intent is created /
    # image generation is invoked (0 = no limit).
    story_image_min_body_chars: int = 400
    # De-duplication TTL for enqueuing generate_story_image (Redis SET NX).
    story_image_enqueue_dedup_ttl: int = Field(default=300, ge=30, le=86400)
    # Delay (seconds) before enqueuing the async chapter materialization task,
    # to smooth load peaks.
    recompose_chapter_delay_seconds: int = Field(default=8, ge=0, le=600)
    # Chapter mutex TTL, kept consistent with the memoir pipeline.
    chapter_pipeline_lock_ttl_seconds: int = Field(default=120, ge=10, le=3600)
    # Append hard limits: canonical character count and version count
    # (exceeding either forces new_story).
    story_append_max_canonical_chars: int = Field(
        default=12000,
        ge=1000,
        le=500_000,
    )
    story_append_max_versions: int = Field(default=20, ge=1, le=500)
    # Evidence retrieval top_k; retrieval volume is lowered for large unit
    # batches.
    evidence_top_k_default: int = Field(default=10, ge=1, le=50)
    evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
    evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
    # Fall back only when the narrative output is extremely short relative to
    # the oral text (guards against extreme compression only; 0.3 = triggers
    # only when model output is under 30% of the oral text).
    memoir_narrative_fallback_body_ratio: float = 0.3
    memoir_narrative_fallback_min_chars: int = 15
    # Memoir Celery: hold submission until the accumulated (stripped) oral
    # character count reaches this value (0 = disabled, submit after debounce
    # only).
    memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
    # Longest wait (seconds) counted from when the first segment of the batch
    # was enqueued; on timeout the batch is submitted even if it is too short.
    memoir_segment_batch_max_wait_seconds: float = Field(
        default=60.0,
        ge=0.0,
        le=3600.0,
    )

    # ── Memory retrieval and enrichment ──────────────────────
    # True: with an empty query, still return the rolling summary + recent
    # facts / timeline (no chunk FTS).
    memory_evidence_empty_query_include_rolling: bool = False
    # False: skip the post-ingest LLM enrichment (summary / facts / timeline).
    memory_enrichment_enabled: bool = True
    memory_enrichment_max_chars: int = Field(default=12000, ge=1000, le=100_000)

    # ── Memory compaction (soft exclusion of near-duplicate chunks;
    #    event-triggered + Redis debounce + per-user lock) ──
    memory_compaction_enabled: bool = False
    memory_compaction_debounce_seconds: int = Field(default=105, ge=10, le=3600)
    memory_compaction_lock_ttl_seconds: int = Field(default=600, ge=60, le=7200)
    memory_compaction_chunk_similarity_threshold: float = Field(
        default=0.92,
        ge=0.5,
        le=0.999,
    )
    memory_compaction_min_layers_for_exclude: int = Field(default=2, ge=1, le=3)
    memory_compaction_max_chunks_per_run: int = Field(
        default=200,
        ge=1,
        le=10_000,
    )
    memory_compaction_max_excludes_per_run: int = Field(
        default=50,
        ge=1,
        le=1000,
    )
    memory_compaction_max_neighbors_per_chunk: int = Field(
        default=25,
        ge=5,
        le=100,
    )
    memory_compaction_text_jaccard_min: float = Field(
        default=0.55,
        ge=0.0,
        le=1.0,
    )
    memory_compaction_metadata_event_year_window: int = Field(
        default=1,
        ge=0,
        le=50,
    )

    # ── Liblib ───────────────────────────────────────────────
    liblib_access_key: str = ""
    liblib_secret_key: str = ""
    liblib_base_url: str = "https://openapi.liblibai.cloud"
    liblib_template_uuid: str = ""

    # ── Tencent COS ──────────────────────────────────────────
    tencent_cos_secret_id: str = ""
    tencent_cos_secret_key: str = ""
    tencent_cos_region: str = "ap-shanghai"
    tencent_cos_bucket: str = ""
    tencent_cos_base_url: str = ""
    tencent_cos_token: str = ""
# Module-level instance: Settings() is constructed once, when this module is
# first imported, and the resulting object is what other code imports.
settings = Settings()