Files
life-echo/api/app/agents/memoir/extraction_agent.py
Kevin ccdc4e4277 feat(i18n): persist language preference and thread through chat, memoir, TTS
- Add users.language_preference (Alembic 0018, default zh); capture at signup/SMS
  only; expose on auth and profile APIs
- Lite English prompts for chat and memoir; localized stage labels and agent
  names (Life Echo / 岁月知己)
- Tencent TTS: language-aware synthesis, ModelType=1 for 501004, English chunking
- WebSocket pipeline: emit all AGENT_RESPONSE segments when TTS cancels; INFO logs
  for tts_this_turn and TTS decisions; on-demand TTS logging
- Expo: device language on auth, i18n tiers/agent name, [SPLIT] streaming UX fixes
- Tests for migration, prompts, pipeline, router tts_this_turn, reply segments

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 16:16:49 +08:00

89 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ExtractionAgent: extract the 5-stage state and slots from a user message.
Corresponds to the existing logic: get_state_extraction_prompt + JSON parsing.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict
from app.agents.memoir.prompts import get_state_extraction_prompt
from app.agents.memoir.schemas import StateExtractionOutput
from app.agents.stage_constants import normalize_chat_stage
from app.core.config import settings
from app.core.llm_call import LLMCallError, llm_json_call
from app.core.logging import get_logger
logger = get_logger(__name__)
@dataclass
class ExtractionResult:
    """Result of one state-extraction pass over a user message."""

    # Stage name chosen for the message.
    detected_stage: str
    # Extracted slot values keyed by slot name; values are strings.
    slots: Dict[str, str]
class ExtractionAgent:
    """Extract ``detected_stage`` and ``slots`` from a user message."""

    def extract(
        self,
        user_message: str,
        current_stage: str,
        stage_slots: Dict[str, Any],
        llm: Any,
        *,
        language: str = "zh",
    ) -> ExtractionResult:
        """
        Extract structured slot values and decide the stage of a message.

        Args:
            user_message: Text of the user's message.
            current_stage: Stage the conversation is currently in. It is the
                fallback whenever no other stage is determined.
            stage_slots: Existing slot data forwarded to the prompt builder.
                Values exposing ``model_dump`` are converted through it first;
                ``None`` is treated as an empty mapping.
            llm: Model handle. It needs to support a synchronous
                ``.invoke(prompt)`` call (used inside Celery tasks). A falsy
                value skips extraction entirely.
            language: Language code forwarded to the prompt builder.

        Returns:
            An ``ExtractionResult``. When ``llm`` is falsy the result carries
            ``current_stage`` and no slots. When ``LLMCallError`` is raised a
            warning is logged and whatever was determined before the failure
            is returned.
        """
        detected_stage = current_stage
        extracted_slots: Dict[str, str] = {}

        if not llm:
            return ExtractionResult(
                detected_stage=detected_stage, slots=extracted_slots
            )

        try:
            prompt = get_state_extraction_prompt(
                user_message=user_message,
                current_stage=current_stage,
                stage_slots={
                    k: v.model_dump() if hasattr(v, "model_dump") else v
                    for k, v in (stage_slots or {}).items()
                },
                language=language,
            )
            parsed = llm_json_call(
                llm,
                prompt,
                StateExtractionOutput,
                max_tokens=settings.memoir_extraction_max_tokens,
                agent="ExtractionAgent.extract",
            )

            raw_slots = parsed.slots or {}
            # Skip None values instead of stringifying them: str(None) would
            # store the literal text "None" as a slot value and make the
            # mapping look non-empty, defeating the guard below.
            extracted_slots = {
                k: v if isinstance(v, str) else str(v)
                for k, v in raw_slots.items()
                if v is not None
            }

            if extracted_slots:
                raw_detected = parsed.detected_stage or current_stage
                detected_stage = normalize_chat_stage(
                    str(raw_detected) if raw_detected is not None else None,
                    fallback=current_stage,
                )
            else:
                # With no substantive slot, do not infer a stage, so that
                # meta-talk is not labelled with an arbitrary stage such as
                # childhood (consistent with the server-side guardrail).
                detected_stage = normalize_chat_stage(
                    current_stage, fallback=current_stage
                )
        except LLMCallError as e:
            logger.warning("ExtractionAgent LLM 解析失败: {}", e)

        return ExtractionResult(detected_stage=detected_stage, slots=extracted_slots)