Files
life-echo/api/app/features/memory/extractor.py
Kevin bb16d3a5c9 refactor(agents): 抽取阶段常量与对话上下文;快档 LLM;图片 prompt 可禁止回退
访谈与阶段
- 新增 app/agents/stage_constants.py:集中 CHAT_STAGES、章节分类/顺序、阶段到默认 memoir 类别等,与 MemoirState 默认槽位顺序对齐;减少散落在 prompts 内的重复常量。
- 新增 app/agents/chat/prompt_context.py:以 ChatPromptContext 汇总 guided 系统提示所需字段(阶段、槽位、轮次、人设、记忆证据、回复长度模式、背景声线、职业等),统一走 get_guided_conversation_prompt。
- 大幅收敛 app/agents/chat/prompts_conversation.py;调整 prompts.py、stage_prompts.py、stage_detection.py;同步 interview_agent、profile_agent、helpers 与 state_schema,使对话侧构造提示的方式一致、可测。

回忆录流水线
- memoir/prompts.py 删除已迁至 stage_constants / 独立模板的大段常量与图片占位相关逻辑;classification / extraction / fidelity / narrative agents 与 orchestrator(全量历史仍可用于计数,注入模型时按轮次与字符上限截断)、image_prompt_fallback_disabled。
- dependencies 增加 get_llm_provider_fast(LRU 缓存,可与默认共用密钥与 base_url)。

任务与编排
- memoir_tasks:prepare_batches 注入 llm_fast;开启独立快档模型时打结构化日志。
- chapter_cover_tasks、story_image_tasks:与图片 prompt / JSON 工具路径或策略变更对齐(import 与行为一致)。
- story_pipeline_sync 等小处同步。

其它核心
- langchain_llm、text_normalize 随上述调用链微调。

开发者体验
- .cursor/settings.json:启用 redis-development、postman 插件。

测试
- 新增 test_image_prompt_policy:覆盖「禁止回退」等图片 prompt 策略。
- 更新 test_interview_prompts、test_interview_reply_length、test_experience_regressions、test_json_and_memory_utils,匹配新常量位置、json_utils 与对话/长度行为。
2026-04-02 12:00:00 +08:00

93 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""从 transcript 块中抽取结构化事实LLM + JSON"""
from __future__ import annotations
from typing import Any
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
from app.core.logging import get_logger
from app.features.memory.llm_schemas import (
FactsExtractionPayload,
facts_payload_to_dicts,
parse_json_payload,
)
logger = get_logger(__name__)
def _max_transcript_chars() -> int:
    """Return the configured character cap applied to transcript text.

    The settings object is imported at call time instead of at module
    import, so the value is read from ``settings`` on every call.
    """
    from app.core.config import settings as app_settings

    limit = app_settings.memory_enrichment_max_chars
    return limit
def extract_facts_from_transcript_sync(llm: Any, numbered_blocks: str) -> list[dict]:
    """Extract structured facts from chunk-tagged transcript text (blocking).

    Args:
        llm: LLM handle forwarded to ``invoke_json_object``. A falsy value
            short-circuits to an empty result.
        numbered_blocks: Transcript text whose segments carry
            ``[chunk_id=...]`` markers. ``None`` or whitespace-only input
            short-circuits to an empty result.

    Returns:
        The fact dicts produced by ``facts_payload_to_dicts``. An empty list
        is returned when there is nothing to extract, when the payload does
        not parse, or when a ``TypeError``/``ValueError`` is raised while
        invoking the model or parsing its output (a warning is logged).
    """
    if not llm or not (numbered_blocks or "").strip():
        return []

    # Cap the transcript at the configured character limit before it is
    # embedded in the prompt.
    text = numbered_blocks.strip()[: _max_transcript_chars()]

    # The field names are separated explicitly; when they run together
    # ("milestonesubjectpredicate") the model cannot tell where the
    # fact_type enumeration ends and the next field begins.
    prompt = (
        "你是回忆录记忆抽取助手。阅读下列带 [chunk_id=...] 的文本块,抽取可核查的事实。\n"
        "每个事实含 fact_type: person|event|relation|place|milestone;subject;predicate;"
        "object_json(可为字符串或对象);confidence 0..1;source_chunk_id 必须等于某段的 chunk id。\n"
        '只输出 JSON{"facts":[...]},无事实则 {"facts":[]}。\n\n'
        f"{text}"
    )

    try:
        raw = invoke_json_object(
            llm,
            prompt,
            max_tokens=4096,
            agent="memory.extract_facts_sync",
        )
        parsed = parse_json_payload(raw, FactsExtractionPayload)
        if parsed is None:
            return []
        return facts_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        # Best-effort extraction: a malformed model response must not break
        # the caller, so it degrades to "no facts".
        logger.warning("extract_facts_from_transcript_sync 解析失败: {}", e)
        return []
async def extract_facts_from_transcript_async(
    llm: Any, numbered_blocks: str
) -> list[dict]:
    """Extract structured facts from chunk-tagged transcript text (async).

    Args:
        llm: LLM handle forwarded to ``ainvoke_json_object``. A falsy value
            short-circuits to an empty result.
        numbered_blocks: Transcript text whose segments carry
            ``[chunk_id=...]`` markers. ``None`` or whitespace-only input
            short-circuits to an empty result.

    Returns:
        The fact dicts produced by ``facts_payload_to_dicts``. An empty list
        is returned when there is nothing to extract, when the payload does
        not parse, or when a ``TypeError``/``ValueError`` is raised while
        invoking the model or parsing its output (a warning is logged).
    """
    if not llm or not (numbered_blocks or "").strip():
        return []

    # Cap the transcript at the configured character limit before it is
    # embedded in the prompt.
    text = numbered_blocks.strip()[: _max_transcript_chars()]

    # Field names are separated explicitly and the object_json hint is
    # spelled out, so the model sees an unambiguous field list.
    prompt = (
        "你是回忆录记忆抽取助手。阅读下列带 [chunk_id=...] 的文本块,抽取可核查的事实。\n"
        "每个事实含 fact_type: person|event|relation|place|milestone;subject;predicate;"
        "object_json(可为字符串或对象);confidence 0..1;source_chunk_id 必须等于某段的 chunk id。\n"
        '只输出 JSON{"facts":[...]},无事实则 {"facts":[]}。\n\n'
        f"{text}"
    )

    try:
        raw = await ainvoke_json_object(
            llm,
            prompt,
            max_tokens=4096,
            agent="memory.extract_facts_async",
        )
        parsed = parse_json_payload(raw, FactsExtractionPayload)
        if parsed is None:
            return []
        return facts_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        # Best-effort extraction: a malformed model response must not break
        # the caller, so it degrades to "no facts".
        logger.warning("extract_facts_from_transcript_async 解析失败: {}", e)
        return []
async def extract_facts(chunk_text: str, *, user_id: str) -> list[dict]:
    """Legacy entry point: extract facts from a single untagged text chunk.

    The text is wrapped in a ``[chunk_id=null]`` marker and handed to
    ``extract_facts_from_transcript_async`` using the fast LLM provider.

    Args:
        chunk_text: Raw text of one chunk. ``None`` or whitespace-only text
            yields an empty list without resolving the provider or calling
            the model.
        user_id: Accepted for interface compatibility; this function does
            not read it.

    Returns:
        The extracted fact dicts, each with ``source_chunk_id`` set to
        ``None`` when the model echoed the placeholder (``"null"``), left it
        empty, or omitted it.
    """
    # Guard here: once the "[chunk_id=null]" marker is prepended the text is
    # never blank, so the downstream empty-input check would not trigger.
    if not (chunk_text or "").strip():
        return []

    # Imported lazily, at call time rather than module import.
    from app.core.dependencies import get_llm_provider_fast

    llm = get_llm_provider_fast().langchain_llm
    blocks = f"[chunk_id=null]\n{chunk_text}"
    facts = await extract_facts_from_transcript_async(llm, blocks)

    # Normalise every "no chunk id" spelling to a real None.
    for fact in facts:
        if fact.get("source_chunk_id") in (None, "null", ""):
            fact["source_chunk_id"] = None
    return facts