Files
life-echo/api/app/features/memory/llm_schemas.py
Kevin e4bf0710c7 feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结…
2026-03-27 16:24:43 +08:00

104 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""LLM JSON 输出校验memory 富化)。"""
from __future__ import annotations
import json
from typing import Any, TypeVar
from pydantic import BaseModel, Field, field_validator
# Type variable bound to BaseModel: lets parse_json_payload return an instance
# of whichever model class the caller passes in.
TModel = TypeVar("TModel", bound=BaseModel)
class ExtractedFactItem(BaseModel):
    """A single fact item as returned by the LLM fact-extraction step.

    Every field has a default, so an empty JSON object validates.
    ``confidence`` is constrained to the closed range ``[0.0, 1.0]``; a value
    outside that range fails validation.
    """

    fact_type: str = "event"
    subject: str | None = None
    predicate: str | None = None
    # Arbitrary JSON value; no structure is enforced by this schema.
    object_json: Any = None
    confidence: float = Field(default=0.75, ge=0.0, le=1.0)
    source_chunk_id: str | None = None

    @field_validator("fact_type", mode="before")
    @classmethod
    def _coerce_fact_type(cls, v: object) -> str:
        """Normalize the raw ``fact_type`` to one of the known categories.

        Runs before type validation, so ``v`` may be any JSON value. The value
        is stringified, stripped and lower-cased; anything that is missing,
        empty or not a known category falls back to ``"event"``.

        Matching is case-insensitive so that model output such as ``"Person"``
        or ``"EVENT"`` keeps its intended category instead of being
        downgraded to the fallback.
        """
        allowed = ("person", "event", "relation", "place", "milestone")
        normalized = str(v or "event").strip().lower()
        return normalized if normalized in allowed else "event"
class FactsExtractionPayload(BaseModel):
    """Top-level JSON object of the fact-extraction response.

    ``facts`` defaults to an empty list when the key is absent.
    """

    facts: list[ExtractedFactItem] = Field(default_factory=list)
class SessionSummaryPayload(BaseModel):
    """JSON object carrying a ``summary`` string (empty when absent)."""

    summary: str = ""
class RollingSummaryPayload(BaseModel):
    """JSON object carrying a ``rolling_summary`` string (empty when absent)."""

    rolling_summary: str = ""
class TimelineEventItem(BaseModel):
    """A single timeline event as returned by the LLM.

    All fields are optional or defaulted; ``title`` defaults to an empty
    string and ``source_fact_ids`` to an empty list.
    """

    event_year: int | None = None
    event_date: str | None = None
    title: str = ""
    description: str | None = None
    source_fact_ids: list[str] = Field(default_factory=list)

    @field_validator("source_fact_ids", mode="before")
    @classmethod
    def _coerce_sf(cls, v: object) -> list[str]:
        """Coerce the raw ``source_fact_ids`` value into a list of strings.

        Runs before type validation:

        * a list keeps its truthy entries, each converted with ``str``;
        * a non-empty string becomes a one-element list;
        * everything else (``None``, empty string, other types) yields ``[]``.
        """
        if isinstance(v, list):
            return [str(entry) for entry in v if entry]
        if isinstance(v, str) and v:
            return [v]
        return []
class TimelineEventsPayload(BaseModel):
    """Top-level JSON object of the timeline response.

    ``events`` defaults to an empty list when the key is absent.
    """

    events: list[TimelineEventItem] = Field(default_factory=list)
def parse_json_payload(raw: str, model: type[TModel]) -> TModel | None:
    """Parse a raw LLM JSON string and validate it against ``model``.

    The text is first passed through ``extract_json_payload``, then decoded
    with ``json.loads`` and validated with ``model.model_validate``.

    Args:
        raw: The JSON string returned by the LLM call.
        model: The pydantic model class to validate the decoded data against.

    Returns:
        A validated ``model`` instance, or ``None`` if any step raises
        ``json.JSONDecodeError``, ``ValueError`` or ``TypeError``.
    """
    # Function-scope import, kept as in the original; presumably this avoids
    # an import cycle between the memory and memoir packages -- TODO confirm.
    from app.features.memoir.memoir_images.json_payload import extract_json_payload

    try:
        return model.model_validate(json.loads(extract_json_payload(raw)))
    except (json.JSONDecodeError, ValueError, TypeError):
        # Best-effort parsing: callers receive None instead of an exception.
        return None
def facts_payload_to_dicts(payload: FactsExtractionPayload) -> list[dict]:
    """Convert every fact in ``payload`` into a plain dict.

    Each item is dumped with ``model_dump()``. A ``source_chunk_id`` that is
    neither ``None`` nor a string is converted with ``str``.

    Args:
        payload: The validated fact-extraction payload.

    Returns:
        One dict per fact, in the same order as ``payload.facts``.
    """
    rows: list[dict] = []
    for fact in payload.facts:
        row = fact.model_dump()
        chunk_id = row.get("source_chunk_id")
        if not (chunk_id is None or isinstance(chunk_id, str)):
            row["source_chunk_id"] = str(chunk_id)
        rows.append(row)
    return rows
def timeline_payload_to_dicts(
    payload: TimelineEventsPayload, *, max_events: int = 20
) -> list[dict]:
    """Convert timeline events into plain dicts, skipping untitled events.

    Titles are stripped of surrounding whitespace; events whose title is
    missing or blank after stripping are dropped. Conversion stops as soon as
    ``max_events`` entries have been collected, so events past the cap are
    never processed.

    Args:
        payload: The validated timeline payload.
        max_events: Maximum number of events to return. Defaults to 20, the
            previously hard-coded cap. Values ``<= 0`` yield an empty list.

    Returns:
        At most ``max_events`` dicts with the keys ``event_year``,
        ``event_date``, ``title``, ``description`` and ``source_fact_ids``,
        in the order the events appear in ``payload.events``.
    """
    out: list[dict] = []
    if max_events <= 0:
        return out
    for ev in payload.events:
        title = (ev.title or "").strip()
        if not title:
            continue
        out.append(
            {
                "event_year": ev.event_year,
                "event_date": ev.event_date,
                "title": title,
                "description": ev.description,
                "source_fact_ids": ev.source_fact_ids or [],
            }
        )
        # Stop early once the cap is reached instead of slicing afterwards.
        if len(out) >= max_events:
            break
    return out