Files
life-echo/api/app/features/memory/llm_schemas.py

104 lines
2.9 KiB
Python
Raw Normal View History

"""LLM JSON 输出校验memory 富化)。"""
from __future__ import annotations
import json
from typing import Any, TypeVar
from pydantic import BaseModel, Field, field_validator
# Generic pydantic model type: lets parse_json_payload return an instance of
# whichever BaseModel subclass the caller passes in.
TModel = TypeVar("TModel", bound=BaseModel)
class ExtractedFactItem(BaseModel):
    """One fact item in the LLM's facts-extraction JSON output.

    Inputs are normalised leniently where a small deviation should not
    invalidate the whole payload: ``fact_type`` falls back to ``"event"``
    and integer ``source_chunk_id`` values are converted to strings.
    """

    # One of: person, event, relation, place, milestone (see validator below).
    fact_type: str = "event"
    subject: str | None = None
    predicate: str | None = None
    # Arbitrary JSON value; accepted as-is without validation.
    object_json: Any = None
    # Must lie within [0.0, 1.0]; values outside the range fail validation.
    confidence: float = Field(default=0.75, ge=0.0, le=1.0)
    source_chunk_id: str | None = None

    @field_validator("fact_type", mode="before")
    @classmethod
    def _coerce_fact_type(cls, v: object) -> str:
        """Normalise ``fact_type``; unknown or empty values become ``"event"``."""
        # Lower-casing lets outputs such as "Person" match the allowed set
        # instead of being silently reclassified as "event".
        ft = str(v or "event").strip().lower() or "event"
        if ft not in ("person", "event", "relation", "place", "milestone"):
            return "event"
        return ft

    @field_validator("source_chunk_id", mode="before")
    @classmethod
    def _coerce_source_chunk_id(cls, v: object) -> object:
        """Convert an integer chunk id to ``str`` before field validation.

        Pydantic v2 does not coerce numbers to ``str`` by default, so a
        numeric id would otherwise fail validation of the entire payload.
        Every other input is passed through for regular validation.
        """
        # bool is a subclass of int; it is not a plausible id, so leave it
        # to the normal str validation.
        if isinstance(v, int) and not isinstance(v, bool):
            return str(v)
        return v
class FactsExtractionPayload(BaseModel):
    """Top-level object of the facts-extraction LLM response."""

    # Extracted facts; an absent key yields an empty list.
    facts: list[ExtractedFactItem] = Field(
        default_factory=list,
    )
class SessionSummaryPayload(BaseModel):
    """LLM response object carrying a single ``summary`` string."""

    # Empty string when the key is absent.
    summary: str = Field(default="")
class RollingSummaryPayload(BaseModel):
    """LLM response object carrying a single ``rolling_summary`` string."""

    # Empty string when the key is absent.
    rolling_summary: str = Field(default="")
class TimelineEventItem(BaseModel):
    """One event entry in the LLM's timeline JSON output."""

    event_year: int | None = None
    # Kept as a plain string; no date format is enforced by this model.
    event_date: str | None = None
    title: str = ""
    description: str | None = None
    # Always a list of strings after validation (see ``_coerce_sf``).
    source_fact_ids: list[str] = Field(default_factory=list)

    @field_validator("source_fact_ids", mode="before")
    @classmethod
    def _coerce_sf(cls, v: object) -> list[str]:
        """Coerce the raw ``source_fact_ids`` value into a list of strings.

        A list keeps its truthy entries, each converted with ``str``; a
        non-empty string becomes a one-element list; anything else
        (``None``, an empty string, other types) becomes an empty list.
        """
        if isinstance(v, list):
            return [str(entry) for entry in v if entry]
        if isinstance(v, str) and v:
            return [v]
        return []
class TimelineEventsPayload(BaseModel):
    """Top-level object of the timeline-events LLM response."""

    # Timeline events; an absent key yields an empty list.
    events: list[TimelineEventItem] = Field(
        default_factory=list,
    )
def parse_json_payload(raw: str, model: type[TModel]) -> TModel | None:
    """Parse the JSON string returned by ``invoke_json_object`` into ``model``.

    Args:
        raw: Raw text; it is passed through ``extract_json_payload`` before
            being decoded as JSON.
        model: The pydantic model class used to validate the decoded data.

    Returns:
        A validated ``model`` instance, or ``None`` if extraction, decoding
        or validation raises ``ValueError`` or ``TypeError``.
    """
    # Imported at call time rather than at module import time.
    from app.features.memoir.memoir_images.json_payload import extract_json_payload

    try:
        decoded = json.loads(extract_json_payload(raw))
        return model.model_validate(decoded)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError subclass, so it is covered.
        return None
def facts_payload_to_dicts(payload: FactsExtractionPayload) -> list[dict]:
    """Dump every fact in ``payload`` to a plain dict.

    Each dict comes from ``model_dump()``. A ``source_chunk_id`` that is
    neither ``None`` nor a ``str`` is replaced by its ``str()`` form.
    """

    def _dump(fact: ExtractedFactItem) -> dict:
        dumped = fact.model_dump()
        chunk_id = dumped.get("source_chunk_id")
        if not (chunk_id is None or isinstance(chunk_id, str)):
            dumped["source_chunk_id"] = str(chunk_id)
        return dumped

    return [_dump(fact) for fact in payload.facts]
def timeline_payload_to_dicts(
    payload: TimelineEventsPayload,
    *,
    max_events: int = 20,
) -> list[dict]:
    """Convert timeline events into plain dicts, skipping untitled events.

    Args:
        payload: Validated timeline payload whose ``events`` are converted
            in order.
        max_events: Maximum number of dicts to return. Defaults to 20;
            a value of zero or less yields an empty list.

    Returns:
        Up to ``max_events`` dicts with the keys ``event_year``,
        ``event_date``, ``title`` (whitespace-stripped), ``description``
        and ``source_fact_ids``. Events whose title is empty after
        stripping are omitted and do not count towards the limit.
    """
    out: list[dict] = []
    for ev in payload.events:
        # Stop as soon as the cap is reached instead of converting every
        # event and slicing afterwards.
        if len(out) >= max_events:
            break
        title = (ev.title or "").strip()
        if not title:
            continue
        out.append(
            {
                "event_year": ev.event_year,
                "event_date": ev.event_date,
                "title": title,
                "description": ev.description,
                # Copy so callers mutating the result cannot alter the model.
                "source_fact_ids": list(ev.source_fact_ids or []),
            }
        )
    return out