"""Validation of LLM JSON output (memory enrichment)."""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
from typing import Any, TypeVar
|
|||
|
|
|
|||
|
|
from pydantic import BaseModel, Field, field_validator
|
|||
|
|
|
|||
|
|
# Type variable bound to pydantic's BaseModel; parse_json_payload uses it so
# the returned instance has the same type as the model class passed in.
TModel = TypeVar("TModel", bound=BaseModel)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ExtractedFactItem(BaseModel):
    # One fact extracted by the LLM. Every field has a default, so an item
    # with missing keys still validates.
    fact_type: str = "event"
    subject: str | None = None
    predicate: str | None = None
    object_json: Any = None
    # Must lie in [0.0, 1.0]; defaults to 0.75 when the key is absent.
    confidence: float = Field(default=0.75, ge=0.0, le=1.0)
    source_chunk_id: str | None = None

    @field_validator("fact_type", mode="before")
    @classmethod
    def _coerce_fact_type(cls, v: object) -> str:
        # Normalise to one of the known fact types; anything else (including
        # None, empty or whitespace-only input) falls back to "event".
        # Matching is case-insensitive so values such as "Person" or
        # " EVENT " keep their meaning instead of degrading to "event".
        ft = str(v or "event").strip().lower()
        if ft not in ("person", "event", "relation", "place", "milestone"):
            return "event"
        return ft

    @field_validator("source_chunk_id", mode="before")
    @classmethod
    def _coerce_source_chunk_id(cls, v: object) -> object:
        # An integer ID (e.g. `"source_chunk_id": 12`) would otherwise fail
        # the `str` field check and invalidate the whole payload, even though
        # facts_payload_to_dicts expects to stringify non-str IDs. bool is an
        # int subclass, so it is excluded and left to normal validation, as
        # is every other type.
        if isinstance(v, int) and not isinstance(v, bool):
            return str(v)
        return v
|
|||
|
|
|
|||
|
|
|
|||
|
|
class FactsExtractionPayload(BaseModel):
    # Top-level object of a fact-extraction response: a "facts" array of
    # ExtractedFactItem entries, empty when the key is absent.
    facts: list[ExtractedFactItem] = Field(default_factory=list)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SessionSummaryPayload(BaseModel):
    # Response object carrying a single "summary" string; empty by default.
    summary: str = Field(default="")
|
|||
|
|
|
|||
|
|
|
|||
|
|
class RollingSummaryPayload(BaseModel):
    # Response object carrying a single "rolling_summary" string; empty by
    # default.
    rolling_summary: str = Field(default="")
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TimelineEventItem(BaseModel):
    # One timeline event produced by the LLM. Every field has a default, so
    # an item with missing keys still validates.
    event_year: int | None = None
    event_date: str | None = None
    title: str = ""
    description: str | None = None
    source_fact_ids: list[str] = Field(default_factory=list)

    @field_validator("title", mode="before")
    @classmethod
    def _coerce_title(cls, v: object) -> object:
        # An explicit `"title": null` would otherwise fail the `str` check and
        # reject the entire payload. Mapping it to "" lets the event validate;
        # timeline_payload_to_dicts then skips it like any other untitled
        # event. Non-None values go through normal validation unchanged.
        return "" if v is None else v

    @field_validator("source_fact_ids", mode="before")
    @classmethod
    def _coerce_sf(cls, v: object) -> list[str]:
        # Accept a single non-empty string or a list; list entries are
        # stringified and falsy entries dropped. Anything else, including
        # None, becomes an empty list.
        if isinstance(v, str):
            return [v] if v else []
        if isinstance(v, list):
            return [str(x) for x in v if x]
        return []
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TimelineEventsPayload(BaseModel):
    # Top-level object of a timeline response: an "events" array of
    # TimelineEventItem entries, empty when the key is absent.
    events: list[TimelineEventItem] = Field(default_factory=list)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def parse_json_payload(raw: str, model: type[TModel]) -> TModel | None:
    """Parse the JSON string returned by ``invoke_json_object``.

    The raw text is passed through ``extract_json_payload``, decoded with
    ``json.loads`` and validated against ``model``.

    Args:
        raw: Raw text of the LLM response.
        model: Pydantic model class to validate the decoded data against.

    Returns:
        A validated ``model`` instance, or ``None`` when any of the three
        steps raises ``ValueError`` or ``TypeError``.
    """
    # Imported at call time rather than at module import.
    from app.features.memoir.memoir_images.json_payload import extract_json_payload

    try:
        return model.model_validate(json.loads(extract_json_payload(raw)))
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError subclass, so it is covered.
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def facts_payload_to_dicts(payload: FactsExtractionPayload) -> list[dict]:
    """Convert every fact in ``payload`` to a plain dict.

    Each item is dumped with ``model_dump()``; a ``source_chunk_id`` that is
    neither ``None`` nor a ``str`` is replaced by its ``str()`` form.

    Args:
        payload: Validated fact-extraction payload.

    Returns:
        One dict per fact, in the payload's order.
    """

    def _as_record(fact: ExtractedFactItem) -> dict:
        record = fact.model_dump()
        chunk_id = record.get("source_chunk_id")
        if not (chunk_id is None or isinstance(chunk_id, str)):
            record["source_chunk_id"] = str(chunk_id)
        return record

    return [_as_record(fact) for fact in payload.facts]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def timeline_payload_to_dicts(
    payload: TimelineEventsPayload, *, limit: int | None = 20
) -> list[dict]:
    """Convert the titled events in ``payload`` to plain dicts.

    Events whose title is empty, ``None`` or whitespace-only are skipped;
    kept titles are stripped of surrounding whitespace.

    Args:
        payload: Validated timeline payload.
        limit: Maximum number of events to return. Defaults to 20, the cap
            that used to be hard-coded. ``None`` disables the cap; zero or a
            negative value yields an empty list.

    Returns:
        At most ``limit`` dicts, in the payload's order, each with the keys
        ``event_year``, ``event_date``, ``title``, ``description`` and
        ``source_fact_ids``.
    """
    out: list[dict] = []
    for ev in payload.events:
        # Stop as soon as the cap is reached instead of converting every
        # event and slicing afterwards.
        if limit is not None and len(out) >= limit:
            break
        title = (ev.title or "").strip()
        if not title:
            continue
        out.append(
            {
                "event_year": ev.event_year,
                "event_date": ev.event_date,
                "title": title,
                "description": ev.description,
                "source_fact_ids": ev.source_fact_ids or [],
            }
        )
    return out
|