Files
life-echo/api/app/features/memory/llm_schemas.py
Kevin ac49bc7f23 feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:25:15 +08:00

116 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""LLM JSON 输出校验memory 富化)。"""
from __future__ import annotations
import json
from typing import Any, TypeVar
from pydantic import BaseModel, Field, field_validator
TModel = TypeVar("TModel", bound=BaseModel)
class ExtractedFactItem(BaseModel):
    # One structured fact item from LLM JSON output. Every field carries a
    # default, so an item with missing keys still validates.
    fact_type: str = "event"
    subject: str | None = None
    predicate: str | None = None
    object_json: Any = None
    # Constrained to the closed interval [0.0, 1.0]; 0.75 when omitted.
    confidence: float = Field(default=0.75, ge=0.0, le=1.0)
    source_chunk_id: str | None = None

    @field_validator("fact_type", mode="before")
    @classmethod
    def _coerce_fact_type(cls, v: object) -> str:
        """Normalise the raw ``fact_type`` value before field validation.

        Falsy or blank input, and any label outside the known set, collapses
        to ``"event"``. A known label is returned with surrounding whitespace
        removed. The comparison is case-sensitive.
        """
        label = str(v or "event").strip()
        known_types = {"person", "event", "relation", "place", "milestone"}
        # An empty label is not in the set, so it also falls back to "event".
        return label if label in known_types else "event"
class FactsExtractionPayload(BaseModel):
    # Top-level shape of a fact-extraction response: a single ``facts`` list,
    # which defaults to a new empty list when the key is absent.
    facts: list[ExtractedFactItem] = Field(default_factory=list)
class EnrichmentPayload(BaseModel):
    """Single-pass memory enrichment: session summary plus structured facts (one LLM call after ingest)."""

    # Both fields are optional in the incoming JSON: an empty summary and an
    # empty fact list are the defaults.
    summary: str = ""
    facts: list[ExtractedFactItem] = Field(default_factory=list)
class SessionSummaryPayload(BaseModel):
    # Payload carrying only a ``summary`` string; empty string when omitted.
    summary: str = ""
class RollingSummaryPayload(BaseModel):
    # Payload carrying only a ``rolling_summary`` string; empty string when
    # omitted.
    rolling_summary: str = ""
class TimelineEventItem(BaseModel):
    # One timeline entry from LLM JSON output. All fields have defaults, so
    # missing keys do not fail validation.
    event_year: int | None = None
    event_date: str | None = None
    title: str = ""
    description: str | None = None
    source_fact_ids: list[str] = Field(default_factory=list)

    @field_validator("source_fact_ids", mode="before")
    @classmethod
    def _coerce_sf(cls, v: object) -> list[str]:
        """Coerce the raw ``source_fact_ids`` value into a list of strings.

        A non-empty string becomes a one-element list. A list keeps its
        truthy elements, each converted with ``str()``. Anything else
        (``None``, an empty string, other types) yields an empty list.
        """
        if isinstance(v, str):
            return [v] if v else []
        if isinstance(v, list):
            return [str(entry) for entry in v if entry]
        # None and every unsupported type end up here.
        return []
class TimelineEventsPayload(BaseModel):
    # Top-level shape of a timeline response: a single ``events`` list, which
    # defaults to a new empty list when the key is absent.
    events: list[TimelineEventItem] = Field(default_factory=list)
def parse_json_payload(raw: str, model: type[TModel]) -> TModel | None:
    """Parse the JSON string returned by invoke_json_object into ``model``.

    Args:
        raw: Raw text expected to contain a JSON document.
        model: Model class whose ``model_validate`` is applied to the decoded
            data.

    Returns:
        The validated model instance, or ``None`` when cleaning, decoding or
        validation raises ``json.JSONDecodeError``, ``ValueError`` or
        ``TypeError``.
    """
    # Imported at call time, as in the original (presumably to avoid an import
    # cycle; confirm before moving it to module level).
    from app.core.json_utils import extract_json_payload

    try:
        decoded = json.loads(extract_json_payload(raw))
        instance = model.model_validate(decoded)
    except (json.JSONDecodeError, ValueError, TypeError):
        # Best-effort parsing: malformed output is reported as None, not raised.
        return None
    return instance
def facts_payload_to_dicts(payload: FactsExtractionPayload) -> list[dict]:
    """Dump every fact in ``payload.facts`` to a plain dict.

    A ``source_chunk_id`` that is neither ``None`` nor a ``str`` is replaced
    by its ``str()`` form. Everything else produced by ``model_dump()`` is
    returned unchanged, in the original order.
    """

    def _as_row(fact) -> dict:
        # Dump one fact and force a non-string chunk id to text.
        row = fact.model_dump()
        chunk_id = row.get("source_chunk_id")
        if not (chunk_id is None or isinstance(chunk_id, str)):
            row["source_chunk_id"] = str(chunk_id)
        return row

    return [_as_row(fact) for fact in payload.facts]
def enrichment_payload_to_fact_dicts(payload: EnrichmentPayload) -> list[dict]:
    """Convert ``payload.facts`` into a list of dicts.

    The facts are re-wrapped in a ``FactsExtractionPayload`` and passed
    through ``facts_payload_to_dicts``, so the dict shape is the same as that
    function's output. The original note says this matches ``extract_facts``,
    which is not visible in this file.
    """
    facts_only = FactsExtractionPayload(facts=list(payload.facts))
    return facts_payload_to_dicts(facts_only)
def timeline_payload_to_dicts(
    payload: TimelineEventsPayload,
    *,
    max_events: int | None = 20,
) -> list[dict]:
    """Convert timeline events into plain dicts, skipping untitled events.

    Args:
        payload: Object whose ``events`` are converted in order.
        max_events: Maximum number of dicts to return. Defaults to 20, the
            previously hard-coded cap. ``None`` disables the cap; zero or a
            negative value yields an empty list.

    Returns:
        One dict per event whose stripped title is non-empty, with keys
        ``event_year``, ``event_date``, ``title`` (stripped), ``description``
        and ``source_fact_ids``.
    """
    out: list[dict] = []
    if max_events is not None and max_events <= 0:
        return out
    for ev in payload.events:
        title = (ev.title or "").strip()
        if not title:
            # Events with a missing or whitespace-only title are dropped.
            continue
        out.append(
            {
                "event_year": ev.event_year,
                "event_date": ev.event_date,
                "title": title,
                "description": ev.description,
                # Copy so callers mutating the result cannot alter the event.
                "source_fact_ids": list(ev.source_fact_ids or []),
            }
        )
        if max_events is not None and len(out) >= max_events:
            # Stop as soon as the cap is reached instead of slicing afterwards.
            break
    return out