2026-04-08 15:37:09 +08:00
|
|
|
|
"""组装 Chapter/Story 评测证据闭包并格式化为评审输入。"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Literal
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
|
|
from app.features.conversation import repo as conversation_repo
|
|
|
|
|
|
from app.features.conversation.lineage_schemas import aggregate_lineage_from_segments
|
|
|
|
|
|
from app.features.evaluation.eval_trace_format import (
|
|
|
|
|
|
build_segment_transcript,
|
|
|
|
|
|
format_chapter_for_judge,
|
|
|
|
|
|
format_story_for_judge,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.eval_trace_repo import (
|
|
|
|
|
|
fetch_ai_messages_for_segments,
|
|
|
|
|
|
fetch_memory_closure_for_conversations,
|
|
|
|
|
|
fetch_segments_for_user,
|
|
|
|
|
|
get_chapter_for_eval_trace,
|
|
|
|
|
|
get_story_for_eval_trace,
|
|
|
|
|
|
list_chapter_ids_for_story,
|
|
|
|
|
|
load_chunks_by_ids,
|
|
|
|
|
|
load_facts_by_ids,
|
|
|
|
|
|
load_summaries_by_ids,
|
|
|
|
|
|
load_timeline_by_ids,
|
|
|
|
|
|
normalize_source_segment_ids,
|
|
|
|
|
|
story_link_ids_by_type,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.eval_trace_schemas import (
|
|
|
|
|
|
ChapterEvidenceBundle,
|
|
|
|
|
|
FormattedMemoirEvidence,
|
|
|
|
|
|
StoryEvidenceBundle,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.memoir.chapter_evidence_snapshot import (
|
|
|
|
|
|
EVIDENCE_SNAPSHOT_SCHEMA_VERSION,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.memoir.models import Chapter
|
|
|
|
|
|
from app.features.story.models import Story, StoryVersion
|
|
|
|
|
|
|
|
|
|
|
|
# Upper bound on how many conversations the legacy fallback transcript includes.
_MAX_EVIDENCE_CONVERSATIONS = 8
|
|
|
|
|
|
# Character budget for the fallback transcript; longer text is cut and marked as truncated.
_MAX_EVIDENCE_TRANSCRIPT_CHARS = 16_000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _segments_in_order(segments: list, segment_ids: list[str]) -> list:
    """Return ``segments`` sorted by the position of their id in ``segment_ids``.

    Ids are compared as strings, so segment objects whose ``id`` is not a
    ``str`` still match. If an id occurs more than once in ``segment_ids``
    its last position is used. Segments whose id is not listed are placed
    after every listed one; the sort is stable, so they keep their relative
    input order.

    Args:
        segments: Objects exposing an ``id`` attribute.
        segment_ids: Ids in the desired output order.

    Returns:
        A new list; ``segments`` itself is not modified.
    """
    rank = {str(segment_id): position for position, segment_id in enumerate(segment_ids)}
    # One past the last valid position: unknown ids always sort last, however
    # long ``segment_ids`` is (a fixed sentinel would break on long lists).
    unknown_rank = len(segment_ids)
    return sorted(segments, key=lambda segment: rank.get(str(segment.id), unknown_rank))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _trim_fallback_transcript(text: str, *, max_chars: int | None = None) -> str:
    """Strip ``text`` and cap it at a character budget.

    Args:
        text: Transcript text; ``None`` or an empty value is treated as "".
        max_chars: Optional budget override. When omitted, the module-level
            ``_MAX_EVIDENCE_TRANSCRIPT_CHARS`` is used. Negative values are
            treated as 0.

    Returns:
        The stripped text when it fits the budget; otherwise its first
        ``max_chars`` characters followed by a blank line and a truncation
        marker (the marker itself is not counted against the budget).
    """
    limit = _MAX_EVIDENCE_TRANSCRIPT_CHARS if max_chars is None else max(max_chars, 0)
    stripped = (text or "").strip()
    if len(stripped) <= limit:
        return stripped
    return f"{stripped[:limit]}\n\n…(访谈证据已截断)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def fallback_user_transcript_evidence(db: AsyncSession, user_id: str) -> str:
    """Legacy fallback: render the full text of a bounded set of the user's conversations.

    Intended only as a fallback; callers are expected to mark the evidence
    as ``tier=fallback``.

    Args:
        db: Session handed to the conversation repository.
        user_id: Owner of the conversations.

    Returns:
        One ``## 会话 <id>`` section per conversation that has non-empty
        messages, joined by blank lines and trimmed by
        ``_trim_fallback_transcript``; "" when the user has no conversations.
    """
    conversations = await conversation_repo.get_user_conversations(user_id, db)
    if not conversations:
        return ""

    # Only the first _MAX_EVIDENCE_CONVERSATIONS rows are used, emitted in
    # reverse of the repository order.
    # NOTE(review): presumably the repo returns newest-first, which would make
    # the output chronological -- confirm against the repo.
    selected = conversations[:_MAX_EVIDENCE_CONVERSATIONS]

    sections: list[str] = []
    for conversation in reversed(selected):
        conversation_id = str(conversation.id)
        messages = await conversation_repo.get_conversation_messages(
            conversation_id, db
        )
        lines: list[str] = []
        for message in messages:
            text = (message.content or "").strip()
            if not text:
                continue
            # Every role other than "human" is labelled as AI.
            # NOTE(review): assumes ``role`` stringifies to a plain role name -- confirm.
            is_human = str(message.role or "").lower() == "human"
            speaker = "用户" if is_human else "AI"
            lines.append(f"{speaker}: {text}")
        if lines:
            sections.append(f"## 会话 {conversation_id}\n" + "\n\n".join(lines))

    return _trim_fallback_transcript("\n\n".join(sections))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EvalTraceService:
|
|
|
|
|
|
def __init__(self, db: AsyncSession) -> None:
    """Keep the session that every query issued by this service goes through."""
    self._db: AsyncSession = db
|
|
|
|
|
|
|
|
|
|
|
|
async def _story_dialogue_lineage(
    self,
    st: Story,
    segments: list,
    segment_ids_ordered: list[str],
) -> dict | None:
    """Resolve the dialogue lineage for a story.

    The lineage stored on the story's current version wins when it is a
    dict with a non-empty ``turns`` entry. Otherwise it is aggregated from
    ``segments`` (ordered by ``segment_ids_ordered``), provided both are
    non-empty.

    Args:
        st: Story whose ``current_version_id`` (if any) is looked up.
        segments: Segment objects exposing ``id`` and ``conversation_id``.
        segment_ids_ordered: Segment ids in the desired order.

    Returns:
        The lineage dict, or ``None`` when neither source is available.
    """
    version_id = getattr(st, "current_version_id", None)
    if version_id:
        version = await self._db.get(StoryVersion, version_id)
        stored = getattr(version, "lineage_json", None) if version else None
        if isinstance(stored, dict) and stored.get("turns"):
            return stored

    if not (segments and segment_ids_ordered):
        return None

    in_order = _segments_in_order(segments, segment_ids_ordered)
    conversation_ids = sorted(
        {str(seg.conversation_id) for seg in segments if seg.conversation_id}
    )
    # The smallest conversation id (string order) is handed to the aggregator
    # as its fallback conversation id.
    fallback_conversation = conversation_ids[0] if conversation_ids else None
    return aggregate_lineage_from_segments(
        in_order, conversation_id_fallback=fallback_conversation
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _chapter_closure_tier(
    self,
    *,
    segment_ids_resolved: list[str],
    chunk_ids: list[str],
    fact_ids: list[str],
    tl_ids: list[str],
    sum_ids: list[str],
) -> Literal["strict", "partial", "fallback"]:
    """Classify how complete a chapter's evidence closure is.

    Returns:
        ``"strict"`` when there are both segment ids and at least one kind
        of memory id (chunks, facts, timeline events, summaries);
        ``"partial"`` when only one of those two sides is present;
        ``"fallback"`` when both are empty.
    """
    has_segments = bool(segment_ids_resolved)
    has_memory = any((chunk_ids, fact_ids, tl_ids, sum_ids))
    if has_segments and has_memory:
        return "strict"
    return "partial" if (has_segments or has_memory) else "fallback"
|
|
|
|
|
|
|
2026-04-09 15:32:35 +08:00
|
|
|
|
async def build_chapter_bundle(
    self, user_id: str, chapter: Chapter
) -> ChapterEvidenceBundle:
    """Assemble the evidence closure for one chapter.

    Sources are tried in this order; the first usable one wins:

    1. ``chapter.current_evidence_snapshot`` (snapshot table row), when its
       user id, chapter id and schema version match and it carries at least
       one segment or memory id.
    2. ``chapter.evidence_bundle_json`` (JSON column), under the same
       checks, except that a snapshot without a ``user_id`` is accepted.
    3. The chapter's live ``source_segments``: segments are fetched for the
       user and the memory closure is derived from their conversations.

    When the chapter has no source segments and no usable snapshot, a
    ``fallback`` bundle with empty id lists is returned.

    Args:
        user_id: Owner the evidence must belong to.
        chapter: Chapter to build the bundle for.

    Returns:
        A ``ChapterEvidenceBundle`` whose ``notes`` record which source was
        used and any inconsistencies that were noticed.
    """
    notes: list[str] = []
    chapter_id = str(chapter.id)
    live_segment_ids = normalize_source_segment_ids(
        getattr(chapter, "source_segments", None)
    )
    memory_keys = (
        "memory_chunk_ids",
        "memory_fact_ids",
        "timeline_event_ids",
        "summary_ids",
    )

    def clean_ids(values) -> list[str]:
        # Stringify ids and drop entries that are blank after stripping.
        return [str(value) for value in (values or []) if str(value).strip()]

    def schema_matches(raw_version) -> bool:
        # An unparsable version counts as a mismatch instead of raising, so
        # a malformed snapshot falls through to the next evidence source.
        try:
            return int(raw_version or 0) == EVIDENCE_SNAPSHOT_SCHEMA_VERSION
        except (TypeError, ValueError):
            return False

    def has_closure(read) -> bool:
        # ``read`` maps a field name to its stored value (or None).
        return bool(read("segment_ids")) or any(read(key) for key in memory_keys)

    def bundle_from_snapshot(read, source_note: str) -> ChapterEvidenceBundle:
        # Shared by the snapshot-table row and the JSON column; only the way
        # a field is read differs between the two.
        segment_ids = clean_ids(read("segment_ids"))
        conv_ids = sorted(set(clean_ids(read("conversation_ids"))))
        chunk_ids = clean_ids(read("memory_chunk_ids"))
        fact_ids = clean_ids(read("memory_fact_ids"))
        tl_ids = clean_ids(read("timeline_event_ids"))
        sum_ids = clean_ids(read("summary_ids"))
        notes.extend(str(note) for note in (read("notes") or []) if note)
        notes.append(source_note)
        tier = self._chapter_closure_tier(
            segment_ids_resolved=segment_ids,
            chunk_ids=chunk_ids,
            fact_ids=fact_ids,
            tl_ids=tl_ids,
            sum_ids=sum_ids,
        )
        # The snapshot is still used when the chapter's live segments have
        # drifted from it; the drift is only recorded as a note.
        if live_segment_ids and set(live_segment_ids) != set(segment_ids):
            notes.append(
                "live_source_segments_differ_from_snapshot_reconcile_in_pipeline"
            )
        lineage = read("message_lineage_json")
        return ChapterEvidenceBundle(
            user_id=user_id,
            chapter_id=chapter_id,
            segment_ids=segment_ids,
            conversation_ids=conv_ids,
            memory_chunk_ids=chunk_ids,
            memory_fact_ids=fact_ids,
            timeline_event_ids=tl_ids,
            summary_ids=sum_ids,
            lineage_tier=tier,
            notes=notes,
            dialogue_lineage=lineage if isinstance(lineage, dict) else None,
        )

    # 1) Snapshot table row.
    row = getattr(chapter, "current_evidence_snapshot", None)
    if row is not None:

        def read_row(key: str):
            return getattr(row, key, None)

        if (
            str(row.user_id) == str(user_id)
            and str(row.chapter_id) == chapter_id
            and schema_matches(row.schema_version)
            and has_closure(read_row)
        ):
            return bundle_from_snapshot(
                read_row, "evidence_from_chapter_evidence_snapshot_table"
            )

    # 2) JSON column on the chapter.
    snap = getattr(chapter, "evidence_bundle_json", None)
    if isinstance(snap, dict):
        snap_uid = str(snap.get("user_id") or "")
        if (
            schema_matches(snap.get("schema_version"))
            and str(snap.get("chapter_id") or "") == chapter_id
            and (not snap_uid or snap_uid == str(user_id))
            and has_closure(snap.get)
        ):
            return bundle_from_snapshot(
                snap.get, "evidence_from_chapter_evidence_bundle_json_column"
            )

    # 3) Live source segments.
    segment_ids = live_segment_ids
    if not segment_ids:
        notes.append("no_source_segments")
        notes.append("fallback_lineage_transcript_pending")
        return ChapterEvidenceBundle(
            user_id=user_id,
            chapter_id=chapter_id,
            segment_ids=[],
            conversation_ids=[],
            lineage_tier="fallback",
            notes=notes,
            dialogue_lineage=None,
        )

    segments = await fetch_segments_for_user(
        self._db, user_id=user_id, segment_ids=segment_ids
    )
    # When nothing resolves, the requested ids are kept so the bundle still
    # records what the chapter pointed at.
    resolved_seg_ids = [s.id for s in segments] or segment_ids
    if len(segments) < len(segment_ids):
        notes.append("some_segments_missing_or_foreign_user")
    conv_ids = sorted(
        {str(s.conversation_id) for s in segments if s.conversation_id}
    )
    chunk_ids, fact_ids, tl_ids, sum_ids = (
        await fetch_memory_closure_for_conversations(
            self._db, user_id=user_id, conversation_ids=conv_ids
        )
    )
    tier = self._chapter_closure_tier(
        segment_ids_resolved=resolved_seg_ids,
        chunk_ids=chunk_ids,
        fact_ids=fact_ids,
        tl_ids=tl_ids,
        sum_ids=sum_ids,
    )
    if tier == "partial":
        notes.append(
            "chapter_source_segments_union_semantics=partial_lineage_until_snapshot"
        )
    elif tier == "strict":
        notes.append("chapter_lineage_strict_segments_plus_memory_closure")

    # Order by the chapter's own source-segment order. Ordering by the ids of
    # the fetched rows (as before) just reproduced the fetch order.
    segs_ord = _segments_in_order(segments, segment_ids)
    dlg_live = aggregate_lineage_from_segments(
        segs_ord, conversation_id_fallback=conv_ids[0] if conv_ids else None
    )
    return ChapterEvidenceBundle(
        user_id=user_id,
        chapter_id=chapter_id,
        segment_ids=resolved_seg_ids,
        conversation_ids=conv_ids,
        memory_chunk_ids=chunk_ids,
        memory_fact_ids=fact_ids,
        timeline_event_ids=tl_ids,
        summary_ids=sum_ids,
        lineage_tier=tier,
        notes=notes,
        dialogue_lineage=dlg_live,
    )
|
|
|
|
|
|
|
|
|
|
|
|
async def format_chapter_bundle(
    self, bundle: ChapterEvidenceBundle
) -> tuple[FormattedMemoirEvidence, ChapterEvidenceBundle]:
    """Load the evidence a chapter bundle refers to and format it for the judge.

    For a ``fallback`` bundle the legacy recent-conversations transcript is
    used, no structured memory is loaded, and a note recording that is added
    to a copy of the bundle. For any other tier the transcript is built from
    the bundle's segments and the referenced chunks, facts, timeline events
    and summaries are loaded by id.

    Returns:
        ``(formatted_evidence, bundle)``; the bundle is the (possibly
        note-augmented) one that was passed to the formatter.
    """
    db = self._db
    owner = bundle.user_id

    if bundle.lineage_tier == "fallback":
        transcript = await fallback_user_transcript_evidence(db, owner)
        bundle = bundle.model_copy(
            update={
                "notes": [
                    *bundle.notes,
                    "used_legacy_recent_conversations_transcript",
                ]
            }
        )
        chunks, facts, events, summaries = [], [], [], []
    else:
        segments = await fetch_segments_for_user(
            db, user_id=owner, segment_ids=bundle.segment_ids
        )
        ai_messages = await fetch_ai_messages_for_segments(
            db, user_id=owner, segment_ids=[seg.id for seg in segments]
        )
        transcript = build_segment_transcript(segments, ai_messages)
        # Awaited one after another on the single shared session.
        chunks = await load_chunks_by_ids(
            db, user_id=owner, chunk_ids=bundle.memory_chunk_ids
        )
        facts = await load_facts_by_ids(
            db, user_id=owner, fact_ids=bundle.memory_fact_ids
        )
        events = await load_timeline_by_ids(
            db, user_id=owner, event_ids=bundle.timeline_event_ids
        )
        summaries = await load_summaries_by_ids(
            db, user_id=owner, summary_ids=bundle.summary_ids
        )

    formatted = format_chapter_for_judge(
        bundle,
        transcript=transcript,
        chunks=chunks,
        facts=facts,
        events=events,
        summaries=summaries,
    )
    return formatted, bundle
|
|
|
|
|
|
|
2026-04-09 15:32:35 +08:00
|
|
|
|
async def build_story_bundle(
    self, user_id: str, story_id: str
) -> StoryEvidenceBundle:
    """Assemble the evidence closure for one story.

    Outcomes, in order of preference:

    * Story not found: ``fallback`` bundle noted ``story_not_found``.
    * The story has typed evidence links: memory ids come from the links;
      segments come from the source segments of the story's chapters. The
      tier is ``strict`` when at least one segment resolves, else
      ``partial``.
    * No typed links but the chapters have source segments: ``partial``
      bundle whose memory ids are derived from the segments' conversations.
    * Neither: ``fallback`` bundle.

    Args:
        user_id: Owner the evidence must belong to.
        story_id: Story to build the bundle for.
    """
    story = await get_story_for_eval_trace(
        self._db, user_id=user_id, story_id=story_id
    )
    if not story:
        return StoryEvidenceBundle(
            user_id=user_id,
            story_id=story_id,
            lineage_tier="fallback",
            notes=["story_not_found"],
            dialogue_lineage=None,
        )

    links = list(story.evidence_links or [])
    link_chunks, link_facts, link_events, link_summaries = story_link_ids_by_type(
        links
    )
    notes: list[str] = []
    chapter_ids = await list_chapter_ids_for_story(
        self._db, user_id=user_id, story_id=str(story.id)
    )

    # Source segment ids of every chapter the story is attached to,
    # de-duplicated while keeping first-seen order.
    collected_segment_ids: list[str] = []
    for chapter_id in chapter_ids:
        chapter = await get_chapter_for_eval_trace(
            self._db, user_id=user_id, chapter_id=chapter_id
        )
        if chapter:
            collected_segment_ids.extend(
                normalize_source_segment_ids(chapter.source_segments)
            )
    ordered_segment_ids = list(dict.fromkeys(collected_segment_ids))

    if link_chunks or link_facts or link_events or link_summaries:
        # Structured evidence comes from the links; the transcript is
        # narrowed to the chapters' source segments where possible.
        segments = await fetch_segments_for_user(
            self._db, user_id=user_id, segment_ids=ordered_segment_ids
        )
        conversation_ids = sorted(
            {str(seg.conversation_id) for seg in segments if seg.conversation_id}
        )
        if ordered_segment_ids and not segments:
            notes.append("chapter_segment_ids_unresolved")
        notes.append(
            "transcript_from_chapter_source_segments"
            if conversation_ids
            else "no_chapter_segments_for_transcript_context"
        )
        tier: Literal["strict", "partial", "fallback"] = "strict"
        if not segments:
            notes.append("structured_evidence_without_bound_transcript")
            tier = "partial"
        lineage = await self._story_dialogue_lineage(
            story, segments, ordered_segment_ids
        )
        return StoryEvidenceBundle(
            user_id=user_id,
            story_id=str(story.id),
            segment_ids=[seg.id for seg in segments] or ordered_segment_ids,
            conversation_ids=conversation_ids,
            memory_chunk_ids=link_chunks,
            memory_fact_ids=link_facts,
            timeline_event_ids=link_events,
            summary_ids=link_summaries,
            lineage_tier=tier,
            notes=notes,
            augmented_with_chapter_context=bool(chapter_ids),
            story_link_evidence_count=len(links),
            fallback_chapter_ids=chapter_ids,
            dialogue_lineage=lineage,
        )

    # No typed story evidence links: derive a partial closure from the
    # chapters' source segments; failing that, fall back.
    if ordered_segment_ids:
        segments = await fetch_segments_for_user(
            self._db, user_id=user_id, segment_ids=ordered_segment_ids
        )
        conversation_ids = sorted(
            {str(seg.conversation_id) for seg in segments if seg.conversation_id}
        )
        (
            chunk_ids,
            fact_ids,
            event_ids,
            summary_ids,
        ) = await fetch_memory_closure_for_conversations(
            self._db, user_id=user_id, conversation_ids=conversation_ids
        )
        notes.extend(
            [
                "fallback_lineage_no_story_evidence_links",
                "augmented_with_chapter_context",
            ]
        )
        lineage = await self._story_dialogue_lineage(
            story, segments, ordered_segment_ids
        )
        return StoryEvidenceBundle(
            user_id=user_id,
            story_id=str(story.id),
            segment_ids=[seg.id for seg in segments] or ordered_segment_ids,
            conversation_ids=conversation_ids,
            memory_chunk_ids=chunk_ids,
            memory_fact_ids=fact_ids,
            timeline_event_ids=event_ids,
            summary_ids=summary_ids,
            lineage_tier="partial",
            notes=notes,
            augmented_with_chapter_context=True,
            story_link_evidence_count=0,
            fallback_chapter_ids=chapter_ids,
            dialogue_lineage=lineage,
        )

    notes.extend(
        [
            "no_story_evidence_links_and_no_chapter_segments",
            "fallback_lineage_transcript_pending",
        ]
    )
    # With no segments, only a lineage stored on the story's current version
    # can be returned here.
    lineage = await self._story_dialogue_lineage(story, [], [])
    return StoryEvidenceBundle(
        user_id=user_id,
        story_id=str(story.id),
        lineage_tier="fallback",
        notes=notes,
        story_link_evidence_count=0,
        fallback_chapter_ids=chapter_ids,
        dialogue_lineage=lineage,
    )
|
|
|
|
|
|
|
|
|
|
|
|
async def format_story_bundle(
    self, bundle: StoryEvidenceBundle
) -> tuple[FormattedMemoirEvidence, StoryEvidenceBundle]:
    """Load the evidence a story bundle refers to and format it for the judge.

    For a ``fallback`` bundle the legacy recent-conversations transcript is
    used, no structured memory is loaded, and a note recording that is added
    to a copy of the bundle. For any other tier the transcript is built from
    the bundle's segments and the referenced chunks, facts, timeline events
    and summaries are loaded by id.

    Returns:
        ``(formatted_evidence, bundle)``; the bundle is the (possibly
        note-augmented) one that was passed to the formatter.
    """
    db = self._db
    owner = bundle.user_id

    if bundle.lineage_tier == "fallback":
        transcript = await fallback_user_transcript_evidence(db, owner)
        bundle = bundle.model_copy(
            update={
                "notes": [
                    *bundle.notes,
                    "used_legacy_recent_conversations_transcript",
                ]
            }
        )
        chunks, facts, events, summaries = [], [], [], []
    else:
        segments = await fetch_segments_for_user(
            db, user_id=owner, segment_ids=bundle.segment_ids
        )
        ai_messages = await fetch_ai_messages_for_segments(
            db, user_id=owner, segment_ids=[seg.id for seg in segments]
        )
        transcript = build_segment_transcript(segments, ai_messages)
        # Awaited one after another on the single shared session.
        chunks = await load_chunks_by_ids(
            db, user_id=owner, chunk_ids=bundle.memory_chunk_ids
        )
        facts = await load_facts_by_ids(
            db, user_id=owner, fact_ids=bundle.memory_fact_ids
        )
        events = await load_timeline_by_ids(
            db, user_id=owner, event_ids=bundle.timeline_event_ids
        )
        summaries = await load_summaries_by_ids(
            db, user_id=owner, summary_ids=bundle.summary_ids
        )

    formatted = format_story_for_judge(
        bundle,
        transcript=transcript,
        chunks=chunks,
        facts=facts,
        events=events,
        summaries=summaries,
    )
    return formatted, bundle
|