feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。 业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组装评审用的证据包。 内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/judge 相关脚本与文档有配套调整。 app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。 工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了…
This commit is contained in:
@@ -20,12 +20,6 @@ from app.agents.memoir.prompts import (
|
||||
format_evidence_chunks_for_prompt,
|
||||
format_narrative_user_content,
|
||||
)
|
||||
from app.agents.stage_constants import (
|
||||
CATEGORY_TO_CHAT_STAGE,
|
||||
CHAPTER_CATEGORIES,
|
||||
CHAT_STAGES,
|
||||
STAGE_TO_ORDER,
|
||||
)
|
||||
from app.agents.memoir.story_route_agent import (
|
||||
APPEND_FIRST_CHAPTER_CATEGORIES,
|
||||
PLAN_BATCH_MAX_SEGMENTS,
|
||||
@@ -33,20 +27,30 @@ from app.agents.memoir.story_route_agent import (
|
||||
StoryRouteAgent,
|
||||
default_append_target_story_id,
|
||||
)
|
||||
from app.agents.stage_constants import (
|
||||
CATEGORY_TO_CHAT_STAGE,
|
||||
CHAPTER_CATEGORIES,
|
||||
CHAT_STAGES,
|
||||
STAGE_TO_ORDER,
|
||||
)
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.config import settings
|
||||
from app.core.dependencies import get_embedding_provider
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.lineage_schemas import aggregate_lineage_from_segments
|
||||
from app.features.memoir.chapter_evidence_snapshot import (
|
||||
refresh_chapter_evidence_snapshot_with_retry_sync,
|
||||
)
|
||||
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
||||
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
||||
from app.features.memoir.models import Chapter
|
||||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||||
from app.features.memoir.narrative_safety import (
|
||||
body_contains_prompt_artifact,
|
||||
evidence_leakage_heuristic,
|
||||
evidence_scene_anchor_leak,
|
||||
strip_evidence_for_overlap_check,
|
||||
)
|
||||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||||
from app.features.memoir.oral_normalize import (
|
||||
apply_oral_rules,
|
||||
normalize_oral_for_memoir,
|
||||
@@ -56,17 +60,111 @@ from app.features.memoir.repo import (
|
||||
reorder_chapter_story_links_by_life_order_sync,
|
||||
)
|
||||
from app.features.memory.repo import retrieve_evidence_sync
|
||||
from app.features.story.models import Story
|
||||
from app.features.story.models import Story, StoryVersion
|
||||
from app.features.story.sync_write import (
|
||||
append_story_version_sync,
|
||||
count_story_versions_sync,
|
||||
create_story_with_version_sync,
|
||||
ensure_chapter_story_link_sync,
|
||||
list_active_stories_for_user_sync,
|
||||
replace_story_evidence_links_sync,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _dialogue_lineage_dict_for_segment_ids(
|
||||
category_segments: list,
|
||||
segment_ids: list[str],
|
||||
) -> dict | None:
|
||||
"""Merge DialogueLineage from contributing segments (memoir batch unit order)."""
|
||||
if not segment_ids or not category_segments:
|
||||
return None
|
||||
order = {str(sid): i for i, sid in enumerate(segment_ids)}
|
||||
picked = [s for s in category_segments if str(getattr(s, "id", "")) in order]
|
||||
picked.sort(key=lambda s: order[str(s.id)])
|
||||
conv_fb: str | None = None
|
||||
if picked:
|
||||
conv_fb = getattr(picked[0], "conversation_id", None)
|
||||
if not conv_fb:
|
||||
for s in picked:
|
||||
c = getattr(s, "conversation_id", None)
|
||||
if c:
|
||||
conv_fb = str(c)
|
||||
break
|
||||
return aggregate_lineage_from_segments(
|
||||
picked,
|
||||
conversation_id_fallback=str(conv_fb) if conv_fb else None,
|
||||
)
|
||||
|
||||
|
||||
def _evidence_link_ids(evidence: dict) -> tuple[list[str], list[str], list[str], list[str]]:
|
||||
"""从 retrieve_evidence_sync 结果提取稳定 ID 列表。"""
|
||||
chunks: list[str] = []
|
||||
for c in evidence.get("relevant_chunks") or []:
|
||||
if isinstance(c, dict) and c.get("id"):
|
||||
chunks.append(str(c["id"]))
|
||||
facts: list[str] = []
|
||||
for f in evidence.get("relevant_facts") or []:
|
||||
if isinstance(f, dict) and f.get("id"):
|
||||
facts.append(str(f["id"]))
|
||||
timelines: list[str] = []
|
||||
for e in evidence.get("timeline_hints") or []:
|
||||
if isinstance(e, dict) and e.get("id"):
|
||||
timelines.append(str(e["id"]))
|
||||
summaries: list[str] = []
|
||||
for s in evidence.get("relevant_summaries") or []:
|
||||
if isinstance(s, dict) and s.get("id"):
|
||||
summaries.append(str(s["id"]))
|
||||
return chunks, facts, timelines, summaries
|
||||
|
||||
|
||||
def _story_prompt_meta_for_lineage(
    evidence: dict,
    *,
    memoir_correlation_id: str | None,
    top_k: int,
) -> dict:
    """Build the ``prompt_meta`` payload describing one memoir retrieval.

    Args:
        evidence: Retrieval result passed to ``_evidence_link_ids``.
        memoir_correlation_id: Correlation id stored verbatim (may be ``None``).
        top_k: The ``top_k`` value recorded alongside the ids.

    Returns:
        A dict with a single ``memoir_retrieval`` key holding the correlation
        id, ``top_k`` and the four id lists taken from ``evidence``.
    """
    chunk_ids, fact_ids, timeline_event_ids, summary_ids = _evidence_link_ids(evidence)
    retrieval = {
        "correlation_id": memoir_correlation_id,
        "top_k": top_k,
        "chunk_ids": chunk_ids,
        "fact_ids": fact_ids,
        "timeline_event_ids": timeline_event_ids,
        "summary_ids": summary_ids,
    }
    return {"memoir_retrieval": retrieval}
|
||||
|
||||
|
||||
def _persist_story_lineage_sync(
    session: Session,
    *,
    story_id: str,
    version: StoryVersion,
    evidence: dict,
    memoir_correlation_id: str | None,
    top_k: int,
    dialogue_lineage: dict | None = None,
) -> None:
    """Record the evidence links and retrieval metadata for a story version.

    Replaces the story's evidence links with the ids found in ``evidence``,
    stores the retrieval summary on ``version.prompt_meta`` and, when a
    non-empty ``dialogue_lineage`` is given, stores it on
    ``version.lineage_json``. Nothing is flushed or committed here.

    Args:
        session: Session handed to ``replace_story_evidence_links_sync``.
        story_id: Story whose evidence links are replaced.
        version: Story version object that receives the metadata.
        evidence: Retrieval result the ids are extracted from.
        memoir_correlation_id: Correlation id recorded in ``prompt_meta``.
        top_k: Retrieval ``top_k`` recorded in ``prompt_meta``.
        dialogue_lineage: Optional lineage dict; left unset when falsy.
    """
    chunk_ids, fact_ids, timeline_event_ids, summary_ids = _evidence_link_ids(evidence)

    # Links are replaced first so that a failure here leaves ``version`` untouched.
    replace_story_evidence_links_sync(
        session,
        story_id=story_id,
        chunk_ids=chunk_ids,
        fact_ids=fact_ids,
        timeline_event_ids=timeline_event_ids,
        summary_ids=summary_ids,
    )

    prompt_meta = _story_prompt_meta_for_lineage(
        evidence,
        memoir_correlation_id=memoir_correlation_id,
        top_k=top_k,
    )
    version.prompt_meta = prompt_meta

    if not dialogue_lineage:
        return
    version.lineage_json = dialogue_lineage
|
||||
|
||||
|
||||
# 标题中若出现下列多字履历表述,则必须在 hay(正文+口述+传入标题的 slots)中逐字出现,否则剔除无果片段或降级占位
|
||||
_MEMOIR_TITLE_HAY_GROUNDING_PHRASES: tuple[str, ...] = (
|
||||
"晋升旅长",
|
||||
@@ -515,6 +613,7 @@ def _ensure_chapter_record(
|
||||
)
|
||||
chapter.is_new = True
|
||||
session.flush()
|
||||
refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id))
|
||||
return chapter
|
||||
|
||||
|
||||
@@ -526,6 +625,8 @@ def _run_batch_plan_writes(
|
||||
chapter: Chapter,
|
||||
chapter_category: str,
|
||||
evidence_text: str,
|
||||
evidence: dict,
|
||||
evidence_top_k: int,
|
||||
slot_snippets: dict[str, str],
|
||||
user_id: str,
|
||||
user_profile: str,
|
||||
@@ -659,10 +760,23 @@ def _run_batch_plan_writes(
|
||||
)
|
||||
|
||||
if target_story_id:
|
||||
append_story_version_sync(session, str(target_story_id), md)
|
||||
dispatch_ids.add(str(target_story_id))
|
||||
sid_s = str(target_story_id)
|
||||
ver = append_story_version_sync(session, sid_s, md)
|
||||
dlg = _dialogue_lineage_dict_for_segment_ids(
|
||||
category_segments, list(unit.segment_ids)
|
||||
)
|
||||
_persist_story_lineage_sync(
|
||||
session,
|
||||
story_id=sid_s,
|
||||
version=ver,
|
||||
evidence=evidence,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
top_k=evidence_top_k,
|
||||
dialogue_lineage=dlg,
|
||||
)
|
||||
dispatch_ids.add(sid_s)
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||||
session, chapter_id=str(chapter.id), story_id=sid_s
|
||||
)
|
||||
sid_log = target_story_id
|
||||
is_append = True
|
||||
@@ -690,6 +804,21 @@ def _run_batch_plan_writes(
|
||||
)
|
||||
sid_log = st.id
|
||||
is_append = False
|
||||
if st.current_version_id:
|
||||
ver0 = session.get(StoryVersion, st.current_version_id)
|
||||
if ver0:
|
||||
dlg = _dialogue_lineage_dict_for_segment_ids(
|
||||
category_segments, list(unit.segment_ids)
|
||||
)
|
||||
_persist_story_lineage_sync(
|
||||
session,
|
||||
story_id=str(st.id),
|
||||
version=ver0,
|
||||
evidence=evidence,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
top_k=evidence_top_k,
|
||||
dialogue_lineage=dlg,
|
||||
)
|
||||
|
||||
elapsed = time.perf_counter() - t0
|
||||
logger.info(
|
||||
@@ -865,6 +994,8 @@ def run_story_pipeline_for_category_batch(
|
||||
chapter=chapter,
|
||||
chapter_category=chapter_category,
|
||||
evidence_text=evidence_text,
|
||||
evidence=evidence,
|
||||
evidence_top_k=top_k,
|
||||
slot_snippets=slot_snippets,
|
||||
user_id=user_id,
|
||||
user_profile=user_profile,
|
||||
@@ -995,11 +1126,26 @@ def run_story_pipeline_for_category_batch(
|
||||
|
||||
do_append = target_story_id is not None
|
||||
|
||||
dlg_single = _dialogue_lineage_dict_for_segment_ids(
|
||||
category_segments,
|
||||
[str(s.id) for s in category_segments],
|
||||
)
|
||||
|
||||
if do_append:
|
||||
append_story_version_sync(session, str(target_story_id), md)
|
||||
dispatch_ids.add(str(target_story_id))
|
||||
sid_s = str(target_story_id)
|
||||
ver = append_story_version_sync(session, sid_s, md)
|
||||
_persist_story_lineage_sync(
|
||||
session,
|
||||
story_id=sid_s,
|
||||
version=ver,
|
||||
evidence=evidence,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
top_k=top_k,
|
||||
dialogue_lineage=dlg_single,
|
||||
)
|
||||
dispatch_ids.add(sid_s)
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||||
session, chapter_id=str(chapter.id), story_id=sid_s
|
||||
)
|
||||
sid_log = target_story_id
|
||||
is_append = True
|
||||
@@ -1027,6 +1173,18 @@ def run_story_pipeline_for_category_batch(
|
||||
)
|
||||
sid_log = st.id
|
||||
is_append = False
|
||||
if st.current_version_id:
|
||||
ver0 = session.get(StoryVersion, st.current_version_id)
|
||||
if ver0:
|
||||
_persist_story_lineage_sync(
|
||||
session,
|
||||
story_id=str(st.id),
|
||||
version=ver0,
|
||||
evidence=evidence,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
top_k=top_k,
|
||||
dialogue_lineage=dlg_single,
|
||||
)
|
||||
|
||||
elapsed = time.perf_counter() - t0
|
||||
logger.info(
|
||||
@@ -1055,6 +1213,7 @@ def run_story_pipeline_for_category_batch(
|
||||
reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
|
||||
mark_chapter_dirty_sync(session, str(chapter.id))
|
||||
session.flush()
|
||||
refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id))
|
||||
|
||||
image_settings = MemoirImageSettings.from_env()
|
||||
needs_cover = image_settings.enabled and chapter_needs_cover_enqueue(chapter)
|
||||
|
||||
Reference in New Issue
Block a user