Files
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

110 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import re
from typing import Any
from app.core.json_utils import extract_json_payload
from app.features.memoir.asset_resolver import strip_image_placeholders
from .schema import IMAGE_STATUS_PENDING
# Matches an image placeholder in either the quadruple-brace form
# ({{{{IMAGE:...}}}}, captured in group 1) or the double-brace form
# ({{IMAGE:...}}, captured in group 2). The quadruple form is listed first so
# it wins when both could match at the same position. DOTALL lets the
# description span multiple lines.
PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}", flags=re.DOTALL)
def parse_image_placeholders(content: str, max_images: int) -> list[dict[str, Any]]:
    """Parse IMAGE placeholders out of body text (offline migration / debugging).

    Scans ``content`` with ``PLACEHOLDER_RE`` and collects one entry for each
    placeholder whose description is non-empty after stripping whitespace.

    Args:
        content: Text to scan. ``None`` or an empty string yields no entries.
        max_images: Upper bound on the number of entries returned. ``None``
            disables the cap; zero or a negative value yields an empty list.

    Returns:
        A list of dicts with the keys ``index`` (position within the returned
        list), ``description`` (stripped description text), ``placeholder``
        (the full matched placeholder text) and ``start_offset`` (offset of
        the match within the scanned text).
    """
    items: list[dict[str, Any]] = []
    # The cap below is only evaluated after an append, so a non-positive cap
    # has to be rejected up front; otherwise it would still let one entry in.
    if max_images is not None and max_images <= 0:
        return items
    for match in PLACEHOLDER_RE.finditer(content or ""):
        # Group 1 is the quadruple-brace form, group 2 the double-brace form.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            # Placeholders with a blank description are ignored and do not
            # count toward the cap.
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items
def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Create one pending image-asset record per parsed placeholder.

    Args:
        placeholders: Entries carrying ``index``, ``placeholder`` and
            ``description`` keys.
        provider: Value stored under ``provider`` on every record.
        style: Value stored under ``style`` on every record.
        size: Value stored under ``size`` on every record.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts, in the same order as ``placeholders``, each
        with ``prompt``, ``url`` and ``error`` set to ``None`` and ``status``
        set to ``IMAGE_STATUS_PENDING``.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        asset: dict[str, Any] = {
            "index": entry["index"],
            "placeholder": entry["placeholder"],
            "description": entry["description"],
            # Nothing has been generated yet, so the outputs start empty.
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        assets.append(asset)
    return assets
def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse the ``paragraphs`` of a JSON narrative produced by the LLM.

    No image placeholders are generated from ``image_description`` here;
    illustrations are handled separately by the story/chapter structured flow.

    Args:
        raw: Raw LLM output expected to contain a JSON object with a
            ``paragraphs`` list of objects that carry a ``content`` string.

    Returns:
        A list of ``{"content": <stripped text>, "placeholder_info": None}``
        dicts, one per paragraph with non-blank string content. Returns an
        empty list when ``raw`` is blank, cannot be decoded, or does not have
        the expected shape.
    """
    if not raw or not str(raw).strip():
        return []
    try:
        data = json.loads(extract_json_payload(raw))
    except (json.JSONDecodeError, TypeError, AttributeError):
        return []
    # A top-level array, string or number has no "paragraphs" to read.
    if not isinstance(data, dict):
        return []
    paragraphs = data.get("paragraphs") or []
    if not isinstance(paragraphs, list):
        return []
    result: list[dict[str, Any]] = []
    for paragraph in paragraphs:
        if not isinstance(paragraph, dict):
            continue
        content = paragraph.get("content")
        # JSON values may be numbers, lists or objects; calling .strip() on
        # those would raise, so anything that is not a string is skipped.
        if not isinstance(content, str):
            continue
        content = content.strip()
        if content:
            result.append({"content": content, "placeholder_info": None})
    return result
def split_plain_narrative_into_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a non-JSON narrative into sections on blank lines.

    Leftover image placeholders are removed with ``strip_image_placeholders``
    first; no per-paragraph illustration info is produced.

    Args:
        narrative: Plain-text narrative; ``None`` is treated as empty.

    Returns:
        A list of ``{"content": <paragraph>, "placeholder_info": None}`` dicts
        for every non-blank chunk separated by ``"\\n\\n"``.
    """
    cleaned = strip_image_placeholders(narrative or "")
    sections: list[dict[str, Any]] = []
    if not cleaned.strip():
        return sections
    for chunk in cleaned.split("\n\n"):
        body = chunk.strip()
        if body:
            sections.append({"content": body, "placeholder_info": None})
    return sections
def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Turn a narrative into a list of sections.

    Text that starts with ``{`` and mentions ``paragraphs`` is first tried as
    JSON via ``parse_narrative_json``. If that yields nothing, or the text
    does not look like JSON, it is handed to
    ``split_plain_narrative_into_sections``.

    Args:
        narrative: Narrative text, either JSON or plain prose.

    Returns:
        The parsed sections, or an empty list for blank input.
    """
    is_blank = not narrative or not str(narrative).strip()
    if is_blank:
        return []
    candidate = narrative.strip()
    looks_like_json = candidate.startswith("{") and "paragraphs" in candidate
    json_sections = parse_narrative_json(narrative) if looks_like_json else None
    # An empty JSON result falls back to the plain-text splitter.
    return json_sections or split_plain_narrative_into_sections(narrative)