feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
"""
Fine-grained progress for the memoir pipeline: a Redis JSON snapshot with memoir_correlation_id as the aggregate root.

Intended to be read by Celery workers (synchronous Redis) and the internal eval API.
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
import redis
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.logging import get_logger
|
2026-05-22 13:44:50 +08:00
|
|
|
|
from app.core.redis_sync import get_sync_redis
|
|
|
|
|
|
from app.features.memoir.constants import memoir
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module and obtained from ``get_logger``.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _redis() -> redis.Redis:
    """Return a synchronous Redis client from ``get_sync_redis`` with ``decode_responses=True``."""
    return get_sync_redis(decode_responses=True)
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run_key(correlation_id: str) -> str:
|
|
|
|
|
|
return f"memoir_pipeline_run:{correlation_id}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _phase1_index_key(phase1_task_id: str) -> str:
|
|
|
|
|
|
return f"memoir_pipeline_run:by_phase1_task:{phase1_task_id}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _ttl() -> int:
    """Return ``memoir.pipeline_run_ttl_seconds`` coerced to ``int`` (used as the Redis expiry)."""
    return int(memoir.pipeline_run_ttl_seconds)
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _empty_fanout() -> dict[str, Any]:
|
|
|
|
|
|
return {
|
|
|
|
|
|
"story_images": [],
|
|
|
|
|
|
"recompose_chapters": [],
|
|
|
|
|
|
"memory_enrichment": [],
|
|
|
|
|
|
"quality_pass": None,
|
|
|
|
|
|
"compaction": None,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _default_doc(correlation_id: str) -> dict[str, Any]:
    """Return the minimal snapshot document for *correlation_id*.

    ``user_id`` and ``phase1`` start as ``None``, ``phase2`` as an empty list
    and ``fanout`` as an empty fanout section. ``started_at_utc`` is the
    current UTC time in ISO-8601 form with a trailing ``Z``.
    """
    # isoformat() renders the UTC offset as "+00:00"; the document uses "Z".
    started_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    return {
        "memoir_correlation_id": correlation_id,
        "user_id": None,
        "started_at_utc": started_at,
        "phase1": None,
        "phase2": [],
        "fanout": _empty_fanout(),
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _merge_phase2_list(
|
|
|
|
|
|
existing: list[dict[str, Any]], updates: list[dict[str, Any]]
|
|
|
|
|
|
) -> list[dict[str, Any]]:
|
|
|
|
|
|
by_tid: dict[str, dict[str, Any]] = {}
|
|
|
|
|
|
for x in existing:
|
|
|
|
|
|
tid = str(x.get("task_id") or "").strip()
|
|
|
|
|
|
if tid:
|
|
|
|
|
|
by_tid[tid] = dict(x)
|
|
|
|
|
|
for u in updates:
|
|
|
|
|
|
tid = str(u.get("task_id") or "").strip()
|
|
|
|
|
|
if not tid:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if tid in by_tid:
|
|
|
|
|
|
merged = {**by_tid[tid], **u}
|
|
|
|
|
|
by_tid[tid] = merged
|
|
|
|
|
|
else:
|
|
|
|
|
|
by_tid[tid] = dict(u)
|
|
|
|
|
|
return list(by_tid.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 20:35:57 +08:00
|
|
|
|
def _fanout_list_merge_key(
|
|
|
|
|
|
items: list[dict], patch_items: list[dict], id_key: str
|
|
|
|
|
|
) -> None:
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
by_id: dict[str, dict[str, Any]] = {}
|
|
|
|
|
|
for x in items:
|
|
|
|
|
|
k = str(x.get(id_key) or "").strip()
|
|
|
|
|
|
if k:
|
|
|
|
|
|
by_id[k] = dict(x)
|
|
|
|
|
|
for u in patch_items:
|
|
|
|
|
|
k = str(u.get(id_key) or "").strip()
|
|
|
|
|
|
if not k:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if k in by_id:
|
|
|
|
|
|
by_id[k] = {**by_id[k], **u}
|
|
|
|
|
|
else:
|
|
|
|
|
|
by_id[k] = dict(u)
|
|
|
|
|
|
items.clear()
|
|
|
|
|
|
items.extend(by_id.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _merge_fanout(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]:
|
|
|
|
|
|
out = dict(base)
|
|
|
|
|
|
for k, v in patch.items():
|
2026-04-10 20:35:57 +08:00
|
|
|
|
if k in (
|
|
|
|
|
|
"story_images",
|
|
|
|
|
|
"recompose_chapters",
|
|
|
|
|
|
"memory_enrichment",
|
|
|
|
|
|
) and isinstance(v, list):
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
id_key = (
|
|
|
|
|
|
"story_id"
|
|
|
|
|
|
if k == "story_images"
|
|
|
|
|
|
else "chapter_id"
|
|
|
|
|
|
if k == "recompose_chapters"
|
|
|
|
|
|
else "source_id"
|
|
|
|
|
|
)
|
|
|
|
|
|
existing = list(out.get(k) or [])
|
|
|
|
|
|
_fanout_list_merge_key(existing, v, id_key)
|
|
|
|
|
|
out[k] = existing
|
|
|
|
|
|
elif k == "quality_pass" and isinstance(v, dict):
|
|
|
|
|
|
out[k] = {**(out.get(k) or {}), **v} if out.get(k) else dict(v)
|
|
|
|
|
|
elif k == "compaction" and isinstance(v, dict):
|
|
|
|
|
|
out[k] = {**(out.get(k) or {}), **v} if out.get(k) else dict(v)
|
|
|
|
|
|
else:
|
|
|
|
|
|
out[k] = v
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _merge_doc(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]:
|
|
|
|
|
|
out = dict(base)
|
|
|
|
|
|
for k, v in patch.items():
|
|
|
|
|
|
if k == "phase2" and isinstance(v, list):
|
|
|
|
|
|
out["phase2"] = _merge_phase2_list(list(out.get("phase2") or []), v)
|
|
|
|
|
|
elif k == "fanout" and isinstance(v, dict):
|
2026-04-10 20:35:57 +08:00
|
|
|
|
out["fanout"] = _merge_fanout(dict(out.get("fanout") or _empty_fanout()), v)
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
elif k == "phase1" and isinstance(v, dict):
|
|
|
|
|
|
cur = dict(out.get("phase1") or {})
|
|
|
|
|
|
for pk, pv in v.items():
|
2026-04-10 20:35:57 +08:00
|
|
|
|
if (
|
|
|
|
|
|
pk == "detail"
|
|
|
|
|
|
and isinstance(pv, dict)
|
|
|
|
|
|
and isinstance(cur.get("detail"), dict)
|
feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
|
|
|
|
):
|
|
|
|
|
|
cur["detail"] = {**cur["detail"], **pv}
|
|
|
|
|
|
else:
|
|
|
|
|
|
cur[pk] = pv
|
|
|
|
|
|
out["phase1"] = cur
|
|
|
|
|
|
elif isinstance(v, dict) and isinstance(out.get(k), dict):
|
|
|
|
|
|
out[k] = {**out[k], **v}
|
|
|
|
|
|
else:
|
|
|
|
|
|
out[k] = v
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_pipeline_run(correlation_id: str, patch: dict[str, Any]) -> None:
    """Merge *patch* into the stored pipeline snapshot, creating a minimal document if none exists.

    Args:
        correlation_id: Identifier of the pipeline run; blank or ``None`` is a no-op.
        patch: Partial document merged with ``_merge_doc``.

    The merged document is written back with ``SETEX``, so the expiry is reset
    to ``_ttl()`` on every successful merge. Any failure is logged as a warning
    and swallowed: progress tracking is best-effort and never raises.

    NOTE(review): the GET followed by SETEX is not atomic, so two callers
    merging into the same run at the same time can overwrite each other's
    changes. Confirm whether concurrent writers are expected before relying on
    every update being retained.
    """
    cid = (correlation_id or "").strip()
    if not cid:
        return
    try:
        r = _redis()
        key = _run_key(cid)
        raw = r.get(key)
        doc = json.loads(raw) if raw else None
        # A missing key, or a stored value that is not a JSON object, starts
        # from the minimal document instead of failing every later merge.
        if not isinstance(doc, dict):
            doc = _default_doc(cid)
        doc = _merge_doc(doc, patch)
        r.setex(key, _ttl(), json.dumps(doc, ensure_ascii=False))
    except Exception as e:
        logger.warning(
            "memoir_pipeline_progress merge failed correlation_id={} err={}",
            cid,
            e,
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def init_pipeline_run_from_phase1(
    user_id: str,
    correlation_id: str,
    phase1_task_id: str,
    *,
    segment_count: int,
) -> None:
    """Create the snapshot for a new pipeline run and index it by its phase-1 task id.

    Args:
        user_id: Owner of the run.
        correlation_id: Identifier of the pipeline run.
        phase1_task_id: Task id of the phase-1 job; also used as the index key.
        segment_count: Stored as ``phase1.detail.segment_count`` after ``int()``.

    Does nothing when any of the three identifiers is blank. Otherwise writes
    the snapshot (unconditionally replacing any document already stored under
    the same correlation id) and the ``phase1_task_id -> correlation_id`` index
    entry, both with the same expiry. Failures are logged as warnings and
    swallowed.
    """
    cid = (correlation_id or "").strip()
    uid = (user_id or "").strip()
    tid = (phase1_task_id or "").strip()
    if not (cid and uid and tid):
        return
    try:
        r = _redis()
        # isoformat() renders the UTC offset as "+00:00"; the document uses "Z".
        now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        doc = {
            "memoir_correlation_id": cid,
            "user_id": uid,
            "started_at_utc": now,
            "phase1": {
                "task_id": tid,
                "status": "running",
                "step": "started",
                "detail": {"segment_count": int(segment_count)},
            },
            "phase2": [],
            "fanout": _empty_fanout(),
        }
        ttl = _ttl()
        r.setex(_run_key(cid), ttl, json.dumps(doc, ensure_ascii=False))
        r.setex(_phase1_index_key(tid), ttl, cid)
    except Exception as e:
        logger.warning(
            "memoir_pipeline_progress init failed correlation_id={} err={}",
            cid,
            e,
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_pipeline_run_snapshot(correlation_id: str) -> dict[str, Any] | None:
    """Load the stored snapshot for *correlation_id*.

    Returns:
        The decoded snapshot dict, or ``None`` when the id is blank, nothing is
        stored, the stored value is not a JSON object, or reading/decoding
        fails (failures are logged as warnings).
    """
    cid = (correlation_id or "").strip()
    if not cid:
        return None
    try:
        raw = _redis().get(_run_key(cid))
        if not raw:
            return None
        snapshot = json.loads(raw)
    except Exception as e:
        logger.warning(
            "memoir_pipeline_progress get failed correlation_id={} err={}",
            cid,
            e,
        )
        return None
    # Honour the declared return type: callers go on to call ``.get`` on it.
    return snapshot if isinstance(snapshot, dict) else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_correlation_id_for_phase1_task(phase1_task_id: str) -> str | None:
    """Look up the correlation id indexed under *phase1_task_id*.

    Returns:
        The stripped correlation id, or ``None`` when the task id is blank, no
        index entry exists, the stored value is blank, or the lookup fails
        (failures are logged as warnings).
    """
    tid = (phase1_task_id or "").strip()
    if not tid:
        return None
    try:
        stored = _redis().get(_phase1_index_key(tid))
        return (stored or "").strip() or None
    except Exception as e:
        logger.warning(
            "memoir_pipeline_progress resolve phase1_task={} err={}",
            tid,
            e,
        )
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_pipeline_run_for_eval(
    user_id: str,
    *,
    memoir_correlation_id: str | None = None,
    phase1_task_id: str | None = None,
) -> dict[str, Any] | None:
    """Internal eval: return the snapshot only after checking that it belongs to *user_id*.

    Args:
        user_id: The requesting user; blank yields ``None``.
        memoir_correlation_id: Run identifier. Takes precedence when non-blank.
        phase1_task_id: Used to resolve the run identifier when
            *memoir_correlation_id* is blank.

    Returns:
        The snapshot dict when one is found and its stripped ``user_id`` equals
        the stripped *user_id*; otherwise ``None``.
    """
    uid = (user_id or "").strip()
    if not uid:
        return None

    cid = (memoir_correlation_id or "").strip()
    if not cid and phase1_task_id:
        cid = resolve_correlation_id_for_phase1_task(phase1_task_id) or ""
    if not cid:
        return None

    snap = get_pipeline_run_snapshot(cid)
    # The isinstance check guards the ``.get`` below against a stored value
    # that decoded to something other than a JSON object.
    if not snap or not isinstance(snap, dict):
        return None
    if str(snap.get("user_id") or "").strip() != uid:
        return None
    return snap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_fanout_item(
    correlation_id: str | None,
    *,
    list_name: str,
    id_field: str,
    item_id: str,
    task_id: str,
    status: str,
    extra: dict[str, Any] | None = None,
) -> None:
    """Record one fan-out item in the snapshot's ``fanout[list_name]`` list.

    Args:
        correlation_id: Run identifier; blank or ``None`` is a no-op.
        list_name: Key of the fanout list the item belongs to.
        id_field: Name of the field that carries *item_id* in the item.
        item_id: Identifier of the item within the list.
        task_id: Task id stored on the item.
        status: Status stored on the item.
        extra: Optional additional fields; applied last, so they override the
            three base fields on a name clash.

    The item is delivered through ``merge_pipeline_run``.

    NOTE(review): ``_merge_fanout`` matches list entries on a fixed field per
    list (``story_id`` / ``chapter_id`` / ``source_id``); an item whose
    *id_field* differs from that field carries no usable id there and would
    not be merged. Confirm callers pass the matching *id_field*.
    """
    cid = (correlation_id or "").strip()
    if not cid:
        return
    item: dict[str, Any] = {
        id_field: item_id,
        "task_id": task_id,
        "status": status,
    }
    if extra:
        item.update(extra)
    merge_pipeline_run(cid, {"fanout": {list_name: [item]}})
|