feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: coerce loosely formatted scores and clamp after validation; read memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
Kevin
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions

View File

@@ -20,10 +20,10 @@ from app.core.config import settings
from app.core.db import get_sync_db
from app.core.dependencies import get_llm_provider
from app.core.logging import get_logger
from app.core.memoir_pipeline_progress import merge_pipeline_run
from app.features.memoir.models import Chapter
from app.features.memoir.repo import mark_chapter_dirty_sync
from app.features.story.models import Story
from app.features.story.sync_write import append_story_version_sync
logger = get_logger(__name__)
@@ -43,7 +43,6 @@ def _polish_story_title(
chapter_category: str,
) -> bool:
"""Re-generate title if current title is a placeholder. Returns True if updated."""
from app.agents.stage_constants import CHAPTER_CATEGORIES
from app.features.memoir.story_pipeline_sync import _placeholder_title
current = (story.title or "").strip()
@@ -85,23 +84,54 @@ def memoir_quality_pass(
Post-draft quality pass: polish titles, recheck fidelity on flagged stories.
Runs asynchronously after the fast draft is committed and visible.
"""
qptid = str(self.request.id)
if not settings.memoir_quality_pass_enabled:
if memoir_correlation_id:
merge_pipeline_run(
memoir_correlation_id,
{
"fanout": {
"quality_pass": {"task_id": qptid, "status": "disabled"},
},
},
)
return {"status": "disabled"}
t0 = time.perf_counter()
logger.info(
"event=quality_pass_start user_id={} stories={} chapters={} "
"memoir_correlation_id={}",
"memoir_correlation_id={} msg=成稿质量巡检开始",
user_id,
len(story_ids),
len(chapter_ids),
memoir_correlation_id or "",
)
if memoir_correlation_id:
merge_pipeline_run(
memoir_correlation_id,
{
"fanout": {
"quality_pass": {"task_id": qptid, "status": "running"},
},
},
)
try:
llm = _get_llm()
if not llm:
logger.warning("event=quality_pass_no_llm user_id={}", user_id)
if memoir_correlation_id:
merge_pipeline_run(
memoir_correlation_id,
{
"fanout": {
"quality_pass": {
"task_id": qptid,
"status": "no_llm",
},
},
},
)
return {"status": "no_llm"}
titles_polished = 0
@@ -137,13 +167,15 @@ def memoir_quality_pass(
db.commit()
elapsed = time.perf_counter() - t0
duration_ms = elapsed * 1000
logger.info(
"event=quality_pass_done user_id={} titles_polished={} "
"chapters_dirtied={} seconds={:.3f} memoir_correlation_id={}",
"chapters_dirtied={} duration_ms={:.1f} memoir_correlation_id={} "
"msg=成稿质量巡检完成",
user_id,
titles_polished,
len(chapters_dirtied),
elapsed,
duration_ms,
memoir_correlation_id or "",
)
@@ -154,7 +186,12 @@ def memoir_quality_pass(
for ch_id in sorted(chapters_dirtied):
try:
recompose_chapter_task.apply_async(args=[ch_id], countdown=2)
rckw: dict = {}
if memoir_correlation_id:
rckw["memoir_correlation_id"] = memoir_correlation_id
recompose_chapter_task.apply_async(
args=[ch_id], kwargs=rckw, countdown=2
)
except Exception as exc:
logger.warning(
"quality_pass recompose enqueue failed chapter={}: {}",
@@ -162,6 +199,22 @@ def memoir_quality_pass(
exc,
)
if memoir_correlation_id:
merge_pipeline_run(
memoir_correlation_id,
{
"fanout": {
"quality_pass": {
"task_id": qptid,
"status": "success",
"detail": {
"titles_polished": titles_polished,
"chapters_dirtied": len(chapters_dirtied),
},
},
},
},
)
return {
"status": "success",
"titles_polished": titles_polished,
@@ -174,4 +227,17 @@ def memoir_quality_pass(
logger.error(
"event=quality_pass_failed user_id={} exc={}", user_id, e
)
if memoir_correlation_id:
merge_pipeline_run(
memoir_correlation_id,
{
"fanout": {
"quality_pass": {
"task_id": qptid,
"status": "failure",
"detail": {"error": str(e)},
},
},
},
)
raise self.retry(exc=e) from e