feat(api): 拆分章节物化与 Story 后处理,并加固 Redis 锁与腾讯 ASR
回忆录 Story 流水线(同步) - 同步路径仅写入 Story 与章节关联,改为 mark_chapter_dirty_sync,不再内联 compose - 物化由 Celery recompose_chapter 异步完成;compose 不变量与异常时保留 dirty 的语义在 repo 中补充说明 - Evidence:大批次时降低 top_k;路由候选 story 携带 char_count/version_count;append 超长/版本过多时强制新开 story - 叙事 prompt:relevant_chunks 去重,减少重复证据噪声 - 叙事回退与忠实度 gate:返回 fallback 类型并记录结构化日志(含耗时、JSON 有效性等) Post-commit 与任务编排 - 新增 post_commit.enqueue_story_post_commit_effects:统一派发 generate_story_image(Redis 去重)、延迟 recompose_chapter、可选 memory compaction - memoir_tasks / story_service / story_image_tasks 改为调用 post-commit 入口;主图回填后按关联章节重算并调度物化与 compacs(锁委托、Redis 单例、ASR to_thread) - 更新 test_narrative_pipeline 以适配 _apply_narrative_fallbacks 返回值
This commit is contained in:
@@ -1,10 +1,13 @@
|
||||
"""
|
||||
Celery 用:按批次将 transcript 写入 Story,并物化 Chapter canonical_markdown。
|
||||
Celery 用:按批次将 transcript 写入 Story,并标记 Chapter 需物化(markdown_compose_dirty)。
|
||||
|
||||
同步路径不执行 compose;物化由 commit 后 `recompose_chapter` 异步完成。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
@@ -17,26 +20,27 @@ from app.agents.memoir.prompts import (
|
||||
format_evidence_chunks_for_prompt,
|
||||
format_narrative_user_content,
|
||||
)
|
||||
from app.core.config import settings
|
||||
from app.agents.memoir.story_route_agent import (
|
||||
PLAN_BATCH_MAX_SEGMENTS,
|
||||
StoryBatchPlan,
|
||||
StoryRouteAgent,
|
||||
)
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
||||
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
||||
from app.features.memoir.models import Chapter
|
||||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||||
from app.features.memoir.repo import (
|
||||
compose_chapter_from_story_links_sync,
|
||||
mark_chapter_dirty_sync,
|
||||
reorder_chapter_story_links_by_life_order_sync,
|
||||
)
|
||||
from app.features.memory.repo import retrieve_evidence_sync
|
||||
from app.features.story.models import Story
|
||||
from app.features.story.sync_write import (
|
||||
append_story_version_sync,
|
||||
count_story_versions_sync,
|
||||
create_story_with_version_sync,
|
||||
ensure_chapter_story_link_sync,
|
||||
list_active_stories_for_user_sync,
|
||||
@@ -71,12 +75,12 @@ def _gate_narrative_fidelity(
|
||||
llm: Any,
|
||||
*,
|
||||
existing_canonical: str | None = None,
|
||||
) -> str:
|
||||
"""叙事 JSON 忠实度检查;不通过则回退为口述正文(续写时保留已有故事 + 口述)。"""
|
||||
) -> tuple[str, str]:
|
||||
"""返回 (文本, fallback 原因);忠实度不通过时第二项为 fidelity_failed。"""
|
||||
from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent
|
||||
|
||||
if not settings.memoir_fidelity_check_enabled or not llm:
|
||||
return narrative_raw
|
||||
return narrative_raw, "none"
|
||||
agent = FidelityCheckAgent()
|
||||
ex = (existing_canonical or "").strip() or None
|
||||
if agent.passes(
|
||||
@@ -85,7 +89,7 @@ def _gate_narrative_fidelity(
|
||||
llm=llm,
|
||||
existing_canonical_markdown=ex,
|
||||
):
|
||||
return narrative_raw
|
||||
return narrative_raw, "none"
|
||||
logger.warning(
|
||||
"event=fidelity_gate_fallback oral_len={} merge={}",
|
||||
len((oral_text or "").strip()),
|
||||
@@ -93,8 +97,8 @@ def _gate_narrative_fidelity(
|
||||
)
|
||||
o = (oral_text or "").strip()
|
||||
if not o and not ex:
|
||||
return narrative_raw
|
||||
return _fidelity_fallback_json(o, ex)
|
||||
return narrative_raw, "none"
|
||||
return _fidelity_fallback_json(o, ex), "fidelity_failed"
|
||||
|
||||
|
||||
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
|
||||
@@ -155,7 +159,8 @@ def _apply_narrative_fallbacks(
|
||||
existing_for_narrative: str,
|
||||
*,
|
||||
chapter_category: str,
|
||||
) -> str:
|
||||
) -> tuple[str, str]:
|
||||
"""返回 (文本, fallback_type);无改写时为 none。"""
|
||||
# 整篇合并(JSON)输出异常缩水:回退为旧文 + 本段口述,避免覆盖丢失
|
||||
if existing_for_narrative and _is_json_narrative(narrative_raw):
|
||||
merged_md = narrative_to_markdown(narrative_raw).strip()
|
||||
@@ -166,7 +171,7 @@ def _apply_narrative_fallbacks(
|
||||
"chapter_category={}",
|
||||
chapter_category,
|
||||
)
|
||||
return f"{ex}\n\n{combined_unit_text.strip()}"
|
||||
return f"{ex}\n\n{combined_unit_text.strip()}", "merge_shrink"
|
||||
|
||||
if (
|
||||
existing_for_narrative
|
||||
@@ -178,10 +183,10 @@ def _apply_narrative_fallbacks(
|
||||
"chapter_category={}",
|
||||
chapter_category,
|
||||
)
|
||||
return f"{existing_for_narrative}\n\n{combined_unit_text}"
|
||||
|
||||
# 禁止把「章节级 canonical」(多故事拼接)写进单条 Story:会把全章正文塞进一个故事,
|
||||
# 且该 story 若挂多章会导致各章阅读视图串台。新建故事时宁可短,也不拼接 existing_chapter_md。
|
||||
return (
|
||||
f"{existing_for_narrative}\n\n{combined_unit_text}",
|
||||
"coalesce_to_old_plus_oral",
|
||||
)
|
||||
|
||||
md_check = narrative_to_markdown(narrative_raw).strip()
|
||||
oral = (combined_unit_text or "").strip()
|
||||
@@ -195,7 +200,7 @@ def _apply_narrative_fallbacks(
|
||||
len(oral),
|
||||
len(md_check),
|
||||
)
|
||||
return f"{ex_fb}\n\n{oral}"
|
||||
return f"{ex_fb}\n\n{oral}", "coalesce_to_old_plus_oral"
|
||||
logger.warning(
|
||||
"event=narrative_fallback reason=body_too_short_vs_oral "
|
||||
"chapter_category={} oral_len={} md_len={}",
|
||||
@@ -203,9 +208,28 @@ def _apply_narrative_fallbacks(
|
||||
len(oral),
|
||||
len(md_check),
|
||||
)
|
||||
return oral
|
||||
return oral, "coalesce_to_oral"
|
||||
|
||||
return narrative_raw
|
||||
return narrative_raw, "none"
|
||||
|
||||
|
||||
def _merge_fallback_type(gate_ft: str, apply_ft: str) -> str:
|
||||
if apply_ft != "none":
|
||||
return apply_ft
|
||||
return gate_ft
|
||||
|
||||
|
||||
def _story_meta_for_route(
    session: Session, candidates: list
) -> dict[str, dict[str, int]]:
    """Build per-story size metadata for the candidate stories.

    Args:
        session: Database session forwarded to ``count_story_versions_sync``.
        candidates: Story-like objects exposing ``id`` and
            ``canonical_markdown``.

    Returns:
        A mapping keyed by ``str(story.id)``. Each value holds
        ``"char_count"`` (length of the stripped ``canonical_markdown``,
        with a missing value counted as empty) and ``"version_count"``
        (the result of ``count_story_versions_sync`` for that story).
    """
    meta: dict[str, dict[str, int]] = {}
    for s in candidates:
        # Keys are normalised to str; the version count is looked up with
        # the same string id.
        sid = str(s.id)
        meta[sid] = {
            "char_count": len((s.canonical_markdown or "").strip()),
            # NOTE(review): one count call per candidate — presumably one
            # query each; confirm this is acceptable for large candidate sets.
            "version_count": count_story_versions_sync(session, sid),
        }
    return meta
|
||||
|
||||
|
||||
def _ensure_chapter_record(
|
||||
@@ -268,19 +292,37 @@ def _run_batch_plan_writes(
|
||||
narrative_agent: NarrativeAgent,
|
||||
) -> set[str]:
|
||||
dispatch_ids: set[str] = set()
|
||||
max_chars = int(settings.story_append_max_canonical_chars)
|
||||
max_ver = int(settings.story_append_max_versions)
|
||||
for unit in plan.units:
|
||||
t0 = time.perf_counter()
|
||||
unit_text = _ordered_text_for_segment_ids(category_segments, unit.segment_ids)
|
||||
new_content_input = format_narrative_user_content(unit_text, evidence_text)
|
||||
|
||||
target_story_id: str | None = None
|
||||
existing_for_narrative = ""
|
||||
decision_source = "batch_plan"
|
||||
if unit.decision == "append_story" and unit.target_story_id:
|
||||
st = session.get(Story, unit.target_story_id)
|
||||
if st and st.user_id == user_id:
|
||||
target_story_id = st.id
|
||||
existing_for_narrative = (st.canonical_markdown or "").strip()
|
||||
canon = (st.canonical_markdown or "").strip()
|
||||
vc = count_story_versions_sync(session, str(st.id))
|
||||
if len(canon) > max_chars or vc >= max_ver:
|
||||
logger.info(
|
||||
"event=append_overflow_to_new story_id={} canonical_chars={} "
|
||||
"versions={} decision_source=batch_plan",
|
||||
str(st.id),
|
||||
len(canon),
|
||||
vc,
|
||||
)
|
||||
target_story_id = None
|
||||
existing_for_narrative = ""
|
||||
decision_source = "forced_new_due_to_append_limit"
|
||||
else:
|
||||
target_story_id = st.id
|
||||
existing_for_narrative = canon
|
||||
|
||||
narrative_raw = narrative_agent.generate_narrative(
|
||||
raw_gen = narrative_agent.generate_narrative(
|
||||
stage=chapter_category,
|
||||
slots=slot_snippets,
|
||||
new_content=new_content_input,
|
||||
@@ -289,18 +331,29 @@ def _run_batch_plan_writes(
|
||||
birth_year=user_birth_year,
|
||||
llm=llm,
|
||||
)
|
||||
narrative_raw = _gate_narrative_fidelity(
|
||||
json_invalid = False
|
||||
s0 = (raw_gen or "").strip()
|
||||
if s0.startswith("{") and "paragraphs" in s0:
|
||||
try:
|
||||
json.loads(s0)
|
||||
except json.JSONDecodeError:
|
||||
json_invalid = True
|
||||
|
||||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||||
unit_text,
|
||||
narrative_raw,
|
||||
raw_gen,
|
||||
llm,
|
||||
existing_canonical=existing_for_narrative or None,
|
||||
)
|
||||
narrative_raw = _apply_narrative_fallbacks(
|
||||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||||
narrative_raw,
|
||||
unit_text,
|
||||
existing_for_narrative,
|
||||
chapter_category=chapter_category,
|
||||
)
|
||||
fallback_type = _merge_fallback_type(fb_gate, fb_apply)
|
||||
if json_invalid and fallback_type == "none":
|
||||
fallback_type = "json_invalid"
|
||||
|
||||
md = _coalesce_story_markdown(
|
||||
narrative_to_markdown(narrative_raw).strip(),
|
||||
@@ -309,11 +362,13 @@ def _run_batch_plan_writes(
|
||||
)
|
||||
|
||||
if target_story_id:
|
||||
append_story_version_sync(session, target_story_id, md)
|
||||
dispatch_ids.add(target_story_id)
|
||||
append_story_version_sync(session, str(target_story_id), md)
|
||||
dispatch_ids.add(str(target_story_id))
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=chapter.id, story_id=target_story_id
|
||||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||||
)
|
||||
sid_log = target_story_id
|
||||
is_append = True
|
||||
else:
|
||||
story_title = (unit.new_story_title or "").strip()
|
||||
if not story_title:
|
||||
@@ -332,10 +387,33 @@ def _run_batch_plan_writes(
|
||||
canonical_markdown=md,
|
||||
stage=chapter_category,
|
||||
)
|
||||
dispatch_ids.add(st.id)
|
||||
dispatch_ids.add(str(st.id))
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=chapter.id, story_id=st.id
|
||||
session, chapter_id=str(chapter.id), story_id=str(st.id)
|
||||
)
|
||||
sid_log = st.id
|
||||
is_append = False
|
||||
|
||||
elapsed = time.perf_counter() - t0
|
||||
logger.info(
|
||||
"event=story_generated route_type=batch decision_source={} route_decision={} "
|
||||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||||
"story_id={} seconds={:.3f}",
|
||||
decision_source,
|
||||
unit.decision,
|
||||
len(unit.segment_ids),
|
||||
bool(evidence_text.strip()),
|
||||
_is_json_narrative(raw_gen),
|
||||
fb_gate == "none",
|
||||
fallback_type,
|
||||
len(unit_text.strip()),
|
||||
len(md.strip()),
|
||||
chapter_category,
|
||||
is_append,
|
||||
sid_log,
|
||||
elapsed,
|
||||
)
|
||||
return dispatch_ids
|
||||
|
||||
|
||||
@@ -361,8 +439,12 @@ def run_story_pipeline_for_category_batch(
|
||||
combined_text = "\n\n".join(segment_texts)
|
||||
source_ids = [seg.id for seg in category_segments]
|
||||
|
||||
n_units = len(category_segments)
|
||||
top_k = int(settings.evidence_top_k_default)
|
||||
if n_units > int(settings.evidence_large_batch_threshold):
|
||||
top_k = int(settings.evidence_top_k_large_batch)
|
||||
try:
|
||||
evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
|
||||
evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=top_k)
|
||||
except Exception as e:
|
||||
logger.warning("Evidence 检索跳过: {}", e)
|
||||
evidence = {
|
||||
@@ -412,7 +494,8 @@ def run_story_pipeline_for_category_batch(
|
||||
)
|
||||
|
||||
candidates = list_active_stories_for_user_sync(session, user_id)
|
||||
valid_ids = {s.id for s in candidates}
|
||||
valid_ids = {str(s.id) for s in candidates}
|
||||
story_meta = _story_meta_for_route(session, candidates)
|
||||
|
||||
batch_for_route = (
|
||||
f"{combined_text}\n\n{evidence_text}"
|
||||
@@ -437,6 +520,7 @@ def run_story_pipeline_for_category_batch(
|
||||
candidate_stories=candidates,
|
||||
llm=llm,
|
||||
valid_story_ids=valid_ids,
|
||||
story_meta=story_meta,
|
||||
)
|
||||
|
||||
chapter = _ensure_chapter_record(
|
||||
@@ -471,17 +555,36 @@ def run_story_pipeline_for_category_batch(
|
||||
candidate_stories=candidates,
|
||||
llm=llm,
|
||||
valid_story_ids=valid_ids,
|
||||
story_meta=story_meta,
|
||||
)
|
||||
|
||||
t0 = time.perf_counter()
|
||||
target_story_id: str | None = None
|
||||
existing_for_narrative = ""
|
||||
decision_source = "fallback_no_llm" if not llm else "single_decide"
|
||||
max_chars = int(settings.story_append_max_canonical_chars)
|
||||
max_ver = int(settings.story_append_max_versions)
|
||||
if route.decision == "append_story" and route.target_story_id:
|
||||
st = session.get(Story, route.target_story_id)
|
||||
if st and st.user_id == user_id:
|
||||
target_story_id = st.id
|
||||
existing_for_narrative = (st.canonical_markdown or "").strip()
|
||||
canon = (st.canonical_markdown or "").strip()
|
||||
vc = count_story_versions_sync(session, str(st.id))
|
||||
if len(canon) > max_chars or vc >= max_ver:
|
||||
logger.info(
|
||||
"event=append_overflow_to_new story_id={} canonical_chars={} "
|
||||
"versions={} decision_source=single_decide",
|
||||
str(st.id),
|
||||
len(canon),
|
||||
vc,
|
||||
)
|
||||
target_story_id = None
|
||||
existing_for_narrative = ""
|
||||
decision_source = "forced_new_due_to_append_limit"
|
||||
else:
|
||||
target_story_id = st.id
|
||||
existing_for_narrative = canon
|
||||
|
||||
narrative_raw = narrative_agent.generate_narrative(
|
||||
raw_gen = narrative_agent.generate_narrative(
|
||||
stage=chapter_category,
|
||||
slots=slot_snippets,
|
||||
new_content=new_content_input,
|
||||
@@ -490,19 +593,30 @@ def run_story_pipeline_for_category_batch(
|
||||
birth_year=user_birth_year,
|
||||
llm=llm,
|
||||
)
|
||||
narrative_raw = _gate_narrative_fidelity(
|
||||
json_invalid = False
|
||||
s0 = (raw_gen or "").strip()
|
||||
if s0.startswith("{") and "paragraphs" in s0:
|
||||
try:
|
||||
json.loads(s0)
|
||||
except json.JSONDecodeError:
|
||||
json_invalid = True
|
||||
|
||||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||||
combined_text,
|
||||
narrative_raw,
|
||||
raw_gen,
|
||||
llm,
|
||||
existing_canonical=existing_for_narrative or None,
|
||||
)
|
||||
|
||||
narrative_raw = _apply_narrative_fallbacks(
|
||||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||||
narrative_raw,
|
||||
combined_text,
|
||||
existing_for_narrative,
|
||||
chapter_category=chapter_category,
|
||||
)
|
||||
fallback_type = _merge_fallback_type(fb_gate, fb_apply)
|
||||
if json_invalid and fallback_type == "none":
|
||||
fallback_type = "json_invalid"
|
||||
|
||||
md = _coalesce_story_markdown(
|
||||
narrative_to_markdown(narrative_raw).strip(),
|
||||
@@ -513,11 +627,13 @@ def run_story_pipeline_for_category_batch(
|
||||
do_append = target_story_id is not None
|
||||
|
||||
if do_append:
|
||||
append_story_version_sync(session, target_story_id, md)
|
||||
dispatch_ids.add(target_story_id)
|
||||
append_story_version_sync(session, str(target_story_id), md)
|
||||
dispatch_ids.add(str(target_story_id))
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=chapter.id, story_id=target_story_id
|
||||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||||
)
|
||||
sid_log = target_story_id
|
||||
is_append = True
|
||||
else:
|
||||
story_title = (route.new_story_title or "").strip()
|
||||
if not story_title:
|
||||
@@ -536,13 +652,36 @@ def run_story_pipeline_for_category_batch(
|
||||
canonical_markdown=md,
|
||||
stage=chapter_category,
|
||||
)
|
||||
dispatch_ids.add(st.id)
|
||||
dispatch_ids.add(str(st.id))
|
||||
ensure_chapter_story_link_sync(
|
||||
session, chapter_id=chapter.id, story_id=st.id
|
||||
session, chapter_id=str(chapter.id), story_id=str(st.id)
|
||||
)
|
||||
sid_log = st.id
|
||||
is_append = False
|
||||
|
||||
reorder_chapter_story_links_by_life_order_sync(session, chapter.id)
|
||||
compose_chapter_from_story_links_sync(session, chapter.id)
|
||||
elapsed = time.perf_counter() - t0
|
||||
logger.info(
|
||||
"event=story_generated route_type=single decision_source={} route_decision={} "
|
||||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||||
"story_id={} seconds={:.3f}",
|
||||
decision_source,
|
||||
route.decision,
|
||||
len(category_segments),
|
||||
bool(evidence_text.strip()),
|
||||
_is_json_narrative(raw_gen),
|
||||
fb_gate == "none",
|
||||
fallback_type,
|
||||
len(combined_text.strip()),
|
||||
len(md.strip()),
|
||||
chapter_category,
|
||||
is_append,
|
||||
sid_log,
|
||||
elapsed,
|
||||
)
|
||||
|
||||
reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
|
||||
mark_chapter_dirty_sync(session, str(chapter.id))
|
||||
session.flush()
|
||||
|
||||
image_settings = MemoirImageSettings.from_env()
|
||||
|
||||
Reference in New Issue
Block a user