2026-03-20 15:15:35 +08:00
|
|
|
|
"""
|
|
|
|
|
|
StoryRouteAgent:Celery 批次内判断 new_story vs append_story(JSON)。
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
from typing import Any, Literal
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel, field_validator
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
from app.agents.memoir.prompts import (
|
|
|
|
|
|
get_story_batch_plan_prompt,
|
|
|
|
|
|
get_story_route_prompt,
|
|
|
|
|
|
)
|
2026-04-08 09:38:07 +08:00
|
|
|
|
from app.agents.memoir.story_route_payload import (
|
|
|
|
|
|
build_route_candidate_json,
|
|
|
|
|
|
sort_stories_for_route,
|
|
|
|
|
|
)
|
2026-04-03 11:02:05 +08:00
|
|
|
|
from app.core.config import settings
|
2026-04-03 13:34:27 +08:00
|
|
|
|
from app.core.llm_call import LLMCallError, llm_json_call
|
2026-03-20 15:15:35 +08:00
|
|
|
|
from app.core.logging import get_logger
|
|
|
|
|
|
from app.features.story.models import Story
|
|
|
|
|
|
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
# 超过此数量跳过批量规划(单次路由),避免 prompt 过大
|
|
|
|
|
|
PLAN_BATCH_MAX_SEGMENTS = 48
|
|
|
|
|
|
|
2026-04-08 09:38:07 +08:00
|
|
|
|
# 童年 / 求学 / 家庭:模型与后处理均倾向「少拆分、优先续写」
|
|
|
|
|
|
APPEND_FIRST_CHAPTER_CATEGORIES = frozenset({"childhood", "education", "family"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def default_append_target_story_id(
|
|
|
|
|
|
candidate_stories: list[Story],
|
|
|
|
|
|
story_meta: dict[str, dict[str, int]] | None,
|
|
|
|
|
|
settings: Any,
|
|
|
|
|
|
) -> str | None:
|
|
|
|
|
|
"""排序后的首选续写目标(与路由候选 JSON 顺序一致)。"""
|
|
|
|
|
|
if not candidate_stories:
|
|
|
|
|
|
return None
|
|
|
|
|
|
meta = story_meta or {}
|
|
|
|
|
|
ordered = sort_stories_for_route(
|
|
|
|
|
|
candidate_stories,
|
|
|
|
|
|
meta,
|
|
|
|
|
|
summary_min_chars=int(settings.story_route_summary_min_chars),
|
|
|
|
|
|
)
|
|
|
|
|
|
if not ordered:
|
|
|
|
|
|
return None
|
|
|
|
|
|
return str(ordered[0].id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_consecutive_new_story_units(
|
|
|
|
|
|
units: list[StoryBatchPlanUnit],
|
|
|
|
|
|
) -> list[StoryBatchPlanUnit]:
|
|
|
|
|
|
"""将相邻的多个 new_story 单元合并为一个,减少同批碎片叙事。"""
|
|
|
|
|
|
if not units:
|
|
|
|
|
|
return units
|
|
|
|
|
|
out: list[StoryBatchPlanUnit] = []
|
|
|
|
|
|
i = 0
|
|
|
|
|
|
while i < len(units):
|
|
|
|
|
|
u = units[i]
|
|
|
|
|
|
if u.decision != "new_story":
|
|
|
|
|
|
out.append(u)
|
|
|
|
|
|
i += 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
run_segs: list[str] = list(u.segment_ids)
|
|
|
|
|
|
j = i + 1
|
|
|
|
|
|
while j < len(units) and units[j].decision == "new_story":
|
|
|
|
|
|
run_segs.extend(units[j].segment_ids)
|
|
|
|
|
|
j += 1
|
|
|
|
|
|
if j > i + 1:
|
|
|
|
|
|
out.append(
|
|
|
|
|
|
StoryBatchPlanUnit(
|
|
|
|
|
|
segment_ids=run_segs,
|
|
|
|
|
|
decision="new_story",
|
|
|
|
|
|
target_story_id=None,
|
|
|
|
|
|
reason="coalesced_consecutive_new_story",
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
out.append(u)
|
|
|
|
|
|
i = j
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_batch_plan_reduce_new_story_fragmentation(
|
|
|
|
|
|
plan: StoryBatchPlan,
|
|
|
|
|
|
ordered_segment_ids: list[str],
|
|
|
|
|
|
*,
|
|
|
|
|
|
chapter_category: str,
|
|
|
|
|
|
candidate_stories: list[Story],
|
|
|
|
|
|
valid_story_ids: set[str],
|
|
|
|
|
|
story_meta: dict[str, dict[str, int]] | None,
|
|
|
|
|
|
settings: Any,
|
|
|
|
|
|
) -> StoryBatchPlan:
|
|
|
|
|
|
"""
|
|
|
|
|
|
LLM 校验通过后的确定性归一:合并相邻 new_story;在 append-first 类目下若整批只有一个 new 块则改为 append。
|
|
|
|
|
|
"""
|
|
|
|
|
|
units = merge_consecutive_new_story_units(list(plan.units))
|
|
|
|
|
|
if (
|
|
|
|
|
|
chapter_category in APPEND_FIRST_CHAPTER_CATEGORIES
|
|
|
|
|
|
and candidate_stories
|
|
|
|
|
|
and len(units) == 1
|
|
|
|
|
|
and units[0].decision == "new_story"
|
|
|
|
|
|
):
|
|
|
|
|
|
tid = default_append_target_story_id(candidate_stories, story_meta, settings)
|
|
|
|
|
|
if tid and tid in valid_story_ids:
|
|
|
|
|
|
units = [
|
|
|
|
|
|
StoryBatchPlanUnit(
|
|
|
|
|
|
segment_ids=list(ordered_segment_ids),
|
|
|
|
|
|
decision="append_story",
|
|
|
|
|
|
target_story_id=tid,
|
|
|
|
|
|
reason="append_first_whole_batch_fallback",
|
|
|
|
|
|
)
|
|
|
|
|
|
]
|
|
|
|
|
|
candidate = StoryBatchPlan(units=units)
|
2026-04-09 15:32:35 +08:00
|
|
|
|
ok, err = validate_story_batch_plan(ordered_segment_ids, candidate, valid_story_ids)
|
2026-04-08 09:38:07 +08:00
|
|
|
|
if not ok:
|
|
|
|
|
|
logger.warning(
|
|
|
|
|
|
"batch_plan_normalize_revalidate_failed err={} keep_original",
|
|
|
|
|
|
err,
|
|
|
|
|
|
)
|
|
|
|
|
|
return plan
|
|
|
|
|
|
return candidate
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
|
|
|
|
|
|
class StoryBatchPlanUnit(BaseModel):
|
|
|
|
|
|
"""批量写入中的一个单元(连续 segment 块)。"""
|
|
|
|
|
|
|
|
|
|
|
|
segment_ids: list[str]
|
|
|
|
|
|
decision: Literal["new_story", "append_story"]
|
|
|
|
|
|
target_story_id: str | None = None
|
|
|
|
|
|
new_story_title: str | None = None
|
|
|
|
|
|
reason: str | None = None
|
|
|
|
|
|
|
|
|
|
|
|
@field_validator("target_story_id", mode="before")
|
|
|
|
|
|
@classmethod
|
|
|
|
|
|
def empty_str_to_none_tid(cls, v: Any) -> str | None:
|
|
|
|
|
|
if v is None or v == "":
|
|
|
|
|
|
return None
|
|
|
|
|
|
if isinstance(v, str):
|
|
|
|
|
|
return v.strip() or None
|
|
|
|
|
|
return str(v)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StoryBatchPlan(BaseModel):
|
|
|
|
|
|
units: list[StoryBatchPlanUnit]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 15:15:35 +08:00
|
|
|
|
class StoryRouteDecision(BaseModel):
|
|
|
|
|
|
decision: Literal["new_story", "append_story"]
|
|
|
|
|
|
target_story_id: str | None = None
|
|
|
|
|
|
new_story_title: str | None = None
|
|
|
|
|
|
reason: str | None = None
|
|
|
|
|
|
|
|
|
|
|
|
@field_validator("target_story_id", mode="before")
|
|
|
|
|
|
@classmethod
|
|
|
|
|
|
def empty_str_to_none(cls, v: Any) -> str | None:
|
|
|
|
|
|
if v is None or v == "":
|
|
|
|
|
|
return None
|
|
|
|
|
|
if isinstance(v, str):
|
|
|
|
|
|
return v.strip() or None
|
|
|
|
|
|
return str(v)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
def _build_segments_json_for_plan(
|
|
|
|
|
|
segments: list[tuple[str, str]], *, text_preview_chars: int = 4000
|
|
|
|
|
|
) -> str:
|
2026-03-26 12:13:36 +08:00
|
|
|
|
"""segments: (id, user_input_text) 按口述顺序。"""
|
2026-03-22 16:45:57 +08:00
|
|
|
|
rows: list[dict[str, str]] = []
|
|
|
|
|
|
for sid, text in segments:
|
|
|
|
|
|
t = (text or "").strip()
|
|
|
|
|
|
if len(t) > text_preview_chars:
|
|
|
|
|
|
t = t[:text_preview_chars] + "…"
|
|
|
|
|
|
rows.append({"id": sid, "text": t})
|
|
|
|
|
|
return json.dumps(rows, ensure_ascii=False, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_story_batch_plan(
|
|
|
|
|
|
ordered_segment_ids: list[str],
|
|
|
|
|
|
plan: StoryBatchPlan,
|
|
|
|
|
|
valid_story_ids: set[str],
|
|
|
|
|
|
) -> tuple[bool, str | None]:
|
|
|
|
|
|
"""
|
2026-04-02 14:38:40 +08:00
|
|
|
|
校验:segment 全覆盖、顺序一致、append 目标合法。
|
|
|
|
|
|
标题由 NarrativeAgent 延迟生成,路由阶段不再要求 new_story_title。
|
2026-03-22 16:45:57 +08:00
|
|
|
|
返回 (ok, error_code)。
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not plan.units:
|
|
|
|
|
|
return False, "empty_units"
|
|
|
|
|
|
flat: list[str] = []
|
|
|
|
|
|
for u in plan.units:
|
|
|
|
|
|
if not u.segment_ids:
|
|
|
|
|
|
return False, "empty_unit_segment_ids"
|
|
|
|
|
|
flat.extend(u.segment_ids)
|
|
|
|
|
|
if len(flat) != len(set(flat)):
|
|
|
|
|
|
return False, "duplicate_segment"
|
|
|
|
|
|
if flat != ordered_segment_ids:
|
|
|
|
|
|
return False, "segment_mismatch"
|
|
|
|
|
|
for u in plan.units:
|
|
|
|
|
|
if u.decision == "append_story":
|
|
|
|
|
|
tid = u.target_story_id
|
|
|
|
|
|
if not tid or tid not in valid_story_ids:
|
|
|
|
|
|
return False, "invalid_append_target"
|
|
|
|
|
|
return True, None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 15:15:35 +08:00
|
|
|
|
class StoryRouteAgent:
|
|
|
|
|
|
def decide(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
chapter_category: str,
|
|
|
|
|
|
chapter_title: str,
|
|
|
|
|
|
batch_transcript: str,
|
|
|
|
|
|
candidate_stories: list[Story],
|
|
|
|
|
|
llm: Any,
|
|
|
|
|
|
valid_story_ids: set[str],
|
2026-03-30 11:53:04 +08:00
|
|
|
|
story_meta: dict[str, dict[str, int]] | None = None,
|
2026-03-20 15:15:35 +08:00
|
|
|
|
) -> StoryRouteDecision:
|
|
|
|
|
|
if not llm:
|
2026-04-09 15:32:35 +08:00
|
|
|
|
fb = default_append_target_story_id(candidate_stories, story_meta, settings)
|
2026-04-08 09:38:07 +08:00
|
|
|
|
if fb and fb in valid_story_ids:
|
|
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="append_story",
|
|
|
|
|
|
target_story_id=fb,
|
|
|
|
|
|
reason="no_llm_default_append",
|
|
|
|
|
|
)
|
2026-03-20 15:15:35 +08:00
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="new_story",
|
|
|
|
|
|
new_story_title=None,
|
|
|
|
|
|
reason="no_llm",
|
|
|
|
|
|
)
|
2026-04-03 11:02:05 +08:00
|
|
|
|
payload = build_route_candidate_json(candidate_stories, story_meta, settings)
|
2026-03-20 15:15:35 +08:00
|
|
|
|
prompt = get_story_route_prompt(
|
|
|
|
|
|
chapter_category=chapter_category,
|
|
|
|
|
|
chapter_title=chapter_title,
|
|
|
|
|
|
batch_transcript=batch_transcript,
|
|
|
|
|
|
candidate_stories_json=payload,
|
|
|
|
|
|
)
|
2026-04-03 13:34:27 +08:00
|
|
|
|
|
|
|
|
|
|
def _decide_fallback() -> StoryRouteDecision:
|
2026-04-09 15:32:35 +08:00
|
|
|
|
fb = default_append_target_story_id(candidate_stories, story_meta, settings)
|
2026-04-08 09:38:07 +08:00
|
|
|
|
if fb and fb in valid_story_ids:
|
|
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="append_story",
|
|
|
|
|
|
target_story_id=fb,
|
|
|
|
|
|
reason="parse_error_default_append",
|
|
|
|
|
|
)
|
2026-03-20 15:15:35 +08:00
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="new_story",
|
|
|
|
|
|
new_story_title=None,
|
|
|
|
|
|
reason="parse_error",
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-04-03 13:34:27 +08:00
|
|
|
|
decision = llm_json_call(
|
|
|
|
|
|
llm,
|
|
|
|
|
|
prompt,
|
|
|
|
|
|
StoryRouteDecision,
|
|
|
|
|
|
max_tokens=settings.memoir_story_route_max_tokens,
|
|
|
|
|
|
agent="StoryRouteAgent.decide",
|
|
|
|
|
|
fallback_factory=_decide_fallback,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-03-20 15:15:35 +08:00
|
|
|
|
if decision.decision == "append_story":
|
|
|
|
|
|
tid = decision.target_story_id
|
|
|
|
|
|
if not tid or tid not in valid_story_ids:
|
2026-04-08 09:38:07 +08:00
|
|
|
|
fb = default_append_target_story_id(
|
|
|
|
|
|
candidate_stories, story_meta, settings
|
|
|
|
|
|
)
|
|
|
|
|
|
if fb and fb in valid_story_ids:
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"StoryRoute append 无效 target_story_id={},回退默认 append {}",
|
|
|
|
|
|
tid,
|
|
|
|
|
|
fb,
|
|
|
|
|
|
)
|
|
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="append_story",
|
|
|
|
|
|
target_story_id=fb,
|
|
|
|
|
|
reason="invalid_target_default_append",
|
|
|
|
|
|
)
|
2026-03-20 15:15:35 +08:00
|
|
|
|
logger.warning(
|
2026-04-08 09:38:07 +08:00
|
|
|
|
"StoryRoute append 无效 target_story_id={},且无可用默认目标,回退 new_story",
|
2026-03-20 15:15:35 +08:00
|
|
|
|
tid,
|
|
|
|
|
|
)
|
|
|
|
|
|
return StoryRouteDecision(
|
|
|
|
|
|
decision="new_story",
|
|
|
|
|
|
new_story_title=decision.new_story_title,
|
|
|
|
|
|
reason="invalid_target",
|
|
|
|
|
|
)
|
|
|
|
|
|
return decision
|
2026-03-22 16:45:57 +08:00
|
|
|
|
|
|
|
|
|
|
def plan_batch(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
chapter_category: str,
|
|
|
|
|
|
chapter_title: str,
|
|
|
|
|
|
segments: list[tuple[str, str]],
|
|
|
|
|
|
candidate_stories: list[Story],
|
|
|
|
|
|
llm: Any,
|
|
|
|
|
|
valid_story_ids: set[str],
|
2026-03-30 11:53:04 +08:00
|
|
|
|
story_meta: dict[str, dict[str, int]] | None = None,
|
2026-03-22 16:45:57 +08:00
|
|
|
|
) -> StoryBatchPlan | None:
|
|
|
|
|
|
"""
|
|
|
|
|
|
将本批 segment 划分为多个写入单元。解析失败返回 None,由调用方回退 decide()。
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not llm or len(segments) < 2:
|
|
|
|
|
|
return None
|
2026-04-03 11:02:05 +08:00
|
|
|
|
payload = build_route_candidate_json(candidate_stories, story_meta, settings)
|
2026-03-22 16:45:57 +08:00
|
|
|
|
segments_json = _build_segments_json_for_plan(segments)
|
|
|
|
|
|
prompt = get_story_batch_plan_prompt(
|
|
|
|
|
|
chapter_category=chapter_category,
|
|
|
|
|
|
chapter_title=chapter_title,
|
|
|
|
|
|
segments_json=segments_json,
|
|
|
|
|
|
candidate_stories_json=payload,
|
|
|
|
|
|
)
|
|
|
|
|
|
try:
|
2026-04-03 13:34:27 +08:00
|
|
|
|
plan = llm_json_call(
|
2026-03-26 12:13:36 +08:00
|
|
|
|
llm,
|
|
|
|
|
|
prompt,
|
2026-04-03 13:34:27 +08:00
|
|
|
|
StoryBatchPlan,
|
|
|
|
|
|
max_tokens=settings.memoir_story_batch_plan_max_tokens,
|
2026-03-26 12:13:36 +08:00
|
|
|
|
agent="StoryRouteAgent.plan_batch",
|
2026-04-03 13:34:27 +08:00
|
|
|
|
)
|
|
|
|
|
|
except LLMCallError as e:
|
2026-03-26 12:13:36 +08:00
|
|
|
|
logger.warning("StoryRouteAgent.plan_batch 解析失败: {}", e)
|
2026-03-22 16:45:57 +08:00
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
ordered = [s[0] for s in segments]
|
|
|
|
|
|
ok, err = validate_story_batch_plan(ordered, plan, valid_story_ids)
|
|
|
|
|
|
if not ok:
|
2026-03-26 12:13:36 +08:00
|
|
|
|
logger.warning("StoryRouteAgent.plan_batch 校验失败: {}", err)
|
2026-03-22 16:45:57 +08:00
|
|
|
|
return None
|
2026-04-08 09:38:07 +08:00
|
|
|
|
return normalize_batch_plan_reduce_new_story_fragmentation(
|
|
|
|
|
|
plan,
|
|
|
|
|
|
ordered,
|
|
|
|
|
|
chapter_category=chapter_category,
|
|
|
|
|
|
candidate_stories=candidate_stories,
|
|
|
|
|
|
valid_story_ids=valid_story_ids,
|
|
|
|
|
|
story_meta=story_meta,
|
|
|
|
|
|
settings=settings,
|
|
|
|
|
|
)
|