- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向:收紧运行时契约、 删除过渡兼容路径与双轨逻辑,并同步更新客户端与文档。 - Chat:以 ChatOrchestrator 为实时编排入口;删除独立 conversation_agent,精简 prompts。 - Memoir:删除 memory_agent;MemoirOrchestrator、classification / story_route 与 prompts 收敛到 prepare_batches + run_story_pipeline_for_category_batch 主链路。 - 将 agents 侧 processor 迁入 feature 层为 background_runner,并移除 features 下重复/过时 processor 封装。 - 新增 history_store,强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。 - 调整 models、repo、service、session_history;精简 WS message_types,重构 pipeline 与 router。 - 移除章节占位、整章再生等旧路径;章节列表与封面逻辑要求 story 关联;收紧 cover 资格与 enqueue。 - helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service 等按 canonical markdown / cover_asset_id 收缩;删除 memoir_images/provider 等冗余。 - tasks:memoir_tasks、chapter_cover_tasks 等大幅瘦身;story_image_tasks 等与当前图片任务对齐。 - core:config、logging、redis、task_tracker 小幅调整。 - auth / user / payment / quota:路由或服务侧删减过时接口或逻辑(如 payment router 行数减少)。 - pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。 - Alembic 0001_initial_schema 微调(与当前 schema 叙事一致的小改动)。 - 回忆录:types / mappers / api、章节页与 memoir 页与后端契约对齐;markdown-renderer 调整。 - 语音:删除 voice/player,voice-segment-store 相应精简。 - api/tests:删除 conftest 及绝大部分既有测试文件(websocket_baseline、conversation、memoir 图片、PDF、SMS 等),属有意收缩/待按 backend-test-system 重建的信号。 - docs:新增多智能体收敛与移除兼容层计划摘要;更新 story-first 设计、backend-test-system、 multi-agent-refactor-plan、实施总结等。 BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更;大量 API 测试被移除, CI 若依赖这些用例需按新策略补测或调整流水线。
230 lines
7.4 KiB
Python
230 lines
7.4 KiB
Python
"""
|
||
StoryRouteAgent:Celery 批次内判断 new_story vs append_story(JSON)。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import Any, Literal
|
||
|
||
from pydantic import BaseModel, field_validator
|
||
|
||
from app.agents.memoir.prompts import (
|
||
get_story_batch_plan_prompt,
|
||
get_story_route_prompt,
|
||
)
|
||
from app.core.langchain_llm import bind_json_object_mode
|
||
from app.core.logging import get_logger
|
||
from app.features.story.models import Story
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
# Above this many segments, batch planning is skipped in favour of a single
# routing call, so that the planning prompt does not grow too large.
PLAN_BATCH_MAX_SEGMENTS = 48
|
||
|
||
|
||
class StoryBatchPlanUnit(BaseModel):
    """One unit of a batch write plan (a contiguous block of segments)."""

    # IDs of the segments covered by this unit.
    segment_ids: list[str]
    # Whether the unit opens a new story or extends an existing one.
    decision: Literal["new_story", "append_story"]
    # Story to append to; normalized by the validator below.
    target_story_id: str | None = None
    # Title to use when ``decision`` is "new_story".
    new_story_title: str | None = None
    # Optional free-text explanation accompanying the decision.
    reason: str | None = None

    @field_validator("target_story_id", mode="before")
    @classmethod
    def empty_str_to_none_tid(cls, v: Any) -> str | None:
        """Normalize the raw target ID before field validation.

        ``None`` and empty / whitespace-only strings become ``None``; other
        strings are stripped; non-string values are converted with ``str``.
        """
        if v is None or v == "":
            return None
        if not isinstance(v, str):
            return str(v)
        cleaned = v.strip()
        return cleaned or None
|
||
|
||
|
||
class StoryBatchPlan(BaseModel):
    """A batch write plan: the list of units the batch is divided into."""

    # Write units in plan order; each one is a StoryBatchPlanUnit.
    units: list[StoryBatchPlanUnit]
|
||
|
||
|
||
class StoryRouteDecision(BaseModel):
    """A single routing decision: start a new story or append to one."""

    # Whether to open a new story or extend an existing one.
    decision: Literal["new_story", "append_story"]
    # Story to append to; normalized by the validator below.
    target_story_id: str | None = None
    # Title to use when ``decision`` is "new_story".
    new_story_title: str | None = None
    # Optional free-text explanation accompanying the decision.
    reason: str | None = None

    @field_validator("target_story_id", mode="before")
    @classmethod
    def empty_str_to_none(cls, v: Any) -> str | None:
        """Normalize the raw target ID before field validation.

        ``None`` and empty / whitespace-only strings become ``None``; other
        strings are stripped; non-string values are converted with ``str``.
        """
        if v is None or v == "":
            return None
        if not isinstance(v, str):
            return str(v)
        cleaned = v.strip()
        return cleaned or None
|
||
|
||
|
||
def _build_candidate_json(stories: list[Story], *, preview_chars: int = 220) -> str:
    """Serialize candidate stories into a pretty-printed JSON array.

    Each entry holds the story ``id`` and ``title``, a single-line ``preview``
    of its canonical markdown (cut to ``preview_chars`` characters, with an
    ellipsis appended when text was dropped), and ``linked_chapters`` labels
    formatted as ``title(category)``.
    """

    def _chapter_labels(story: Any) -> list[str]:
        # Links without a chapter object are skipped; missing fields become "".
        labels: list[str] = []
        for link in getattr(story, "chapter_links", None) or []:
            chapter = getattr(link, "chapter", None)
            if chapter is not None:
                category = getattr(chapter, "category", None) or ""
                title = getattr(chapter, "title", None) or ""
                labels.append(f"{title}({category})")
        return labels

    candidates: list[dict[str, Any]] = []
    for story in stories:
        # Flatten the markdown onto one line so the preview stays compact.
        flattened = (story.canonical_markdown or "").strip().replace("\n", " ")
        preview = flattened[:preview_chars]
        if len(flattened) > preview_chars:
            preview += "…"
        chapter_labels = _chapter_labels(story)
        entry = {
            "id": story.id,
            "title": story.title,
            "preview": preview,
            "linked_chapters": chapter_labels,
        }
        candidates.append(entry)
    return json.dumps(candidates, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def _build_segments_json_for_plan(
    segments: list[tuple[str, str]], *, text_preview_chars: int = 4000
) -> str:
    """Serialize ``(id, transcript_text)`` pairs, in spoken order, to JSON.

    Each text is stripped and, when longer than ``text_preview_chars``, cut to
    that length with an ellipsis appended. A missing text becomes "".
    """

    def _clip(raw: str | None) -> str:
        body = (raw or "").strip()
        if len(body) <= text_preview_chars:
            return body
        return body[:text_preview_chars] + "…"

    payload = [{"id": seg_id, "text": _clip(raw)} for seg_id, raw in segments]
    return json.dumps(payload, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def validate_story_batch_plan(
    ordered_segment_ids: list[str],
    plan: StoryBatchPlan,
    valid_story_ids: set[str],
) -> tuple[bool, str | None]:
    """Check a batch plan against the segments it is supposed to cover.

    The plan must be non-empty, every unit must list at least one segment,
    no segment may appear twice, and the concatenated segment IDs must equal
    ``ordered_segment_ids`` exactly (full coverage, same order). Every
    "append_story" unit needs a target in ``valid_story_ids``; every other
    unit needs a non-blank ``new_story_title``.

    Returns ``(True, None)`` on success, otherwise ``(False, error_code)``.
    """
    units = plan.units
    if not units:
        return False, "empty_units"

    # Concatenate the segment IDs of all units in plan order.
    covered: list[str] = []
    for unit in units:
        if not unit.segment_ids:
            return False, "empty_unit_segment_ids"
        covered.extend(unit.segment_ids)

    if len(set(covered)) != len(covered):
        return False, "duplicate_segment"
    if covered != ordered_segment_ids:
        return False, "segment_mismatch"

    for unit in units:
        if unit.decision == "append_story":
            target = unit.target_story_id
            if not (target and target in valid_story_ids):
                return False, "invalid_append_target"
            continue
        if not (unit.new_story_title or "").strip():
            return False, "missing_new_title"
    return True, None
|
||
|
||
|
||
class StoryRouteAgent:
    """LLM-backed router that maps transcript segments onto stories.

    ``decide`` makes one new_story / append_story decision for a whole batch;
    ``plan_batch`` divides a batch into several write units. Both request a
    JSON object from the LLM and validate it with this module's models.
    """

    @staticmethod
    def _invoke_json(llm: Any, prompt: Any, *, max_tokens: int) -> Any:
        """Call ``llm`` in JSON-object mode and return the decoded payload.

        Any exception from binding, invoking, or ``json.loads`` propagates;
        callers wrap this call in their own fallback handling.
        """
        json_llm = bind_json_object_mode(llm, max_tokens=max_tokens)
        response = json_llm.invoke(prompt)
        raw = (response.content or "").strip()
        return json.loads(raw)

    @staticmethod
    def _blank_to_none(title: str | None) -> str | None:
        """Return ``title`` unchanged, or ``None`` if it is empty or blank."""
        if title and title.strip():
            return title
        return None

    def decide(
        self,
        *,
        chapter_category: str,
        chapter_title: str,
        batch_transcript: str,
        candidate_stories: list[Story],
        llm: Any,
        valid_story_ids: set[str],
    ) -> StoryRouteDecision:
        """Decide whether the batch starts a new story or extends one.

        Never raises for LLM or parsing problems; it falls back to a
        "new_story" decision whose ``reason`` names the cause:

        * ``no_llm`` - ``llm`` is falsy, so no call is made.
        * ``parse_error`` - the LLM call, JSON decoding, or model validation
          failed (the error is logged).
        * ``invalid_target`` - the LLM chose "append_story" but the target is
          missing or not in ``valid_story_ids``.

        A blank ``new_story_title`` is normalized to ``None`` on every
        "new_story" result, including the ``invalid_target`` fallback.
        """
        if not llm:
            return StoryRouteDecision(
                decision="new_story",
                new_story_title=None,
                reason="no_llm",
            )

        payload = _build_candidate_json(candidate_stories)
        prompt = get_story_route_prompt(
            chapter_category=chapter_category,
            chapter_title=chapter_title,
            batch_transcript=batch_transcript,
            candidate_stories_json=payload,
        )
        try:
            data = self._invoke_json(llm, prompt, max_tokens=1024)
            decision = StoryRouteDecision.model_validate(data)
        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to new_story.
            logger.warning("StoryRouteAgent 解析失败: %s", e)
            return StoryRouteDecision(
                decision="new_story",
                new_story_title=None,
                reason="parse_error",
            )

        if decision.decision == "append_story":
            tid = decision.target_story_id
            if tid and tid in valid_story_ids:
                return decision
            logger.warning(
                "StoryRoute append 无效 target_story_id=%s,回退 new_story",
                tid,
            )
            # Keep a usable title suggested by the LLM, but never pass on a
            # blank one: the regular new_story path below normalizes it too.
            return StoryRouteDecision(
                decision="new_story",
                new_story_title=self._blank_to_none(decision.new_story_title),
                reason="invalid_target",
            )

        if self._blank_to_none(decision.new_story_title) is None:
            decision.new_story_title = None
        return decision

    def plan_batch(
        self,
        *,
        chapter_category: str,
        chapter_title: str,
        segments: list[tuple[str, str]],
        candidate_stories: list[Story],
        llm: Any,
        valid_story_ids: set[str],
    ) -> StoryBatchPlan | None:
        """Divide this batch's segments into multiple write units.

        ``segments`` are ``(id, transcript_text)`` pairs in spoken order.
        Returns ``None`` when ``llm`` is falsy, when there are fewer than two
        segments, when the LLM response cannot be parsed, or when the parsed
        plan fails ``validate_story_batch_plan``; the caller is then expected
        to fall back to ``decide()``.
        """
        if not llm or len(segments) < 2:
            return None

        payload = _build_candidate_json(candidate_stories)
        segments_json = _build_segments_json_for_plan(segments)
        prompt = get_story_batch_plan_prompt(
            chapter_category=chapter_category,
            chapter_title=chapter_title,
            segments_json=segments_json,
            candidate_stories_json=payload,
        )
        try:
            data = self._invoke_json(llm, prompt, max_tokens=4096)
            plan = StoryBatchPlan.model_validate(data)
        except Exception as e:
            # Deliberate best-effort boundary: the caller falls back to decide().
            logger.warning("StoryRouteAgent.plan_batch 解析失败: %s", e)
            return None

        ordered = [seg_id for seg_id, _ in segments]
        ok, err = validate_story_batch_plan(ordered, plan, valid_story_ids)
        if not ok:
            logger.warning("StoryRouteAgent.plan_batch 校验失败: %s", err)
            return None
        return plan
|