Files
life-echo/api/app/agents/memoir/story_route_agent.py
Kevin 786ebf8ae6 refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减
- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向:收紧运行时契约、
  删除过渡兼容路径与双轨逻辑,并同步更新客户端与文档。

- Chat:以 ChatOrchestrator 为实时编排入口;删除独立 conversation_agent,精简 prompts。
- Memoir:删除 memory_agent;MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner,并移除 features 下重复/过时
  processor 封装。

- 新增 history_store,强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history;精简 WS message_types,重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径;章节列表与封面逻辑要求 story 关联;收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩;删除 memoir_images/provider 等冗余。
- tasks:memoir_tasks、chapter_cover_tasks 等大幅瘦身;story_image_tasks 等与当前图片任务对齐。

- core:config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota:路由或服务侧删减过时接口或逻辑(如 payment router 行数减少)。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调(与当前 schema 叙事一致的小改动)。

- 回忆录:types / mappers / api、章节页与 memoir 页与后端契约对齐;markdown-renderer 调整。
- 语音:删除 voice/player,voice-segment-store 相应精简。

- api/tests:删除 conftest 及绝大部分既有测试文件(websocket_baseline、conversation、memoir
  图片、PDF、SMS 等),属有意收缩/待按 backend-test-system 重建的信号。
- docs:新增多智能体收敛与移除兼容层计划摘要;更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更;大量 API 测试被移除,
  CI 若依赖这些用例需按新策略补测或调整流水线。
2026-03-22 18:10:28 +08:00

230 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
StoryRouteAgentCelery 批次内判断 new_story vs append_storyJSON
"""
from __future__ import annotations
import json
from typing import Any, Literal
from pydantic import BaseModel, field_validator
from app.agents.memoir.prompts import (
get_story_batch_plan_prompt,
get_story_route_prompt,
)
from app.core.langchain_llm import bind_json_object_mode
from app.core.logging import get_logger
from app.features.story.models import Story
logger = get_logger(__name__)
# 超过此数量跳过批量规划(单次路由),避免 prompt 过大
PLAN_BATCH_MAX_SEGMENTS = 48
class StoryBatchPlanUnit(BaseModel):
"""批量写入中的一个单元(连续 segment 块)。"""
segment_ids: list[str]
decision: Literal["new_story", "append_story"]
target_story_id: str | None = None
new_story_title: str | None = None
reason: str | None = None
@field_validator("target_story_id", mode="before")
@classmethod
def empty_str_to_none_tid(cls, v: Any) -> str | None:
if v is None or v == "":
return None
if isinstance(v, str):
return v.strip() or None
return str(v)
class StoryBatchPlan(BaseModel):
units: list[StoryBatchPlanUnit]
class StoryRouteDecision(BaseModel):
decision: Literal["new_story", "append_story"]
target_story_id: str | None = None
new_story_title: str | None = None
reason: str | None = None
@field_validator("target_story_id", mode="before")
@classmethod
def empty_str_to_none(cls, v: Any) -> str | None:
if v is None or v == "":
return None
if isinstance(v, str):
return v.strip() or None
return str(v)
def _build_candidate_json(stories: list[Story], *, preview_chars: int = 220) -> str:
rows: list[dict[str, Any]] = []
for s in stories:
md = (s.canonical_markdown or "").strip().replace("\n", " ")
preview = md[:preview_chars] + ("" if len(md) > preview_chars else "")
links: list[str] = []
for cl in getattr(s, "chapter_links", None) or []:
ch = getattr(cl, "chapter", None)
if ch is None:
continue
cat = getattr(ch, "category", None) or ""
tit = getattr(ch, "title", None) or ""
links.append(f"{tit}({cat})")
rows.append(
{
"id": s.id,
"title": s.title,
"preview": preview,
"linked_chapters": links,
}
)
return json.dumps(rows, ensure_ascii=False, indent=2)
def _build_segments_json_for_plan(
segments: list[tuple[str, str]], *, text_preview_chars: int = 4000
) -> str:
"""segments: (id, transcript_text) 按口述顺序。"""
rows: list[dict[str, str]] = []
for sid, text in segments:
t = (text or "").strip()
if len(t) > text_preview_chars:
t = t[:text_preview_chars] + ""
rows.append({"id": sid, "text": t})
return json.dumps(rows, ensure_ascii=False, indent=2)
def validate_story_batch_plan(
ordered_segment_ids: list[str],
plan: StoryBatchPlan,
valid_story_ids: set[str],
) -> tuple[bool, str | None]:
"""
校验segment 全覆盖、顺序一致、append 目标合法、new_story 有标题。
返回 (ok, error_code)。
"""
if not plan.units:
return False, "empty_units"
flat: list[str] = []
for u in plan.units:
if not u.segment_ids:
return False, "empty_unit_segment_ids"
flat.extend(u.segment_ids)
if len(flat) != len(set(flat)):
return False, "duplicate_segment"
if flat != ordered_segment_ids:
return False, "segment_mismatch"
for u in plan.units:
if u.decision == "append_story":
tid = u.target_story_id
if not tid or tid not in valid_story_ids:
return False, "invalid_append_target"
else:
title = (u.new_story_title or "").strip()
if not title:
return False, "missing_new_title"
return True, None
class StoryRouteAgent:
def decide(
self,
*,
chapter_category: str,
chapter_title: str,
batch_transcript: str,
candidate_stories: list[Story],
llm: Any,
valid_story_ids: set[str],
) -> StoryRouteDecision:
if not llm:
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
reason="no_llm",
)
payload = _build_candidate_json(candidate_stories)
prompt = get_story_route_prompt(
chapter_category=chapter_category,
chapter_title=chapter_title,
batch_transcript=batch_transcript,
candidate_stories_json=payload,
)
try:
json_llm = bind_json_object_mode(llm, max_tokens=1024)
response = json_llm.invoke(prompt)
raw = (response.content or "").strip()
data = json.loads(raw)
decision = StoryRouteDecision.model_validate(data)
except Exception as e:
logger.warning("StoryRouteAgent 解析失败: %s", e)
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
reason="parse_error",
)
if decision.decision == "append_story":
tid = decision.target_story_id
if not tid or tid not in valid_story_ids:
logger.warning(
"StoryRoute append 无效 target_story_id=%s,回退 new_story",
tid,
)
return StoryRouteDecision(
decision="new_story",
new_story_title=decision.new_story_title,
reason="invalid_target",
)
if decision.decision == "new_story" and not (
decision.new_story_title and decision.new_story_title.strip()
):
decision.new_story_title = None
return decision
def plan_batch(
self,
*,
chapter_category: str,
chapter_title: str,
segments: list[tuple[str, str]],
candidate_stories: list[Story],
llm: Any,
valid_story_ids: set[str],
) -> StoryBatchPlan | None:
"""
将本批 segment 划分为多个写入单元。解析失败返回 None由调用方回退 decide()。
"""
if not llm or len(segments) < 2:
return None
payload = _build_candidate_json(candidate_stories)
segments_json = _build_segments_json_for_plan(segments)
prompt = get_story_batch_plan_prompt(
chapter_category=chapter_category,
chapter_title=chapter_title,
segments_json=segments_json,
candidate_stories_json=payload,
)
try:
json_llm = bind_json_object_mode(llm, max_tokens=4096)
response = json_llm.invoke(prompt)
raw = (response.content or "").strip()
data = json.loads(raw)
plan = StoryBatchPlan.model_validate(data)
except Exception as e:
logger.warning("StoryRouteAgent.plan_batch 解析失败: %s", e)
return None
ordered = [s[0] for s in segments]
ok, err = validate_story_batch_plan(ordered, plan, valid_story_ids)
if not ok:
logger.warning("StoryRouteAgent.plan_batch 校验失败: %s", err)
return None
return plan