docs: add story-first markdown-first design
This commit is contained in:
@@ -1,11 +1,7 @@
|
||||
"""回忆录模块:MemoryAgent、BackgroundTaskRunner、MemoirOrchestrator、各 Specialist Agent"""
|
||||
|
||||
from app.agents.memoir.memory_agent import MemoryAgent
|
||||
from app.agents.memoir.processor import (
|
||||
BackgroundTaskRunner,
|
||||
ContentAnalyzer,
|
||||
MemoirGenerator,
|
||||
)
|
||||
from app.agents.memoir.processor import BackgroundTaskRunner
|
||||
from app.agents.memoir.orchestrator import MemoirOrchestrator
|
||||
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
|
||||
from app.agents.memoir.classification_agent import ClassificationAgent
|
||||
@@ -15,8 +11,6 @@ from app.agents.memoir.placeholder_agent import inject_placeholders
|
||||
__all__ = [
|
||||
"MemoryAgent",
|
||||
"BackgroundTaskRunner",
|
||||
"ContentAnalyzer",
|
||||
"MemoirGenerator",
|
||||
"MemoirOrchestrator",
|
||||
"ExtractionAgent",
|
||||
"ExtractionResult",
|
||||
|
||||
@@ -1,176 +1,23 @@
|
||||
"""
|
||||
回忆录后台处理器:分析对话、更新状态、生成章节、创意标题
|
||||
使用 Celery 进行后台任务处理
|
||||
回忆录后台处理器:debounce 聚合后派发 Celery 任务
|
||||
实际回忆录生成由 memoir_tasks.process_memoir_segments 调用 MemoirOrchestrator 完成
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List
|
||||
|
||||
from app.core.dependencies import get_llm_provider
|
||||
from app.core.logging import get_logger
|
||||
from app.core.task_tracker import task_tracker
|
||||
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
from app.agents.memoir.prompts import (
|
||||
get_creative_title_prompt,
|
||||
get_narrative_json_prompt,
|
||||
get_state_extraction_prompt,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Keywords used to guess which life stage a message talks about.
# Insertion order matters: stage detection returns the first stage whose
# keyword list produces a hit.
STAGE_KEYWORDS = dict(
    childhood=["童年", "小时候", "出生", "家乡", "小镇"],
    education=["上学", "学校", "老师", "同学", "教育", "大学"],
    career=["工作", "职业", "事业", "公司", "同事", "创业"],
    family=["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
    belief=["信念", "价值观", "座右铭", "坚持", "原则"],
)
|
||||
|
||||
|
||||
def _get_langchain_llm():
    """Return the configured provider's ``langchain_llm``, or ``None``.

    This is a best-effort lookup: if resolving the provider (or reading the
    attribute) raises, the failure is logged and ``None`` is returned so that
    callers can degrade to their non-LLM code paths.

    Returns:
        The provider's ``langchain_llm`` attribute, or ``None`` when the
        provider has no such attribute or the lookup failed.
    """
    try:
        provider = get_llm_provider()
        return getattr(provider, "langchain_llm", None)
    except Exception as e:
        # The failure used to be swallowed silently, which made a broken
        # provider configuration indistinguishable from "no LLM configured".
        logger.warning("获取 LangChain LLM 失败: %s", e)
        return None
|
||||
|
||||
|
||||
@dataclass
class AnalysisResult:
    """Structured outcome of analysing a single user message."""

    # Key of the life stage the message was attributed to.
    detected_stage: str
    # Mapping of slot name to the text extracted for that slot.
    extracted_slots: Dict[str, str]
    # Emotion label attached to the message.
    emotion: str
    # Whether the message should open a new chapter.
    is_new_chapter: bool
|
||||
|
||||
|
||||
class ContentAnalyzer:
    """Analyse one user message into stage, slot values, emotion and chapter flag.

    When a LangChain LLM is available it is asked for a JSON verdict. If no
    LLM is configured, or the call / JSON parsing fails, the analyzer falls
    back to keyword matching and an "first empty slot" heuristic.
    """

    def __init__(self) -> None:
        # May be None; analyze_message checks before using it.
        self.llm = _get_langchain_llm()

    def _detect_stage(self, user_message: str, fallback_stage: str) -> str:
        """Return the first stage in ``STAGE_KEYWORDS`` with a keyword hit.

        Args:
            user_message: Raw user text; it is lower-cased before matching.
            fallback_stage: Stage returned when no keyword matches.
        """
        message = user_message.lower()
        for stage, keywords in STAGE_KEYWORDS.items():
            if any(word in message for word in keywords):
                return stage
        return fallback_stage

    def _fallback_slots(
        self, state: "MemoirStateSchema", stage: str, user_message: str
    ) -> Dict[str, str]:
        """Put the message into the first still-empty slot of ``stage``.

        Args:
            state: Memoir state; ``state.slots`` is read as a mapping of
                stage -> {slot key -> object with a ``snippet`` attribute}.
            stage: Stage whose slots are inspected.
            user_message: Text to store (stripped, truncated to 200 chars).

        Returns:
            ``{slot_key: text}`` for the first slot whose ``snippet`` is
            falsy, or ``{}`` if the stage has no slots or all are filled.
        """
        for key, value in state.slots.get(stage, {}).items():
            if not value.snippet:
                return {key: user_message.strip()[:200]}
        return {}

    async def _extract_with_llm(
        self,
        user_message: str,
        current_state: "MemoirStateSchema",
        detected_stage: str,
    ) -> dict:
        """Ask the LLM for a JSON analysis and return the decoded object.

        Raises:
            json.JSONDecodeError: The extracted payload is not valid JSON.
            TypeError: The payload decoded to something other than an object.
        """
        prompt = get_state_extraction_prompt(
            user_message=user_message,
            current_stage=current_state.current_stage,
            stage_slots=current_state.slots.get(detected_stage, {}),
        )
        json_llm = self.llm.bind(
            model_kwargs={"response_format": {"type": "json_object"}},
            max_tokens=1024,
        )
        response = await json_llm.ainvoke(prompt)
        parsed = json.loads(extract_json_payload(response.content.strip()))
        if not isinstance(parsed, dict):
            raise TypeError(
                f"expected a JSON object, got {type(parsed).__name__}"
            )
        return parsed

    async def analyze_message(
        self, user_message: str, current_state: "MemoirStateSchema"
    ) -> "AnalysisResult":
        """Analyse ``user_message`` against ``current_state``.

        Args:
            user_message: The user's latest message.
            current_state: Current memoir state (``current_stage`` and
                ``slots`` are read).

        Returns:
            An ``AnalysisResult``. LLM-provided values are used only when
            they have the expected type; otherwise the heuristic stage and
            the ``"neutral"`` emotion default are kept.
        """
        detected_stage = self._detect_stage(user_message, current_state.current_stage)
        emotion = "neutral"
        is_new_chapter = False

        # ``parsed`` stays None whenever the LLM path is unavailable or failed.
        parsed = None
        if self.llm:
            try:
                parsed = await self._extract_with_llm(
                    user_message, current_state, detected_stage
                )
            except json.JSONDecodeError as e:
                # Previously swallowed silently; log so malformed LLM output
                # is visible while still falling back.
                logger.warning("分析结果 JSON 解析失败: %s", e)
            except Exception as e:
                logger.error("分析消息失败: %s", e)

        if parsed is None:
            extracted_slots = self._fallback_slots(
                current_state, detected_stage, user_message
            )
        else:
            # Guard against null / wrongly-typed fields so they cannot
            # overwrite the heuristic values with None.
            stage = parsed.get("detected_stage")
            if isinstance(stage, str) and stage:
                detected_stage = stage
            slots = parsed.get("slots")
            extracted_slots = slots if isinstance(slots, dict) else {}
            label = parsed.get("emotion")
            if isinstance(label, str) and label:
                emotion = label
            is_new_chapter = bool(parsed.get("is_new_chapter", False))

        return AnalysisResult(
            detected_stage=detected_stage,
            extracted_slots=extracted_slots,
            emotion=emotion,
            is_new_chapter=is_new_chapter,
        )
|
||||
|
||||
|
||||
class MemoirGenerator:
    """Generate chapter titles and narrative text.

    Both generators degrade gracefully: without an LLM, on any LLM error, or
    when the LLM answers with blank text, a deterministic fallback is
    returned instead.
    """

    def __init__(self) -> None:
        # May be None; both generators check before using it.
        self.llm = _get_langchain_llm()

    @staticmethod
    def _default_title(stage: str) -> str:
        """Return the fallback chapter title for ``stage``."""
        return f"{stage} 回忆"

    @staticmethod
    def _append_content(existing_content: str, new_content: str) -> str:
        """Return ``new_content`` appended to ``existing_content`` (if any)."""
        if existing_content:
            return f"{existing_content}\n\n{new_content}"
        return new_content

    async def generate_chapter_title(
        self, stage: str, slots: Dict[str, str], emotion: str
    ) -> str:
        """Return a creative chapter title, or ``"<stage> 回忆"`` as fallback.

        Args:
            stage: Life-stage key the chapter belongs to.
            slots: Slot values passed to the title prompt.
            emotion: Emotion label passed to the title prompt.
        """
        if not self.llm:
            return self._default_title(stage)
        try:
            prompt = get_creative_title_prompt(
                stage=stage, emotion=emotion, slots=slots
            )
            response = await self.llm.ainvoke(prompt)
            title = response.content.strip().strip('"')
        except Exception as e:
            logger.error("生成标题失败: %s", e)
            return self._default_title(stage)
        # A blank answer must not become the chapter title.
        return title if title.strip() else self._default_title(stage)

    async def generate_narrative(
        self,
        stage: str,
        slots: Dict[str, str],
        new_content: str,
        existing_content: str,
    ) -> str:
        """Return the LLM's narrative output, or the plain appended text.

        Args:
            stage: Life-stage key the narrative belongs to.
            slots: Slot values passed to the narrative prompt.
            new_content: Newly collected content.
            existing_content: Narrative written so far; may be empty.

        Returns:
            The stripped LLM response (requested in JSON-object format), or
            ``existing_content`` + blank line + ``new_content`` when no LLM is
            available, the call fails, or the response is blank.
        """
        fallback = self._append_content(existing_content, new_content)
        if not self.llm:
            return fallback
        try:
            prompt = get_narrative_json_prompt(
                stage=stage,
                slots=slots,
                new_content=new_content,
                existing_content=existing_content,
            )
            json_llm = self.llm.bind(
                model_kwargs={"response_format": {"type": "json_object"}},
                max_tokens=4096,
            )
            response = await json_llm.ainvoke(prompt)
            narrative = response.content.strip()
        except Exception as e:
            logger.error("生成叙事失败: %s", e)
            return fallback
        # A blank response would otherwise wipe the existing narrative.
        return narrative or fallback
|
||||
|
||||
|
||||
class BackgroundTaskRunner:
|
||||
def __init__(self, debounce_seconds: int = 5) -> None:
|
||||
self.debounce_seconds = debounce_seconds
|
||||
self._pending: Dict[str, List[str]] = {}
|
||||
self._timers: Dict[str, object] = {}
|
||||
self.analyzer = ContentAnalyzer()
|
||||
self.generator = MemoirGenerator()
|
||||
|
||||
async def _submit_task(self, user_id: str, segment_ids: List[str]) -> str | None:
|
||||
try:
|
||||
|
||||
675
docs/plans/2026-03-19-story-first-markdown-first-design.md
Normal file
675
docs/plans/2026-03-19-story-first-markdown-first-design.md
Normal file
@@ -0,0 +1,675 @@
|
||||
# Story-First + Markdown-First 重构设计
|
||||
|
||||
> 日期:2026-03-19
|
||||
> 目标:一次性重构 Life Echo 的回忆录生成链路,取消 `section` 作为正文真源,建立 `evidence -> story -> chapter -> delivery` 的新架构。
|
||||
|
||||
## 1. 结论
|
||||
|
||||
本次重构采用以下不可回退的架构决策:
|
||||
|
||||
1. `story` 是创作真源。
|
||||
2. `chapter` 是阅读与导出视图,不再是创作真源。
|
||||
3. `markdown` 是唯一正文真源。
|
||||
4. `memory/RAG` 是 story 与 chapter 的证据底座,不再只是章节生成的辅助能力。
|
||||
5. `agent` 只负责分析、决策、生成,不直接写数据库。
|
||||
6. 本次采用一次性切换,不保留旧运行时兼容路径。
|
||||
|
||||
一句话概括新模型:
|
||||
|
||||
`conversation/segment -> memory evidence -> story markdown -> chapter markdown -> app reading/pdf/export`
|
||||
|
||||
## 2. 目标与非目标
|
||||
|
||||
### 2.1 目标
|
||||
|
||||
- 建立 `story-first` 的长期可演进数据模型。
|
||||
- 建立 `markdown-first` 的统一正文契约,支持移动端阅读优化。
|
||||
- 建立清晰的 multi-agent 编排边界,避免 agent 与持久化耦合。
|
||||
- 建立可追溯的 memory/RAG 证据体系,支撑访谈追问与回忆录写作。
|
||||
- 为未来独立 Web 运营润色端预留版本管理、审计、回滚能力。
|
||||
- 一次性切换到新模型,不留下 `chapter/section` 双真源债务。
|
||||
|
||||
### 2.2 非目标
|
||||
|
||||
- 不引入 Braintrust、Vellum 等外部 AI 平台作为当前架构依赖。
|
||||
- 不做运行时向后兼容。
|
||||
- 不保留 `chapter_sections` 作为正文真源。
|
||||
- 不在本阶段开放用户端手工编辑 markdown。
|
||||
- 不在本阶段引入复杂富文本、HTML 或通用文档编辑器协议。
|
||||
|
||||
## 3. 设计原则
|
||||
|
||||
1. 单一真源:正文只能有一份 canonical markdown。
|
||||
2. 可追溯:story 与 chapter 必须能追到 evidence、版本、生成来源。
|
||||
3. 单向依赖:`evidence -> story -> chapter -> delivery`,禁止反向污染。
|
||||
4. 可替换:LLM runtime、检索策略、渲染实现都应可替换,但内容模型不漂移。
|
||||
5. 可验证:所有 AI 生成结果必须有版本和测试护栏。
|
||||
6. 可运营:未来运营端进入时,不需要重构底层模型。
|
||||
|
||||
## 4. 总体架构
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Conversation / Segments"] --> B["Memory Ingest"]
|
||||
B --> C["Evidence Layer"]
|
||||
C --> D["Story Builder"]
|
||||
D --> E["Story Layer"]
|
||||
E --> F["Chapter Composer"]
|
||||
F --> G["Chapter Layer"]
|
||||
G --> H["App Reading"]
|
||||
G --> I["PDF Export"]
|
||||
G --> J["Image Delivery"]
|
||||
```
|
||||
|
||||
系统分四层:
|
||||
|
||||
### 4.1 Evidence Layer
|
||||
|
||||
负责原始素材沉淀、检索、结构化事实抽取与时间线组织。
|
||||
|
||||
核心表:
|
||||
|
||||
- `memory_sources`
|
||||
- `memory_chunks`
|
||||
- `memory_facts`
|
||||
- `timeline_events`
|
||||
- `memory_summaries`
|
||||
|
||||
职责:
|
||||
|
||||
- 存储 transcript、note、draft 等来源
|
||||
- chunk、embedding、FTS、fact extraction、timeline hints
|
||||
- 为 conversation RAG 和 writing RAG 提供 evidence bundle
|
||||
|
||||
### 4.2 Story Layer
|
||||
|
||||
负责“可独立讲述的一段人生故事”的形成、演进、追溯和版本管理。
|
||||
|
||||
核心表:
|
||||
|
||||
- `stories`
|
||||
- `story_versions`
|
||||
- `story_evidence_links`
|
||||
|
||||
职责:
|
||||
|
||||
- 聚合 evidence 成 story
|
||||
- 维护 story 的 canonical markdown
|
||||
- 记录版本链和 AI/编辑来源
|
||||
- 作为 chapter 编排的唯一创作输入
|
||||
|
||||
### 4.3 Chapter Layer
|
||||
|
||||
负责把多个 stories 编排成可阅读、可导出的章节。
|
||||
|
||||
核心表:
|
||||
|
||||
- `chapters`
|
||||
- `chapter_versions`
|
||||
- `chapter_story_links`
|
||||
|
||||
职责:
|
||||
|
||||
- 组织阅读顺序
|
||||
- 生成章节级 markdown
|
||||
- 承担目录、阅读页、导出页的消费语义
|
||||
|
||||
### 4.4 Delivery Layer
|
||||
|
||||
负责消费 chapter,服务移动端阅读、PDF 导出、图片资源、阅读设置。
|
||||
|
||||
核心资源:
|
||||
|
||||
- `books`
|
||||
- `memoir_images` 或后续统一 `assets`
|
||||
- PDF 渲染与导出任务
|
||||
|
||||
## 5. 数据模型
|
||||
|
||||
### 5.1 现有保留表
|
||||
|
||||
保留并升级使用:
|
||||
|
||||
- `conversations`
|
||||
- `segments`
|
||||
- `memory_sources`
|
||||
- `memory_chunks`
|
||||
- `memory_facts`
|
||||
- `timeline_events`
|
||||
- `memory_summaries`
|
||||
- `books`
|
||||
|
||||
### 5.2 新增主表
|
||||
|
||||
#### `stories`
|
||||
|
||||
建议字段:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `id` | string | 主键 |
|
||||
| `user_id` | string | 用户 ID |
|
||||
| `title` | string | 故事标题 |
|
||||
| `stage` | string | childhood / education / career / family / belief / summary |
|
||||
| `story_type` | string | event / person / relationship / reflection / turning_point |
|
||||
| `summary` | text | 短摘要 |
|
||||
| `canonical_markdown` | text | 当前生效正文 |
|
||||
| `time_start` | string/null | 起始时间,可为 year/month/date 粗粒度 |
|
||||
| `time_end` | string/null | 结束时间 |
|
||||
| `people_refs` | json | 人物引用 |
|
||||
| `place_refs` | json | 地点引用 |
|
||||
| `tag_refs` | json | 标签 |
|
||||
| `status` | string | active / archived / merged / draft |
|
||||
| `confidence` | float | story 聚合置信度 |
|
||||
| `current_version_id` | string | 当前版本 |
|
||||
| `created_at` | datetime | 创建时间 |
|
||||
| `updated_at` | datetime | 更新时间 |
|
||||
|
||||
#### `story_versions`
|
||||
|
||||
建议字段:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `id` | string | 主键 |
|
||||
| `story_id` | string | 所属 story |
|
||||
| `version_no` | int | 递增版本号 |
|
||||
| `markdown_snapshot` | text | 正文快照 |
|
||||
| `change_summary` | text | 变更摘要 |
|
||||
| `actor_type` | string | ai / user / editor / system |
|
||||
| `source_type` | string | generate / rewrite / merge / manual / migration |
|
||||
| `parent_version_id` | string/null | 父版本 |
|
||||
| `prompt_meta` | json/null | 本次生成的 prompt / model / params 元信息 |
|
||||
| `created_at` | datetime | 创建时间 |
|
||||
|
||||
#### `story_evidence_links`
|
||||
|
||||
建议字段:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `id` | string | 主键 |
|
||||
| `story_id` | string | story |
|
||||
| `evidence_type` | string | chunk / fact / timeline_event / summary |
|
||||
| `evidence_id` | string | 证据 ID |
|
||||
| `role` | string | primary / supporting / background |
|
||||
| `weight` | float | 证据权重 |
|
||||
| `created_at` | datetime | 创建时间 |
|
||||
|
||||
### 5.3 重定义章节表
|
||||
|
||||
#### `chapters`
|
||||
|
||||
建议字段:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `id` | string | 主键 |
|
||||
| `user_id` | string | 用户 ID |
|
||||
| `book_id` | string/null | 所属 book |
|
||||
| `title` | string | 章节标题 |
|
||||
| `category` | string | 章节分类 |
|
||||
| `order_index` | int | 排序 |
|
||||
| `summary` | text | 章节摘要 |
|
||||
| `canonical_markdown` | text | 当前生效正文 |
|
||||
| `status` | string | active / draft / archived |
|
||||
| `cover_asset_id` | string/null | 封面资源 |
|
||||
| `current_version_id` | string | 当前版本 |
|
||||
| `created_at` | datetime | 创建时间 |
|
||||
| `updated_at` | datetime | 更新时间 |
|
||||
|
||||
#### `chapter_versions`
|
||||
|
||||
结构与 `story_versions` 类似(以 `chapter_id` 取代 `story_id`),保存章节正文版本。
|
||||
|
||||
#### `chapter_story_links`
|
||||
|
||||
建议字段:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `id` | string | 主键 |
|
||||
| `chapter_id` | string | chapter |
|
||||
| `story_id` | string | story |
|
||||
| `order_index` | int | 在章节中的顺序 |
|
||||
| `role` | string | core / bridge / appendix |
|
||||
| `created_at` | datetime | 创建时间 |
|
||||
|
||||
### 5.4 废弃对象
|
||||
|
||||
上线后退出正文真源角色:
|
||||
|
||||
- `chapter_sections`
|
||||
|
||||
如需保留,仅允许作为短期迁移脚本输入,不进入新运行时。
|
||||
|
||||
## 6. Markdown 契约
|
||||
|
||||
### 6.1 决策
|
||||
|
||||
`markdown` 是唯一正文真源。
|
||||
|
||||
以下对象都必须持有 canonical markdown:
|
||||
|
||||
- `stories.canonical_markdown`
|
||||
- `chapters.canonical_markdown`
|
||||
|
||||
### 6.2 允许语法
|
||||
|
||||
本阶段只允许安全且稳定的受限 markdown 子集:
|
||||
|
||||
- `#`、`##` 标题
|
||||
- 段落
|
||||
- `>` 引用
|
||||
- `---` 分隔线
|
||||
- `**粗体**`
|
||||
- `*斜体*`
|
||||
- 简单无序列表
|
||||
- 图片:`![图注](asset://<asset_id>)`
|
||||
|
||||
### 6.3 禁止语法
|
||||
|
||||
- 任意 HTML
|
||||
- 脚注
|
||||
- 表格
|
||||
- 复杂嵌套列表
|
||||
- 任意外链资源
|
||||
- 自定义脚本或内联样式
|
||||
|
||||
### 6.4 图片规则
|
||||
|
||||
正文只存资源引用,不存真实 URL。
|
||||
|
||||
示例:
|
||||
|
||||
```md
|
||||
![图注文字](asset://<asset_id>)
|
||||
```
|
||||
|
||||
解析时:
|
||||
|
||||
- app 端根据 `asset_id` 拉取资源
|
||||
- PDF 渲染器根据 `asset_id` 注入图片
|
||||
- 图注默认来自 alt 文本
|
||||
|
||||
## 7. Multi-Agent 设计
|
||||
|
||||
### 7.1 原则
|
||||
|
||||
- orchestrator 决策
|
||||
- specialist agent 产出结构化结果
|
||||
- service/repo 落库
|
||||
- agent 禁止直接访问 ORM/DB
|
||||
|
||||
### 7.2 新编排链路
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["ConversationOrchestrator"] --> B["MemoryIngestOrchestrator"]
|
||||
B --> C["ExtractionAgent"]
|
||||
B --> D["Fact/Timeline Enrichment"]
|
||||
D --> E["StoryBuilderOrchestrator"]
|
||||
E --> F["StorySynthesisAgent"]
|
||||
E --> G["StoryMergeAgent"]
|
||||
E --> H["Story Version Write"]
|
||||
H --> I["ChapterComposerOrchestrator"]
|
||||
I --> J["ChapterOutlineAgent"]
|
||||
I --> K["ChapterComposeAgent"]
|
||||
I --> L["Chapter Version Write"]
|
||||
L --> M["Reading / Export / Image"]
|
||||
```
|
||||
|
||||
### 7.3 Orchestrator 职责
|
||||
|
||||
#### `ConversationOrchestrator`
|
||||
|
||||
- 负责在线访谈
|
||||
- 使用 conversation RAG 做追问 grounding
|
||||
- 输出 transcript / segments
|
||||
|
||||
#### `MemoryIngestOrchestrator`
|
||||
|
||||
- transcript 写入 `memory_sources`
|
||||
- 切块写入 `memory_chunks`
|
||||
- 触发 embedding / FTS / fact / timeline enrichment
|
||||
|
||||
#### `StoryBuilderOrchestrator`
|
||||
|
||||
- 判断新增 story、补充现有 story、合并重复 story
|
||||
- 组织 evidence bundle
|
||||
- 生成或更新 story markdown
|
||||
|
||||
#### `ChapterComposerOrchestrator`
|
||||
|
||||
- 读取 stories
|
||||
- 生成章节大纲和章节 markdown
|
||||
- 维护 `chapter_story_links`
|
||||
|
||||
#### `ImageOrchestrator`
|
||||
|
||||
- 从 story 或 chapter 上下文生成图片 prompt
|
||||
- 只返回 asset 结果,不修改正文真源
|
||||
|
||||
### 7.4 Specialist Agents
|
||||
|
||||
建议保留这些 specialist:
|
||||
|
||||
- `ProfileAgent`
|
||||
- `InterviewAgent`
|
||||
- `ExtractionAgent`
|
||||
- `StorySynthesisAgent`
|
||||
- `StoryMergeAgent`
|
||||
- `ChapterOutlineAgent`
|
||||
- `ChapterComposeAgent`
|
||||
- `PromptGenerationAgent`
|
||||
|
||||
## 8. RAG 设计
|
||||
|
||||
### 8.1 两条读路径
|
||||
|
||||
#### `conversation RAG`
|
||||
|
||||
用途:
|
||||
|
||||
- 帮助访谈更具体
|
||||
- 避免重复追问
|
||||
- 把已讲过的人、地点、事件重新接回来
|
||||
|
||||
读取对象:
|
||||
|
||||
- `stories`
|
||||
- `memory_facts`
|
||||
- `timeline_events`
|
||||
- 必要时回到 `memory_chunks`
|
||||
|
||||
#### `writing RAG`
|
||||
|
||||
用途:
|
||||
|
||||
- 支撑 story 生成
|
||||
- 支撑 chapter 编排
|
||||
- 约束忠实度与证据引用
|
||||
|
||||
读取对象:
|
||||
|
||||
- `memory_chunks`
|
||||
- `memory_facts`
|
||||
- `timeline_events`
|
||||
- `memory_summaries`
|
||||
- `stories`
|
||||
|
||||
### 8.2 检索策略
|
||||
|
||||
Retriever 采用混合检索:
|
||||
|
||||
1. metadata filter
|
||||
2. FTS
|
||||
3. vector retrieval
|
||||
4. score fusion
|
||||
5. token budget 裁剪
|
||||
|
||||
输出统一 evidence bundle:
|
||||
|
||||
```json
|
||||
{
|
||||
"relevant_chunks": [],
|
||||
"relevant_summaries": [],
|
||||
"relevant_facts": [],
|
||||
"timeline_hints": [],
|
||||
"relevant_stories": []
|
||||
}
|
||||
```
|
||||
|
||||
### 8.3 重要边界
|
||||
|
||||
- RAG 只读 evidence / story,不直接改正文。
|
||||
- story/chapter 生成结果必须落到 versioned markdown。
|
||||
- 事实层和正文层分离,避免“结构化事实污染叙事表达”。
|
||||
|
||||
## 9. API 契约
|
||||
|
||||
### 9.1 App 端主接口
|
||||
|
||||
#### `GET /api/chapters`
|
||||
|
||||
返回:
|
||||
|
||||
- `id`
|
||||
- `title`
|
||||
- `category`
|
||||
- `order_index`
|
||||
- `summary`
|
||||
- `cover_asset`
|
||||
- `updated_at`
|
||||
|
||||
不返回 `sections`。
|
||||
|
||||
#### `GET /api/chapters/:id`
|
||||
|
||||
返回:
|
||||
|
||||
- `id`
|
||||
- `title`
|
||||
- `category`
|
||||
- `canonical_markdown`
|
||||
- `rendered_assets`
|
||||
- `reading_meta`
|
||||
- `updated_at`
|
||||
|
||||
说明:
|
||||
|
||||
- `canonical_markdown` 是正文真源
|
||||
- `rendered_assets` 是图片等资源映射
|
||||
- `reading_meta` 可包含估算阅读时长、目录信息、是否有未读更新等
|
||||
|
||||
#### `GET /api/books/current`
|
||||
|
||||
继续服务目录与导出流程,但正文消费只认 chapter markdown。
|
||||
|
||||
### 9.2 未来运营端接口
|
||||
|
||||
预留,不进入当前 app 主流程:
|
||||
|
||||
- `GET /api/stories/:id`
|
||||
- `GET /api/stories/:id/versions`
|
||||
- `POST /api/stories/:id/rewrite`
|
||||
- `POST /api/chapters/:id/recompose`
|
||||
|
||||
## 10. 阅读页设计约束
|
||||
|
||||
### 10.1 前端读取模型
|
||||
|
||||
`app-expo` 章节页从 `chapter.canonical_markdown` 渲染,不再从 `chapter.sections` 渲染。
|
||||
|
||||
### 10.2 渲染层
|
||||
|
||||
客户端可将 markdown 转换为受控 AST,再映射到原生阅读组件:
|
||||
|
||||
- Heading
|
||||
- Paragraph
|
||||
- Quote
|
||||
- Divider
|
||||
- Image
|
||||
- Caption
|
||||
|
||||
### 10.3 渲染缓存
|
||||
|
||||
允许生成 render cache,例如:
|
||||
|
||||
- 目录块
|
||||
- 首屏块
|
||||
- 阅读进度锚点
|
||||
- 图片资源索引
|
||||
|
||||
但 render cache 不是正文真源。
|
||||
|
||||
## 11. LLM Runtime 边界
|
||||
|
||||
### 11.1 决策
|
||||
|
||||
LangChain 可以保留,但只能留在 adapter/runtime 层。
|
||||
|
||||
禁止:
|
||||
|
||||
- agent 直接拿 `langchain_llm`
|
||||
- 业务逻辑耦合 LangChain message 类型
|
||||
|
||||
### 11.2 建议接口
|
||||
|
||||
统一抽象 `LLMRuntime`:
|
||||
|
||||
- `generate_text()`
|
||||
- `generate_json()`
|
||||
- `stream_text()`
|
||||
- `embed_texts()`
|
||||
|
||||
底层实现可以继续包 LangChain / OpenAI-compatible provider,但对 agent 层隐藏实现细节。
|
||||
|
||||
## 12. 错误处理
|
||||
|
||||
错误按四层处理:
|
||||
|
||||
### 12.1 `evidence failure`
|
||||
|
||||
- transcript 保留
|
||||
- 允许后续重试 enrichment
|
||||
- 不阻塞会话结束
|
||||
|
||||
### 12.2 `story synthesis failure`
|
||||
|
||||
- 不覆盖当前 story 版本
|
||||
- 保留 evidence
|
||||
- 标记待重试任务
|
||||
|
||||
### 12.3 `chapter compose failure`
|
||||
|
||||
- 不影响已有章节阅读
|
||||
- 不污染已发布章节版本
|
||||
|
||||
### 12.4 `render failure`
|
||||
|
||||
- app 回退纯文本模式
|
||||
- PDF 导出回退安全渲染
|
||||
|
||||
强约束:
|
||||
|
||||
- 任何 AI 生成只新增 version
|
||||
- 禁止原地覆盖当前生效版本
|
||||
|
||||
## 13. 测试策略
|
||||
|
||||
### 13.1 Golden Tests
|
||||
|
||||
- transcript -> story markdown
|
||||
- story set -> chapter markdown
|
||||
- chapter markdown -> PDF snapshot
|
||||
|
||||
### 13.2 Parser / Renderer Tests
|
||||
|
||||
- markdown AST 解析
|
||||
- app-expo 阅读渲染
|
||||
- PDF 渲染一致性
|
||||
- 图片 asset 引用解析
|
||||
|
||||
### 13.3 Migration Tests
|
||||
|
||||
- 旧 `chapter/section` -> 新 `story/chapter markdown`
|
||||
- 数据量校验
|
||||
- 版本链校验
|
||||
- 资源引用校验
|
||||
|
||||
### 13.4 Contract Tests
|
||||
|
||||
- API 不再返回作为旧正文真源的 `sections` 字段
|
||||
- chapter detail 必须返回 `canonical_markdown`
|
||||
- orchestrator 输出结构必须稳定
|
||||
|
||||
## 14. Cutover 方案
|
||||
|
||||
本次采用一次性切换,不保留旧运行时兼容。
|
||||
|
||||
### 14.1 切换顺序
|
||||
|
||||
1. 冻结旧 `chapter/section` 体系的新功能开发。
|
||||
2. 完成新 schema migration。
|
||||
3. 完成历史数据迁移:
|
||||
   - 旧 transcript/sections -> evidence
|
||||
   - 旧章节正文 -> story markdown / chapter markdown
|
||||
   - 旧图片 -> asset 引用
|
||||
4. 切换后端:
|
||||
   - 新 repo
|
||||
   - 新 service
|
||||
   - 新 orchestrator
|
||||
   - 新 API DTO
|
||||
5. 切换 `app-expo` 阅读页到 markdown renderer。
|
||||
6. 切换 PDF 导出到 markdown-based rendering。
|
||||
7. 运行 verifier:
|
||||
   - schema check
|
||||
   - row count / relation check
|
||||
   - markdown parse check
|
||||
   - asset resolution check
|
||||
   - golden tests
|
||||
8. 发布。
|
||||
9. 发布后封禁旧表写入,删除旧代码路径。
|
||||
|
||||
### 14.2 上线后状态
|
||||
|
||||
上线后,证据与正文在系统内各自只存在一套真相:
|
||||
|
||||
- evidence 真相:memory tables
|
||||
- 正文真相:story/chapter canonical markdown + versions
|
||||
|
||||
不再存在:
|
||||
|
||||
- 旧 `section` 正文真源
|
||||
- 旧 chapter-only 创作链路
|
||||
- 旧 agent 直连 LangChain runtime 的模式
|
||||
|
||||
## 15. 实施清单
|
||||
|
||||
### 15.1 数据层
|
||||
|
||||
- 新增 `stories`
|
||||
- 新增 `story_versions`
|
||||
- 新增 `story_evidence_links`
|
||||
- 重定义 `chapters`
|
||||
- 新增 `chapter_versions`
|
||||
- 新增 `chapter_story_links`
|
||||
- 废弃 `chapter_sections`
|
||||
|
||||
### 15.2 服务层
|
||||
|
||||
- 完成 `MemoryService.ingest_transcript`
|
||||
- 完成 `HybridRetriever.retrieve`
|
||||
- 新增 `StoryService`
|
||||
- 重写 `MemoirService`
|
||||
|
||||
### 15.3 Agent 层
|
||||
|
||||
- 重写 story/chapter 编排链
|
||||
- 统一 orchestrator 输出结构
|
||||
- agent 禁止直接落库
|
||||
|
||||
### 15.4 前端层
|
||||
|
||||
- 章节页切换到 markdown 渲染
|
||||
- 目录页继续按 chapter 展示
|
||||
- 图片按 asset 引用解析
|
||||
|
||||
### 15.5 导出层
|
||||
|
||||
- PDF 从 chapter markdown 渲染
|
||||
- 与 app 阅读风格保持基本一致
|
||||
|
||||
## 16. 最终判断
|
||||
|
||||
这次重构的核心不是“把章节页改好看”,而是把回忆录系统的内容真源彻底改正确。
|
||||
|
||||
正确的长期抽象应当是:
|
||||
|
||||
- evidence 是事实与来源底座
|
||||
- story 是创作真源
|
||||
- chapter 是阅读视图
|
||||
- markdown 是正文真源
|
||||
|
||||
只要这四个点不再动摇,后续无论是运营端润色、自动重编排、版本审计、在线 RAG、PDF 导出,都会建立在稳定架构上,而不是继续给 `section-first` 体系补洞。
|
||||
Reference in New Issue
Block a user