2026-03-19 10:38:11 +08:00
|
|
|
|
"""
|
|
|
|
|
|
回忆录整理 Agent:基于传记结构,将口语改写为书面语,归类到章节
|
|
|
|
|
|
支持异步调用
|
|
|
|
|
|
"""
|
2026-03-19 14:36:14 +08:00
|
|
|
|
|
2026-03-19 10:38:11 +08:00
|
|
|
|
import json
|
|
|
|
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.dependencies import get_llm_provider
|
|
|
|
|
|
from app.core.logging import get_logger
|
|
|
|
|
|
|
2026-03-19 10:54:48 +08:00
|
|
|
|
from app.agents.memoir.prompts import (
|
|
|
|
|
|
CHAPTER_CATEGORIES,
|
|
|
|
|
|
STAGE_TO_ORDER,
|
2026-03-19 10:38:11 +08:00
|
|
|
|
get_chapter_classification_prompt,
|
|
|
|
|
|
get_text_rewrite_prompt,
|
|
|
|
|
|
inject_image_placeholder_template,
|
|
|
|
|
|
)
|
2026-03-19 11:27:43 +08:00
|
|
|
|
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
2026-03-19 10:38:11 +08:00
|
|
|
|
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_langchain_llm():
    """Return the configured provider's ``langchain_llm`` attribute, or ``None``.

    Best-effort lookup: ``None`` is returned both when the provider has no
    ``langchain_llm`` attribute and when obtaining the provider raises, so
    callers can fall back to their non-LLM behavior.
    """
    try:
        provider = get_llm_provider()
    except Exception as exc:
        # Stay best-effort, but leave a trace: without it the agent silently
        # degrades to its defaults and the root cause is invisible.
        logger.warning("LLM provider unavailable, running without LLM: %s", exc)
        return None
    return getattr(provider, "langchain_llm", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MemoryAgent:
    """Memoir-organizing agent (async-capable).

    Classifies transcript segments into chapter categories and rewrites the
    grouped text through the LangChain LLM returned by
    ``_get_langchain_llm``. When no LLM is configured, classification
    returns a default category and rewriting returns the input text as-is.
    """

    # Category returned whenever classification is unavailable or unusable.
    _DEFAULT_CATEGORY = "childhood"
    # Title used when a category has no entry in CHAPTER_CATEGORIES.
    _DEFAULT_TITLE = "章节"
    # Wrapping characters a model may add around a one-word answer.
    _LABEL_NOISE = " \t\r\n\"'`*.,;:。,;:“”‘’「」"

    def __init__(self):
        """Bind the agent to the currently configured LLM (may be ``None``)."""
        self.llm = _get_langchain_llm()

    @staticmethod
    def _response_text(response):
        """Return ``response.content`` when present, else ``str(response)``."""
        return response.content if hasattr(response, "content") else str(response)

    def _fallback_chapter(self, chapter_category: str, content: str) -> Dict:
        """Build the minimal chapter dict used when rewriting is unavailable."""
        return {
            "title": CHAPTER_CATEGORIES.get(chapter_category, self._DEFAULT_TITLE),
            "content": content,
            "summary": "",
            "image_suggestions": [],
        }

    async def classify_chapter(self, segments_text: str) -> str:
        """Return the chapter category key the LLM assigns to *segments_text*.

        Falls back to ``"childhood"`` when no LLM is configured, when the
        call fails, or when the answer is not a key of
        ``CHAPTER_CATEGORIES``.
        """
        if not self.llm:
            return self._DEFAULT_CATEGORY
        try:
            prompt = get_chapter_classification_prompt(segments_text)
            response = await self.llm.ainvoke(prompt)
            answer = self._response_text(response).strip().lower()
            # Try the answer verbatim first, then with wrapping quotes or
            # punctuation removed, so `"childhood".` still matches its key.
            for candidate in (answer, answer.strip(self._LABEL_NOISE)):
                if candidate in CHAPTER_CATEGORIES:
                    return candidate
            logger.warning(
                "Unrecognized chapter category %r, using default", answer[:80]
            )
        except Exception as e:
            logger.error("分类章节失败: %s", e)
        return self._DEFAULT_CATEGORY

    async def rewrite_to_literary(
        self,
        segments_text: str,
        chapter_category: str,
        existing_content: Optional[str] = None,
    ) -> Dict:
        """Rewrite *segments_text* via the LLM and return the parsed chapter.

        Args:
            segments_text: Transcript text to rewrite.
            chapter_category: Category key, used for the prompt and for the
                fallback title lookup in ``CHAPTER_CATEGORIES``.
            existing_content: Content already present for the chapter; an
                empty string is passed to the prompt when it is ``None``.

        Returns:
            The JSON object produced by the LLM, with its ``content`` passed
            through ``inject_image_placeholder_template``. Otherwise a
            fallback dict with ``title``, ``content``, ``summary`` and
            ``image_suggestions`` keys: the raw model output becomes the
            content when it is not valid JSON, and *segments_text* is
            returned unchanged when no LLM is configured or the call fails.
        """
        if not self.llm:
            return self._fallback_chapter(chapter_category, segments_text)

        # Stage 1: the LLM call. Kept apart from parsing so that a
        # JSONDecodeError raised by the client itself is treated as a call
        # failure and can never reach a handler that needs the response.
        try:
            prompt = get_text_rewrite_prompt(
                segments_text, chapter_category, existing_content or ""
            )
            json_llm = self.llm.bind(
                model_kwargs={"response_format": {"type": "json_object"}},
                max_tokens=4096,
            )
            response = await json_llm.ainvoke(prompt)
            raw = self._response_text(response)
        except Exception as e:
            logger.error("改写文本失败: %s", e)
            return self._fallback_chapter(chapter_category, segments_text)

        # Stage 2: payload parsing.
        try:
            result = json.loads(extract_json_payload(raw.strip()))
            if not isinstance(result, dict):
                raise TypeError(
                    f"expected a JSON object, got {type(result).__name__}"
                )
            result["content"] = inject_image_placeholder_template(
                result.get("content") or ""
            )
            return result
        except json.JSONDecodeError:
            # The model answered, just not with valid JSON: keep its text.
            logger.warning("Rewrite output is not valid JSON, using raw text")
            return self._fallback_chapter(
                chapter_category, inject_image_placeholder_template(raw)
            )
        except Exception as e:
            logger.error("改写文本失败: %s", e)
            return self._fallback_chapter(chapter_category, segments_text)

    async def process_segments(
        self,
        segments: List[Dict],
        existing_chapters: Optional[Dict[str, Dict]] = None,
    ) -> Dict[str, Dict]:
        """Classify *segments*, rewrite them per category, and merge chapters.

        Args:
            segments: Dicts whose ``transcript_text`` value is the text to
                process; segments with a missing or empty value are skipped.
            existing_chapters: Chapters keyed by category. The mapping itself
                is not mutated; a shallow copy is updated and returned.

        Returns:
            The chapter mapping, with every category that received new text
            replaced by a freshly built chapter dict.
        """
        if existing_chapters is None:
            existing_chapters = {}

        # Classification runs one segment at a time, in input order.
        segments_by_category: Dict[str, List[str]] = {}
        for segment in segments:
            text = segment.get("transcript_text", "")
            if not text:
                continue
            category = await self.classify_chapter(text)
            segments_by_category.setdefault(category, []).append(text)

        updated_chapters = existing_chapters.copy()
        for category, texts in segments_by_category.items():
            previous = existing_chapters.get(category) or {}
            result = await self.rewrite_to_literary(
                "\n\n".join(texts), category, previous.get("content", "")
            )
            # `or` rather than a .get() default: the LLM's JSON may carry an
            # explicit null, which a .get() default would let through.
            updated_chapters[category] = {
                "title": result.get("title")
                or CHAPTER_CATEGORIES.get(category, self._DEFAULT_TITLE),
                "content": result.get("content") or "",
                "summary": result.get("summary") or "",
                "image_suggestions": result.get("image_suggestions") or [],
                "category": category,
                "order_index": STAGE_TO_ORDER.get(category, 999),
            }
        return updated_chapters
|