fix/various fixes

This commit is contained in:
Kevin
2026-03-20 15:15:35 +08:00
parent 7f57f96c25
commit 7317bf10cd
112 changed files with 3790 additions and 2242 deletions

View File

@@ -3,10 +3,11 @@
"""
import json
from app.core.logging import get_logger
import uuid
from io import BytesIO
from typing import Dict, List
from typing import Dict, List, Set
from datetime import datetime, timezone
import redis
@@ -20,24 +21,17 @@ from app.features.conversation.models import Segment
from app.features.memoir.models import (
Book,
Chapter,
ChapterSection,
MemoirImage,
MemoirState,
)
from app.features.user.models import User
from app.core.dependencies import get_llm_provider
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
from app.agents.memoir.prompts import (
STAGE_TO_ORDER,
get_narrative_json_prompt,
)
from app.agents.memoir import MemoirOrchestrator
from app.agents.memoir.narrative_agent import NarrativeAgent
from app.agents.chat.prompts_profile import format_user_profile_context
from app.features.memoir.memoir_images.parser import (
build_initial_image_assets,
parse_image_placeholders,
parse_narrative_to_sections,
)
import hashlib
from app.core.dependencies import get_image_generator
@@ -60,19 +54,18 @@ from app.features.memoir.memoir_images.storage import (
TencentCosStorageService,
CosUploadError,
)
from app.features.memoir.cover_eligibility import (
chapter_needs_cover_enqueue,
cover_memoir_image_pending_or_failed,
)
from app.features.memoir.story_pipeline_sync import (
run_story_pipeline_for_category_batch,
)
logger = get_logger(__name__)
_REDIS_CLIENTS: dict[bool, redis.Redis] = {}
def _is_json_narrative(text: str) -> bool:
"""检测 narrative 是否为 JSON 格式paragraphs 结构)"""
if not text or not text.strip():
return False
s = text.strip()
return s.startswith("{") and "paragraphs" in s
def _get_llm():
"""Celery 任务内获取 LangChain LLM通过 port"""
try:
@@ -206,7 +199,6 @@ def chapter_has_images_to_generate(images: list[dict] | None) -> bool:
def _memoir_image_from_asset(
chapter_id: str,
section_id: str | None,
order_index: int,
image_asset: dict,
) -> MemoirImage:
@@ -215,59 +207,11 @@ def _memoir_image_from_asset(
return MemoirImage(
id=str(uuid.uuid4()).replace("-", "")[:32],
chapter_id=chapter_id,
section_id=section_id,
order_index=order_index,
**kwargs,
)
def _section_has_image_to_generate(section) -> bool:
"""章节段落是否有待生成的配图(从 image_record / image_id 关联的 memoir_images 读取)。"""
r = getattr(section, "image_record", None)
if not r:
return False
status = (getattr(r, "status") or "").strip()
return status in (IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED)
def _chapter_has_any_section_images_to_generate(chapter) -> bool:
if not chapter or not getattr(chapter, "sections", None):
return False
return any(_section_has_image_to_generate(s) for s in chapter.sections)
def _chapter_has_cover_to_generate(chapter) -> bool:
"""章节是否有待生成的封面图MemoirImage section_id=None 且 status 为 pending/failed"""
images = getattr(chapter, "images", None) or []
for m in images:
if getattr(m, "section_id", None) is None:
status = (getattr(m, "status") or "").strip()
return status in (IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED)
return False
def _chapter_needs_cover_enqueue(chapter) -> bool:
"""尚无 cover_asset 且章节有正文时,可派发 generate_chapter_cover。"""
if not chapter:
return False
if getattr(chapter, "cover_asset_id", None):
return False
md = (getattr(chapter, "canonical_markdown", None) or "").strip()
if md:
return True
sections = getattr(chapter, "sections", None) or []
return any((getattr(s, "content", None) or "").strip() for s in sections)
def _get_cover_memoir_image(chapter):
"""获取章节封面 MemoirImagesection_id=None若无可生成则返回 None。"""
images = getattr(chapter, "images", None) or []
for m in images:
if getattr(m, "section_id", None) is None:
return m
return None
def _select_placeholders_for_effective_max(
placeholders: list[dict],
existing_images: list[dict] | None,
@@ -296,126 +240,8 @@ def _select_placeholders_for_effective_max(
return [{**item, "index": index} for index, item in enumerate(selected)]
def _finalize_chapter_narrative(db, chapter, narrative, title, source_segments):
    """Apply the chapter-level bookkeeping shared by every exit path of the section writer."""
    # Imported inside the function, as the original code did — presumably to
    # avoid a circular import between tasks and the repo module (TODO confirm).
    from app.features.memoir.repo import ensure_chapter_markdown_and_version_sync

    chapter.title = title
    chapter.is_new = True
    merged = (chapter.source_segments or []) + (source_segments or [])
    # dict.fromkeys de-duplicates while keeping first-seen order, so the stored
    # list is deterministic (a plain set would scramble it on every run).
    chapter.source_segments = list(dict.fromkeys(merged))
    ensure_chapter_markdown_and_version_sync(db, chapter, narrative)
    return chapter


def _save_narrative_to_sections(
    db: Session,
    chapter,
    narrative: str,
    title: str,
    category: str,
    order_index: int,
    source_segments: list,
    user_id: str,
):
    """Split a narrative into ChapterSection rows and append them to the chapter.

    Existing sections are never deleted. When the narrative starts with the
    text already stored in the chapter's sections, only the remainder is
    appended; otherwise the whole narrative is appended. Sections are written
    with ``image_id=None``.

    Args:
        db: SQLAlchemy session used for all reads and writes.
        chapter: Existing chapter, or None to create a new one.
        narrative: Full narrative text; None or blank is treated as empty.
        title: Title assigned to the chapter.
        category: Category used only when a new chapter is created.
        order_index: Chapter order used only when a new chapter is created.
        source_segments: Segment ids merged into ``chapter.source_segments``.
        user_id: Owner used only when a new chapter is created.

    Returns:
        The (possibly newly created) chapter, after its title, ``is_new`` flag,
        source segments and markdown/version sync have been applied.
    """
    # Normalise once: guards against None and keeps leading whitespace from
    # defeating the "narrative extends existing content" prefix check below.
    narrative_text = (narrative or "").strip()

    if chapter is None:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=order_index,
            status="completed",
            category=category,
            cover_image=None,
            is_new=True,
            source_segments=source_segments or [],
        )
        db.add(chapter)
        db.flush()

    existing_sections = (
        db.execute(
            select(ChapterSection)
            .where(ChapterSection.chapter_id == chapter.id)
            .order_by(ChapterSection.order_index)
        )
        .scalars()
        .all()
    )

    if existing_sections:
        stored_texts = ((s.content or "").strip() for s in existing_sections)
        existing_content = "\n\n".join(text for text in stored_texts if text)
        if existing_content and narrative_text.startswith(existing_content):
            # The narrative repeats what is stored; keep only the new tail.
            narrative_to_parse = narrative_text[len(existing_content) :].lstrip()
        else:
            narrative_to_parse = narrative_text
        if not narrative_to_parse:
            # Nothing new to append; still refresh the chapter metadata.
            return _finalize_chapter_narrative(
                db, chapter, narrative, title, source_segments
            )
        order_base = max(s.order_index for s in existing_sections) + 1
    else:
        narrative_to_parse = narrative_text
        order_base = 0

    segments = parse_narrative_to_sections(narrative_to_parse)
    if segments:
        contents = [(seg.get("content") or "").strip() for seg in segments]
    else:
        # The parser produced nothing: store the text as one single section.
        contents = [narrative_to_parse]

    for offset, content in enumerate(contents):
        db.add(
            ChapterSection(
                id=str(uuid.uuid4()),
                chapter_id=chapter.id,
                order_index=order_base + offset,
                content=content,
                image_id=None,
            )
        )
    # One flush for the whole batch instead of one per section.
    db.flush()

    return _finalize_chapter_narrative(db, chapter, narrative, title, source_segments)
def initialize_chapter_images(_chapter) -> list:
    """Legacy no-op kept so that old callers keep working.

    The argument is ignored. The function logs that chapter covers are handled
    by generate_chapter_cover and returns an empty list.
    """
    logger.info("initialize_chapter_images: 封面由 generate_chapter_cover 处理,跳过")
    return []
@@ -563,12 +389,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
occupation=user_obj.occupation,
)
narrative_agent = NarrativeAgent()
chapter_composer = __import__(
"app.agents.memoir.chapter_composer_orchestrator",
fromlist=["ChapterComposerOrchestrator"],
).ChapterComposerOrchestrator()
from app.features.memory.repo import retrieve_evidence_sync
story_dispatch_ids: Set[str] = set()
def _process_category(
chapter_category: str,
@@ -578,133 +399,24 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
birth_year,
llm,
):
"""单章节处理ChapterComposerOrchestrator 生成 markdown或 NarrativeAgent 回退repo 落库"""
segment_texts = [seg.transcript_text or "" for seg in category_segments]
combined_text = "\n\n".join(segment_texts)
source_ids = [seg.id for seg in category_segments]
# 证据检索writing RAG
try:
evidence = retrieve_evidence_sync(
db, user_id, combined_text, top_k=10
)
except Exception as e:
logger.warning("Evidence 检索跳过: %s", e)
evidence = {
"relevant_chunks": [],
"relevant_summaries": [],
"relevant_facts": [],
"timeline_hints": [],
"relevant_stories": [],
}
stmt_chapter = (
select(Chapter)
.where(
Chapter.user_id == user_id,
Chapter.category == chapter_category,
Chapter.is_active == True,
)
.options(
joinedload(Chapter.sections).joinedload(
ChapterSection.image_record
),
joinedload(Chapter.images),
)
)
result_chapter = db.execute(stmt_chapter)
chapter = result_chapter.unique().scalar_one_or_none()
slot_snippets = {}
stage_slots = state.slots.get(chapter_category, {}) or {}
for key, value in stage_slots.items():
snip = getattr(value, "snippet", None) or (
value.get("snippet") if isinstance(value, dict) else None
)
if snip:
slot_snippets[key] = snip
title = chapter.title if chapter else f"{chapter_category} 回忆"
existing_markdown = ""
if chapter:
existing_markdown = (
getattr(chapter, "canonical_markdown", None) or ""
)
if not existing_markdown and getattr(chapter, "sections", None):
existing_markdown = "\n\n".join(
s.content
for s in sorted(
chapter.sections, key=lambda x: x.order_index
)
if (s.content or "").strip()
)
if not chapter:
title = narrative_agent.generate_title(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=profile,
birth_year=birth_year,
llm=llm,
)
# ChapterComposerOrchestrator 产出 markdownagent 不落库)
narrative = chapter_composer.compose_chapter_markdown(
title=title,
category=chapter_category,
evidence=evidence,
existing_markdown=existing_markdown,
"""stories-first路由 + 写 story物化 chapter。"""
nonlocal story_dispatch_ids
chapter, needs_cover, disp = run_story_pipeline_for_category_batch(
db,
user_id=user_id,
chapter_category=chapter_category,
category_segments=category_segments,
state=state,
user_profile=profile,
birth_year=birth_year,
user_birth_year=birth_year,
llm=llm,
)
if not narrative or not narrative.strip():
new_narrative = narrative_agent.generate_narrative(
stage=chapter_category,
slots=slot_snippets,
new_content=combined_text,
existing_content=existing_markdown,
user_profile=profile,
birth_year=birth_year,
llm=llm,
)
if _is_json_narrative(new_narrative):
narrative = new_narrative
elif existing_markdown:
narrative = f"{existing_markdown}\n\n{new_narrative}"
else:
narrative = new_narrative
if (
existing_markdown
and not _is_json_narrative(narrative)
and len(narrative) < len(existing_markdown) * 0.8
):
logger.warning(
"内容长度异常: existing=%d, new=%d, category=%s. 回退为追加模式",
len(existing_markdown),
len(narrative),
chapter_category,
)
narrative = f"{existing_markdown}\n\n{combined_text}"
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
chapter = _save_narrative_to_sections(
db,
chapter,
narrative,
title=title,
category=chapter_category,
order_index=calculated_order_index,
source_segments=source_ids,
user_id=user_id,
)
story_dispatch_ids |= disp
db.flush()
db.refresh(chapter)
needs_cover_enqueue = (
image_settings.enabled and _chapter_needs_cover_enqueue(chapter)
image_settings.enabled and chapter_needs_cover_enqueue(chapter)
)
stmt_book = (
@@ -754,16 +466,26 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
db.commit()
from app.tasks.chapter_cover_tasks import generate_chapter_cover
from app.tasks.chapter_compose_tasks import recompose_chapters_for_story
from app.tasks.story_image_tasks import generate_story_image
for sid in story_dispatch_ids:
try:
generate_story_image.delay(sid)
except Exception as exc:
logger.warning("generate_story_image delay: %s", exc)
try:
recompose_chapters_for_story.delay(sid)
except Exception as exc:
logger.warning("recompose_chapters_for_story delay: %s", exc)
from app.tasks.chapter_cover_enqueue import (
try_enqueue_generate_chapter_cover,
)
for chapter_id in sorted(chapters_to_enqueue):
try:
if try_enqueue_generate_chapter_cover(chapter_id, source="pipeline"):
logger.info(f"派发章节封面任务: chapter={chapter_id}")
generate_chapter_cover.delay(chapter_id)
except Exception as exc:
logger.warning(
f"章节封面任务派发失败: chapter={chapter_id}, error={exc}"
)
logger.info(f"回忆录处理完成: user_id={user_id}, task_id={task_id}")
@@ -799,93 +521,61 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
try:
with get_sync_db() as db:
llm = _get_llm()
# 查找 active 章节并预加载 sections
stmt = (
select(Chapter)
.where(
Chapter.user_id == user_id,
Chapter.category == stage,
Chapter.is_active == True,
)
.options(joinedload(Chapter.sections))
)
result = db.execute(stmt)
chapter = result.unique().scalar_one_or_none()
existing_content = ""
if chapter and getattr(chapter, "sections", None):
existing_content = "\n\n".join(
s.content
for s in sorted(chapter.sections, key=lambda x: x.order_index)
if (s.content or "").strip()
user_obj = db.get(User, user_id)
user_profile = ""
user_birth_year = None
if user_obj:
user_birth_year = user_obj.birth_year
user_profile = format_user_profile_context(
birth_year=user_obj.birth_year,
birth_place=user_obj.birth_place,
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
if llm:
prompt = get_narrative_json_prompt(
stage=stage,
slots={},
new_content=new_content,
existing_content=existing_content,
)
json_llm = llm.bind(
model_kwargs={"response_format": {"type": "json_object"}},
max_tokens=4096,
)
response = json_llm.invoke(prompt)
new_narrative = response.content.strip()
if _is_json_narrative(new_narrative):
narrative = new_narrative
elif existing_content:
narrative = f"{existing_content}\n\n{new_narrative}"
else:
narrative = new_narrative
else:
narrative = (
f"{existing_content}\n\n{new_content}"
if existing_content
else new_content
)
class _Seg:
def __init__(self, text: str):
self.id = str(uuid.uuid4())
self.transcript_text = text
# 安全检查:新内容不应比旧内容短(仅非 JSON 格式)
if (
existing_content
and not _is_json_narrative(narrative)
and len(narrative) < len(existing_content) * 0.8
):
logger.warning(
f"内容长度异常: existing={len(existing_content)}, "
f"new={len(narrative)}, stage={stage}. 回退为追加模式"
)
narrative = f"{existing_content}\n\n{new_content}"
calculated_order_index = STAGE_TO_ORDER.get(stage, 999)
title = chapter.title if chapter else f"{stage} 回忆"
chapter = _save_narrative_to_sections(
state = _get_or_create_state_sync(user_id, db)
chapter, _, dispatch_ids = run_story_pipeline_for_category_batch(
db,
chapter,
narrative,
title=title,
category=stage,
order_index=calculated_order_index,
source_segments=[],
user_id=user_id,
chapter_category=stage,
category_segments=[_Seg(new_content)],
state=state,
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
)
db.commit()
db.refresh(chapter)
from app.tasks.chapter_compose_tasks import recompose_chapters_for_story
from app.tasks.story_image_tasks import generate_story_image
for sid in dispatch_ids:
try:
generate_story_image.delay(sid)
except Exception as exc:
logger.warning("generate_story_image delay: %s", exc)
try:
recompose_chapters_for_story.delay(sid)
except Exception as exc:
logger.warning("recompose_chapters_for_story delay: %s", exc)
image_settings = MemoirImageSettings.from_env()
if (
image_settings.enabled
and chapter
and _chapter_needs_cover_enqueue(chapter)
and chapter_needs_cover_enqueue(chapter)
):
from app.tasks.chapter_cover_tasks import generate_chapter_cover
from app.tasks.chapter_cover_enqueue import (
try_enqueue_generate_chapter_cover,
)
try:
generate_chapter_cover.delay(chapter.id)
except Exception as exc:
logger.warning(
"章节封面任务派发失败: chapter=%s, error=%s", chapter.id, exc
)
try_enqueue_generate_chapter_cover(chapter.id, source="pipeline")
return {"status": "success"}
except Exception as e:
@@ -901,7 +591,7 @@ def build_cos_key(user_id: str, chapter_id: str, index: int | str, prompt: str)
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
def generate_chapter_images(self, chapter_id: str):
"""异步补图:处理封面 MemoirImage 与历史遗留的段落配图pending/failed"""
"""异步补图:处理章节级 MemoirImagepending/failed正文配图走 story_image_tasks。"""
lock_acquired = False
provider = None
with get_sync_db() as db:
@@ -909,34 +599,16 @@ def generate_chapter_images(self, chapter_id: str):
stmt = (
select(Chapter)
.where(Chapter.id == chapter_id)
.options(
joinedload(Chapter.sections).joinedload(
ChapterSection.image_record
),
joinedload(Chapter.images),
)
.options(joinedload(Chapter.images))
)
chapter = db.execute(stmt).unique().scalar_one_or_none()
if not chapter:
logger.info("章节补图跳过: chapter=%s, reason=not_found", chapter_id)
return {"status": "no_chapter"}
sections = getattr(chapter, "sections", None) or []
sections_with_pending = [
(idx, s)
for idx, s in enumerate(sections)
if _section_has_image_to_generate(s)
]
cover_rec = _get_cover_memoir_image(chapter)
cover_to_generate = (
cover_rec
if cover_rec
and (getattr(cover_rec, "status") or "").strip()
in (IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED)
else None
)
if not sections_with_pending and not cover_to_generate:
cover_to_generate = cover_memoir_image_pending_or_failed(chapter)
if not cover_to_generate:
logger.info(
"章节补图跳过: chapter=%s, reason=no_pending_images", chapter_id
"章节补图跳过: chapter=%s, reason=no_pending_cover", chapter_id
)
return {"status": "no_images"}
@@ -954,9 +626,8 @@ def generate_chapter_images(self, chapter_id: str):
image_generator = get_image_generator()
storage = TencentCosStorageService.from_env()
logger.info(
"章节补图开始: chapter=%s, pending_sections=%d, cover=%s",
"章节封面补图开始: chapter=%s, cover=%s",
chapter_id,
len(sections_with_pending),
bool(cover_to_generate),
)
retryable_failures: list[str] = []
@@ -976,7 +647,7 @@ def generate_chapter_images(self, chapter_id: str):
rec.retryable = d.get("retryable")
rec.updated_at = datetime.now(timezone.utc)
# 先处理封面图
# 封面图(正文来自 canonical_markdown
if cover_to_generate:
current_item = memoir_image_to_dict(cover_to_generate) or {}
current_item.setdefault("placeholder", "")
@@ -986,15 +657,10 @@ def generate_chapter_images(self, chapter_id: str):
_apply_item_to_memoir_image(cover_to_generate, current_item)
db.commit()
try:
sections_ordered = sorted(
sections, key=lambda s: getattr(s, "order_index", 0)
)
first_content = (
(sections_ordered[0].content or "").strip()
if sections_ordered
else ""
)
context_excerpt = " ".join(first_content.split("\n")[:5])[:200]
raw_md = (
getattr(chapter, "canonical_markdown", None) or ""
).strip()
context_excerpt = " ".join(raw_md.split("\n")[:5])[:200]
prompt_data = prompt_orchestrator.build_cover_prompt(
chapter_title=chapter.title,
chapter_category=chapter.category or "",
@@ -1059,91 +725,6 @@ def generate_chapter_images(self, chapter_id: str):
_apply_item_to_memoir_image(cover_to_generate, current_item)
db.commit()
for sec_index, section in sections_with_pending:
item = (
memoir_image_to_dict(section.image_record)
if section.image_record
else {}
)
current_item = dict(item) if item else {}
current_item.setdefault("placeholder", "")
current_item.setdefault("description", "")
current_item["status"] = IMAGE_STATUS_PROCESSING
current_item["updated_at"] = datetime.now(timezone.utc).isoformat()
_apply_item_to_memoir_image(section.image_record, current_item)
db.commit()
try:
context_lines = (section.content or "").strip().split("\n")[:5]
context_excerpt = " ".join(context_lines)[:200]
prompt_data = prompt_orchestrator.build_prompt(
chapter_title=chapter.title,
chapter_category=chapter.category or "",
description=current_item.get("description", ""),
context_excerpt=context_excerpt,
)
result = image_generator.generate(
prompt_data["prompt"],
prompt_data["size"],
prompt_data["style"],
)
if result.status != TaskStatus.COMPLETED or not result.image_url:
raise RuntimeError(result.error or "Image generation failed")
image_bytes = _normalize_image_bytes_for_storage(
image_generator.download_image(result.image_url)
)
key = build_cos_key(
chapter.user_id, chapter.id, sec_index, prompt_data["prompt"]
)
current_item["storage_key"] = key
current_item["url"] = storage.upload_bytes(
image_bytes, key, "image/png"
)
current_item["prompt"] = prompt_data["prompt"]
current_item["style"] = prompt_data["style"]
current_item["size"] = prompt_data["size"]
current_item["status"] = IMAGE_STATUS_COMPLETED
current_item["error"] = None
current_item["retryable"] = None
current_item["updated_at"] = datetime.now(timezone.utc).isoformat()
_apply_item_to_memoir_image(section.image_record, current_item)
db.commit()
logger.info(
"章节补图成功: chapter=%s, section_index=%s, url=%s",
chapter_id,
sec_index,
current_item["url"],
)
except Exception as exc:
failure_msg = f"section_index={sec_index}, error={exc}"
if isinstance(exc, CosUploadError) and not exc.retryable:
permanent_failures.append(failure_msg)
logger.error(
"图片上传不可重试,清理配图: chapter=%s, %s",
chapter_id,
failure_msg,
)
mi = section.image_record
section.image_id = None
if mi:
db.delete(mi)
db.commit()
else:
current_item["status"] = IMAGE_STATUS_FAILED
current_item["error"] = str(exc)
current_item["retryable"] = True
retryable_failures.append(failure_msg)
logger.warning(
"图片生成失败(可重试): chapter=%s, %s",
chapter_id,
failure_msg,
)
current_item["updated_at"] = datetime.now(
timezone.utc
).isoformat()
_apply_item_to_memoir_image(section.image_record, current_item)
db.commit()
if retryable_failures:
raise RuntimeError(
f"章节补图存在可重试失败项: chapter={chapter_id}, failures={'; '.join(retryable_failures)}"