From 7f57f96c25de92db47f600539ad6860a9b20bdb2 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 20 Mar 2026 10:30:07 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=9B=9E=E5=BF=86=E5=BD=95?= =?UTF-8?q?=E4=B8=BA=20story-first=20/=20markdown-first=20=E6=9E=B6?= =?UTF-8?q?=E6=9E=84=E5=B9=B6=E6=95=B4=E5=90=88=E5=9B=BE=E7=89=87=E6=84=8F?= =?UTF-8?q?=E5=9B=BE=E4=B8=8E=E5=89=8D=E7=AB=AF=20UI=20=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development,核心内容包括: 1. 后端数据与迁移:新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移,建立 story-first、markdown-first、asset-first 的主数据链路。 2. 生成与任务链:引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks,图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。 3. 并发与一致性:为 story/chapter intent 增加 claim_token、claimed_at、attempt_count,采用数据库原子 claim 为主、Redis 锁为辅,避免重复生成、锁误删和 processing 卡死。 4. Memoir 读写路径:章节 canonical_markdown 成为正文真源,列表/详情接口补齐 markdown、cover_asset、word_count 等字段,PDF 与 asset 解析链路同步升级。 5. Memory / Retrieval:扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施,为后续 story-first RAG 与多 agent 编排提供底座。 6. App 端体验:章节页继续走 MarkdownRenderer 阅读链,同时吸收 fix3-19 的跨平台 UI glitch 修复;更新对话页、首页、文案资源与章节列表映射逻辑。 7. 
测试与文档:补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试,并加入图片占位符退役设计文档。 --- api/alembic/env.py | 2 + .../0003_story_first_markdown_first_schema.py | 207 ++++++++ ..._migrate_sections_to_canonical_markdown.py | 105 ++++ .../versions/0005_add_story_image_intents.py | 60 +++ .../0006_add_chapter_cover_intents.py | 57 +++ api/alembic/versions/0007_add_assets_table.py | 49 ++ .../0008_migrate_legacy_images_to_assets.py | 110 +++++ .../0009_story_image_intent_constraints.py | 72 +++ .../versions/0010_intent_claim_fields.py | 50 ++ api/app/agents/memoir/__init__.py | 4 + .../memoir/chapter_composer_orchestrator.py | 106 +++++ api/app/agents/memoir/memory_agent.py | 6 +- api/app/agents/memoir/orchestrator.py | 2 +- api/app/agents/memoir/prompts.py | 49 +- .../memoir/story_builder_orchestrator.py | 107 +++++ api/app/core/redis_lock.py | 41 ++ api/app/features/asset/__init__.py | 1 + api/app/features/asset/models.py | 21 + api/app/features/asset/repo.py | 37 ++ api/app/features/memoir/asset_resolver.py | 101 ++++ api/app/features/memoir/asset_urls.py | 44 ++ api/app/features/memoir/chapter_cover.py | 60 +++ api/app/features/memoir/helpers.py | 87 +++- .../features/memoir/memoir_images/parser.py | 72 +-- api/app/features/memoir/models.py | 133 +++++- api/app/features/memoir/pdf_service.py | 66 +-- api/app/features/memoir/repo.py | 109 ++++- api/app/features/memoir/service.py | 109 +++-- api/app/features/memory/chunker.py | 38 +- api/app/features/memory/deps.py | 12 +- api/app/features/memory/repo.py | 308 +++++++++++- api/app/features/memory/retriever.py | 104 +++- api/app/features/memory/service.py | 125 ++++- api/app/features/story/__init__.py | 1 + api/app/features/story/backfill.py | 34 ++ api/app/features/story/deps.py | 9 + .../features/story/image_intent_extractor.py | 115 +++++ api/app/features/story/models.py | 162 +++++++ api/app/features/story/repo.py | 187 ++++++++ api/app/features/story/service.py | 228 +++++++++ api/app/features/user/models.py | 1 + 
api/app/main.py | 1 + api/app/tasks/__init__.py | 10 +- api/app/tasks/celery_app.py | 8 +- api/app/tasks/chapter_cover_tasks.py | 304 ++++++++++++ api/app/tasks/memoir_tasks.py | 273 +++++------ api/app/tasks/story_image_tasks.py | 297 ++++++++++++ api/tests/conftest.py | 1 + api/tests/test_asset_resolver.py | 65 +++ api/tests/test_memoir_image_parser.py | 22 +- api/tests/test_pdf_service_images.py | 52 ++ ...t_process_memoir_segments_image_enqueue.py | 43 +- api/tests/test_story_image_tasks.py | 152 ++++++ app-expo/package-lock.json | 100 +++- app-expo/package.json | 1 + app-expo/src/app/(main)/chapter/[id].tsx | 450 +++--------------- app-expo/src/app/(main)/conversation/[id].tsx | 89 +++- app-expo/src/app/(tabs)/index.tsx | 63 ++- app-expo/src/app/(tabs)/memoir.tsx | 5 +- app-expo/src/features/memoir/mappers.ts | 31 +- app-expo/src/features/memoir/types.ts | 11 + app-expo/src/i18n/generated/resources.ts | 7 + .../src/i18n/locales/en/conversation.json | 9 +- app-expo/src/i18n/locales/en/profile.json | 4 +- .../src/i18n/locales/zh/conversation.json | 9 +- .../tests/features/memoir/mappers.test.ts | 33 ++ ...image-intent-placeholder-removal-design.md | 352 ++++++++++++++ 67 files changed, 4751 insertions(+), 832 deletions(-) create mode 100644 api/alembic/versions/0003_story_first_markdown_first_schema.py create mode 100644 api/alembic/versions/0004_migrate_sections_to_canonical_markdown.py create mode 100644 api/alembic/versions/0005_add_story_image_intents.py create mode 100644 api/alembic/versions/0006_add_chapter_cover_intents.py create mode 100644 api/alembic/versions/0007_add_assets_table.py create mode 100644 api/alembic/versions/0008_migrate_legacy_images_to_assets.py create mode 100644 api/alembic/versions/0009_story_image_intent_constraints.py create mode 100644 api/alembic/versions/0010_intent_claim_fields.py create mode 100644 api/app/agents/memoir/chapter_composer_orchestrator.py create mode 100644 api/app/agents/memoir/story_builder_orchestrator.py 
create mode 100644 api/app/core/redis_lock.py create mode 100644 api/app/features/asset/__init__.py create mode 100644 api/app/features/asset/models.py create mode 100644 api/app/features/asset/repo.py create mode 100644 api/app/features/memoir/asset_resolver.py create mode 100644 api/app/features/memoir/asset_urls.py create mode 100644 api/app/features/memoir/chapter_cover.py create mode 100644 api/app/features/story/__init__.py create mode 100644 api/app/features/story/backfill.py create mode 100644 api/app/features/story/deps.py create mode 100644 api/app/features/story/image_intent_extractor.py create mode 100644 api/app/features/story/models.py create mode 100644 api/app/features/story/repo.py create mode 100644 api/app/features/story/service.py create mode 100644 api/app/tasks/chapter_cover_tasks.py create mode 100644 api/app/tasks/story_image_tasks.py create mode 100644 api/tests/test_asset_resolver.py create mode 100644 api/tests/test_story_image_tasks.py create mode 100644 docs/plans/2026-03-19-image-intent-placeholder-removal-design.md diff --git a/api/alembic/env.py b/api/alembic/env.py index 560f2a2..bfbdb76 100644 --- a/api/alembic/env.py +++ b/api/alembic/env.py @@ -15,10 +15,12 @@ from app.core.config import settings from app.core.db import Base, _database_url # 聚合注册所有 feature 的 model 到 Base.metadata +from app.features.asset import models as _asset_models # noqa: F401 from app.features.auth import models as _auth_models # noqa: F401 from app.features.conversation import models as _conv_models # noqa: F401 from app.features.memory import models as _memory_models # noqa: F401 from app.features.memoir import models as _memoir_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 from app.features.payment import models as _payment_models # noqa: F401 from app.features.user import models as _user_models # noqa: F401 diff --git a/api/alembic/versions/0003_story_first_markdown_first_schema.py 
b/api/alembic/versions/0003_story_first_markdown_first_schema.py new file mode 100644 index 0000000..e4da989 --- /dev/null +++ b/api/alembic/versions/0003_story_first_markdown_first_schema.py @@ -0,0 +1,207 @@ +"""story_first_markdown_first_schema + +Story-First + Markdown-First 架构:新增 stories/story_versions/story_evidence_links, +重定义 chapters(canonical_markdown),新增 chapter_versions/chapter_story_links。 + +Revision ID: 0003_story_first +Revises: 0002_schema +Create Date: 2026-03-19 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "0003_story_first" +down_revision: Union[str, Sequence[str], None] = "0002_schema" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # 1. stories + op.create_table( + "stories", + sa.Column("id", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("title", sa.String(), nullable=False), + sa.Column("stage", sa.String(), nullable=True), + sa.Column("story_type", sa.String(), nullable=True), + sa.Column("summary", sa.Text(), nullable=True), + sa.Column("canonical_markdown", sa.Text(), nullable=True), + sa.Column("time_start", sa.String(), nullable=True), + sa.Column("time_end", sa.String(), nullable=True), + sa.Column("people_refs", sa.JSON(), nullable=True), + sa.Column("place_refs", sa.JSON(), nullable=True), + sa.Column("tag_refs", sa.JSON(), nullable=True), + sa.Column("status", sa.String(), nullable=True), + sa.Column("confidence", sa.Float(), nullable=True), + sa.Column("current_version_id", sa.String(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["user_id"], ["users.id"]), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_stories_user_id"), + "stories", + ["user_id"], 
+ unique=False, + if_not_exists=True, + ) + + # 2. story_versions + op.create_table( + "story_versions", + sa.Column("id", sa.String(), nullable=False), + sa.Column("story_id", sa.String(), nullable=False), + sa.Column("version_no", sa.Integer(), nullable=False), + sa.Column("markdown_snapshot", sa.Text(), nullable=False), + sa.Column("change_summary", sa.Text(), nullable=True), + sa.Column("actor_type", sa.String(), nullable=True), + sa.Column("source_type", sa.String(), nullable=True), + sa.Column("parent_version_id", sa.String(), nullable=True), + sa.Column("prompt_meta", sa.JSON(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint( + ["parent_version_id"], + ["story_versions.id"], + ondelete="SET NULL", + ), + sa.ForeignKeyConstraint(["story_id"], ["stories.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_story_versions_story_id"), + "story_versions", + ["story_id"], + unique=False, + if_not_exists=True, + ) + + # 3. story_evidence_links + op.create_table( + "story_evidence_links", + sa.Column("id", sa.String(), nullable=False), + sa.Column("story_id", sa.String(), nullable=False), + sa.Column("evidence_type", sa.String(), nullable=False), + sa.Column("evidence_id", sa.String(), nullable=False), + sa.Column("role", sa.String(), nullable=True), + sa.Column("weight", sa.Float(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["story_id"], ["stories.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_story_evidence_links_story_id"), + "story_evidence_links", + ["story_id"], + unique=False, + if_not_exists=True, + ) + + # 4. 
chapters 新增列(PostgreSQL ADD COLUMN IF NOT EXISTS) + conn = op.get_bind() + for col_sql in [ + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS book_id VARCHAR", + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS summary TEXT", + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS canonical_markdown TEXT", + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS cover_asset_id VARCHAR", + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS current_version_id VARCHAR", + "ALTER TABLE chapters ADD COLUMN IF NOT EXISTS created_at TIMESTAMP WITH TIME ZONE", + ]: + conn.execute(sa.text(col_sql)) + # FK 若已存在则跳过 + from sqlalchemy import inspect + + insp = inspect(conn) + fk_names = [fk.get("name") for fk in insp.get_foreign_keys("chapters") or []] + if "fk_chapters_book_id" not in fk_names: + op.create_foreign_key( + "fk_chapters_book_id", + "chapters", + "books", + ["book_id"], + ["id"], + ondelete="SET NULL", + ) + + # 5. chapter_versions + op.create_table( + "chapter_versions", + sa.Column("id", sa.String(), nullable=False), + sa.Column("chapter_id", sa.String(), nullable=False), + sa.Column("version_no", sa.Integer(), nullable=False), + sa.Column("markdown_snapshot", sa.Text(), nullable=False), + sa.Column("change_summary", sa.Text(), nullable=True), + sa.Column("actor_type", sa.String(), nullable=True), + sa.Column("source_type", sa.String(), nullable=True), + sa.Column("parent_version_id", sa.String(), nullable=True), + sa.Column("prompt_meta", sa.JSON(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["chapter_id"], ["chapters.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["parent_version_id"], + ["chapter_versions.id"], + ondelete="SET NULL", + ), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_chapter_versions_chapter_id"), + "chapter_versions", + ["chapter_id"], + unique=False, + if_not_exists=True, + ) + + # 6. 
chapter_story_links + op.create_table( + "chapter_story_links", + sa.Column("id", sa.String(), nullable=False), + sa.Column("chapter_id", sa.String(), nullable=False), + sa.Column("story_id", sa.String(), nullable=False), + sa.Column("order_index", sa.Integer(), nullable=False), + sa.Column("role", sa.String(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["chapter_id"], ["chapters.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["story_id"], ["stories.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_chapter_story_links_chapter_id"), + "chapter_story_links", + ["chapter_id"], + unique=False, + if_not_exists=True, + ) + op.create_index( + op.f("ix_chapter_story_links_story_id"), + "chapter_story_links", + ["story_id"], + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + op.drop_table("chapter_story_links") + op.drop_table("chapter_versions") + op.drop_constraint("fk_chapters_book_id", "chapters", type_="foreignkey") + op.drop_column("chapters", "created_at") + op.drop_column("chapters", "current_version_id") + op.drop_column("chapters", "cover_asset_id") + op.drop_column("chapters", "canonical_markdown") + op.drop_column("chapters", "summary") + op.drop_column("chapters", "book_id") + op.drop_table("story_evidence_links") + op.drop_table("story_versions") + op.drop_table("stories") diff --git a/api/alembic/versions/0004_migrate_sections_to_canonical_markdown.py b/api/alembic/versions/0004_migrate_sections_to_canonical_markdown.py new file mode 100644 index 0000000..0be7dc4 --- /dev/null +++ b/api/alembic/versions/0004_migrate_sections_to_canonical_markdown.py @@ -0,0 +1,105 @@ +"""migrate sections to canonical_markdown + +将旧章节(有 sections 但 canonical_markdown 为空)从 sections 推导并写入 canonical_markdown。 +同时创建 chapter_version 记录(source_type=migration)。 + +Revision ID: 0004_migrate_md +Revises: 0003_story_first 
+Create Date: 2026-03-19 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.orm import Session, selectinload + +revision: str = "0004_migrate_md" +down_revision: Union[str, Sequence[str], None] = "0003_story_first" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _sections_to_markdown(chapter) -> str: + """从 sections 推导 markdown,与 helpers.sections_to_content_and_images 一致。""" + sections = getattr(chapter, "sections", None) or [] + ordered = sorted(sections, key=lambda s: getattr(s, "order_index", 0)) + parts = [] + for s in ordered: + text = (getattr(s, "content", None) or "").strip() + if text: + parts.append(text) + img = _section_image_to_dict(s) + if img: + placeholder = (img.get("placeholder") or "").strip() + if placeholder: + parts.append(placeholder) + return "\n\n".join(parts) if parts else "" + + +def _section_image_to_dict(section) -> dict | None: + """与 helpers.section_image_to_dict 一致。""" + from app.features.memoir.memoir_images.serializers import memoir_image_to_dict + + if getattr(section, "image_record", None): + return memoir_image_to_dict(section.image_record) + return None + + +def upgrade() -> None: + from app.features.memoir.models import Chapter, ChapterSection, ChapterVersion + + conn = op.get_bind() + session = Session(bind=conn) + + chapters = ( + session.query(Chapter) + .options( + selectinload(Chapter.sections).selectinload(ChapterSection.image_record), + ) + .filter( + sa.or_( + Chapter.canonical_markdown.is_(None), + Chapter.canonical_markdown == "", + ), + ) + .all() + ) + + for ch in chapters: + md = _sections_to_markdown(ch) + if not md.strip(): + continue + + # 创建 chapter_version(source_type=migration) + import uuid + + from sqlalchemy import func + + count_stmt = sa.select(func.count(ChapterVersion.id)).where( + ChapterVersion.chapter_id == ch.id + ) + version_no = (session.execute(count_stmt).scalar() 
or 0) + 1 + + version = ChapterVersion( + id=str(uuid.uuid4()), + chapter_id=ch.id, + version_no=version_no, + markdown_snapshot=md, + actor_type="system", + source_type="migration", + ) + session.add(version) + session.flush() + + ch.canonical_markdown = md + ch.current_version_id = version.id + + # 由 alembic context 管理事务提交 + session.close() + + +def downgrade() -> None: + # 数据迁移不可逆,downgrade 不清理 canonical_markdown + pass diff --git a/api/alembic/versions/0005_add_story_image_intents.py b/api/alembic/versions/0005_add_story_image_intents.py new file mode 100644 index 0000000..fbd12e0 --- /dev/null +++ b/api/alembic/versions/0005_add_story_image_intents.py @@ -0,0 +1,60 @@ +"""add story_image_intents + +Revision ID: 0005_story_image_intents +Revises: 0004_migrate_md +Create Date: 2026-03-19 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "0005_story_image_intents" +down_revision: Union[str, Sequence[str], None] = "0004_migrate_md" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "story_image_intents", + sa.Column("id", sa.String(), nullable=False), + sa.Column("story_id", sa.String(), nullable=False), + sa.Column("story_version_id", sa.String(), nullable=True), + sa.Column("intent_role", sa.String(), nullable=False), + sa.Column("source_span", sa.JSON(), nullable=True), + sa.Column("caption", sa.String(), nullable=True), + sa.Column("prompt_brief", sa.Text(), nullable=True), + sa.Column("style_profile", sa.String(), nullable=True), + sa.Column("status", sa.String(), nullable=False), + sa.Column("asset_id", sa.String(), nullable=True), + sa.Column("error", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["story_id"], ["stories.id"], 
ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["story_version_id"], + ["story_versions.id"], + ondelete="SET NULL", + ), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_story_image_intents_story_id"), + "story_image_intents", + ["story_id"], + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + op.drop_index( + op.f("ix_story_image_intents_story_id"), + table_name="story_image_intents", + ) + op.drop_table("story_image_intents") diff --git a/api/alembic/versions/0006_add_chapter_cover_intents.py b/api/alembic/versions/0006_add_chapter_cover_intents.py new file mode 100644 index 0000000..9b057a2 --- /dev/null +++ b/api/alembic/versions/0006_add_chapter_cover_intents.py @@ -0,0 +1,57 @@ +"""add chapter_cover_intents + +Revision ID: 0006_chapter_cover_intents +Revises: 0005_story_image_intents +Create Date: 2026-03-19 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "0006_chapter_cover_intents" +down_revision: Union[str, Sequence[str], None] = "0005_story_image_intents" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "chapter_cover_intents", + sa.Column("id", sa.String(), nullable=False), + sa.Column("chapter_id", sa.String(), nullable=False), + sa.Column("chapter_version_id", sa.String(), nullable=True), + sa.Column("story_ids", sa.JSON(), nullable=True), + sa.Column("prompt_brief", sa.Text(), nullable=True), + sa.Column("status", sa.String(), nullable=False), + sa.Column("asset_id", sa.String(), nullable=True), + sa.Column("error", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["chapter_id"], ["chapters.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["chapter_version_id"], + 
["chapter_versions.id"], + ondelete="SET NULL", + ), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_chapter_cover_intents_chapter_id"), + "chapter_cover_intents", + ["chapter_id"], + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + op.drop_index( + op.f("ix_chapter_cover_intents_chapter_id"), + table_name="chapter_cover_intents", + ) + op.drop_table("chapter_cover_intents") diff --git a/api/alembic/versions/0007_add_assets_table.py b/api/alembic/versions/0007_add_assets_table.py new file mode 100644 index 0000000..b372a1d --- /dev/null +++ b/api/alembic/versions/0007_add_assets_table.py @@ -0,0 +1,49 @@ +"""add assets table + +Revision ID: 0007_assets +Revises: 0006_chapter_cover_intents +Create Date: 2026-03-19 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "0007_assets" +down_revision: Union[str, Sequence[str], None] = "0006_chapter_cover_intents" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "assets", + sa.Column("id", sa.String(), nullable=False), + sa.Column("asset_type", sa.String(), nullable=False), + sa.Column("storage_key", sa.String(), nullable=False), + sa.Column("url", sa.String(), nullable=True), + sa.Column("provider", sa.String(), nullable=True), + sa.Column("style_profile", sa.String(), nullable=True), + sa.Column("prompt_final", sa.Text(), nullable=True), + sa.Column("status", sa.String(), nullable=False), + sa.Column("width", sa.Integer(), nullable=True), + sa.Column("height", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + sa.PrimaryKeyConstraint("id"), + if_not_exists=True, + ) + op.create_index( + op.f("ix_assets_asset_type"), + "assets", + ["asset_type"], + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + 
op.drop_index(op.f("ix_assets_asset_type"), table_name="assets") + op.drop_table("assets") diff --git a/api/alembic/versions/0008_migrate_legacy_images_to_assets.py b/api/alembic/versions/0008_migrate_legacy_images_to_assets.py new file mode 100644 index 0000000..7f0e83b --- /dev/null +++ b/api/alembic/versions/0008_migrate_legacy_images_to_assets.py @@ -0,0 +1,110 @@ +"""migrate legacy placeholders and memoir_images to assets + +1. 从 chapters.canonical_markdown 移除 {{IMAGE:...}} / {{{{IMAGE:...}}}} 占位符 +2. 将已完成 memoir_images(含 storage_key)写入 assets;章节封面绑定 cover_asset_id + +Revision ID: 0008_legacy_assets +Revises: 0007_assets +Create Date: 2026-03-19 + +""" + +import uuid +from datetime import datetime, timezone +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +revision: str = "0008_legacy_assets" +down_revision: Union[str, Sequence[str], None] = "0007_assets" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + from app.features.memoir.asset_resolver import strip_legacy_image_placeholders + + conn = op.get_bind() + + rows_ch = conn.execute( + sa.text( + "SELECT id, canonical_markdown FROM chapters WHERE canonical_markdown IS NOT NULL" + ) + ).fetchall() + for cid, md in rows_ch: + if not md or not str(md).strip(): + continue + cleaned = strip_legacy_image_placeholders(str(md)) + if cleaned != str(md).strip(): + conn.execute( + sa.text("UPDATE chapters SET canonical_markdown = :md WHERE id = :id"), + {"md": cleaned, "id": cid}, + ) + + rows_mi = conn.execute( + sa.text( + """ + SELECT id, chapter_id, section_id, storage_key, url, provider, style, status + FROM memoir_images + WHERE status = 'completed' + AND storage_key IS NOT NULL + AND TRIM(storage_key) <> '' + """ + ) + ).fetchall() + + existing = { + r[0] + for r in conn.execute(sa.text("SELECT storage_key FROM assets")).fetchall() + if r[0] + } + + for row in rows_mi: + _mid, chapter_id, 
section_id, storage_key, url, provider, style, _status = row + sk = (storage_key or "").strip() + if not sk or sk in existing: + continue + aid = str(uuid.uuid4()) + asset_type = "chapter_cover" if section_id is None else "story_image" + now = datetime.now(timezone.utc) + conn.execute( + sa.text( + """ + INSERT INTO assets ( + id, asset_type, storage_key, url, provider, style_profile, + prompt_final, status, width, height, created_at + ) VALUES ( + :id, :atype, :sk, :url, :prov, :style, + :prompt, 'completed', NULL, NULL, :created + ) + """ + ), + { + "id": aid, + "atype": asset_type, + "sk": sk, + "url": url, + "prov": provider, + "style": style, + "prompt": None, + "created": now, + }, + ) + existing.add(sk) + if section_id is None and chapter_id: + conn.execute( + sa.text( + """ + UPDATE chapters + SET cover_asset_id = :aid + WHERE id = :cid + AND (cover_asset_id IS NULL OR cover_asset_id = '') + """ + ), + {"aid": aid, "cid": chapter_id}, + ) + + +def downgrade() -> None: + pass diff --git a/api/alembic/versions/0009_story_image_intent_constraints.py b/api/alembic/versions/0009_story_image_intent_constraints.py new file mode 100644 index 0000000..8a07fe7 --- /dev/null +++ b/api/alembic/versions/0009_story_image_intent_constraints.py @@ -0,0 +1,72 @@ +"""story_image_intents: one primary per story + optional FK to assets + +Revision ID: 0009_si_constraints +Revises: 0008_legacy_assets +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0009_si_constraints" +down_revision: Union[str, Sequence[str], None] = "0008_legacy_assets" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + bind = op.get_bind() + bind.execute( + sa.text( + """ + UPDATE story_image_intents SET asset_id = NULL + WHERE asset_id IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM assets a WHERE a.id = story_image_intents.asset_id) + """ + ) + ) + # 去重:同一 story 
多条 primary 时保留最新一条 + bind.execute( + sa.text( + """ + DELETE FROM story_image_intents + WHERE id IN ( + SELECT id FROM ( + SELECT id, + ROW_NUMBER() OVER ( + PARTITION BY story_id + ORDER BY created_at DESC NULLS LAST, id DESC + ) AS rn + FROM story_image_intents + WHERE intent_role = 'primary' + ) t + WHERE rn > 1 + ) + """ + ) + ) + op.create_foreign_key( + "fk_story_image_intents_asset_id_assets", + "story_image_intents", + "assets", + ["asset_id"], + ["id"], + ondelete="SET NULL", + ) + op.execute( + """ + CREATE UNIQUE INDEX IF NOT EXISTS uq_story_primary_image_intent + ON story_image_intents (story_id) + WHERE intent_role = 'primary' + """ + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS uq_story_primary_image_intent") + op.drop_constraint( + "fk_story_image_intents_asset_id_assets", + "story_image_intents", + type_="foreignkey", + ) diff --git a/api/alembic/versions/0010_intent_claim_fields.py b/api/alembic/versions/0010_intent_claim_fields.py new file mode 100644 index 0000000..488deee --- /dev/null +++ b/api/alembic/versions/0010_intent_claim_fields.py @@ -0,0 +1,50 @@ +"""add claim fields to story/chapter image intents + +Revision ID: 0010_intent_claims +Revises: 0009_si_constraints +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +revision: str = "0010_intent_claims" +down_revision: Union[str, Sequence[str], None] = "0009_si_constraints" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _add_claim_columns(table_name: str) -> None: + op.add_column(table_name, sa.Column("claim_token", sa.String(), nullable=True)) + op.add_column( + table_name, sa.Column("claimed_at", sa.DateTime(timezone=True), nullable=True) + ) + op.add_column( + table_name, + sa.Column( + "attempt_count", + sa.Integer(), + nullable=True, + server_default="0", + ), + ) + op.execute(f"UPDATE {table_name} SET attempt_count = 0 WHERE attempt_count IS NULL") 
+ op.alter_column(table_name, "attempt_count", nullable=False, server_default=None) + + +def _drop_claim_columns(table_name: str) -> None: + op.drop_column(table_name, "attempt_count") + op.drop_column(table_name, "claimed_at") + op.drop_column(table_name, "claim_token") + + +def upgrade() -> None: + _add_claim_columns("story_image_intents") + _add_claim_columns("chapter_cover_intents") + + +def downgrade() -> None: + _drop_claim_columns("chapter_cover_intents") + _drop_claim_columns("story_image_intents") diff --git a/api/app/agents/memoir/__init__.py b/api/app/agents/memoir/__init__.py index d945e27..9d10082 100644 --- a/api/app/agents/memoir/__init__.py +++ b/api/app/agents/memoir/__init__.py @@ -7,11 +7,15 @@ from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult from app.agents.memoir.classification_agent import ClassificationAgent from app.agents.memoir.narrative_agent import NarrativeAgent from app.agents.memoir.placeholder_agent import inject_placeholders +from app.agents.memoir.story_builder_orchestrator import StoryBuilderOrchestrator +from app.agents.memoir.chapter_composer_orchestrator import ChapterComposerOrchestrator __all__ = [ "MemoryAgent", "BackgroundTaskRunner", "MemoirOrchestrator", + "StoryBuilderOrchestrator", + "ChapterComposerOrchestrator", "ExtractionAgent", "ExtractionResult", "ClassificationAgent", diff --git a/api/app/agents/memoir/chapter_composer_orchestrator.py b/api/app/agents/memoir/chapter_composer_orchestrator.py new file mode 100644 index 0000000..6d15082 --- /dev/null +++ b/api/app/agents/memoir/chapter_composer_orchestrator.py @@ -0,0 +1,106 @@ +""" +ChapterComposerOrchestrator — 读取 stories/evidence,生成章节 markdown。 + +Agent 只产出结构化结果,不直接写 DB。 +""" + +from __future__ import annotations + +from typing import Any + +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class ChapterComposerOrchestrator: + """ + 生成章节大纲和章节 markdown。 + 仅返回 markdown,不落库。 + """ + + def 
compose_chapter_markdown( + self, + *, + title: str, + category: str, + evidence: dict, + existing_markdown: str = "", + user_profile: str = "", + birth_year: int | None = None, + llm: Any = None, + ) -> str: + """ + 从 evidence 生成章节 markdown。 + 若有 existing_markdown 则追加/合并。 + 返回 markdown 正文,不写 DB。 + """ + from app.agents.memoir.narrative_agent import NarrativeAgent + + chunks = evidence.get("relevant_chunks", []) + facts = evidence.get("relevant_facts", []) + new_content = self._format_evidence_for_prompt(chunks, facts) + + agent = NarrativeAgent() + narrative = agent.generate_narrative( + stage=category, + slots={}, + new_content=new_content, + existing_content=existing_markdown, + user_profile=user_profile, + birth_year=birth_year, + llm=llm, + ) + return self._to_markdown(narrative) + + def _format_evidence_for_prompt(self, chunks: list, facts: list) -> str: + """将 evidence 格式化为 prompt 输入。""" + parts = [] + for c in chunks[:10]: + content = ( + c.get("content", "") + if isinstance(c, dict) + else getattr(c, "content", "") + ) + if content: + parts.append(content.strip()) + for f in facts[:5]: + if isinstance(f, dict): + subj = f.get("subject", "") + pred = f.get("predicate", "") + obj = f.get("object_json", "") + if subj or pred: + parts.append(f"{subj} {pred} {obj}") + else: + parts.append( + f"{getattr(f, 'subject', '')} {getattr(f, 'predicate', '')}" + ) + return "\n\n".join(parts) if parts else "" + + def _to_markdown(self, narrative: str) -> str: + """将 narrative(JSON 或纯文本)转为 markdown。正文不含占位符。""" + if not narrative or not narrative.strip(): + return "" + if narrative.strip().startswith("{") and "paragraphs" in narrative: + import json + + try: + data = json.loads(narrative) + paras = data.get("paragraphs", []) + if isinstance(paras, list): + parts = [] + for p in paras: + if isinstance(p, dict): + text = p.get("content", p.get("text", "")) + else: + text = str(p) + if text.strip(): + parts.append(text.strip()) + md = "\n\n".join(parts) + else: + md = 
narrative + except json.JSONDecodeError: + md = narrative + else: + md = narrative.strip() + return md diff --git a/api/app/agents/memoir/memory_agent.py b/api/app/agents/memoir/memory_agent.py index ff006a2..590a44e 100644 --- a/api/app/agents/memoir/memory_agent.py +++ b/api/app/agents/memoir/memory_agent.py @@ -14,7 +14,6 @@ from app.agents.memoir.prompts import ( STAGE_TO_ORDER, get_chapter_classification_prompt, get_text_rewrite_prompt, - inject_image_placeholder_template, ) from app.features.memoir.memoir_images.json_payload import extract_json_payload @@ -78,15 +77,12 @@ class MemoryAgent: ) content = content.strip() result = json.loads(extract_json_payload(content)) - result["content"] = inject_image_placeholder_template( - result.get("content") or "" - ) return result except json.JSONDecodeError: raw = response.content if hasattr(response, "content") else str(response) return { "title": CHAPTER_CATEGORIES.get(chapter_category, "章节"), - "content": inject_image_placeholder_template(raw), + "content": raw, "summary": "", "image_suggestions": [], } diff --git a/api/app/agents/memoir/orchestrator.py b/api/app/agents/memoir/orchestrator.py index 7e1d548..728942a 100644 --- a/api/app/agents/memoir/orchestrator.py +++ b/api/app/agents/memoir/orchestrator.py @@ -97,7 +97,7 @@ class MemoirOrchestrator: continue category_to_segments.setdefault(chapter_category, []).append(segment) - # 2) 按 category 调用 process_category:内含 NarrativeAgent、PlaceholderInject、持久化 + # 2) 按 category 调用 process_category:叙事生成、持久化、封面入队标记 for chapter_category, category_segments in category_to_segments.items(): if not acquire_lock(chapter_category): logger.warning( diff --git a/api/app/agents/memoir/prompts.py b/api/app/agents/memoir/prompts.py index 36aa7b8..595d6a1 100644 --- a/api/app/agents/memoir/prompts.py +++ b/api/app/agents/memoir/prompts.py @@ -57,8 +57,8 @@ _IMAGE_PLACEHOLDER_ANY_BRACES_RE = re.compile( def inject_image_placeholder_template(content: str) -> str: """ - 
入库前对章节正文做占位符处理:用正则匹配所有图片占位符位置,拼上固定模板。 - 支持任意层数花括号,输出统一为四层大括号 + 固定模板 + 描述。 + 对正文中的 IMAGE 占位符拼上固定风格模板(四层花括号)。 + **线上写路径已不使用**;保留供离线迁移脚本处理历史数据。 """ if not content or not content.strip(): return content @@ -92,7 +92,6 @@ def get_system_prompt() -> str: 4. 将口语化表达改写为书面语,保持原意和情感 5. 生成合适的章节标题和段落结构 6. 提取关键信息,形成连贯的叙述 -7. 建议插图位置(在描述场景、人物、地点的地方) ## 内容筛选原则(最重要) 对话中往往夹杂大量与回忆录无关的噪音,你必须严格筛选,只保留有价值的内容: @@ -171,24 +170,14 @@ def get_text_rewrite_prompt( 请按照以下格式返回 JSON: {{ "title": "章节标题", - "content": "改写后的书面语内容(包含图片占位符)", + "content": "改写后的书面语内容", "summary": "章节摘要(50字以内)" }} 要求: 1. 标题要简洁有力,能概括章节主题 2. 内容要流畅自然,保持原意和情感 -3. 如果已有章节内容,请将新内容与已有内容自然融合 -4. 在内容中适当位置插入图片占位符 - -## 图片占位符格式(必须严格遵守) -- **唯一合法格式**:开头恰好四个左花括号、结尾恰好四个右花括号,中间为 IMAGE:具体描述。即:{{{{IMAGE:具体的图片描述}}}} -- 禁止使用两层 {{ }}、六层 {{{{{{ }}}}}} 或任意其它层数,否则会在手机端显示异常。 -- 占位符单独占一行,描述要具体、有画面感。系统会在入库前自动拼上统一风格模板,你只需写场景描述即可。 - -正确示例(仅此格式): -{{{{IMAGE:南方小镇的青石板路,两旁是白墙黑瓦的老房子}}}} -{{{{IMAGE:奶奶坐在院子里的藤椅上,手里摇着蒲扇}}}}""" +3. 如果已有章节内容,请将新内容与已有内容自然融合""" def get_state_extraction_prompt( @@ -350,30 +339,11 @@ def get_narrative_prompt( 3. **只输出新内容的改写结果**,不要重复已有内容 4. 如果有衔接上下文,确保新内容与之自然衔接(语气、时间线连贯) 5. 语气自然,有情绪 -6. 在适合配图的地方插入图片占位符 -7. 如果有用户的基本信息(出生地、成长地等),在叙述中自然融入地域文化和时代背景 +6. 如果有用户的基本信息(出生地、成长地等),在叙述中自然融入地域文化和时代背景 8. **不要将对话中的交互性语言(如"我跟你说"、"你知道吗")写入叙述** 9. **不要在正文中插入章节标题或分类标签**(如"章节:信念与价值观"、"## 童年与成长背景"等),章节标题由系统单独管理 -## 图片占位符格式(必须严格遵守) -- **唯一合法格式**:开头恰好四个左花括号、结尾恰好四个右花括号,即:{{{{IMAGE:具体的图片描述}}}} -- 禁止两层 {{ }}、六层 {{{{{{ }}}}}} 或其它层数,否则会在手机端显示多余花括号。 -- 占位符单独占一行,描述要具体、有画面感。系统会在入库前自动拼上统一风格模板,你只需写场景描述即可。 - -正确示例(仅此格式): -- {{{{IMAGE:南方小镇的青石板路,两旁是白墙黑瓦的老房子}}}} -- {{{{IMAGE:奶奶坐在院子里的藤椅上,手里摇着蒲扇}}}} -- {{{{IMAGE:少年背着书包站在火车站台上,回望身后的小镇}}}} -- {{{{IMAGE:泛黄的大学录取通知书,压在一摞旧课本下}}}} - -图片占位符要求: -- 描述要具体、有画面感,便于后续生成或匹配图片 -- 每 200-300 字左右可以插入一个 -- 单独占一行,不要嵌入段落中 -- 不要使用括号或星号等其他格式 -- **花括号必须且仅能为四层**:{{{{ 与 }}}} 各四个,不多不少 - -只输出新对话内容的改写结果(包含图片占位符)。如果对话中没有值得记录的人生经历内容,输出空字符串。 +只输出新对话内容的改写结果。如果对话中没有值得记录的人生经历内容,输出空字符串。 """ @@ -415,19 +385,18 @@ def get_narrative_json_prompt( 1. 
从对话中提炼与人生经历相关的核心内容,过滤语气词、寒暄、与AI的交互 2. 使用第一人称,改写为流畅的书面叙述,不要直接引用对话原话 3. 只输出新内容的改写,不要重复已有内容 -4. 每 200-300 字左右一个段落,每个段落配一张图 +4. 每 200-300 字左右一个段落 5. 如有衔接上下文,确保新内容与之自然衔接 ## 输出格式(严格 JSON) {{ "paragraphs": [ - {{"content": "段落正文", "image_description": "该段配图的场景描述,具体有画面感"}}, + {{"content": "段落正文"}}, ... ] }} -- content: 本段纯正文,不含占位符 -- image_description: 该段配图的场景描述,具体、有画面感,便于生成图片。示例:南方小镇的青石板路,两旁是白墙黑瓦的老房子 +- content: 本段纯正文 如果对话中没有值得记录的人生经历内容,输出:{{"paragraphs": []}} """ diff --git a/api/app/agents/memoir/story_builder_orchestrator.py b/api/app/agents/memoir/story_builder_orchestrator.py new file mode 100644 index 0000000..e3bd558 --- /dev/null +++ b/api/app/agents/memoir/story_builder_orchestrator.py @@ -0,0 +1,107 @@ +""" +StoryBuilderOrchestrator — 组织 evidence,调用 StorySynthesisAgent,产出 story markdown。 + +Agent 只产出结构化结果,不直接写 DB。 +""" + +from __future__ import annotations + +from typing import Any + +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class StoryBuilderOrchestrator: + """ + 判断新增 story、补充现有 story、合并重复 story。 + 组织 evidence bundle,生成或更新 story markdown。 + 仅返回结构化输出,不落库。 + """ + + def build_story_markdown( + self, + *, + evidence: dict, + stage: str, + story_type: str | None = None, + existing_markdown: str = "", + user_profile: str = "", + birth_year: int | None = None, + llm: Any = None, + ) -> str: + """ + 从 evidence 生成 story markdown。 + 若有 existing_markdown 则做补充/合并。 + 返回 markdown 正文,不写 DB。 + """ + from app.agents.memoir.narrative_agent import NarrativeAgent + + chunks = evidence.get("relevant_chunks", []) + facts = evidence.get("relevant_facts", []) + new_content = self._format_evidence_for_prompt(chunks, facts) + + agent = NarrativeAgent() + markdown = agent.generate_narrative( + stage=stage, + slots={}, + new_content=new_content, + existing_content=existing_markdown, + user_profile=user_profile, + birth_year=birth_year, + llm=llm, + ) + return self._to_markdown(markdown) + + def _format_evidence_for_prompt(self, chunks: list, facts: 
list) -> str: + """将 evidence 格式化为 prompt 输入。""" + parts = [] + for c in chunks[:10]: + content = ( + c.get("content", "") + if isinstance(c, dict) + else getattr(c, "content", "") + ) + if content: + parts.append(content.strip()) + for f in facts[:5]: + if isinstance(f, dict): + subj = f.get("subject", "") + pred = f.get("predicate", "") + obj = f.get("object_json", "") + if subj or pred: + parts.append(f"{subj} {pred} {obj}") + else: + parts.append( + f"{getattr(f, 'subject', '')} {getattr(f, 'predicate', '')}" + ) + return "\n\n".join(parts) if parts else "" + + def _to_markdown(self, narrative: str) -> str: + """将 narrative(JSON 或纯文本)转为 markdown。正文不包含占位符,图片意图由 StoryImageIntentExtractor 提取。""" + if not narrative or not narrative.strip(): + return "" + if narrative.strip().startswith("{") and "paragraphs" in narrative: + import json + + try: + data = json.loads(narrative) + paras = data.get("paragraphs", []) + if isinstance(paras, list): + parts = [] + for p in paras: + if isinstance(p, dict): + text = p.get("content", p.get("text", "")) + else: + text = str(p) + if text.strip(): + parts.append(text.strip()) + md = "\n\n".join(parts) + else: + md = narrative + except json.JSONDecodeError: + md = narrative + else: + md = narrative.strip() + return md diff --git a/api/app/core/redis_lock.py b/api/app/core/redis_lock.py new file mode 100644 index 0000000..8730b69 --- /dev/null +++ b/api/app/core/redis_lock.py @@ -0,0 +1,41 @@ +"""Small Redis lock helpers for background tasks.""" + +from dataclasses import dataclass +import uuid + +import redis + +from app.core.config import settings + + +@dataclass(frozen=True) +class RedisLockHandle: + client: redis.Redis + key: str + token: bytes + + +def acquire_redis_lock(key: str, *, ttl_seconds: int) -> RedisLockHandle | None: + """Acquire a single-owner Redis lock or return None when unavailable.""" + client = redis.from_url(settings.redis_url, decode_responses=False) + token = uuid.uuid4().hex.encode("utf-8") + if not 
client.set(key, token, nx=True, ex=ttl_seconds): + return None + return RedisLockHandle(client=client, key=key, token=token) + + +def release_redis_lock(handle: RedisLockHandle | None) -> None: + """Release the lock only if we still own it.""" + if handle is None: + return + handle.client.eval( + """ + if redis.call("GET", KEYS[1]) == ARGV[1] then + return redis.call("DEL", KEYS[1]) + end + return 0 + """, + 1, + handle.key, + handle.token, + ) diff --git a/api/app/features/asset/__init__.py b/api/app/features/asset/__init__.py new file mode 100644 index 0000000..9f88700 --- /dev/null +++ b/api/app/features/asset/__init__.py @@ -0,0 +1 @@ +"""Asset feature — 统一资源表。""" diff --git a/api/app/features/asset/models.py b/api/app/features/asset/models.py new file mode 100644 index 0000000..d5ae92a --- /dev/null +++ b/api/app/features/asset/models.py @@ -0,0 +1,21 @@ +"""Asset 模型。""" + +from sqlalchemy import Column, DateTime, Integer, String, Text + +from app.core.db import Base, utc_now + + +class Asset(Base): + __tablename__ = "assets" + + id = Column(String, primary_key=True) + asset_type = Column(String, nullable=False) + storage_key = Column(String, nullable=False) + url = Column(String, nullable=True) + provider = Column(String, nullable=True) + style_profile = Column(String, nullable=True) + prompt_final = Column(Text, nullable=True) + status = Column(String, nullable=False) + width = Column(Integer, nullable=True) + height = Column(Integer, nullable=True) + created_at = Column(DateTime(timezone=True), default=utc_now) diff --git a/api/app/features/asset/repo.py b/api/app/features/asset/repo.py new file mode 100644 index 0000000..efa2f5d --- /dev/null +++ b/api/app/features/asset/repo.py @@ -0,0 +1,37 @@ +"""Asset repository — 资源表数据访问。""" + +import uuid + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.features.asset.models import Asset + + +def _new_id() -> str: + return str(uuid.uuid4()) + + +async def create_asset( + db: AsyncSession, + *, + 
asset_type: str, + storage_key: str, + url: str | None = None, + provider: str | None = None, + style_profile: str | None = None, + prompt_final: str | None = None, + status: str = "completed", +) -> Asset: + """Create asset. Caller must commit.""" + asset = Asset( + id=_new_id(), + asset_type=asset_type, + storage_key=storage_key, + url=url, + provider=provider, + style_profile=style_profile, + prompt_final=prompt_final, + status=status, + ) + db.add(asset) + return asset diff --git a/api/app/features/memoir/asset_resolver.py b/api/app/features/memoir/asset_resolver.py new file mode 100644 index 0000000..27e79a3 --- /dev/null +++ b/api/app/features/memoir/asset_resolver.py @@ -0,0 +1,101 @@ +""" +asset:// 与旧占位符清理。 + +迁移与渲染共用:从正文移除 {{IMAGE:...}} / {{{{IMAGE:...}}}}。 +""" + +import re +from typing import Callable + +_PLACEHOLDER_RE = re.compile( + r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}", + re.DOTALL, +) + +_ASSET_REF_RE = re.compile(r"!\[([^\]]*)\]\(asset://([a-zA-Z0-9_-]+)\)") + + +def strip_legacy_image_placeholders(text: str | None) -> str: + """移除正文中的旧 IMAGE 占位符,保留其余 markdown。""" + if not text: + return "" + return _PLACEHOLDER_RE.sub("", text).strip() + + +def parse_asset_refs(markdown: str) -> list[tuple[int, int, str, str]]: + refs = [] + for m in _ASSET_REF_RE.finditer(markdown or ""): + refs.append((m.start(), m.end(), m.group(1) or "", m.group(2) or "")) + return refs + + +def collect_asset_ids_from_markdown(markdown: str) -> list[str]: + return [m.group(2) for m in _ASSET_REF_RE.finditer(markdown or "") if m.group(2)] + + +def collect_asset_ids_for_chapter(chapter) -> set[str]: + """章节正文(canonical + 各 section)与 cover_asset_id 中出现的 asset id。""" + ids: set[str] = set() + md = getattr(chapter, "canonical_markdown", None) or "" + ids.update(collect_asset_ids_from_markdown(md)) + for sec in getattr(chapter, "sections", None) or []: + ids.update(collect_asset_ids_from_markdown(getattr(sec, "content", None) or "")) + cid = getattr(chapter, 
"cover_asset_id", None) + if cid: + ids.add(str(cid)) + return ids + + +def collect_asset_ids_for_chapters(chapters: list) -> set[str]: + combined: set[str] = set() + for ch in chapters or []: + combined |= collect_asset_ids_for_chapter(ch) + return combined + + +def split_markdown_by_asset_refs( + markdown: str, + resolve_asset: Callable[[str], str | None], +) -> list[dict]: + blocks: list[dict] = [] + refs = parse_asset_refs(markdown or "") + if not refs: + text = (markdown or "").strip() + if text: + blocks.append({"type": "text", "value": text}) + return blocks + + pos = 0 + for start, end, caption, asset_id in refs: + if start > pos: + text = markdown[pos:start].strip() + if text: + blocks.append({"type": "text", "value": text}) + url = resolve_asset(asset_id) if asset_id else None + if url: + blocks.append({"type": "image", "url": url, "caption": caption}) + pos = end + + if pos < len(markdown or ""): + text = markdown[pos:].strip() + if text: + blocks.append({"type": "text", "value": text}) + + return blocks + + +def resolve_asset_refs_in_markdown( + markdown: str, + resolve_asset: Callable[[str], str | None], +) -> str: + if not markdown or not resolve_asset: + return markdown or "" + + def repl(m): + caption, asset_id = m.group(1) or "", m.group(2) or "" + url = resolve_asset(asset_id) if asset_id else None + if url: + return f"![{caption}]({url})" + return m.group(0) + + return _ASSET_REF_RE.sub(repl, markdown) diff --git a/api/app/features/memoir/asset_urls.py b/api/app/features/memoir/asset_urls.py new file mode 100644 index 0000000..341bcde --- /dev/null +++ b/api/app/features/memoir/asset_urls.py @@ -0,0 +1,44 @@ +"""按 Asset id 批量生成 COS 签名 URL(解析正文 asset://)。""" + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import settings +from app.core.logging import get_logger +from app.features.asset.models import Asset +from app.features.memoir.memoir_images.storage import ( + CosDownloadUrlError, + 
TencentCosStorageService, +) + +logger = get_logger(__name__) + + +async def signed_urls_for_asset_ids( + db: AsyncSession, asset_ids: set[str] +) -> dict[str, str]: + """返回 asset_id -> 短期可访问 URL;签名失败则跳过该 id。""" + if not asset_ids: + return {} + stmt = select(Asset).where(Asset.id.in_(asset_ids)) + result = await db.execute(stmt) + rows = list(result.scalars().all()) + storage = TencentCosStorageService.from_settings(settings) + out: dict[str, str] = {} + for a in rows: + key = (a.storage_key or "").strip() + if not key: + continue + try: + out[a.id] = storage.get_download_url(key) + except CosDownloadUrlError as exc: + logger.warning( + "Asset 签名失败: id=%s key=%s retryable=%s error=%s", + a.id, + key, + exc.retryable, + exc, + ) + except Exception as exc: + logger.warning("Asset 签名失败: id=%s error=%s", a.id, exc) + return out diff --git a/api/app/features/memoir/chapter_cover.py b/api/app/features/memoir/chapter_cover.py new file mode 100644 index 0000000..833da04 --- /dev/null +++ b/api/app/features/memoir/chapter_cover.py @@ -0,0 +1,60 @@ +""" +Chapter 封面意图 — 从本章 stories 或章节内容聚合生成封面 prompt。 + +封面不回写进 chapter 正文 markdown,绑定到 chapters.cover_asset_id。 +""" + +from __future__ import annotations + + +def aggregate_cover_prompt_from_stories( + stories: list, + *, + chapter_title: str = "", + chapter_category: str = "", +) -> str: + """ + 从本章 stories 聚合封面 prompt。 + 人物、地点、时间、情绪、时代背景。 + """ + parts = [] + if chapter_title: + parts.append(chapter_title) + if chapter_category: + parts.append(chapter_category) + for s in (stories or [])[:5]: + title = getattr(s, "title", None) or ( + s.get("title") if isinstance(s, dict) else None + ) + stage = getattr(s, "stage", None) or ( + s.get("stage") if isinstance(s, dict) else None + ) + summary = getattr(s, "summary", None) or ( + s.get("summary") if isinstance(s, dict) else None + ) + if title: + parts.append(title) + if stage: + parts.append(stage) + if summary: + parts.append((summary or "")[:100]) + return ",".join(p for p in 
parts if p) + + +def aggregate_cover_prompt_from_chapter( + chapter_title: str = "", + chapter_category: str = "", + markdown_excerpt: str = "", +) -> str: + """ + 从章节标题、分类、正文摘要聚合封面 prompt。 + 用于无 story_links 的章节(兼容旧 memoir 流程)。 + """ + parts = [] + if chapter_title: + parts.append(chapter_title) + if chapter_category: + parts.append(chapter_category) + if markdown_excerpt: + parts.append(markdown_excerpt[:200].strip()) + return ",".join(p for p in parts if p) or "人生回忆录章节" diff --git a/api/app/features/memoir/helpers.py b/api/app/features/memoir/helpers.py index 3867606..1600874 100644 --- a/api/app/features/memoir/helpers.py +++ b/api/app/features/memoir/helpers.py @@ -2,10 +2,9 @@ 回忆录序列化与图片归一化辅助(供 MemoirService 使用)。 """ -from app.core.logging import get_logger - from app.core.config import settings -from app.features.memoir.models import Chapter, ChapterSection +from app.core.logging import get_logger +from app.features.memoir.asset_resolver import resolve_asset_refs_in_markdown from app.features.memoir.memoir_images.schema import ( IMAGE_STATUS_COMPLETED, IMAGE_STATUS_FAILED, @@ -21,6 +20,7 @@ from app.features.memoir.memoir_images.storage import ( normalize_cos_url, resolve_image_storage_key, ) +from app.features.memoir.models import Chapter logger = get_logger(__name__) @@ -82,11 +82,33 @@ def section_image_to_dict(section) -> dict | None: return None -def chapter_cover_to_dict(ch: Chapter) -> dict | None: +def chapter_cover_to_dict( + ch: Chapter, asset_url_map: dict[str, str] | None = None +) -> dict | None: images = getattr(ch, "images", None) or [] for m in images: if getattr(m, "section_id", None) is None: return memoir_image_to_dict(m) + asset_url_map = asset_url_map or {} + aid = getattr(ch, "cover_asset_id", None) + if aid and asset_url_map.get(str(aid)): + url = asset_url_map[str(aid)] + return { + "placeholder": "", + "description": "章节封面", + "index": 0, + "status": IMAGE_STATUS_COMPLETED, + "prompt": None, + "url": url, + "storage_key": None, + 
"provider": None, + "style": None, + "size": None, + "error": None, + "retryable": None, + "created_at": None, + "updated_at": None, + } if getattr(ch, "cover_image", None) and isinstance(ch.cover_image, dict): return ch.cover_image return None @@ -111,31 +133,82 @@ def sections_to_content_and_images(ch: Chapter) -> tuple[str, list[dict]]: return content, images -def chapter_to_dict(ch: Chapter) -> dict: +def _chapter_markdown(ch: Chapter) -> str: + """正文真源:优先 canonical_markdown,否则从 sections 推导(兼容旧数据)。""" + md = getattr(ch, "canonical_markdown", None) + if md and str(md).strip(): + return str(md).strip() + content, _ = sections_to_content_and_images(ch) + return content + + +def chapter_to_list_dict( + ch: Chapter, asset_url_map: dict[str, str] | None = None +) -> dict: + """ + 列表视图:与详情字段对齐的最小子集 + 客户端兼容字段。 + 含 status、canonical_markdown、content、cover_image(与 cover_asset 同构)、images、sections、word_count。 + """ + cover = chapter_cover_to_dict(ch, asset_url_map=asset_url_map) + cover_normalized = normalize_image_assets_for_api([cover])[0] if cover else None + canonical_raw = _chapter_markdown(ch) + wcount = len(canonical_raw.strip()) if canonical_raw else 0 + return { + "id": ch.id, + "title": ch.title, + "category": ch.category, + "order_index": ch.order_index, + "status": getattr(ch, "status", None) or "draft", + "summary": getattr(ch, "summary", None) or "", + "canonical_markdown": canonical_raw, + "content": canonical_raw, + "cover_asset": cover_normalized, + "cover_image": cover_normalized, + "images": [], + "sections": [], + "word_count": wcount, + "updated_at": ch.updated_at.isoformat() if ch.updated_at else None, + "is_new": getattr(ch, "is_new", False), + "source_segments": getattr(ch, "source_segments", None) or [], + } + + +def chapter_to_dict(ch: Chapter, asset_url_map: dict[str, str] | None = None) -> dict: + """详情视图:含 canonical_markdown、rendered_assets。asset_url_map 用于解析 asset:// 与 cover_asset_id。""" + asset_url_map = asset_url_map or {} + resolve = lambda 
aid: asset_url_map.get(aid) # noqa: E731 + content, images_list = sections_to_content_and_images(ch) + content = resolve_asset_refs_in_markdown(content, resolve) normalized_images = normalize_image_assets_for_api(images_list) - cover = chapter_cover_to_dict(ch) + cover = chapter_cover_to_dict(ch, asset_url_map=asset_url_map) cover_normalized = normalize_image_assets_for_api([cover])[0] if cover else None sections_data = [] if getattr(ch, "sections", None): for s in sorted(ch.sections, key=lambda x: getattr(x, "order_index", 0)): sec_img = section_image_to_dict(s) sec_img = normalize_image_assets_for_api([sec_img])[0] if sec_img else None + raw_sec = (getattr(s, "content", None) or "").strip() sections_data.append( { - "content": (getattr(s, "content", None) or "").strip(), + "content": resolve_asset_refs_in_markdown(raw_sec, resolve), "image": sec_img, } ) + # 正文真源:优先 canonical_markdown + canonical_md = _chapter_markdown(ch) + canonical_md = resolve_asset_refs_in_markdown(canonical_md, resolve) return { "id": ch.id, "title": ch.title, "content": content, + "canonical_markdown": canonical_md, "order_index": ch.order_index, "status": ch.status, "category": ch.category, "images": normalized_images, "cover_image": cover_normalized, + "rendered_assets": normalized_images, "sections": sections_data, "updated_at": ch.updated_at.isoformat() if ch.updated_at else None, "is_new": ch.is_new, diff --git a/api/app/features/memoir/memoir_images/parser.py b/api/app/features/memoir/memoir_images/parser.py index 35cc1f4..a3565a5 100644 --- a/api/app/features/memoir/memoir_images/parser.py +++ b/api/app/features/memoir/memoir_images/parser.py @@ -2,6 +2,8 @@ import json import re from typing import Any +from app.features.memoir.asset_resolver import strip_legacy_image_placeholders + from .json_payload import extract_json_payload from .schema import IMAGE_STATUS_PENDING @@ -12,6 +14,7 @@ PLACEHOLDER_RE = re.compile( def parse_image_placeholders(content: str, max_images: int) -> 
list[dict[str, Any]]: + """离线迁移/调试用:解析正文中的 IMAGE 占位符。""" items: list[dict[str, Any]] = [] for match in PLACEHOLDER_RE.finditer(content or ""): description = (match.group(1) or match.group(2) or "").strip() @@ -56,44 +59,12 @@ def build_initial_image_assets( ] -def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]: - """ - 将带 {{IMAGE:...}} 占位符的正文按占位符拆成多段。 - 返回 list[dict],每项含: - - content: 本段纯文本(不含占位符) - - placeholder_info: 本段后的配图占位信息,或 None(最后一段无图) - """ - if not (narrative or narrative.strip()): - return [] - placeholders = parse_image_placeholders(narrative, max_images=None) - sections: list[dict[str, Any]] = [] - for i in range(len(placeholders) + 1): - if i == 0: - start = 0 - else: - prev = placeholders[i - 1] - start = prev["start_offset"] + len(prev["placeholder"]) - if i < len(placeholders): - end = placeholders[i]["start_offset"] - placeholder_info = placeholders[i] - else: - end = len(narrative) - placeholder_info = None - content = narrative[start:end] - if isinstance(content, str): - content = content.strip() - sections.append( - {"content": content or "", "placeholder_info": placeholder_info} - ) - return sections - - def parse_narrative_json(raw: str) -> list[dict[str, Any]]: """ - 解析 LLM 输出的 JSON 格式叙事。 - 返回与 split_narrative_to_sections 相同结构:list[dict],每项含 content、placeholder_info。 + 解析 LLM 输出的 JSON 叙事(paragraphs)。 + 不根据 image_description 生成配图占位;插图由 story/chapter 结构化流程单独处理。 """ - if not (raw or raw.strip()): + if not raw or not str(raw).strip(): return [] try: payload = extract_json_payload(raw) @@ -105,33 +76,34 @@ def parse_narrative_json(raw: str) -> list[dict[str, Any]]: return [] result: list[dict[str, Any]] = [] - for i, p in enumerate(paragraphs): + for p in paragraphs: if not isinstance(p, dict): continue content = (p.get("content") or "").strip() - desc = (p.get("image_description") or "").strip() - placeholder_info = None - if desc: - placeholder_info = { - "placeholder": f"{{{{IMAGE:{desc}}}}}", - "description": desc, - 
"index": i, - "start_offset": 0, - } - result.append({"content": content, "placeholder_info": placeholder_info}) + if content: + result.append({"content": content, "placeholder_info": None}) return result +def split_plain_narrative_into_sections(narrative: str) -> list[dict[str, Any]]: + """非 JSON 叙事:去掉遗留占位符后按空行拆段,不产生段落配图。""" + text = strip_legacy_image_placeholders(narrative or "") + if not text.strip(): + return [] + parts = [p.strip() for p in text.split("\n\n") if p.strip()] + return [{"content": p, "placeholder_info": None} for p in parts] + + def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]: """ - 将 narrative 解析为 sections。优先尝试 JSON 格式,失败则回退到占位符解析。 - 返回与 split_narrative_to_sections 相同结构。 + 将 narrative 解析为 sections。 + JSON(paragraphs)走 parse_narrative_json;否则剥离占位符后按段拆分。 """ - if not (narrative or narrative.strip()): + if not narrative or not str(narrative).strip(): return [] stripped = narrative.strip() if stripped.startswith("{") and "paragraphs" in stripped: segments = parse_narrative_json(narrative) if segments: return segments - return split_narrative_to_sections(narrative) + return split_plain_narrative_into_sections(narrative) diff --git a/api/app/features/memoir/models.py b/api/app/features/memoir/models.py index 4151e88..a05ca0b 100644 --- a/api/app/features/memoir/models.py +++ b/api/app/features/memoir/models.py @@ -14,21 +14,31 @@ from app.core.db import Base, utc_now class Chapter(Base): + """章节:阅读与导出视图,canonical_markdown 为正文真源。""" + __tablename__ = "chapters" id = Column(String, primary_key=True) user_id = Column(String, ForeignKey("users.id"), nullable=False) + book_id = Column(String, ForeignKey("books.id", ondelete="SET NULL"), nullable=True) title = Column(String, nullable=False) - order_index = Column(Integer, nullable=False) - status = Column(String, default="draft") - cover_image = Column(JSON, nullable=True) - updated_at = Column(DateTime(timezone=True), default=utc_now, onupdate=utc_now) category = Column(String, 
nullable=True) + order_index = Column(Integer, nullable=False) + summary = Column(Text, nullable=True) + canonical_markdown = Column(Text, nullable=True) # 当前生效正文(markdown-first) + status = Column(String, default="draft") # active / draft / archived + cover_image = Column(JSON, nullable=True) # 兼容旧数据,逐步迁移到 cover_asset_id + cover_asset_id = Column(String, nullable=True) + current_version_id = Column(String, nullable=True) # FK 在 migration 中分步添加 + created_at = Column(DateTime(timezone=True), default=utc_now) + updated_at = Column(DateTime(timezone=True), default=utc_now, onupdate=utc_now) + # 兼容旧运行时,迁移后废弃 is_new = Column(Boolean, default=True) is_active = Column(Boolean, default=True) source_segments = Column(JSON, nullable=True) user = relationship("User", back_populates="chapters") + book = relationship("Book", back_populates="chapters") sections = relationship( "ChapterSection", back_populates="chapter", @@ -41,6 +51,27 @@ class Chapter(Base): foreign_keys="MemoirImage.chapter_id", cascade="all, delete-orphan", ) + versions = relationship( + "ChapterVersion", + back_populates="chapter", + foreign_keys="ChapterVersion.chapter_id", + cascade="all, delete-orphan", + ) + current_version = relationship( + "ChapterVersion", + primaryjoin="Chapter.current_version_id == ChapterVersion.id", + foreign_keys="ChapterVersion.id", + ) + story_links = relationship( + "ChapterStoryLink", + back_populates="chapter", + cascade="all, delete-orphan", + ) + cover_intents = relationship( + "ChapterCoverIntent", + back_populates="chapter", + cascade="all, delete-orphan", + ) class ChapterSection(Base): @@ -101,6 +132,95 @@ class MemoirImage(Base): ) +class ChapterVersion(Base): + """Chapter 版本快照,记录正文变更与来源。""" + + __tablename__ = "chapter_versions" + + id = Column(String, primary_key=True) + chapter_id = Column( + String, + ForeignKey("chapters.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + version_no = Column(Integer, nullable=False) + markdown_snapshot = Column(Text, 
nullable=False) + change_summary = Column(Text, nullable=True) + actor_type = Column(String, nullable=True) # ai / user / editor / system + source_type = Column( + String, nullable=True + ) # generate / rewrite / merge / manual / migration + parent_version_id = Column( + String, ForeignKey("chapter_versions.id", ondelete="SET NULL"), nullable=True + ) + prompt_meta = Column(JSON, nullable=True) + created_at = Column(DateTime(timezone=True), default=utc_now) + + chapter = relationship( + "Chapter", back_populates="versions", foreign_keys=[chapter_id] + ) + parent_version = relationship( + "ChapterVersion", + remote_side="ChapterVersion.id", + foreign_keys=[parent_version_id], + ) + + +class ChapterCoverIntent(Base): + """Chapter 封面意图(结构化)。""" + + __tablename__ = "chapter_cover_intents" + + id = Column(String, primary_key=True) + chapter_id = Column( + String, + ForeignKey("chapters.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + chapter_version_id = Column( + String, + ForeignKey("chapter_versions.id", ondelete="SET NULL"), + nullable=True, + ) + story_ids = Column(JSON, nullable=True) + prompt_brief = Column(Text, nullable=True) + status = Column(String, nullable=False) + claim_token = Column(String, nullable=True) + claimed_at = Column(DateTime(timezone=True), nullable=True) + attempt_count = Column(Integer, nullable=False, default=0) + asset_id = Column(String, nullable=True) + error = Column(Text, nullable=True) + created_at = Column(DateTime(timezone=True), default=utc_now) + updated_at = Column(DateTime(timezone=True), default=utc_now, onupdate=utc_now) + + chapter = relationship("Chapter", back_populates="cover_intents") + + +class ChapterStoryLink(Base): + """Chapter 与 Story 的编排关联。""" + + __tablename__ = "chapter_story_links" + + id = Column(String, primary_key=True) + chapter_id = Column( + String, + ForeignKey("chapters.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + story_id = Column( + String, ForeignKey("stories.id", 
ondelete="CASCADE"), nullable=False, index=True + ) + order_index = Column(Integer, nullable=False) + role = Column(String, nullable=True) # core / bridge / appendix + created_at = Column(DateTime(timezone=True), default=utc_now) + + chapter = relationship("Chapter", back_populates="story_links") + story = relationship("Story", back_populates="chapter_links") + + class Book(Base): __tablename__ = "books" @@ -115,6 +235,11 @@ class Book(Base): last_update_chapter_id = Column(String, nullable=True) user = relationship("User", back_populates="books") + chapters = relationship( + "Chapter", + back_populates="book", + foreign_keys="Chapter.book_id", + ) class MemoirState(Base): diff --git a/api/app/features/memoir/pdf_service.py b/api/app/features/memoir/pdf_service.py index c3c10cc..845b1c9 100644 --- a/api/app/features/memoir/pdf_service.py +++ b/api/app/features/memoir/pdf_service.py @@ -4,7 +4,7 @@ PDF 生成服务(从 services 迁入 memoir feature) from app.core.logging import get_logger from io import BytesIO -from typing import List +from typing import List, Optional import httpx from PIL import Image @@ -21,12 +21,20 @@ from reportlab.platypus import ( from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.cidfonts import UnicodeCIDFont +from app.features.memoir.asset_resolver import ( + collect_asset_ids_from_markdown, + split_markdown_by_asset_refs, + strip_legacy_image_placeholders, +) +from app.features.memoir.helpers import ( + _chapter_markdown, + sections_to_content_and_images, +) from app.features.memoir.memoir_images.parser import PLACEHOLDER_RE from app.features.memoir.memoir_images.schema import ( IMAGE_STATUS_COMPLETED, normalize_image_assets, ) -from app.features.memoir.memoir_images.serializers import memoir_image_to_dict from app.features.memoir.memoir_images.storage import ( CosDownloadUrlError, TencentCosStorageService, @@ -60,24 +68,6 @@ def split_content_blocks(content: str, images: list[dict]) -> list[dict]: return blocks -def 
sections_to_blocks(sections: list, prepare_fn=None) -> list[dict]: - if prepare_fn is None: - prepare_fn = _prepare_pdf_image_assets - blocks: list[dict] = [] - for section in sorted(sections, key=lambda s: getattr(s, "order_index", 0)): - content = (getattr(section, "content", None) or "").strip() - if content: - blocks.append({"type": "text", "value": content}) - img = None - if getattr(section, "image_record", None): - img = memoir_image_to_dict(section.image_record) - if img: - prepared = prepare_fn([img]) - if prepared and prepared[0].get("url"): - blocks.append({"type": "image", "url": prepared[0]["url"]}) - return blocks - - def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]: storage = TencentCosStorageService.from_env() prepared_assets: list[dict] = [] @@ -132,7 +122,12 @@ class PDFService: logger.warning("PDF 图片下载失败: url=%s, error=%s", url, exc) return None - async def generate_pdf(self, book, chapters: List) -> bytes: + async def generate_pdf( + self, + book, + chapters: List, + asset_url_map: Optional[dict[str, str]] = None, + ) -> bytes: buffer = BytesIO() doc = SimpleDocTemplate(buffer, pagesize=A4) styles = getSampleStyleSheet() @@ -170,16 +165,27 @@ class PDFService: for chapter in chapters: story.append(Paragraph(chapter.title, heading_style)) story.append(Spacer(1, 0.2 * inch)) - sections = getattr(chapter, "sections", None) or [] - if sections: - blocks = sections_to_blocks(sections) + # 正文真源:canonical_markdown(与 API / 前端一致) + markdown = _chapter_markdown(chapter) + _, images_list = sections_to_content_and_images(chapter) + if not markdown: + markdown = getattr(chapter, "content", "") or "" + if not images_list: + images_list = list(getattr(chapter, "images", None) or []) + prepared_images = _prepare_pdf_image_assets(images_list) + blocks: list[dict] + if asset_url_map and collect_asset_ids_from_markdown(markdown): + blocks = split_markdown_by_asset_refs( + markdown, + lambda aid: asset_url_map.get(aid) if asset_url_map else None, + ) 
+ for b in blocks: + if b.get("type") == "text": + b["value"] = strip_legacy_image_placeholders( + b.get("value") or "" + ) else: - images = _prepare_pdf_image_assets( - getattr(chapter, "images", None) or [] - ) - blocks = split_content_blocks( - getattr(chapter, "content", "") or "", images - ) + blocks = split_content_blocks(markdown, prepared_images) for block in blocks: if block["type"] == "text": paragraphs = block["value"].split("\n\n") diff --git a/api/app/features/memoir/repo.py b/api/app/features/memoir/repo.py index 8e66363..faa0a63 100644 --- a/api/app/features/memoir/repo.py +++ b/api/app/features/memoir/repo.py @@ -1,10 +1,19 @@ """Memoir repository — Book, Chapter, MemoirState data access.""" +import uuid +from datetime import datetime, timezone + from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import Session, joinedload -from app.features.memoir.models import Book, Chapter, ChapterSection, MemoirState +from app.features.memoir.models import ( + Book, + Chapter, + ChapterSection, + ChapterVersion, + MemoirState, +) async def get_current_book(user_id: str, db: AsyncSession) -> Book | None: @@ -91,3 +100,101 @@ def get_archived_chapter_summaries_sync( if preview.strip(): summaries.append((ch.title or "", preview)) return summaries + + +def ensure_chapter_markdown_and_version_sync( + session: Session, + chapter: Chapter, + markdown: str, +) -> None: + """ + 为已有 chapter 设置 canonical_markdown 并创建 chapter_version。 + 由 _save_narrative_to_sections 调用,确保 markdown 真源与版本链。 + """ + from sqlalchemy import func + + count_stmt = select(func.count(ChapterVersion.id)).where( + ChapterVersion.chapter_id == chapter.id + ) + version_no = (session.execute(count_stmt).scalar() or 0) + 1 + + version = ChapterVersion( + id=str(uuid.uuid4()), + chapter_id=chapter.id, + version_no=version_no, + markdown_snapshot=markdown, + actor_type="ai", + source_type="generate", + ) + session.add(version) + session.flush() + 
chapter.canonical_markdown = markdown + chapter.current_version_id = version.id + + +def save_chapter_markdown_sync( + session: Session, + *, + user_id: str, + chapter_id: str | None, + title: str, + category: str, + order_index: int, + markdown: str, + source_segments: list[str] | None = None, +) -> Chapter: + """ + 将 markdown 写入 chapter.canonical_markdown 和 chapter_versions。 + Agent 不直接调用,由 service/task 调用。 + 若 chapter_id 为 None 则新建章节。 + """ + if chapter_id: + chapter = session.get(Chapter, chapter_id) + if not chapter or chapter.user_id != user_id: + raise ValueError(f"Chapter {chapter_id} not found or access denied") + else: + chapter = Chapter( + id=str(uuid.uuid4()), + user_id=user_id, + title=title, + category=category, + order_index=order_index, + status="completed", + is_new=True, + is_active=True, + source_segments=source_segments or [], + ) + session.add(chapter) + session.flush() + + # 创建 chapter_version + from sqlalchemy import func + + count_stmt = select(func.count(ChapterVersion.id)).where( + ChapterVersion.chapter_id == chapter.id + ) + version_no = (session.execute(count_stmt).scalar() or 0) + 1 + + version = ChapterVersion( + id=str(uuid.uuid4()), + chapter_id=chapter.id, + version_no=version_no, + markdown_snapshot=markdown, + actor_type="ai", + source_type="generate", + ) + session.add(version) + session.flush() + + chapter.canonical_markdown = markdown + chapter.current_version_id = version.id + chapter.title = title + chapter.is_new = True + if source_segments: + chapter.source_segments = list( + set((chapter.source_segments or []) + source_segments) + ) + + session.flush() + session.refresh(chapter) + return chapter diff --git a/api/app/features/memoir/service.py b/api/app/features/memoir/service.py index 4bbe9db..21b4c1f 100644 --- a/api/app/features/memoir/service.py +++ b/api/app/features/memoir/service.py @@ -1,6 +1,5 @@ """Memoir service — 回忆录编排(章节生成、状态流转);通过 MemoryService 获取 evidence。""" -import uuid from datetime import datetime, 
timezone from typing import List, Optional @@ -16,14 +15,18 @@ from app.agents.memoir.prompts import ( STAGE_TO_ORDER, ) from app.features.memoir import repo +from app.features.memoir.asset_resolver import ( + collect_asset_ids_for_chapter, + collect_asset_ids_for_chapters, + strip_legacy_image_placeholders, +) +from app.features.memoir.asset_urls import signed_urls_for_asset_ids from app.features.memoir.helpers import ( chapter_to_dict, + chapter_to_list_dict, is_image_permanently_unavailable, ) -from app.features.memoir.models import Book, Chapter, ChapterSection, MemoirImage -from app.features.memoir.memoir_images.parser import build_initial_image_assets -from app.features.memoir.memoir_images.serializers import image_dict_to_row_kwargs -from app.features.memoir.memoir_images.prompting import MemoirImagePromptService +from app.features.memoir.models import Book, Chapter, ChapterSection from app.features.memoir.memoir_images.settings import MemoirImageSettings from app.features.memory.service import MemoryService @@ -121,12 +124,19 @@ class MemoirService: stmt = ( select(Chapter) .where(Chapter.user_id == user_id, Chapter.is_active == True) - .options(joinedload(Chapter.sections)) + .options( + joinedload(Chapter.sections).joinedload(ChapterSection.image_record), + joinedload(Chapter.images), + ) .order_by(Chapter.order_index) ) result = await self._db.execute(stmt) chapters = list(result.unique().scalars().all()) - pdf_bytes = await pdf_service.generate_pdf(book, chapters) + asset_ids = collect_asset_ids_for_chapters(chapters) + asset_map = await signed_urls_for_asset_ids(self._db, asset_ids) + pdf_bytes = await pdf_service.generate_pdf( + book, chapters, asset_url_map=asset_map + ) return { "pdf_base64": pdf_bytes.decode("latin1"), "filename": f"{book.title}.pdf", @@ -138,6 +148,10 @@ class MemoirService: chapters = await repo.get_chapters_with_sections( user_id, self._db, is_new_only=is_new ) + asset_ids: set[str] = set() + for ch in chapters: + asset_ids |= 
collect_asset_ids_for_chapter(ch) + asset_map = await signed_urls_for_asset_ids(self._db, asset_ids) chapter_by_category: dict[str, Chapter] = {} for ch in chapters: if ch.category and ch.category not in chapter_by_category: @@ -147,7 +161,7 @@ class MemoirService: ch = chapter_by_category.pop(category, None) if ch: await self._cleanup_unavailable_images(ch) - all_chapters.append(chapter_to_dict(ch)) + all_chapters.append(chapter_to_list_dict(ch, asset_url_map=asset_map)) else: if is_new is True: continue @@ -155,13 +169,17 @@ class MemoirService: { "id": f"placeholder_{category}", "title": CHAPTER_CATEGORIES[category], - "content": "", + "category": category, "order_index": STAGE_TO_ORDER.get(category, 999), "status": "empty", - "category": category, - "images": [], + "summary": "", + "canonical_markdown": "", + "content": "", + "cover_asset": None, "cover_image": None, + "images": [], "sections": [], + "word_count": 0, "updated_at": None, "is_new": False, "source_segments": [], @@ -169,7 +187,7 @@ class MemoirService: ) for ch in chapter_by_category.values(): await self._cleanup_unavailable_images(ch) - all_chapters.append(chapter_to_dict(ch)) + all_chapters.append(chapter_to_list_dict(ch, asset_url_map=asset_map)) return all_chapters async def get_chapter(self, chapter_id: str, user_id: str) -> dict: @@ -181,7 +199,10 @@ class MemoirService: if not chapter.is_active: raise HTTPException(status_code=404, detail="Chapter not found") await self._cleanup_unavailable_images(chapter) - return chapter_to_dict(chapter) + asset_map = await signed_urls_for_asset_ids( + self._db, collect_asset_ids_for_chapter(chapter) + ) + return chapter_to_dict(chapter, asset_url_map=asset_map) async def disable_chapter(self, chapter_id: str, user_id: str) -> dict: chapter = await self._db.get(Chapter, chapter_id) @@ -220,10 +241,14 @@ class MemoirService: async def check_and_trigger_cover_generation(self, user_id: str) -> dict: """ - 检查可生成封面的章节(section 配图 > 3 且无已完成封面), - 
若有则触发生成任务。已有封面的章节不再检查。 + 有正文、尚无 cover_asset、且 legacy 封面 MemoirImage 未 completed 时, + 派发 generate_chapter_cover(由 intent/asset 闭环完成)。 """ - from app.tasks.memoir_tasks import generate_chapter_images + from app.tasks.chapter_cover_tasks import generate_chapter_cover + + img_settings = MemoirImageSettings.from_env() + if not img_settings.enabled: + return {"triggered": []} chapters = await repo.get_chapters_with_sections( user_id, self._db, active_only=True, is_new_only=None @@ -232,59 +257,33 @@ class MemoirService: for ch in chapters: if not ch.category or ch.status == "empty": continue - sections = getattr(ch, "sections", None) or [] - section_image_count = sum( - 1 for s in sections if getattr(s, "image_id", None) + if getattr(ch, "cover_asset_id", None): + continue + md = (ch.canonical_markdown or "").strip() + section_blob = "\n\n".join( + (s.content or "").strip() + for s in getattr(ch, "sections", None) or [] + if (s.content or "").strip() ) + body = md or strip_legacy_image_placeholders(section_blob).strip() + if not body: + continue images = getattr(ch, "images", None) or [] cover_rec = next( (m for m in images if getattr(m, "section_id", None) is None), None, ) - if section_image_count <= 3: - continue if ( cover_rec and (getattr(cover_rec, "status") or "").strip() == "completed" ): continue - if cover_rec is None: - img_settings = MemoirImageSettings.from_env() - if img_settings.enabled: - now_iso = datetime.now(timezone.utc).isoformat() - cover_ph = { - "placeholder": "{{{{{{{{IMAGE:章节封面}}}}}}}}", - "description": "章节封面", - "index": 0, - } - style = MemoirImagePromptService.CATEGORY_STYLE_MAP.get( - ch.category or "", img_settings.default_style - ) - cover_asset = build_initial_image_assets( - [cover_ph], - img_settings.provider, - style, - img_settings.default_size, - now_iso, - )[0] - kwargs = image_dict_to_row_kwargs(cover_asset) - cover_mi = MemoirImage( - id=str(uuid.uuid4()).replace("-", "")[:32], - chapter_id=ch.id, - section_id=None, - 
order_index=0, - **kwargs, - ) - self._db.add(cover_mi) - await self._db.commit() - await self._db.refresh(ch) - logger.info("创建封面占位: chapter=%s", ch.id) try: - generate_chapter_images.delay(ch.id) + generate_chapter_cover.delay(ch.id) triggered.append(ch.id) - logger.info("触发生成封面: chapter=%s", ch.id) + logger.info("触发生成章节封面(asset): chapter=%s", ch.id) except Exception as exc: - logger.warning("封面生成任务派发失败: chapter=%s, error=%s", ch.id, exc) + logger.warning("封面任务派发失败: chapter=%s, error=%s", ch.id, exc) return {"triggered": triggered} async def mark_memoir_read(self, user_id: str) -> dict: diff --git a/api/app/features/memory/chunker.py b/api/app/features/memory/chunker.py index 6be02f7..6dd4e4f 100644 --- a/api/app/features/memory/chunker.py +++ b/api/app/features/memory/chunker.py @@ -1,8 +1,38 @@ -"""Transcript chunker — split raw text into retrieval-ready chunks (skeleton).""" +"""Transcript chunker — split raw text into retrieval-ready chunks.""" + +import re def chunk_transcript( - text: str, *, max_tokens: int = 512, overlap: int = 64 + text: str, *, max_chars: int = 800, overlap_chars: int = 100 ) -> list[str]: - """Split transcript text into overlapping chunks.""" - raise NotImplementedError + """ + Split transcript text into overlapping chunks. + Uses character count as proxy for tokens (~4 chars/token for Chinese). 
+ """ + if not text or not text.strip(): + return [] + text = text.strip() + if len(text) <= max_chars: + return [text] if text else [] + + chunks: list[str] = [] + start = 0 + step = max_chars - overlap_chars + + while start < len(text): + end = start + max_chars + chunk = text[start:end] + # 尽量在句末切分 + if end < len(text): + for sep in ["。", "!", "?", "\n", ";", ".", "!", "?"]: + last_sep = chunk.rfind(sep) + if last_sep > max_chars // 2: + chunk = chunk[: last_sep + 1] + end = start + len(chunk) + break + if chunk.strip(): + chunks.append(chunk.strip()) + start += len(chunk) if chunk else step + + return chunks diff --git a/api/app/features/memory/deps.py b/api/app/features/memory/deps.py index eb636ff..b69acb2 100644 --- a/api/app/features/memory/deps.py +++ b/api/app/features/memory/deps.py @@ -2,8 +2,16 @@ from fastapi import Depends from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_async_db +from app.core.dependencies import get_embedding_provider from app.features.memory.service import MemoryService -async def get_memory_service(db: AsyncSession = Depends(get_async_db)) -> MemoryService: - return MemoryService(db=db) +def _get_embedding_provider(): + return get_embedding_provider() + + +async def get_memory_service( + db: AsyncSession = Depends(get_async_db), + embedding_provider=Depends(_get_embedding_provider), +) -> MemoryService: + return MemoryService(db=db, embedding_provider=embedding_provider) diff --git a/api/app/features/memory/repo.py b/api/app/features/memory/repo.py index 81e4026..9bbbe81 100644 --- a/api/app/features/memory/repo.py +++ b/api/app/features/memory/repo.py @@ -1 +1,307 @@ -"""Memory repository — data access layer (skeleton).""" +"""Memory repository — MemorySource, MemoryChunk, MemoryFact, TimelineEvent data access.""" + +import uuid +from datetime import datetime, timezone + +from sqlalchemy import select, text +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import Session + +from 
app.features.memory.models import ( + MemoryChunk, + MemoryFact, + MemorySource, + TimelineEvent, +) + + +def _new_id() -> str: + return str(uuid.uuid4()) + + +def create_source_sync( + session: Session, + *, + user_id: str, + source_type: str, + raw_text: str | None = None, + conversation_id: str | None = None, + captured_at: datetime | None = None, +) -> MemorySource: + """Create a memory source (sync). Caller must commit.""" + source = MemorySource( + id=_new_id(), + user_id=user_id, + source_type=source_type, + raw_text=raw_text, + conversation_id=conversation_id, + captured_at=captured_at or datetime.now(timezone.utc), + ) + session.add(source) + return source + + +async def create_source( + db: AsyncSession, + *, + user_id: str, + source_type: str, + raw_text: str | None = None, + conversation_id: str | None = None, + captured_at: datetime | None = None, +) -> MemorySource: + """Create a memory source. Caller must commit.""" + source = MemorySource( + id=_new_id(), + user_id=user_id, + source_type=source_type, + raw_text=raw_text, + conversation_id=conversation_id, + captured_at=captured_at or datetime.now(timezone.utc), + ) + db.add(source) + return source + + +def create_chunk_sync( + session: Session, + *, + source_id: str, + user_id: str, + content: str, + chunk_index: int, +) -> MemoryChunk: + """Create a memory chunk (sync). Caller must commit.""" + chunk = MemoryChunk( + id=_new_id(), + source_id=source_id, + user_id=user_id, + content=content, + chunk_index=chunk_index, + ) + session.add(chunk) + return chunk + + +async def create_chunk( + db: AsyncSession, + *, + source_id: str, + user_id: str, + content: str, + chunk_index: int, +) -> MemoryChunk: + """Create a memory chunk. 
Caller must commit.""" + chunk = MemoryChunk( + id=_new_id(), + source_id=source_id, + user_id=user_id, + content=content, + chunk_index=chunk_index, + ) + db.add(chunk) + return chunk + + +def update_chunk_fts_sync(session: Session, chunk_id: str) -> None: + """Populate content_tsv for FTS (sync). Caller must commit.""" + session.execute( + text( + "UPDATE memory_chunks SET content_tsv = to_tsvector('simple', content) WHERE id = :id" + ), + {"id": chunk_id}, + ) + + +async def update_chunk_embedding( + db: AsyncSession, chunk_id: str, embedding: list[float] +) -> None: + """Update chunk embedding. Caller must commit.""" + chunk = await db.get(MemoryChunk, chunk_id) + if chunk: + chunk.embedding = embedding + + +async def update_chunk_fts(db: AsyncSession, chunk_id: str) -> None: + """Populate content_tsv for FTS. Caller must commit.""" + await db.execute( + text( + "UPDATE memory_chunks SET content_tsv = to_tsvector('simple', content) WHERE id = :id" + ), + {"id": chunk_id}, + ) + + +async def search_chunks_fts( + db: AsyncSession, user_id: str, query: str, limit: int = 20 +) -> list[dict]: + """FTS search on memory_chunks. 
Returns list of {id, content, chunk_index}.""" + if not query or not query.strip(): + return [] + q = query.strip() + stmt = text(""" + SELECT id, content, chunk_index + FROM memory_chunks + WHERE user_id = :user_id AND (is_excluded IS NOT TRUE OR is_excluded = false) + AND content_tsv IS NOT NULL AND content_tsv @@ plainto_tsquery('simple', :q) + ORDER BY ts_rank_cd(content_tsv, plainto_tsquery('simple', :q2)) DESC + LIMIT :lim + """) + result = await db.execute(stmt, {"user_id": user_id, "q": q, "q2": q, "lim": limit}) + rows = result.mappings().all() + return [ + {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]} + for r in rows + ] + + +async def get_chunks_by_ids( + db: AsyncSession, chunk_ids: list[str] +) -> list[MemoryChunk]: + """Fetch chunks by IDs.""" + if not chunk_ids: + return [] + stmt = select(MemoryChunk).where(MemoryChunk.id.in_(chunk_ids)) + result = await db.execute(stmt) + chunks = list(result.unique().scalars().all()) + order = {cid: i for i, cid in enumerate(chunk_ids)} + return sorted(chunks, key=lambda c: order.get(c.id, 999)) + + +async def get_facts_for_user( + db: AsyncSession, user_id: str, limit: int = 20 +) -> list[MemoryFact]: + """Fetch recent facts for user.""" + stmt = ( + select(MemoryFact) + .where(MemoryFact.user_id == user_id, MemoryFact.status == "confirmed") + .order_by(MemoryFact.created_at.desc()) + .limit(limit) + ) + result = await db.execute(stmt) + return list(result.unique().scalars().all()) + + +async def search_chunks_vector( + db: AsyncSession, user_id: str, query_embedding: list[float], limit: int = 20 +) -> list[dict]: + """Vector similarity search. 
Returns list of {id, content, chunk_index, distance}.""" + if not query_embedding: + return [] + # pgvector cosine distance: 1 - cosine_similarity, lower is better + stmt = text(""" + SELECT id, content, chunk_index, + (embedding <=> :emb::vector) AS distance + FROM memory_chunks + WHERE user_id = :user_id AND (is_excluded IS NOT TRUE OR is_excluded = false) + AND embedding IS NOT NULL + ORDER BY embedding <=> :emb2::vector + LIMIT :lim + """) + emb_str = "[" + ",".join(str(x) for x in query_embedding) + "]" + result = await db.execute( + stmt, + {"user_id": user_id, "emb": emb_str, "emb2": emb_str, "lim": limit}, + ) + rows = result.mappings().all() + return [ + { + "id": r["id"], + "content": r["content"], + "chunk_index": r["chunk_index"], + "distance": float(r["distance"]), + } + for r in rows + ] + + +def retrieve_evidence_sync( + session: Session, user_id: str, query: str, *, top_k: int = 10 +) -> dict: + """ + Sync evidence retrieval for Celery tasks. + FTS only (no vector), returns evidence bundle. 
+ """ + if not query or not query.strip(): + return { + "relevant_chunks": [], + "relevant_summaries": [], + "relevant_facts": [], + "timeline_hints": [], + "relevant_stories": [], + } + q = query.strip() + # FTS chunks + stmt = text(""" + SELECT id, content, chunk_index + FROM memory_chunks + WHERE user_id = :user_id AND (is_excluded IS NOT TRUE OR is_excluded = false) + AND content_tsv IS NOT NULL AND content_tsv @@ plainto_tsquery('simple', :q) + ORDER BY ts_rank_cd(content_tsv, plainto_tsquery('simple', :q2)) DESC + LIMIT :lim + """) + result = session.execute(stmt, {"user_id": user_id, "q": q, "q2": q, "lim": top_k}) + rows = result.mappings().all() + relevant_chunks = [ + {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]} + for r in rows + ] + # Facts + facts_stmt = ( + select(MemoryFact) + .where(MemoryFact.user_id == user_id, MemoryFact.status == "confirmed") + .order_by(MemoryFact.created_at.desc()) + .limit(top_k) + ) + facts = list(session.execute(facts_stmt).unique().scalars().all()) + relevant_facts = [ + { + "id": f.id, + "fact_type": f.fact_type, + "subject": f.subject, + "predicate": f.predicate, + "object_json": f.object_json, + } + for f in facts + ] + # Timeline + events_stmt = ( + select(TimelineEvent) + .where(TimelineEvent.user_id == user_id) + .order_by(TimelineEvent.event_year.desc().nullslast()) + .limit(top_k) + ) + events = list(session.execute(events_stmt).unique().scalars().all()) + timeline_hints = [ + { + "id": e.id, + "event_year": e.event_year, + "event_date": e.event_date, + "title": e.title, + "description": e.description, + } + for e in events + ] + return { + "relevant_chunks": relevant_chunks, + "relevant_summaries": [], + "relevant_facts": relevant_facts, + "timeline_hints": timeline_hints, + "relevant_stories": [], + } + + +async def get_timeline_events_for_user( + db: AsyncSession, user_id: str, limit: int = 20 +) -> list[TimelineEvent]: + """Fetch timeline events for user.""" + stmt = ( + 
select(TimelineEvent) + .where(TimelineEvent.user_id == user_id) + .order_by( + TimelineEvent.event_year.desc().nullslast(), TimelineEvent.created_at.desc() + ) + .limit(limit) + ) + result = await db.execute(stmt) + return list(result.unique().scalars().all()) diff --git a/api/app/features/memory/retriever.py b/api/app/features/memory/retriever.py index 278bbfd..6b25d89 100644 --- a/api/app/features/memory/retriever.py +++ b/api/app/features/memory/retriever.py @@ -1,10 +1,102 @@ -"""Hybrid retriever — metadata filter + FTS + vector retrieval + score fusion (skeleton).""" +"""Hybrid retriever — metadata filter + FTS + vector retrieval + score fusion.""" + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.features.memory.repo import ( + get_facts_for_user, + get_timeline_events_for_user, + search_chunks_fts, + search_chunks_vector, +) +from app.ports.embedding import EmbeddingProvider + + +def _rrf_merge( + fts_items: list[dict], vector_items: list[dict], k: int = 60 +) -> list[dict]: + """Reciprocal Rank Fusion. 
Merge FTS and vector results by id.""" + scores: dict[str, float] = {} + for rank, item in enumerate(fts_items): + cid = item["id"] + scores[cid] = scores.get(cid, 0) + 1 / (k + rank + 1) + for rank, item in enumerate(vector_items): + cid = item["id"] + scores[cid] = scores.get(cid, 0) + 1 / (k + rank + 1) + + all_items = {x["id"]: x for x in fts_items + vector_items} + sorted_ids = sorted(scores.keys(), key=lambda i: scores[i], reverse=True) + return [all_items[i] for i in sorted_ids] class HybridRetriever: - """Phase 2+ implementation: combine FTS, vector, and metadata filter results.""" + """Combine FTS, vector, and metadata filter into evidence bundle.""" - async def retrieve( - self, user_id: str, query: str, *, top_k: int = 10 - ) -> list[dict]: - raise NotImplementedError + def __init__( + self, + db: AsyncSession, + *, + embedding_provider: EmbeddingProvider | None = None, + ): + self._db = db + self._embedding = embedding_provider + + async def retrieve(self, user_id: str, query: str, *, top_k: int = 10) -> dict: + """ + Return evidence bundle: + {relevant_chunks, relevant_summaries, relevant_facts, timeline_hints, relevant_stories} + """ + fts_chunks = await search_chunks_fts( + self._db, user_id=user_id, query=query, limit=top_k * 2 + ) + + vector_chunks: list[dict] = [] + if self._embedding and query.strip(): + q_emb = await self._embedding.embed_text(query.strip()) + if q_emb: + vector_chunks = await search_chunks_vector( + self._db, user_id=user_id, query_embedding=q_emb, limit=top_k * 2 + ) + + merged = _rrf_merge(fts_chunks, vector_chunks)[:top_k] + relevant_chunks = [ + { + "id": c["id"], + "content": c["content"], + "chunk_index": c.get("chunk_index", 0), + } + for c in merged + ] + + facts = await get_facts_for_user(self._db, user_id=user_id, limit=top_k) + relevant_facts = [ + { + "id": f.id, + "fact_type": f.fact_type, + "subject": f.subject, + "predicate": f.predicate, + "object_json": f.object_json, + } + for f in facts + ] + + events = await 
get_timeline_events_for_user( + self._db, user_id=user_id, limit=top_k + ) + timeline_hints = [ + { + "id": e.id, + "event_year": e.event_year, + "event_date": e.event_date, + "title": e.title, + "description": e.description, + } + for e in events + ] + + return { + "relevant_chunks": relevant_chunks, + "relevant_summaries": [], + "relevant_facts": relevant_facts, + "timeline_hints": timeline_hints, + "relevant_stories": [], + } diff --git a/api/app/features/memory/service.py b/api/app/features/memory/service.py index 3c451f4..6663b73 100644 --- a/api/app/features/memory/service.py +++ b/api/app/features/memory/service.py @@ -1,26 +1,129 @@ """ MemoryService — conversation / memoir 的统一门面。 -一期先实现基础接口签名,具体逻辑后续补充。 + +- ingest_transcript: transcript -> memory_sources, chunks, embedding, FTS +- retrieve: 委托 HybridRetriever 返回 evidence bundle """ from sqlalchemy.ext.asyncio import AsyncSession +from app.features.memory.chunker import chunk_transcript +from app.features.memory.repo import ( + create_chunk, + create_source, + update_chunk_embedding, + update_chunk_fts, +) +from app.ports.embedding import EmbeddingProvider + class MemoryService: - def __init__(self, db: AsyncSession): + def __init__( + self, + db: AsyncSession, + *, + embedding_provider: EmbeddingProvider | None = None, + ): self._db = db + self._embedding = embedding_provider async def ingest_transcript( self, user_id: str, conversation_id: str, transcript: str ) -> str: - """Ingest conversation transcript into memory. Returns source_id.""" - raise NotImplementedError("Phase 2+ implementation") + """ + Ingest conversation transcript into memory. + Creates MemorySource, chunks, populates embedding + FTS. + Returns source_id. 
+ """ + if not transcript or not transcript.strip(): + raise ValueError("transcript cannot be empty") + + source = await create_source( + self._db, + user_id=user_id, + source_type="transcript", + raw_text=transcript.strip(), + conversation_id=conversation_id, + ) + + chunks_text = chunk_transcript(transcript.strip()) + chunk_records = [] + for i, content in enumerate(chunks_text): + chunk = await create_chunk( + self._db, + source_id=source.id, + user_id=user_id, + content=content, + chunk_index=i, + ) + chunk_records.append((chunk.id, content)) + + await self._db.flush() + + # FTS: populate content_tsv + for chunk_id, _ in chunk_records: + await update_chunk_fts(self._db, chunk_id) + + # Embedding: 若有 provider 则写入 + if self._embedding and chunk_records: + texts = [c for _, c in chunk_records] + embeddings = await self._embedding.embed_texts(texts) + for (chunk_id, _), emb in zip(chunk_records, embeddings): + if emb: + await update_chunk_embedding(self._db, chunk_id, emb) + + await self._db.commit() + return source.id async def retrieve(self, user_id: str, query: str, *, top_k: int = 10) -> dict: - """Retrieve relevant evidence for a query. 一期返回空结构,二期接入混合检索。""" - return { - "relevant_chunks": [], - "relevant_summaries": [], - "relevant_facts": [], - "timeline_hints": [], - } + """Retrieve relevant evidence. 委托 HybridRetriever。""" + from app.features.memory.retriever import HybridRetriever + + retriever = HybridRetriever(self._db, embedding_provider=self._embedding) + return await retriever.retrieve(user_id=user_id, query=query, top_k=top_k) + + +def ingest_transcript_sync( + session, + user_id: str, + conversation_id: str, + transcript: str, +) -> str: + """ + Sync transcript ingest for Celery tasks. + Creates source + chunks + FTS. Skips embedding (async). + Returns source_id. 
+ """ + from app.features.memory.chunker import chunk_transcript + from app.features.memory.repo import ( + create_chunk_sync, + create_source_sync, + update_chunk_fts_sync, + ) + + if not transcript or not transcript.strip(): + raise ValueError("transcript cannot be empty") + + source = create_source_sync( + session, + user_id=user_id, + source_type="transcript", + raw_text=transcript.strip(), + conversation_id=conversation_id, + ) + session.flush() + + chunks_text = chunk_transcript(transcript.strip()) + for i, content in enumerate(chunks_text): + chunk = create_chunk_sync( + session, + source_id=source.id, + user_id=user_id, + content=content, + chunk_index=i, + ) + session.flush() + update_chunk_fts_sync(session, chunk.id) + + session.commit() + return source.id diff --git a/api/app/features/story/__init__.py b/api/app/features/story/__init__.py new file mode 100644 index 0000000..6933c73 --- /dev/null +++ b/api/app/features/story/__init__.py @@ -0,0 +1 @@ +"""Story Layer:可独立讲述的人生故事,markdown 正文真源。""" diff --git a/api/app/features/story/backfill.py b/api/app/features/story/backfill.py new file mode 100644 index 0000000..4306c8e --- /dev/null +++ b/api/app/features/story/backfill.py @@ -0,0 +1,34 @@ +""" +Story 图片回填 — 将 asset:// 引用插入 markdown。 + +图片生成成功后,基于 source_span 或 fallback 位置插入 ![caption](asset://asset_id)。 +""" + + +def backfill_image_into_markdown( + markdown: str, + asset_id: str, + caption: str, + *, + source_span: dict | None = None, +) -> str: + """ + 将图片引用回填到 markdown。 + + 格式:![caption](asset://asset_id) + 位置:若 source_span 有效则在对应段落后插入;否则在开头插入。 + """ + img_ref = f"![{caption}](asset://{asset_id})" + if not markdown or not markdown.strip(): + return img_ref + + if source_span and isinstance(source_span, dict): + start = source_span.get("start") + end = source_span.get("end") + if start is not None and end is not None and 0 <= start <= end <= len(markdown): + return markdown[:end] + "\n\n" + img_ref + "\n\n" + markdown[end:] + + parts = 
markdown.strip().split("\n\n", 1) + if len(parts) == 1: + return img_ref + "\n\n" + markdown.strip() + return parts[0] + "\n\n" + img_ref + "\n\n" + parts[1] diff --git a/api/app/features/story/deps.py b/api/app/features/story/deps.py new file mode 100644 index 0000000..dd48524 --- /dev/null +++ b/api/app/features/story/deps.py @@ -0,0 +1,9 @@ +from fastapi import Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.db import get_async_db +from app.features.story.service import StoryService + + +async def get_story_service(db: AsyncSession = Depends(get_async_db)) -> StoryService: + return StoryService(db=db) diff --git a/api/app/features/story/image_intent_extractor.py b/api/app/features/story/image_intent_extractor.py new file mode 100644 index 0000000..8c7461f --- /dev/null +++ b/api/app/features/story/image_intent_extractor.py @@ -0,0 +1,115 @@ +""" +StoryImageIntentExtractor — 从 story markdown 提取唯一主图意图。 + +每个 story 必须且仅有一张主插图。提取策略: +1. 最具画面感的场景段落 +2. 具有人物 + 动作 + 场景 + 时代细节的段落 +3. 故事转折点或记忆锚点段落 +4. 
若 story 过于抽象,则退化为 story title/stage/time_refs/place_refs/people_refs/summary +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import Any + + +@dataclass +class StoryImageIntentResult: + """提取出的主图意图。""" + + caption: str + prompt_brief: str + source_span: dict[str, Any] | None + style_profile: str | None + + +# 画面感相关词汇(用于简单启发式评分) +_SCENE_WORDS = frozenset( + "坐 站 走 跑 看 望 笑 哭 说 听 拿 放 穿 戴 吃 喝 院子 路 巷 房 屋 树 花 山 水 河 桥 街 镇 村 城 夏天 冬天 春天 秋天 早晨 傍晚 夜晚 童年 少年 青年 中年 老人 奶奶 爷爷 父亲 母亲 孩子 朋友 老师 同学".split() +) + + +def _score_paragraph(text: str) -> float: + """对段落做简单画面感评分。""" + if not text or len(text.strip()) < 20: + return 0.0 + t = text.strip() + score = min(len(t) / 100.0, 2.0) # 长度 0~2 分 + overlap = sum(1 for w in _SCENE_WORDS if w in t) + score += min(overlap * 0.3, 2.0) # 场景词 0~2 分 + return score + + +def extract_primary_image_intent( + markdown: str, + *, + title: str = "", + stage: str | None = None, + summary: str | None = None, + people_refs: list[str] | None = None, + place_refs: list[str] | None = None, + time_start: str | None = None, + time_end: str | None = None, + style_profile: str | None = None, +) -> StoryImageIntentResult: + """ + 从 story markdown 提取唯一主图意图。 + + 优先从正文中选取最具画面感的段落;若正文过短或过于抽象,则使用 fallback。 + """ + paragraphs: list[tuple[str, int, int]] = [] # (text, start, end) + if markdown and markdown.strip(): + parts = re.split(r"\n\n+", markdown.strip()) + offset = 0 + for p in parts: + t = p.strip() + if t: + start = markdown.find(t, offset) + end = start + len(t) + paragraphs.append((t, start, end)) + offset = end + + best_caption = "" + best_prompt_brief = "" + best_source_span: dict[str, Any] | None = None + best_score = 0.0 + + for text, start, end in paragraphs: + score = _score_paragraph(text) + if score > best_score: + best_score = score + best_caption = (text[:80] + "…") if len(text) > 80 else text + best_prompt_brief = text[:500].strip() + best_source_span = {"start": start, "end": end, 
"text_preview": text[:100]} + + if best_score >= 0.5: + return StoryImageIntentResult( + caption=best_caption, + prompt_brief=best_prompt_brief, + source_span=best_source_span, + style_profile=style_profile, + ) + + # Fallback: story title, stage, time, place, people, summary + fallback_parts = [] + if title: + fallback_parts.append(title) + if stage: + fallback_parts.append(stage) + if time_start or time_end: + fallback_parts.append(f"{time_start or ''}-{time_end or ''}".strip("-")) + if place_refs: + fallback_parts.extend(place_refs[:3]) + if people_refs: + fallback_parts.extend(people_refs[:3]) + if summary: + fallback_parts.append(summary[:200]) + fallback_text = ",".join(p for p in fallback_parts if p) or "人生故事" + return StoryImageIntentResult( + caption=fallback_text[:80], + prompt_brief=fallback_text, + source_span=None, + style_profile=style_profile, + ) diff --git a/api/app/features/story/models.py b/api/app/features/story/models.py new file mode 100644 index 0000000..4edb2b3 --- /dev/null +++ b/api/app/features/story/models.py @@ -0,0 +1,162 @@ +""" +Story Layer 数据模型。 + +- stories: 可独立讲述的人生故事,canonical_markdown 为正文真源 +- story_versions: 版本链,记录每次生成/编辑 +- story_evidence_links: story 与 evidence(chunk/fact/timeline_event/summary)的关联 +""" + +from sqlalchemy import ( + Column, + DateTime, + Float, + ForeignKey, + Integer, + JSON, + String, + Text, +) +from sqlalchemy.orm import relationship + +from app.core.db import Base, utc_now + + +class Story(Base): + """可独立讲述的一段人生故事,正文真源。""" + + __tablename__ = "stories" + + id = Column(String, primary_key=True) + user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True) + title = Column(String, nullable=False) + stage = Column( + String, nullable=True + ) # childhood / education / career / family / belief / summary + story_type = Column( + String, nullable=True + ) # event / person / relationship / reflection / turning_point + summary = Column(Text, nullable=True) + canonical_markdown = Column(Text, 
nullable=True) # 当前生效正文 + time_start = Column(String, nullable=True) # 起始时间,粗粒度 year/month/date + time_end = Column(String, nullable=True) + people_refs = Column(JSON, nullable=True) + place_refs = Column(JSON, nullable=True) + tag_refs = Column(JSON, nullable=True) + status = Column(String, default="active") # active / archived / merged / draft + confidence = Column(Float, nullable=True) + current_version_id = Column(String, nullable=True) # FK 在 migration 中分步添加 + created_at = Column(DateTime(timezone=True), default=utc_now) + updated_at = Column(DateTime(timezone=True), default=utc_now, onupdate=utc_now) + + versions = relationship( + "StoryVersion", + back_populates="story", + foreign_keys="StoryVersion.story_id", + cascade="all, delete-orphan", + ) + current_version = relationship( + "StoryVersion", + primaryjoin="Story.current_version_id == StoryVersion.id", + foreign_keys="StoryVersion.id", + ) + user = relationship("User", back_populates="stories") + evidence_links = relationship( + "StoryEvidenceLink", + back_populates="story", + cascade="all, delete-orphan", + ) + chapter_links = relationship( + "ChapterStoryLink", + back_populates="story", + cascade="all, delete-orphan", + ) + image_intents = relationship( + "StoryImageIntent", + back_populates="story", + cascade="all, delete-orphan", + ) + + +class StoryVersion(Base): + """Story 版本快照,记录正文变更与来源。""" + + __tablename__ = "story_versions" + + id = Column(String, primary_key=True) + story_id = Column( + String, ForeignKey("stories.id", ondelete="CASCADE"), nullable=False, index=True + ) + version_no = Column(Integer, nullable=False) + markdown_snapshot = Column(Text, nullable=False) + change_summary = Column(Text, nullable=True) + actor_type = Column(String, nullable=True) # ai / user / editor / system + source_type = Column( + String, nullable=True + ) # generate / rewrite / merge / manual / migration + parent_version_id = Column( + String, ForeignKey("story_versions.id", ondelete="SET NULL"), nullable=True + 
class StoryEvidenceLink(Base):
    """Association row linking a story to one piece of evidence."""

    __tablename__ = "story_evidence_links"

    id = Column(String, primary_key=True)
    # Rows are removed by the database when the owning story is deleted.
    story_id = Column(
        String, ForeignKey("stories.id", ondelete="CASCADE"), nullable=False, index=True
    )
    evidence_type = Column(
        String, nullable=False
    )  # chunk / fact / timeline_event / summary
    # No FK constraint is declared for this column; presumably the target table
    # is chosen by ``evidence_type`` -- verify against the readers of this model.
    evidence_id = Column(String, nullable=False)
    role = Column(String, nullable=True)  # primary / supporting / background
    weight = Column(Float, nullable=True)
    created_at = Column(DateTime(timezone=True), default=utc_now)

    story = relationship("Story", back_populates="evidence_links")
async def create_story(
    db: AsyncSession,
    *,
    user_id: str,
    title: str,
    stage: str | None = None,
    story_type: str | None = None,
    summary: str | None = None,
    canonical_markdown: str | None = None,
) -> Story:
    """Stage a new ``Story`` on the session and return it.

    A fresh UUID4 string is used as the primary key, and a missing or empty
    ``canonical_markdown`` is stored as ``""``. Nothing is flushed or
    committed here; the caller owns the transaction.
    """
    body_text = canonical_markdown if canonical_markdown else ""
    new_story = Story(
        id=str(uuid.uuid4()),
        user_id=user_id,
        title=title,
        stage=stage,
        story_type=story_type,
        summary=summary,
        canonical_markdown=body_text,
    )
    db.add(new_story)
    return new_story
async def get_stories_for_user(
    db: AsyncSession, user_id: str, *, status: str | None = "active"
) -> list[Story]:
    """Return the user's stories, newest first.

    Args:
        db: Async session used to run the query.
        user_id: Owner whose stories are listed.
        status: When truthy, only stories with this status are returned;
            pass ``None`` (or ``""``) to skip the status filter.
    """
    conditions = [Story.user_id == user_id]
    if status:
        conditions.append(Story.status == status)

    query = select(Story).where(*conditions).order_by(Story.created_at.desc())
    rows = await db.execute(query)
    return list(rows.unique().scalars().all())
async def get_stories_by_ids(db: AsyncSession, story_ids: list[str]) -> list[Story]:
    """Load the stories whose ids are in ``story_ids``, ordered like the input.

    An empty ``story_ids`` short-circuits to ``[]`` without querying. Ids with
    no matching row are simply absent from the result.
    """
    if not story_ids:
        return []

    rows = await db.execute(select(Story).where(Story.id.in_(story_ids)))
    found = list(rows.unique().scalars().all())

    # Position of each requested id; for a repeated id the last position wins.
    position: dict[str, int] = {}
    for idx, sid in enumerate(story_ids):
        position[sid] = idx

    found.sort(key=lambda story: position.get(story.id, 999))
    return found
async def _extract_and_store_image_intent(
    db,
    *,
    story,
    version,
    markdown: str,
) -> None:
    """Extract the primary image intent from ``markdown`` and persist it.

    Steps:

    1. Delete the story's intents that are ``pending`` or ``failed``; rows in
       other states (e.g. ``processing``) are left alone so that an in-flight
       worker can still find its row.
    2. Run the extractor on ``markdown`` with the story's metadata.
    3. If a primary intent row still exists:
       * same version and already ``processing``/``completed`` -> no-op;
       * otherwise reuse that row in place (re-pointing it at ``version`` when
         the version differs) and reset it to ``pending``.
    4. If no row exists, create a new ``pending`` intent.

    Nothing is committed here; the caller owns the transaction.
    """
    await delete_story_image_intents_by_story(
        db, story.id, statuses=["pending", "failed"]
    )
    extracted = extract_primary_image_intent(
        markdown,
        title=story.title or "",
        stage=story.stage,
        summary=story.summary,
        people_refs=story.people_refs or [],
        place_refs=story.place_refs or [],
        time_start=story.time_start,
        time_end=story.time_end,
    )
    current = await get_story_image_intent_by_story(db, story.id, role="primary")
    stamp = datetime.now(timezone.utc)

    if not current:
        await create_story_image_intent(
            db,
            story_id=story.id,
            story_version_id=version.id,
            caption=extracted.caption,
            prompt_brief=extracted.prompt_brief,
            source_span=extracted.source_span,
            style_profile=extracted.style_profile,
        )
        return

    same_version = current.story_version_id == version.id
    if same_version and (current.status or "").strip() in ("processing", "completed"):
        # Work for this exact version is already running or done.
        return

    if not same_version:
        # Reuse the same primary-key row rather than delete + insert, so a
        # Celery task that is still running can look its intent up by id.
        current.story_version_id = version.id

    current.caption = extracted.caption
    current.prompt_brief = extracted.prompt_brief
    current.source_span = extracted.source_span
    current.style_profile = extracted.style_profile
    current.status = "pending"
    current.error = None
    current.asset_id = None
    current.updated_at = stamp
+ parent_id = story.current_version_id + version_no = (await count_story_versions(self._db, story_id)) + 1 + version = await create_story_version( + self._db, + story_id=story_id, + version_no=version_no, + markdown_snapshot=markdown_snapshot, + actor_type=actor_type, + source_type=source_type, + parent_version_id=parent_id, + prompt_meta=prompt_meta, + ) + version.change_summary = change_summary + story.current_version_id = version.id + story.canonical_markdown = markdown_snapshot + await _extract_and_store_image_intent( + self._db, + story=story, + version=version, + markdown=markdown_snapshot, + ) + await self._db.commit() + from app.tasks.story_image_tasks import generate_story_image + + try: + generate_story_image.delay(story_id) + except Exception as exc: + logger.warning("派发 generate_story_image 失败: %s", exc) + return version.id + + async def link_evidence( + self, + story_id: str, + evidence_type: str, + evidence_id: str, + *, + role: str = "primary", + weight: float | None = None, + ) -> None: + """Add evidence link. 
Caller must ensure story exists.""" + await create_story_evidence_link( + self._db, + story_id=story_id, + evidence_type=evidence_type, + evidence_id=evidence_id, + role=role, + weight=weight, + ) + await self._db.commit() + + async def get_stories( + self, user_id: str, *, status: str | None = "active" + ) -> list[dict]: + """List stories for user.""" + stories = await get_stories_for_user(self._db, user_id, status=status) + return [ + { + "id": s.id, + "title": s.title, + "stage": s.stage, + "story_type": s.story_type, + "summary": s.summary, + "canonical_markdown": s.canonical_markdown, + "status": s.status, + "created_at": s.created_at.isoformat() if s.created_at else None, + } + for s in stories + ] diff --git a/api/app/features/user/models.py b/api/app/features/user/models.py index 88e4734..3d49f56 100644 --- a/api/app/features/user/models.py +++ b/api/app/features/user/models.py @@ -24,6 +24,7 @@ class User(Base): conversations = relationship("Conversation", back_populates="user") chapters = relationship("Chapter", back_populates="user") + stories = relationship("Story", back_populates="user") books = relationship("Book", back_populates="user") orders = relationship("Order", back_populates="user", cascade="all, delete-orphan") memoir_state = relationship( diff --git a/api/app/main.py b/api/app/main.py index 486da20..34d92e0 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -36,6 +36,7 @@ from app.features.conversation import models as _conv_models # noqa: F401 from app.features.memory import models as _memory_models # noqa: F401 from app.features.memoir import models as _memoir_models # noqa: F401 from app.features.payment import models as _payment_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 from app.features.user import models as _user_models # noqa: F401 app = FastAPI( diff --git a/api/app/tasks/__init__.py b/api/app/tasks/__init__.py index 713270e..d20cd56 100644 --- a/api/app/tasks/__init__.py +++ 
b/api/app/tasks/__init__.py @@ -3,6 +3,14 @@ Celery 任务模块 """ from .celery_app import celery_app +from .chapter_cover_tasks import generate_chapter_cover from .memoir_tasks import process_memoir_segments, generate_chapter_images +from .story_image_tasks import generate_story_image -__all__ = ["celery_app", "process_memoir_segments", "generate_chapter_images"] +__all__ = [ + "celery_app", + "process_memoir_segments", + "generate_chapter_images", + "generate_chapter_cover", + "generate_story_image", +] diff --git a/api/app/tasks/celery_app.py b/api/app/tasks/celery_app.py index 9305783..29a42ab 100644 --- a/api/app/tasks/celery_app.py +++ b/api/app/tasks/celery_app.py @@ -9,10 +9,12 @@ from celery import Celery from app.core.config import settings # 与 main.py / Alembic 一致:注册所有 model,避免 mapper 初始化时 relationship 字符串找不到类 +from app.features.asset import models as _asset_models # noqa: F401 - register Asset from app.features.auth import models as _auth_models # noqa: F401 from app.features.conversation import models as _conv_models # noqa: F401 from app.features.memory import models as _memory_models # noqa: F401 from app.features.memoir import models as _memoir_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 from app.features.payment import models as _payment_models # noqa: F401 from app.features.user import models as _user_models # noqa: F401 @@ -23,7 +25,11 @@ celery_app = Celery( "life_echo", broker=REDIS_URL, backend=REDIS_URL, - include=["app.tasks.memoir_tasks"], + include=[ + "app.tasks.memoir_tasks", + "app.tasks.story_image_tasks", + "app.tasks.chapter_cover_tasks", + ], ) # Celery 配置 diff --git a/api/app/tasks/chapter_cover_tasks.py b/api/app/tasks/chapter_cover_tasks.py new file mode 100644 index 0000000..c2913a6 --- /dev/null +++ b/api/app/tasks/chapter_cover_tasks.py @@ -0,0 +1,304 @@ +""" +Chapter 封面生成 Celery 任务。 + +从 chapter_cover_intents 原子 claim intent,或创建新 intent 后生成封面, +写入 assets,绑定到 
chapters.cover_asset_id。封面不回写进正文 markdown。 +""" + +import hashlib +import uuid +from datetime import datetime, timedelta, timezone + +from celery import shared_task +from PIL import Image +from sqlalchemy import and_, func, or_, select, update +from sqlalchemy.orm import joinedload + +from app.core.db import get_sync_db +from app.core.dependencies import get_image_generator +from app.core.redis_lock import acquire_redis_lock, release_redis_lock +from app.features.asset.models import Asset +from app.features.memoir.chapter_cover import ( + aggregate_cover_prompt_from_chapter, + aggregate_cover_prompt_from_stories, +) +from app.features.memoir.memoir_images.storage import TencentCosStorageService +from app.features.memoir.models import Chapter, ChapterCoverIntent, ChapterStoryLink +from app.ports.image_gen import TaskStatus + +from app.core.logging import get_logger + +logger = get_logger(__name__) + +CHAPTER_COVER_LOCK_TTL_SECONDS = 1800 +CHAPTER_COVER_CLAIM_TTL_SECONDS = 1800 + + +def _build_cover_cos_key(user_id: str, chapter_id: str, prompt: str) -> str: + short_hash = hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:10] + return f"chapters/{user_id}/{chapter_id}/cover-{short_hash}.png" + + +def _normalize_image_bytes(image_bytes: bytes) -> bytes: + from io import BytesIO + + with Image.open(BytesIO(image_bytes)) as image: + output = BytesIO() + if image.mode in {"RGBA", "LA"}: + normalized = image + elif image.mode == "P": + normalized = image.convert("RGBA") + else: + normalized = image.convert("RGB") + normalized.save(output, format="PNG") + return output.getvalue() + + +def _build_cover_prompt(prompt_brief: str) -> str: + """从 intent.prompt_brief 构建出图 prompt。""" + from app.agents.memoir.prompts import IMAGE_PLACEHOLDER_TEMPLATE + + base = IMAGE_PLACEHOLDER_TEMPLATE + if prompt_brief and prompt_brief.strip(): + return f"{base}。{prompt_brief.strip()}" + return f"{base}。章节封面" + + +def _chapter_cover_claimable_clause(now: datetime): + cutoff = now - 
def _build_chapter_cover_brief(chapter: Chapter) -> str:
    """Build the prompt brief used to generate a chapter cover.

    Preference order:

    1. If the chapter has story links, aggregate a brief from the linked
       stories (in ``order_index`` order) together with the chapter title and
       category.
    2. If that yields nothing, fall back to a brief built from the chapter
       title, category and the first 200 characters of its canonical markdown.

    Args:
        chapter: Chapter whose ``story_links`` (and their ``story``) are
            expected to be readable by the caller's session.

    Returns:
        The prompt brief string produced by the aggregation helpers.
    """

    def _link_order(link) -> int:
        # A link whose order_index is missing *or* NULL sorts as 0; a bare
        # getattr default would let None through and break the comparison.
        return getattr(link, "order_index", None) or 0

    prompt_brief = ""
    if getattr(chapter, "story_links", None):
        stories = []
        for link in sorted(chapter.story_links, key=_link_order):
            story = getattr(link, "story", None)
            if story:
                stories.append(story)
        prompt_brief = aggregate_cover_prompt_from_stories(
            stories,
            chapter_title=chapter.title or "",
            chapter_category=chapter.category or "",
        )
    if prompt_brief:
        return prompt_brief

    md = (chapter.canonical_markdown or "").strip()
    excerpt = md[:200] if md else ""
    return aggregate_cover_prompt_from_chapter(
        chapter_title=chapter.title or "",
        chapter_category=chapter.category or "",
        markdown_excerpt=excerpt,
    )
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
def generate_chapter_cover(self, chapter_id: str):
    """Generate a cover image for a chapter and bind it as ``cover_asset_id``.

    Flow:

    1. Take a Redis lock for the chapter; if it is held, return
       ``{"status": "locked"}``.
    2. Load the chapter, skip if it is missing or already has a cover asset,
       then claim (or create) a cover intent using a fresh ``claim_token``.
    3. Generate the image, normalise it to PNG, upload it to storage.
    4. Re-read the intent; persist the asset only if the intent is still
       ``processing`` under our ``claim_token``, otherwise report
       ``superseded_or_cancelled``.
    5. On any failure after the claim, mark the intent ``failed`` (only if we
       still own it) and ask Celery to retry.

    The Redis lock is always released on exit.

    Args:
        chapter_id: Primary key of the chapter to generate a cover for.

    Returns:
        A dict with a ``status`` key; on success it also carries ``asset_id``.

    Raises:
        celery.exceptions.Retry: via ``self.retry`` when generation fails.
    """
    lock_key = f"lock:chapter-images:{chapter_id}"
    lock_handle = acquire_redis_lock(
        lock_key, ttl_seconds=CHAPTER_COVER_LOCK_TTL_SECONDS
    )
    if lock_handle is None:
        logger.info("generate_chapter_cover: chapter=%s, reason=locked", chapter_id)
        return {"status": "locked"}

    claim_token = uuid.uuid4().hex
    intent_id = None
    try:
        with get_sync_db() as db:
            stmt = (
                select(Chapter)
                .where(Chapter.id == chapter_id)
                .options(
                    joinedload(Chapter.story_links).joinedload(ChapterStoryLink.story),
                )
            )
            chapter = db.execute(stmt).unique().scalar_one_or_none()
            if not chapter:
                logger.info(
                    "generate_chapter_cover: chapter=%s, reason=not_found", chapter_id
                )
                return {"status": "no_chapter"}

            if getattr(chapter, "cover_asset_id", None):
                logger.info(
                    "generate_chapter_cover: chapter=%s, reason=has_cover_asset",
                    chapter_id,
                )
                return {"status": "already_has_asset"}

            intent = _claim_chapter_cover_intent_sync(db, chapter, claim_token)
            if not intent:
                logger.info(
                    "generate_chapter_cover: chapter=%s, reason=no_claimable_intent",
                    chapter_id,
                )
                return {"status": "no_intent"}

            # The claim helper commits, which (with SQLAlchemy's default
            # expire_on_commit) expires these instances. Copy the plain values
            # we need while the session is still open, so nothing below
            # touches an ORM object after its session has gone away.
            intent_id = intent.id
            prompt_brief = intent.prompt_brief or ""
            user_id = chapter.user_id

        try:
            image_generator = get_image_generator()
            storage = TencentCosStorageService.from_env()
            from app.features.memoir.memoir_images.settings import MemoirImageSettings

            settings = MemoirImageSettings.from_env()
            prompt_final = _build_cover_prompt(prompt_brief)

            result = image_generator.generate(
                prompt_final,
                settings.default_size,
                settings.default_style,
            )
            if result.status != TaskStatus.COMPLETED or not result.image_url:
                raise RuntimeError(result.error or "Image generation failed")

            image_bytes = _normalize_image_bytes(
                image_generator.download_image(result.image_url)
            )
            cos_key = _build_cover_cos_key(user_id, chapter_id, prompt_final)
            url = storage.upload_bytes(image_bytes, cos_key, "image/png")

            asset_id = str(uuid.uuid4())
            with get_sync_db() as db:
                intent_db = db.get(ChapterCoverIntent, intent_id)
                # Only the worker that still owns the claim may persist.
                if (
                    not intent_db
                    or (intent_db.status or "").strip() != "processing"
                    or (intent_db.claim_token or "").strip() != claim_token
                ):
                    logger.info(
                        "generate_chapter_cover: skip persist intent=%s status=%s claim=%s",
                        intent_id,
                        getattr(intent_db, "status", None),
                        getattr(intent_db, "claim_token", None),
                    )
                    return {"status": "superseded_or_cancelled"}

                asset = Asset(
                    id=asset_id,
                    asset_type="chapter_cover",
                    storage_key=cos_key,
                    url=url,
                    provider=settings.provider,
                    style_profile=settings.default_style,
                    prompt_final=prompt_final,
                    status="completed",
                )
                db.add(asset)
                db.flush()

                intent_db.asset_id = asset_id
                intent_db.status = "completed"
                intent_db.claim_token = None
                intent_db.claimed_at = None
                intent_db.error = None
                intent_db.updated_at = datetime.now(timezone.utc)

                chapter_db = db.get(Chapter, chapter_id)
                chapter_db.cover_asset_id = asset_id

                db.commit()

            logger.info(
                "generate_chapter_cover: chapter=%s, asset=%s, url=%s",
                chapter_id,
                asset_id,
                url,
            )
            return {"status": "success", "asset_id": asset_id}
        except Exception as exc:
            if intent_id is not None:
                with get_sync_db() as db:
                    intent_db = db.get(ChapterCoverIntent, intent_id)
                    # Do not overwrite a row another worker has since claimed.
                    if (
                        intent_db
                        and (intent_db.claim_token or "").strip() == claim_token
                    ):
                        intent_db.status = "failed"
                        intent_db.claim_token = None
                        intent_db.claimed_at = None
                        intent_db.error = str(exc)
                        intent_db.updated_at = datetime.now(timezone.utc)
                        db.commit()
            logger.warning(
                "generate_chapter_cover failed: chapter=%s, error=%s", chapter_id, exc
            )
            raise self.retry(exc=exc)
    finally:
        release_redis_lock(lock_handle)
getattr(chapter, "cover_asset_id", None): + return False + md = (getattr(chapter, "canonical_markdown", None) or "").strip() + if md: + return True + sections = getattr(chapter, "sections", None) or [] + return any((getattr(s, "content", None) or "").strip() for s in sections) + + def _get_cover_memoir_image(chapter): """获取章节封面 MemoirImage(section_id=None),若无可生成则返回 None。""" images = getattr(chapter, "images", None) or [] @@ -298,11 +307,10 @@ def _save_narrative_to_sections( user_id: str, ): """ - 将带占位符的 narrative 拆成 chapter_sections 并写入;为每段占位符创建 pending 配图。 - 已有 section 与图片不删除,仅追加新内容。若无封面 MemoirImage 则创建 pending 封面(section_id=None)。 + 将 narrative 拆成 chapter_sections 并写入(段落不配 MemoirImage)。 + 已有 section 不删除,仅追加新内容。章节封面由 generate_chapter_cover + cover_asset_id 闭环处理。 chapter 可为已有章节或 None(会新建)。返回 chapter。 """ - now_iso = datetime.now(timezone.utc).isoformat() if chapter is None: chapter = Chapter( id=str(uuid.uuid4()), @@ -344,6 +352,11 @@ def _save_narrative_to_sections( chapter.source_segments = list( set((chapter.source_segments or []) + (source_segments or [])) ) + from app.features.memoir.repo import ( + ensure_chapter_markdown_and_version_sync, + ) + + ensure_chapter_markdown_and_version_sync(db, chapter, narrative) return chapter narrative_to_parse = new_part order_base = max(s.order_index for s in existing_sections) + 1 @@ -351,13 +364,6 @@ def _save_narrative_to_sections( narrative_to_parse = (narrative or "").strip() order_base = 0 - img_settings = MemoirImageSettings.from_env() - prompt_service = ( - MemoirImagePromptService(llm=None, settings=img_settings) - if img_settings.enabled - else None - ) - segments = parse_narrative_to_sections(narrative_to_parse) if not segments: sec = ChapterSection( @@ -369,82 +375,20 @@ def _save_narrative_to_sections( ) db.add(sec) db.flush() - if img_settings.enabled: - stmt_cover = select(MemoirImage).where( - MemoirImage.chapter_id == chapter.id, - MemoirImage.section_id.is_(None), - ) - if not 
db.execute(stmt_cover).scalar_one_or_none(): - cover_ph = { - "placeholder": "{{{{{{{{IMAGE:章节封面}}}}}}}}", - "description": "章节封面", - "index": 0, - } - cover_asset = build_initial_image_assets( - [cover_ph], - img_settings.provider, - prompt_service.CATEGORY_STYLE_MAP.get( - category, img_settings.default_style - ) - if prompt_service - else img_settings.default_style, - img_settings.default_size, - now_iso, - )[0] - cover_mi = _memoir_image_from_asset(chapter.id, None, 0, cover_asset) - db.add(cover_mi) - db.flush() chapter.title = title chapter.is_new = True chapter.source_segments = list( set((chapter.source_segments or []) + (source_segments or [])) ) + from app.features.memoir.repo import ensure_chapter_markdown_and_version_sync + + ensure_chapter_markdown_and_version_sync(db, chapter, narrative) return chapter - def _should_have_image(seg: dict, order_idx: int) -> bool: - """有 placeholder_info 的段落配图;无则兼容旧格式(每 3 段 1 图)""" - ph = seg.get("placeholder_info") - if ph and ph.get("description"): - return True - return (order_idx % 3) == 2 - - def _placeholder_for_segment(seg: dict, order_idx: int) -> dict | None: - ph = seg.get("placeholder_info") - if ph and ph.get("placeholder") and ph.get("description"): - # 确保有 index,build_initial_image_assets 依赖此字段 - if "index" not in ph: - ph = {**ph, "index": order_idx} - return ph - content = (seg.get("content") or "").strip() - desc = (content[:50] + "…") if len(content) > 50 else (content or "章节配图") - return { - "placeholder": f"{{{{{{{{IMAGE:{desc}}}}}}}}}", - "description": desc, - "index": order_idx, - } - - # 按顺序创建 section,每 3 个 section 对应 1 张配图 + # 段落不再绑定配图(每故事/章节结构化出图);仅章节封面走 MemoirImage for i, seg in enumerate(segments): order_idx = order_base + i content = (seg.get("content") or "").strip() - image_asset = None - if img_settings.enabled and _should_have_image(seg, order_idx): - ph = _placeholder_for_segment(seg, order_idx) - style = ( - prompt_service.CATEGORY_STYLE_MAP.get( - category, img_settings.default_style 
- ) - if prompt_service - else img_settings.default_style - ) - image_asset = build_initial_image_assets( - [ph], - img_settings.provider, - style, - img_settings.default_size, - now_iso, - )[0] - sec = ChapterSection( id=str(uuid.uuid4()), chapter_id=chapter.id, @@ -454,49 +398,17 @@ def _save_narrative_to_sections( ) db.add(sec) db.flush() - if image_asset: - # 本段配图与当前 section 绑定,memoir_images.order_index = section.order_index + 1(封面 0 预留) - mi = _memoir_image_from_asset( - chapter.id, sec.id, order_idx + 1, image_asset - ) - db.add(mi) - db.flush() - sec.image_id = mi.id db.flush() - # 封面图:若无则创建 pending MemoirImage(section_id=None, order_index=0) - if img_settings.enabled: - stmt_cover = select(MemoirImage).where( - MemoirImage.chapter_id == chapter.id, - MemoirImage.section_id.is_(None), - ) - existing_cover = db.execute(stmt_cover).scalar_one_or_none() - if not existing_cover: - cover_ph = { - "placeholder": "{{{{{{{{IMAGE:章节封面}}}}}}}}", - "description": "章节封面", - "index": 0, - } - cover_asset = build_initial_image_assets( - [cover_ph], - img_settings.provider, - prompt_service.CATEGORY_STYLE_MAP.get( - category, img_settings.default_style - ) - if prompt_service - else img_settings.default_style, - img_settings.default_size, - now_iso, - )[0] - cover_mi = _memoir_image_from_asset(chapter.id, None, 0, cover_asset) - db.add(cover_mi) - db.flush() - chapter.title = title chapter.is_new = True chapter.source_segments = list( set((chapter.source_segments or []) + (source_segments or [])) ) + # 确保 canonical_markdown 与版本链(agent 产出由 repo 落库) + from app.features.memoir.repo import ensure_chapter_markdown_and_version_sync + + ensure_chapter_markdown_and_version_sync(db, chapter, narrative) return chapter @@ -504,9 +416,7 @@ def initialize_chapter_images(_chapter): """ 兼容旧调用:若章节已改为 sections 存储,则图片初始化已在 _save_narrative_to_sections 中完成,直接返回。 """ - logger.info( - "initialize_chapter_images: 已由 _save_narrative_to_sections 处理 section 配图,跳过" - ) + 
logger.info("initialize_chapter_images: 封面由 generate_chapter_cover 处理,跳过") return [] @@ -625,6 +535,17 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): logger.warning(f"未找到段落: {segment_ids}") return {"status": "no_segments"} + # Memory ingest: transcript -> memory_sources, chunks, FTS + conv_id = getattr(segments[0], "conversation_id", None) or "" + transcript = "\n\n".join(seg.transcript_text or "" for seg in segments) + if transcript.strip(): + try: + from app.features.memory.service import ingest_transcript_sync + + ingest_transcript_sync(db, user_id, conv_id, transcript) + except Exception as e: + logger.warning("Memory ingest 跳过: %s", e) + # 获取用户状态和资料 state = _get_or_create_state_sync(user_id, db) llm = _get_llm() @@ -643,6 +564,11 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): ) narrative_agent = NarrativeAgent() + chapter_composer = __import__( + "app.agents.memoir.chapter_composer_orchestrator", + fromlist=["ChapterComposerOrchestrator"], + ).ChapterComposerOrchestrator() + from app.features.memory.repo import retrieve_evidence_sync def _process_category( chapter_category: str, @@ -652,11 +578,26 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): birth_year, llm, ): - """单章节处理:NarrativeAgent 生成标题+叙事,PlaceholderInjectAgent 注入,持久化""" + """单章节处理:ChapterComposerOrchestrator 生成 markdown(或 NarrativeAgent 回退),repo 落库""" segment_texts = [seg.transcript_text or "" for seg in category_segments] combined_text = "\n\n".join(segment_texts) source_ids = [seg.id for seg in category_segments] + # 证据检索(writing RAG) + try: + evidence = retrieve_evidence_sync( + db, user_id, combined_text, top_k=10 + ) + except Exception as e: + logger.warning("Evidence 检索跳过: %s", e) + evidence = { + "relevant_chunks": [], + "relevant_summaries": [], + "relevant_facts": [], + "timeline_hints": [], + "relevant_stories": [], + } + stmt_chapter = ( select(Chapter) .where( @@ -684,14 +625,19 @@ def 
process_memoir_segments(self, user_id: str, segment_ids: List[str]): slot_snippets[key] = snip title = chapter.title if chapter else f"{chapter_category} 回忆" - existing_content = "" - if chapter and getattr(chapter, "sections", None): - existing_content = "\n\n".join( - s.content - for s in sorted(chapter.sections, key=lambda x: x.order_index) - if (s.content or "").strip() + existing_markdown = "" + if chapter: + existing_markdown = ( + getattr(chapter, "canonical_markdown", None) or "" ) - narrative = combined_text + if not existing_markdown and getattr(chapter, "sections", None): + existing_markdown = "\n\n".join( + s.content + for s in sorted( + chapter.sections, key=lambda x: x.order_index + ) + if (s.content or "").strip() + ) if not chapter: title = narrative_agent.generate_title( @@ -702,37 +648,46 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): birth_year=birth_year, llm=llm, ) - new_narrative = narrative_agent.generate_narrative( - stage=chapter_category, - slots=slot_snippets, - new_content=combined_text, - existing_content=existing_content, + # ChapterComposerOrchestrator 产出 markdown(agent 不落库) + narrative = chapter_composer.compose_chapter_markdown( + title=title, + category=chapter_category, + evidence=evidence, + existing_markdown=existing_markdown, user_profile=profile, birth_year=birth_year, llm=llm, ) - if _is_json_narrative(new_narrative): - narrative = new_narrative - elif existing_content: - narrative = f"{existing_content}\n\n{new_narrative}" - else: - narrative = new_narrative + if not narrative or not narrative.strip(): + new_narrative = narrative_agent.generate_narrative( + stage=chapter_category, + slots=slot_snippets, + new_content=combined_text, + existing_content=existing_markdown, + user_profile=profile, + birth_year=birth_year, + llm=llm, + ) + if _is_json_narrative(new_narrative): + narrative = new_narrative + elif existing_markdown: + narrative = f"{existing_markdown}\n\n{new_narrative}" + else: + 
narrative = new_narrative if ( - existing_content + existing_markdown and not _is_json_narrative(narrative) - and len(narrative) < len(existing_content) * 0.8 + and len(narrative) < len(existing_markdown) * 0.8 ): logger.warning( "内容长度异常: existing=%d, new=%d, category=%s. 回退为追加模式", - len(existing_content), + len(existing_markdown), len(narrative), chapter_category, ) - narrative = f"{existing_content}\n\n{combined_text}" + narrative = f"{existing_markdown}\n\n{combined_text}" - if not _is_json_narrative(narrative): - narrative = inject_placeholders(narrative) calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999) chapter = _save_narrative_to_sections( @@ -748,9 +703,8 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): db.flush() db.refresh(chapter) - has_images = image_settings.enabled and ( - _chapter_has_any_section_images_to_generate(chapter) - or _chapter_has_cover_to_generate(chapter) + needs_cover_enqueue = ( + image_settings.enabled and _chapter_needs_cover_enqueue(chapter) ) stmt_book = ( @@ -773,7 +727,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): book.has_update = True book.last_update_chapter_id = chapter.id - return chapter, has_images + return chapter, needs_cover_enqueue def _raise_retry(): raise self.retry(countdown=10) @@ -800,13 +754,15 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): db.commit() + from app.tasks.chapter_cover_tasks import generate_chapter_cover + for chapter_id in sorted(chapters_to_enqueue): try: - logger.info(f"派发章节补图任务: chapter={chapter_id}") - generate_chapter_images.delay(chapter_id) + logger.info(f"派发章节封面任务: chapter={chapter_id}") + generate_chapter_cover.delay(chapter_id) except Exception as exc: logger.warning( - f"补图任务派发失败: chapter={chapter_id}, error={exc}" + f"章节封面任务派发失败: chapter={chapter_id}, error={exc}" ) logger.info(f"回忆录处理完成: user_id={user_id}, task_id={task_id}") @@ -902,8 +858,6 @@ def generate_chapter_content(self, 
user_id: str, stage: str, new_content: str): ) narrative = f"{existing_content}\n\n{new_content}" - if not _is_json_narrative(narrative): - narrative = inject_image_placeholder_template(narrative) calculated_order_index = STAGE_TO_ORDER.get(stage, 999) title = chapter.title if chapter else f"{stage} 回忆" chapter = _save_narrative_to_sections( @@ -922,16 +876,15 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): if ( image_settings.enabled and chapter - and ( - _chapter_has_any_section_images_to_generate(chapter) - or _chapter_has_cover_to_generate(chapter) - ) + and _chapter_needs_cover_enqueue(chapter) ): + from app.tasks.chapter_cover_tasks import generate_chapter_cover + try: - generate_chapter_images.delay(chapter.id) + generate_chapter_cover.delay(chapter.id) except Exception as exc: logger.warning( - "补图任务派发失败: chapter=%s, error=%s", chapter.id, exc + "章节封面任务派发失败: chapter=%s, error=%s", chapter.id, exc ) return {"status": "success"} @@ -948,7 +901,7 @@ def build_cos_key(user_id: str, chapter_id: str, index: int | str, prompt: str) @shared_task(bind=True, max_retries=3, default_retry_delay=30) def generate_chapter_images(self, chapter_id: str): - """Async task to generate images for a chapter's cover and sections (each section has at most one image).""" + """异步补图:处理封面 MemoirImage 与历史遗留的段落配图(pending/failed)。""" lock_acquired = False provider = None with get_sync_db() as db: diff --git a/api/app/tasks/story_image_tasks.py b/api/app/tasks/story_image_tasks.py new file mode 100644 index 0000000..5e82ec9 --- /dev/null +++ b/api/app/tasks/story_image_tasks.py @@ -0,0 +1,297 @@ +""" +Story 主插图生成 Celery 任务。 + +从 story_image_intents 原子 claim intent,生成图片,写入 assets,更新 intent。 +不读取正文占位符。 +""" + +import hashlib +import uuid +from datetime import datetime, timedelta, timezone + +from celery import shared_task +from PIL import Image +from sqlalchemy import and_, func, or_, select, update + +from app.core.db import get_sync_db +from 
app.core.dependencies import get_image_generator +from app.core.redis_lock import acquire_redis_lock, release_redis_lock +from app.features.asset.models import Asset +from app.features.memoir.memoir_images.storage import TencentCosStorageService +from app.features.story.backfill import backfill_image_into_markdown +from app.features.story.models import Story, StoryImageIntent, StoryVersion +from app.ports.image_gen import TaskStatus + +from app.core.logging import get_logger + +logger = get_logger(__name__) + +STORY_IMAGE_LOCK_TTL_SECONDS = 1800 +STORY_IMAGE_CLAIM_TTL_SECONDS = 1800 + + +def _build_story_image_cos_key( + user_id: str, story_id: str, intent_id: str, prompt: str +) -> str: + short_hash = hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:10] + return f"stories/{user_id}/{story_id}/{intent_id}-{short_hash}.png" + + +def _normalize_image_bytes(image_bytes: bytes) -> bytes: + from io import BytesIO + + with Image.open(BytesIO(image_bytes)) as image: + output = BytesIO() + if image.mode in {"RGBA", "LA"}: + normalized = image + elif image.mode == "P": + normalized = image.convert("RGBA") + else: + normalized = image.convert("RGB") + normalized.save(output, format="PNG") + return output.getvalue() + + +def _build_story_image_prompt( + prompt_brief: str, + story_title: str = "", + story_stage: str | None = None, + style_profile: str | None = None, +) -> str: + """从 intent.prompt_brief 构建出图 prompt。""" + from app.agents.memoir.prompts import IMAGE_PLACEHOLDER_TEMPLATE + + base = IMAGE_PLACEHOLDER_TEMPLATE + if prompt_brief and prompt_brief.strip(): + return f"{base}。{prompt_brief.strip()}" + fallback = ",".join(filter(None, [story_title, story_stage or ""])) or "人生故事" + return f"{base}。{fallback}" + + +def _story_image_claimable_clause(now: datetime): + cutoff = now - timedelta(seconds=STORY_IMAGE_CLAIM_TTL_SECONDS) + return or_( + StoryImageIntent.status.in_(["pending", "failed"]), + and_( + StoryImageIntent.status == "processing", + or_( + 
StoryImageIntent.claimed_at.is_(None), + StoryImageIntent.claimed_at < cutoff, + ), + ), + ) + + +def _claim_story_image_intent_sync(db, story_id: str, claim_token: str): + now = datetime.now(timezone.utc) + claimable = _story_image_claimable_clause(now) + candidate_id = db.execute( + select(StoryImageIntent.id) + .where(StoryImageIntent.story_id == story_id) + .where(StoryImageIntent.intent_role == "primary") + .where(claimable) + .order_by( + StoryImageIntent.updated_at.desc(), StoryImageIntent.created_at.desc() + ) + .limit(1) + ).scalar_one_or_none() + if not candidate_id: + return None + + claimed = db.execute( + update(StoryImageIntent) + .where(StoryImageIntent.id == candidate_id) + .where(_story_image_claimable_clause(now)) + .values( + status="processing", + claim_token=claim_token, + claimed_at=now, + updated_at=now, + error=None, + attempt_count=func.coalesce(StoryImageIntent.attempt_count, 0) + 1, + ) + ) + if (claimed.rowcount or 0) != 1: + db.rollback() + return None + + row = ( + db.execute( + select(StoryImageIntent, Story) + .join(Story, StoryImageIntent.story_id == Story.id) + .where(StoryImageIntent.id == candidate_id) + ) + .unique() + .first() + ) + db.commit() + return row + + +@shared_task(bind=True, max_retries=3, default_retry_delay=30) +def generate_story_image(self, story_id: str): + """ + 为 story 生成主插图。 + 从 story_image_intents 原子认领 primary intent,生成后写入 assets 并更新 intent。 + """ + lock_key = f"lock:story-image:{story_id}" + lock_handle = acquire_redis_lock(lock_key, ttl_seconds=STORY_IMAGE_LOCK_TTL_SECONDS) + if lock_handle is None: + logger.info("generate_story_image: story=%s, reason=locked", story_id) + return {"status": "locked"} + + claim_token = uuid.uuid4().hex + intent = None + story = None + try: + with get_sync_db() as db: + row = _claim_story_image_intent_sync(db, story_id, claim_token) + if not row: + logger.info( + "generate_story_image: story=%s, reason=no_claimable_intent", + story_id, + ) + return {"status": "no_intent"} + 
+ intent, story = row + + image_generator = get_image_generator() + storage = TencentCosStorageService.from_env() + from app.features.memoir.memoir_images.settings import MemoirImageSettings + + settings = MemoirImageSettings.from_env() + prompt_final = _build_story_image_prompt( + intent.prompt_brief or "", + story_title=story.title or "", + story_stage=story.stage, + style_profile=intent.style_profile or settings.default_style, + ) + result = image_generator.generate( + prompt_final, + settings.default_size, + intent.style_profile or settings.default_style, + ) + if result.status != TaskStatus.COMPLETED or not result.image_url: + raise RuntimeError(result.error or "Image generation failed") + + image_bytes = _normalize_image_bytes( + image_generator.download_image(result.image_url) + ) + cos_key = _build_story_image_cos_key( + story.user_id, story_id, intent.id, prompt_final + ) + url = storage.upload_bytes(image_bytes, cos_key, "image/png") + + asset_id = str(uuid.uuid4()) + with get_sync_db() as db: + intent_db = db.get(StoryImageIntent, intent.id) + if ( + not intent_db + or (intent_db.status or "").strip() != "processing" + or (intent_db.claim_token or "").strip() != claim_token + ): + logger.info( + "generate_story_image: skip persist intent=%s status=%s claim=%s", + intent.id, + getattr(intent_db, "status", None), + getattr(intent_db, "claim_token", None), + ) + return {"status": "superseded_or_cancelled"} + + asset = Asset( + id=asset_id, + asset_type="story_image", + storage_key=cos_key, + url=url, + provider=settings.provider, + style_profile=intent.style_profile or settings.default_style, + prompt_final=prompt_final, + status="completed", + ) + db.add(asset) + db.flush() + + story_db = db.get(Story, story_id) + target_vid = intent_db.story_version_id or story_db.current_version_id + current_vid = story_db.current_version_id + + intent_db.asset_id = asset_id + intent_db.status = "completed" + intent_db.claim_token = None + intent_db.claimed_at = None + 
intent_db.error = None + intent_db.updated_at = datetime.now(timezone.utc) + db.flush() + + # 仅当 intent 仍指向当前版本时回填正文,避免慢任务/重试把图插到新版本上 + if not target_vid or target_vid != current_vid: + db.commit() + logger.info( + "generate_story_image: stale intent skip backfill story=%s " + "intent_ver=%s current=%s", + story_id, + target_vid, + current_vid, + ) + return {"status": "success_stale", "asset_id": asset_id} + + ver = db.get(StoryVersion, target_vid) + if not ver: + db.commit() + return {"status": "success_no_snapshot", "asset_id": asset_id} + + base_md = ver.markdown_snapshot or "" + backfilled_md = backfill_image_into_markdown( + base_md, + asset_id=asset_id, + caption=intent_db.caption or "主插图", + source_span=intent_db.source_span, + ) + max_stmt = select(func.max(StoryVersion.version_no)).where( + StoryVersion.story_id == story_id + ) + max_no = db.execute(max_stmt).scalar() + version_no = (max_no or 0) + 1 + new_ver = StoryVersion( + id=str(uuid.uuid4()), + story_id=story_id, + version_no=version_no, + markdown_snapshot=backfilled_md, + change_summary="主插图回填", + actor_type="system", + source_type="image_backfill", + parent_version_id=story_db.current_version_id, + ) + db.add(new_ver) + db.flush() + story_db.current_version_id = new_ver.id + story_db.canonical_markdown = backfilled_md + + db.commit() + + logger.info( + "generate_story_image: story=%s, asset=%s, url=%s", + story_id, + asset_id, + url, + ) + return {"status": "success", "asset_id": asset_id} + except Exception as exc: + if intent is not None: + with get_sync_db() as db: + intent_db = db.get(StoryImageIntent, intent.id) + if ( + intent_db + and (intent_db.status or "").strip() != "completed" + and (intent_db.claim_token or "").strip() == claim_token + ): + intent_db.status = "failed" + intent_db.claim_token = None + intent_db.claimed_at = None + intent_db.error = str(exc) + intent_db.updated_at = datetime.now(timezone.utc) + db.commit() + logger.warning("generate_story_image failed: story=%s, 
error=%s", story_id, exc) + raise self.retry(exc=exc) + finally: + release_redis_lock(lock_handle) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 817d428..b41bd8b 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -12,5 +12,6 @@ from app.features.auth import models as _auth_models # noqa: F401 from app.features.conversation import models as _conv_models # noqa: F401 from app.features.memory import models as _memory_models # noqa: F401 from app.features.memoir import models as _memoir_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 from app.features.payment import models as _payment_models # noqa: F401 from app.features.user import models as _user_models # noqa: F401 diff --git a/api/tests/test_asset_resolver.py b/api/tests/test_asset_resolver.py new file mode 100644 index 0000000..7da80fb --- /dev/null +++ b/api/tests/test_asset_resolver.py @@ -0,0 +1,65 @@ +"""asset_resolver:旧占位符清理与 asset:// 解析。""" + +import unittest + +from app.features.memoir.asset_resolver import ( + collect_asset_ids_for_chapter, + collect_asset_ids_from_markdown, + resolve_asset_refs_in_markdown, + split_markdown_by_asset_refs, + strip_legacy_image_placeholders, +) +from app.features.memoir.models import Chapter, ChapterSection + + +class AssetResolverTest(unittest.TestCase): + def test_strip_legacy_image_placeholders_double_brace(self): + md = "正文\n\n{{IMAGE:院子里的树}}\n\n结尾" + out = strip_legacy_image_placeholders(md) + self.assertNotIn("IMAGE", out) + self.assertIn("正文", out) + self.assertIn("结尾", out) + + def test_strip_legacy_image_placeholders_quad_brace(self): + md = "a\n\n{{{{IMAGE:描述}}}}\n\nb" + out = strip_legacy_image_placeholders(md) + self.assertNotIn("IMAGE", out) + + def test_collect_and_split_asset_refs(self): + md = "前\n\n![图注](asset://abc-123)\n\n后" + self.assertEqual(collect_asset_ids_from_markdown(md), ["abc-123"]) + blocks = split_markdown_by_asset_refs(md, lambda aid: f"https://x/{aid}") + 
self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0]["type"], "text") + self.assertEqual(blocks[1]["type"], "image") + self.assertIn("https://x/abc-123", blocks[1]["url"]) + + def test_resolve_asset_refs_in_markdown(self): + md = "![c](asset://id1)" + out = resolve_asset_refs_in_markdown(md, lambda aid: "https://cdn/u") + self.assertIn("https://cdn/u", out) + self.assertNotIn("asset://", out) + + def test_collect_asset_ids_for_chapter(self): + ch = Chapter( + id="c1", + user_id="u1", + title="t", + order_index=0, + canonical_markdown="![x](asset://a1)", + cover_asset_id="cov1", + ) + ch.sections = [ + ChapterSection( + id="s1", + chapter_id="c1", + order_index=0, + content="![y](asset://a2)", + ) + ] + ids = collect_asset_ids_for_chapter(ch) + self.assertEqual(ids, {"a1", "a2", "cov1"}) + + +if __name__ == "__main__": + unittest.main() diff --git a/api/tests/test_memoir_image_parser.py b/api/tests/test_memoir_image_parser.py index 92d8819..3b8bd58 100644 --- a/api/tests/test_memoir_image_parser.py +++ b/api/tests/test_memoir_image_parser.py @@ -59,32 +59,28 @@ class MemoirImageParserTest(unittest.TestCase): ) self.assertEqual(items[0]["description"], "1938年初的上海弄堂口,冬日萧瑟") - def test_parse_narrative_json_returns_sections_with_content_and_placeholder_info( - self, - ): + def test_parse_narrative_json_ignores_image_description(self): raw = '{"paragraphs": [{"content": "那年春天。", "image_description": "南方小镇的青石板路"}, {"content": "奶奶坐在藤椅上。", "image_description": "奶奶的藤椅"}]}' segments = parse_narrative_json(raw) self.assertEqual(len(segments), 2) self.assertEqual(segments[0]["content"], "那年春天。") - self.assertEqual( - segments[0]["placeholder_info"]["description"], "南方小镇的青石板路" - ) + self.assertIsNone(segments[0]["placeholder_info"]) self.assertEqual(segments[1]["content"], "奶奶坐在藤椅上。") - self.assertEqual(segments[1]["placeholder_info"]["description"], "奶奶的藤椅") + self.assertIsNone(segments[1]["placeholder_info"]) - def 
test_parse_narrative_to_sections_prefers_json_then_fallback_to_placeholder( - self, - ): + def test_parse_narrative_to_sections_json_then_plain_strips_placeholders(self): json_raw = ( '{"paragraphs": [{"content": "段落一", "image_description": "图一"}]}' ) segments = parse_narrative_to_sections(json_raw) self.assertEqual(len(segments), 1) self.assertEqual(segments[0]["content"], "段落一") - self.assertEqual(segments[0]["placeholder_info"]["description"], "图一") + self.assertIsNone(segments[0]["placeholder_info"]) placeholder_raw = "正文。\n\n{{{{IMAGE:描述}}}}\n\n结尾。" segments2 = parse_narrative_to_sections(placeholder_raw) self.assertEqual(len(segments2), 2) - self.assertIn("正文", segments2[0]["content"]) - self.assertEqual(segments2[0]["placeholder_info"]["description"], "描述") + self.assertEqual(segments2[0]["content"], "正文。") + self.assertEqual(segments2[1]["content"], "结尾。") + self.assertIsNone(segments2[0]["placeholder_info"]) + self.assertIsNone(segments2[1]["placeholder_info"]) diff --git a/api/tests/test_pdf_service_images.py b/api/tests/test_pdf_service_images.py index 0ca9525..47e840b 100644 --- a/api/tests/test_pdf_service_images.py +++ b/api/tests/test_pdf_service_images.py @@ -157,3 +157,55 @@ class PDFServiceImagesTest(unittest.IsolatedAsyncioTestCase): self.assertGreater(len(pdf_bytes), 100) mock_client.get.assert_not_called() + + @patch("app.features.memoir.pdf_service.httpx.AsyncClient") + @patch("app.features.memoir.pdf_service.TencentCosStorageService") + async def test_generate_pdf_uses_canonical_markdown_when_present( + self, + storage_cls, + async_client_cls, + ): + """PDF 正文真源为 canonical_markdown,与 API / 前端一致。""" + png_bytes = ( + b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01" + b"\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc```\x00\x00" + b"\x00\x04\x00\x01\xf6\x178U\x00\x00\x00\x00IEND\xaeB`\x82" + ) + mock_response = MagicMock() + mock_response.content = png_bytes + mock_response.raise_for_status = MagicMock() + 
mock_client = AsyncMock() + mock_client.get.return_value = mock_response + async_client_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + async_client_cls.return_value.__aexit__ = AsyncMock(return_value=False) + storage = MagicMock() + storage.get_download_url.return_value = "https://signed.example.com/img.png" + storage_cls.from_env.return_value = storage + + service = PDFService() + book = type("BookStub", (), {"title": "我的回忆录"})() + chapter = type( + "ChapterStub", + (), + { + "title": "童年", + "canonical_markdown": "开头。\n\n{{{{IMAGE:南方小镇}}}}\n\n结尾。", + "sections": [], + "content": "", + "images": [ + { + "index": 0, + "placeholder": "{{{{IMAGE:南方小镇}}}}", + "url": None, + "storage_key": "memoirs/u1/c1/0.png", + "status": "completed", + } + ], + }, + )() + + pdf_bytes = await service.generate_pdf(book, [chapter]) + + self.assertGreater(len(pdf_bytes), 100) + self.assertNotIn(b"IMAGE:", pdf_bytes) + mock_client.get.assert_called_once() diff --git a/api/tests/test_process_memoir_segments_image_enqueue.py b/api/tests/test_process_memoir_segments_image_enqueue.py index f19bc47..1b8fd14 100644 --- a/api/tests/test_process_memoir_segments_image_enqueue.py +++ b/api/tests/test_process_memoir_segments_image_enqueue.py @@ -15,11 +15,17 @@ def _mock_get_sync_db(db): class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): - @patch("app.tasks.memoir_tasks._chapter_has_cover_to_generate", return_value=True) @patch( - "app.tasks.memoir_tasks._chapter_has_any_section_images_to_generate", - return_value=True, + "app.features.memory.repo.retrieve_evidence_sync", + return_value={ + "relevant_chunks": [], + "relevant_summaries": [], + "relevant_facts": [], + "timeline_hints": [], + "relevant_stories": [], + }, ) + @patch("app.features.memory.service.ingest_transcript_sync") @patch("app.tasks.memoir_tasks._update_task_status_sync") @patch("app.tasks.memoir_tasks._release_chapter_lock") @patch("app.tasks.memoir_tasks._acquire_chapter_lock", return_value=True) 
@@ -30,7 +36,7 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): ) @patch("app.tasks.memoir_tasks._get_or_create_state_sync") @patch("app.tasks.memoir_tasks._get_llm") - @patch("app.tasks.memoir_tasks.generate_chapter_images.delay") + @patch("app.tasks.chapter_cover_tasks.generate_chapter_cover.delay") @patch("app.tasks.memoir_tasks.get_sync_db") @patch("app.tasks.memoir_tasks.MemoirImageSettings.from_env") def test_process_memoir_segments_parses_markdown_wrapped_state_extraction_json( @@ -45,8 +51,8 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): _acquire_lock_mock, _release_lock_mock, _update_status_mock, - _has_section_images_mock, - _has_cover_mock, + ingest_mock, + retrieve_mock, ): settings_from_env.return_value = MemoirImageSettings( enabled=True, @@ -108,15 +114,15 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): empty_sections_result = Mock() empty_sections_result.scalars.return_value.all.return_value = [] - cover_check_result = Mock() - cover_check_result.scalar_one_or_none.return_value = None + version_count_result = Mock() + version_count_result.scalar.return_value = 0 db = Mock() db.execute.side_effect = [ segments_result, chapter_result, empty_sections_result, # _save_narrative_to_sections 内查询 ChapterSection - cover_check_result, # 封面 MemoirImage 检查 + version_count_result, # ensure_chapter_markdown_and_version_sync 内 count book_result, ] db.get.return_value = None @@ -145,6 +151,17 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): self.assertEqual(len(delay_events), 1) self.assertGreater(events.index(delay_events[0]), events.index("commit")) + @patch( + "app.features.memory.repo.retrieve_evidence_sync", + return_value={ + "relevant_chunks": [], + "relevant_summaries": [], + "relevant_facts": [], + "timeline_hints": [], + "relevant_stories": [], + }, + ) + @patch("app.features.memory.service.ingest_transcript_sync") @patch("app.tasks.memoir_tasks._update_task_status_sync") 
@patch("app.tasks.memoir_tasks._release_chapter_lock") @patch("app.tasks.memoir_tasks._acquire_chapter_lock", return_value=True) @@ -154,7 +171,7 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): ) @patch("app.tasks.memoir_tasks._get_or_create_state_sync") @patch("app.tasks.memoir_tasks._get_llm", return_value=None) - @patch("app.tasks.memoir_tasks.generate_chapter_images.delay") + @patch("app.tasks.chapter_cover_tasks.generate_chapter_cover.delay") @patch("app.tasks.memoir_tasks.get_sync_db") @patch("app.tasks.memoir_tasks.MemoirImageSettings.from_env") def test_process_memoir_segments_does_not_enqueue_image_jobs_when_feature_disabled( @@ -168,6 +185,8 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): _acquire_lock_mock, _release_lock_mock, _update_status_mock, + ingest_mock, + retrieve_mock, ): settings_from_env.return_value = MemoirImageSettings( enabled=False, @@ -203,11 +222,15 @@ class ProcessMemoirSegmentsImageEnqueueTest(unittest.TestCase): empty_sections_result = Mock() empty_sections_result.scalars.return_value.all.return_value = [] + version_count_result = Mock() + version_count_result.scalar.return_value = 0 + db = Mock() db.execute.side_effect = [ segments_result, chapter_result, empty_sections_result, # _save_narrative_to_sections 内查询 ChapterSection + version_count_result, # ensure_chapter_markdown_and_version_sync 内 count book_result, ] db.get.return_value = None diff --git a/api/tests/test_story_image_tasks.py b/api/tests/test_story_image_tasks.py new file mode 100644 index 0000000..681a0d9 --- /dev/null +++ b/api/tests/test_story_image_tasks.py @@ -0,0 +1,152 @@ +import unittest +from contextlib import contextmanager +from io import BytesIO +from types import SimpleNamespace +from unittest.mock import Mock, patch + +from PIL import Image + +from app.ports.image_gen import ImageResult, TaskStatus +from app.tasks.story_image_tasks import generate_story_image + + +def _mock_db_cm(db): + @contextmanager + def _cm(): + 
yield db + + return _cm() + + +def _png_bytes() -> bytes: + buf = BytesIO() + Image.new("RGB", (1, 1), color="white").save(buf, format="PNG") + return buf.getvalue() + + +class _FakeUUID: + def __init__(self, value: str): + self.hex = value + self._value = value + + def __str__(self) -> str: + return self._value + + +class GenerateStoryImageTaskTest(unittest.TestCase): + @patch("app.tasks.story_image_tasks.release_redis_lock") + @patch( + "app.tasks.story_image_tasks.acquire_redis_lock", + return_value=SimpleNamespace(key="lock:story-image:story-1"), + ) + @patch("app.tasks.story_image_tasks._claim_story_image_intent_sync") + @patch("app.tasks.story_image_tasks.get_sync_db") + @patch("app.tasks.story_image_tasks.TencentCosStorageService") + @patch("app.tasks.story_image_tasks.get_image_generator") + @patch("app.features.memoir.memoir_images.settings.MemoirImageSettings.from_env") + @patch("app.tasks.story_image_tasks.uuid.uuid4") + def test_generate_story_image_resumes_processing_intent_and_backfills_markdown( + self, + uuid4_mock, + settings_from_env, + get_image_generator_mock, + storage_cls, + get_sync_db_mock, + claim_intent_mock, + acquire_lock_mock, + release_lock_mock, + ): + uuid4_mock.side_effect = [ + _FakeUUID("claim-token"), + _FakeUUID("asset-uuid"), + _FakeUUID("version-uuid"), + ] + settings_from_env.return_value = SimpleNamespace( + provider="liblib", + default_style="watercolor", + default_size="1024x1024", + ) + + intent = SimpleNamespace( + id="intent-1", + prompt_brief="院子里的藤椅", + style_profile="watercolor", + story_version_id="ver-1", + caption="主插图", + source_span={"paragraph_index": 0}, + status="processing", + ) + story = SimpleNamespace( + id="story-1", + user_id="user-1", + title="童年的院子", + stage="childhood", + ) + db_claim = Mock() + claim_intent_mock.return_value = (intent, story) + + intent_db = SimpleNamespace( + id="intent-1", + story_version_id="ver-1", + caption="主插图", + source_span={"paragraph_index": 0}, + status="processing", + 
style_profile="watercolor", + claim_token="claim-token", + asset_id=None, + error=None, + updated_at=None, + ) + story_db = SimpleNamespace( + id="story-1", + current_version_id="ver-1", + canonical_markdown="第一段\n\n第二段", + ) + version_db = SimpleNamespace(id="ver-1", markdown_snapshot="第一段\n\n第二段") + version_max_result = Mock() + version_max_result.scalar.return_value = 1 + db_persist = Mock() + db_persist.get.side_effect = [intent_db, story_db, version_db] + db_persist.execute.return_value = version_max_result + + get_sync_db_mock.side_effect = [_mock_db_cm(db_claim), _mock_db_cm(db_persist)] + + generator = get_image_generator_mock.return_value + generator.generate.return_value = ImageResult( + status=TaskStatus.COMPLETED, + task_id="task-1", + image_url="https://provider.example.com/story.png", + ) + generator.download_image.return_value = _png_bytes() + storage_cls.from_env.return_value.upload_bytes.return_value = ( + "https://cos.example.com/stories/u1/s1.png" + ) + + result = generate_story_image.run("story-1") + + self.assertEqual(result["status"], "success") + self.assertEqual(intent_db.status, "completed") + self.assertIsNotNone(intent_db.asset_id) + self.assertNotEqual(story_db.current_version_id, "ver-1") + self.assertIn("asset://", story_db.canonical_markdown) + generator.generate.assert_called_once() + storage_cls.from_env.return_value.upload_bytes.assert_called_once() + claim_intent_mock.assert_called_once() + acquire_lock_mock.assert_called_once() + release_lock_mock.assert_called_once() + + @patch("app.tasks.story_image_tasks.acquire_redis_lock", return_value=None) + @patch("app.tasks.story_image_tasks.get_sync_db") + @patch("app.tasks.story_image_tasks.get_image_generator") + def test_generate_story_image_skips_when_lock_is_already_held( + self, + get_image_generator_mock, + get_sync_db_mock, + acquire_lock_mock, + ): + result = generate_story_image.run("story-1") + + self.assertEqual(result, {"status": "locked"}) + 
get_sync_db_mock.assert_not_called() + get_image_generator_mock.assert_not_called() + acquire_lock_mock.assert_called_once() diff --git a/app-expo/package-lock.json b/app-expo/package-lock.json index a7a91d7..0c876a7 100644 --- a/app-expo/package-lock.json +++ b/app-expo/package-lock.json @@ -72,6 +72,7 @@ "react-i18next": "^16.5.8", "react-native": "0.83.2", "react-native-gesture-handler": "~2.30.0", + "react-native-markdown-display": "^7.0.2", "react-native-reanimated": "4.2.1", "react-native-safe-area-context": "~5.6.2", "react-native-screens": "~4.23.0", @@ -8484,6 +8485,15 @@ "node": ">= 6" } }, + "node_modules/camelize": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/camelize/-/camelize-1.0.1.tgz", + "integrity": "sha512-dU+Tx2fsypxTgtLoE36npi3UqcjSSMNYfkqgmoEhtZrraP5VWq0K7FkWVTYa8eMPtnU/G2txVsfdCJTn9uzpuQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/caniuse-lite": { "version": "1.0.30001778", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001778.tgz", @@ -8950,6 +8960,15 @@ "node": ">= 8" } }, + "node_modules/css-color-keywords": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/css-color-keywords/-/css-color-keywords-1.0.0.tgz", + "integrity": "sha512-FyyrDHZKEjXDpNJYvVsV960FiqQyXc/LlYmsxl2BcdMb2WPx0OGRVgTg55rPSyLSNMqP52R9r8geSp7apN3Ofg==", + "license": "ISC", + "engines": { + "node": ">=4" + } + }, "node_modules/css-in-js-utils": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/css-in-js-utils/-/css-in-js-utils-3.1.0.tgz", @@ -8975,6 +8994,17 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/css-to-react-native": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/css-to-react-native/-/css-to-react-native-3.2.0.tgz", + "integrity": "sha512-e8RKaLXMOFii+02mOlqwjbD00KSEKqblnpO9e++1aXS1fPQOpS1YoqdVHBqPjHNoxeF2mimzVqawm2KCbEdtHQ==", + "license": "MIT", + "dependencies": { + "camelize": "^1.0.0", 
+ "css-color-keywords": "^1.0.0", + "postcss-value-parser": "^4.0.2" + } + }, "node_modules/css-tree": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-1.1.3.tgz", @@ -14916,6 +14946,15 @@ "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", "license": "MIT" }, + "node_modules/linkify-it": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-2.2.0.tgz", + "integrity": "sha512-GnAl/knGn+i1U/wjBz3akz2stz+HrHLsxMwHQGofCDfPvlf+gDKN58UtfmUquTY4/MXeE2x7k19KQmeoZi94Iw==", + "license": "MIT", + "dependencies": { + "uc.micro": "^1.0.1" + } + }, "node_modules/locate-path": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", @@ -15117,6 +15156,28 @@ "tmpl": "1.0.5" } }, + "node_modules/markdown-it": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-10.0.0.tgz", + "integrity": "sha512-YWOP1j7UbDNz+TumYP1kpwnP0aEa711cJjrAQrzd0UXlbJfc5aAq0F/PZHjiioqDC1NKgvIMX+o+9Bk7yuM2dg==", + "license": "MIT", + "dependencies": { + "argparse": "^1.0.7", + "entities": "~2.0.0", + "linkify-it": "^2.0.0", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "bin": { + "markdown-it": "bin/markdown-it.js" + } + }, + "node_modules/markdown-it/node_modules/entities": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.0.3.tgz", + "integrity": "sha512-MyoZ0jgnLvB2X3Lg5HqpFmn1kybDiIfEQmKzTb5apr51Rb+T3KdmMiqa70T+bhGnyv7bQ6WMj2QMHpGMmlrUYQ==", + "license": "BSD-2-Clause" + }, "node_modules/marky": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/marky/-/marky-1.3.0.tgz", @@ -15139,6 +15200,12 @@ "integrity": "sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow==", "license": "CC0-1.0" }, + "node_modules/mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", 
+ "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==", + "license": "MIT" + }, "node_modules/memoize-one": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-5.2.1.tgz", @@ -16809,7 +16876,6 @@ "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", - "dev": true, "license": "MIT", "dependencies": { "loose-envify": "^1.4.0", @@ -16821,7 +16887,6 @@ "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "dev": true, "license": "MIT" }, "node_modules/psl": { @@ -17362,6 +17427,15 @@ "node": ">=10" } }, + "node_modules/react-native-fit-image": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/react-native-fit-image/-/react-native-fit-image-1.5.5.tgz", + "integrity": "sha512-Wl3Vq2DQzxgsWKuW4USfck9zS7YzhvLNPpkwUUCF90bL32e1a0zOVQ3WsJILJOwzmPdHfzZmWasiiAUNBkhNkg==", + "license": "Beerware", + "dependencies": { + "prop-types": "^15.5.10" + } + }, "node_modules/react-native-gesture-handler": { "version": "2.30.0", "resolved": "https://registry.npmjs.org/react-native-gesture-handler/-/react-native-gesture-handler-2.30.0.tgz", @@ -17387,6 +17461,22 @@ "react-native": "*" } }, + "node_modules/react-native-markdown-display": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/react-native-markdown-display/-/react-native-markdown-display-7.0.2.tgz", + "integrity": "sha512-Mn4wotMvMfLAwbX/huMLt202W5DsdpMO/kblk+6eUs55S57VVNni1gzZCh5qpznYLjIQELNh50VIozEfY6fvaQ==", + "license": "MIT", + "dependencies": { + "css-to-react-native": "^3.0.0", + "markdown-it": "^10.0.0", + "prop-types": "^15.7.2", + "react-native-fit-image": "^1.5.5" + }, + "peerDependencies": 
{ + "react": ">=16.2.0", + "react-native": ">=0.50.4" + } + }, "node_modules/react-native-reanimated": { "version": "4.2.1", "resolved": "https://registry.npmjs.org/react-native-reanimated/-/react-native-reanimated-4.2.1.tgz", @@ -19599,6 +19689,12 @@ "node": "*" } }, + "node_modules/uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "license": "MIT" + }, "node_modules/unbox-primitive": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.1.0.tgz", diff --git a/app-expo/package.json b/app-expo/package.json index d1c183c..72df832 100644 --- a/app-expo/package.json +++ b/app-expo/package.json @@ -91,6 +91,7 @@ "react-i18next": "^16.5.8", "react-native": "0.83.2", "react-native-gesture-handler": "~2.30.0", + "react-native-markdown-display": "^7.0.2", "react-native-reanimated": "4.2.1", "react-native-safe-area-context": "~5.6.2", "react-native-screens": "~4.23.0", diff --git a/app-expo/src/app/(main)/chapter/[id].tsx b/app-expo/src/app/(main)/chapter/[id].tsx index 54493f4..7e01605 100644 --- a/app-expo/src/app/(main)/chapter/[id].tsx +++ b/app-expo/src/app/(main)/chapter/[id].tsx @@ -1,8 +1,6 @@ -import { Image } from 'expo-image'; -import { LinearGradient } from 'expo-linear-gradient'; import { router, useLocalSearchParams } from 'expo-router'; import { Settings, Trash2, X } from 'lucide-react-native'; -import React, { useEffect, useState } from 'react'; +import React, { useState } from 'react'; import { ActivityIndicator, Alert, @@ -10,7 +8,6 @@ import { Platform, Pressable, ScrollView, - StyleSheet, useWindowDimensions, View, } from 'react-native'; @@ -21,6 +18,8 @@ import { Icon } from '@/components/ui/icon'; import { Text } from '@/components/ui/text'; import { ScreenHeader } from '@/components/screen-header'; import { ScreenGutter } from 
'@/constants/layout'; +import { MarkdownRenderer } from '@/features/memoir/markdown-renderer'; +import { cn } from '@/lib/utils'; import { useChapterDetail, useDeleteChapter } from '@/features/memoir/hooks'; // Life-Echo reading colors (from HTML reference) @@ -39,18 +38,6 @@ type FontSize = 'small' | 'default' | 'large'; type FontFamily = 'serif' | 'sans'; type BackgroundTheme = 'white' | 'sepia'; -const FONT_SIZES: Record = { - small: 16, - default: 20, - large: 24, -}; - -const LINE_HEIGHTS: Record = { - small: 30, - default: 38, - large: 44, -}; - const FONT_FAMILIES: Record = { serif: Platform.select({ ios: 'Georgia', android: 'serif', default: 'serif' }) ?? @@ -68,198 +55,6 @@ const BACKGROUND_COLORS: Record = { sepia: READING_COLORS.backgroundSepia, }; -function ChapterContent({ - sections, - coverImageUrl, - fontSize, - fontFamily, - backgroundColor, -}: { - sections: { content: string; image: { url: string | null } | null }[]; - coverImageUrl: string | null; - fontSize: FontSize; - fontFamily: FontFamily; - backgroundColor: BackgroundTheme; -}) { - const { width } = useWindowDimensions(); - const contentWidth = Math.min(width - ScreenGutter * 2, 672); - const heroAspectRatio = 4 / 5; - const bodySize = FONT_SIZES[fontSize]; - const lineHeight = LINE_HEIGHTS[fontSize]; - const fontFam = FONT_FAMILIES[fontFamily]; - const bgColor = BACKGROUND_COLORS[backgroundColor]; - - const [heroLoadFailed, setHeroLoadFailed] = useState(false); - useEffect(() => { - setHeroLoadFailed(false); - }, [coverImageUrl]); - const hasCoverImage = !!coverImageUrl && !heroLoadFailed; - - return ( - <> - {/* Hero Image: 仅在有封面图且加载成功时显示,避免无图或加载失败时大片空白 */} - {hasCoverImage && ( - - Chapter hero setHeroLoadFailed(true)} - style={{ - width: '100%', - height: '100%', - objectFit: 'cover', - }} - /> - - - )} - - {/* Memoir Content */} - - - {(() => { - const indent = '\u3000\u3000'; - let isFirstParagraph = true; - - return sections.flatMap((section, i) => { - const text = 
section.content?.trim() ?? ''; - const hasImage = !!section.image?.url; - if (!text && !hasImage) return []; - - const paragraphs = text - ? text - .split(/\n\n+/) - .map((p) => p.trim()) - .filter(Boolean) - : []; - - const nodes: React.ReactNode[] = []; - - paragraphs.forEach((para, pIdx) => { - const firstChar = para.charAt(0); - const rest = para.slice(1); - const isLastInSection = - pIdx === paragraphs.length - 1 && !hasImage; - const mb = isLastInSection ? 20 : 16; - - if (isFirstParagraph && firstChar) { - const firstParaLineHeight = Math.round(lineHeight * 1.35); - nodes.push( - - - {firstChar} - - {rest} - , - ); - isFirstParagraph = false; - } else { - nodes.push( - - {indent} - {para} - , - ); - } - }); - - if (hasImage) { - nodes.push( - 0 ? 12 : 0, - marginBottom: 20, - }} - />, - ); - } - - return nodes; - }); - })()} - - {sections.length > 0 && ( - - - - )} - - - - ); -} - function ReadingSettingsModal({ visible, onClose, @@ -290,53 +85,55 @@ function ReadingSettingsModal({ transparent onRequestClose={onClose} > - + {}} > - - - + + + {t('chapterReading.readingSettings')} [ - readingSettingsStyles.closeBtn, - pressed && { opacity: 0.6 }, - ]} + className="p-2 active:opacity-60" accessibilityLabel={t('chapterReading.close')} accessibilityRole="button" > - + - - + + {t('chapterReading.fontSize')} - + {(['small', 'default', 'large'] as const).map((s) => ( onFontSizeChange(s)} - style={({ pressed }) => [ - readingSettingsStyles.segItem, - fontSize === s && readingSettingsStyles.segItemActive, - pressed && { opacity: 0.8 }, - ]} + className={cn( + 'min-h-11 flex-1 items-center justify-center rounded-lg', + fontSize === s && 'bg-primary', + 'active:opacity-80', + )} > {t( `chapterReading.fontSize${s.charAt(0).toUpperCase() + s.slice(1)}`, @@ -347,27 +144,32 @@ function ReadingSettingsModal({ - - + + {t('chapterReading.typography')} - + {(['serif', 'sans'] as const).map((f) => ( onFontFamilyChange(f)} - style={({ pressed }) => [ - 
readingSettingsStyles.segItem, - fontFamily === f && readingSettingsStyles.segItemActive, - pressed && { opacity: 0.8 }, - ]} + className={cn( + 'min-h-11 flex-1 items-center justify-center rounded-lg', + fontFamily === f && 'bg-primary', + 'active:opacity-80', + )} > {t( `chapterReading.font${f.charAt(0).toUpperCase() + f.slice(1)}`, @@ -378,26 +180,28 @@ function ReadingSettingsModal({ - - + + {t('chapterReading.backgroundColor')} - + {(['white', 'sepia'] as const).map((theme) => ( onBackgroundChange(theme)} - style={({ pressed }) => [ - readingSettingsStyles.bgOption, - { backgroundColor: BACKGROUND_COLORS[theme] }, - backgroundColor === theme && - readingSettingsStyles.bgOptionActive, - pressed && { opacity: 0.9 }, - ]} + className={cn( + 'min-h-16 flex-1 flex-row items-center justify-center gap-2.5 rounded-[10px] border-2 border-transparent', + backgroundColor === theme && 'border-primary', + 'active:opacity-90', + )} + style={{ backgroundColor: BACKGROUND_COLORS[theme] }} > {t( `chapterReading.bg${theme === 'white' ? 
'PureWhite' : 'Sepia'}`, @@ -426,117 +231,13 @@ function ReadingSettingsModal({ ); } -const readingSettingsStyles = StyleSheet.create({ - backdrop: { - flex: 1, - justifyContent: 'flex-end', - backgroundColor: 'rgba(0,0,0,0.4)', - }, - sheet: { - backgroundColor: '#fff', - borderTopLeftRadius: 16, - borderTopRightRadius: 16, - paddingTop: 8, - paddingHorizontal: ScreenGutter, - ...Platform.select({ - ios: { - shadowColor: '#000', - shadowOffset: { width: 0, height: -2 }, - shadowOpacity: 0.06, - shadowRadius: 12, - }, - android: { elevation: 12 }, - }), - }, - handle: { - alignSelf: 'center', - width: 40, - height: 4, - borderRadius: 2, - backgroundColor: 'rgba(0,0,0,0.2)', - marginBottom: 16, - }, - header: { - flexDirection: 'row', - alignItems: 'center', - justifyContent: 'space-between', - marginBottom: 20, - }, - title: { - fontSize: 17, - fontWeight: '700', - color: READING_COLORS.onSurface, - }, - closeBtn: { padding: 8 }, - label: { - fontSize: 11, - fontWeight: '600', - letterSpacing: 0.5, - color: READING_COLORS.onSurfaceVariant, - marginBottom: 10, - textTransform: 'uppercase', - }, - section: { marginBottom: 20 }, - segmented: { - flexDirection: 'row', - backgroundColor: '#eeeef0', - borderRadius: 10, - padding: 4, - }, - segItem: { - flex: 1, - minHeight: 44, - justifyContent: 'center', - alignItems: 'center', - borderRadius: 8, - }, - segItemActive: { - backgroundColor: READING_COLORS.primary, - }, - segText: { - fontSize: 14, - fontWeight: '500', - color: READING_COLORS.onSurfaceVariant, - }, - segTextActive: { - fontWeight: '700', - color: '#fff', - }, - bgRow: { flexDirection: 'row', gap: 12 }, - bgOption: { - flex: 1, - minHeight: 64, - flexDirection: 'row', - alignItems: 'center', - justifyContent: 'center', - gap: 10, - borderRadius: 10, - borderWidth: 2, - borderColor: 'transparent', - }, - bgOptionActive: { - borderColor: READING_COLORS.primary, - }, - bgSwatch: { - width: 28, - height: 28, - borderRadius: 6, - borderWidth: 1, - borderColor: 
'rgba(0,0,0,0.1)', - }, - bgLabel: { - fontSize: 14, - fontWeight: '500', - color: READING_COLORS.onSurface, - }, - bgLabelActive: { fontWeight: '700' }, -}); - export default function ChapterScreen() { const { id } = useLocalSearchParams<{ id: string }>(); const insets = useSafeAreaInsets(); + const { width } = useWindowDimensions(); const { t } = useTranslation('memoir'); const { data: chapter, isLoading } = useChapterDetail(id ?? ''); + const contentWidth = Math.min(width - ScreenGutter * 2, 672); const deleteChapter = useDeleteChapter(); const [settingsVisible, setSettingsVisible] = useState(false); @@ -602,8 +303,9 @@ export default function ChapterScreen() { ); } - const sections = chapter.sections ?? []; const coverImageUrl = chapter.cover_image?.url ?? null; + const canonicalMarkdown = (chapter.canonical_markdown ?? '').trim(); + const renderedAssets = chapter.rendered_assets ?? chapter.images ?? []; const handleDeletePress = () => { Alert.alert( @@ -677,12 +379,14 @@ export default function ChapterScreen() { showsVerticalScrollIndicator={false} style={{ backgroundColor: bgColor }} > - diff --git a/app-expo/src/app/(main)/conversation/[id].tsx b/app-expo/src/app/(main)/conversation/[id].tsx index e5788c9..5d3873b 100644 --- a/app-expo/src/app/(main)/conversation/[id].tsx +++ b/app-expo/src/app/(main)/conversation/[id].tsx @@ -20,6 +20,7 @@ import { Platform, Pressable, StyleSheet, + Text as RNText, TextInput, View, } from 'react-native'; @@ -350,16 +351,32 @@ function VoiceRecordButton({ ]} disabled={!enabled} > - - {isRecording ? tapToEndLabel : tapToStartLabel} - + {isRecording ? 
( + + + {tapToEndLabel} + + + ) : ( + + {tapToStartLabel} + + )} {isRecording && ( - {/* TODO: Duration number centering still broken on Android */} - + {formatRecordingDuration(recordingDuration)} - + )} @@ -505,7 +527,7 @@ function ChatInputBar({ accessibilityRole="button" > {sendLabel} @@ -580,11 +602,21 @@ export default function ConversationScreen() { const [input, setInput] = useState(''); const [inputMode, setInputMode] = useState('text'); const [isKeyboardVisible, setIsKeyboardVisible] = useState(false); + const [keyboardHeight, setKeyboardHeight] = useState(0); const listRef = useRef(null); useEffect(() => { - const onShow = () => setIsKeyboardVisible(true); - const onHide = () => setIsKeyboardVisible(false); + const onShow = (e: { endCoordinates: { height: number } }) => { + setIsKeyboardVisible(true); + setKeyboardHeight(e.endCoordinates.height); + InteractionManager.runAfterInteractions(() => { + listRef.current?.scrollToEnd({ animated: true }); + }); + }; + const onHide = () => { + setIsKeyboardVisible(false); + setKeyboardHeight(0); + }; const subShow = Keyboard.addListener('keyboardDidShow', onShow); const subHide = Keyboard.addListener('keyboardDidHide', onHide); return () => { @@ -621,11 +653,12 @@ export default function ConversationScreen() { const keyboardOffset = Platform.OS === 'ios' ? insets.top + 56 : 0; const kavEnabled = inputMode === 'text' && isKeyboardVisible; + const kavBehavior = Platform.OS === 'ios' ? 
'padding' : 'height'; return ( @@ -668,8 +701,14 @@ export default function ConversationScreen() { item.listKey} renderItem={({ item }) => ( , +): string { + const diffMs = Date.now() - timestamp; + if (diffMs < 60_000) { + return t('timeJustNow'); } - if (diffDays === 1) return 'Yesterday'; - if (diffDays < 7) - return date.toLocaleDateString(undefined, { weekday: 'long' }); - return date.toLocaleDateString(undefined, { - month: 'short', - day: 'numeric', - }); + + const minutes = Math.floor(diffMs / 60_000); + if (minutes < 60) { + return t('timeMinutesAgo', { count: minutes }); + } + + const hours = Math.floor(diffMs / 3_600_000); + if (hours < 24) { + return t('timeHoursAgo', { count: hours }); + } + + const days = Math.floor(diffMs / 86_400_000); + return t('timeDaysAgo', { count: Math.max(1, days) }); } function ConversationCardSkeleton() { @@ -74,6 +77,12 @@ function ConversationCard({ item: ConversationListItem; onPress: () => void; }) { + const { t } = useTranslation('conversation'); + const typography = useTypography(); + const titleLineHeight = Math.max( + typography.lineHeightTight, + typography.titleLarge + 4, + ); const avatarBg = item.isDefaultAssistant ? 'bg-primary' : 'bg-secondary'; const avatarIconClass = item.isDefaultAssistant ? 'text-primary-foreground' @@ -89,22 +98,28 @@ function ConversationCard({ > - + - + {item.title} - {formatConversationTime(item.latestMessageTime)} + {formatRelativeConversationListTime(item.latestMessageTime, t)} {item.latestMessagePreview || ''} diff --git a/app-expo/src/app/(tabs)/memoir.tsx b/app-expo/src/app/(tabs)/memoir.tsx index 8792017..2a1fd86 100644 --- a/app-expo/src/app/(tabs)/memoir.tsx +++ b/app-expo/src/app/(tabs)/memoir.tsx @@ -30,6 +30,7 @@ function getChapterVariant(vm: ChapterViewModel): ChapterVariant { } function getWordCount(vm: ChapterViewModel): number { + if (vm.wordCount > 0) return vm.wordCount; return (vm.sections ?? []).reduce( (sum, s) => sum + (s.content?.length ?? 
0), 0, @@ -403,7 +404,7 @@ export default function MemoirScreen() { useFocusEffect( useCallback(() => { checkCover.mutate(undefined); - }, [checkCover.mutate]), + }, [checkCover]), ); const handleRefresh = useCallback(async () => { @@ -414,7 +415,7 @@ export default function MemoirScreen() { } finally { setRefreshing(false); } - }, [checkCover.mutateAsync, refetch]); + }, [checkCover, refetch]); const handleStartChapter = () => { createConversation.mutate(undefined, { diff --git a/app-expo/src/features/memoir/mappers.ts b/app-expo/src/features/memoir/mappers.ts index cb91b3a..38b483a 100644 --- a/app-expo/src/features/memoir/mappers.ts +++ b/app-expo/src/features/memoir/mappers.ts @@ -6,23 +6,40 @@ function countByStatus(images: ImageAsset[], status: string): number { export function toChapterViewModel(chapter: Chapter): ChapterViewModel { const images = chapter.images ?? []; - const completedCount = countByStatus(images, 'completed'); + const cover = chapter.cover_image ?? chapter.cover_asset ?? null; + const imagesForStatus = cover ? [cover, ...images] : images; + const completedCount = countByStatus(imagesForStatus, 'completed'); + const hasContent = + !!(chapter.canonical_markdown ?? '').trim() || + !!(chapter.content ?? '').trim() || + !!(chapter.summary ?? '').trim(); + const wordCountFromSections = (chapter.sections ?? []).reduce( + (sum, s) => sum + (s.content?.length ?? 0), + 0, + ); + const wordCount = + typeof chapter.word_count === 'number' && chapter.word_count >= 0 + ? 
chapter.word_count + : wordCountFromSections; return { id: chapter.id, title: chapter.title, category: chapter.category, orderIndex: chapter.order_index, - isEmpty: chapter.status === 'empty' || !chapter.content, + isEmpty: chapter.status === 'empty' || !hasContent, isNew: chapter.is_new, - hasImages: images.length > 0, - allImagesReady: images.length > 0 && completedCount === images.length, + hasImages: imagesForStatus.length > 0, + allImagesReady: + imagesForStatus.length > 0 && completedCount === imagesForStatus.length, pendingImageCount: - countByStatus(images, 'pending') + countByStatus(images, 'processing'), - failedImageCount: countByStatus(images, 'failed'), + countByStatus(imagesForStatus, 'pending') + + countByStatus(imagesForStatus, 'processing'), + failedImageCount: countByStatus(imagesForStatus, 'failed'), sections: chapter.sections ?? [], - coverImageUrl: chapter.cover_image?.url ?? null, + coverImageUrl: cover?.url ?? null, updatedAt: chapter.updated_at, + wordCount, }; } diff --git a/app-expo/src/features/memoir/types.ts b/app-expo/src/features/memoir/types.ts index 8a68148..2406a3b 100644 --- a/app-expo/src/features/memoir/types.ts +++ b/app-expo/src/features/memoir/types.ts @@ -58,7 +58,16 @@ export interface Chapter { category: string; images: ImageAsset[]; cover_image: ImageAsset | null; + /** 列表接口与 cover_image 同构(资产化封面) */ + cover_asset?: ImageAsset | null; sections: ChapterSection[]; + summary?: string; + /** 列表接口:与 canonical 一致的字符规模(后端 word_count) */ + word_count?: number; + /** 正文真源,优先用于渲染 */ + canonical_markdown?: string | null; + /** 图片等资源映射,与 canonical_markdown 配合使用 */ + rendered_assets?: ImageAsset[]; updated_at: string | null; is_new: boolean; source_segments: unknown[]; @@ -123,4 +132,6 @@ export interface ChapterViewModel { sections: ChapterSection[]; coverImageUrl: string | null; updatedAt: string | null; + /** 优先使用列表接口的 word_count,否则由 sections 推算 */ + wordCount: number; } diff --git a/app-expo/src/i18n/generated/resources.ts 
b/app-expo/src/i18n/generated/resources.ts index cd50f2e..b342a58 100644 --- a/app-expo/src/i18n/generated/resources.ts +++ b/app-expo/src/i18n/generated/resources.ts @@ -87,6 +87,13 @@ interface Resources { switchToVoice: 'Switch to voice input'; tapToEndRecording: 'Tap to end'; tapToStartRecording: 'Tap to start recording'; + timeDaysAgo_one: '{{count}} day ago'; + timeDaysAgo_other: '{{count}} days ago'; + timeHoursAgo_one: '{{count}} hour ago'; + timeHoursAgo_other: '{{count}} hours ago'; + timeJustNow: 'Just now'; + timeMinutesAgo_one: '{{count}} minute ago'; + timeMinutesAgo_other: '{{count}} minutes ago'; viewAll: 'View All'; voiceMessagePreview: 'Voice message'; }; diff --git a/app-expo/src/i18n/locales/en/conversation.json b/app-expo/src/i18n/locales/en/conversation.json index aeb0997..d003451 100644 --- a/app-expo/src/i18n/locales/en/conversation.json +++ b/app-expo/src/i18n/locales/en/conversation.json @@ -26,5 +26,12 @@ "tapToEndRecording": "Tap to end", "tapToStartRecording": "Tap to start recording", "viewAll": "View All", - "voiceMessagePreview": "Voice message" + "voiceMessagePreview": "Voice message", + "timeJustNow": "Just now", + "timeMinutesAgo_one": "{{count}} minute ago", + "timeMinutesAgo_other": "{{count}} minutes ago", + "timeHoursAgo_one": "{{count}} hour ago", + "timeHoursAgo_other": "{{count}} hours ago", + "timeDaysAgo_one": "{{count}} day ago", + "timeDaysAgo_other": "{{count}} days ago" } diff --git a/app-expo/src/i18n/locales/en/profile.json b/app-expo/src/i18n/locales/en/profile.json index cb7acb5..e84a8c9 100644 --- a/app-expo/src/i18n/locales/en/profile.json +++ b/app-expo/src/i18n/locales/en/profile.json @@ -5,13 +5,13 @@ }, "appExperience": { "language": "Language", - "languageDesc": "App display language", + "languageDesc": "Display language", "largeText": "Large Text", "largeTextDesc": "Make reading easier", "nightMode": "Night Mode", "nightModeDesc": "Use dark theme", "theme": "Theme", - "themeDesc": "App color theme", + 
"themeDesc": "Color theme", "title": "App Experience" }, "dataPrivacy": { diff --git a/app-expo/src/i18n/locales/zh/conversation.json b/app-expo/src/i18n/locales/zh/conversation.json index ed55f15..d941584 100644 --- a/app-expo/src/i18n/locales/zh/conversation.json +++ b/app-expo/src/i18n/locales/zh/conversation.json @@ -26,5 +26,12 @@ "tapToEndRecording": "点击结束", "tapToStartRecording": "点击开始录音", "viewAll": "查看全部", - "voiceMessagePreview": "语音消息" + "voiceMessagePreview": "语音消息", + "timeJustNow": "刚刚", + "timeMinutesAgo_one": "{{count}}分钟前", + "timeMinutesAgo_other": "{{count}}分钟前", + "timeHoursAgo_one": "{{count}}小时前", + "timeHoursAgo_other": "{{count}}小时前", + "timeDaysAgo_one": "{{count}}天前", + "timeDaysAgo_other": "{{count}}天前" } diff --git a/app-expo/tests/features/memoir/mappers.test.ts b/app-expo/tests/features/memoir/mappers.test.ts index 009d136..fb1d770 100644 --- a/app-expo/tests/features/memoir/mappers.test.ts +++ b/app-expo/tests/features/memoir/mappers.test.ts @@ -49,6 +49,27 @@ describe('toChapterViewModel', () => { expect(vm.isEmpty).toBe(false); expect(vm.isNew).toBe(false); expect(vm.sections).toHaveLength(1); + expect(vm.wordCount).toBe('段落1'.length); + }); + + test('uses word_count from API when sections empty', () => { + const vm = toChapterViewModel( + makeChapter({ + sections: [], + word_count: 1200, + canonical_markdown: 'x'.repeat(1200), + }), + ); + expect(vm.wordCount).toBe(1200); + }); + + test('cover_asset mirrors cover_image for list payload', () => { + const cover = makeImage({ url: 'https://example.com/from-asset.jpg' }); + const vm = toChapterViewModel( + makeChapter({ cover_image: null, cover_asset: cover, images: [] }), + ); + expect(vm.coverImageUrl).toBe('https://example.com/from-asset.jpg'); + expect(vm.hasImages).toBe(true); }); test('detects empty chapters', () => { @@ -58,6 +79,18 @@ describe('toChapterViewModel', () => { expect(vm.isEmpty).toBe(true); }); + test('uses canonical_markdown for isEmpty when present', () => { + 
const vm = toChapterViewModel( + makeChapter({ + status: 'ready', + content: '', + sections: [], + canonical_markdown: '# 童年\n\n一段回忆。', + }), + ); + expect(vm.isEmpty).toBe(false); + }); + test('derives image status counts correctly', () => { const images = [ makeImage({ status: 'completed' }), diff --git a/docs/plans/2026-03-19-image-intent-placeholder-removal-design.md b/docs/plans/2026-03-19-image-intent-placeholder-removal-design.md new file mode 100644 index 0000000..2f2eba5 --- /dev/null +++ b/docs/plans/2026-03-19-image-intent-placeholder-removal-design.md @@ -0,0 +1,352 @@ +# Image Intent 化与占位符退役设计 + +> 日期:2026-03-19 +> 前提:`story-first + markdown-first` 总体重构已完成。 +> 目标:彻底移除 `{{IMAGE:描述}}` 这类正文占位符,把图片生成与回填改造成结构化 image intent 流程。 + +## 1. 结论 + +本设计的核心决策如下: + +1. `{{IMAGE:描述}}` 不再是正文协议。 +2. story 正文只保留最终可阅读的 markdown。 +3. 每个 story 必须且仅有一张主插图。 +4. 插图属于 story,封面属于 chapter。 +5. 图片“待生成意图”以结构化数据存储,不再嵌入正文。 +6. 正文中的图片只允许最终引用形式,例如 `![caption](asset://image_id)`。 + +一句话概括新流程: + +`story markdown -> extract image intent -> generate asset -> write new story version with asset:// reference` + +## 2. 问题定义 + +旧占位符方案存在以下问题: + +- 生成意图与正文内容耦合,污染 markdown 真源。 +- 占位符兼容双层、四层、多层花括号,协议不稳定。 +- 图片样式模板被直接拼进占位符字符串,数据边界混乱。 +- 旧链路依赖 `section` 拆分,和 `story-first` 架构冲突。 +- 占位符错误、残留或格式偏差会直接泄漏到阅读层。 +- “每 3 段 1 图”这类旧 fallback 是技术债,不应继续存在。 + +因此,新系统必须彻底移除正文占位符,把图片生成升级成 story/chapter 的结构化资产流程。 + +## 3. 目标与非目标 + +### 3.1 目标 + +- 让图片生成链路与 story/chapter 正文解耦。 +- 保证每个 story 恰好一张主插图。 +- 让 chapter 封面从章节内全部 stories 聚合生成。 +- 让 app、PDF、未来运营端消费统一的 markdown 图片协议。 +- 建立可重试、可审计、可回填的图片版本链。 + +### 3.2 非目标 + +- 不保留 `{{IMAGE:描述}}` 作为线上兼容格式。 +- 不支持 story 多张正文插图的一期能力。 +- 不在本阶段做通用媒体编辑器。 +- 不把封面也写回 chapter 正文 markdown。 + +## 4. 
核心模型 + +### 4.1 Story 插图意图 + +建议新增 `story_image_intents`: + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `id` | string | 主键 | +| `story_id` | string | 所属 story | +| `story_version_id` | string | 提取意图时对应的正文版本 | +| `intent_role` | string | 固定为 `primary` | +| `source_span` | json/null | 对应正文中的段落或块位置信息 | +| `caption` | string | 最终图注候选 | +| `prompt_brief` | text | 供出图使用的结构化场景摘要 | +| `style_profile` | string/null | 风格策略键 | +| `status` | string | pending / processing / completed / failed;回填失败时为 completed_but_unapplied(见 10.3) | +| `asset_id` | string/null | 生成成功后的资产 | +| `error` | text/null | 错误信息 | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | + +关键约束: + +- 每个 active story 只能有 1 条 `intent_role=primary` 的有效 intent。 + +### 4.2 Chapter 封面意图 + +建议新增 `chapter_cover_intents`: + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `id` | string | 主键 | +| `chapter_id` | string | 所属 chapter | +| `chapter_version_id` | string | 封面生成时对应的章节版本 | +| `story_ids` | json | 参与聚合的 stories | +| `prompt_brief` | text | 章节封面摘要 | +| `status` | string | pending / processing / completed / failed | +| `asset_id` | string/null | 封面资产 | +| `error` | text/null | 错误信息 | +| `created_at` | datetime | 创建时间 | +| `updated_at` | datetime | 更新时间 | + +### 4.3 统一资源表 + +建议统一使用 `assets` 或在现有图片表基础上重构: + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `id` | string | 主键 | +| `asset_type` | string | story_image / chapter_cover | +| `storage_key` | string | 对象存储键 | +| `url` | string/null | 可访问地址 | +| `provider` | string | 生成 provider | +| `style_profile` | string/null | 风格配置 | +| `prompt_final` | text | 最终发送给模型的 prompt | +| `status` | string | completed / failed / deleted | +| `width` | int/null | 宽 | +| `height` | int/null | 高 | +| `created_at` | datetime | 创建时间 | + +## 5. 新的正文协议 + +### 5.1 允许形式 + +正文 markdown 中只允许最终图片引用: + +```md +![奶奶坐在院子里的藤椅上](asset://img_123) +``` + +### 5.2 禁止形式 + +以下形式全部退出线上正文: + +- `{{IMAGE:描述}}` +- `{{{{IMAGE:描述}}}}` +- `<!-- IMAGE:描述 -->` +- 任意 HTML 媒体占位标记 + +### 5.3 解释 + +正文只服务阅读与导出,不再承载“待生成意图”。 + +待生成意图只存在于结构化表中。 + +## 6. 
流程设计 + +### 6.1 Story 主插图流程 + +```mermaid +flowchart LR + A["StorySynthesisAgent"] --> B["story canonical markdown"] + B --> C["StoryImageIntentExtractor"] + C --> D["story_image_intents"] + D --> E["ImageGenerationTask"] + E --> F["assets"] + F --> G["Create new story_version"] + G --> H["story markdown includes asset:// reference"] +``` + +步骤: + +1. `StorySynthesisAgent` 生成或更新 story canonical markdown。 +2. `StoryImageIntentExtractor` 从 story markdown 或 AST 中提取唯一主图意图。 +3. 写入 `story_image_intents`,状态为 `pending`。 +4. 异步图片任务读取 intent,生成资产。 +5. 成功后写入 `assets`。 +6. 创建新的 `story_version`,把 markdown 中对应位置回填成 `asset://` 图片引用。 +7. 更新 `stories.current_version_id`。 + +### 6.2 Chapter 封面流程 + +```mermaid +flowchart LR + A["ChapterComposerOrchestrator"] --> B["chapter markdown"] + B --> C["Aggregate chapter stories"] + C --> D["chapter_cover_intent"] + D --> E["Cover image generation"] + E --> F["cover asset"] + F --> G["chapters.cover_asset_id"] +``` + +步骤: + +1. `ChapterComposerOrchestrator` 完成章节编排。 +2. 聚合本章 stories 的人物、地点、时间、情绪、时代背景。 +3. 生成唯一 `chapter_cover_intent`。 +4. 生成封面资源并绑定到 `chapters.cover_asset_id`。 + +说明: + +- 封面不回写进正文 markdown。 +- 阅读页顶部可单独展示封面 asset。 + +## 7. Image Intent 提取策略 + +### 7.1 规则 + +每个 story 必须且仅有一张主插图,因此 extractor 不做多图候选池。 + +优先级: + +1. 最具画面感的场景段落 +2. 具有人物 + 动作 + 场景 + 时代细节的段落 +3. 故事转折点或记忆锚点段落 +4. 若 story 过于抽象,则退化为“人物/地点/时代感”概括图 + +### 7.2 输出 + +输出结构至少包含: + +- `caption` +- `prompt_brief` +- `source_span` +- `style_profile` + +### 7.3 失败兜底 + +如果规则和 agent 都未提取到高质量意图,则使用最小兜底策略: + +- story title +- story stage +- time refs +- place refs +- people refs +- story summary + +即使降级,也必须生成 1 条 primary intent。 + +## 8. 版本回填策略 + +### 8.1 原则 + +图片生成成功后,不能原地覆盖 story 正文。 + +必须: + +1. 基于当前 story version 创建新版本 +2. 将最终图片引用回填到 markdown +3. 写入 `change_summary` +4. 更新当前生效版本指针 + +### 8.2 回填位置 + +建议由 `source_span` 或 block id 决定回填位置。 + +如果定位失败: + +- 退化为在 story 开头或相关段落后插入图片引用 +- 但仍需创建新版本,不可丢图 + +## 9. 
状态机 + +### 9.1 Story 状态建议 + +- `content_pending` +- `content_ready_image_pending` +- `content_ready_image_processing` +- `published` +- `image_failed` + +### 9.2 约束 + +- `published` story 必须有 resolved primary image asset +- `content_ready_image_pending` 与 `content_ready_image_processing` 表示正文已就绪,但图片尚未开始生成或仍在生成中 +- `image_failed` 允许重试,但不允许伪装成已发布完整内容 + +## 10. 失败处理 + +### 10.1 意图提取失败 + +- 走 deterministic fallback +- 必须产出 intent + +### 10.2 图片生成失败 + +- intent 状态置为 `failed` +- story 状态置为 `image_failed`,等待重试时可回到 `content_ready_image_pending` +- 支持后台重试 + +### 10.3 回填失败 + +- asset 保留 +- intent 状态可为 `completed_but_unapplied` +- 创建修复任务重新生成 story version + +### 10.4 Chapter 封面失败 + +- 不影响章节正文阅读 +- 允许章节无封面但正文可读 + +## 11. 测试计划 + +### 11.1 单元测试 + +- 每个 story 只能生成一个 primary intent +- abstract story 走 fallback 也能生成 intent +- 回填后 markdown 只含 `asset://`,不含 placeholder + +### 11.2 集成测试 + +- story markdown -> image intent -> asset -> new story version +- chapter stories -> cover intent -> cover asset + +### 11.3 迁移测试 + +- 旧 `{{IMAGE:描述}}` 正文可被正确提取为 intent +- 旧图片记录可被映射为 asset +- 迁移后正文不再含 placeholder + +### 11.4 渲染测试 + +- app 阅读页正确渲染 `asset://` +- PDF 正确渲染 story 图片与 chapter 封面 +- 遇到无法解析的外链或非法资源时安全失败 + +## 12. 旧链路退役清单 + +以下逻辑应退出线上主链路: + +- `inject_image_placeholder_template` +- `inject_placeholders` +- `parse_image_placeholders` +- `split_narrative_to_sections` +- `parse_narrative_to_sections` +- 基于 placeholder 创建段落配图的逻辑 +- “每 3 段 1 图”的旧 fallback + +注意: + +- 这些逻辑可短期保留在离线迁移脚本中读取历史数据 +- 但不允许继续出现在线上写路径和读路径 + +## 13. 一次性实施步骤 + +1. 新增 `story_image_intents` +2. 新增 `chapter_cover_intents` +3. 统一资源表为 `assets` +4. 删除 prompt 中对 `{{IMAGE:描述}}` 的输出要求 +5. 重写 story 生成链:正文生成后提取 primary image intent +6. 重写图片任务:读取 intent,不读取正文占位符 +7. 重写 story version 回填逻辑:写入 `asset://` +8. 重写 chapter 封面聚合逻辑 +9. app / PDF 渲染只认 `asset://` +10. 迁移历史正文与旧图片记录 +11. 删除旧占位符相关线上逻辑与测试 +12. 补齐新 intent / asset / cover 测试 + +## 14. 
最终判断 + +这次“修复占位符”本质上不是字符串格式修复,而是把旧的正文 DSL 彻底退役。 + +正确的长期模型应当是: + +- story 有且只有一张主插图 +- chapter 有一张聚合封面 +- 图片意图是结构化资产流程 +- markdown 只保存最终可阅读结果 + +只要这四点成立,未来无论是运营润色、重新生成图片、替换封面、审计版本还是导出 PDF,都不需要再碰 `{{IMAGE:描述}}` 这类过渡协议。