diff --git a/api/.env.example b/api/.env.example index 78b8235..b602a9d 100644 --- a/api/.env.example +++ b/api/.env.example @@ -74,6 +74,7 @@ TTS_PROVIDER=tencent # 仅 TTS_PROVIDER=tencent 时生效,与 ASR 共用 TENCENT_SECRET_ID / TENCENT_SECRET_KEY # 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668 TTS_VOICE_TYPE=603004 +TTS_CODEC=mp3 # ============================================================================= # WeChat Pay diff --git a/api/app/adapters/tts/tencent_tts.py b/api/app/adapters/tts/tencent_tts.py index 13fbc9b..443c05a 100644 --- a/api/app/adapters/tts/tencent_tts.py +++ b/api/app/adapters/tts/tencent_tts.py @@ -1,31 +1,76 @@ -"""Tencent Cloud TTS adapter — implements TTSProvider port. - -API: https://cloud.tencent.com/document/product/1073/37995 -""" +"""Tencent Cloud TTS adapter — implements TTSProvider port.""" import asyncio import base64 +import re import uuid from app.core.logging import get_logger logger = get_logger(__name__) +# OpenAI voice name -> Tencent VoiceType ID +VOICE_MAP: dict[str, int] = { + "alloy": 1001, + "echo": 1002, + "fable": 1003, + "onyx": 1004, + "nova": 1005, + "shimmer": 1006, +} + +# 中文 150 字 / 英文 500 字母,取保守值 +MAX_CHARS_PER_REQUEST = 150 + + +def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]: + """Split text into chunks within API limit.""" + text = text.strip() + if not text: + return [] + if len(text) <= max_chars: + return [text] + + chunks: list[str] = [] + # Split by sentence boundaries first + pattern = r"[。!?.!?\n]+" + parts = re.split(f"({pattern})", text) + current = "" + for i, p in enumerate(parts): + if re.match(pattern, p): + current += p + if current.strip(): + chunks.append(current.strip()) + current = "" + else: + if len(current) + len(p) <= max_chars: + current += p + else: + if current.strip(): + chunks.append(current.strip()) + current = "" + # Single part exceeds limit, split by length + while p: + chunk = p[:max_chars] + p = p[max_chars:] + chunks.append(chunk) + if current.strip(): + chunks.append(current.strip()) + return chunks + class TencentTTSProvider: def __init__( self, secret_id: str, secret_key: str, - voice_type: int = 603004, + voice_type: int = 1001, codec: str = "mp3", - sample_rate: int = 16000, ): self._secret_id = secret_id self._secret_key = secret_key self._voice_type = voice_type self._codec = codec - self._sample_rate = sample_rate self._client = None def _get_client(self): @@ -48,32 +93,52 @@ class TencentTTSProvider: logger.error("Tencent TTS client init failed: %s", e) return None - def _synthesize_sync(self, text: str) -> bytes: - """Sync synthesis (run in executor).""" + def _synthesize_sync(self, text: str, voice_type: int) -> bytes: client = self._get_client() if not client: return b"" - from tencentcloud.tts.v20190823 import models - - req = models.TextToVoiceRequest() - req.Text = text[:500] # 中文约150字,英文约500字母,保守截断 - req.SessionId = f"tts-{uuid.uuid4().hex}" - req.VoiceType = self._voice_type - req.Codec = self._codec - req.SampleRate = self._sample_rate - req.PrimaryLanguage = 1 # 1=中文 - - resp = client.TextToVoice(req) - if resp.Audio: - return base64.b64decode(resp.Audio) - return b"" - - async def synthesize(self, text: str, voice: str = "alloy") -> bytes: - """Convert text to speech. Returns mp3 bytes.""" - if not text or not self._secret_id or not self._secret_key: - return b"" try: - return await asyncio.to_thread(self._synthesize_sync, text) + from tencentcloud.common.exception.tencent_cloud_sdk_exception import ( + TencentCloudSDKException, + ) + from tencentcloud.tts.v20190823 import models + + req = models.TextToVoiceRequest() + req.Text = text + req.SessionId = uuid.uuid4().hex + req.VoiceType = voice_type + req.PrimaryLanguage = 1 + req.SampleRate = 16000 + req.Codec = self._codec + + resp = client.TextToVoice(req) + if not resp or not resp.Audio: + return b"" + return base64.b64decode(resp.Audio) + except TencentCloudSDKException as e: + logger.error("Tencent TTS SDK error: %s", e) + return b"" except Exception as e: logger.error("Tencent TTS synthesize failed: %s", e) return b"" + + async def synthesize(self, text: str, voice: str = "alloy") -> bytes: + if not self._secret_id or not self._secret_key: + logger.error("Tencent TTS credentials not configured") + return b"" + + voice_type = VOICE_MAP.get(voice.lower(), self._voice_type) + chunks = _chunk_text(text) + if not chunks: + return b"" + + results: list[bytes] = [] + for chunk in chunks: + audio = await asyncio.to_thread( + self._synthesize_sync, chunk, voice_type + ) + if not audio: + return b"" + results.append(audio) + + return b"".join(results) diff --git a/api/app/core/config.py b/api/app/core/config.py index 9be2704..419d244 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -63,6 +63,7 @@ class Settings(BaseSettings): tts_provider: str = "tencent" openai_api_key: str = "" tts_voice_type: int = 603004 # Tencent 音色 ID,见 https://cloud.tencent.com/document/product/1073/92668 + tts_codec: str = "mp3" # ── WeChat Pay ─────────────────────────────────────────── wechat_pay_app_id: str = "" diff --git a/api/app/core/dependencies.py b/api/app/core/dependencies.py index 7b9025c..36f9bdc 100644 --- a/api/app/core/dependencies.py +++ b/api/app/core/dependencies.py @@ -67,7 +67,7 @@ def get_tts_provider() -> TTSProvider: secret_id=settings.tencent_secret_id, secret_key=settings.tencent_secret_key, voice_type=settings.tts_voice_type, - codec="mp3", + codec=settings.tts_codec, ) from app.adapters.tts.openai_tts import OpenAITTSProvider diff --git a/api/app/features/conversation/ws/pipeline.py b/api/app/features/conversation/ws/pipeline.py index 6e5daee..db3935f 100644 --- a/api/app/features/conversation/ws/pipeline.py +++ b/api/app/features/conversation/ws/pipeline.py @@ -26,6 +26,7 @@ from app.features.conversation.ws.profile_collector import ( get_missing_profile_fields, ) from app.features.user.models import User +from app.core.config import settings from app.core.dependencies import get_asr_provider, get_tts_provider from app.features.memoir.state_service import get_or_create_state @@ -47,7 +48,7 @@ async def _send_tts_audio(conversation_id: str, text: str) -> None: "conversation_id": conversation_id, "data": { "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"), - "format": "mp3", + "format": settings.tts_codec, }, "timestamp": datetime.now(timezone.utc).isoformat(), }) diff --git a/api/app/features/memoir/memoir_images/prompting.py b/api/app/features/memoir/memoir_images/prompting.py index 01e8565..ee2b539 100644 --- a/api/app/features/memoir/memoir_images/prompting.py +++ b/api/app/features/memoir/memoir_images/prompting.py @@ -93,6 +93,87 @@ class MemoirImagePromptService: "prompt_context": prompt_context, } + def build_cover_prompt( + self, + chapter_title: str, + chapter_category: str, + context_excerpt: str, + ) -> dict[str, str]: + """生成章节封面图的 image-generation prompt。""" + style = self.CATEGORY_STYLE_MAP.get(chapter_category, self.settings.default_style) + prompt_context = f"{chapter_category}: {chapter_title}" + + llm_input = { + "chapter_title": chapter_title, + "chapter_category": chapter_category, + "context_excerpt": context_excerpt, + "default_style": style, + "default_size": self.settings.default_size, + } + + if self.llm: + try: + response = self.llm.invoke( + "Return JSON only with keys prompt, style, size. " + "Create an image-generation prompt for a memoir chapter COVER. " + "Emphasize: hero composition, evocative scene, chapter cover aesthetic.\n" + + json.dumps(llm_input, ensure_ascii=False) + ) + parsed = json.loads(extract_json_payload(response.content)) + return { + "prompt": _ensure_style_in_prompt( + parsed["prompt"], parsed.get("style", style) + ), + "style": parsed.get("style", style), + "size": parsed.get("size", self.settings.default_size), + "prompt_context": prompt_context, + } + except Exception as exc: + logger.warning( + "封面 prompt 生成回退到默认模板: chapter_category=%s, title=%s, error=%s", + chapter_category, + chapter_title, + exc, + ) + + return { + "prompt": _ensure_style_in_prompt( + self._build_cover_fallback_prompt( + chapter_category=chapter_category, + context_excerpt=context_excerpt, + style=style, + ), + style, + ), + "style": style, + "size": self.settings.default_size, + "prompt_context": prompt_context, + } + + def _build_cover_fallback_prompt( + self, + chapter_category: str, + context_excerpt: str, + style: str, + ) -> str: + subject = self.CATEGORY_FALLBACK_SUBJECT_MAP.get( + chapter_category, "memoir scene" + ) + if _contains_cjk(context_excerpt): + return ( + f"A {style} chapter cover illustration of a {subject}, " + "hero composition, evocative scene, emotionally resonant, " + "cinematic framing, natural lighting, no text overlay." + ) + details = (context_excerpt or "").strip()[:200] + if not details: + details = "A personal life story scene with authentic emotional detail" + return ( + f"A {style} chapter cover illustration of a {subject}. " + f"Scene hint: {details}. " + "Hero composition, evocative scene, cinematic framing, no text overlay." + ) + def _build_fallback_prompt( self, chapter_category: str, diff --git a/api/app/tasks/memoir_tasks.py b/api/app/tasks/memoir_tasks.py index 7d45f58..4bc24e6 100644 --- a/api/app/tasks/memoir_tasks.py +++ b/api/app/tasks/memoir_tasks.py @@ -230,6 +230,25 @@ def _chapter_has_any_section_images_to_generate(chapter) -> bool: return any(_section_has_image_to_generate(s) for s in chapter.sections) +def _chapter_has_cover_to_generate(chapter) -> bool: + """章节是否有待生成的封面图(MemoirImage section_id=None 且 status 为 pending/failed)。""" + images = getattr(chapter, "images", None) or [] + for m in images: + if getattr(m, "section_id", None) is None: + status = (getattr(m, "status") or "").strip() + return status in (IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED) + return False + + +def _get_cover_memoir_image(chapter): + """获取章节封面 MemoirImage(section_id=None),若无可生成则返回 None。""" + images = getattr(chapter, "images", None) or [] + for m in images: + if getattr(m, "section_id", None) is None: + return m + return None + + def _select_placeholders_for_effective_max( placeholders: list[dict], existing_images: list[dict] | None, @@ -261,7 +280,7 @@ def _select_placeholders_for_effective_max( def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str, category: str, order_index: int, source_segments: list, user_id: str): """ 将带占位符的 narrative 拆成 chapter_sections 并写入;为每段占位符创建 pending 配图。 - 已有 section 与图片不删除,仅追加新内容。封面图先空着,不自动设置。 + 已有 section 与图片不删除,仅追加新内容。若无封面 MemoirImage 则创建 pending 封面(section_id=None)。 chapter 可为已有章节或 None(会新建)。返回 chapter。 """ now_iso = datetime.now(timezone.utc).isoformat() @@ -308,6 +327,9 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str narrative_to_parse = (narrative or "").strip() order_base = 0 + img_settings = MemoirImageSettings.from_env() + prompt_service = MemoirImagePromptService(llm=None, settings=img_settings) if img_settings.enabled else None + segments = split_narrative_to_sections(narrative_to_parse) if not segments: sec = ChapterSection( @@ -319,14 +341,35 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str ) db.add(sec) db.flush() + if img_settings.enabled: + stmt_cover = ( + select(MemoirImage) + .where( + MemoirImage.chapter_id == chapter.id, + MemoirImage.section_id.is_(None), + ) + ) + if not db.execute(stmt_cover).scalar_one_or_none(): + cover_ph = { + "placeholder": "{{{{{{{{IMAGE:章节封面}}}}}}}}", + "description": "章节封面", + "index": 0, + } + cover_asset = build_initial_image_assets( + [cover_ph], + img_settings.provider, + prompt_service.CATEGORY_STYLE_MAP.get(category, img_settings.default_style) if prompt_service else img_settings.default_style, + img_settings.default_size, + now_iso, + )[0] + cover_mi = _memoir_image_from_asset(chapter.id, None, 0, cover_asset) + db.add(cover_mi) + db.flush() chapter.title = title chapter.is_new = True chapter.source_segments = list(set((chapter.source_segments or []) + (source_segments or []))) return chapter - img_settings = MemoirImageSettings.from_env() - prompt_service = MemoirImagePromptService(llm=None, settings=img_settings) if img_settings.enabled else None - # 每 3 个 section 对应 1 张图片,其他 section 的 image_id 为空 def _should_have_image(order_idx: int) -> bool: return (order_idx % 3) == 2 @@ -371,7 +414,34 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str db.flush() sec.image_id = mi.id db.flush() - # 封面图先空着,不自动用首图做封面 + + # 封面图:若无则创建 pending MemoirImage(section_id=None, order_index=0) + if img_settings.enabled: + stmt_cover = ( + select(MemoirImage) + .where( + MemoirImage.chapter_id == chapter.id, + MemoirImage.section_id.is_(None), + ) + ) + existing_cover = db.execute(stmt_cover).scalar_one_or_none() + if not existing_cover: + cover_ph = { + "placeholder": "{{{{{{{{IMAGE:章节封面}}}}}}}}", + "description": "章节封面", + "index": 0, + } + cover_asset = build_initial_image_assets( + [cover_ph], + img_settings.provider, + prompt_service.CATEGORY_STYLE_MAP.get(category, img_settings.default_style) if prompt_service else img_settings.default_style, + img_settings.default_size, + now_iso, + )[0] + cover_mi = _memoir_image_from_asset(chapter.id, None, 0, cover_asset) + db.add(cover_mi) + db.flush() + chapter.title = title chapter.is_new = True chapter.source_segments = list(set((chapter.source_segments or []) + (source_segments or []))) @@ -611,7 +681,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): combined_text = "\n\n".join(segment_texts) source_ids = [seg.id for seg in category_segments] - # 查找 active 章节(被清除的章节不继续更新,而是创建新的),并预加载 sections + # 查找 active 章节(被清除的章节不继续更新,而是创建新的),并预加载 sections、images stmt_chapter = ( select(Chapter) .where( @@ -619,7 +689,10 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): Chapter.category == chapter_category, Chapter.is_active == True, ) - .options(joinedload(Chapter.sections)) + .options( + joinedload(Chapter.sections).joinedload(ChapterSection.image_record), + joinedload(Chapter.images), + ) ) result_chapter = db.execute(stmt_chapter) chapter = result_chapter.unique().scalar_one_or_none() @@ -699,7 +772,10 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): ) db.flush() db.refresh(chapter) - if image_settings.enabled and _chapter_has_any_section_images_to_generate(chapter): + if image_settings.enabled and ( + _chapter_has_any_section_images_to_generate(chapter) + or _chapter_has_cover_to_generate(chapter) + ): chapters_to_enqueue.add(chapter.id) # 更新 Book @@ -825,6 +901,16 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): user_id=user_id, ) db.commit() + db.refresh(chapter) + image_settings = MemoirImageSettings.from_env() + if image_settings.enabled and chapter and ( + _chapter_has_any_section_images_to_generate(chapter) + or _chapter_has_cover_to_generate(chapter) + ): + try: + generate_chapter_images.delay(chapter.id) + except Exception as exc: + logger.warning("补图任务派发失败: chapter=%s, error=%s", chapter.id, exc) return {"status": "success"} except Exception as e: @@ -832,14 +918,15 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): raise self.retry(exc=e) -def build_cos_key(user_id: str, chapter_id: str, index: int, prompt: str) -> str: +def build_cos_key(user_id: str, chapter_id: str, index: int | str, prompt: str) -> str: short_hash = hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:10] - return f"memoirs/{user_id}/{chapter_id}/{index}-{short_hash}.png" + index_part = "cover" if index in (-1, "cover") else str(index) + return f"memoirs/{user_id}/{chapter_id}/{index_part}-{short_hash}.png" @shared_task(bind=True, max_retries=3, default_retry_delay=30) def generate_chapter_images(self, chapter_id: str): - """Async task to generate images for a chapter's sections (each section has at most one image).""" + """Async task to generate images for a chapter's cover and sections (each section has at most one image).""" lock_acquired = False provider = None with get_sync_db() as db: @@ -860,7 +947,15 @@ def generate_chapter_images(self, chapter_id: str): sections_with_pending = [ (idx, s) for idx, s in enumerate(sections) if _section_has_image_to_generate(s) ] - if not sections_with_pending: + cover_rec = _get_cover_memoir_image(chapter) + cover_to_generate = ( + cover_rec + if cover_rec + and (getattr(cover_rec, "status") or "").strip() + in (IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED) + else None + ) + if not sections_with_pending and not cover_to_generate: logger.info("章节补图跳过: chapter=%s, reason=no_pending_images", chapter_id) return {"status": "no_images"} @@ -878,9 +973,10 @@ def generate_chapter_images(self, chapter_id: str): image_generator = get_image_generator() storage = TencentCosStorageService.from_env() logger.info( - "章节补图开始: chapter=%s, pending_sections=%d", + "章节补图开始: chapter=%s, pending_sections=%d, cover=%s", chapter_id, len(sections_with_pending), + bool(cover_to_generate), ) retryable_failures: list[str] = [] permanent_failures: list[str] = [] @@ -899,6 +995,69 @@ def generate_chapter_images(self, chapter_id: str): rec.retryable = d.get("retryable") rec.updated_at = datetime.now(timezone.utc) + # 先处理封面图 + if cover_to_generate: + current_item = memoir_image_to_dict(cover_to_generate) or {} + current_item.setdefault("placeholder", "") + current_item.setdefault("description", "") + current_item["status"] = IMAGE_STATUS_PROCESSING + current_item["updated_at"] = datetime.now(timezone.utc).isoformat() + _apply_item_to_memoir_image(cover_to_generate, current_item) + db.commit() + try: + sections_ordered = sorted(sections, key=lambda s: getattr(s, "order_index", 0)) + first_content = (sections_ordered[0].content or "").strip() if sections_ordered else "" + context_excerpt = " ".join(first_content.split("\n")[:5])[:200] + prompt_data = prompt_service.build_cover_prompt( + chapter_title=chapter.title, + chapter_category=chapter.category or "", + context_excerpt=context_excerpt, + ) + result = image_generator.generate( + prompt_data["prompt"], + prompt_data["size"], + prompt_data["style"], + ) + if result.status != TaskStatus.COMPLETED or not result.image_url: + raise RuntimeError(result.error or "Image generation failed") + image_bytes = _normalize_image_bytes_for_storage( + image_generator.download_image(result.image_url) + ) + key = build_cos_key(chapter.user_id, chapter.id, "cover", prompt_data["prompt"]) + current_item["storage_key"] = key + current_item["url"] = storage.upload_bytes(image_bytes, key, "image/png") + current_item["prompt"] = prompt_data["prompt"] + current_item["style"] = prompt_data["style"] + current_item["size"] = prompt_data["size"] + current_item["status"] = IMAGE_STATUS_COMPLETED + current_item["error"] = None + current_item["retryable"] = None + current_item["updated_at"] = datetime.now(timezone.utc).isoformat() + _apply_item_to_memoir_image(cover_to_generate, current_item) + db.commit() + logger.info( + "章节封面图生成成功: chapter=%s, url=%s", + chapter_id, + current_item["url"], + ) + except Exception as exc: + failure_msg = f"cover, error={exc}" + if isinstance(exc, CosUploadError) and not exc.retryable: + permanent_failures.append(failure_msg) + logger.error("封面图上传不可重试,清理: chapter=%s, %s", chapter_id, failure_msg) + db.delete(cover_to_generate) + db.commit() + else: + current_item = memoir_image_to_dict(cover_to_generate) or {} + current_item["status"] = IMAGE_STATUS_FAILED + current_item["error"] = str(exc) + current_item["retryable"] = True + current_item["updated_at"] = datetime.now(timezone.utc).isoformat() + retryable_failures.append(failure_msg) + logger.warning("封面图生成失败(可重试): chapter=%s, %s", chapter_id, failure_msg) + _apply_item_to_memoir_image(cover_to_generate, current_item) + db.commit() + for sec_index, section in sections_with_pending: item = memoir_image_to_dict(section.image_record) if section.image_record else {} current_item = dict(item) if item else {} @@ -966,7 +1125,6 @@ def generate_chapter_images(self, chapter_id: str): _apply_item_to_memoir_image(section.image_record, current_item) db.commit() - # 封面图先空着,不自动用首张完成图做封面 if retryable_failures: raise RuntimeError( f"章节补图存在可重试失败项: chapter={chapter_id}, failures={'; '.join(retryable_failures)}"