From ccdc4e42776bba44919b9bc5e38819dca49060ad Mon Sep 17 00:00:00 2001 From: Kevin Date: Mon, 11 May 2026 16:16:49 +0800 Subject: [PATCH] feat(i18n): persist language preference and thread through chat, memoir, TTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add users.language_preference (Alembic 0018, default zh); capture at signup/SMS only; expose on auth and profile APIs - Lite English prompts for chat and memoir; localized stage labels and agent names (Life Echo / 岁月知己) - Tencent TTS: language-aware synthesis, ModelType=1 for 501004, English chunking - WebSocket pipeline: emit all AGENT_RESPONSE segments when TTS cancels; INFO logs for tts_this_turn and TTS decisions; on-demand TTS logging - Expo: device language on auth, i18n tiers/agent name, [SPLIT] streaming UX fixes - Tests for migration, prompts, pipeline, router tts_this_turn, reply segments Co-authored-by: Cursor --- api/.env.example | 2 +- api/.env.production | 2 +- api/.env.staging | 2 +- .../0018_users_language_preference.py | 42 +++ api/app/adapters/tts/openai_tts.py | 8 +- api/app/adapters/tts/tencent_tts.py | 162 ++++++++- api/app/agents/chat/interview_agent.py | 71 +++- api/app/agents/chat/orchestrator.py | 41 ++- api/app/agents/chat/output_rules.py | 60 +++- api/app/agents/chat/personas.py | 12 + api/app/agents/chat/profile_agent.py | 69 +++- api/app/agents/chat/prompt_context.py | 2 + api/app/agents/chat/prompt_layers.py | 3 +- api/app/agents/chat/prompts_conversation.py | 212 ++++++++++- api/app/agents/chat/prompts_profile.py | 194 +++++++++- api/app/agents/chat/reply_limits.py | 37 +- api/app/agents/chat/stage_detection.py | 8 +- api/app/agents/chat/stage_prompts.py | 12 +- api/app/agents/memoir/batch_phase1_prep.py | 20 +- api/app/agents/memoir/classification_agent.py | 3 +- api/app/agents/memoir/extraction_agent.py | 3 + api/app/agents/memoir/narrative_agent.py | 20 +- api/app/agents/memoir/orchestrator.py | 6 + api/app/agents/memoir/prompts.py | 292 ++++++++++++++- api/app/agents/stage_constants.py | 33 ++ api/app/agents/style_profiles.py | 44 ++- api/app/core/config.py | 8 +- api/app/core/dependencies.py | 1 + api/app/features/auth/router.py | 9 + api/app/features/auth/schemas.py | 21 +- api/app/features/auth/service.py | 33 +- api/app/features/conversation/service.py | 9 +- api/app/features/conversation/ws/pipeline.py | 274 ++++++++++++++- api/app/features/conversation/ws/router.py | 59 +++- .../features/memoir/story_pipeline_sync.py | 41 ++- api/app/features/user/models.py | 3 + api/app/features/user/router.py | 5 +- api/app/features/user/schemas.py | 3 + api/app/features/user/service.py | 9 + api/app/ports/tts.py | 9 +- api/app/tasks/memoir_quality_pass_tasks.py | 26 +- api/app/tasks/memoir_tasks.py | 27 ++ api/app/tasks/story_title_tasks.py | 25 +- api/tests/test_batch_phase1_chunked.py | 4 + .../test_conversation_list_fallback_title.py | 120 +++++++ api/tests/test_memoir_skip_story.py | 1 + ...t_migration_language_preference_default.py | 34 ++ api/tests/test_mock_sms_login_http.py | 1 + api/tests/test_pipeline_language_skip_tts.py | 200 +++++++++++ ..._pipeline_tts_cancel_emits_all_segments.py | 331 ++++++++++++++++++ api/tests/test_prompt_language_branching.py | 207 +++++++++++ api/tests/test_register_persists_language.py | 87 +++++ api/tests/test_reply_segments.py | 61 ++++ ...st_sms_login_new_user_persists_language.py | 93 +++++ ...est_ws_router_tts_this_turn_passthrough.py | 165 +++++++++ app-expo/src/app/(main)/conversation/[id].tsx | 93 ++--- app-expo/src/app/(tabs)/profile.tsx | 42 ++- app-expo/src/features/auth/hooks.ts | 22 +- app-expo/src/features/auth/types.ts | 6 + app-expo/src/features/conversation/hooks.ts | 11 +- app-expo/src/i18n/generated/resources.ts | 6 + app-expo/src/i18n/locales/en/profile.json | 6 + app-expo/src/i18n/locales/zh/profile.json | 6 + .../conversation/message-split.test.ts | 23 ++ 64 files changed, 3233 insertions(+), 208 deletions(-) create mode 100644 api/alembic/versions/0018_users_language_preference.py create mode 100644 api/tests/test_conversation_list_fallback_title.py create mode 100644 api/tests/test_migration_language_preference_default.py create mode 100644 api/tests/test_pipeline_language_skip_tts.py create mode 100644 api/tests/test_pipeline_tts_cancel_emits_all_segments.py create mode 100644 api/tests/test_prompt_language_branching.py create mode 100644 api/tests/test_register_persists_language.py create mode 100644 api/tests/test_sms_login_new_user_persists_language.py create mode 100644 api/tests/test_ws_router_tts_this_turn_passthrough.py diff --git a/api/.env.example b/api/.env.example index 7f4dff3..019462c 100644 --- a/api/.env.example +++ b/api/.env.example @@ -247,7 +247,7 @@ TTS_PROVIDER=tencent # 仅 TTS_PROVIDER=openai 时需要 # OPENAI_API_KEY= # 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668 -TTS_VOICE_TYPE=502001 +TTS_VOICE_TYPE=501004 TTS_CODEC=mp3 # ============================================================================= diff --git a/api/.env.production b/api/.env.production index 01d4725..c059d7b 100644 --- a/api/.env.production +++ b/api/.env.production @@ -199,7 +199,7 @@ TTS_PROVIDER=tencent # 仅 TTS_PROVIDER=openai 时需要(填控制台密钥;勿在注释行写 =your_* 以免旧版 CI 误匹配) # OPENAI_API_KEY= # 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668 -TTS_VOICE_TYPE=502001 +TTS_VOICE_TYPE=501004 TTS_CODEC=mp3 # ============================================================================= diff --git a/api/.env.staging b/api/.env.staging index dab7bd9..d170d40 100644 --- a/api/.env.staging +++ b/api/.env.staging @@ -133,7 +133,7 @@ TTS_PROVIDER=tencent # 仅 TTS_PROVIDER=openai 时需要 # OPENAI_API_KEY=your_openai_api_key # 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668 -TTS_VOICE_TYPE=502001 +TTS_VOICE_TYPE=501004 TTS_CODEC=mp3 # ============================================================================= diff --git a/api/alembic/versions/0018_users_language_preference.py b/api/alembic/versions/0018_users_language_preference.py new file mode 100644 index 0000000..9052532 --- /dev/null +++ b/api/alembic/versions/0018_users_language_preference.py @@ -0,0 +1,42 @@ +"""users:language_preference 字段 + +Revision ID: 0018_users_language_preference +Revises: 0017_segment_narrative_defer +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0018_users_language_preference" +down_revision: Union[str, None] = "0017_segment_narrative_defer" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _column_names(table_name: str) -> set[str]: + bind = op.get_bind() + inspector = sa.inspect(bind) + return {column["name"] for column in inspector.get_columns(table_name)} + + +def upgrade() -> None: + columns = _column_names("users") + if "language_preference" not in columns: + op.add_column( + "users", + sa.Column( + "language_preference", + sa.String(length=8), + nullable=False, + server_default=sa.text("'zh'"), + ), + ) + + +def downgrade() -> None: + columns = _column_names("users") + if "language_preference" in columns: + op.drop_column("users", "language_preference") diff --git a/api/app/adapters/tts/openai_tts.py b/api/app/adapters/tts/openai_tts.py index c62fc32..6c2553a 100644 --- a/api/app/adapters/tts/openai_tts.py +++ b/api/app/adapters/tts/openai_tts.py @@ -28,7 +28,13 @@ class OpenAITTSProvider: buf.write(chunk) return buf.getvalue() - async def synthesize(self, text: str, voice: str = "alloy") -> bytes: + async def synthesize( + self, + text: str, + voice: str = "alloy", + *, + language: str = "zh", # noqa: ARG002 — OpenAI TTS auto-detects language + ) -> bytes: if not self._client: return b"" try: diff --git a/api/app/adapters/tts/tencent_tts.py b/api/app/adapters/tts/tencent_tts.py index 3401af5..39909d2 100644 --- a/api/app/adapters/tts/tencent_tts.py +++ b/api/app/adapters/tts/tencent_tts.py @@ -19,11 +19,22 @@ VOICE_MAP: dict[str, int] = { "shimmer": 1006, } -# 中文 150 字 / 英文 500 字母,取保守值 -MAX_CHARS_PER_REQUEST = 150 +# Tencent TTS API limit: ≤150 Chinese chars or ≤500 letters (英文按字母放宽到 ~480 留余量) +MAX_CHARS_PER_REQUEST_ZH = 150 +MAX_CHARS_PER_REQUEST_EN = 480 + +# Tencent PrimaryLanguage: 1=中文(含中英混读),2=英文 +PRIMARY_LANGUAGE_ZH = 1 +PRIMARY_LANGUAGE_EN = 2 + +# Tencent ModelType: 1=新模型(覆盖大模型音色 501xxx 系列与新版精品音色)。 +# 大模型音色(如 501004 月华)必须显式传 ModelType=1,否则可能被旧模型拒绝并返回空音频; +# 老精品音色(如 1001/101050 等)也接受 ModelType=1,因此无条件设置不会破坏老链路。 +# 文档:https://cloud.tencent.com/document/api/1073/37995 +MODEL_TYPE_LLM = 1 -def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]: +def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST_ZH) -> list[str]: """Split text into chunks within API limit.""" text = text.strip() if not text: @@ -66,10 +77,15 @@ class TencentTTSProvider: secret_key: str, voice_type: int = 1001, codec: str = "mp3", + voice_type_en: int | None = None, ): self._secret_id = secret_id self._secret_key = secret_key self._voice_type = voice_type + # 英文音色未单独配置时回落到 501004(月华,腾讯云大模型音色,支持中英混合)。 + # 大模型音色 501xxx 系列在 PrimaryLanguage=1/2 下均支持中英混读,不会被 Tencent + # 以 InvalidParameterValue.PrimaryLanguage 拒绝;与之对应必须配合 ModelType=1。 + self._voice_type_en = voice_type_en if voice_type_en is not None else 501004 self._codec = codec self._client = None @@ -93,9 +109,18 @@ class TencentTTSProvider: logger.error("Tencent TTS client init failed: {}", e) return None - def _synthesize_sync(self, text: str, voice_type: int) -> bytes: + def _synthesize_sync( + self, + text: str, + voice_type: int, + primary_language: int = PRIMARY_LANGUAGE_ZH, + ) -> bytes: client = self._get_client() if not client: + logger.warning( + "tencent_tts._synthesize_sync no client provider=tencent voice_type={}", + voice_type, + ) return b"" try: from tencentcloud.common.exception.tencent_cloud_sdk_exception import ( @@ -107,41 +132,142 @@ class TencentTTSProvider: req.Text = text req.SessionId = uuid.uuid4().hex req.VoiceType = voice_type - req.PrimaryLanguage = 1 + req.PrimaryLanguage = primary_language req.SampleRate = 16000 req.Codec = self._codec + # 显式声明使用新模型;大模型音色(501xxx)若不带该字段会被旧模型拒绝并静默返回空音频。 + req.ModelType = MODEL_TYPE_LLM + + # 长期保留 INFO:TTS 实际请求腾讯云 SDK 时的关键参数 + logger.info( + "tencent_tts._synthesize_sync request voice_type={} primary_language={} " + "model_type={} sample_rate={} codec={} text_len={}", + voice_type, + primary_language, + MODEL_TYPE_LLM, + req.SampleRate, + self._codec, + len(text or ""), + ) resp = client.TextToVoice(req) - if not resp or not resp.Audio: + request_id = getattr(resp, "RequestId", None) if resp is not None else None + audio_b64 = getattr(resp, "Audio", "") if resp is not None else "" + if not audio_b64: + logger.warning( + "tencent_tts._synthesize_sync empty audio voice_type={} " + "primary_language={} model_type={} request_id={}", + voice_type, + primary_language, + MODEL_TYPE_LLM, + request_id, + ) return b"" - return base64.b64decode(resp.Audio) + audio_bytes = base64.b64decode(audio_b64) + # 长期保留 INFO:腾讯云 SDK 返回的 request_id + 音频字节数(用户排查必需) + logger.info( + "tencent_tts._synthesize_sync response request_id={} audio_bytes_len={} " + "voice_type={} primary_language={}", + request_id, + len(audio_bytes), + voice_type, + primary_language, + ) + return audio_bytes except TencentCloudSDKException as e: - logger.error("Tencent TTS SDK error: {}", e) + logger.error( + "Tencent TTS SDK error provider=tencent voice_type={} primary_language={} " + "model_type={} code={} message={} request_id={} raw={}", + voice_type, + primary_language, + MODEL_TYPE_LLM, + getattr(e, "code", None), + getattr(e, "message", None), + getattr(e, "requestId", None), + e, + ) return b"" except Exception as e: - logger.error("Tencent TTS synthesize failed: {}", e) + logger.error( + "Tencent TTS synthesize failed provider=tencent voice_type={} primary_language={}: {}", + voice_type, + primary_language, + e, + ) return b"" - async def synthesize(self, text: str, voice: str = "alloy") -> bytes: + async def synthesize( + self, + text: str, + voice: str = "alloy", + *, + language: str = "zh", + ) -> bytes: if not self._secret_id or not self._secret_key: - logger.error("Tencent TTS credentials not configured") + logger.error( + "Tencent TTS credentials not configured provider=tencent secret_id_set={} secret_key_set={}", + bool(self._secret_id), + bool(self._secret_key), + ) return b"" - # Default "alloy" aligns with OpenAI TTS naming; Tencent uses VoiceType IDs from settings. + is_en = (language or "zh").strip().lower() == "en" + primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH + default_voice = self._voice_type_en if is_en else self._voice_type + max_chars = MAX_CHARS_PER_REQUEST_EN if is_en else MAX_CHARS_PER_REQUEST_ZH + + # Default "alloy" aligns with OpenAI TTS naming. Caller 链路里目前不会传具体音色, + # 因此实际只走 default_voice 分支,对应 settings.tts_voice_type / tts_voice_type_en。 v = voice.lower() if v == "alloy": - voice_type = self._voice_type + voice_type = default_voice else: - voice_type = VOICE_MAP.get(v, self._voice_type) - chunks = _chunk_text(text) + voice_type = VOICE_MAP.get(v, default_voice) + chunks = _chunk_text(text, max_chars=max_chars) + # 长期保留 INFO:adapter 入口的 language / voice_type / chunk_count(排查必需) + logger.info( + "tencent_tts.synthesize entry language={} voice_arg={} resolved_voice_type={} " + "primary_language={} max_chars={} text_len={} chunk_count={}", + language, + voice, + voice_type, + primary_language, + max_chars, + len(text or ""), + len(chunks), + ) if not chunks: return b"" results: list[bytes] = [] - for chunk in chunks: - audio = await asyncio.to_thread(self._synthesize_sync, chunk, voice_type) + for idx, chunk in enumerate(chunks): + audio = await asyncio.to_thread( + self._synthesize_sync, chunk, voice_type, primary_language + ) if not audio: + logger.warning( + "tencent_tts.synthesize chunk failed chunk_index={} chunk_chars={} " + "voice_type={} primary_language={}", + idx, + len(chunk), + voice_type, + primary_language, + ) return b"" + logger.debug( + "tencent_tts.synthesize chunk ok chunk_index={} chunk_chars={} audio_bytes_len={}", + idx, + len(chunk), + len(audio), + ) results.append(audio) - return b"".join(results) + merged = b"".join(results) + logger.debug( + "tencent_tts.synthesize done language={} voice_type={} chunks={} total_bytes={}", + language, + voice_type, + len(chunks), + len(merged), + ) + return merged diff --git a/api/app/agents/chat/interview_agent.py b/api/app/agents/chat/interview_agent.py index 83af4ba..f912f97 100644 --- a/api/app/agents/chat/interview_agent.py +++ b/api/app/agents/chat/interview_agent.py @@ -22,6 +22,7 @@ from app.agents.chat.personas import normalize_interview_persona from app.agents.chat.prompt_context import ChatPromptContext from app.agents.chat.prompts_conversation import ( SLOT_NAME_MAP, + SLOT_NAME_MAP_EN, get_opening_prompt, ) from app.agents.chat.reply_limits import ( @@ -50,6 +51,23 @@ logger = get_logger(__name__) # LLM 不可用或调用失败时对用户展示(不暴露异常细节、不触发 TTS) _FALLBACK_REPLY = "刚才网络不太稳,没接上。你可以再说一遍,或稍后再试。" +_FALLBACK_REPLY_EN = ( + "Network glitch on my end — could you say that again, or give it another try in a moment?" +) + +_OPENING_FALLBACK_ZH = "你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?" +_OPENING_FALLBACK_EN = ( + "Hi there — good to see you again. Where in your life would you like to start today?" +) + + +def _fallback_reply_for(language: str) -> str: + return _FALLBACK_REPLY_EN if language == "en" else _FALLBACK_REPLY + + +def _opening_fallback_for(language: str) -> str: + return _OPENING_FALLBACK_EN if language == "en" else _OPENING_FALLBACK_ZH + # 仅在「重复问句守卫」把正文削成单句兜底时追加二次 system,只多调一次模型。 _DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX = """## 二次生成(纠偏) @@ -60,6 +78,20 @@ _DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX = """## 二次生成(纠偏) - 若要提问,须换**全新角度**,并锚在用户刚说的具体细节里;也可以本轮**完全不提问**,只并肩承接; - **禁止**整段只有「这一段我记住了」或同类无信息套话。""" +_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX_EN = """## Second pass (correction) +The previous reply was discarded because it repeated questions that already appeared in "recently asked questions" or restated facts already confirmed. Please **write a whole new reply**: +- Still obey every main rule above. +- Open with a half-sentence to a sentence or two that picks up the user's exact words this turn (with a touch of imagery is fine). +- **Do not** re-use the same confirmation question with only different wording. +- If you do ask a question, choose a **new angle** anchored in a specific detail the user just mentioned; you may also ask **no question** this turn and simply walk alongside what they said. +- **Do not** fall back on filler such as "I'll remember this part" or other content-free reassurance.""" + + +def _duplicate_guard_appendix_for(language: str) -> str: + if language == "en": + return _DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX_EN + return _DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX + def _finalize_chat_segments_after_llm( response_text: str, @@ -68,6 +100,7 @@ def _finalize_chat_segments_after_llm( max_chars: int, memoir_state: MemoirStateSchema, recent_questions: list[str], + language: str = "zh", ) -> tuple[list[str], bool]: raw_list = segments_from_llm_response( response_text, @@ -82,7 +115,7 @@ def _finalize_chat_segments_after_llm( ) if not out: out = [response_text.strip()[:max_chars]] - out = nonempty_segments_or_fallback(out, fallback=_FALLBACK_REPLY) + out = nonempty_segments_or_fallback(out, fallback=_fallback_reply_for(language)) out, deduped = apply_duplicate_question_guard( out, state=memoir_state, @@ -149,11 +182,12 @@ class InterviewAgent: profile_era_place: str = "", stage_switched_this_turn: bool = False, scene_cues_for_planner: Optional[list[str]] = None, + language: str = "zh", ) -> AgentChatTurn: """生成状态感知的访谈回复,不持久化(由 Orchestrator 负责)""" if not self.llm: logger.warning("InterviewAgent: LLM 未配置,返回兜底文案") - return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True) + return AgentChatTurn(messages=[_fallback_reply_for(language)], skip_tts=True) try: text_for_model = self._resolve_text_for_model( user_message, normalized_user_message @@ -245,6 +279,7 @@ class InterviewAgent: persona_threads=memoir_state.persona_threads, recent_questions=recent_questions or memoir_state.recent_questions, turn_plan=turn_plan, + language=language, ) system_prompt = ctx.guided_system_prompt() messages: List[Any] = [SystemMessage(content=system_prompt)] @@ -305,11 +340,12 @@ class InterviewAgent: max_chars=max_chars, memoir_state=memoir_state, recent_questions=rq_base, + language=language, ) retry_used = False if deduped and segments_are_only_duplicate_guard_fallback(out): retry_system = ( - f"{system_prompt}\n\n{_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX}" + f"{system_prompt}\n\n{_duplicate_guard_appendix_for(language)}" ) retry_messages: List[Any] = [ SystemMessage(content=retry_system), @@ -359,6 +395,7 @@ class InterviewAgent: max_chars=max_chars, memoir_state=memoir_state, recent_questions=rq_base, + language=language, ) retry_used = True out, auto_bio = apply_autobiographical_boundary_guard(out) @@ -394,7 +431,7 @@ class InterviewAgent: ) except Exception as e: logger.error("生成回应失败: {}", e, exc_info=True) - return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True) + return AgentChatTurn(messages=[_fallback_reply_for(language)], skip_tts=True) async def generate_opening_message( self, @@ -405,17 +442,19 @@ class InterviewAgent: occupation: str = "", profile_birth_year: Optional[int] = None, profile_era_place: str = "", + language: str = "zh", ) -> List[str]: """生成空对话开场白,不持久化(由 Orchestrator 负责)""" if not self.llm: - return ["你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?"] + return [_opening_fallback_for(language)] try: narrative_state = narrative_coverage_state(memoir_state) control_state = interview_control_state(memoir_state) empty_slots = control_state.prompt_empty_slots_for_stage( narrative_state, memoir_state.current_stage ) - empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots] + slot_table = SLOT_NAME_MAP_EN if language == "en" else SLOT_NAME_MAP + empty_slots_readable = [slot_table.get(s, s) for s in empty_slots] persona = normalize_interview_persona(settings.chat_interview_persona) prompt = get_opening_prompt( current_stage=memoir_state.current_stage, @@ -426,6 +465,7 @@ class InterviewAgent: occupation=occupation, profile_birth_year=profile_birth_year, profile_era_place=profile_era_place, + language=language, ) hw = await get_history_with_window( conversation_id, @@ -434,14 +474,19 @@ class InterviewAgent: ) messages: List[Any] = [SystemMessage(content=prompt)] messages.extend(hw.window) - if not hw.window: - messages.append( - HumanMessage(content="(对话刚开始,请自然地说出你的开场白。)") + if language == "en": + kickoff = ( + "(The conversation is just starting; please greet naturally.)" + if not hw.window + else "(Continue from the context above and deliver your opening line naturally.)" ) else: - messages.append( - HumanMessage(content="(请根据上文,自然接续并说出你的开场白。)") + kickoff = ( + "(对话刚开始,请自然地说出你的开场白。)" + if not hw.window + else "(请根据上文,自然接续并说出你的开场白。)" ) + messages.append(HumanMessage(content=kickoff)) log_agent_payload( logger, "InterviewAgent.opening.prompt", @@ -498,8 +543,8 @@ class InterviewAgent: segments = out if out else [response_text.strip()[:max_chars]] return nonempty_segments_or_fallback( segments, - fallback="你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?", + fallback=_opening_fallback_for(language), ) except Exception as e: logger.error("生成开场白失败: {}", e, exc_info=True) - return ["你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?"] + return [_opening_fallback_for(language)] diff --git a/api/app/agents/chat/orchestrator.py b/api/app/agents/chat/orchestrator.py index d7f43f4..89d406e 100644 --- a/api/app/agents/chat/orchestrator.py +++ b/api/app/agents/chat/orchestrator.py @@ -51,9 +51,20 @@ if TYPE_CHECKING: logger = get_logger(__name__) -_UNAUTH_TURN = AgentChatTurn( +_UNAUTH_TURN_ZH = AgentChatTurn( messages=["暂时没法继续对话,请先登录后再试。"], skip_tts=True ) +_UNAUTH_TURN_EN = AgentChatTurn( + messages=["You'll need to sign in again before we can continue."], + skip_tts=True, +) + + +def _user_language(user: Optional["User"]) -> str: + if not user: + return "zh" + lang = getattr(user, "language_preference", None) or "zh" + return "en" if str(lang).lower() == "en" else "zh" async def _fetch_interview_memory_bundle( @@ -145,6 +156,7 @@ class ChatOrchestrator: 根据 missing_fields 路由到 ProfileAgent 或 InterviewAgent。 """ t0 = time.perf_counter() + language = _user_language(user) # --- 资料收集模式 --- if user: @@ -179,7 +191,10 @@ class ChatOrchestrator: # Profile 阶段每轮都抽取:短确认语也可能带可推断资料,跳过抽取会导致槽位长期不更新 extracted = ( await self.profile_agent.extract_profile_from_message( - user_message, missing, conversation_id=conversation_id + user_message, + missing, + conversation_id=conversation_id, + language=language, ) ) logger.info( @@ -198,7 +213,7 @@ class ChatOrchestrator: if not remaining: st = await get_or_create_state(user.id, db) interview_stage_hint = life_stage_display_name( - st.current_stage + st.current_stage, language=language ) responses = await self.profile_agent.generate_profile_followup( conversation_id=conversation_id, @@ -207,6 +222,7 @@ class ChatOrchestrator: filled_fields=filled, nickname=user.nickname or "", interview_stage_hint=interview_stage_hint, + language=language, ) if agent_summary_enabled(): logger.info( @@ -223,8 +239,13 @@ class ChatOrchestrator: ) except Exception as e: logger.exception("资料收集处理失败: {}", e) + fb_msg = ( + "Sorry, I missed that. Could you say it again?" + if language == "en" + else "不好意思刚才没接住,你再说一遍好吗?" + ) return AgentChatTurn( - messages=["不好意思刚才没接住,你再说一遍好吗?"], + messages=[fb_msg], skip_tts=False, memory_retrieval_trace=None, ) @@ -239,7 +260,7 @@ class ChatOrchestrator: (time.perf_counter() - t0) * 1000, conversation_id, ) - return _UNAUTH_TURN + return _UNAUTH_TURN_EN if language == "en" else _UNAUTH_TURN_ZH log_agent_detail( logger, @@ -284,6 +305,7 @@ class ChatOrchestrator: birth_place=user.birth_place, grew_up_place=user.grew_up_place, occupation=user.occupation, + language=language, ) background_voice = infer_background_voice(user.occupation) occupation = user.occupation or "" @@ -331,6 +353,7 @@ class ChatOrchestrator: profile_era_place=profile_era_place, stage_switched_this_turn=stage_switched_this_turn, scene_cues_for_planner=scene_cues_for_planner, + language=language, ) recent_questions = prompt_state.recent_questions if turn.interview_state_meta and isinstance(turn.interview_state_meta, dict): @@ -387,6 +410,7 @@ class ChatOrchestrator: voice_session_id: str | None = None, user_message_timestamp: datetime | None = None, audio_duration_seconds: int | None = None, + language: str = "zh", ) -> List[str]: """委托 ProfileAgent 生成资料追问(持久化由调用方负责)。""" return await self.profile_agent.generate_profile_followup( @@ -395,6 +419,7 @@ class ChatOrchestrator: missing_fields=missing_fields, filled_fields=filled_fields, nickname=nickname, + language=language, ) async def generate_profile_greeting( @@ -402,12 +427,14 @@ class ChatOrchestrator: conversation_id: str, missing_fields: List[str], nickname: str = "", + language: str = "zh", ) -> List[str]: """委托 ProfileAgent 生成资料收集开场白(持久化由调用方负责)。""" return await self.profile_agent.generate_profile_greeting( conversation_id=conversation_id, missing_fields=missing_fields, nickname=nickname, + language=language, ) async def generate_response_with_state( @@ -431,6 +458,7 @@ class ChatOrchestrator: profile_era_place: str = "", stage_switched_this_turn: bool = False, scene_cues_for_planner: Optional[list[str]] = None, + language: str = "zh", ) -> AgentChatTurn: """委托 InterviewAgent 生成访谈回复(持久化由调用方负责)。""" return await self.interview_agent.generate_response_with_state( @@ -449,6 +477,7 @@ class ChatOrchestrator: profile_era_place=profile_era_place, stage_switched_this_turn=stage_switched_this_turn, scene_cues_for_planner=scene_cues_for_planner, + language=language, ) def detect_user_stage(self, user_message: str) -> str: @@ -464,6 +493,7 @@ class ChatOrchestrator: occupation: str = "", profile_birth_year: Optional[int] = None, profile_era_place: str = "", + language: str = "zh", ) -> List[str]: """ 委托 InterviewAgent 生成访谈开场白(持久化由调用方 ConversationHistoryStore 负责)。 @@ -476,4 +506,5 @@ class ChatOrchestrator: occupation=occupation, profile_birth_year=profile_birth_year, profile_era_place=profile_era_place, + language=language, ) diff --git a/api/app/agents/chat/output_rules.py b/api/app/agents/chat/output_rules.py index 688a134..f688963 100644 --- a/api/app/agents/chat/output_rules.py +++ b/api/app/agents/chat/output_rules.py @@ -1,4 +1,55 @@ -"""共用用户可见回复禁令与文风(访谈 / 资料收集 / 所有面向用户的 Agent)。""" +"""共用用户可见回复禁令与文风(访谈 / 资料收集 / 所有面向用户的 Agent)。 + +`*_en` variants are deliberately lighter: they preserve role / fact boundaries +/ format constraints, but drop CJK-specific rhetoric rules (e.g. "嗯。" 起头). +""" + + +def chat_output_rules_en() -> str: + """English-lite output guardrails for user-facing replies.""" + return ( + "**Do not** output Markdown or layout symbols: no headings, bold/italic, " + "code fences, links, lists, or rendering markers; speak in natural, " + "spoken-style prose. You **may** use the literal token `[SPLIT]` to break " + "a reply into at most two short bubbles. " + "**Do not** include parenthetical stage directions, sound effects, or " + "action descriptions (e.g. *(laughs softly)*, *(sighs)*, *(pauses)*); " + "speak as if talking out loud. " + "**Do not** use host/anchor language (\"Now then\", \"Let us\", \"Thank you " + "for sharing\") or hard topic switches (\"Let's move on to...\", \"Changing " + "subjects...\"). When you need to shift focus, lean on the user's own " + "words to bridge. " + "Avoid summarizing tone (\"It sounds like you...\", \"From what you're " + "saying...\") and avoid interview clichés (\"I noticed\", \"I'd like to " + "understand\"). When the user is sharing something heavy or emotional, " + "do not reply with a single neutral particle; respond with at least a " + "short half-sentence that picks up their actual words. " + "Do not invent facts the user has not stated (names, dates, places, " + "events, exact numbers). " + "**Do not** claim personal life experience as the assistant (childhood, " + "schooling, romance, family, career history); do not rewrite the user's " + "experience as \"me too\". If the user asks about your background, redirect " + "by referring back to what *they* shared (\"You mentioned earlier...\"). " + "**Avoid** loaded multi-clause questions or A/B options that smuggle in " + "the answer. **Do not** repeat the same metaphor or imagery across turns. " + "**Length**: prefer short and precise; one acknowledgement plus one " + "question per reply, never an essay." + ) + + +def chat_voice_style_en() -> str: + """English-lite voice style hint for all user-facing agents.""" + return ( + "Tone: like a warm, attentive interviewer who is here to help the user " + "tell their life story — friendly, conversational, never clinical. " + "Pick up on the specific detail the user just mentioned and gently push " + "one step deeper, rather than jumping to a new generic question. " + "Use everyday language with concrete imagery; avoid summary clichés " + "(\"It sounds like your childhood was happy\") in favor of conversational " + "follow-ups (\"That feeling you described — does it still come back to " + "you now?\"). When following up, stay close to the detail the user just " + "named instead of broadening the topic." + ) def chat_output_rules() -> str: @@ -51,4 +102,9 @@ def chat_voice_style() -> str: ) -__all__ = ["chat_output_rules", "chat_voice_style"] +__all__ = [ + "chat_output_rules", + "chat_voice_style", + "chat_output_rules_en", + "chat_voice_style_en", +] diff --git a/api/app/agents/chat/personas.py b/api/app/agents/chat/personas.py index 5b37d9c..7f28750 100644 --- a/api/app/agents/chat/personas.py +++ b/api/app/agents/chat/personas.py @@ -1,11 +1,23 @@ """ 访谈 Agent 可配置性格(Persona):仅影响语气,不替代事实边界与槽位约束。 +同时提供品牌名称(中英)的单一来源,便于跨 prompt / UI 文案一致。 """ from __future__ import annotations from typing import Final +# Brand / interviewer name — keep aligned with frontend i18n `conversation.agentName`, +# OpenAPI title, README, and project metadata. zh = 「岁月知己」,en = Life Echo. +AGENT_NAME_ZH: Final[str] = "岁月知己" +AGENT_NAME_EN: Final[str] = "Life Echo" + + +def agent_name(language: str = "zh") -> str: + """Return the interviewer brand name for the requested language.""" + return AGENT_NAME_EN if (language or "zh").strip().lower() == "en" else AGENT_NAME_ZH + + # 与 settings.chat_interview_persona 及文档保持一致 VALID_INTERVIEW_PERSONAS: Final[frozenset[str]] = frozenset( {"default", "warm_listener", "curious_guide"} diff --git a/api/app/agents/chat/profile_agent.py b/api/app/agents/chat/profile_agent.py index a5f738a..c479559 100644 --- a/api/app/agents/chat/profile_agent.py +++ b/api/app/agents/chat/profile_agent.py @@ -29,6 +29,31 @@ from app.ports.llm import LLMProvider logger = get_logger(__name__) +_FOLLOWUP_FALLBACK_ZH = "谢谢分享!能再告诉我一些吗?" +_FOLLOWUP_FALLBACK_EN = "Thanks for sharing — could you tell me a bit more?" +_GREETING_FALLBACK_ZH = "你好!在开始之前,能告诉我你是哪一年出生的吗?" +_GREETING_FALLBACK_EN = ( + "Hi! Before we get started, could you tell me what year you were born?" +) +_GREETING_FALLBACK_FULL_ZH = ( + "你好!在我们开始聊人生故事之前,能先简单介绍一下你自己吗?比如你是哪一年出生的?" +) +_GREETING_FALLBACK_FULL_EN = ( + "Hi! Before we dive into life stories, could you introduce yourself a little — for example, what year were you born?" +) + + +def _profile_followup_fallback(language: str) -> str: + return _FOLLOWUP_FALLBACK_EN if language == "en" else _FOLLOWUP_FALLBACK_ZH + + +def _profile_greeting_fallback(language: str) -> str: + return _GREETING_FALLBACK_EN if language == "en" else _GREETING_FALLBACK_ZH + + +def _profile_greeting_fallback_full(language: str) -> str: + return _GREETING_FALLBACK_FULL_EN if language == "en" else _GREETING_FALLBACK_FULL_ZH + class _ProviderBackedProfileGateway: def __init__(self, provider: LLMProvider) -> None: @@ -173,6 +198,7 @@ class ProfileAgent: user_message: str, missing_fields: List[str], conversation_id: Optional[str] = None, + language: str = "zh", ) -> Dict[str, Any]: """从用户消息中提取资料字段,不持久化""" if not missing_fields: @@ -186,15 +212,20 @@ class ProfileAgent: ) recent = hw.window[-4:] if len(hw.window) > 4 else hw.window parts = [] + user_label = "User" if language == "en" else "用户" + asst_label = "Assistant" if language == "en" else "助手" for msg in recent: if isinstance(msg, HumanMessage): - parts.append(f"用户: {msg.content}") + parts.append(f"{user_label}: {msg.content}") elif isinstance(msg, AIMessage): - parts.append(f"助手: {msg.content}") + parts.append(f"{asst_label}: {msg.content}") recent_dialogue = "\n".join(parts) if parts else "" try: prompt = get_profile_extraction_prompt( - user_message, missing_fields, recent_dialogue=recent_dialogue or None + user_message, + missing_fields, + recent_dialogue=recent_dialogue or None, + language=language, ) parsed = await self._llm_gateway.json_object( prompt, @@ -241,6 +272,7 @@ class ProfileAgent: filled_fields: Dict[str, str], nickname: str = "", interview_stage_hint: str = "", + language: str = "zh", ) -> List[str]: """生成资料追问回复,不持久化(由 Orchestrator 负责)""" try: @@ -249,6 +281,7 @@ class ProfileAgent: filled_fields, nickname, interview_stage_hint=interview_stage_hint, + language=language, ) hw = await get_history_with_window( conversation_id, @@ -284,7 +317,7 @@ class ProfileAgent: response_text, max_segments=3, max_chars_per_segment=settings.chat_interview_max_chars_per_segment, - fallback="谢谢分享!能再告诉我一些吗?", + fallback=_profile_followup_fallback(language), ) log_agent_summary( logger, @@ -295,17 +328,20 @@ class ProfileAgent: return segments except Exception as e: logger.error("生成资料跟进回复失败: {}", e) - return ["谢谢分享!能再告诉我一些吗?"] + return [_profile_followup_fallback(language)] async def generate_profile_greeting( self, conversation_id: str, missing_fields: List[str], nickname: str = "", + language: str = "zh", ) -> List[str]: """生成资料收集开场白,不持久化(由 Orchestrator 负责)""" try: - prompt = get_profile_greeting_prompt(missing_fields, nickname) + prompt = get_profile_greeting_prompt( + missing_fields, nickname, language=language + ) hw = await get_history_with_window( conversation_id, max_pairs=settings.chat_history_max_pairs, @@ -313,12 +349,19 @@ class ProfileAgent: ) messages: List[Any] = [SystemMessage(content=prompt)] messages.extend(hw.window) - if hw.window: - messages.append( - HumanMessage(content="(请根据上文自然接话,继续资料收集开场。)") + if language == "en": + kickoff = ( + "(Continue from the context above and warmly carry on the profile-gathering opener.)" + if hw.window + else "(Please deliver your profile-gathering opener.)" ) else: - messages.append(HumanMessage(content="(请说出资料收集开场白。)")) + kickoff = ( + "(请根据上文自然接话,继续资料收集开场。)" + if hw.window + else "(请说出资料收集开场白。)" + ) + messages.append(HumanMessage(content=kickoff)) log_agent_payload( logger, "ProfileAgent.greeting.prompt", @@ -345,7 +388,7 @@ class ProfileAgent: response_text, max_segments=2, max_chars_per_segment=settings.chat_interview_max_chars_per_segment, - fallback="你好!在开始之前,能告诉我你是哪一年出生的吗?", + fallback=_profile_greeting_fallback(language), ) log_agent_summary( logger, @@ -356,6 +399,4 @@ class ProfileAgent: return segments except Exception as e: logger.error("生成资料收集开场白失败: {}", e) - return [ - "你好!在我们开始聊人生故事之前,能先简单介绍一下你自己吗?比如你是哪一年出生的?" - ] + return [_profile_greeting_fallback_full(language)] diff --git a/api/app/agents/chat/prompt_context.py b/api/app/agents/chat/prompt_context.py index 0cd7b91..a59b4cc 100644 --- a/api/app/agents/chat/prompt_context.py +++ b/api/app/agents/chat/prompt_context.py @@ -29,6 +29,7 @@ class ChatPromptContext: persona_threads: List[PersonaThread] | None = None recent_questions: List[str] | None = None turn_plan: InterviewTurnPlan | None = None + language: str = "zh" def guided_system_prompt(self) -> str: """用户原话仅以对话历史 + HumanMessage 注入模型。 @@ -60,4 +61,5 @@ class ChatPromptContext: persona_threads=self.persona_threads or [], recent_questions=self.recent_questions or [], turn_directive_block=directive, + language=self.language, ) diff --git a/api/app/agents/chat/prompt_layers.py b/api/app/agents/chat/prompt_layers.py index be3e188..b49f6c9 100644 --- a/api/app/agents/chat/prompt_layers.py +++ b/api/app/agents/chat/prompt_layers.py @@ -26,6 +26,7 @@ from app.agents.chat.background_voice import ( ) from app.agents.chat.occupation_context import get_occupation_chat_hint from app.agents.chat.personas import ( + AGENT_NAME_ZH, get_interview_persona_tone_hint, normalize_interview_persona, ) @@ -328,7 +329,7 @@ def assemble_guided_prompt( ) intro = ( - "你是「岁月知己」——**主持式访谈者**:口语、克制、可靠;" + f"你是「{AGENT_NAME_ZH}」——**主持式访谈者**:口语、克制、可靠;" "**职责是帮用户把人生故事口述清楚**,不代写金句、不把问题写成散文、不替用户选边站队。" ) if intro_tone_line: diff --git a/api/app/agents/chat/prompts_conversation.py b/api/app/agents/chat/prompts_conversation.py index 260d673..8a0286f 100644 --- a/api/app/agents/chat/prompts_conversation.py +++ b/api/app/agents/chat/prompts_conversation.py @@ -9,8 +9,13 @@ from app.agents.chat.background_voice import ( normalize_background_voice, ) from app.agents.chat.occupation_context import get_occupation_chat_hint -from app.agents.chat.output_rules import chat_output_rules +from app.agents.chat.output_rules import ( + chat_output_rules, + chat_output_rules_en, +) from app.agents.chat.personas import ( + AGENT_NAME_EN, + AGENT_NAME_ZH, get_interview_persona_tone_hint, normalize_interview_persona, ) @@ -23,7 +28,10 @@ from app.agents.chat.prompt_layers import ( build_reply_strategy_block, build_style_profile_block, ) -from app.agents.stage_constants import STAGE_DISPLAY_ZH, STAGE_ERA_HINTS +from app.agents.stage_constants import ( + STAGE_ERA_HINTS, + stage_display_name, +) from app.agents.state_schema import KnownFact, PersonaThread from app.core.config import settings @@ -56,6 +64,36 @@ SLOT_NAME_MAP = { "lesson": "人生经验", } +SLOT_NAME_MAP_EN = { + "place": "where you grew up", + "people": "important people", + "daily_life": "everyday life", + "emotion": "childhood feelings", + "turning_event": "memorable moments", + "school": "school experiences", + "city": "the city you studied in", + "motivation": "what drove you", + "challenge": "challenges you faced", + "change": "how you changed", + "job": "what you did at work", + "environment": "your work environment", + "decision": "important decisions", + "pressure": "pressure and hardship", + "growth": "career growth", + "relationship": "family relationships", + "conflict": "conflicts and resolutions", + "support": "mutual support", + "responsibility": "family responsibilities", + "value": "core values", + "regret": "regrets and acceptance", + "pride": "moments you're proud of", + "lesson": "life lessons", +} + + +def slot_name_map_for(language: str) -> Dict[str, str]: + return SLOT_NAME_MAP_EN if language == "en" else SLOT_NAME_MAP + def _compact_era_hint( current_stage: str, @@ -106,6 +144,61 @@ def _compact_era_hint( return "\n".join(parts) + "\n" +def _get_opening_prompt_en( + current_stage: str, + empty_slots_readable: List[str], + user_profile_context: str = "", + profile_birth_year: Optional[int] = None, + profile_era_place: str = "", +) -> str: + """English-lite opening prompt; ignores persona/background-voice nuances.""" + stage_name = stage_display_name(current_stage, language="en") + if empty_slots_readable: + topics_str = ", ".join(empty_slots_readable) + topics_heading = ( + f"## Suggested topics for this stage ({stage_name})\n" + f"Pick one of these to ask about: {topics_str}" + ) + task_question = ( + "2. You are a **warm, host-style confidant**: ask one **specific, " + "easy-to-answer, vivid** question that pulls the user into telling a " + "life memory; ideally land on one of the topics above. Avoid vague " + "openers like \"How have you been?\" Open the door with one small " + "anchor (a place, a person, an object, or a tiny scene from a day)." + ) + else: + topics_heading = ( + f"## Current stage ({stage_name})\n" + "The main topics for this stage are largely covered. Open with " + "something tied to a previous memory or a fresh small angle of this " + "stage; do not interrogate from the start." + ) + task_question = ( + "2. **Greeting + a memory hook**: after a warm acknowledgement, " + "drop a light, concrete question tied to recollection — never " + "small-talk filler." + ) + + profile_section = "" + if user_profile_context.strip(): + profile_section = "## About the user\n" + user_profile_context.strip() + "\n" + + return f"""You are "{AGENT_NAME_EN}" — a warm host-style friend. The user just opened the chat and **has not said anything yet**; you speak first. Tone like an old friend, but your job is to help the user start telling their life story; in two or three short sentences, give a greeting plus **one vivid, recollection-oriented question** tied to the current stage or suggested topics. No flowery prose, no long literary descriptions, no generic small-talk. + +{profile_section}{topics_heading} + +## Task +1. Brief greeting. +{task_question} +3. Sound natural and warm. + +## Format +- Use `[SPLIT]` to break into at most two short bubbles, or keep greeting + question in one short bubble. +- {chat_output_rules_en()} Do not write the user's answer for them. + +Output (spoken-style English only, no Markdown):""" + + def get_opening_prompt( current_stage: str, empty_slots_readable: List[str], @@ -115,9 +208,18 @@ def get_opening_prompt( occupation: str = "", profile_birth_year: Optional[int] = None, profile_era_place: str = "", + language: str = "zh", ) -> str: """空对话时 AI 先开口的提示词""" - stage_name = STAGE_DISPLAY_ZH.get(current_stage, current_stage) + if language == "en": + return _get_opening_prompt_en( + current_stage, + empty_slots_readable, + user_profile_context=user_profile_context, + profile_birth_year=profile_birth_year, + profile_era_place=profile_era_place, + ) + stage_name = stage_display_name(current_stage, language="zh") bv_open = normalize_background_voice(background_voice) if empty_slots_readable: topics_str = "、".join(empty_slots_readable) @@ -178,13 +280,13 @@ def get_opening_prompt( tone_paragraph = " " + " ".join(tone_bits) + "\n\n" opening_head = ( - "你是「岁月知己」——主持式知己:用户刚进对话,**还没说话**,请你先开口。" + f"你是「{AGENT_NAME_ZH}」——主持式知己:用户刚进对话,**还没说话**,请你先开口。" "语气像老朋友,但**职责是帮对方开口讲人生故事**;两三句内问候 + **一个落在当前阶段或建议话题上的、有画面感的问题**;" "不要排比、不要长段文学描写,**不要**把泛泛问近况当主菜。\n\n" ) if bv_open != "default": opening_head = ( - "你是「岁月知己」——主持式知己:用户刚进对话,**还没说话**,请你先开口。" + f"你是「{AGENT_NAME_ZH}」——主持式知己:用户刚进对话,**还没说话**,请你先开口。" "**短**;两三句内问候 + **一个回忆向的具体问题**;不要排比、不要文学描写。\n\n" ) @@ -217,6 +319,92 @@ def get_opening_prompt( 直接输出(仅自然口语,无 Markdown):""" +def _get_guided_conversation_prompt_en( + current_stage: str, + empty_slots: List[str], + filled_slots: Dict[str, str], + detected_user_stage: str = "", + user_profile_context: str = "", + memory_evidence_text: str = "", + recent_questions: list[str] | None = None, + turn_directive_block: str = "", +) -> str: + """English-lite guided interview prompt (no persona/voice nuances).""" + stage_name = stage_display_name(current_stage, language="en") + detected_name = ( + stage_display_name(detected_user_stage, language="en") + if detected_user_stage and detected_user_stage != current_stage + else "" + ) + empty_readable = [SLOT_NAME_MAP_EN.get(s, s) for s in empty_slots] + filled_lines = [] + for k, v in (filled_slots or {}).items(): + name = SLOT_NAME_MAP_EN.get(k, k) + if v: + filled_lines.append(f"- {name}: {v}") + filled_block = "\n".join(filled_lines) if filled_lines else "(none yet)" + + suggested_block = ( + "Suggested still-open angles for this stage: " + ", ".join(empty_readable) + if empty_readable + else "Main angles for this stage are largely covered." + ) + + detected_line = ( + f"\nThe user is currently talking about: **{detected_name}** (system was tracking **{stage_name}**)." + if detected_name + else "" + ) + + profile_section = "" + if user_profile_context.strip(): + profile_section = "\n## About the user\n" + user_profile_context.strip() + + memory_section = "" + if (memory_evidence_text or "").strip(): + memory_section = ( + "\n## Reference memory snippets (for continuity only — do NOT write them as the user's first-person experience this turn)\n" + + memory_evidence_text.strip() + ) + + recent_q_section = "" + if recent_questions: + last = recent_questions[-4:] + recent_q_section = ( + "\n## Recently asked questions (do NOT repeat these; offer a new angle)\n" + + "\n".join(f"- {q}" for q in last) + ) + + directive_block = (turn_directive_block or "").strip() + directive_section = ( + f"\n## This turn's plan\n{directive_block}\n" if directive_block else "" + ) + + return f"""{directive_section}You are "{AGENT_NAME_EN}," a warm host-style friend helping the user record a memoir. Reply in conversational English. + +## Stage context +Currently tracking life stage: **{stage_name}**.{detected_line} +{suggested_block} + +## Already gathered for this stage +{filled_block}{profile_section}{memory_section}{recent_q_section} + +## Behaviour +- Pick up the **specific** detail the user just said (one tangible noun or short phrase) and gently push one step deeper before asking your next question. +- Prefer ONE clear, specific question per reply. Open-ended over forced A/B options. +- If the user is in the middle of a story, follow that thread; do not switch topics for the sake of coverage. +- If you previously asked about something and the user already answered, do not re-ask. +- Stay short and precise. One acknowledgement sentence + one question is the default shape. + +## Strict rules +- {chat_output_rules_en()} + +## Format +- Use `[SPLIT]` to split into at most two short bubbles when natural. + +Reply in English only. Do not output Markdown headings.""" + + def get_guided_conversation_prompt( current_stage: str, empty_slots: List[str], @@ -234,8 +422,20 @@ def get_guided_conversation_prompt( persona_threads: list[PersonaThread] | None = None, recent_questions: list[str] | None = None, turn_directive_block: str = "", + language: str = "zh", ) -> str: """生成状态感知的对话提示词;用户原话仅以 HumanMessage 传入,不写入本 system 文本。""" + if language == "en": + return _get_guided_conversation_prompt_en( + current_stage=current_stage, + empty_slots=empty_slots, + filled_slots=filled_slots, + detected_user_stage=detected_user_stage, + user_profile_context=user_profile_context, + memory_evidence_text=memory_evidence_text, + recent_questions=recent_questions, + turn_directive_block=turn_directive_block, + ) persona_key = normalize_interview_persona(persona) persona_tone = get_interview_persona_tone_hint(persona_key) voice_tone = get_background_voice_tone_hint(background_voice) @@ -307,6 +507,8 @@ def get_guided_conversation_prompt( __all__ = [ "SLOT_NAME_MAP", + "SLOT_NAME_MAP_EN", + "slot_name_map_for", "get_guided_conversation_prompt", "get_opening_prompt", ] diff --git a/api/app/agents/chat/prompts_profile.py b/api/app/agents/chat/prompts_profile.py index b175938..aae4c99 100644 --- a/api/app/agents/chat/prompts_profile.py +++ b/api/app/agents/chat/prompts_profile.py @@ -4,7 +4,13 @@ from typing import Dict, List, Optional -from app.agents.chat.output_rules import chat_output_rules, chat_voice_style +from app.agents.chat.output_rules import ( + chat_output_rules, + chat_output_rules_en, + chat_voice_style, + chat_voice_style_en, +) +from app.agents.chat.personas import AGENT_NAME_EN, AGENT_NAME_ZH PROFILE_FIELD_NAMES = { "birth_year": "出生年份", @@ -13,16 +19,69 @@ PROFILE_FIELD_NAMES = { "occupation": "职业", } +PROFILE_FIELD_NAMES_EN = { + "birth_year": "year of birth", + "birth_place": "birthplace", + "grew_up_place": "where you grew up", + "occupation": "occupation", +} -def get_profile_greeting_prompt(missing_fields: List[str], nickname: str = "") -> str: + +def _profile_field_names_for(language: str) -> Dict[str, str]: + return PROFILE_FIELD_NAMES_EN if language == "en" else PROFILE_FIELD_NAMES + + +def _get_profile_greeting_prompt_en( + missing_fields: List[str], nickname: str = "" +) -> str: + missing_names = [ + PROFILE_FIELD_NAMES_EN[f] + for f in missing_fields + if f in PROFILE_FIELD_NAMES_EN + ] + missing_str = ", ".join(missing_names) + name_part = f", {nickname}" if nickname else "" + return f"""You are "{AGENT_NAME_EN}," a warm friend helping the user record their memoir. You are meeting the user for the first time{name_part}. + +{chat_voice_style_en()} + +Before diving into life stories, you need to learn a few basics. Still missing: {missing_str}. + +## Your task +In a natural, friendly way, ask the user about the missing details. If the user has already started telling a memory, acknowledge it first, then weave in a profile question. + +## Rules +1. Do not ask everything at once — ask 1–2 things per turn. +2. Do not re-ask facts the user already mentioned. +3. Use casual, warm phrasing; vary your wording instead of fixed templates. +4. Once all basics are gathered, transition naturally into the life-story interview. + +## Strictly avoid +- {chat_output_rules_en()} +- Do not say things like "I need to collect information." +- Do not list all the questions at once. + +## Format +- Use `[SPLIT]` to break a long reply into at most two short messages. + +Output exactly what you would say:""" + + +def get_profile_greeting_prompt( + missing_fields: List[str], + nickname: str = "", + language: str = "zh", +) -> str: """生成初次见面、收集基础资料的引导提示词""" + if language == "en": + return _get_profile_greeting_prompt_en(missing_fields, nickname) missing_names = [ PROFILE_FIELD_NAMES[f] for f in missing_fields if f in PROFILE_FIELD_NAMES ] missing_str = "、".join(missing_names) name_part = f",{nickname}" if nickname else "" - return f"""你是「岁月知己」,像最懂我的老朋友。你正在和用户初次见面{name_part}。 + return f"""你是「{AGENT_NAME_ZH}」,像最懂我的老朋友。你正在和用户初次见面{name_part}。 {chat_voice_style()} @@ -48,12 +107,50 @@ def get_profile_greeting_prompt(missing_fields: List[str], nickname: str = "") - 直接输出你要说的话:""" -def get_profile_extraction_prompt( +def _get_profile_extraction_prompt_en( user_message: str, missing_fields: List[str], recent_dialogue: Optional[str] = None, +) -> str: + missing_names = { + f: PROFILE_FIELD_NAMES_EN[f] + for f in missing_fields + if f in PROFILE_FIELD_NAMES_EN + } + dialogue_section = "" + if recent_dialogue and recent_dialogue.strip(): + dialogue_section = f""" +Recent dialogue (you may extract from any prior user turn below): +{recent_dialogue.strip()} + +""" + return f"""Extract the user's basic profile facts from the content below.{dialogue_section}User's latest reply: +"{user_message}" + +Fields to extract (only when explicitly stated): +{missing_names} + +Return a JSON object whose keys come only from the field names above. `birth_year` is a four-digit integer; the others are strings. Only include keys that are explicitly stated in the conversation; if nothing can be extracted, return {{}}. + +Rules: +1. `birth_year` must be a four-digit integer (e.g. "born in '65" → 1965). +2. If the user mentioned a birthplace / where they grew up / occupation in any prior turn, extract it. +3. Only extract what is explicitly stated; do not guess. +4. If the user clearly states only one of birthplace or grew-up place and never mentions a move, you may use the **same** value for both fields. +5. If no information can be extracted, return the empty object {{}}.""" + + +def get_profile_extraction_prompt( + user_message: str, + missing_fields: List[str], + recent_dialogue: Optional[str] = None, + language: str = "zh", ) -> str: """从用户回答中提取基础资料信息(可包含最近几轮对话,避免漏提)""" + if language == "en": + return _get_profile_extraction_prompt_en( + user_message, missing_fields, recent_dialogue=recent_dialogue + ) missing_names = { f: PROFILE_FIELD_NAMES[f] for f in missing_fields if f in PROFILE_FIELD_NAMES } @@ -81,13 +178,85 @@ def get_profile_extraction_prompt( 5. 如果没有提取到任何信息,返回空对象 {{}}""" -def get_profile_followup_prompt( +def _get_profile_followup_prompt_en( missing_fields: List[str], filled_fields: Dict[str, str], nickname: str = "", interview_stage_hint: str = "", +) -> str: + missing_names = [ + PROFILE_FIELD_NAMES_EN[f] + for f in missing_fields + if f in PROFILE_FIELD_NAMES_EN + ] + missing_str = ", ".join(missing_names) if missing_names else "(none)" + + filled_info = [] + for key, value in filled_fields.items(): + name = PROFILE_FIELD_NAMES_EN.get(key, key) + filled_info.append(f"{name}: {value}") + filled_str = "\n".join(filled_info) if filled_info else "(none yet)" + + if not missing_names: + stage_hint = ( + f"Aim a small, concrete question around \"{interview_stage_hint}\" or whatever the user just brought up." + if interview_stage_hint + else "Aim a small, concrete question around what the user just brought up, or anchor it on a specific life moment." + ) + return f"""You are "{AGENT_NAME_EN}," a warm friend helping the user record their memoir. Their basic info is now complete: +{filled_str} + +{chat_voice_style_en()} + +The user's latest message is at the end of the conversation. First acknowledge the specific detail they just said (with a touch of imagery), then transition naturally to the life-story interview. +Improvise the bridge sentence; do not use canned phrasing. {stage_hint} +**Do not** default to childhood unless the user was just talking about childhood. + +Format: separate multiple bubbles with `[SPLIT]`. +Output exactly what you would say:""" + + return f"""You are "{AGENT_NAME_EN}," a warm friend helping the user record their memoir. You're chatting with the user while quietly learning a few basic facts. + +{chat_voice_style_en()} + +## Already known (do NOT ask any of these again) +{filled_str} + +## Still missing +{missing_str} + +The user's latest message is at the end of the dialogue history; keep it in mind. + +## How to reply +1. **Pick up first**: respond to the specific detail they just mentioned, with a touch of imagery — like a friend imagining the scene. Avoid generic "that sounds nice." +2. **Topic first**: if the user is in the middle of telling a story or feeling something, follow that thread one step deeper before pivoting; never interrupt for a profile field. +3. **Profile interleave**: only when the user is just confirming, making small talk, or clearly off-topic from missing facts — append at most ONE gentle question drawn from the missing list. +4. **Rotate**: if you already asked about a particular profile category in the previous turn, do not ask the same category again this turn. +5. At most 1–2 profile-related questions per reply. + +Strictly avoid: +- **Never** re-ask anything in "Already known." +- {chat_output_rules_en()} + +Format: separate multiple bubbles with `[SPLIT]`. +Output exactly what you would say:""" + + +def get_profile_followup_prompt( + missing_fields: List[str], + filled_fields: Dict[str, str], + nickname: str = "", + interview_stage_hint: str = "", + language: str = "zh", ) -> str: """在收集资料过程中的跟进提问""" + if language == "en": + return _get_profile_followup_prompt_en( + missing_fields, + filled_fields, + nickname=nickname, + interview_stage_hint=interview_stage_hint, + ) missing_names = [ PROFILE_FIELD_NAMES[f] for f in missing_fields if f in PROFILE_FIELD_NAMES ] @@ -105,7 +274,7 @@ def get_profile_followup_prompt( if interview_stage_hint else "问一个与**用户刚才关注点**或人生故事相关的**具体、好回答**的问题作为开场。" ) - return f"""你是「岁月知己」,像最懂我的老朋友。用户的基本信息已经收集完毕: + return f"""你是「{AGENT_NAME_ZH}」,像最懂我的老朋友。用户的基本信息已经收集完毕: {filled_str} {chat_voice_style()} @@ -117,7 +286,7 @@ def get_profile_followup_prompt( 回复格式:多条消息用 [SPLIT] 分隔。 直接输出你要说的话:""" - return f"""你是「岁月知己」,像最懂我的老朋友。你正在和用户聊天,同时自然地了解一些基本信息。 + return f"""你是「{AGENT_NAME_ZH}」,像最懂我的老朋友。你正在和用户聊天,同时自然地了解一些基本信息。 {chat_voice_style()} @@ -149,9 +318,20 @@ def format_user_profile_context( birth_place: Optional[str] = None, grew_up_place: Optional[str] = None, occupation: Optional[str] = None, + language: str = "zh", ) -> str: """将用户基础信息格式化为上下文字符串,供其他 agent 使用""" parts = [] + if language == "en": + if birth_year: + parts.append(f"Year of birth: {birth_year}") + if birth_place: + parts.append(f"Birthplace: {birth_place}") + if grew_up_place: + parts.append(f"Where they grew up: {grew_up_place}") + if occupation: + parts.append(f"Occupation: {occupation}") + return "\n".join(parts) if parts else "" if birth_year: parts.append(f"出生年份:{birth_year}年") if birth_place: diff --git a/api/app/agents/chat/reply_limits.py b/api/app/agents/chat/reply_limits.py index 5d8761a..8f011bf 100644 --- a/api/app/agents/chat/reply_limits.py +++ b/api/app/agents/chat/reply_limits.py @@ -4,11 +4,35 @@ from __future__ import annotations import re +# 零宽字符:LLM 偶尔会在 [SPLIT] 周围注入 ZWSP/ZWNJ/ZWJ/BOM,需在拆段前去掉 +_ZERO_WIDTH_RE = re.compile(r"[\u200B-\u200D\uFEFF]") + +# 与客户端 `message-split.ts` 对齐:宽松正则匹配 [SPLIT] / [ SPLIT ] / [split] 等 +# 全角中括号 【】 / [] 先在 _normalize_split_markers 里折成 ASCII 再走该正则 +SPLIT_MARKER_RE = re.compile(r"\[\s*SPLIT\s*\]", re.IGNORECASE) + + +def _normalize_split_markers(text: str) -> str: + """归一化 [SPLIT] 周围常见变体,确保后端拆段与前端 `MESSAGE_SPLIT_REGEX` 等价。 + + 覆盖: + - 零宽空格 / ZWNJ / ZWJ / BOM + - 全角方括号 【】 / [] 折叠为 ASCII [] + 后续仍用 ``SPLIT_MARKER_RE`` 一次性匹配(含大小写、内部空白)。 + """ + if not text: + return text + s = _ZERO_WIDTH_RE.sub("", text) + s = s.replace("\uff3b", "[").replace("\uff3d", "]") + s = s.replace("\u3010", "[").replace("\u3011", "]") + return s + def strip_markdown_for_chat(text: str) -> str: """ 将模型偶然输出的常见 Markdown 剥成纯文本,供 App 聊天气泡展示。 - 保留换行与字面量 [SPLIT];不做完整 MD 解析,以简单可预测为主。 + 保留换行与字面量 [SPLIT](实际拆段由 `segments_from_llm_response` 用宽松正则完成, + 支持 `[ SPLIT ]`、`[split]`、`【SPLIT】` 等变体)。不做完整 MD 解析,以简单可预测为主。 """ if not text: return text @@ -82,21 +106,24 @@ def segments_from_llm_response( min_paragraph_chars: int = 12, ) -> list[str]: """ - 优先按字面 [SPLIT] 拆段;若模型只输出一段、但用空行写了多段,再按段落拆。 - 解决「两段话 + 换行」却未写 [SPLIT] 时仍要拆气泡 / 多段 TTS 的情况。 + 优先按 [SPLIT] 标记拆段(容错:大小写、内部空白、全角中括号、零宽字符均视作分隔符); + 若模型只输出一段、但用空行写了多段,再按段落拆。 + 解决「两段话 + 换行」却未写 [SPLIT] 时仍要拆气泡 / 多段 TTS 的情况, + 并避免后端 literal split 与前端容错正则不一致时把字面 `[ SPLIT ]` 留在文本里。 """ text = strip_markdown_for_chat((response_text or "").strip()) text = strip_parenthetical_asides_for_chat(text) if not text: return [] + normalized = _normalize_split_markers(text) primary = [ strip_leading_en_period_ack_for_chat(p) - for p in text.split("[SPLIT]") + for p in SPLIT_MARKER_RE.split(normalized) if strip_leading_en_period_ack_for_chat(p).strip() ] if len(primary) > 1: return primary[:max_segments] - blob = primary[0] if primary else strip_leading_en_period_ack_for_chat(text) + blob = primary[0] if primary else strip_leading_en_period_ack_for_chat(normalized) blob = strip_leading_en_period_ack_for_chat(blob) if "\n" not in blob: return [blob] diff --git a/api/app/agents/chat/stage_detection.py b/api/app/agents/chat/stage_detection.py index a3bf27c..94b8d27 100644 --- a/api/app/agents/chat/stage_detection.py +++ b/api/app/agents/chat/stage_detection.py @@ -83,8 +83,12 @@ async def detect_primary_life_stage( return normalize_chat_stage(result.detected_stage, fb) -def life_stage_display_name(stage: str) -> str: - """供提示词展示的中文名。""" +def life_stage_display_name(stage: str, language: str = "zh") -> str: + """供提示词展示的本地化名称(默认中文)。""" + if language == "en": + from app.agents.stage_constants import stage_display_name + + return stage_display_name(stage, language="en") return life_stage_display_zh(stage) diff --git a/api/app/agents/chat/stage_prompts.py b/api/app/agents/chat/stage_prompts.py index 0c1126d..1d86c9e 100644 --- a/api/app/agents/chat/stage_prompts.py +++ b/api/app/agents/chat/stage_prompts.py @@ -2,7 +2,12 @@ 访谈「人生阶段」判定专用短提示词(与回忆录五阶段 slots 一致)。 """ -from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH, VALID_CHAT_STAGES +from app.agents.stage_constants import ( + CHAT_STAGES, + STAGE_DISPLAY_ZH, + VALID_CHAT_STAGES, + stage_display_name, +) VALID_CHAT_LIFE_STAGES = VALID_CHAT_STAGES @@ -11,6 +16,11 @@ def life_stage_display_zh(stage: str) -> str: return STAGE_DISPLAY_ZH.get(stage, stage) +def life_stage_display(stage: str, language: str = "zh") -> str: + """Localized life-stage display name (delegates to stage_constants helper).""" + return stage_display_name(stage, language=language) + + def get_chat_stage_detection_prompt(user_message: str, current_stage: str) -> str: """ 仅判定用户本轮**主要**在谈哪一人生阶段;输出 JSON。 diff --git a/api/app/agents/memoir/batch_phase1_prep.py b/api/app/agents/memoir/batch_phase1_prep.py index 829ceca..6849673 100644 --- a/api/app/agents/memoir/batch_phase1_prep.py +++ b/api/app/agents/memoir/batch_phase1_prep.py @@ -48,6 +48,8 @@ def run_batch_phase1_prep( segments: List[Segment], state: MemoirStateSchema, llm: Any, + *, + language: str = "zh", ) -> Dict[str, BatchPhase1SegmentRow]: """对 segments 顺序批量调用 LLM;返回 id → 行。id 集合必须与入参完全一致。""" if not llm: @@ -59,6 +61,7 @@ def run_batch_phase1_prep( system_current_stage=state.current_stage or "childhood", slots_snapshot=_slots_snapshot(state), segment_items=items, + language=language, ) try: parsed = llm_json_call( @@ -108,18 +111,24 @@ def _run_batch_phase1_prep_chunk_with_bisect( segments: List[Segment], state: MemoirStateSchema, llm: Any, + *, + language: str = "zh", ) -> Dict[str, BatchPhase1SegmentRow]: """单块 LLM;失败时(如输出截断)将块二等分重试直至单段。""" try: - return run_batch_phase1_prep(segments, state, llm) + return run_batch_phase1_prep(segments, state, llm, language=language) except ValueError: if len(segments) <= 1: raise mid = len(segments) // 2 if mid < 1: raise - left = _run_batch_phase1_prep_chunk_with_bisect(segments[:mid], state, llm) - right = _run_batch_phase1_prep_chunk_with_bisect(segments[mid:], state, llm) + left = _run_batch_phase1_prep_chunk_with_bisect( + segments[:mid], state, llm, language=language + ) + right = _run_batch_phase1_prep_chunk_with_bisect( + segments[mid:], state, llm, language=language + ) merged = {**left, **right} expected = {str(s.id) for s in segments} if merged.keys() != expected: @@ -136,6 +145,7 @@ def run_batch_phase1_prep_chunked( *, chunk_size: int, on_chunk: Callable[[int, int], None] | None = None, + language: str = "zh", ) -> Dict[str, BatchPhase1SegmentRow]: """ 将 segments 按 chunk_size 切片多次调用 Phase1 批处理 LLM,合并 by_id。 @@ -158,7 +168,9 @@ def run_batch_phase1_prep_chunked( total_chunks, len(sub), ) - part = _run_batch_phase1_prep_chunk_with_bisect(sub, state, llm) + part = _run_batch_phase1_prep_chunk_with_bisect( + sub, state, llm, language=language + ) merged.update(part) if on_chunk is not None: on_chunk(chunk_idx, total_chunks) diff --git a/api/app/agents/memoir/classification_agent.py b/api/app/agents/memoir/classification_agent.py index e0245d9..e7b6997 100644 --- a/api/app/agents/memoir/classification_agent.py +++ b/api/app/agents/memoir/classification_agent.py @@ -116,6 +116,7 @@ class ClassificationAgent: llm: Any, *, segment_id: str | None = None, + language: str = "zh", ) -> ChapterClassifyResult: """ 分类到 8 个章节类别之一。 @@ -138,7 +139,7 @@ class ClassificationAgent: if llm: try: - prompt = get_chapter_classification_json_prompt(text) + prompt = get_chapter_classification_json_prompt(text, language=language) out = llm_json_call( llm, prompt, diff --git a/api/app/agents/memoir/extraction_agent.py b/api/app/agents/memoir/extraction_agent.py index 956369e..e3a4b2d 100644 --- a/api/app/agents/memoir/extraction_agent.py +++ b/api/app/agents/memoir/extraction_agent.py @@ -35,6 +35,8 @@ class ExtractionAgent: current_stage: str, stage_slots: Dict[str, Any], llm: Any, + *, + language: str = "zh", ) -> ExtractionResult: """ 提取结构化信息并判断阶段。 @@ -56,6 +58,7 @@ class ExtractionAgent: k: v.model_dump() if hasattr(v, "model_dump") else v for k, v in (stage_slots or {}).items() }, + language=language, ) parsed = llm_json_call( llm, diff --git a/api/app/agents/memoir/narrative_agent.py b/api/app/agents/memoir/narrative_agent.py index 04dba01..66f86f2 100644 --- a/api/app/agents/memoir/narrative_agent.py +++ b/api/app/agents/memoir/narrative_agent.py @@ -13,7 +13,7 @@ from app.agents.memoir.prompts import ( get_narrative_merge_json_prompt, ) from app.agents.memoir.schemas import MemoirTitleOutput -from app.agents.stage_constants import CHAPTER_CATEGORIES +from app.agents.stage_constants import CHAPTER_CATEGORIES, chapter_category_display from app.core.config import settings from app.core.langchain_llm import invoke_json_object from app.core.llm_call import llm_json_call @@ -22,6 +22,13 @@ from app.core.logging import get_logger logger = get_logger(__name__) +def _default_title_for(stage: str, language: str) -> str: + if language == "en": + cat = chapter_category_display(stage, language="en") or stage + return f"{cat} Memory" + return f"{CHAPTER_CATEGORIES.get(stage, stage)} 回忆" + + class NarrativeAgent: """生成章节标题和叙事正文""" @@ -33,10 +40,11 @@ class NarrativeAgent: user_profile: str = "", birth_year: Optional[int] = None, llm: Any = None, + language: str = "zh", ) -> str: """生成创意标题。若无 LLM 则返回默认标题""" if not llm: - return f"{CHAPTER_CATEGORIES.get(stage, stage)} 回忆" + return _default_title_for(stage, language) try: prompt = get_creative_title_json_prompt( stage=stage, @@ -44,8 +52,9 @@ class NarrativeAgent: slots=slots, user_profile=user_profile, birth_year=birth_year, + language=language, ) - default_title = f"{CHAPTER_CATEGORIES.get(stage, stage)} 回忆" + default_title = _default_title_for(stage, language) def _title_fallback() -> MemoirTitleOutput: return MemoirTitleOutput(title=default_title) @@ -64,7 +73,7 @@ class NarrativeAgent: return default_title except Exception as e: logger.warning("NarrativeAgent 生成标题失败: {}", e) - return f"{CHAPTER_CATEGORIES.get(stage, stage)} 回忆" + return _default_title_for(stage, language) def generate_narrative( self, @@ -79,6 +88,7 @@ class NarrativeAgent: occupation: str = "", *, fallback_plain_oral: str = "", + language: str = "zh", ) -> str: """将新对话改写为叙述。若无 LLM 则直接拼接。 @@ -106,6 +116,7 @@ class NarrativeAgent: birth_year=birth_year, background_voice=background_voice, occupation=occupation, + language=language, ) max_tokens = int(settings.memoir_narrative_merge_max_tokens) agent_name = "NarrativeAgent.generate_narrative_merge" @@ -119,6 +130,7 @@ class NarrativeAgent: birth_year=birth_year, background_voice=background_voice, occupation=occupation, + language=language, ) max_tokens = int(settings.memoir_narrative_max_tokens) agent_name = "NarrativeAgent.generate_narrative" diff --git a/api/app/agents/memoir/orchestrator.py b/api/app/agents/memoir/orchestrator.py index 56d7091..67ac74e 100644 --- a/api/app/agents/memoir/orchestrator.py +++ b/api/app/agents/memoir/orchestrator.py @@ -72,6 +72,7 @@ class MemoirOrchestrator: update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema], llm_fast: Any | None = None, on_phase1_chunk: Optional[Callable[[int, int], None]] = None, + language: str = "zh", ) -> PreparedMemoirBatches: """ 遍历 segments:Extraction → slot 更新 → Classification → 按 category 分桶。 @@ -99,6 +100,7 @@ class MemoirOrchestrator: classify_extract_llm=classify_extract_llm, update_slot=update_slot, on_phase1_chunk=on_phase1_chunk, + language=language, ) logger.info( "event=phase1_batch_path_used segment_count={} " @@ -132,6 +134,7 @@ class MemoirOrchestrator: current_stage=state.current_stage or "childhood", stage_slots=stage_slots_raw, llm=classify_extract_llm, + language=language, ) fb = state.current_stage or "childhood" detected_stage = normalize_chat_stage(result.detected_stage, fb) @@ -151,6 +154,7 @@ class MemoirOrchestrator: fallback_stage=detected_stage, llm=classify_extract_llm, segment_id=segment.id, + language=language, ) chapter_category = classify_result.category if (not result_slots) and classify_result.llm_said_none: @@ -190,6 +194,7 @@ class MemoirOrchestrator: classify_extract_llm: Any, update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema], on_phase1_chunk: Optional[Callable[[int, int], None]] = None, + language: str = "zh", ) -> PreparedMemoirBatches: category_to_segments: Dict[str, List[Segment]] = {} segment_skip_story_ids: Set[str] = set() @@ -201,6 +206,7 @@ class MemoirOrchestrator: classify_extract_llm, chunk_size=int(settings.memoir_phase1_batch_llm_chunk_size), on_chunk=on_phase1_chunk, + language=language, ) for segment in segments: diff --git a/api/app/agents/memoir/prompts.py b/api/app/agents/memoir/prompts.py index e636d21..14a11e0 100644 --- a/api/app/agents/memoir/prompts.py +++ b/api/app/agents/memoir/prompts.py @@ -13,6 +13,30 @@ from app.agents.stage_constants import STAGE_ERA_HINTS, STAGE_SLOT_KEYS from app.agents.style_profiles import MemoirStyleProfile +def _memoir_fidelity_core_rules_en() -> str: + """English-lite version of the fact-boundary rules 1–4.""" + return """## Fact boundary (must follow; takes precedence over style) +1. **The body may only expand on the content in the "User's oral memory this turn" block.** If the input includes a "Reference memory snippets" block, you must not write its specifics as the user's first-hand experience this turn; at most use one short bridging sentence, and never introduce names, places, dates, dialogue, or numbers that appear only in the snippets. +2. **No fabrication.** Do not add people, dialogue, places, dates, events, causes, or numbers the user did not state. Do not invent inner monologue or "typical era" filler. If the user did not state an outcome (selected, accepted, rejected, etc.), do not write a definite conclusion. Prefer neutral, partial wording when uncertain. +3. **Do not pad for length.** Short input → short output. Paragraph count and length follow the material. +4. Allowed: removing fillers and small talk, reordering for clarity, merging redundant references, lifting spoken language to written prose. Do not invent details to "make the writing nicer." + +## Encouraged operations (not fabrication) +- Lift colloquial speech to clean written English: trim filler, smooth syntax, choose more precise verbs. +- Add short bridging sentences ("Looking back," "In those days") as long as they introduce no new entities. +- Render emotions already stated in the oral memory in slightly more literary phrasing (the user said "it was hard," you may write "it weighed on me") — provided you add no new scenes, numbers, or actions. +- Merge synonymous repeated statements for tighter narration. +- Correct obvious speech-to-text typos. +- **Era / cultural texture (only with anchored facts)**: when the oral memory or profile fields make the year, region, or environment clear, you may use period-appropriate vocabulary and ambient texture as a touch — but you may not invent specific people, events, dialogue, or scenes.""" + + +def _memoir_fidelity_user_profile_rules_en() -> str: + return """## User profile and stage information +- The "About the user" / "Time reference" blocks may only be used for items that are explicitly listed. +- **Cultural / era texture (encouraged when anchored)**: when this turn's oral memory clearly belongs to the same era or place that profile facts describe, you may weave the era and place into the prose as **language and atmosphere** (forms of address, regional expressions, period feel). You still may not turn profile facts alone into a specific event the user did not narrate this turn. +- Do not put concrete biographical details from the profile into the body unless the user actually mentioned them this turn.""" + + def _memoir_fidelity_core_rules() -> str: """事实边界 1–4 条(与文体第 5 条拆分,供 story 叙事与标题等复用)。""" return """## 事实边界(必须遵守,优先于文采) @@ -37,8 +61,15 @@ def _memoir_fidelity_user_profile_rules() -> str: - 档案中的具体经历细节不得写入正文,除非用户在本段口述里已提及或明确关联。""" -def get_memoir_fidelity_system_prompt() -> str: +def get_memoir_fidelity_system_prompt(language: str = "zh") -> str: """叙事/标题生成专用:准确性优先,禁止编造事实。""" + if language == "en": + return f"""You are a memoir editor. Your task is to lift the user's oral memory into first-person written prose. + +{_memoir_fidelity_core_rules_en()} +5. **Plain narrative tone.** Keep description and metaphor restrained; clear chronicle, not lyrical essay. + +{_memoir_fidelity_user_profile_rules_en()}""" return f"""你是回忆录编辑助手,任务是把用户口述整理为第一人称书面叙述。 {_memoir_fidelity_core_rules()} @@ -47,8 +78,15 @@ def get_memoir_fidelity_system_prompt() -> str: {_memoir_fidelity_user_profile_rules()}""" -def get_memoir_fidelity_facts_only_prompt() -> str: +def get_memoir_fidelity_facts_only_prompt(language: str = "zh") -> str: """与 `get_memoir_fidelity_system_prompt` 相同的事实 1–4 条,第 5 条改为允许传记作家式文采(仍禁止编造)。""" + if language == "en": + return f"""You are a memoir editor. Your task is to lift the user's oral memory into first-person written prose. + +{_memoir_fidelity_core_rules_en()} +5. **Style**: while obeying rules 1–4, write in a **first-person, lightly literary memoir voice** (scenes and emotion follow the material, never list-like reporting). Polish the speech into **graceful, flowing, readable** prose; where the oral memory or profile already anchors an era or region, you may let period vocabulary and atmosphere season the writing. You may organize the structure (paragraph splits within a single oral block, transitions, callbacks to people/things already named) **without introducing new facts**. Style serves truth; never use invented imagery to fill in missing facts. + +{_memoir_fidelity_user_profile_rules_en()}""" return f"""你是回忆录编辑助手,任务是把用户口述整理为第一人称书面叙述。 {_memoir_fidelity_core_rules()} @@ -57,20 +95,25 @@ def get_memoir_fidelity_facts_only_prompt() -> str: {_memoir_fidelity_user_profile_rules()}""" -def _memoir_editor_narrative_style_block() -> str: +def _memoir_editor_narrative_style_block(language: str = "zh") -> str: """传记作家改写要点:委托到独立的 `MemoirStyleProfile`,与 chat 风格隔离。""" - return MemoirStyleProfile().render_narrative_style_block() + return MemoirStyleProfile().render_narrative_style_block(language=language) def get_narrative_editor_system_prompt( - background_voice: str = "default", occupation: str = "" + background_voice: str = "default", + occupation: str = "", + language: str = "zh", ) -> str: """故事/章节叙事:传记作家式书面语 + 事实边界(chapter 直接展示 story 时使用)。""" + base = f"""{get_memoir_fidelity_facts_only_prompt(language=language)} + +{_memoir_editor_narrative_style_block(language=language)}""" + if language == "en": + # Skip occupation/background-voice Chinese-only addendums for English path. + return base occ_hint = get_occupation_narrative_hint(occupation, background_voice) tail = get_background_voice_narrative_block(background_voice) - base = f"""{get_memoir_fidelity_facts_only_prompt()} - -{_memoir_editor_narrative_style_block()}""" if occ_hint: base = f"{base}\n\n{occ_hint}" if not tail: @@ -78,14 +121,34 @@ def get_narrative_editor_system_prompt( return f"{base}\n\n{tail}" -def _short_classification_edit_prefix() -> str: +def _short_classification_edit_prefix(language: str = "zh") -> str: """章节分类专用短系统前缀。""" + if language == "en": + return """You are a memoir editor. Ignore filler and small talk; classify only by **substantive life-experience content**. +Keep: events, relationships, places and times, emotions and beliefs. Filter out: pure greetings, AI-interaction, unrelated chit-chat.""" return """你是回忆录编辑。先忽略语气词与寒暄,只根据**与人生经历有关的实质内容**判断归类。 保留:事件、人物关系、地点时间、情感与信念。过滤:纯寒暄、与 AI 的交互、无关闲聊。""" -def get_chapter_classification_json_prompt(segments_text: str) -> str: +def get_chapter_classification_json_prompt( + segments_text: str, language: str = "zh" +) -> str: """章节分类:JSON 输出(与 invoke_json_object 配合)。""" + if language == "en": + return f"""{_short_classification_edit_prefix("en")} + +## Chapter keys +childhood, education, career_early, career_achievement, career_challenge, family, beliefs, summary; if not enough to form a story → **none**. + +If, after stripping greetings, only profile-style point facts remain with no narrative spine (no event / scene / process / interaction / emotion arc) → **none**; a short but vivid micro-story belongs in the closest category. + +Dialogue content: +{segments_text} + +Output shape (only this object): +{{"category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"}} + +If you return **none**, the server will map this batch to the **summary** chapter and still write it into the memoir body (it is not dropped).""" return f"""{_short_classification_edit_prefix()} ## 章节 key(英文) @@ -103,12 +166,48 @@ childhood, education, career_early, career_achievement, career_challenge, family def get_state_extraction_prompt( - user_message: str, current_stage: str, stage_slots: dict + user_message: str, + current_stage: str, + stage_slots: dict, + language: str = "zh", ) -> str: """抽取结构化信息并判断阶段""" slot_keys = list(stage_slots.keys()) all_stage_slots = {k: list(v) for k, v in STAGE_SLOT_KEYS.items()} + if language == "en": + return f"""You are a memoir interview information extractor. From the user's utterance, extract structured information and decide which life stage they are actually talking about. +Only extract snippets that are clearly supported by the oral memory; do not fabricate or guess. + +You should first distill the **substantive life-experience content** from the user's words, then extract structured slots (only when there is clear evidence in the oral memory). + +System currently tracking stage: {current_stage} +Allowed slots for this stage: {slot_keys} + +All stages and their slots: +{json.dumps(all_stage_slots, ensure_ascii=False, indent=2)} + +User utterance: +{user_message} + +Return JSON only, in this shape: +{{ + "detected_stage": "childhood|education|career|family|belief", + "slots": {{ + "slot_key": "snippet" + }}, + "emotion": "neutral|warm|low|highlight", + "is_new_chapter": true +}} + +Requirements: +1. **First strip filler, AI-interaction commands, greetings, and small talk** — focus only on real life-experience content. +2. **Only when slots is non-empty**, detected_stage must reflect what the user actually talked about; the user may discuss a different stage than the system is tracking. +3. The keys in `slots` must belong to the slot list of `detected_stage`. +4. Only fill slots with substantive, life-experience content the user actually mentioned. +5. **Snippets are distilled cores** — strip filler, keep within ~50 characters where possible. +6. If the utterance has no real life-experience content (pure small talk, meta-instructions like "organize my memories", commands, fillers), `slots` must be the empty object and `detected_stage` must equal the system's current stage.""" + return f"""你是回忆录访谈信息抽取助手。从用户话语中提取结构化信息,判断用户实际在谈论哪个人生阶段。 只提取口述中确有依据的片段,不得编造或推测。 @@ -148,11 +247,51 @@ def get_batch_memoir_phase1_prep_prompt( system_current_stage: str, slots_snapshot: dict, segment_items: list[tuple[str, str]], + language: str = "zh", ) -> str: """ Phase1 批处理:多段口述一次 JSON 输出「抽取 + 章节分类」。 segment_items: (segment_id, user_text),须按时间顺序。 """ + if language == "en": + lines_en: list[str] = [] + for sid, text in segment_items: + lines_en.append(f"- id={sid}\n text: {text}") + slot_lines_en = "\n".join( + f"- {st}: {', '.join(keys)}" for st, keys in STAGE_SLOT_KEYS.items() + ) + return f"""You are a memoir interview assistant. Below are several user oral memory segments (in time order). For **each segment**: +1) Extract information (slots, detected_stage) — same rules as single-segment extraction. +2) Classify the chapter (chapter_category) — same rules as single-segment classification. + +System currently tracking stage (chat stage key): {system_current_stage} +Slot summary already gathered (context only — do not invent details that did not appear): +{json.dumps(slots_snapshot, ensure_ascii=False, indent=2)} + +`detected_stage` allowed values: childhood | education | career | family | belief +The keys in `slots` must belong to the slot list for that stage: +{slot_lines_en} + +`chapter_category` allowed values: childhood | education | career_early | career_achievement | career_challenge | family | beliefs | summary | **none** +(Profile-only points or pure small talk → **none**, same as single-segment classification.) + +Per-segment task (the `segments` array MUST cover every id below in the same order): +{chr(10).join(lines_en)} + +Return JSON object only (no markdown), shaped: +{{ + "segments": [ + {{ + "id": "", + "detected_stage": "childhood|education|career|family|belief", + "slots": {{ "slot_key": "snippet within ~50 chars" }}, + "chapter_category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none" + }} + ] +}} + +Same as single-segment extraction: **only when `slots` is non-empty** does `detected_stage` follow the content; if no life-experience content exists this segment, `slots` must be empty and `detected_stage` must equal the current system stage `{system_current_stage}`.""" + lines: list[str] = [] for sid, text in segment_items: lines.append(f"- id={sid}\n 文本:{text}") @@ -213,9 +352,35 @@ def get_creative_title_prompt( slots: dict, user_profile: str = "", birth_year: Optional[int] = None, + language: str = "zh", ) -> str: """生成故事标题:概括口述事实或主题,禁止纯意象编造。""" age_hint = _build_age_hint(stage, birth_year) + if language == "en": + profile_section_en = ( + f"\nAbout the user:\n{user_profile}" if user_profile else "" + ) + time_section_en = f"\nTime reference: {age_hint}" if age_hint else "" + return f"""{get_memoir_fidelity_facts_only_prompt(language="en")} + +Generate **one** memoir story title based on the stage, emotion, and available information below. + +Stage: {stage} +Emotion: {emotion} +Available information (oral slots and profile): {slots}{profile_section_en}{time_section_en} + +Requirements: +1. Format: "Time tag · Title body" (the time tag may use age, era, or stage; it must be consistent with the information above; do not invent years). +2. The title body should be **6–12 words**, concisely summarizing a theme or fact present in the oral memory or slots; literary phrasing is welcome but **invention is forbidden**. +3. Any **specific facts in the title** (job titles, unit names, battles, names, life-or-death outcomes) must have **literal evidence** in the oral excerpt or other slots; do not extrapolate from the stage name or age hint. +4. Be concise; memoir-flavored; neither flat nor florid. + +### Examples (facts come from slots/oral memory; the format is illustrative) +- Slots include childhood, river, heavy rain → `Around age 6 · Grandfather carrying me across the river in the rain` +- Slots include dorm, instant noodles, cafeteria → `Student years · Instant noodles when the cafeteria did not suit me` + +Output only the title line — no quotes, no brackets. +""" profile_section = f"\n用户基本信息:\n{user_profile}" if user_profile else "" time_section = f"\n时间参考:{age_hint}" if age_hint else "" @@ -247,6 +412,7 @@ def get_creative_title_json_prompt( slots: dict, user_profile: str = "", birth_year: Optional[int] = None, + language: str = "zh", ) -> str: """生成故事标题(JSON:`{"title":"..."}`),与 invoke_json_object 配合。""" base = get_creative_title_prompt( @@ -255,7 +421,14 @@ def get_creative_title_json_prompt( slots=slots, user_profile=user_profile, birth_year=birth_year, + language=language, ) + if language == "en": + return ( + base.rstrip() + + "\n\nExample output (only this JSON object):" + + '\n{"title":"Full title on one line (with time tag · body format)"}\n' + ) return ( base.rstrip() + "\n\n输出示例(仅此 JSON 对象):" @@ -272,6 +445,7 @@ def get_narrative_json_prompt( birth_year: Optional[int] = None, background_voice: str = "default", occupation: str = "", + language: str = "zh", ) -> str: """将新对话改写为叙述,输出 JSON 格式(paragraphs: [{content, image_description}])""" context_tail = "" @@ -279,13 +453,52 @@ def get_narrative_json_prompt( context_tail = ( existing_content[-300:] if len(existing_content) > 300 else existing_content ) + age_hint = _build_age_hint(stage, birth_year) + if language == "en": + context_section_en = ( + f"\n\n[Bridging context — tail of the existing story, for continuity only; do not repeat]:\n{context_tail}" + if context_tail + else "" + ) + profile_section_en = ( + f"\n\nAbout the user:\n{user_profile}" if user_profile else "" + ) + time_section_en = f"\nTime reference: {age_hint}" if age_hint else "" + return f"""{get_narrative_editor_system_prompt(background_voice=background_voice, occupation=occupation, language="en")} + +Rewrite the "User's oral memory this turn" block into first-person written prose and return **pure JSON** (no markdown fences). + +Stage: {stage} +Available information (slots): {slots}{profile_section_en}{time_section_en} + +Input material: +{new_content} +{context_section_en} + +## Requirements +1. **Format**: JSON only; first person; no `#`, `##`, no tables; `content` is body text only. +2. **Facts and material**: obey the fact boundary; do not fill in details that were not given. Expand only the "User's oral memory this turn"; if a reference-snippet block is included, do not write its specifics as the user's first-hand experience this turn; strip filler and small talk; do not repeat the full body of an existing story; stay within the same theme/event chain; paragraph count and length follow the material; do not pad for length. +3. **Do not infer outcomes**: when the user did not state a result (admitted, accepted, etc.), do not fill in a definite conclusion based on common sense. + +## Output schema (strict JSON) +{{ + "paragraphs": [ + {{"content": "paragraph body"}}, + ... + ] +}} + +- content: body text only. + +If nothing is worth recording: {{"paragraphs": []}} +""" + context_section = ( f"\n\n【衔接上下文(已有内容的末尾,仅供参考衔接,不要重复)】:\n{context_tail}" if context_tail else "" ) profile_section = f"\n\n用户基本信息:\n{user_profile}" if user_profile else "" - age_hint = _build_age_hint(stage, birth_year) time_section = f"\n时间参考:{age_hint}" if age_hint else "" return f"""{get_narrative_editor_system_prompt(background_voice=background_voice, occupation=occupation)} @@ -348,19 +561,59 @@ def get_narrative_merge_json_prompt( birth_year: Optional[int] = None, background_voice: str = "default", occupation: str = "", + language: str = "zh", ) -> str: """ 已有故事追加:将「已有全文(或节选)」与「本段口述」合并为**一篇**第一人称叙述, 按事件发生顺序组织段落,输出覆盖全篇的 JSON paragraphs。 """ clipped = clip_existing_story_body_for_merge(existing_content) + age_hint = _build_age_hint(stage, birth_year) + + if language == "en": + existing_section_en = ( + f"\n\n[Existing story body — keep all of its facts; reorder and bridge only; do not fabricate]:\n{clipped}" + if clipped + else "" + ) + profile_section_en = ( + f"\n\nAbout the user:\n{user_profile}" if user_profile else "" + ) + time_section_en = f"\nTime reference: {age_hint}" if age_hint else "" + return f"""{get_narrative_editor_system_prompt(background_voice=background_voice, occupation=occupation, language="en")} + +You are **expanding and reorganizing** an existing memoir story: you must keep every fact from the existing story in the output (you may merge redundant phrasing and adjust order), and weave in the new facts from "User's oral memory this turn"; order paragraphs by **chronological order of events** (earliest → latest); do not drop existing content unless the new memory contradicts it. + +Stage: {stage} +Available information (slots): {slots}{profile_section_en}{time_section_en} + +[User's oral memory this turn and reference — when an evidence-snippet block is present, follow the fact boundary]: +{new_content} +{existing_section_en} + +## Requirements +1. **Full body output**: `paragraphs` must be the **complete reorganized story body** (not just this turn's segment). +2. **Fact boundary**: obey the fact boundary; do not fill in missing details. Do not add people, places, dates, dialogue, or numbers that appear in neither the existing body nor this turn; write first-person, graceful prose; no `#`, `##`, no tables. +3. If this turn fully overlaps the old body or adds no new information, return a faithful reorganized version of the old body (do not arbitrarily shorten it). +4. **Do not infer outcomes**: when this turn does not state an outcome, do not assert a definite outcome unless the old body already states the same fact. + +## Output schema (strict JSON) +{{ + "paragraphs": [ + {{"content": "paragraph body"}}, + ... + ] +}} + +If nothing can be retained: {{"paragraphs": []}} +""" + existing_section = ( f"\n\n【已有故事正文(须全部保留事实,仅调整顺序与衔接;不得编造)】:\n{clipped}" if clipped else "" ) profile_section = f"\n\n用户基本信息:\n{user_profile}" if user_profile else "" - age_hint = _build_age_hint(stage, birth_year) time_section = f"\n时间参考:{age_hint}" if age_hint else "" return f"""{get_narrative_editor_system_prompt(background_voice=background_voice, occupation=occupation)} @@ -546,13 +799,24 @@ def get_story_batch_plan_prompt( """ -def format_narrative_user_content(oral_text: str, evidence_text: str = "") -> str: +def format_narrative_user_content( + oral_text: str, evidence_text: str = "", language: str = "zh" +) -> str: """ 将口述与检索摘录分区,供叙事模型区分「亲历」与参考材料。 evidence 为空时仅输出口述块。 """ oral = (oral_text or "").strip() ev = (evidence_text or "").strip() + if language == "en": + if not ev: + return f"[User's oral memory this turn]\n{oral}" + return ( + "[User's oral memory this turn]\n" + f"{oral}\n\n" + "[Reference memory snippets (not this turn's oral memory; do NOT write their specifics as the user's first-hand experience this turn — bridging only)]\n" + f"{ev}" + ) if not ev: return f"【本段用户口述】\n{oral}" return ( diff --git a/api/app/agents/stage_constants.py b/api/app/agents/stage_constants.py index 4281831..93c44da 100644 --- a/api/app/agents/stage_constants.py +++ b/api/app/agents/stage_constants.py @@ -27,6 +27,14 @@ STAGE_DISPLAY_ZH = { "belief": "人生信念", } +STAGE_DISPLAY_EN = { + "childhood": "Childhood", + "education": "Schooling", + "career": "Career", + "family": "Family", + "belief": "Beliefs", +} + STAGE_TO_DEFAULT_CATEGORY = { "childhood": "childhood", "education": "education", @@ -46,8 +54,33 @@ CHAPTER_CATEGORIES = { "summary": "人生总结", } +CHAPTER_CATEGORIES_EN = { + "childhood": "Childhood & Early Years", + "education": "Schooling & Youth", + "career_early": "Finding My Footing", + "career_achievement": "Highlights & Milestones", + "career_challenge": "Setbacks & Turning Points", + "family": "Family & Loved Ones", + "beliefs": "Beliefs & Values", + "summary": "Looking Back", +} + VALID_CHAPTER_CATEGORIES: frozenset[str] = frozenset(CHAPTER_CATEGORIES.keys()) + +def stage_display_name(stage: str | None, language: str = "zh") -> str: + """Return human display name for a chat stage in the requested language.""" + s = (stage or "").strip() + table = STAGE_DISPLAY_EN if language == "en" else STAGE_DISPLAY_ZH + return table.get(s, s) + + +def chapter_category_display(category: str | None, language: str = "zh") -> str: + """Return human display name for a chapter category in the requested language.""" + c = (category or "").strip() + table = CHAPTER_CATEGORIES_EN if language == "en" else CHAPTER_CATEGORIES + return table.get(c, c) + CHAPTER_ORDER = [ "childhood", "education", diff --git a/api/app/agents/style_profiles.py b/api/app/agents/style_profiles.py index e15cb89..774fe31 100644 --- a/api/app/agents/style_profiles.py +++ b/api/app/agents/style_profiles.py @@ -177,7 +177,49 @@ class MemoirStyleProfile: quality_hints: MemoirQualityHints = field(default_factory=MemoirQualityHints) - def render_narrative_style_block(self) -> str: + def render_narrative_style_block(self, language: str = "zh") -> str: + if language == "en": + return """## Biographer voice (must also obey the fact boundary above) +You are a biographer / editor lifting spoken memories into a **lightly literary** memoir chapter (first-person prose), warm and time-textured — not a flat summary. + +### Distill and select +Conversation tends to include noise — filter strictly: keep concrete events, relationships, places and times, emotion and conviction, and details the user already mentioned; drop fillers, small talk, AI-interaction, unrelated chit-chat, redundant repetition. **Sense detail (color, sound, smell, touch, image)**: you may render only what the user already mentioned in the oral memory; do not invent any new sensory detail or scene element. + +### Two internal steps (do NOT show in output) +First, in your head, **distill** (filter noise; lock the propositions to what is in "User's oral memory this turn"); then **narrate** (syntax, rhythm, paragraphing, transitions). The **final output** must conform to the user-message format requirement (e.g. JSON only); do not output the distillation step or any draft. + +### Rewriting principles +- Keep the user's true emotion; let the reader feel the narrator's mood. +- Use graceful but warm written English; do not directly quote spoken phrases verbatim. +- Add transition sentences for flow. +- Preserve vivid details; render colloquial expression with picture-quality written prose. +- Remove fillers and meaningless repetition. +- Keep time order and logic clear. +- **Within the fact boundary**, lean into a warm biographer's voice; modest literary expression and emotional shading are welcome. +- **No meta-talk in the body**: do not write conversational phrases such as "let me tell you," "you know," "honestly speaking" — the reader should meet the experience directly. + +### Structure and rhythm (zero new facts) +Without adding any new people, places, dates, dialogue, numbers, or causes, you may vary sentence length: short sentences to land, longer ones to unfold what is already given; use connectors and pronouns at the start of paragraphs to bridge; when the material allows, split one oral block by inner scenes or steps. Aim for **a short essay** rather than a list of bullet points. Reorganize given propositions only — do not add new facts to "improve rhythm." + +### Era and culture (must be anchored in the oral memory or profile) +When the material already names an era, place, or occupation/identity, you may use **period-appropriate** vocabulary and ambient texture to set the scene — only as **language and atmosphere on top of known facts**, never as new dramatic content. If the oral memory is very short, keep the cultural touch light. + +### Quality dimensions (orientation; none may breach the fact boundary) +- **Truth and coverage**: expand only on the oral memory; do not invent or extrapolate outcomes; write the named life moments in full; keep short input short. +- **Information density**: after stripping fillers and merging repetition, you may slightly increase readable density; never pad for length. +- **Information quality**: keep verifiable, specific people / events / places; cut filler and repetition; readers should feel there is **substance**. +- **Narrative structure**: clear time order within a paragraph; write scenes and turning points when present; "a small chapter" rather than a flat record. +- **Language and prose**: readable, with **clear literary feel** beyond plain reportage; restrained metaphor and synesthesia; smooth transitions; permitted "expansion" is rhetorical only, never invented facts. +- **Emotional expression**: emotion matches the oral memory; written voice may be elevated but not melodramatic. +- **Character modeling**: relationships, attitudes, and choices come through clearly so the reader knows "what kind of person this is." +- **Coherence**: pronouns and timeline align with any "bridging context"; never self-contradictory. +- **Expression richness**: tasteful metaphor and varied phrasing; no marching parallelism. +- **Publication readiness**: reads like a chapter draft an editor could continue to polish, not a chat transcript or marketing copy. + +### Output format constraints +- First person. +- No Markdown headings (`#`, `##`), no tables. +- If a "bridging context" block is present, keep tone and timeline consistent with it; do not repeat its body verbatim.""" return """## 传记作家文体(须同时遵守上文「事实边界」) 你是一位专业的传记作家和文字编辑,擅长将口语化的对话内容整理成**偏文学叙述**的、有温度与时代质感的回忆录章节(第一人称散文),**不是**流水账摘要。 diff --git a/api/app/core/config.py b/api/app/core/config.py index a167651..f4fbcab 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -163,7 +163,13 @@ class Settings(BaseSettings): enable_tts: bool = True tts_provider: str = "tencent" openai_api_key: str = "" - tts_voice_type: int = 502001 # Tencent 音色 ID,见 https://cloud.tencent.com/document/product/1073/92668 + # 501004 = 月华,腾讯云大模型音色,支持中英混合(PrimaryLanguage=1/2 均可)。 + # 调用 TextToVoice 时必须配合 ModelType=1,详见 https://cloud.tencent.com/document/api/1073/37995 + # 与音色清单 https://cloud.tencent.com/document/product/1073/92668 + tts_voice_type: int = 501004 + # 英文场景默认同样使用 501004(月华大模型音色,原生支持中英混合), + # 因此无需另配独立英文音色;如需切换英文专用音色请显式覆盖此项。 + tts_voice_type_en: int = 501004 tts_codec: str = "mp3" # ── WeChat Pay ─────────────────────────────────────────── diff --git a/api/app/core/dependencies.py b/api/app/core/dependencies.py index adb4f27..f170169 100644 --- a/api/app/core/dependencies.py +++ b/api/app/core/dependencies.py @@ -97,6 +97,7 @@ def get_tts_provider() -> TTSProvider: secret_key=settings.tencent_secret_key, voice_type=settings.tts_voice_type, codec=settings.tts_codec, + voice_type_en=settings.tts_voice_type_en, ) from app.adapters.tts.openai_tts import OpenAITTSProvider diff --git a/api/app/features/auth/router.py b/api/app/features/auth/router.py index c6cd9fd..92c2a97 100644 --- a/api/app/features/auth/router.py +++ b/api/app/features/auth/router.py @@ -65,6 +65,10 @@ def _map_auth_error(e: AuthError) -> HTTPException: def _user_response(user: User) -> UserResponse: + raw_lang = getattr(user, "language_preference", "zh") + lang = str(raw_lang).strip().lower() if isinstance(raw_lang, str) else "zh" + if lang not in ("zh", "en"): + lang = "zh" return UserResponse( id=user.id, phone=user.phone, @@ -73,6 +77,7 @@ def _user_response(user: User) -> UserResponse: avatar_url=user.avatar_url, subscription_type=user.subscription_type, created_at=user.created_at.isoformat(), + language_preference=lang, ) @@ -112,6 +117,7 @@ async def register( password=request.password, nickname=request.nickname, email=request.email, + language=request.language, ) except AuthError as e: raise _map_auth_error(e) @@ -477,6 +483,7 @@ async def login_with_sms( phone=request.phone, code=request.code, nickname=request.nickname, + language=request.language, ) except AuthError as e: raise _map_auth_error(e) @@ -507,6 +514,7 @@ async def mock_sms_login_route( result = await service.mock_sms_login( phone=request.phone, nickname=request.nickname, + language=request.language, ) except AuthError as e: raise _map_auth_error(e) @@ -535,6 +543,7 @@ async def register_with_sms( password=request.password, nickname=request.nickname, email=request.email, + language=request.language, ) except AuthError as e: raise _map_auth_error(e) diff --git a/api/app/features/auth/schemas.py b/api/app/features/auth/schemas.py index 15bac34..4e5716b 100644 --- a/api/app/features/auth/schemas.py +++ b/api/app/features/auth/schemas.py @@ -1,7 +1,9 @@ -from typing import Optional +from typing import Literal, Optional from pydantic import BaseModel, Field +LanguagePreference = Literal["zh", "en"] + class RegisterRequest(BaseModel): phone: str = Field(..., min_length=11, max_length=11, description="手机号(11位)") @@ -9,6 +11,10 @@ class RegisterRequest(BaseModel): nickname: str = Field(..., min_length=1, max_length=50, description="昵称") email: Optional[str] = Field(None, description="邮箱(可选)") agreed_to_terms: bool = Field(..., description="是否同意用户协议和隐私政策") + language: Optional[LanguagePreference] = Field( + None, + description="device language at signup; only used when creating a new user", + ) class LoginRequest(BaseModel): @@ -35,6 +41,7 @@ class UserResponse(BaseModel): avatar_url: Optional[str] = None subscription_type: str created_at: str + language_preference: LanguagePreference = "zh" class SendSmsRequest(BaseModel): @@ -57,6 +64,10 @@ class SmsLoginRequest(BaseModel): nickname: Optional[str] = Field( None, max_length=50, description="昵称(注册时必填,登录时可选)" ) + language: Optional[LanguagePreference] = Field( + None, + description="device language at signup; only used when creating a new user", + ) class MockSmsLoginRequest(BaseModel): @@ -67,6 +78,10 @@ class MockSmsLoginRequest(BaseModel): nickname: Optional[str] = Field( None, max_length=50, description="新用户昵称(可选)" ) + language: Optional[LanguagePreference] = Field( + None, + description="device language at signup; only used when creating a new user", + ) class SmsRegisterRequest(BaseModel): @@ -76,6 +91,10 @@ class SmsRegisterRequest(BaseModel): nickname: str = Field(..., min_length=1, max_length=50, description="昵称") email: Optional[str] = Field(None, description="邮箱(可选)") agreed_to_terms: bool = Field(..., description="是否同意用户协议和隐私政策") + language: Optional[LanguagePreference] = Field( + None, + description="device language at signup; only used when creating a new user", + ) class ResetPasswordRequest(BaseModel): diff --git a/api/app/features/auth/service.py b/api/app/features/auth/service.py index a523aad..83eb9b7 100644 --- a/api/app/features/auth/service.py +++ b/api/app/features/auth/service.py @@ -24,6 +24,16 @@ CODE_LENGTH = 6 CODE_EXPIRE_MINUTES = 5 RATE_LIMIT_SECONDS = 60 +_VALID_LANGUAGES = {"zh", "en"} + + +def _normalize_language(lang: str | None) -> str: + """Normalize device language token; default to zh on missing/unknown.""" + if not lang: + return "zh" + s = str(lang).strip().lower() + return s if s in _VALID_LANGUAGES else "zh" + class AuthError(Exception): def __init__(self, message: str, code: str = "AUTH_ERROR"): @@ -120,6 +130,7 @@ class AuthService: password: str, nickname: str, email: str | None = None, + language: str | None = None, ) -> dict: """Register new user. Returns {user, access_token, refresh_token}.""" if await repo.get_user_by_phone(phone, self._db): @@ -137,6 +148,7 @@ class AuthService: nickname=nickname, subscription_type="free", created_at=datetime.now(timezone.utc), + language_preference=_normalize_language(language), ) await repo.create_user(user, self._db) tokens = await self._issue_tokens(user_id) @@ -206,6 +218,7 @@ class AuthService: code: str, device_info: str = "", nickname: str | None = None, + language: str | None = None, ) -> dict: """SMS login (auto-register if new). Returns {user, access_token, refresh_token, is_new_user}.""" success = False @@ -219,7 +232,10 @@ class AuthService: raise AuthError(message, "INVALID_SMS_CODE") return await self._sms_login_after_code_verified( - phone, device_info=device_info, nickname=nickname + phone, + device_info=device_info, + nickname=nickname, + language=language, ) async def _sms_login_after_code_verified( @@ -228,8 +244,12 @@ class AuthService: *, device_info: str = "", nickname: str | None = None, + language: str | None = None, ) -> dict: - """SMS 已校验通过后:查找或创建用户并签发令牌。""" + """SMS 已校验通过后:查找或创建用户并签发令牌。 + + ``language`` 仅在「新用户」分支下写入;命中已有用户时不覆盖偏好。 + """ user = await repo.get_user_by_phone(phone, self._db) is_new_user = user is None @@ -242,6 +262,7 @@ class AuthService: nickname=(nickname or "").strip(), subscription_type="free", created_at=datetime.now(timezone.utc), + language_preference=_normalize_language(language), ) await repo.create_user(user, self._db) @@ -257,10 +278,14 @@ class AuthService: phone: str, device_info: str = "", nickname: str | None = None, + language: str | None = None, ) -> dict: """跳过短信校验的登录/自动注册(仅由 mock 路由在配置允许时调用)。""" return await self._sms_login_after_code_verified( - phone, device_info=device_info, nickname=nickname + phone, + device_info=device_info, + nickname=nickname, + language=language, ) async def register_with_sms( @@ -271,6 +296,7 @@ class AuthService: nickname: str, email: str | None = None, device_info: str = "", + language: str | None = None, ) -> dict: """SMS register. Returns {user, access_token, refresh_token}.""" success, message = await self._verify_sms_code(phone, code, "register") @@ -292,6 +318,7 @@ class AuthService: nickname=nickname, subscription_type="free", created_at=datetime.now(timezone.utc), + language_preference=_normalize_language(language), ) await repo.create_user(user, self._db) tokens = await self._issue_tokens(user_id, device_info) diff --git a/api/app/features/conversation/service.py b/api/app/features/conversation/service.py index 5668fe3..aeb21a5 100644 --- a/api/app/features/conversation/service.py +++ b/api/app/features/conversation/service.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone from fastapi import HTTPException from sqlalchemy.ext.asyncio import AsyncSession +from app.agents.chat.personas import agent_name from app.core.cos_url_keys import ( collect_cos_keys_from_conversation_history, collect_cos_keys_from_tts_url_list, @@ -23,6 +24,7 @@ from app.features.conversation.session_history import ( from app.features.conversation.tts_delivery import apply_presigned_tts_urls_to_messages from app.features.memory import repo as memory_repo from app.features.quota.service import QuotaService +from app.features.user.models import User from app.ports.storage import ObjectStorage from app.tasks.memoir_tasks import ( dispatch_pending_memoir_phase2_for_user, @@ -196,6 +198,11 @@ class ConversationService: async def list_for_user(self, user_id: str) -> list[dict]: conversations = await repo.get_user_conversations(user_id, self._db) + # Fetch language once for fallback title localization (no per-row N+1). + user_obj = await self._db.get(User, user_id) + raw_lang = getattr(user_obj, "language_preference", "zh") if user_obj else "zh" + lang = str(raw_lang or "zh").strip().lower() + fallback_title = agent_name(lang) result = [] for conv in conversations: history: list[dict] = [] @@ -208,7 +215,7 @@ class ConversationService: result.append( { "id": conv.id, - "title": (conv.summary or "")[:30] or "岁月知己", + "title": (conv.summary or "")[:30] or fallback_title, "avatarUrl": None, "latestMessagePreview": latest_message or conv.summary, "latestMessageTime": _latest_message_time_ms(conv, history), diff --git a/api/app/features/conversation/ws/pipeline.py b/api/app/features/conversation/ws/pipeline.py index ee96511..8bd37f6 100644 --- a/api/app/features/conversation/ws/pipeline.py +++ b/api/app/features/conversation/ws/pipeline.py @@ -64,6 +64,12 @@ def _tts_epoch_value(conversation_id: str) -> int: return _tts_cancel_epoch.get(conversation_id, 0) +def _resolve_user_language(user) -> str: + """Return 'en' iff user.language_preference is set to 'en'; default 'zh'.""" + raw = getattr(user, "language_preference", "zh") if user is not None else "zh" + return "en" if str(raw or "zh").strip().lower() == "en" else "zh" + + def _tts_object_ext(codec: str) -> str: c = (codec or "mp3").lower().lstrip(".") if c in ("wave",): @@ -89,31 +95,101 @@ async def _send_tts_audio( assistant_message_id: str | None, tts_epoch_start: int, manual: bool = False, + language: str = "zh", ) -> str | None: """Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None.""" + current_epoch = _tts_epoch_value(conversation_id) + # 长期保留 INFO:TTS 决策与执行链路必须在 INFO 级别全程可见 + logger.info( + "pipeline._send_tts_audio entry conversation_id={} chunk_index={} chunk_total={} " + "text_len={} language={} manual={} tts_epoch_start={} current_epoch={} " + "enable_tts={} provider={}", + conversation_id, + chunk_index, + chunk_total, + len(text or ""), + language, + manual, + tts_epoch_start, + current_epoch, + settings.enable_tts, + settings.tts_provider, + ) if not settings.enable_tts: + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False " + "url_set=False audio_bytes_len=0 reason=enable_tts_false", + conversation_id, + chunk_index, + ) return None - if _tts_epoch_value(conversation_id) != tts_epoch_start: + if current_epoch != tts_epoch_start: + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False " + "url_set=False audio_bytes_len=0 reason=epoch_mismatch_pre_synth " + "tts_epoch_start={} current_epoch={}", + conversation_id, + chunk_index, + tts_epoch_start, + current_epoch, + ) return None try: tts = get_tts_provider() - audio_bytes = await tts.synthesize(text) + audio_bytes = await tts.synthesize(text, language=language) if not audio_bytes: logger.warning( - "TTS skipped: synthesize returned empty. Check TTS config in .env" + "TTS skipped: synthesize returned empty conversation_id={} chunk_index={} " + "language={} text_preview={!r} voice_provider={}", + conversation_id, + chunk_index, + language, + (text or "")[:30], + settings.tts_provider, + ) + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False " + "url_set=False audio_bytes_len=0 reason=synthesize_empty", + conversation_id, + chunk_index, ) return None if _tts_epoch_value(conversation_id) != tts_epoch_start: + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False " + "url_set=False audio_bytes_len={} reason=epoch_mismatch_post_synth", + conversation_id, + chunk_index, + len(audio_bytes), + ) return None ext = _tts_object_ext(settings.tts_codec) content_type = _tts_codec_to_content_type(settings.tts_codec) storage = get_object_storage() key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}" + upload_started = time.perf_counter() + logger.debug( + "pipeline._send_tts_audio uploading key={} audio_bytes_len={} content_type={}", + key, + len(audio_bytes), + content_type, + ) public_url = storage.upload(key, audio_bytes, content_type) + upload_ms = (time.perf_counter() - upload_started) * 1000 # 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致:下发可播 URL playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC) + logger.debug( + "pipeline._send_tts_audio uploaded key={} audio_bytes_len={} upload_ms={:.2f} " + "public_url_set={} playback_url_set={}", + key, + len(audio_bytes), + upload_ms, + bool(public_url), + bool(playback_url), + ) + audio_b64 = base64.b64encode(audio_bytes).decode("utf-8") payload_data: Dict[str, Any] = { - "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"), + "audio_base64": audio_b64, "format": settings.tts_codec, "audio_url": playback_url, "index": chunk_index, @@ -123,6 +199,16 @@ async def _send_tts_audio( payload_data["assistant_message_id"] = assistant_message_id if manual: payload_data["manual"] = True + logger.debug( + "pipeline._send_tts_audio sending TTS_AUDIO conversation_id={} chunk_index={} " + "chunk_total={} payload_fields={} audio_b64_len={} manual={}", + conversation_id, + chunk_index, + chunk_total, + sorted(payload_data.keys()), + len(audio_b64), + manual, + ) await manager.send_message( conversation_id, { @@ -132,6 +218,16 @@ async def _send_tts_audio( "timestamp": datetime.now(timezone.utc).isoformat(), }, ) + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=True " + "url_set={} audio_bytes_len={} upload_ms={:.2f} manual={}", + conversation_id, + chunk_index, + bool(public_url), + len(audio_bytes), + upload_ms, + manual, + ) return public_url except Exception as e: err_str = str(e) @@ -142,6 +238,13 @@ async def _send_tts_audio( ) else: logger.error("TTS synthesize failed: {}", e) + logger.info( + "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False " + "url_set=False audio_bytes_len=0 reason=exception err={}", + conversation_id, + chunk_index, + type(e).__name__, + ) return None @@ -155,15 +258,44 @@ async def handle_tts_request_on_demand( db: AsyncSession, ) -> tuple[bool, str]: """用户点喇叭:该段已有 TTS 则预签名下发;否则合成后落库并下发。不重复合成同一段。""" + logger.info( + "pipeline.handle_tts_request_on_demand entry conversation_id={} user_id={} " + "assistant_message_id={} segment_index={} segment_text_len={} enable_tts={} provider={}", + conversation_id, + user_id, + assistant_message_id, + segment_index, + len(segment_text or ""), + settings.enable_tts, + settings.tts_provider, + ) if not settings.enable_tts: + logger.info( + "pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 " + "conversation_id={} assistant_message_id={}", + conversation_id, + assistant_message_id, + ) return False, "未开启语音合成" conv = await db.get(Conversation, conversation_id) if not conv or conv.user_id != user_id or conv.deleted_at is not None: + logger.debug( + "pipeline.handle_tts_request_on_demand result ok=False reason=对话不存在或无权访问 " + "conversation_id={} user_id={}", + conversation_id, + user_id, + ) return False, "对话不存在或无权访问" msg = await db.get(ConversationMessage, assistant_message_id) if not msg or msg.conversation_id != conversation_id or msg.role != "ai": + logger.debug( + "pipeline.handle_tts_request_on_demand result ok=False reason=消息不存在 " + "conversation_id={} assistant_message_id={}", + conversation_id, + assistant_message_id, + ) return False, "消息不存在" # 与客户端 splitMessageParts / segments_from_llm_response 对齐(含无 [SPLIT] 时的段落拆段) @@ -195,6 +327,14 @@ async def handle_tts_request_on_demand( chunk_total = len(parts) if existing: + logger.info( + "pipeline.handle_tts_request_on_demand reuse existing url conversation_id={} " + "assistant_message_id={} segment_index={} url_len={}", + conversation_id, + assistant_message_id, + segment_index, + len(existing), + ) storage = get_object_storage() key = extract_cos_object_key_if_owned(existing) try: @@ -222,8 +362,27 @@ async def handle_tts_request_on_demand( "timestamp": datetime.now(timezone.utc).isoformat(), }, ) + logger.info( + "pipeline.handle_tts_request_on_demand result ok=True reason=existing_reused " + "conversation_id={} assistant_message_id={} segment_index={}", + conversation_id, + assistant_message_id, + segment_index, + ) return True, "" + logger.info( + "pipeline.handle_tts_request_on_demand no existing url, will synthesize " + "conversation_id={} assistant_message_id={} segment_index={} canon_len={}", + conversation_id, + assistant_message_id, + segment_index, + len(canon), + ) + + user_obj = await db.get(User, user_id) + user_language = _resolve_user_language(user_obj) + tts_epoch_start = _tts_epoch_value(conversation_id) url_stored = await _send_tts_audio( conversation_id, @@ -233,8 +392,24 @@ async def handle_tts_request_on_demand( assistant_message_id=assistant_message_id, tts_epoch_start=tts_epoch_start, manual=True, + language=user_language, + ) + logger.info( + "pipeline.handle_tts_request_on_demand _send_tts_audio returned url_stored_set={} " + "conversation_id={} assistant_message_id={} segment_index={}", + bool(url_stored), + conversation_id, + assistant_message_id, + segment_index, ) if not url_stored: + logger.info( + "pipeline.handle_tts_request_on_demand result ok=False reason=语音合成失败 " + "conversation_id={} assistant_message_id={} segment_index={}", + conversation_id, + assistant_message_id, + segment_index, + ) return False, "语音合成失败" while len(urls) <= segment_index: @@ -245,6 +420,13 @@ async def handle_tts_request_on_demand( store = ConversationHistoryStore(db) await store._sync_redis_best_effort(conversation_id) + logger.info( + "pipeline.handle_tts_request_on_demand result ok=True reason=synthesized " + "conversation_id={} assistant_message_id={} segment_index={}", + conversation_id, + assistant_message_id, + segment_index, + ) return True, "" @@ -852,6 +1034,7 @@ async def process_user_message( """处理用户消息,生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。""" store = ConversationHistoryStore(db) tts_urls: list[str] = [] + user_language = _resolve_user_language(user) try: logger.info( "process_user_message 开始: conversation_id={} segment_id={} user_chars={}", @@ -859,6 +1042,18 @@ async def process_user_message( segment.id, len(user_message or ""), ) + # 长期保留:TTS 决策入口(pipeline 层);INFO 级别可见所有控制位 + logger.info( + "pipeline.process_user_message entry conversation_id={} segment_id={} " + "tts_this_turn={} force_skip_tts={} enable_tts={} provider={} user_language={}", + conversation_id, + segment.id, + tts_this_turn, + force_skip_tts, + settings.enable_tts, + settings.tts_provider, + user_language, + ) is_from_voice = bool(segment.audio_url) voice_session_id = _voice_session_id_from_audio_url(segment.audio_url) audio_dur = getattr(segment, "audio_duration_seconds", None) @@ -886,6 +1081,21 @@ async def process_user_message( skip_tts = bool(turn.skip_tts) want_voice = bool(tts_this_turn) if tts_this_turn is not None else False want_tts = want_voice and settings.enable_tts and not skip_tts + # 长期保留 INFO:TTS 决策最终结论;不再被 agent_summary_enabled 门控 + logger.info( + "pipeline.process_user_message tts_decision conversation_id={} segment_id={} " + "tts_this_turn={} force_skip_tts={} enable_tts={} skip_tts_from_turn={} " + "want_voice={} want_tts={} response_segments={}", + conversation_id, + segment.id, + tts_this_turn, + force_skip_tts, + settings.enable_tts, + skip_tts, + want_voice, + want_tts, + len(turn.messages), + ) if agent_summary_enabled(): logger.info( "pipeline.process_user_message duration_ms={:.2f} " @@ -952,21 +1162,55 @@ async def process_user_message( ai_msg_id = turn_ids.assistant_message_id tts_epoch_start = _tts_epoch_value(conversation_id) n = len(responses) + # tts_cancelled 仅用于跳过后续 TTS 合成;AGENT_RESPONSE 必须为每段完整下发, + # 否则 FE 会停留在 "正在回复…" 或丢失尾段文本。 + tts_cancelled = False for i, response_text in enumerate(responses): url_for_segment: Optional[str] = None - if want_tts: + if want_tts and not tts_cancelled: if _tts_epoch_value(conversation_id) != tts_epoch_start: - break - url_for_segment = await _send_tts_audio( + tts_cancelled = True + logger.info( + "pipeline.process_user_message segment={}/{} tts_branch=skip_cancelled " + "tts_cancelled={} conversation_id={}", + i, + n, + tts_cancelled, + conversation_id, + ) + else: + logger.info( + "pipeline.process_user_message segment={}/{} tts_branch=synthesize " + "tts_cancelled={} conversation_id={}", + i, + n, + tts_cancelled, + conversation_id, + ) + url_for_segment = await _send_tts_audio( + conversation_id, + response_text, + chunk_index=i, + chunk_total=n, + assistant_message_id=ai_msg_id, + tts_epoch_start=tts_epoch_start, + language=user_language, + ) + if url_for_segment: + tts_urls.append(url_for_segment) + if _tts_epoch_value(conversation_id) != tts_epoch_start: + tts_cancelled = True + else: + logger.info( + "pipeline.process_user_message segment={}/{} tts_branch={} " + "tts_cancelled={} want_tts={} conversation_id={}", + i, + n, + "skip_cancelled" if tts_cancelled else "skip_no_tts", + tts_cancelled, + want_tts, conversation_id, - response_text, - chunk_index=i, - chunk_total=n, - assistant_message_id=ai_msg_id, - tts_epoch_start=tts_epoch_start, ) - if url_for_segment: - tts_urls.append(url_for_segment) await manager.send_message( conversation_id, @@ -983,8 +1227,6 @@ async def process_user_message( }, ) - if _tts_epoch_value(conversation_id) != tts_epoch_start: - break if i < n - 1: await asyncio.sleep(0.5) diff --git a/api/app/features/conversation/ws/router.py b/api/app/features/conversation/ws/router.py index 8ff35ac..fbb32e3 100644 --- a/api/app/features/conversation/ws/router.py +++ b/api/app/features/conversation/ws/router.py @@ -158,6 +158,11 @@ async def websocket_endpoint( history = await conversation_service.ensure_redis_history_from_db( conversation_id ) + user_language = ( + "en" + if str(getattr(user, "language_preference", "zh") or "zh").lower() == "en" + else "zh" + ) if not history: missing_profile = get_missing_profile_fields(user) if missing_profile: @@ -166,6 +171,7 @@ async def websocket_endpoint( conversation_id=conversation_id, missing_fields=missing_profile, nickname=user.nickname or "", + language=user_language, ) ai_msg_id = await ConversationHistoryStore( db @@ -201,6 +207,7 @@ async def websocket_endpoint( birth_place=user.birth_place, grew_up_place=user.grew_up_place, occupation=user.occupation, + language=user_language, ) era_place = (user.grew_up_place or user.birth_place or "") or "" opening_messages = ( @@ -214,6 +221,7 @@ async def websocket_endpoint( occupation=user.occupation or "", profile_birth_year=user.birth_year, profile_era_place=era_place, + language=user_language, ) ) ai_msg_id = await ConversationHistoryStore( @@ -281,6 +289,13 @@ async def websocket_endpoint( data = message.get("data") or {} text_message = data.get("text", "") tts_this_turn = bool(data.get("tts_this_turn")) + # 长期保留:TTS 决策入口可见性(INFO 级别即可定位 FE 是否带 tts_this_turn) + logger.info( + "ws.user_message tts_this_turn={} conversation_id={} text_len={}", + tts_this_turn, + conversation_id, + len(text_message or ""), + ) if text_message: can_send, quota_msg = await check_ws_quota( @@ -381,6 +396,17 @@ async def websocket_endpoint( voice_session_id = str(resolved_vs).strip() is_last = bool(data.get("is_last", False)) audio_duration = int(data.get("duration", 0) or 0) + tts_this_turn_segment = bool(data.get("tts_this_turn")) + # 长期保留:分段语音轮的 TTS 决策入口可见性 + logger.info( + "ws.audio_segment tts_this_turn={} is_last={} " + "conversation_id={} voice_session_id={} segment_index_raw={}", + tts_this_turn_segment, + is_last, + conversation_id, + voice_session_id, + segment_index_raw, + ) if not audio_base64: await manager.send_message( @@ -488,7 +514,7 @@ async def websocket_endpoint( audio_base64=audio_base64, audio_duration=audio_duration, is_last=is_last, - tts_this_turn=bool(data.get("tts_this_turn")), + tts_this_turn=tts_this_turn_segment, ) ) register_segment_task(conversation_id, voice_session_id, task) @@ -498,6 +524,13 @@ async def websocket_endpoint( audio_base64 = data.get("audio_base64", "") audio_duration = data.get("duration", 0) tts_this_turn = bool(data.get("tts_this_turn")) + # 长期保留:单次整段音频路径的 TTS 决策入口可见性 + logger.info( + "ws.audio_message tts_this_turn={} conversation_id={} duration_s={}", + tts_this_turn, + conversation_id, + audio_duration, + ) if audio_base64: can_send, quota_msg = await check_ws_quota( @@ -659,6 +692,12 @@ async def websocket_endpoint( "assistantMessageId" ) if not aid or not str(aid).strip(): + logger.warning( + "ws.TTS_REQUEST 缺少 assistant_message_id " + "conversation_id={} user_id={}", + conversation_id, + user_id, + ) await manager.send_message( conversation_id, { @@ -680,6 +719,15 @@ async def websocket_endpoint( st_val = None else: st_val = str(st).strip() or None + logger.info( + "ws.TTS_REQUEST received conversation_id={} user_id={} " + "assistant_message_id={} segment_index={} segment_text_len={}", + conversation_id, + user_id, + str(aid).strip(), + seg_idx, + len(st_val or ""), + ) ok, err_msg = await handle_tts_request_on_demand( conversation_id=conversation_id, user_id=user_id, @@ -688,6 +736,15 @@ async def websocket_endpoint( segment_text=st_val, db=db, ) + logger.info( + "ws.TTS_REQUEST handled conversation_id={} assistant_message_id={} " + "segment_index={} ok={} err_msg={}", + conversation_id, + str(aid).strip(), + seg_idx, + ok, + err_msg, + ) if not ok: await manager.send_message( conversation_id, diff --git a/api/app/features/memoir/story_pipeline_sync.py b/api/app/features/memoir/story_pipeline_sync.py index 93221c4..f4cc3f6 100644 --- a/api/app/features/memoir/story_pipeline_sync.py +++ b/api/app/features/memoir/story_pipeline_sync.py @@ -254,7 +254,11 @@ def _slot_snippets_for_narrative( return slot_snippets -def _placeholder_title(chapter_category: str) -> str: +def _placeholder_title(chapter_category: str, language: str = "zh") -> str: + if language == "en": + from app.agents.stage_constants import chapter_category_display + + return chapter_category_display(chapter_category, language="en") return CHAPTER_CATEGORIES.get(chapter_category, chapter_category) @@ -301,16 +305,19 @@ def _strip_ungrounded_title_segments( hay: str, *, chapter_category: str, + language: str = "zh", ) -> str: """ 按 · / • 分节丢弃含未落地履历短语的小节;全部丢弃则占位。 """ if not settings.memoir_title_hay_grounding_strict_phrases_enabled: - return (title or "").strip() or _placeholder_title(chapter_category) + return (title or "").strip() or _placeholder_title( + chapter_category, language=language + ) t = (title or "").strip() h = (hay or "").strip() if not t: - return _placeholder_title(chapter_category) + return _placeholder_title(chapter_category, language=language) segments = [s.strip() for s in re.split(r"\s*[·•]\s*", t) if s.strip()] if not segments: segments = [t] @@ -329,7 +336,7 @@ def _strip_ungrounded_title_segments( continue kept.append(seg) if not kept: - return _placeholder_title(chapter_category) + return _placeholder_title(chapter_category, language=language) if len(kept) == 1: return kept[0] return " · ".join(kept) @@ -346,11 +353,12 @@ def _maybe_generate_title( llm: Any, oral_scope: str = "", narrow_profile_for_title: bool = True, + language: str = "zh", ) -> str: """Generate a title only when body is long enough; otherwise return placeholder.""" body_len = len((md or "").strip()) if body_len < settings.story_title_min_body_chars: - return _placeholder_title(chapter_category) + return _placeholder_title(chapter_category, language=language) content_excerpt = (md or "").strip()[:300] merged_slots = _title_slots_filtered_for_generation( slot_snippets, md=md, oral_scope=oral_scope @@ -366,10 +374,14 @@ def _maybe_generate_title( user_profile=profile_for_title, birth_year=user_birth_year, llm=llm, + language=language, ) hay = _title_hay_for_grounding(merged_slots, md, oral_scope) return _strip_ungrounded_title_segments( - raw_title, hay, chapter_category=chapter_category + raw_title, + hay, + chapter_category=chapter_category, + language=language, ) @@ -733,6 +745,7 @@ def _execute_narrative_unit( occupation: str = "", memoir_correlation_id: str | None = None, fidelity_llm: Any | None = None, + language: str = "zh", ) -> tuple[str | None, bool]: """ Unified narrative unit executor: generate narrative, apply fidelity/safety, @@ -740,7 +753,9 @@ def _execute_narrative_unit( """ t0 = time.perf_counter() oral_norm = (oral_text or "").strip() - new_content_input = format_narrative_user_content(oral_text, evidence_text) + new_content_input = format_narrative_user_content( + oral_text, evidence_text, language=language + ) raw_gen = narrative_agent.generate_narrative( stage=chapter_category, @@ -753,6 +768,7 @@ def _execute_narrative_unit( background_voice=background_voice, occupation=occupation, fallback_plain_oral=oral_norm, + language=language, ) json_invalid = False s0 = (raw_gen or "").strip() @@ -816,7 +832,7 @@ def _execute_narrative_unit( sid_log = target_story_id is_append = True else: - story_title = _placeholder_title(chapter_category) + story_title = _placeholder_title(chapter_category, language=language) st = create_story_with_version_sync( session, user_id=user_id, @@ -905,6 +921,7 @@ def _run_batch_plan_writes( occupation: str = "", memoir_correlation_id: str | None = None, fidelity_llm: Any | None = None, + language: str = "zh", ) -> set[str]: dispatch_ids: set[str] = set() for unit in plan.units: @@ -951,6 +968,7 @@ def _run_batch_plan_writes( occupation=occupation, memoir_correlation_id=memoir_correlation_id, fidelity_llm=fidelity_llm, + language=language, ) if sid: dispatch_ids.add(sid) @@ -972,6 +990,7 @@ def run_story_pipeline_for_category_batch( memoir_correlation_id: str | None = None, llm_fast: Any | None = None, memory_evidence: dict | None = None, + language: str = "zh", ) -> StoryPipelineResult: """运行某 chapter_category 的 Phase2 写入管线。 @@ -1064,7 +1083,9 @@ def run_story_pipeline_for_category_batch( user_id=user_id, ) - title = chapter.title if chapter else _placeholder_title(chapter_category) + title = chapter.title if chapter else _placeholder_title( + chapter_category, language=language + ) # 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同 all_stories = list_active_stories_for_user_sync(session, user_id) @@ -1167,6 +1188,7 @@ def run_story_pipeline_for_category_batch( occupation=occupation, memoir_correlation_id=memoir_correlation_id, fidelity_llm=llm_fidelity, + language=language, ) else: route = single_route @@ -1215,6 +1237,7 @@ def run_story_pipeline_for_category_batch( occupation=occupation, memoir_correlation_id=memoir_correlation_id, fidelity_llm=llm_fidelity, + language=language, ) if sid: dispatch_ids.add(sid) diff --git a/api/app/features/user/models.py b/api/app/features/user/models.py index 3d49f56..133a648 100644 --- a/api/app/features/user/models.py +++ b/api/app/features/user/models.py @@ -21,6 +21,9 @@ class User(Base): birth_place = Column(String, nullable=True) grew_up_place = Column(String, nullable=True) occupation = Column(String, nullable=True) + language_preference = Column( + String(8), nullable=False, default="zh", server_default="zh" + ) conversations = relationship("Conversation", back_populates="user") chapters = relationship("Chapter", back_populates="user") diff --git a/api/app/features/user/router.py b/api/app/features/user/router.py index 26a1eaa..0b3093b 100644 --- a/api/app/features/user/router.py +++ b/api/app/features/user/router.py @@ -17,7 +17,7 @@ from app.features.user.schemas import ( UpdateUserProfileRequest, UserProfileResponse, ) -from app.features.user.service import UserService +from app.features.user.service import UserService, _coerce_language as _coerce_language_token from app.ports.storage import ObjectStorage logger = get_logger(__name__) @@ -57,6 +57,9 @@ async def get_user_profile( birth_place=current_user.birth_place, grew_up_place=current_user.grew_up_place, occupation=current_user.occupation, + language_preference=_coerce_language_token( + getattr(current_user, "language_preference", "zh") + ), ) diff --git a/api/app/features/user/schemas.py b/api/app/features/user/schemas.py index 09d719e..6a0f1a2 100644 --- a/api/app/features/user/schemas.py +++ b/api/app/features/user/schemas.py @@ -2,6 +2,8 @@ from typing import Literal, Optional from pydantic import BaseModel, Field +LanguagePreference = Literal["zh", "en"] + class UserProfileResponse(BaseModel): id: str @@ -15,6 +17,7 @@ class UserProfileResponse(BaseModel): birth_place: Optional[str] = None grew_up_place: Optional[str] = None occupation: Optional[str] = None + language_preference: LanguagePreference = "zh" class UpdateUserProfileRequest(BaseModel): diff --git a/api/app/features/user/service.py b/api/app/features/user/service.py index 2681df2..dcaf72c 100644 --- a/api/app/features/user/service.py +++ b/api/app/features/user/service.py @@ -20,6 +20,12 @@ from app.ports.storage import ObjectStorage logger = get_logger(__name__) +def _coerce_language(raw) -> str: + """Normalize a stored language token to the 'zh' / 'en' Literal.""" + s = str(raw).strip().lower() if isinstance(raw, str) else "" + return s if s in ("zh", "en") else "zh" + + def _user_to_profile(user: User) -> UserProfileResponse: return UserProfileResponse( id=user.id, @@ -33,6 +39,9 @@ def _user_to_profile(user: User) -> UserProfileResponse: birth_place=user.birth_place, grew_up_place=user.grew_up_place, occupation=user.occupation, + language_preference=_coerce_language( + getattr(user, "language_preference", "zh") + ), ) diff --git a/api/app/ports/tts.py b/api/app/ports/tts.py index 4e9d54e..75f49cb 100644 --- a/api/app/ports/tts.py +++ b/api/app/ports/tts.py @@ -5,6 +5,11 @@ from typing import Protocol, runtime_checkable @runtime_checkable class TTSProvider(Protocol): - async def synthesize(self, text: str, voice: str = "alloy") -> bytes: - """Convert text to speech audio bytes.""" + async def synthesize( + self, text: str, voice: str = "alloy", *, language: str = "zh" + ) -> bytes: + """Convert text to speech audio bytes. + + language: 'zh' or 'en'. Adapters that natively detect language may ignore it. + """ ... diff --git a/api/app/tasks/memoir_quality_pass_tasks.py b/api/app/tasks/memoir_quality_pass_tasks.py index 25955e8..b6dfaa3 100644 --- a/api/app/tasks/memoir_quality_pass_tasks.py +++ b/api/app/tasks/memoir_quality_pass_tasks.py @@ -41,13 +41,16 @@ def _polish_story_title( llm, *, chapter_category: str, + language: str = "zh", ) -> bool: """Re-generate title if current title is a placeholder. Returns True if updated.""" from app.features.memoir.story_pipeline_sync import _placeholder_title current = (story.title or "").strip() - placeholder = _placeholder_title(chapter_category) - if current and current != placeholder: + placeholder_zh = _placeholder_title(chapter_category, language="zh") + placeholder_en = _placeholder_title(chapter_category, language="en") + placeholder = _placeholder_title(chapter_category, language=language) + if current and current not in (placeholder_zh, placeholder_en): return False body = (story.canonical_markdown or "").strip() @@ -63,9 +66,10 @@ def _polish_story_title( user_profile="", birth_year=None, llm=llm, + language=language, ) new_title = (new_title or "").strip() - if not new_title or new_title == placeholder: + if not new_title or new_title in (placeholder_zh, placeholder_en, placeholder): return False story.title = new_title @@ -138,6 +142,16 @@ def memoir_quality_pass( chapters_dirtied: set[str] = set() with get_sync_db() as db: + from app.features.user.models import User + + user_obj = db.get(User, user_id) + user_language = ( + "en" + if user_obj is not None + and str(getattr(user_obj, "language_preference", "zh") or "zh").lower() + == "en" + else "zh" + ) for sid in story_ids: story = db.get(Story, sid) if not story or story.user_id != user_id: @@ -145,7 +159,11 @@ def memoir_quality_pass( chapter_category = story.stage or "summary" if _polish_story_title( - db, story, llm, chapter_category=chapter_category + db, + story, + llm, + chapter_category=chapter_category, + language=user_language, ): titles_polished += 1 stmt = select(Chapter.id).where( diff --git a/api/app/tasks/memoir_tasks.py b/api/app/tasks/memoir_tasks.py index 440d08c..90f9963 100644 --- a/api/app/tasks/memoir_tasks.py +++ b/api/app/tasks/memoir_tasks.py @@ -666,13 +666,21 @@ def process_memoir_phase2( user_birth_year = None background_voice = "default" user_occupation = "" + user_language = "zh" if user_obj: user_birth_year = user_obj.birth_year + user_language = ( + "en" + if str(getattr(user_obj, "language_preference", "zh") or "zh").lower() + == "en" + else "zh" + ) user_profile = format_user_profile_context( birth_year=user_obj.birth_year, birth_place=user_obj.birth_place, grew_up_place=user_obj.grew_up_place, occupation=user_obj.occupation, + language=user_language, ) background_voice = infer_background_voice(user_obj.occupation) user_occupation = user_obj.occupation or "" @@ -752,6 +760,7 @@ def process_memoir_phase2( memoir_correlation_id=cid, llm_fast=llm_fast, memory_evidence=memory_evidence, + language=user_language, ) pipeline_elapsed = time.perf_counter() - pipeline_t0 @@ -931,6 +940,14 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): try: with get_sync_db() as db: + user_obj_for_lang = db.get(User, user_id) + user_language = ( + "en" + if user_obj_for_lang is not None + and str(getattr(user_obj_for_lang, "language_preference", "zh") or "zh").lower() + == "en" + else "zh" + ) stmt = ( select(Segment) .where(Segment.id.in_(segment_ids)) @@ -1056,6 +1073,7 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]): memoir_batch=True, ), on_phase1_chunk=_phase1_chunk_cb, + language=user_language, ) prep_elapsed = time.perf_counter() - prep_t0 merge_pipeline_run( @@ -1273,13 +1291,21 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): user_birth_year = None background_voice = "default" user_occupation = "" + user_language = "zh" if user_obj: user_birth_year = user_obj.birth_year + user_language = ( + "en" + if str(getattr(user_obj, "language_preference", "zh") or "zh").lower() + == "en" + else "zh" + ) user_profile = format_user_profile_context( birth_year=user_obj.birth_year, birth_place=user_obj.birth_place, grew_up_place=user_obj.grew_up_place, occupation=user_obj.occupation, + language=user_language, ) background_voice = infer_background_voice(user_obj.occupation) user_occupation = user_obj.occupation or "" @@ -1303,6 +1329,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): occupation=user_occupation, memoir_correlation_id=cid, llm_fast=llm_fast, + language=user_language, ) db.flush() if chapter is None: diff --git a/api/app/tasks/story_title_tasks.py b/api/app/tasks/story_title_tasks.py index cad63b0..7452c91 100644 --- a/api/app/tasks/story_title_tasks.py +++ b/api/app/tasks/story_title_tasks.py @@ -51,8 +51,19 @@ def generate_story_title_after_create( ms, ) return {"status": "skip_not_found"} - expected_ph = _placeholder_title(chapter_category) - if (st.title or "").strip() and (st.title or "").strip() != expected_ph: + user_obj_pre = db.get(User, user_id) + user_language = ( + "en" + if user_obj_pre is not None + and str(getattr(user_obj_pre, "language_preference", "zh") or "zh").lower() + == "en" + else "zh" + ) + expected_ph_zh = _placeholder_title(chapter_category, language="zh") + expected_ph_en = _placeholder_title(chapter_category, language="en") + expected_ph = _placeholder_title(chapter_category, language=user_language) + current = (st.title or "").strip() + if current and current not in (expected_ph_zh, expected_ph_en): ms = (time.perf_counter() - t0) * 1000 logger.info( "event=story_title_task_skip story_id={} reason=user_modified duration_ms={:.1f} " @@ -73,7 +84,7 @@ def generate_story_title_after_create( ) return {"status": "skip_no_llm"} - user_obj = db.get(User, user_id) + user_obj = user_obj_pre user_profile = "" birth_year = None if user_obj: @@ -83,6 +94,7 @@ def generate_story_title_after_create( birth_place=user_obj.birth_place, grew_up_place=user_obj.grew_up_place, occupation=user_obj.occupation, + language=user_language, ) state = get_or_create_state_sync(user_id, db) @@ -101,8 +113,13 @@ def generate_story_title_after_create( user_birth_year=birth_year, llm=llm, oral_scope=oral_scope or "", + language=user_language, ) - if not new_title.strip() or new_title.strip() == expected_ph: + if not new_title.strip() or new_title.strip() in ( + expected_ph_zh, + expected_ph_en, + expected_ph, + ): ms = (time.perf_counter() - t0) * 1000 logger.info( "event=story_title_task_skip story_id={} reason=placeholder duration_ms={:.1f} " diff --git a/api/tests/test_batch_phase1_chunked.py b/api/tests/test_batch_phase1_chunked.py index de0af98..8bbc1cc 100644 --- a/api/tests/test_batch_phase1_chunked.py +++ b/api/tests/test_batch_phase1_chunked.py @@ -32,6 +32,8 @@ def test_run_batch_phase1_prep_chunked_splits_95_into_four_calls( segments: list, state: MemoirStateSchema, llm: object, + *, + language: str = "zh", ) -> dict[str, BatchPhase1SegmentRow]: chunk_lengths.append(len(segments)) return { @@ -66,6 +68,8 @@ def test_chunked_bisect_on_value_error(monkeypatch: pytest.MonkeyPatch) -> None: segments: list, state: MemoirStateSchema, llm: object, + *, + language: str = "zh", ) -> dict[str, BatchPhase1SegmentRow]: chunk_lengths.append(len(segments)) if len(segments) == 4: diff --git a/api/tests/test_conversation_list_fallback_title.py b/api/tests/test_conversation_list_fallback_title.py new file mode 100644 index 0000000..14d0075 --- /dev/null +++ b/api/tests/test_conversation_list_fallback_title.py @@ -0,0 +1,120 @@ +"""ConversationService.list_for_user 兜底标题随用户语言切换(zh→岁月知己 / en→Life Echo)。""" + +from __future__ import annotations + +import uuid +from datetime import datetime, timezone +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +# 预加载所有 feature 模型,触发 SQLAlchemy 关系解析 +from app.features.asset import models as _asset_models # noqa: F401 +from app.features.auth import models as _auth_models # noqa: F401 +from app.features.conversation import models as _conv_models # noqa: F401 +from app.features.memoir import models as _memoir_models # noqa: F401 +from app.features.memory import models as _memory_models # noqa: F401 +from app.features.payment import models as _payment_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 +from app.features.user import models as _user_models # noqa: F401 + +from app.features.conversation import service as conv_service_mod +from app.features.conversation.service import ConversationService +from app.features.user.models import User + + +def _make_conv(user_id: str, summary: str | None) -> SimpleNamespace: + now = datetime.now(timezone.utc) + return SimpleNamespace( + id=str(uuid.uuid4()), + user_id=user_id, + summary=summary, + started_at=now, + last_message_at=now, + ) + + +def _build_user(language: str) -> User: + return User( + id=str(uuid.uuid4()), + phone=f"138{uuid.uuid4().int % 100_000_000:08d}", + password_hash="x", + nickname="t", + subscription_type="free", + created_at=datetime.now(timezone.utc), + language_preference=language, + ) + + +@pytest.mark.asyncio +async def test_list_for_user_zh_fallback_title(monkeypatch) -> None: + user = _build_user("zh") + convs = [_make_conv(user.id, summary=None), _make_conv(user.id, summary="夏日记忆")] + + db = MagicMock() + db.get = AsyncMock(return_value=user) + svc = ConversationService(db, MagicMock()) + + monkeypatch.setattr( + conv_service_mod.repo, + "get_user_conversations", + AsyncMock(return_value=convs), + ) + monkeypatch.setattr( + ConversationService, + "ensure_redis_history_from_db", + AsyncMock(return_value=[]), + ) + + rows = await svc.list_for_user(user.id) + assert rows[0]["title"] == "岁月知己" + assert rows[1]["title"] == "夏日记忆" + + +@pytest.mark.asyncio +async def test_list_for_user_en_fallback_title(monkeypatch) -> None: + user = _build_user("en") + convs = [_make_conv(user.id, summary=None)] + + db = MagicMock() + db.get = AsyncMock(return_value=user) + svc = ConversationService(db, MagicMock()) + + monkeypatch.setattr( + conv_service_mod.repo, + "get_user_conversations", + AsyncMock(return_value=convs), + ) + monkeypatch.setattr( + ConversationService, + "ensure_redis_history_from_db", + AsyncMock(return_value=[]), + ) + + rows = await svc.list_for_user(user.id) + assert rows[0]["title"] == "Life Echo" + + +@pytest.mark.asyncio +async def test_list_for_user_missing_user_falls_back_to_zh(monkeypatch) -> None: + """安全兜底:如果 DB 查不到 user 行(极端情况),用 zh 默认。""" + convs = [_make_conv("uid", summary=None)] + + db = MagicMock() + db.get = AsyncMock(return_value=None) + svc = ConversationService(db, MagicMock()) + + monkeypatch.setattr( + conv_service_mod.repo, + "get_user_conversations", + AsyncMock(return_value=convs), + ) + monkeypatch.setattr( + ConversationService, + "ensure_redis_history_from_db", + AsyncMock(return_value=[]), + ) + + rows = await svc.list_for_user("uid") + assert rows[0]["title"] == "岁月知己" diff --git a/api/tests/test_memoir_skip_story.py b/api/tests/test_memoir_skip_story.py index 3eed571..69c80eb 100644 --- a/api/tests/test_memoir_skip_story.py +++ b/api/tests/test_memoir_skip_story.py @@ -134,6 +134,7 @@ def test_prepare_batches_batch_llm_path_matches_per_segment_skip_logic( *, chunk_size: int = 24, on_chunk=None, + language: str = "zh", ) -> dict: return { "mix-1": BatchPhase1SegmentRow( diff --git a/api/tests/test_migration_language_preference_default.py b/api/tests/test_migration_language_preference_default.py new file mode 100644 index 0000000..b14e860 --- /dev/null +++ b/api/tests/test_migration_language_preference_default.py @@ -0,0 +1,34 @@ +"""验证 0018 迁移使用 server_default='zh' 落库(防止已有用户 NULL)。""" + +from __future__ import annotations + +from pathlib import Path + +from app.features.user.models import User + + +def test_migration_0018_uses_server_default_zh() -> None: + path = ( + Path(__file__).resolve().parent.parent + / "alembic" + / "versions" + / "0018_users_language_preference.py" + ) + src = path.read_text(encoding="utf-8") + assert "language_preference" in src + # server_default 'zh' 是已有用户回填的关键 + assert "server_default=sa.text(\"'zh'\")" in src + assert "nullable=False" in src + # 在多行 op.add_column 调用中第一参数为 "users" + assert "op.add_column" in src + assert '"users"' in src + + +def test_user_model_language_preference_default_zh() -> None: + """模型层默认值与迁移一致;新建实例不传值时为 'zh'。""" + column = User.__table__.c.language_preference + assert column.default is not None + assert column.default.arg == "zh" + assert column.server_default is not None + assert "zh" in str(column.server_default.arg) + assert column.nullable is False diff --git a/api/tests/test_mock_sms_login_http.py b/api/tests/test_mock_sms_login_http.py index 2da6cc3..b9917a7 100644 --- a/api/tests/test_mock_sms_login_http.py +++ b/api/tests/test_mock_sms_login_http.py @@ -85,6 +85,7 @@ async def test_mock_sms_login_enabled_returns_valid_access_jwt( svc.mock_sms_login.assert_awaited_once_with( phone="13800138000", nickname=None, + language=None, ) diff --git a/api/tests/test_pipeline_language_skip_tts.py b/api/tests/test_pipeline_language_skip_tts.py new file mode 100644 index 0000000..de32d60 --- /dev/null +++ b/api/tests/test_pipeline_language_skip_tts.py @@ -0,0 +1,200 @@ +"""WS pipeline 语言解析与 Tencent TTS 英文合成参数。""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from app.adapters.tts.tencent_tts import ( + MODEL_TYPE_LLM, + PRIMARY_LANGUAGE_EN, + PRIMARY_LANGUAGE_ZH, + TencentTTSProvider, +) +from app.features.conversation.ws.pipeline import _resolve_user_language + + +# ── pipeline._resolve_user_language ───────────────────────────────── + + +def test_resolve_user_language_zh_default_when_missing() -> None: + assert _resolve_user_language(None) == "zh" + assert _resolve_user_language(SimpleNamespace()) == "zh" + assert _resolve_user_language(SimpleNamespace(language_preference=None)) == "zh" + assert _resolve_user_language(SimpleNamespace(language_preference="zh")) == "zh" + + +def test_resolve_user_language_en_only_for_en_token() -> None: + assert _resolve_user_language(SimpleNamespace(language_preference="en")) == "en" + assert _resolve_user_language(SimpleNamespace(language_preference="EN")) == "en" + assert _resolve_user_language(SimpleNamespace(language_preference=" en ")) == "en" + + +def test_resolve_user_language_unknown_falls_back_to_zh() -> None: + assert _resolve_user_language(SimpleNamespace(language_preference="ja")) == "zh" + assert _resolve_user_language(SimpleNamespace(language_preference="")) == "zh" + + +# ── TencentTTSProvider 语言分支 ────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_tencent_tts_zh_uses_primary_language_1_and_zh_voice() -> None: + provider = TencentTTSProvider( + secret_id="id", + secret_key="key", + voice_type=501004, + voice_type_en=501004, + ) + seen: dict = {} + + def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: + seen["text"] = text + seen["voice_type"] = voice_type + seen["primary_language"] = primary_language + return b"AUDIO" + + with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): + out = await provider.synthesize("你好", language="zh") + + assert out == b"AUDIO" + assert seen["primary_language"] == PRIMARY_LANGUAGE_ZH + assert seen["voice_type"] == 501004 + + +@pytest.mark.asyncio +async def test_tencent_tts_en_uses_primary_language_2_and_en_voice() -> None: + provider = TencentTTSProvider( + secret_id="id", + secret_key="key", + voice_type=501004, + voice_type_en=501004, + ) + seen: dict = {} + + def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: + seen["text"] = text + seen["voice_type"] = voice_type + seen["primary_language"] = primary_language + return b"AUDIO_EN" + + with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): + out = await provider.synthesize("Hello there.", language="en") + + assert out == b"AUDIO_EN" + assert seen["primary_language"] == PRIMARY_LANGUAGE_EN + assert seen["voice_type"] == 501004 + + +@pytest.mark.asyncio +async def test_tencent_tts_en_uses_relaxed_chunk_size() -> None: + """English text up to ~480 letters fits in a single chunk; zh path would split it.""" + provider = TencentTTSProvider( + secret_id="id", + secret_key="key", + voice_type=501004, + voice_type_en=501004, + ) + en_chunks: list[int] = [] + zh_chunks: list[int] = [] + + def fake_en(text: str, voice_type: int, primary_language: int) -> bytes: + en_chunks.append(len(text)) + return b"X" + + def fake_zh(text: str, voice_type: int, primary_language: int) -> bytes: + zh_chunks.append(len(text)) + return b"X" + + text_400 = ("Word " * 80).strip() # 399 chars, no sentence terminators + with patch.object(provider, "_synthesize_sync", side_effect=fake_en): + await provider.synthesize(text_400, language="en") + with patch.object(provider, "_synthesize_sync", side_effect=fake_zh): + await provider.synthesize(text_400, language="zh") + + # English allows the 400-char text in a single request; Chinese path must split + assert en_chunks == [len(text_400)] + assert len(zh_chunks) > 1 + + +@pytest.mark.asyncio +async def test_tencent_tts_returns_empty_when_credentials_missing() -> None: + provider = TencentTTSProvider(secret_id="", secret_key="") + out = await provider.synthesize("Hello", language="en") + assert out == b"" + + +@pytest.mark.asyncio +async def test_tencent_tts_voice_type_en_falls_back_to_english_voice_when_unset() -> None: + """缺省 voice_type_en 时回落到 501004(月华,大模型音色,原生中英混合)。""" + provider = TencentTTSProvider(secret_id="id", secret_key="key") + seen: dict = {} + + def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: + seen["voice_type"] = voice_type + return b"X" + + with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): + await provider.synthesize("Hi", language="en") + + assert seen["voice_type"] == 501004 + # 显式断言不是中文老精品音色(防止回归):禁止回落到 1001 / 1002 等 + assert seen["voice_type"] not in (1001, 1002) + + +# ── 关键回归:_synthesize_sync 必须在请求中设置 ModelType=1(大模型音色路由所需) ── + + +@pytest.mark.asyncio +async def test_tencent_tts_synthesize_sync_sets_model_type_1() -> None: + """501004 月华属于大模型音色,TextToVoice 必须显式带 ModelType=1,否则会被旧模型 + 拒绝并静默返回空音频。这里 mock SDK client 捕获 req.ModelType 防止回归。""" + import base64 as _b64 + + provider = TencentTTSProvider( + secret_id="id", + secret_key="key", + voice_type=501004, + voice_type_en=501004, + ) + + captured: dict = {} + + def _fake_text_to_voice(req): + captured["VoiceType"] = req.VoiceType + captured["PrimaryLanguage"] = req.PrimaryLanguage + captured["ModelType"] = req.ModelType + captured["Codec"] = req.Codec + captured["SampleRate"] = req.SampleRate + captured["Text"] = req.Text + fake_resp = MagicMock() + fake_resp.Audio = _b64.b64encode(b"AUDIO").decode("ascii") + fake_resp.RequestId = "req-test" + return fake_resp + + fake_client = MagicMock() + fake_client.TextToVoice.side_effect = _fake_text_to_voice + + with patch.object(provider, "_get_client", return_value=fake_client): + out = await provider.synthesize("你好", language="zh") + + assert out == b"AUDIO" + assert captured["ModelType"] == MODEL_TYPE_LLM == 1 + assert captured["VoiceType"] == 501004 + assert captured["PrimaryLanguage"] == PRIMARY_LANGUAGE_ZH + assert captured["Text"] == "你好" + + +# ── port 兼容性:OpenAI 实现接受 language kwarg ────────────────────── + + +@pytest.mark.asyncio +async def test_openai_tts_accepts_language_kwarg() -> None: + """端口签名兼容:OpenAI adapter 必须接受 language(即使不使用)。""" + from app.adapters.tts.openai_tts import OpenAITTSProvider + + provider = OpenAITTSProvider(api_key="") # No client → returns b"" + assert await provider.synthesize("hi", language="en") == b"" + assert await provider.synthesize("你好", language="zh") == b"" diff --git a/api/tests/test_pipeline_tts_cancel_emits_all_segments.py b/api/tests/test_pipeline_tts_cancel_emits_all_segments.py new file mode 100644 index 0000000..ca480e7 --- /dev/null +++ b/api/tests/test_pipeline_tts_cancel_emits_all_segments.py @@ -0,0 +1,331 @@ +"""WS pipeline 回归:TTS 取消不能丢段,AGENT_RESPONSE 必须为每段下发。 + +- 历史 bug:客户端在多段回复中途发送 ``tts_cancel`` 时, + ``process_user_message`` 在 TTS 分支 ``break``,导致剩余段的 ``agent_response`` + 被静默丢弃;FE 失去后续文本气泡,并可能停留在 "正在回复…" 状态。 +- 期望:取消仅影响后续 TTS 合成,AGENT_RESPONSE 必须为每段完整下发。 +- 同时校验:``responses`` 为空时下发 ERROR;异常路径下发 ERROR。 +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from app.features.conversation.chat_turn import ChatTurnDecision, ChatTurnResult +from app.features.conversation.history_store import HumanAiTurnIds +from app.features.conversation.ws import pipeline as ws_pipeline +from app.features.conversation.ws.message_types import MessageType + + +class _FakeResult: + def __init__(self) -> None: + self.rowcount = 0 + + +class _FakeDb: + """足以驱动 process_user_message 的最小 AsyncSession 替身。""" + + def __init__(self) -> None: + self.execute = AsyncMock(return_value=_FakeResult()) + self.commit = AsyncMock(return_value=None) + self.rollback = AsyncMock(return_value=None) + + +def _make_segment(*, segment_id: str = "seg-1") -> MagicMock: + """构造与 pipeline 字段访问对齐的 Segment 替身(不真正落库)。""" + seg = MagicMock() + seg.id = segment_id + seg.audio_url = None + seg.audio_duration_seconds = None + seg.created_at = None + return seg + + +def _make_conversation(*, conversation_id: str) -> MagicMock: + conv = MagicMock() + conv.id = conversation_id + conv.last_message_at = None + return conv + + +def _make_user(*, user_id: str = "user-1", language: str = "zh") -> SimpleNamespace: + return SimpleNamespace(id=user_id, language_preference=language) + + +def _ids_for(conversation_id: str) -> HumanAiTurnIds: + return HumanAiTurnIds( + human_message_id=f"{conversation_id}-human", + assistant_message_id=f"{conversation_id}-ai", + ) + + +def _patch_common(monkeypatch: pytest.MonkeyPatch) -> tuple[list[dict], MagicMock]: + """统一 mock 持久化层与段间 sleep;返回 manager.send_message 的捕获列表。""" + sent_messages: list[dict] = [] + + async def _capture_send(_conv_id: str, message: dict) -> None: + sent_messages.append(message) + + monkeypatch.setattr(ws_pipeline.manager, "send_message", _capture_send) + monkeypatch.setattr(ws_pipeline.manager, "active_connections", {}) + + fake_store = MagicMock() + fake_store.record_human_ai_turn = AsyncMock() + fake_store.attach_ai_tts_audio_urls = AsyncMock(return_value=None) + monkeypatch.setattr( + ws_pipeline, "ConversationHistoryStore", lambda _db: fake_store + ) + + # 段间 sleep 在测试里不需要真等;保留 await 语义 + async def _no_sleep(_seconds: float) -> None: # pragma: no cover - trivial + return None + + monkeypatch.setattr(ws_pipeline.asyncio, "sleep", _no_sleep) + + return sent_messages, fake_store + + +@pytest.mark.asyncio +async def test_tts_cancel_mid_flight_still_emits_all_agent_response_segments( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """关键回归:i=0 完成 TTS 后客户端取消,i=1/i=2 的 AGENT_RESPONSE 必须仍下发。""" + sent_messages, fake_store = _patch_common(monkeypatch) + conversation_id = "conv-cancel-mid" + ws_pipeline.manager.active_connections[conversation_id] = object() + fake_store.record_human_ai_turn.return_value = _ids_for(conversation_id) + + turn_result = ChatTurnResult( + messages=["第一段", "第二段", "第三段"], + skip_tts=False, + decision=ChatTurnDecision(), + ) + monkeypatch.setattr( + ws_pipeline.chat_turn_service, + "process_turn", + AsyncMock(return_value=turn_result), + ) + + monkeypatch.setattr(ws_pipeline.settings, "enable_tts", True) + + tts_calls: list[int] = [] + + async def _fake_send_tts_audio( + _conv_id: str, + _text: str, + *, + chunk_index: int, + chunk_total: int, # noqa: ARG001 + assistant_message_id: str | None, # noqa: ARG001 + tts_epoch_start: int, # noqa: ARG001 + manual: bool = False, # noqa: ARG001 + language: str = "zh", # noqa: ARG001 + ) -> str | None: + tts_calls.append(chunk_index) + # 第 0 段合成完成后客户端按取消,第 1 段进入循环时已经是新 epoch + if chunk_index == 0: + ws_pipeline.bump_tts_cancel_epoch(_conv_id) + return f"https://cos/{_conv_id}/seg-0.mp3" + return None + + monkeypatch.setattr(ws_pipeline, "_send_tts_audio", _fake_send_tts_audio) + + db = _FakeDb() + await ws_pipeline.process_user_message( + conversation_id=conversation_id, + user_message="说说你小时候", + conversation=_make_conversation(conversation_id=conversation_id), + segment=_make_segment(), + db=db, + user=_make_user(), + tts_this_turn=True, + ) + + agent_responses = [ + m for m in sent_messages if m["type"] == MessageType.AGENT_RESPONSE + ] + assert [m["data"]["text"] for m in agent_responses] == [ + "第一段", + "第二段", + "第三段", + ], "TTS 取消后剩余段的 AGENT_RESPONSE 必须仍然下发" + assert [m["data"]["index"] for m in agent_responses] == [0, 1, 2] + assert all(m["data"]["total"] == 3 for m in agent_responses) + # i=0 已合成;取消后不应再触发 i=1 / i=2 的 TTS 合成 + assert tts_calls == [0] + + +@pytest.mark.asyncio +async def test_tts_cancel_before_any_segment_still_emits_agent_response( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """边界:TTS 在第 0 段刚开始就被取消,第 0 段的 AGENT_RESPONSE 仍必须下发。""" + sent_messages, fake_store = _patch_common(monkeypatch) + conversation_id = "conv-cancel-pre" + ws_pipeline.manager.active_connections[conversation_id] = object() + fake_store.record_human_ai_turn.return_value = _ids_for(conversation_id) + + turn_result = ChatTurnResult( + messages=["唯一段"], + skip_tts=False, + decision=ChatTurnDecision(), + ) + monkeypatch.setattr( + ws_pipeline.chat_turn_service, + "process_turn", + AsyncMock(return_value=turn_result), + ) + + monkeypatch.setattr(ws_pipeline.settings, "enable_tts", True) + + async def _fake_tts_then_cancel( + _conv_id: str, + _text: str, + *, + chunk_index: int, # noqa: ARG001 + chunk_total: int, # noqa: ARG001 + assistant_message_id: str | None, # noqa: ARG001 + tts_epoch_start: int, # noqa: ARG001 + manual: bool = False, # noqa: ARG001 + language: str = "zh", # noqa: ARG001 + ) -> str | None: + ws_pipeline.bump_tts_cancel_epoch(_conv_id) + return None + + monkeypatch.setattr(ws_pipeline, "_send_tts_audio", _fake_tts_then_cancel) + + db = _FakeDb() + await ws_pipeline.process_user_message( + conversation_id=conversation_id, + user_message="嗯", + conversation=_make_conversation(conversation_id=conversation_id), + segment=_make_segment(), + db=db, + user=_make_user(), + tts_this_turn=True, + ) + + agent_responses = [ + m for m in sent_messages if m["type"] == MessageType.AGENT_RESPONSE + ] + assert len(agent_responses) == 1 + assert agent_responses[0]["data"]["text"] == "唯一段" + assert agent_responses[0]["data"]["index"] == 0 + assert agent_responses[0]["data"]["total"] == 1 + + +@pytest.mark.asyncio +async def test_empty_responses_emits_terminal_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """无回复时必须下发 ERROR,否则 FE 会卡在 "正在回复…"。""" + sent_messages, fake_store = _patch_common(monkeypatch) + conversation_id = "conv-empty" + ws_pipeline.manager.active_connections[conversation_id] = object() + fake_store.record_human_ai_turn.return_value = None + + monkeypatch.setattr( + ws_pipeline.chat_turn_service, + "process_turn", + AsyncMock( + return_value=ChatTurnResult( + messages=[], + skip_tts=False, + decision=ChatTurnDecision(), + ) + ), + ) + + db = _FakeDb() + await ws_pipeline.process_user_message( + conversation_id=conversation_id, + user_message="x", + conversation=_make_conversation(conversation_id=conversation_id), + segment=_make_segment(), + db=db, + user=_make_user(), + tts_this_turn=False, + ) + + error_messages = [m for m in sent_messages if m["type"] == MessageType.ERROR] + assert len(error_messages) == 1 + + +@pytest.mark.asyncio +async def test_process_turn_exception_emits_terminal_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """LLM/编排抛错时必须下发 ERROR,否则 FE 会卡在 "正在回复…"。""" + sent_messages, _ = _patch_common(monkeypatch) + conversation_id = "conv-boom" + ws_pipeline.manager.active_connections[conversation_id] = object() + + async def _boom(*_args, **_kwargs): + raise RuntimeError("upstream blew up") + + monkeypatch.setattr(ws_pipeline.chat_turn_service, "process_turn", _boom) + + db = _FakeDb() + await ws_pipeline.process_user_message( + conversation_id=conversation_id, + user_message="y", + conversation=_make_conversation(conversation_id=conversation_id), + segment=_make_segment(), + db=db, + user=_make_user(), + tts_this_turn=False, + ) + + error_messages = [m for m in sent_messages if m["type"] == MessageType.ERROR] + assert len(error_messages) == 1 + + +@pytest.mark.asyncio +async def test_tts_disabled_emits_all_segments_without_tts_calls( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """未开启本轮 TTS 时不调用合成,但每段 AGENT_RESPONSE 仍必须下发。""" + sent_messages, fake_store = _patch_common(monkeypatch) + conversation_id = "conv-text-only" + ws_pipeline.manager.active_connections[conversation_id] = object() + fake_store.record_human_ai_turn.return_value = _ids_for(conversation_id) + + monkeypatch.setattr( + ws_pipeline.chat_turn_service, + "process_turn", + AsyncMock( + return_value=ChatTurnResult( + messages=["A", "B"], + skip_tts=False, + decision=ChatTurnDecision(), + ) + ), + ) + + tts_calls: list[int] = [] + + async def _should_not_be_called(*_args, **_kwargs): + tts_calls.append(1) + return None + + monkeypatch.setattr(ws_pipeline, "_send_tts_audio", _should_not_be_called) + + db = _FakeDb() + await ws_pipeline.process_user_message( + conversation_id=conversation_id, + user_message="hi", + conversation=_make_conversation(conversation_id=conversation_id), + segment=_make_segment(), + db=db, + user=_make_user(), + tts_this_turn=False, + ) + + agent_responses = [ + m for m in sent_messages if m["type"] == MessageType.AGENT_RESPONSE + ] + assert [m["data"]["text"] for m in agent_responses] == ["A", "B"] + assert tts_calls == [] diff --git a/api/tests/test_prompt_language_branching.py b/api/tests/test_prompt_language_branching.py new file mode 100644 index 0000000..0d3a842 --- /dev/null +++ b/api/tests/test_prompt_language_branching.py @@ -0,0 +1,207 @@ +"""Prompt builders 在 language='en' 时返回纯 ASCII;language='zh' 时含中文(防回归)。""" + +from __future__ import annotations + +from app.agents.chat.output_rules import ( + chat_output_rules, + chat_output_rules_en, + chat_voice_style, + chat_voice_style_en, +) +from app.agents.chat.personas import AGENT_NAME_EN, AGENT_NAME_ZH, agent_name +from app.agents.chat.prompts_conversation import ( + get_guided_conversation_prompt, + get_opening_prompt, +) +from app.agents.chat.prompts_profile import ( + get_profile_followup_prompt, + get_profile_greeting_prompt, +) +from app.agents.memoir.prompts import ( + get_creative_title_json_prompt, + get_memoir_fidelity_facts_only_prompt, + get_memoir_fidelity_system_prompt, + get_narrative_json_prompt, +) +from app.agents.stage_constants import ( + chapter_category_display, + stage_display_name, +) + + +def _has_cjk(s: str) -> bool: + return any("\u4e00" <= ch <= "\u9fff" for ch in s) + + +# ── chat output rules ──────────────────────────────────────────────── + + +def test_chat_output_rules_en_has_no_cjk() -> None: + txt = chat_output_rules_en() + assert txt.strip() + assert not _has_cjk(txt) + + +def test_chat_voice_style_en_has_no_cjk() -> None: + txt = chat_voice_style_en() + assert txt.strip() + assert not _has_cjk(txt) + + +def test_chat_output_rules_zh_unchanged_has_cjk() -> None: + assert _has_cjk(chat_output_rules()) + + +def test_chat_voice_style_zh_unchanged_has_cjk() -> None: + assert _has_cjk(chat_voice_style()) + + +# ── memoir narrative ──────────────────────────────────────────────── + + +def test_memoir_fidelity_facts_only_en_branch_has_no_cjk() -> None: + assert not _has_cjk(get_memoir_fidelity_facts_only_prompt(language="en")) + + +def test_memoir_fidelity_facts_only_zh_default_has_cjk() -> None: + assert _has_cjk(get_memoir_fidelity_facts_only_prompt()) + + +def test_memoir_fidelity_system_en_branch_has_no_cjk() -> None: + assert not _has_cjk(get_memoir_fidelity_system_prompt(language="en")) + + +def test_get_narrative_json_prompt_en_has_no_cjk() -> None: + out = get_narrative_json_prompt( + stage="childhood", + slots={"place": "the village"}, + new_content="[User's oral memory this turn]\nI grew up by the river.", + language="en", + ) + assert not _has_cjk(out) + assert "paragraphs" in out + + +def test_get_narrative_json_prompt_zh_default_has_cjk() -> None: + out = get_narrative_json_prompt( + stage="childhood", + slots={"place": "村里"}, + new_content="【本段用户口述】\n我小时候住在河边。", + ) + assert _has_cjk(out) + + +# ── memoir title ──────────────────────────────────────────────────── + + +def test_creative_title_json_prompt_en_has_no_cjk() -> None: + out = get_creative_title_json_prompt( + stage="childhood", + emotion="warm", + slots={"place": "the river"}, + language="en", + ) + assert not _has_cjk(out) + assert "title" in out + + +def test_creative_title_json_prompt_zh_default_has_cjk() -> None: + out = get_creative_title_json_prompt( + stage="childhood", + emotion="warm", + slots={"place": "河边"}, + ) + assert _has_cjk(out) + + +# ── stage / category display helpers ──────────────────────────────── + + +def test_stage_display_name_branches() -> None: + assert stage_display_name("childhood", language="en") == "Childhood" + assert stage_display_name("childhood", language="zh") == "童年时光" + # unknown stages pass through + assert stage_display_name("unknown", language="en") == "unknown" + + +def test_chapter_category_display_branches() -> None: + assert chapter_category_display("childhood", language="en") == "Childhood & Early Years" + assert chapter_category_display("childhood", language="zh") == "童年与成长背景" + + +# ── agent brand name (interviewer identity) ───────────────────────── + + +def test_agent_name_constants_aligned_with_brand() -> None: + """中英品牌名是单一来源,prompt / UI / 兜底标题统一引用此处。""" + assert AGENT_NAME_ZH == "岁月知己" + assert AGENT_NAME_EN == "Life Echo" + + +def test_agent_name_helper_handles_inputs() -> None: + assert agent_name("zh") == "岁月知己" + assert agent_name("en") == "Life Echo" + assert agent_name("EN") == "Life Echo" + assert agent_name(" en ") == "Life Echo" + assert agent_name(None) == "岁月知己" # type: ignore[arg-type] + assert agent_name("ja") == "岁月知己" + + +def test_profile_greeting_prompt_introduces_life_echo_in_en() -> None: + en = get_profile_greeting_prompt( + ["birth_year", "occupation"], nickname="Sam", language="en" + ) + assert "Life Echo" in en + assert "岁月知己" not in en + + +def test_profile_greeting_prompt_keeps_chinese_brand_in_zh() -> None: + zh = get_profile_greeting_prompt(["birth_year"], nickname="老王") + assert "岁月知己" in zh + assert "Life Echo" not in zh + + +def test_profile_followup_prompt_introduces_life_echo_in_en() -> None: + en = get_profile_followup_prompt( + missing_fields=["occupation"], + filled_fields={"birth_year": "1990"}, + language="en", + ) + assert "Life Echo" in en + assert "岁月知己" not in en + + +def test_profile_followup_prompt_full_basics_branch_introduces_life_echo() -> None: + en = get_profile_followup_prompt( + missing_fields=[], + filled_fields={ + "birth_year": "1990", + "birth_place": "Boston", + "grew_up_place": "Boston", + "occupation": "engineer", + }, + language="en", + ) + assert "Life Echo" in en + assert "岁月知己" not in en + + +def test_opening_prompt_introduces_life_echo_in_en() -> None: + out = get_opening_prompt( + current_stage="childhood", + empty_slots_readable=["place", "people"], + language="en", + ) + assert "Life Echo" in out + assert "岁月知己" not in out + + +def test_guided_conversation_prompt_introduces_life_echo_in_en() -> None: + out = get_guided_conversation_prompt( + current_stage="childhood", + empty_slots=["place"], + filled_slots={}, + language="en", + ) + assert "Life Echo" in out + assert "岁月知己" not in out diff --git a/api/tests/test_register_persists_language.py b/api/tests/test_register_persists_language.py new file mode 100644 index 0000000..fc60cae --- /dev/null +++ b/api/tests/test_register_persists_language.py @@ -0,0 +1,87 @@ +"""注册接口将 device language 透传到 AuthService(仅创建路径)。""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest +from fastapi import FastAPI +from httpx import ASGITransport, AsyncClient + +from app.features.auth.deps import get_auth_service +from app.features.auth.router import router as auth_router +from app.features.auth.service import AuthService + + +def _build_app() -> tuple[FastAPI, MagicMock]: + app = FastAPI() + app.include_router(auth_router) + mock_service = MagicMock(spec=AuthService) + mock_service.register = AsyncMock( + return_value={"access_token": "a", "refresh_token": "r"} + ) + mock_service.register_with_sms = AsyncMock( + return_value={"access_token": "a", "refresh_token": "r"} + ) + app.dependency_overrides[get_auth_service] = lambda: mock_service + app.state._mock_auth_service = mock_service + return app, mock_service + + +@pytest.mark.asyncio +async def test_register_passes_language_en_to_service(unique_phone: str) -> None: + app, svc = _build_app() + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + r = await ac.post( + "/api/auth/register", + json={ + "phone": unique_phone, + "password": "secret12", + "nickname": "T", + "agreed_to_terms": True, + "language": "en", + }, + ) + assert r.status_code == 201 + call = svc.register.await_args + assert call.kwargs.get("language") == "en" + + +@pytest.mark.asyncio +async def test_register_without_language_passes_none(unique_phone: str) -> None: + """缺省 language 时透传 None;service 内部默认 zh。""" + app, svc = _build_app() + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + r = await ac.post( + "/api/auth/register", + json={ + "phone": unique_phone, + "password": "secret12", + "nickname": "T", + "agreed_to_terms": True, + }, + ) + assert r.status_code == 201 + assert svc.register.await_args.kwargs.get("language") is None + + +@pytest.mark.asyncio +async def test_sms_register_passes_language(unique_phone: str) -> None: + app, svc = _build_app() + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + r = await ac.post( + "/api/auth/register/sms", + json={ + "phone": unique_phone, + "code": "123456", + "password": "secret12", + "nickname": "T", + "agreed_to_terms": True, + "language": "en", + }, + ) + assert r.status_code == 201 + assert svc.register_with_sms.await_args.kwargs.get("language") == "en" diff --git a/api/tests/test_reply_segments.py b/api/tests/test_reply_segments.py index 410a92c..ff9d2a9 100644 --- a/api/tests/test_reply_segments.py +++ b/api/tests/test_reply_segments.py @@ -71,3 +71,64 @@ def test_strip_leading_en_period_ack(): def test_segments_strip_leading_en_ack(): assert segments_from_llm_response("嗯。只有一句", max_segments=3) == ["只有一句"] assert segments_from_llm_response("嗯。A[SPLIT]嗯。B", max_segments=3) == ["A", "B"] + + +# ── 与客户端 MESSAGE_SPLIT_REGEX 对齐的容错拆段 ─────────────── +# 防回归:避免后端只发 1 条 AGENT_RESPONSE,文本里残留 `[ SPLIT ]` / `[split]` 等字面量, +# 导致前端用容错正则拆出空尾段后渲染出「假装在回复」的空气泡。 + + +def test_split_marker_with_inner_whitespace(): + """LLM 偶尔会写 `[ SPLIT ]` 带空格,后端必须按分隔符拆,不能留在文本里。""" + assert segments_from_llm_response("第一段[ SPLIT ]第二段", max_segments=3) == [ + "第一段", + "第二段", + ] + assert segments_from_llm_response("a [SPLIT ] b", max_segments=3) == ["a", "b"] + assert segments_from_llm_response("a [ SPLIT] b", max_segments=3) == ["a", "b"] + + +def test_split_marker_case_insensitive(): + """与客户端正则 `/i` 对齐:`[split]` / `[Split]` 同样视为分隔符。""" + assert segments_from_llm_response("a[split]b", max_segments=3) == ["a", "b"] + assert segments_from_llm_response("a[Split]b", max_segments=3) == ["a", "b"] + assert segments_from_llm_response("a[SpLiT]b", max_segments=3) == ["a", "b"] + + +def test_split_marker_fullwidth_brackets(): + """模型在中文环境下偶尔输出全角括号 【SPLIT】 / [SPLIT],应正常拆段。""" + assert segments_from_llm_response("第一段【SPLIT】第二段", max_segments=3) == [ + "第一段", + "第二段", + ] + assert segments_from_llm_response("第一段[SPLIT]第二段", max_segments=3) == [ + "第一段", + "第二段", + ] + + +def test_split_marker_with_zero_width_chars(): + """LLM 偶尔会在分隔符前后插入 ZWSP/ZWNJ/ZWJ/BOM,应先归一化再拆段。""" + assert segments_from_llm_response( + "第一段\u200b[SPLIT]\u200c第二段", max_segments=3 + ) == ["第一段", "第二段"] + assert segments_from_llm_response( + "first\ufeff[ SPLIT ]\u200dsecond", max_segments=3 + ) == ["first", "second"] + + +def test_split_marker_trailing_only_returns_single_segment(): + """`[SPLIT]` 出现在结尾时只剩一段非空内容,不应留下空尾段污染前端拆段。""" + assert segments_from_llm_response("hello[SPLIT]", max_segments=3) == ["hello"] + assert segments_from_llm_response("hello [ SPLIT ]", max_segments=3) == ["hello"] + assert segments_from_llm_response("hello【SPLIT】", max_segments=3) == ["hello"] + + +def test_split_marker_combined_variants(): + """混合大小写 + 全角 + 空格:与客户端规范化一致即可正常拆段。""" + assert segments_from_llm_response("a【 split 】b", max_segments=3) == ["a", "b"] + assert segments_from_llm_response("a[ SPLIT ]b[Split]c", max_segments=3) == [ + "a", + "b", + "c", + ] diff --git a/api/tests/test_sms_login_new_user_persists_language.py b/api/tests/test_sms_login_new_user_persists_language.py new file mode 100644 index 0000000..b6da060 --- /dev/null +++ b/api/tests/test_sms_login_new_user_persists_language.py @@ -0,0 +1,93 @@ +"""SMS login (auto-register) 写入 language_preference;命中已有用户不覆盖。""" + +from __future__ import annotations + +import uuid +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock + +import pytest + +# 预加载所有 feature 模型,触发 SQLAlchemy 关系解析(Order / Subscription 等) +from app.features.asset import models as _asset_models # noqa: F401 +from app.features.auth import models as _auth_models # noqa: F401 +from app.features.conversation import models as _conv_models # noqa: F401 +from app.features.memoir import models as _memoir_models # noqa: F401 +from app.features.memory import models as _memory_models # noqa: F401 +from app.features.payment import models as _payment_models # noqa: F401 +from app.features.story import models as _story_models # noqa: F401 +from app.features.user import models as _user_models # noqa: F401 + +from app.features.auth import service as auth_service_mod +from app.features.auth.service import AuthService +from app.features.user.models import User + + +def _make_service() -> AuthService: + db = MagicMock() + db.commit = AsyncMock(return_value=None) + db.refresh = AsyncMock(return_value=None) + sms = MagicMock() + return AuthService(db=db, sms=sms) + + +@pytest.mark.asyncio +async def test_sms_login_new_user_writes_language(monkeypatch) -> None: + svc = _make_service() + + captured: dict = {} + + async def fake_get_by_phone(phone, db): + return None + + async def fake_create_user(user, db): + captured["language"] = user.language_preference + captured["phone"] = user.phone + + async def fake_issue_tokens(user_id, device_info: str = ""): + return {"access_token": "a", "refresh_token": "r"} + + monkeypatch.setattr(auth_service_mod.repo, "get_user_by_phone", fake_get_by_phone) + monkeypatch.setattr(auth_service_mod.repo, "create_user", fake_create_user) + monkeypatch.setattr(svc, "_issue_tokens", fake_issue_tokens) + + out = await svc._sms_login_after_code_verified( + "13800138000", nickname="t", language="en" + ) + assert out["is_new_user"] is True + assert captured["language"] == "en" + + +@pytest.mark.asyncio +async def test_sms_login_existing_user_keeps_existing_language(monkeypatch) -> None: + svc = _make_service() + + existing = User( + id=str(uuid.uuid4()), + phone="13800138000", + password_hash="x", + nickname="老用户", + subscription_type="free", + created_at=datetime.now(timezone.utc), + language_preference="zh", + ) + + async def fake_get_by_phone(phone, db): + return existing + + async def fake_create_user(user, db): # pragma: no cover — must not be called + raise AssertionError("existing-user branch must not call create_user") + + async def fake_issue_tokens(user_id, device_info: str = ""): + return {"access_token": "a", "refresh_token": "r"} + + monkeypatch.setattr(auth_service_mod.repo, "get_user_by_phone", fake_get_by_phone) + monkeypatch.setattr(auth_service_mod.repo, "create_user", fake_create_user) + monkeypatch.setattr(svc, "_issue_tokens", fake_issue_tokens) + + out = await svc._sms_login_after_code_verified( + "13800138000", language="en" + ) + assert out["is_new_user"] is False + # 命中老用户分支后不覆盖偏好 + assert existing.language_preference == "zh" diff --git a/api/tests/test_ws_router_tts_this_turn_passthrough.py b/api/tests/test_ws_router_tts_this_turn_passthrough.py new file mode 100644 index 0000000..e471c83 --- /dev/null +++ b/api/tests/test_ws_router_tts_this_turn_passthrough.py @@ -0,0 +1,165 @@ +"""路由器 → pipeline 的 ``tts_this_turn`` 字段传递契约测试。 + +回归保护:FE(``app-expo/src/core/ws/client.ts`` 与 +``app-expo/src/features/conversation/realtime-session.ts``)一律以蛇形 ``tts_this_turn`` +作为 WebSocket payload 的 ``data`` 字段名;后端 ``ws/router.py`` 三个对话入口分支也按 +``data.get("tts_this_turn")`` 取值并传递给 pipeline。 + +这里不依赖完整 WebSocket runtime,只在两处验证契约: + +1. **数据形状契约**:用 FE 实际发送的 payload 形状(snake_case key)调用 + ``data.get("tts_this_turn")``,断言能取到 True;同时驼峰 ``ttsThisTurn`` 应取不到, + 防止后续有人把后端 key 改成驼峰。 +2. **wrapper 透传契约**:直接驱动 ``process_persisted_user_segment_response`` + (路由 TEXT / AUDIO_MESSAGE 分支唯一会调用的 pipeline 包装),mock 内部 DB + + ``process_user_message``,断言 ``tts_this_turn=True`` 被原样传到 ``process_user_message``。 +""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from app.features.conversation.ws import pipeline as ws_pipeline + + +# ── 数据形状契约 ──────────────────────────────────────────────── + + +def test_router_text_payload_extracts_tts_this_turn_snake_case() -> None: + """FE 发送 ``{type:'text', data:{text, tts_this_turn:true}}``,后端按蛇形读取。""" + fe_payload_data = {"text": "hi", "tts_this_turn": True} + assert bool(fe_payload_data.get("tts_this_turn")) is True + + # 缺省 / 显式 false:均应取到 False + assert bool({"text": "hi"}.get("tts_this_turn")) is False + assert bool({"text": "hi", "tts_this_turn": False}.get("tts_this_turn")) is False + + # 驼峰回归保护:FE 历史曾误传过驼峰,蛇形 get 必须取不到,避免静默吞掉 + assert bool({"text": "hi", "ttsThisTurn": True}.get("tts_this_turn")) is False + + +def test_router_audio_segment_payload_extracts_tts_this_turn_snake_case() -> None: + """FE ``sendAudioSegment`` 也按 ``data.tts_this_turn`` 输出,蛇形保持一致。""" + fe_payload_data = { + "audio_base64": "AAA", + "segment_index": 0, + "voice_session_id": "vs-1", + "is_last": True, + "duration": 3, + "tts_this_turn": True, + } + assert bool(fe_payload_data.get("tts_this_turn")) is True + + +# ── wrapper 透传契约 ──────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_process_persisted_user_segment_response_passes_tts_this_turn( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """路由 TEXT 分支 → ``process_persisted_user_segment_response`` → ``process_user_message``。""" + captured: dict = {} + + async def _spy_process_user_message(*args, **kwargs) -> None: + captured.update(kwargs) + # 兼容位置参数(实际调用是关键字调用,这里只为防御性记录) + captured["_args"] = args + + monkeypatch.setattr(ws_pipeline, "process_user_message", _spy_process_user_message) + + conversation = SimpleNamespace( + id="conv-1", + user_id="user-1", + deleted_at=None, + last_message_at=None, + ) + user = SimpleNamespace(id="user-1", language_preference="zh") + segment = SimpleNamespace( + id="seg-1", + conversation_id="conv-1", + user_input_text="说说童年", + created_at=None, + ) + + fake_db = SimpleNamespace( + get=AsyncMock( + side_effect=lambda model, oid: { + "conv-1": conversation, + "user-1": user, + "seg-1": segment, + }.get(oid) + ) + ) + + @asynccontextmanager + async def _fake_session_local(): + yield fake_db + + monkeypatch.setattr(ws_pipeline, "AsyncSessionLocal", _fake_session_local) + + await ws_pipeline.process_persisted_user_segment_response( + conversation_id="conv-1", + user_id="user-1", + segment_id="seg-1", + tts_this_turn=True, + ) + + assert captured.get("tts_this_turn") is True, ( + "router TEXT/AUDIO_MESSAGE 分支必须把 tts_this_turn=True 透传到 process_user_message" + ) + assert captured.get("conversation_id") == "conv-1" + assert captured.get("user_message") == "说说童年" + + +@pytest.mark.asyncio +async def test_process_persisted_user_segment_response_default_tts_this_turn_false( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """缺省(未开本轮朗读)必须以 False 透传,不能被默默改成 True。""" + captured: dict = {} + + async def _spy_process_user_message(*_args, **kwargs) -> None: + captured.update(kwargs) + + monkeypatch.setattr(ws_pipeline, "process_user_message", _spy_process_user_message) + + conversation = SimpleNamespace( + id="conv-2", user_id="user-2", deleted_at=None, last_message_at=None + ) + user = SimpleNamespace(id="user-2", language_preference="zh") + segment = SimpleNamespace( + id="seg-2", + conversation_id="conv-2", + user_input_text="x", + created_at=None, + ) + + fake_db = SimpleNamespace( + get=AsyncMock( + side_effect=lambda model, oid: { + "conv-2": conversation, + "user-2": user, + "seg-2": segment, + }.get(oid) + ) + ) + + @asynccontextmanager + async def _fake_session_local(): + yield fake_db + + monkeypatch.setattr(ws_pipeline, "AsyncSessionLocal", _fake_session_local) + + await ws_pipeline.process_persisted_user_segment_response( + conversation_id="conv-2", + user_id="user-2", + segment_id="seg-2", + # 不传 tts_this_turn → 默认 False + ) + + assert captured.get("tts_this_turn") is False diff --git a/app-expo/src/app/(main)/conversation/[id].tsx b/app-expo/src/app/(main)/conversation/[id].tsx index 5aa38cc..0593552 100644 --- a/app-expo/src/app/(main)/conversation/[id].tsx +++ b/app-expo/src/app/(main)/conversation/[id].tsx @@ -593,6 +593,13 @@ function StreamingBubbles({ const streamingPart = segments.length > 0 ? segments[segments.length - 1]! : streamingText; const streamingWithCursor = streamingPart + (!isComplete ? '▌' : ''); + /** + * `splitStreamingSegments` 故意保留尾部空段以支持「上一段完成 + 下一段尚未到字」的 + * 流式过渡。但 isComplete=true 时再渲染空尾段就会变成一只永不消失的「假装回复」气泡 + * (含字面 [SPLIT] 残留时尤甚)。 + */ + const showStreamingBubble = + !isComplete || streamingPart.trim().length > 0; const inner = ( <> @@ -627,49 +634,51 @@ function StreamingBubbles({ ))} - - - {agentName} + {showStreamingBubble ? ( + + + {agentName} + + + {!isComplete ? ( + + + + ) : ( + + + + )} + - - {!isComplete ? ( - - - - ) : ( - - - - )} - - + ) : null} ); diff --git a/app-expo/src/app/(tabs)/profile.tsx b/app-expo/src/app/(tabs)/profile.tsx index cef7346..31f54aa 100644 --- a/app-expo/src/app/(tabs)/profile.tsx +++ b/app-expo/src/app/(tabs)/profile.tsx @@ -136,22 +136,23 @@ function SettingRow({ ); } -function planDisplayName( - subscriptionType: string | undefined, - planName?: string, -) { - if (planName) return planName; - switch (subscriptionType) { - case 'free': - return 'Free'; - case 'pro': - case 'premium': - return 'Pro'; - case 'pro_plus': - return 'Pro+'; - default: - return 'Free'; - } +type TierI18nKey = + | 'tier.free' + | 'tier.pro' + | 'tier.pro_plus' + | 'tier.test'; + +const TIER_I18N_KEYS: Record = { + free: 'tier.free', + pro: 'tier.pro', + premium: 'tier.pro', + pro_plus: 'tier.pro_plus', + test: 'tier.test', +}; + +function tierI18nKey(value: string | undefined): TierI18nKey | undefined { + if (!value) return undefined; + return TIER_I18N_KEYS[value]; } export default function ProfileScreen() { @@ -173,8 +174,13 @@ export default function ProfileScreen() { changeDarkMode, } = useAppSettings(); - const tierLabel = - currentPlan?.plan_name ?? planDisplayName(user?.subscription_type); + const tierKey = + tierI18nKey(currentPlan?.plan_id) ?? + tierI18nKey(currentPlan?.subscription_type) ?? + tierI18nKey(user?.subscription_type); + const tierLabel = tierKey + ? t(tierKey) + : (currentPlan?.plan_name ?? t('tier.free')); const currentLanguageLabel = hasLanguageOverride && language ? (languageOptions.find((o) => o.code === language)?.label ?? language) diff --git a/app-expo/src/features/auth/hooks.ts b/app-expo/src/features/auth/hooks.ts index 6bc217a..ca30ab9 100644 --- a/app-expo/src/features/auth/hooks.ts +++ b/app-expo/src/features/auth/hooks.ts @@ -5,9 +5,11 @@ import { useCallback } from 'react'; import { AuthError } from '@/core/api/types'; import { tokenManager } from '@/core/auth/token-manager'; import { disposeAllBackgroundConversationWs } from '@/features/conversation/conversation-ws-background-pool'; +import { getDeviceLanguage } from '@/i18n'; import { authApi } from './api'; import type { + LanguagePreference, LoginRequest, RegisterRequest, SessionState, @@ -19,6 +21,17 @@ import type { UserInfo, } from './types'; +/** + * Resolve the device language to send at sign-up. Backend only persists + * language_preference on first user creation; subsequent logins ignore it. + */ +function withDeviceLanguage( + body: T, +): T { + if (body.language) return body; + return { ...body, language: getDeviceLanguage() }; +} + // ─── Query keys ─── export const authKeys = { @@ -139,7 +152,8 @@ export function useSmsLogin() { const onSuccess = usePostAuthSetup(); return useMutation({ - mutationFn: (body: SmsLoginRequest) => authApi.loginWithSms(body), + mutationFn: (body: SmsLoginRequest) => + authApi.loginWithSms(withDeviceLanguage(body)), onSuccess, }); } @@ -150,7 +164,8 @@ export function useRegister() { const onSuccess = usePostAuthSetup(); return useMutation({ - mutationFn: (body: RegisterRequest) => authApi.register(body), + mutationFn: (body: RegisterRequest) => + authApi.register(withDeviceLanguage(body)), onSuccess, }); } @@ -161,7 +176,8 @@ export function useSmsRegister() { const onSuccess = usePostAuthSetup(); return useMutation({ - mutationFn: (body: SmsRegisterRequest) => authApi.registerWithSms(body), + mutationFn: (body: SmsRegisterRequest) => + authApi.registerWithSms(withDeviceLanguage(body)), onSuccess, }); } diff --git a/app-expo/src/features/auth/types.ts b/app-expo/src/features/auth/types.ts index c1db232..069aba7 100644 --- a/app-expo/src/features/auth/types.ts +++ b/app-expo/src/features/auth/types.ts @@ -1,5 +1,7 @@ // ─── Response types ─── +export type LanguagePreference = 'zh' | 'en'; + export interface TokenResponse { access_token: string; refresh_token: string; @@ -14,6 +16,7 @@ export interface UserInfo { avatar_url: string | null; subscription_type: string; created_at: string; + language_preference?: LanguagePreference; } // ─── Request types ─── @@ -30,6 +33,7 @@ export interface RegisterRequest { nickname: string; email?: string; agreed_to_terms: boolean; + language?: LanguagePreference; } export type SmsPurpose = @@ -48,6 +52,7 @@ export interface SmsLoginRequest { code: string; agreed_to_terms: boolean; nickname?: string; + language?: LanguagePreference; } export interface SmsRegisterRequest { @@ -57,6 +62,7 @@ export interface SmsRegisterRequest { nickname: string; email?: string; agreed_to_terms: boolean; + language?: LanguagePreference; } export interface ResetPasswordRequest { diff --git a/app-expo/src/features/conversation/hooks.ts b/app-expo/src/features/conversation/hooks.ts index e944aa1..1457807 100644 --- a/app-expo/src/features/conversation/hooks.ts +++ b/app-expo/src/features/conversation/hooks.ts @@ -3,6 +3,7 @@ import { File, Paths } from 'expo-file-system'; import { useCallback, useEffect, useRef, useState } from 'react'; import { AppState, type AppStateStatus } from 'react-native'; +import i18n from '@/i18n'; import type { WsConnectionState } from '@/core/ws/types'; import { conversationApi } from './api'; @@ -111,7 +112,7 @@ export function useCreateConversation() { const now = Date.now(); const item: ConversationListItem = { id: newConversation.id, - title: '岁月知己', + title: i18n.t('agentName', { ns: 'conversation' }), avatarUrl: null, latestMessagePreview: '', latestMessageTime: now, @@ -247,6 +248,14 @@ export function useRealtimeSession({ setAwaitingAssistantReply(false); return; } + /** + * 空文本 + 未完成时不能写成 `{text: '', isComplete: false}`: + * UI 会渲染一只空 `StreamingBubbles`(pulsing 气泡 + 光标),看上去与 + * 「正在回复…」typing 气泡难以区分,且会一直挂在底部不消失。 + */ + if (text.length === 0) { + return; + } setStreamingMessage({ text, isComplete }); }, [], diff --git a/app-expo/src/i18n/generated/resources.ts b/app-expo/src/i18n/generated/resources.ts index 906c54d..bf45e7d 100644 --- a/app-expo/src/i18n/generated/resources.ts +++ b/app-expo/src/i18n/generated/resources.ts @@ -234,6 +234,12 @@ interface Resources { }; signOut: 'Sign Out'; signingOut: 'Signing out...'; + tier: { + free: 'Free'; + pro: 'Pro'; + pro_plus: 'Pro+'; + test: 'Test'; + }; userNamePlaceholder: 'User'; userTier: '{{tier}}'; }; diff --git a/app-expo/src/i18n/locales/en/profile.json b/app-expo/src/i18n/locales/en/profile.json index c9d4cdf..b368656 100644 --- a/app-expo/src/i18n/locales/en/profile.json +++ b/app-expo/src/i18n/locales/en/profile.json @@ -64,6 +64,12 @@ }, "signingOut": "Signing out...", "signOut": "Sign Out", + "tier": { + "free": "Free", + "pro": "Pro", + "pro_plus": "Pro+", + "test": "Test" + }, "userNamePlaceholder": "User", "userTier": "{{tier}}" } diff --git a/app-expo/src/i18n/locales/zh/profile.json b/app-expo/src/i18n/locales/zh/profile.json index e993e8c..268434a 100644 --- a/app-expo/src/i18n/locales/zh/profile.json +++ b/app-expo/src/i18n/locales/zh/profile.json @@ -64,6 +64,12 @@ }, "signingOut": "退出中...", "signOut": "退出登录", + "tier": { + "free": "免费体验版", + "pro": "Pro 版", + "pro_plus": "Pro+ 版", + "test": "一分钱测试版" + }, "userNamePlaceholder": "用户", "userTier": "{{tier}}" } diff --git a/app-expo/tests/features/conversation/message-split.test.ts b/app-expo/tests/features/conversation/message-split.test.ts index a10ab8e..151595a 100644 --- a/app-expo/tests/features/conversation/message-split.test.ts +++ b/app-expo/tests/features/conversation/message-split.test.ts @@ -39,9 +39,32 @@ describe('message-split', () => { }); it('splitStreamingSegments keeps empty tail after delimiter', () => { + /** + * 流式上下文(!isComplete)下保留尾部空段,让 UI 能在分隔符已出现、第二段尚未到字时 + * 渲染「上一段已完成气泡 + 空流式气泡」。`StreamingBubbles` 在 isComplete=true 时 + * 会过滤掉这只空尾段(见 conversation/[id].tsx 与对应注释),所以底部不会再永久挂一只 + * 假装的「Replying…」气泡。 + */ expect(splitStreamingSegments('first [SPLIT]')).toEqual(['first', '']); }); + it('splitStreamingSegments handles lowercase / fullwidth split markers', () => { + expect(splitStreamingSegments('a [split] b')).toEqual(['a', 'b']); + expect(splitStreamingSegments('a【SPLIT】b')).toEqual(['a', 'b']); + expect(splitStreamingSegments('a [ SPLIT ] b')).toEqual(['a', 'b']); + }); + + it('splitMessageParts accepts spaced / lowercase delimiters', () => { + expect(splitMessageParts('first [ SPLIT ] second')).toEqual([ + 'first', + 'second', + ]); + expect(splitMessageParts('first [split] second')).toEqual([ + 'first', + 'second', + ]); + }); + it('lastSegmentPreview uses last non-empty part', () => { expect(lastSegmentPreview('a [SPLIT] b', 10)).toBe('b'); expect(lastSegmentPreview('hello', 3)).toBe('hel');