feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
@@ -358,6 +359,58 @@ async def _delayed_listening_feedback(
|
||||
await _send_segment_transition_feedback(conversation_id, 0)
|
||||
|
||||
|
||||
# ── 长音频切片转写 ────────────────────────────────────────────
|
||||
|
||||
# Maximum length, in milliseconds (55 s), of each audio slice produced by
# _split_audio_bytes; audio at or below this length is not sliced at all.
MAX_ASR_CHUNK_MS = 55_000
|
||||
|
||||
|
||||
def _split_audio_bytes(audio_bytes: bytes, fmt: str) -> list[bytes]:
    """Slice long audio into pieces of at most ``MAX_ASR_CHUNK_MS`` using pydub.

    Audio whose decoded duration does not exceed the limit is returned
    untouched, as a single-element list holding the original bytes in their
    original container format. Longer audio is converted to 16 kHz, mono,
    16-bit samples and each slice is exported as WAV (per the original
    author's note, this keeps every slice within Tencent ASR's 3 MB limit).

    Args:
        audio_bytes: Encoded audio payload.
        fmt: Container/codec name handed to pydub for decoding (e.g. "m4a").

    Returns:
        The list of payloads to transcribe, in playback order.
    """
    # Imported lazily so pydub is only loaded when audio is actually split.
    from pydub import AudioSegment as PydubSegment

    decoded = PydubSegment.from_file(io.BytesIO(audio_bytes), format=fmt)
    total_ms = len(decoded)  # pydub reports segment length in milliseconds
    if total_ms <= MAX_ASR_CHUNK_MS:
        # Short clip: no re-encoding, hand back the caller's bytes as-is.
        return [audio_bytes]

    normalized = decoded.set_frame_rate(16000)
    normalized = normalized.set_channels(1)
    normalized = normalized.set_sample_width(2)

    def _export_window(offset_ms: int) -> bytes:
        # Render one window of the normalized audio to an in-memory WAV.
        sink = io.BytesIO()
        window = normalized[offset_ms : offset_ms + MAX_ASR_CHUNK_MS]
        window.export(sink, format="wav")
        return sink.getvalue()

    return [
        _export_window(offset_ms)
        for offset_ms in range(0, total_ms, MAX_ASR_CHUNK_MS)
    ]
|
||||
|
||||
|
||||
async def _transcribe_long_audio(audio_bytes: bytes, fmt: str = "m4a") -> str:
    """Transcribe audio, slicing it first when it is longer than 55 s.

    The slicing runs in a worker thread. If slicing raises, or yields a
    single piece, the original bytes are transcribed directly in their
    original format. Otherwise every WAV slice is transcribed concurrently;
    slices that raise, come back empty, or are recognised as failures by
    ``_is_transcribe_failure`` are dropped, and the surviving texts are
    concatenated in slice order without a separator.

    Args:
        audio_bytes: Encoded audio payload.
        fmt: Format of ``audio_bytes`` (defaults to "m4a").

    Returns:
        The combined transcript; may be empty if no slice produced text.
    """
    provider = get_asr_provider()

    try:
        pieces = await asyncio.to_thread(_split_audio_bytes, audio_bytes, fmt)
    except Exception as exc:
        # Best-effort: a decoding/slicing problem must not block transcription.
        logger.warning("pydub 切片失败 ({}), 回退到直接转写", exc)
        return await provider.transcribe(audio_bytes, format=fmt)

    if len(pieces) <= 1:
        return await provider.transcribe(audio_bytes, format=fmt)

    logger.info("长音频切片: {} 段", len(pieces))

    # return_exceptions=True keeps one failing slice from aborting the rest.
    outcomes = await asyncio.gather(
        *(provider.transcribe(piece, format="wav") for piece in pieces),
        return_exceptions=True,
    )

    kept: list[str] = []
    for idx, outcome in enumerate(outcomes):
        if isinstance(outcome, BaseException):
            logger.warning("切片 {} 转写异常: {}", idx, outcome)
        elif outcome and not _is_transcribe_failure(outcome):
            kept.append(outcome)
    return "".join(kept)
|
||||
|
||||
|
||||
# ── 分段语音异步处理 ────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -439,9 +492,7 @@ async def process_audio_segment(
|
||||
conversation_id,
|
||||
segment_index,
|
||||
)
|
||||
transcript_text = await get_asr_provider().transcribe(
|
||||
audio_bytes, format="m4a"
|
||||
)
|
||||
transcript_text = await _transcribe_long_audio(audio_bytes, fmt="m4a")
|
||||
await manager.send_message(
|
||||
conversation_id,
|
||||
{
|
||||
@@ -513,7 +564,11 @@ async def process_audio_segment(
|
||||
user_message_timestamp = _mark_conversation_active(conversation)
|
||||
await db.commit()
|
||||
await db.refresh(segment)
|
||||
await background_runner.queue_message(conversation.user_id, segment.id)
|
||||
await background_runner.queue_message(
|
||||
conversation.user_id,
|
||||
segment.id,
|
||||
text_char_count=len((transcript_text or "").strip()),
|
||||
)
|
||||
|
||||
ready_segments: List[Tuple[int, str, Segment]] = []
|
||||
async with state.lock:
|
||||
|
||||
@@ -11,6 +11,7 @@ from datetime import datetime, timezone
|
||||
from fastapi import WebSocket, WebSocketDisconnect, status
|
||||
from starlette.websockets import WebSocketState
|
||||
|
||||
from app.agents.chat.background_voice import infer_background_voice
|
||||
from app.agents.chat.prompts_profile import format_user_profile_context
|
||||
from app.core.db import AsyncSessionLocal
|
||||
from app.core.dependencies import get_asr_provider
|
||||
@@ -201,6 +202,9 @@ async def websocket_endpoint(
|
||||
conversation_id=conversation_id,
|
||||
memoir_state=state,
|
||||
user_profile_context=user_profile_context,
|
||||
background_voice=infer_background_voice(
|
||||
user.occupation
|
||||
),
|
||||
)
|
||||
)
|
||||
ai_msg_id = await ConversationHistoryStore(
|
||||
@@ -300,7 +304,9 @@ async def websocket_endpoint(
|
||||
await db.commit()
|
||||
await db.refresh(segment)
|
||||
await background_runner.queue_message(
|
||||
conversation.user_id, segment.id
|
||||
conversation.user_id,
|
||||
segment.id,
|
||||
text_char_count=len(text_message.strip()),
|
||||
)
|
||||
|
||||
await process_user_message(
|
||||
@@ -563,7 +569,9 @@ async def websocket_endpoint(
|
||||
await db.commit()
|
||||
await db.refresh(segment)
|
||||
await background_runner.queue_message(
|
||||
conversation.user_id, segment.id
|
||||
conversation.user_id,
|
||||
segment.id,
|
||||
text_char_count=len((asr_text or "").strip()),
|
||||
)
|
||||
|
||||
if asr_text and not asr_text.startswith("转写失败"):
|
||||
|
||||
Reference in New Issue
Block a user