# api/app/adapters/asr/whisper_local.py

"""Local faster-whisper ASR adapter — implements ASRProvider port."""

from __future__ import annotations

import asyncio
import os
import re
import tempfile
import threading
from typing import Any, Iterable

from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.ports.asr import ASRTranscriptionError

logger = get_logger(__name__)

# Matches subtitle-credit style text: a credit keyword followed within 20
# characters by "by" (in the listed casings), or "字幕" directly followed by "by".
_SUBTITLE_WATERMARK_RE = re.compile(
    r"(字幕|听译|压制|字幕组).{0,20}(by|BY|By)|字幕\s*by",
    re.UNICODE,
)

# Punctuation whose presence marks a short string as an ordinary sentence.
_SENTENCE_PUNCT_RE = re.compile(r"[？?！!。，、]")


def _looks_like_subtitle_hallucination(text: str) -> bool:
    """Tell whether a short transcript looks like a video-subtitle watermark.

    On silent input the no-VAD pass tends to emit subtitle credits; only
    short strings of that kind are flagged so real speech is never dropped.

    Args:
        text: Transcript to inspect; ``None``/empty is treated as ``""``.

    Returns:
        ``True`` when the stripped text is at most 48 characters and either
        matches the watermark pattern, or is at most 12 characters, contains
        "字幕" and has no sentence punctuation. ``False`` otherwise.
    """
    candidate = (text or "").strip()

    # Anything longer is assumed to be genuine content.
    if len(candidate) > 48:
        return False

    if _SUBTITLE_WATERMARK_RE.search(candidate) is not None:
        return True

    # A very short, unpunctuated mention of "字幕" is treated as a bare credit.
    is_bare_credit = (
        len(candidate) <= 12
        and "字幕" in candidate
        and _SENTENCE_PUNCT_RE.search(candidate) is None
    )
    return is_bare_credit


def _join_segment_text(segments: Iterable[Any]) -> tuple[str, int]:
    """Concatenate the ``text`` of every segment and count the segments.

    Args:
        segments: Any iterable (it is consumed exactly once) of objects that
            may carry a ``text`` attribute. A missing or falsy ``text``
            contributes an empty string.

    Returns:
        A ``(text, count)`` pair: the concatenated text with surrounding
        whitespace stripped, and the number of segments seen.
    """
    pieces: list[str] = []
    for segment in segments:
        raw = getattr(segment, "text", "")
        pieces.append(str(raw) if raw else "")
    joined = "".join(pieces)
    return joined.strip(), len(pieces)


# Default model cache used when no cache_dir is configured:
# "models/whisper" three directories above the directory containing this file.
_DEFAULT_CACHE_DIR = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        *(("..",) * 3),
        "models",
        "whisper",
    )
)


class WhisperASRProvider:
    """Speech-to-text provider backed by a local faster-whisper model.

    The model is built lazily (on the first ``transcribe`` call, or eagerly
    through ``ensure_ready``) and then cached on the instance. Transcription
    is hard-wired to ``language="zh"``.
    """

    # Serialises model construction. Loading can be triggered from worker
    # threads, so concurrent first calls must not build the model twice.
    # The lock is shared by every instance of the class.
    _load_lock = threading.Lock()

    def __init__(
        self,
        model_size: str = "small",
        device: str = "auto",
        compute_type: str = "auto",
        cache_dir: str = "",
    ):
        """Store the configuration; the model itself is not loaded here.

        Args:
            model_size: Model size/name handed to ``WhisperModel``.
            device: ``"auto"`` selects ``"cuda"`` when torch reports CUDA as
                available and ``"cpu"`` otherwise; any other value is passed
                through unchanged.
            compute_type: ``"auto"`` selects ``"float16"`` on CUDA and
                ``"int8"`` otherwise; any other value is passed through.
            cache_dir: Model directory. When non-empty the model is loaded
                with ``local_files_only=True``; when empty the module default
                cache directory is used with ``local_files_only=False``.
        """
        self._model_size = model_size
        self._device = device
        self._compute_type = compute_type
        self._cache_dir = cache_dir
        self._model = None

    def _load_model(self) -> bool:
        """Build the model once and cache it on the instance.

        Safe to call from several threads: construction happens under a lock.

        Returns:
            ``True`` when a model is available, ``False`` when loading failed
            (the error is logged, never raised).
        """
        if self._model is not None:
            return True
        with self._load_lock:
            # Another thread may have finished loading while this one waited.
            if self._model is not None:
                return True
            try:
                # Imported here so a missing faster-whisper install is reported
                # as a load failure instead of an import-time error.
                from faster_whisper import WhisperModel

                device = self._device
                compute_type = self._compute_type
                if device == "auto":
                    try:
                        import torch  # type: ignore[import-untyped]

                        device = "cuda" if torch.cuda.is_available() else "cpu"
                    except ImportError:
                        # torch is optional; without it fall back to CPU.
                        device = "cpu"
                if compute_type == "auto":
                    compute_type = "float16" if device == "cuda" else "int8"

                download_root = self._cache_dir or _DEFAULT_CACHE_DIR
                # An explicitly configured cache dir must already hold the model.
                local_files_only = bool(self._cache_dir)
                os.makedirs(download_root, exist_ok=True)

                self._model = WhisperModel(
                    self._model_size,
                    device=device,
                    compute_type=compute_type,
                    download_root=download_root,
                    local_files_only=local_files_only,
                )
                return True
            except Exception as e:
                logger.error("Failed to load Whisper model: {}", e)
                return False

    def ensure_ready(self) -> bool:
        """Eagerly load the model; return whether it is available."""
        return self._load_model()

    async def transcribe(self, audio: bytes, format: str = "m4a") -> str:
        """Transcribe an audio clip inside an ``asr.transcribe`` business span.

        Args:
            audio: Raw bytes of the encoded audio file.
            format: Container/extension name used as the temp-file suffix.

        Returns:
            The recognised text, or ``""`` when nothing usable was recognised.

        Raises:
            ASRTranscriptionError: If the model cannot be loaded or
                transcription fails.
        """
        with business_span("asr.transcribe", provider="whisper"):
            return await self._transcribe_inner(audio, format)

    async def _transcribe_inner(self, audio: bytes, format: str) -> str:
        """Load the model if needed and run the blocking transcription off-loop."""
        # Model loading and inference are both blocking, so both run in
        # worker threads to keep the asyncio event loop responsive.
        if self._model is None:
            await asyncio.to_thread(self._load_model)
        model = self._model
        if not model:
            raise ASRTranscriptionError("Whisper model not loaded")
        return await asyncio.to_thread(
            self._transcribe_blocking, model, audio, format
        )

    @staticmethod
    def _temp_suffix(format: str) -> str:
        """Turn a caller-supplied format name into a safe temp-file suffix.

        Only ASCII letters and digits are kept, so a leading dot, whitespace
        or path separators in ``format`` cannot produce a malformed name or
        redirect where the temp file is created.

        Returns:
            ``".<ext>"``, or ``""`` when nothing usable remains.
        """
        ext = re.sub(r"[^A-Za-z0-9]", "", format or "")
        return f".{ext}" if ext else ""

    @staticmethod
    def _transcribe_without_vad(model: Any, source: Any) -> str:
        """Run one no-VAD transcription of ``source`` and return the joined text."""
        segments, _ = model.transcribe(
            source,
            language="zh",
            beam_size=5,
            vad_filter=False,
            condition_on_previous_text=False,
            # Slightly raised so fewer boundary segments are labelled
            # no_speech, which would otherwise leave the whole result empty.
            no_speech_threshold=0.85,
        )
        text, _ = _join_segment_text(segments)
        return text

    def _transcribe_blocking(self, model: Any, audio: bytes, format: str) -> str:
        """Transcribe ``audio`` synchronously using up to three passes.

        1. VAD-filtered transcription of the temp file.
        2. If that yields no text: the same file without VAD.
        3. If there is still no usable text: decode the file to samples and
           transcribe those without VAD.

        Output of passes 2 and 3 that looks like a subtitle watermark is
        discarded. A failure of pass 3 is logged and treated as "no text".

        Raises:
            ASRTranscriptionError: On any other failure.
        """
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                suffix=self._temp_suffix(format), delete=False
            ) as tmp:
                # Record the path before writing so the file is still removed
                # in ``finally`` if the write itself fails.
                tmp_path = tmp.name
                tmp.write(audio)

            segments, _ = model.transcribe(
                tmp_path,
                language="zh",
                beam_size=5,
                vad_filter=True,
                vad_parameters={
                    "min_silence_duration_ms": 500,
                    "threshold": 0.35,
                    "min_speech_duration_ms": 200,
                },
            )
            text, _ = _join_segment_text(segments)
            if text:
                return text

            # Short recordings are easily classified as silence by VAD.
            logger.info(
                "Whisper VAD pass 无文本，关闭 VAD 再试一次（短录音易被 VAD 判为静音）"
            )
            raw2 = self._transcribe_without_vad(model, tmp_path)
            if raw2 and _looks_like_subtitle_hallucination(raw2):
                logger.info(
                    "Whisper 丢弃疑似字幕水印幻听: {!r}",
                    raw2[:120],
                )
            elif raw2:
                return raw2

            try:
                from faster_whisper import decode_audio

                audio_np = decode_audio(tmp_path, sampling_rate=16000)
                raw3 = self._transcribe_without_vad(model, audio_np)
                if raw3 and _looks_like_subtitle_hallucination(raw3):
                    logger.info(
                        "Whisper decode_audio 回退仍是疑似字幕水印幻听: {!r}",
                        raw3[:120],
                    )
                elif raw3:
                    return raw3
            except Exception as ex:
                # The last-resort pass is best effort; fall through to "".
                logger.warning("Whisper decode_audio 回退失败: {}", ex)

            return ""
        except ASRTranscriptionError:
            raise
        except Exception as e:
            logger.error("Whisper transcribe failed: {}", e)
            raise ASRTranscriptionError(f"Whisper transcribe failed: {e!s}") from e
        finally:
            if tmp_path:
                try:
                    os.remove(tmp_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is not fatal.
                    pass
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								"""Local faster-whisper ASR adapter — implements ASRProvider port."""
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from __future__ import annotations
 								import asyncio
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								import os
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								import re
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								import tempfile
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from typing import Any, Iterable
-												feat: OpenTelemetry LGTM observability, dev tooling, and memoir UX fixes (#31)

* add staging ios app build script

* feat(api): add OpenTelemetry LGTM stack for local observability

Wire OTel traces, metrics, and logs through a collector to Tempo,
Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay,
Grafana provisioning, env templates, and development.sh auto-start.

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat: expand observability, harden dev tooling, and fix expo staging UX

Add business and LLM Prometheus metrics with Grafana dashboards, alerting,
and a metrics verification script. Wire telemetry through adapters and core
LLM paths, and document the local LGTM workflow.

Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome,
and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge
internal-eval into the main dev script with improved compose handling.

Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs,
show memoir empty state instead of load errors when no chapters exist, and
add jest env setup plus chapter list response normalization.

Co-authored-by: Cursor <cursoragent@cursor.com>

* chore: enable Grafana Assistant Cursor plugin

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp

Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019.

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Kevin <kevin@brighteng.org>
Co-authored-by: Cursor <cursoragent@cursor.com>
											
										
										
											2026-05-20 15:12:21 +08:00
+								from app.core.business_telemetry import business_span
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from app.core.logging import get_logger
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								from app.ports.asr import ASRTranscriptionError
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
 								logger = get_logger(__name__)
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								_SUBTITLE_WATERMARK_RE = re.compile(
 								    r"(字幕|听译|压制|字幕组).{0,20}(by|BY|By)|字幕\s*by",
 								    re.UNICODE,
 								)
 								def _looks_like_subtitle_hallucination(text: str) -> bool:
 								    """静音时第二遍易吐出视频字幕水印；仅丢弃此类短句。"""
 								    t = (text or "").strip()
 								    if len(t) > 48:
 								        return False
 								    if _SUBTITLE_WATERMARK_RE.search(t):
 								        return True
 								    if len(t) <= 12 and "字幕" in t and not re.search(r"[？?！!。，、]", t):
 								        return True
 								    return False
 								def _join_segment_text(segments: Iterable[Any]) -> tuple[str, int]:
 								    segs = list(segments)
 								    return "".join(str(getattr(seg, "text", "") or "") for seg in segs).strip(), len(
 								        segs
 								    )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								_DEFAULT_CACHE_DIR = os.path.normpath(
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								    os.path.join(
 								        os.path.dirname(os.path.abspath(__file__)),
 								        "..",
 								        "..",
 								        "..",
 								        "models",
 								        "whisper",
 								    )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								)
 								class WhisperASRProvider:
 								    def __init__(
 								        self,
 								        model_size: str = "small",
 								        device: str = "auto",
 								        compute_type: str = "auto",
 								        cache_dir: str = "",
 								    ):
 								        self._model_size = model_size
 								        self._device = device
 								        self._compute_type = compute_type
 								        self._cache_dir = cache_dir
 								        self._model = None
 								    def _load_model(self) -> bool:
 								        if self._model is not None:
 								            return True
 								        try:
 								            from faster_whisper import WhisperModel
 								            device = self._device
 								            compute_type = self._compute_type
 								            if device == "auto":
 								                try:
 								                    import torch  # type: ignore[import-untyped]
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                    device = "cuda" if torch.cuda.is_available() else "cpu"
 								                except ImportError:
 								                    device = "cpu"
 								            if compute_type == "auto":
 								                compute_type = "float16" if device == "cuda" else "int8"
 								            download_root = self._cache_dir or _DEFAULT_CACHE_DIR
 								            local_files_only = bool(self._cache_dir)
 								            os.makedirs(download_root, exist_ok=True)
 								            self._model = WhisperModel(
 								                self._model_size,
 								                device=device,
 								                compute_type=compute_type,
 								                download_root=download_root,
 								                local_files_only=local_files_only,
 								            )
 								            return True
 								        except Exception as e:
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								            logger.error("Failed to load Whisper model: {}", e)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            return False
 								    def ensure_ready(self) -> bool:
 								        return self._load_model()
 								    async def transcribe(self, audio: bytes, format: str = "m4a") -> str:
-												feat: OpenTelemetry LGTM observability, dev tooling, and memoir UX fixes (#31)

* add staging ios app build script

* feat(api): add OpenTelemetry LGTM stack for local observability

Wire OTel traces, metrics, and logs through a collector to Tempo,
Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay,
Grafana provisioning, env templates, and development.sh auto-start.

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat: expand observability, harden dev tooling, and fix expo staging UX

Add business and LLM Prometheus metrics with Grafana dashboards, alerting,
and a metrics verification script. Wire telemetry through adapters and core
LLM paths, and document the local LGTM workflow.

Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome,
and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge
internal-eval into the main dev script with improved compose handling.

Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs,
show memoir empty state instead of load errors when no chapters exist, and
add jest env setup plus chapter list response normalization.

Co-authored-by: Cursor <cursoragent@cursor.com>

* chore: enable Grafana Assistant Cursor plugin

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp

Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019.

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Kevin <kevin@brighteng.org>
Co-authored-by: Cursor <cursoragent@cursor.com>
											
										
										
											2026-05-20 15:12:21 +08:00
+								        with business_span("asr.transcribe", provider="whisper"):
 								            return await self._transcribe_inner(audio, format)
 								    async def _transcribe_inner(self, audio: bytes, format: str) -> str:
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        # 与 v1.1.0 相同的单次 transcribe；推理放线程池，避免阻塞 asyncio（tag 上为同步调用）。
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        self._load_model()
 								        if not self._model:
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								            raise ASRTranscriptionError("Whisper model not loaded")
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        model = self._model
 								        def _sync_transcribe() -> str:
 								            tmp_path = None
 								            try:
 								                with tempfile.NamedTemporaryFile(
 								                    suffix=f".{format}", delete=False
 								                ) as tmp:
 								                    tmp.write(audio)
 								                    tmp_path = tmp.name
 								                segments, _info = model.transcribe(
 								                    tmp_path,
 								                    language="zh",
 								                    beam_size=5,
 								                    vad_filter=True,
 								                    vad_parameters={
 								                        "min_silence_duration_ms": 500,
 								                        "threshold": 0.35,
 								                        "min_speech_duration_ms": 200,
 								                    },
 								                )
 								                text, pass1_seg_count = _join_segment_text(segments)
 								                used_second_pass = False
 								                pass2_seg_count = 0
 								                pass3_seg_count = 0
 								                if not text:
 								                    logger.info(
 								                        "Whisper VAD pass 无文本，关闭 VAD 再试一次（短录音易被 VAD 判为静音）"
 								                    )
 								                    segments2, _info2 = model.transcribe(
 								                        tmp_path,
 								                        language="zh",
 								                        beam_size=5,
 								                        vad_filter=False,
 								                        condition_on_previous_text=False,
 								                        # 略抬高：减少边界片段被标成 no_speech 而整段为空
 								                        no_speech_threshold=0.85,
 								                    )
 								                    raw2, pass2_seg_count = _join_segment_text(segments2)
 								                    used_second_pass = True
 								                    if raw2 and _looks_like_subtitle_hallucination(raw2):
 								                        logger.info(
 								                            "Whisper 丢弃疑似字幕水印幻听: {!r}",
 								                            raw2[:120],
 								                        )
 								                        text = ""
 								                    else:
 								                        text = raw2
 								                if not text and used_second_pass:
 								                    try:
 								                        from faster_whisper import decode_audio
 								                        audio_np = decode_audio(tmp_path, sampling_rate=16000)
 								                        segments3, _info3 = model.transcribe(
 								                            audio_np,
 								                            language="zh",
 								                            beam_size=5,
 								                            vad_filter=False,
 								                            condition_on_previous_text=False,
 								                            no_speech_threshold=0.85,
 								                        )
 								                        raw3, pass3_seg_count = _join_segment_text(segments3)
 								                        if raw3 and _looks_like_subtitle_hallucination(raw3):
 								                            logger.info(
 								                                "Whisper decode_audio 回退仍是疑似字幕水印幻听: {!r}",
 								                                raw3[:120],
 								                            )
 								                        elif raw3:
 								                            text = raw3
 								                    except Exception as ex:
 								                        logger.warning("Whisper decode_audio 回退失败: {}", ex)
 								                return text
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								            except ASRTranscriptionError:
 								                raise
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								            except Exception as e:
 								                logger.error("Whisper transcribe failed: {}", e)
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								                raise ASRTranscriptionError(f"Whisper transcribe failed: {e!s}") from e
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								            finally:
 								                if tmp_path and os.path.exists(tmp_path):
 								                    try:
 								                        os.remove(tmp_path)
 								                    except OSError:
 								                        pass
 								        return await asyncio.to_thread(_sync_transcribe)