""" 口述归一:在进入叙事与忠实度校验前,对同一段文本做可控预处理(规则 / 可选 LLM)。 不改变 segment 落库原文;仅作为 memoir story 生成路径的派生输入。 规则层与聊天侧共用 `apply_conversation_input_rules`(见 conversation.input_normalize)。 """ from __future__ import annotations from typing import Any from app.core.config import settings from app.core.text_normalize import apply_oral_rules, llm_normalize_text def _llm_normalize_oral(text: str, llm: Any) -> str | None: """仅修正明显错字与同音字,不增事实;失败返回 None。""" return llm_normalize_text( text, llm, max_input_chars=int(settings.memoir_oral_normalize_llm_max_input_chars), max_tokens=int(settings.memoir_oral_normalize_llm_max_tokens), agent_name="oral_normalize.llm", ) def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str: """ 供 story pipeline 单一出口:叙事与忠实度使用同一返回值。 - off / 全局关闭:原文 - rules:仅规则 - rules + LLM 分支:先规则,再(可选)LLM;LLM 失败则保留规则结果 """ if not settings.memoir_oral_normalize_enabled: return text or "" mode = (settings.memoir_oral_normalize_mode or "rules").strip().lower() if mode == "off": return text or "" base = apply_oral_rules(text or "") if mode != "llm": return base refined = _llm_normalize_oral(base, llm) if refined is not None: return refined return base