""" 聊天输入归一:供访谈 Agent / 编排层对 ASR 与键盘输入做可控预处理(规则 / 可选 LLM)。 不改变 segment 落库原文;仅作为模型侧派生净稿。 与 memoir 共用同一套确定性规则,避免聊天与回忆录对同一句理解割裂。 """ from __future__ import annotations from typing import Any from app.core.config import settings from app.core.logging import get_logger from app.core.text_normalize import apply_oral_rules, llm_normalize_text logger = get_logger(__name__) apply_conversation_input_rules = apply_oral_rules def _llm_normalize_chat_input(text: str, llm: Any) -> str | None: """仅修正明显错字与同音字,不增事实;失败返回 None。""" return llm_normalize_text( text, llm, max_input_chars=int(settings.chat_input_normalize_llm_max_input_chars), max_tokens=int(settings.chat_input_normalize_llm_max_tokens), agent_name="chat_input_normalize.llm", ) def normalize_chat_input_for_agent( text: str, *, llm: Any | None = None, is_from_voice: bool = False, ) -> str: """ 聊天侧单一出口:编排层与 InterviewAgent 共用。 - 全局关闭:原文 - off:原文 - rules:仅规则 - llm:先规则,再(可选)LLM;无 llm 或失败则保留规则结果 - chat_input_normalize_llm_voice_only:mode=llm 时仅 is_from_voice 为真才调用 LLM """ if not settings.chat_input_normalize_enabled: return text or "" mode = (settings.chat_input_normalize_mode or "rules").strip().lower() if mode == "off": return text or "" base = apply_conversation_input_rules(text or "") if mode != "llm": return base effective_llm = llm if settings.chat_input_normalize_llm_voice_only and not is_from_voice: effective_llm = None refined = _llm_normalize_chat_input(base, effective_llm) if refined is not None: return refined return base