# life-echo/api/app/features/conversation/input_normalize.py

"""
聊天输入归一：供访谈 Agent / 编排层对 ASR 与键盘输入做可控预处理（规则 / 可选 LLM）。

不改变 segment 落库原文；仅作为模型侧派生净稿。
与 memoir 共用同一套确定性规则，避免聊天与回忆录对同一句理解割裂。
"""

from __future__ import annotations

from typing import Any

from app.core.logging import get_logger
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
from app.features.conversation.constants import chat

# Module-level logger, named after this module.
logger = get_logger(__name__)

# Chat-side name for the deterministic rule pass. This is a plain alias of
# apply_oral_rules (same function object), which the module docstring says
# is shared with the memoir feature so both interpret an utterance alike.
apply_conversation_input_rules = apply_oral_rules


def _llm_normalize_chat_input(text: str, llm: Any) -> str | None:
    """Run the shared LLM normalizer using the chat-specific limits.

    Delegates to ``llm_normalize_text``, passing the input/output budgets
    read from the ``chat`` constants and a fixed agent name.

    Intended contract (per the original author's note; the actual behavior
    lives in ``llm_normalize_text``, which is not visible from this module):
    only obvious typos and homophone errors are corrected, no facts are
    added, and ``None`` is returned on failure.

    Args:
        text: The text to refine.
        llm: The LLM handle forwarded unchanged to ``llm_normalize_text``.

    Returns:
        Whatever ``llm_normalize_text`` returns: the refined text or ``None``.
    """
    # Coerce the configured limits to int before handing them over.
    char_budget = int(chat.input_normalize_llm_max_input_chars)
    token_budget = int(chat.input_normalize_llm_max_tokens)
    return llm_normalize_text(
        text,
        llm,
        max_input_chars=char_budget,
        max_tokens=token_budget,
        agent_name="chat_input_normalize.llm",
    )


def normalize_chat_input_for_agent(
    text: str,
    *,
    llm: Any | None = None,
    is_from_voice: bool = False,
) -> str:
    """Normalize chat input for the agent; the single chat-side entry point.

    Behavior by configuration (all read from the ``chat`` constants):

    - ``input_normalize_enabled`` falsy: return the text unchanged.
    - mode ``off``: return the text unchanged.
    - mode ``llm``: apply the rules, then pass the result to the LLM
      normalizer; if that returns ``None`` the rules-only result is kept.
    - any other mode (an empty/unset mode defaults to ``rules``): apply the
      deterministic rules only.
    - ``input_normalize_llm_voice_only`` truthy: in ``llm`` mode the supplied
      ``llm`` is forwarded only when ``is_from_voice`` is true; otherwise
      ``None`` is forwarded in its place.

    A falsy ``text`` (e.g. ``None`` or ``""``) is treated as the empty string.

    Args:
        text: Raw user input.
        llm: Optional LLM handle used only in ``llm`` mode.
        is_from_voice: Whether the input came from voice rather than keyboard.

    Returns:
        The normalized text, or the input itself when normalization is off.
    """
    raw = text or ""

    if not chat.input_normalize_enabled:
        return raw

    # The mode is compared case-insensitively, ignoring surrounding whitespace.
    selected_mode = (chat.input_normalize_mode or "rules").strip().lower()
    if selected_mode == "off":
        return raw

    rules_result = apply_conversation_input_rules(raw)
    if selected_mode != "llm":
        return rules_result

    # With the voice-only switch on, keyboard input gets no LLM handle; the
    # helper is still invoked, just with None in place of the LLM.
    withhold_llm = chat.input_normalize_llm_voice_only and not is_from_voice
    llm_result = _llm_normalize_chat_input(
        rules_result,
        None if withhold_llm else llm,
    )
    return rules_result if llm_result is None else llm_result