Files
life-echo/api/app/features/conversation/input_normalize.py

65 lines
2.0 KiB
Python
Raw Normal View History

"""
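Chat input normalization: lets the interview Agent / orchestration layer apply
controlled preprocessing (rules / optional LLM) to ASR and keyboard input.

It does not change the original segment text that is persisted; the result is
only a derived, cleaned-up draft for the model side.

It shares the same deterministic rules with memoir, so that chat and memoir do
not interpret the same sentence differently.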
"""
from __future__ import annotations
from typing import Any
from app.core.config import settings
from app.core.logging import get_logger
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
# The chat-side rule pass is an alias of the shared ``apply_oral_rules``
# function, so chat input goes through exactly that rule set.
apply_conversation_input_rules = apply_oral_rules
def _llm_normalize_chat_input(text: str, llm: Any) -> str | None:
    """Run the shared LLM normalizer with the chat-specific limits.

    Per the original note, the LLM pass is meant to correct only obvious
    typos and homophone errors without adding facts, and to yield ``None``
    on failure; that behaviour lives in ``llm_normalize_text`` and is not
    visible here -- verify against that helper.

    Args:
        text: The text to refine.
        llm: The LLM client object forwarded to ``llm_normalize_text``.

    Returns:
        Whatever ``llm_normalize_text`` returns: the refined text, or ``None``.
    """
    # Limits come from settings and are coerced to int before being forwarded.
    input_char_limit = int(settings.chat_input_normalize_llm_max_input_chars)
    output_token_limit = int(settings.chat_input_normalize_llm_max_tokens)
    refined = llm_normalize_text(
        text,
        llm,
        max_input_chars=input_char_limit,
        max_tokens=output_token_limit,
        agent_name="chat_input_normalize.llm",
    )
    return refined
def normalize_chat_input_for_agent(
    text: str,
    *,
    llm: Any | None = None,
    is_from_voice: bool = False,
) -> str:
    """Return the normalized form of a chat input for the model side.

    Per the original note, this is the single chat-side entry point shared by
    the orchestration layer and InterviewAgent.

    Behaviour is driven by ``settings``:

    - ``chat_input_normalize_enabled`` falsy: the text is returned unchanged.
    - ``chat_input_normalize_mode == "off"``: the text is returned unchanged.
    - mode ``"llm"``: the rules are applied first, then the LLM refinement is
      attempted; if no LLM is available or the refinement yields ``None``,
      the rule-normalized text is kept.
    - any other mode (an unset mode defaults to ``"rules"``): rules only.
    - ``chat_input_normalize_llm_voice_only``: in ``"llm"`` mode the LLM is
      only called when ``is_from_voice`` is true.

    Args:
        text: The raw user input; a falsy value is treated as ``""``.
        llm: Optional LLM client used for the refinement step.
        is_from_voice: Whether the input is flagged as coming from voice.

    Returns:
        The normalized text (never ``None``).
    """
    raw = text or ""
    if not settings.chat_input_normalize_enabled:
        return raw

    mode = (settings.chat_input_normalize_mode or "rules").strip().lower()
    if mode == "off":
        return raw

    base = apply_conversation_input_rules(raw)
    if mode != "llm":
        return base

    # Skip the LLM step entirely when there is nothing to call or the
    # voice-only gate excludes this input, instead of passing ``None`` to the
    # helper and relying on it to tolerate a missing client.
    blocked_by_voice_gate = (
        settings.chat_input_normalize_llm_voice_only and not is_from_voice
    )
    if llm is None or blocked_by_voice_gate:
        return base

    refined = _llm_normalize_chat_input(base, llm)
    # ``None`` signals that the refinement produced nothing usable.
    return base if refined is None else refined