Files
life-echo/api/app/features/conversation/input_normalize.py
Kevin 53d9e003af feat(api): 叙事 prompt、职业上下文、读路径章节、WS 解耦与错误脱敏
- 回忆录:事实边界补充允许清单;传记文体示例与 JSON 叙事要求对齐
- default 职业提示 occupation_context;cadre/military 退休语境
- GET 章节读路径零写入,prepare_chapter_read_view + markdown_for_response
- 文本归一抽到 core/text_normalize;移除弃用 reply 策略与 recompose_chapters_for_story
- ConversationService:WS 连接/用户段落/结束对话;对外错误固定文案
- 测试:HTTP 脱敏契约、章节读视图、occupation 与 background_voice
2026-04-01 11:55:52 +08:00

55 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
聊天输入归一:供访谈 Agent / 编排层对 ASR 与键盘输入做可控预处理(规则 / 可选 LLM)。
不改变 segment 落库原文;仅作为模型侧派生净稿。
与 memoir 共用同一套确定性规则,避免聊天与回忆录对同一句理解割裂。
"""
from __future__ import annotations
from typing import Any
from app.core.config import settings
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
from app.core.logging import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Chat-side alias for the shared oral-text rule function: both names refer to
# the very same callable, so chat input is processed by exactly the rule set
# that `apply_oral_rules` implements.
apply_conversation_input_rules = apply_oral_rules
def _llm_normalize_chat_input(text: str, llm: Any) -> str | None:
    """Delegate chat-input cleanup to the shared ``llm_normalize_text`` helper.

    The chat-specific input-length and token limits are read from ``settings``
    and coerced to ``int`` before being forwarded, together with a fixed agent
    name used to identify this call site.

    Args:
        text: The text to refine.
        llm: The LLM handle passed through unchanged to ``llm_normalize_text``.

    Returns:
        Whatever ``llm_normalize_text`` returns. Per the original author's
        note, the helper is expected to fix only obvious typos/homophones
        without adding facts, and to yield ``None`` on failure — confirm
        against ``app.core.text_normalize``.
    """
    input_char_limit = int(settings.chat_input_normalize_llm_max_input_chars)
    token_limit = int(settings.chat_input_normalize_llm_max_tokens)
    return llm_normalize_text(
        text,
        llm,
        max_input_chars=input_char_limit,
        max_tokens=token_limit,
        agent_name="chat_input_normalize.llm",
    )
def normalize_chat_input_for_agent(text: str, *, llm: Any | None = None) -> str:
    """Single chat-side entry point for producing the normalized input text.

    Behaviour is driven by two settings:

    - ``chat_input_normalize_enabled`` falsy: return the text untouched.
    - ``chat_input_normalize_mode`` (stripped and lower-cased; an empty or
      missing value counts as ``"rules"``):
        - ``"off"``: return the text untouched.
        - ``"llm"``: apply the rules, then try the LLM pass; if that pass
          returns ``None`` the rule-based result is kept.
        - anything else (including ``"rules"``): apply the rules only.

    Args:
        text: Raw user input; a falsy value (``None``/empty) is treated as "".
        llm: Optional LLM handle forwarded to the LLM pass in ``"llm"`` mode.

    Returns:
        The normalized text, or the original text (``""`` if falsy) when
        normalization is disabled or switched off.
    """
    raw = text or ""
    if not settings.chat_input_normalize_enabled:
        return raw

    configured_mode = settings.chat_input_normalize_mode or "rules"
    mode = configured_mode.strip().lower()
    if mode == "off":
        return raw

    ruled = apply_conversation_input_rules(raw)
    if mode == "llm":
        polished = _llm_normalize_chat_input(ruled, llm)
        # A None result from the LLM pass means: keep the rule-based text.
        return ruled if polished is None else polished
    return ruled