Files
life-echo/api/app/features/conversation/input_normalize.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

65 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
聊天输入归一:供访谈 Agent / 编排层对 ASR 与键盘输入做可控预处理(规则 / 可选 LLM)。
不改变 segment 落库原文;仅作为模型侧派生净稿。
与 memoir 共用同一套确定性规则,避免聊天与回忆录对同一句理解割裂。
"""
from __future__ import annotations
from typing import Any
from app.core.config import settings
from app.core.logging import get_logger
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
# Chat-side name for the shared oral-text rule pass. It is a plain alias of
# `apply_oral_rules`, so chat input and memoir text go through the same
# deterministic rules (see the module docstring).
apply_conversation_input_rules = apply_oral_rules
def _llm_normalize_chat_input(text: str, llm: Any) -> str | None:
    """Run the optional LLM clean-up pass over one chat utterance.

    Thin wrapper around ``llm_normalize_text`` that supplies the chat-specific
    limits from ``settings`` and a fixed agent name. The pass is intended to
    correct only obvious typos and homophone mistakes without adding facts.

    Args:
        text: The utterance to refine (callers pass the rule-normalized text).
        llm: The LLM handle forwarded unchanged to ``llm_normalize_text``.

    Returns:
        Whatever ``llm_normalize_text`` returns: the refined text, or ``None``
        when the pass fails (per the original contract of this helper).
    """
    # Settings values are coerced to int before being handed to the helper.
    char_budget = int(settings.chat_input_normalize_llm_max_input_chars)
    token_budget = int(settings.chat_input_normalize_llm_max_tokens)
    return llm_normalize_text(
        text,
        llm,
        max_input_chars=char_budget,
        max_tokens=token_budget,
        agent_name="chat_input_normalize.llm",
    )
def normalize_chat_input_for_agent(
    text: str,
    *,
    llm: Any | None = None,
    is_from_voice: bool = False,
) -> str:
    """Single chat-side entry point for input normalization.

    Shared by the orchestration layer and InterviewAgent. Behaviour is driven
    by ``settings``:

    - normalization globally disabled: return the original text
    - mode ``off``: return the original text
    - mode ``llm``: apply the rules first, then the optional LLM pass; if no
      LLM is available or the pass yields ``None``, keep the rule result
    - any other mode (including the default ``rules``): rules only
    - ``chat_input_normalize_llm_voice_only``: in ``llm`` mode, the LLM is
      only handed to the refinement step when ``is_from_voice`` is true

    Args:
        text: Raw user input; a falsy value is treated as the empty string.
        llm: Optional LLM handle used for the refinement pass.
        is_from_voice: Whether the input came from voice rather than keyboard.

    Returns:
        The normalized text, always a ``str``.
    """
    raw = text or ""

    if not settings.chat_input_normalize_enabled:
        return raw

    mode = (settings.chat_input_normalize_mode or "rules").strip().lower()
    if mode == "off":
        return raw

    rule_cleaned = apply_conversation_input_rules(raw)
    if mode != "llm":
        return rule_cleaned

    # The LLM is withheld only when the voice-only switch is on and this
    # input is not from voice; the refinement helper is still invoked, just
    # with ``None`` in place of the model.
    voice_only = settings.chat_input_normalize_llm_voice_only
    llm_permitted = (not voice_only) or is_from_voice
    candidate = _llm_normalize_chat_input(
        rule_cleaned,
        llm if llm_permitted else None,
    )
    return rule_cleaned if candidate is None else candidate