Files
life-echo/api/app/features/memoir/oral_normalize.py

50 lines
1.6 KiB
Python
Raw Normal View History

"""
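Oral-text normalization: before narrative generation and faithfulness checking,
apply controlled preprocessing (rules / optional LLM) to the same piece of text.
It does not change the original text stored for the segment; the result is only
a derived input for the memoir story generation path.
The rule layer is shared with the chat side: `apply_conversation_input_rules`
(conversation.input_normalize).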
"""
from __future__ import annotations
from typing import Any
from app.core.config import settings
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
def _llm_normalize_oral(text: str, llm: Any) -> str | None:
    """Delegate to ``llm_normalize_text`` using the memoir oral-normalize limits.

    Intended contract (carried over from the original note): only obvious
    typos and homophone mistakes are corrected, no facts are added, and
    ``None`` is returned on failure. That behaviour lives in
    ``llm_normalize_text`` and is not visible here -- TODO confirm.

    Args:
        text: Text to refine.
        llm: LLM handle forwarded unchanged to ``llm_normalize_text``.

    Returns:
        Whatever ``llm_normalize_text`` returns (a string, or ``None``).
    """
    # Settings values are coerced to int before being handed to the helper.
    input_char_cap = int(settings.memoir_oral_normalize_llm_max_input_chars)
    output_token_cap = int(settings.memoir_oral_normalize_llm_max_tokens)
    return llm_normalize_text(
        text,
        llm,
        max_input_chars=input_char_cap,
        max_tokens=output_token_cap,
        agent_name="oral_normalize.llm",
    )
def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str:
    """Single exit point of the story pipeline for oral-text normalization.

    Narrative generation and faithfulness checking are meant to consume the
    same value returned here.

    Behaviour by configuration:
      - ``settings.memoir_oral_normalize_enabled`` falsy, or mode ``"off"``:
        the input is returned as-is (``""`` when ``text`` is falsy).
      - mode ``"llm"``: rules are applied first, then the LLM refinement;
        if the refinement yields ``None`` the rule-based result is kept.
      - any other mode (default ``"rules"``): rule-based result only.

    Args:
        text: Raw oral text; a falsy value is treated as ``""``.
        llm: Optional LLM handle, forwarded to the LLM refinement step.

    Returns:
        The normalized text for the configured mode.
    """
    source = text or ""
    if not settings.memoir_oral_normalize_enabled:
        return source

    # The mode is compared case-insensitively and ignoring surrounding spaces.
    configured_mode = settings.memoir_oral_normalize_mode or "rules"
    selected = configured_mode.strip().lower()
    if selected == "off":
        return source

    rule_result = apply_oral_rules(source)
    if selected == "llm":
        llm_result = _llm_normalize_oral(rule_result, llm)
        # A None result means the LLM step produced nothing; keep the rule output.
        return rule_result if llm_result is None else llm_result
    return rule_result