feat(api): 叙事 prompt、职业上下文、读路径章节、WS 解耦与错误脱敏
- 回忆录:事实边界补充允许清单;传记文体示例与 JSON 叙事要求对齐
- default 职业提示 occupation_context;cadre/military 退休语境
- GET 章节读路径零写入,prepare_chapter_read_view + markdown_for_response
- 文本归一抽到 core/text_normalize;移除弃用 reply 策略与 recompose_chapters_for_story
- ConversationService:WS 连接/用户段落/结束对话;对外错误固定文案
- 测试:HTTP 脱敏契约、章节读视图、occupation 与 background_voice
This commit is contained in:
@@ -8,64 +8,21 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.input_normalize import apply_conversation_input_rules
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def apply_oral_normalization_rules(text: str) -> str:
|
||||
"""确定性规则;与 `apply_conversation_input_rules` 等价(memoir 历史名保留)。"""
|
||||
return apply_conversation_input_rules(text)
|
||||
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
|
||||
|
||||
|
||||
def _llm_normalize_oral(text: str, llm: Any) -> str | None:
|
||||
"""仅修正明显错字与同音字,不增事实;失败返回 None。"""
|
||||
if not llm or not (text or "").strip():
|
||||
return None
|
||||
max_in = int(settings.memoir_oral_normalize_llm_max_input_chars)
|
||||
t = (text or "").strip()
|
||||
if len(t) > max_in:
|
||||
logger.debug(
|
||||
"event=oral_normalize_llm_skip reason=input_too_long len={} max={}",
|
||||
len(t),
|
||||
max_in,
|
||||
)
|
||||
return None
|
||||
prompt = f"""你是口述转写纠错助手。只修正明显的同音错别字、别字与标点,使句子通顺可读。
|
||||
禁止增加事实、不补充细节、不摘要、不改写句式风格;不得新增人名、地名、数字、事件。
|
||||
若原文已通顺或无法确定错误,则照抄输入。
|
||||
|
||||
【用户口述】
|
||||
{t}
|
||||
|
||||
**JSON 输出**:只输出一个合法 JSON 对象。
|
||||
{{"normalized_text": "纠错后的完整文本(与输入等意,仅修错字与标点)"}}
|
||||
|
||||
只输出 JSON,不要其它文字。"""
|
||||
try:
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=int(settings.memoir_oral_normalize_llm_max_tokens),
|
||||
agent="oral_normalize.llm",
|
||||
)
|
||||
data = json.loads(extract_json_payload(raw))
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
out = (data.get("normalized_text") or "").strip()
|
||||
if not out:
|
||||
return None
|
||||
return out
|
||||
except Exception as e:
|
||||
logger.warning("oral_normalize LLM 失败,回退规则结果: {}", e)
|
||||
return None
|
||||
return llm_normalize_text(
|
||||
text,
|
||||
llm,
|
||||
max_input_chars=int(settings.memoir_oral_normalize_llm_max_input_chars),
|
||||
max_tokens=int(settings.memoir_oral_normalize_llm_max_tokens),
|
||||
agent_name="oral_normalize.llm",
|
||||
)
|
||||
|
||||
|
||||
def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str:
|
||||
@@ -82,7 +39,7 @@ def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str:
|
||||
if mode == "off":
|
||||
return text or ""
|
||||
|
||||
base = apply_oral_normalization_rules(text or "")
|
||||
base = apply_oral_rules(text or "")
|
||||
if mode != "llm":
|
||||
return base
|
||||
|
||||
|
||||
Reference in New Issue
Block a user