feat(api): 叙事 prompt、职业上下文、读路径章节、WS 解耦与错误脱敏

- 回忆录:事实边界补充允许清单;传记文体示例与 JSON 叙事要求对齐
- default 职业提示 occupation_context;cadre/military 退休语境
- GET 章节读路径零写入,prepare_chapter_read_view + markdown_for_response
- 文本归一抽到 core/text_normalize;移除弃用 reply 策略与 recompose_chapters_for_story
- ConversationService:WS 连接/用户段落/结束对话;对外错误固定文案
- 测试:HTTP 脱敏契约、章节读视图、occupation 与 background_voice
This commit is contained in:
Kevin
2026-04-01 11:49:33 +08:00
parent a5473e8fe2
commit 53d9e003af
28 changed files with 598 additions and 397 deletions

View File

@@ -8,64 +8,21 @@
from __future__ import annotations
import json
from typing import Any
from app.core.config import settings
from app.core.langchain_llm import invoke_json_object
from app.core.logging import get_logger
from app.features.conversation.input_normalize import apply_conversation_input_rules
from app.features.memoir.memoir_images.json_payload import extract_json_payload
logger = get_logger(__name__)
def apply_oral_normalization_rules(text: str) -> str:
    """Run the deterministic normalization rules over ``text``.

    Equivalent to `apply_conversation_input_rules`; the historical memoir
    name is kept.
    """
    normalized = apply_conversation_input_rules(text)
    return normalized
from app.core.text_normalize import apply_oral_rules, llm_normalize_text
def _llm_normalize_oral(text: str, llm: Any) -> str | None:
    """Fix only obvious typos and homophone errors via the LLM, adding no facts.

    Args:
        text: Raw oral text; ``None``/empty/whitespace-only input is rejected.
        llm: LLM handle passed through to ``invoke_json_object``; a falsy
            value disables the call.

    Returns:
        The stripped ``normalized_text`` field of the model's JSON reply, or
        ``None`` when the call is skipped (no llm, blank input, input longer
        than ``settings.memoir_oral_normalize_llm_max_input_chars``), when
        the reply is not a JSON object or has an empty ``normalized_text``,
        or when any exception is raised along the way.
    """
    # Nothing to do without a model or without any non-whitespace input.
    if not llm or not (text or "").strip():
        return None
    max_in = int(settings.memoir_oral_normalize_llm_max_input_chars)
    t = (text or "").strip()
    # Over-long input is skipped rather than truncated; only a debug line is logged.
    if len(t) > max_in:
        logger.debug(
            "event=oral_normalize_llm_skip reason=input_too_long len={} max={}",
            len(t),
            max_in,
        )
        return None
    # The prompt text is runtime data sent to the model; keep it byte-for-byte.
    prompt = f"""你是口述转写纠错助手。只修正明显的同音错别字、别字与标点,使句子通顺可读。
禁止增加事实、不补充细节、不摘要、不改写句式风格;不得新增人名、地名、数字、事件。
若原文已通顺或无法确定错误,则照抄输入。
【用户口述】
{t}
**JSON 输出**:只输出一个合法 JSON 对象。
{{"normalized_text": "纠错后的完整文本(与输入等意,仅修错字与标点)"}}
只输出 JSON不要其它文字。"""
    try:
        raw = invoke_json_object(
            llm,
            prompt,
            max_tokens=int(settings.memoir_oral_normalize_llm_max_tokens),
            agent="oral_normalize.llm",
        )
        # Extract the JSON payload from the raw reply before parsing it.
        data = json.loads(extract_json_payload(raw))
        if not isinstance(data, dict):
            return None
        # A missing, null or blank "normalized_text" counts as a failure.
        out = (data.get("normalized_text") or "").strip()
        if not out:
            return None
        return out
    except Exception as e:
        # Best-effort: any failure (call, payload extraction, JSON parsing)
        # is logged and reported to the caller as None.
        logger.warning("oral_normalize LLM 失败,回退规则结果: {}", e)
        return None
    # NOTE(review): as written, the statement below is unreachable, because
    # every path through the try/except above returns. It looks like a newer
    # implementation that delegates to `llm_normalize_text` with the same
    # settings and agent name, shown alongside the older inline body; confirm
    # which body is intended.
    return llm_normalize_text(
        text,
        llm,
        max_input_chars=int(settings.memoir_oral_normalize_llm_max_input_chars),
        max_tokens=int(settings.memoir_oral_normalize_llm_max_tokens),
        agent_name="oral_normalize.llm",
    )
def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str:
@@ -82,7 +39,7 @@ def normalize_oral_for_memoir(text: str, *, llm: Any | None = None) -> str:
if mode == "off":
return text or ""
base = apply_oral_normalization_rules(text or "")
base = apply_oral_rules(text or "")
if mode != "llm":
return base