feat(api): 统一 LLM JSON 调用层 llm_json_call,按域 Schema 迁移 chat/memoir agents

This commit is contained in:
Kevin
2026-04-03 13:34:27 +08:00
parent 41518bda11
commit 43d1689e9c
28 changed files with 1006 additions and 352 deletions

View File

@@ -0,0 +1,13 @@
"""共用用户可见回复禁令(访谈 / 资料收集)。"""
def chat_output_rules() -> str:
    """Return the shared prohibition text for user-visible replies.

    The text forbids bracketed strategy/stage notes, thinking process and
    meta commentary, an interviewer tone, re-confirming information the user
    already gave or that can be inferred, and inventing details the user
    never mentioned.
    """
    # One fragment per group of clauses; joined without separators so the
    # result is a single uninterrupted sentence.
    rule_fragments = (
        "**禁止**输出括号、括号内的策略/舞台说明(例如「(先接住情绪)」「(共情)」)、",
        "思考过程或任何元注释——这些只存在于系统指令里,**绝不可**出现在你对用户说的话中;",
        "采访腔(「我注意到」「我想了解」);重复确认对方已经说过或能推断出的信息;编造对方没说的细节。",
    )
    return "".join(rule_fragments)
__all__ = ["chat_output_rules"]

View File

@@ -3,7 +3,6 @@ ProfileAgent用户资料收集 Specialist
负责提取资料、资料追问、资料收集开场白,不负责 Redis 持久化(由 Orchestrator 统一处理)
"""
import json
import time
from typing import Any, Dict, List, Optional
@@ -15,11 +14,11 @@ from app.agents.chat.prompts_profile import (
get_profile_followup_prompt,
get_profile_greeting_prompt,
)
from app.agents.chat.schemas import ProfileExtractionOutput
from app.core.agent_logging import agent_span, log_agent_payload, log_agent_summary
from app.core.config import settings
from app.core.dependencies import get_llm_provider
from app.core.json_utils import extract_json_payload
from app.core.langchain_llm import ainvoke_json_object
from app.core.llm_call import allm_json_call
from app.core.logging import get_logger
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
@@ -53,6 +52,53 @@ class ProfileAgent:
def __init__(self) -> None:
    """Store the LLM returned by ``_get_langchain_llm()`` on ``self.llm``."""
    self.llm = _get_langchain_llm()
async def _invoke_chat(
    self,
    messages: List[Any],
    *,
    max_tokens: int,
    conversation_id: Optional[str],
    agent_name: str,
) -> str:
    """Run one chat completion on ``self.llm`` and return its text.

    Args:
        messages: Passed unchanged to the bound LLM's ``ainvoke``.
        max_tokens: Bound onto ``self.llm`` via ``bind`` for this call only.
        conversation_id: Forwarded to the logging span; a falsy value is
            sent as an empty string.
        agent_name: Label used for the span name (``"<agent_name>.llm"``)
            and for the ``chat_llm_done`` latency log line.

    Returns:
        ``response.content`` when the response has that attribute, otherwise
        ``str(response)``; a falsy result is returned as ``""``.
    """
    span_name = f"{agent_name}.llm"
    bound_llm = self.llm.bind(max_tokens=max_tokens)
    started_at = time.perf_counter()
    with agent_span(logger, span_name, conversation_id=conversation_id or ""):
        response = await bound_llm.ainvoke(messages)

    # Latency covers span entry/exit plus the LLM round trip.
    elapsed_ms = (time.perf_counter() - started_at) * 1000
    logger.info(
        "event=chat_llm_done agent={} response_latency_ms={:.2f}",
        agent_name,
        elapsed_ms,
    )

    if hasattr(response, "content"):
        text = response.content
    else:
        text = str(response)
    return text or ""
async def _segments_from_response(
    self,
    response_text: str,
    *,
    max_segments: int,
    max_chars_per_segment: int,
    fallback: str,
) -> List[str]:
    """Turn raw LLM text into a list of reply segments.

    Args:
        response_text: Raw text returned by the LLM; logged as a payload.
        max_segments: Passed to both the splitter and the truncator.
        max_chars_per_segment: Passed to the truncator; also caps the
            single-segment fallback built from the stripped raw text.
        fallback: Handed to ``nonempty_segments_or_fallback`` together with
            the computed segments.

    Returns:
        Whatever ``nonempty_segments_or_fallback`` returns for the computed
        segments and ``fallback``.
    """
    log_agent_payload(
        logger,
        "ProfileAgent._segments_from_response.raw_response",
        response_text,
    )

    # No split result: treat the whole stripped reply as one segment.
    candidates = segments_from_llm_response(
        response_text, max_segments=max_segments
    ) or [response_text.strip()]

    trimmed = truncate_chat_segments(
        candidates,
        max_segments=max_segments,
        max_chars_per_segment=max_chars_per_segment,
    )

    # Truncation left nothing: fall back to the capped raw reply.
    final_segments = trimmed or [response_text.strip()[:max_chars_per_segment]]
    return nonempty_segments_or_fallback(final_segments, fallback=fallback)
async def extract_profile_from_message(
self,
user_message: str,
@@ -81,16 +127,17 @@ class ProfileAgent:
prompt = get_profile_extraction_prompt(
user_message, missing_fields, recent_dialogue=recent_dialogue or None
)
content = await ainvoke_json_object(
parsed = await allm_json_call(
self.llm,
prompt,
max_tokens=512,
ProfileExtractionOutput,
max_tokens=settings.chat_profile_extract_max_tokens,
agent="ProfileAgent.extract_profile_from_message",
fallback_factory=lambda: ProfileExtractionOutput(),
)
parsed = json.loads(extract_json_payload(content))
result = {}
if "birth_year" in parsed and parsed["birth_year"] is not None:
raw = parsed["birth_year"]
if parsed.birth_year is not None:
raw = parsed.birth_year
if isinstance(raw, int) and 1900 <= raw <= 2100:
result["birth_year"] = raw
elif isinstance(raw, str) and raw.isdigit():
@@ -99,14 +146,14 @@ class ProfileAgent:
y = 1900 + y if y >= 50 else 2000 + y
if 1900 <= y <= 2100:
result["birth_year"] = y
if "birth_place" in parsed and parsed["birth_place"]:
result["birth_place"] = str(parsed["birth_place"])
if "grew_up_place" in parsed and parsed["grew_up_place"]:
result["grew_up_place"] = str(parsed["grew_up_place"])
if "occupation" in parsed and parsed["occupation"]:
result["occupation"] = str(parsed["occupation"])
if parsed.birth_place:
result["birth_place"] = str(parsed.birth_place)
if parsed.grew_up_place:
result["grew_up_place"] = str(parsed.grew_up_place)
if parsed.occupation:
result["occupation"] = str(parsed.occupation)
return result
except (json.JSONDecodeError, Exception) as e:
except Exception as e:
logger.error("提取资料信息失败: {}", e)
return {}
@@ -143,61 +190,33 @@ class ProfileAgent:
"ProfileAgent.followup.prompt",
format_history_string(messages),
)
chat_llm = self.llm.bind(
max_tokens=settings.chat_profile_followup_max_tokens
)
llm_t0 = time.perf_counter()
with agent_span(
logger,
"ProfileAgent.followup.llm",
conversation_id=conversation_id,
):
logger.info(
"event=chat_prompt_built agent=ProfileAgent.generate_profile_followup "
"prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
_message_contents_char_count(messages),
hw.turn_total,
len(hw.window) // 2,
)
response = await chat_llm.ainvoke(messages)
prompt_chars = _message_contents_char_count(messages)
logger.info(
"event=chat_llm_done agent=ProfileAgent.generate_profile_followup "
"response_latency_ms={:.2f}",
(time.perf_counter() - llm_t0) * 1000,
"event=chat_prompt_built agent=ProfileAgent.generate_profile_followup "
"prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
prompt_chars,
hw.turn_total,
len(hw.window) // 2,
)
response_text = (
response.content if hasattr(response, "content") else str(response)
response_text = await self._invoke_chat(
messages,
max_tokens=settings.chat_profile_followup_max_tokens,
conversation_id=conversation_id,
agent_name="ProfileAgent.generate_profile_followup",
)
log_agent_payload(
logger, "ProfileAgent.followup.raw_response", response_text
)
raw_list = segments_from_llm_response(response_text, max_segments=3)
if not raw_list:
raw_list = [response_text.strip()]
out = truncate_chat_segments(
raw_list,
segments = await self._segments_from_response(
response_text,
max_segments=3,
max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
fallback="谢谢分享!能再告诉我一些吗?",
)
log_agent_summary(
logger,
"ProfileAgent.followup segments={} conversation_id={}",
len(out),
len(segments),
conversation_id,
)
segments = (
out
if out
else [
response_text.strip()[
: settings.chat_interview_max_chars_per_segment
]
]
)
return nonempty_segments_or_fallback(
segments,
fallback="谢谢分享!能再告诉我一些吗?",
)
return segments
except Exception as e:
logger.error("生成资料跟进回复失败: {}", e)
return ["谢谢分享!能再告诉我一些吗?"]
@@ -229,61 +248,33 @@ class ProfileAgent:
log_agent_payload(
logger, "ProfileAgent.greeting.prompt", format_history_string(messages)
)
chat_llm = self.llm.bind(
max_tokens=settings.chat_profile_followup_max_tokens
)
llm_t0 = time.perf_counter()
with agent_span(
logger,
"ProfileAgent.greeting.llm",
conversation_id=conversation_id,
):
logger.info(
"event=chat_prompt_built agent=ProfileAgent.generate_profile_greeting "
"prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
_message_contents_char_count(messages),
hw.turn_total,
len(hw.window) // 2,
)
response = await chat_llm.ainvoke(messages)
prompt_chars = _message_contents_char_count(messages)
logger.info(
"event=chat_llm_done agent=ProfileAgent.generate_profile_greeting "
"response_latency_ms={:.2f}",
(time.perf_counter() - llm_t0) * 1000,
"event=chat_prompt_built agent=ProfileAgent.generate_profile_greeting "
"prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
prompt_chars,
hw.turn_total,
len(hw.window) // 2,
)
response_text = (
response.content if hasattr(response, "content") else str(response)
response_text = await self._invoke_chat(
messages,
max_tokens=settings.chat_profile_followup_max_tokens,
conversation_id=conversation_id,
agent_name="ProfileAgent.generate_profile_greeting",
)
log_agent_payload(
logger, "ProfileAgent.greeting.raw_response", response_text
)
raw_list = segments_from_llm_response(response_text, max_segments=2)
if not raw_list:
raw_list = [response_text.strip()]
out = truncate_chat_segments(
raw_list,
segments = await self._segments_from_response(
response_text,
max_segments=2,
max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
fallback="你好!在开始之前,能告诉我你是哪一年出生的吗?",
)
log_agent_summary(
logger,
"ProfileAgent.greeting segments={} conversation_id={}",
len(out),
len(segments),
conversation_id,
)
segments = (
out
if out
else [
response_text.strip()[
: settings.chat_interview_max_chars_per_segment
]
]
)
return nonempty_segments_or_fallback(
segments,
fallback="你好!在开始之前,能告诉我你是哪一年出生的吗?",
)
return segments
except Exception as e:
logger.error("生成资料收集开场白失败: {}", e)
return [

View File

@@ -2,6 +2,8 @@
Chat 模块提示词:用户资料收集 + 对话访谈
"""
from app.agents.chat.output_rules import chat_output_rules
# Profile prompts(用户资料收集)
from app.agents.chat.prompts_profile import (
PROFILE_FIELD_NAMES,
@@ -20,6 +22,7 @@ from app.agents.chat.prompts_conversation import (
)
__all__ = [
"chat_output_rules",
"PROFILE_FIELD_NAMES",
"format_user_profile_context",
"get_missing_profile_fields",

View File

@@ -19,7 +19,8 @@ from app.agents.chat.personas import (
get_opening_persona_line,
normalize_interview_persona,
)
from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH
from app.agents.chat.output_rules import chat_output_rules
from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH, STAGE_ERA_HINTS
from app.core.config import settings
SLOT_NAME_MAP = {
@@ -176,7 +177,7 @@ def get_opening_prompt(
## 格式
- 可用 [SPLIT] 分成最多 2 条;或一条里「问候 + 问题」。
- **禁止**括号、括号内策略/旁白(如「(先接住情绪)」)、思考过程;不要替用户编回答。
- {chat_output_rules()} 不要替用户编回答。
{style_examples}
@@ -202,18 +203,7 @@ def _build_era_context(current_stage: str, user_profile_context: str) -> str:
if not birth_year:
return ""
stage_era_map = {
"childhood": (0, 12),
"education": (6, 22),
"career": (18, 50),
"family": (20, 50),
"belief": (30, 60),
# chapter / 防御性 key与 belief 同档年龄参照
"beliefs": (30, 60),
"summary": (30, 60),
}
age_range = stage_era_map.get(current_stage, (0, 30))
age_range = STAGE_ERA_HINTS.get(current_stage, (0, 30))
era_start = birth_year + age_range[0]
era_end = birth_year + age_range[1]
@@ -463,7 +453,7 @@ def get_guided_conversation_prompt(
{dynamic_guidance}{uncovered_hint}
## 不要做的
**禁止**输出括号、括号内的策略/舞台说明(例如「(先接住情绪)」「(共情)」)、思考过程或任何元注释——这些只存在于系统指令里,**绝不可**出现在你对用户说的话中;采访腔(「我注意到」「我想了解」);重复确认对方已经说过或能推断出的信息;编造对方没说的细节。
{chat_output_rules()}
直接输出(仅自然口语,无任何括号前缀或旁白):"""

View File

@@ -4,6 +4,8 @@
from typing import Dict, List, Optional
from app.agents.chat.output_rules import chat_output_rules
PROFILE_FIELD_NAMES = {
"birth_year": "出生年份",
@@ -40,7 +42,7 @@ def get_profile_greeting_prompt(missing_fields: List[str], nickname: str = "") -
- "你现在是做什么工作的呀?或者之前主要从事什么职业?"
## 严格禁止
- 禁止输出括号注释、思考过程
- {chat_output_rules()}
- 禁止说"我需要收集信息"之类的机械话
- 禁止一次列出所有问题
@@ -71,12 +73,10 @@ def get_profile_extraction_prompt(
return f"""请从以下内容中提取用户已提到的基础资料信息。{dialogue_section}用户本轮回答:
"{user_message}"
**JSON 输出**:接口已启用 `response_format=json_object`DeepSeek JSON 模式),你必须只输出一个合法 JSON 对象。
需要提取的字段(只提取确实在对话中出现过的):
{missing_names}
请返回 JSON 格式,只包含确实提到的字段
输出示例(只含确实提到的字段;无则 {{}}
{{
"birth_year": 1965,
"birth_place": "湖南长沙",
@@ -88,9 +88,7 @@ def get_profile_extraction_prompt(
1. birth_year 填整数(四位数),如"65年出生"转为 1965
2. 如果用户在任一轮说过出生地/成长地/职业等,都要提取
3. 只提取明确提到的信息,不要猜测
4. 如果没有提取到任何信息,返回空对象 {{}}
只返回 JSON不要其他内容。"""
4. 如果没有提取到任何信息,返回空对象 {{}}"""
def get_profile_followup_prompt(
@@ -145,8 +143,7 @@ def get_profile_followup_prompt(
严格禁止:
- **严禁再次询问「已知信息」中已列出的内容**(例如已知出生年份就绝不要再问哪年出生)
- 禁止输出括号注释、思考过程
- 禁止说"我注意到""我需要了解"
- {chat_output_rules()}
回复格式:多条消息用 [SPLIT] 分隔。
直接输出你要说的话:"""

View File

@@ -0,0 +1,19 @@
"""LLM JSON 边界契约(Chat agents)"""
from __future__ import annotations
from pydantic import BaseModel, Field
class StageDetectionOutput(BaseModel):
    """Schema for the JSON object returned by the stage-detection LLM call."""

    # Empty string is the default when the key is absent from the JSON.
    detected_stage: str = Field("", description="CHAT_STAGES key")
class ProfileExtractionOutput(BaseModel):
    """Schema for the JSON object returned by the profile-extraction LLM call.

    Every field is optional and defaults to ``None``, so an empty JSON
    object validates.
    """

    # Declared as int or str; no range check is applied at schema level.
    birth_year: int | str | None = Field(default=None)
    birth_place: str | None = Field(default=None)
    grew_up_place: str | None = Field(default=None)
    occupation: str | None = Field(default=None)
__all__ = ["ProfileExtractionOutput", "StageDetectionOutput"]

View File

@@ -4,9 +4,9 @@
from __future__ import annotations
import json
from typing import Any, Optional
from app.agents.chat.schemas import StageDetectionOutput
from app.agents.chat.stage_prompts import (
VALID_CHAT_LIFE_STAGES,
get_chat_stage_detection_prompt,
@@ -18,9 +18,8 @@ from app.agents.stage_constants import (
normalize_chat_stage,
)
from app.core.config import settings
from app.core.langchain_llm import ainvoke_json_object
from app.core.llm_call import allm_json_call
from app.core.logging import get_logger
from app.core.json_utils import extract_json_payload
logger = get_logger(__name__)
@@ -51,6 +50,11 @@ def keyword_fallback_primary_stage(user_message: str) -> str:
return candidates[0]
def _keyword_fallback_stage(user_message: str, fb: str) -> str:
    """Pick a stage from keywords in ``user_message``, else return ``fb``.

    When ``keyword_fallback_primary_stage`` yields a truthy value it is
    passed through ``normalize_chat_stage`` with ``fb`` as the fallback;
    otherwise ``fb`` is returned unchanged.
    """
    keyword_stage = keyword_fallback_primary_stage(user_message)
    if not keyword_stage:
        return fb
    return normalize_chat_stage(keyword_stage, fb)
async def detect_primary_life_stage(
user_message: str,
current_stage: str,
@@ -64,35 +68,30 @@ async def detect_primary_life_stage(
"""
fb = normalize_chat_stage(current_stage, "childhood")
if not settings.chat_stage_detection_enabled:
k = keyword_fallback_primary_stage(user_message)
return normalize_chat_stage(k, fb) if k else fb
return _keyword_fallback_stage(user_message, fb)
if skip_llm and settings.chat_stage_detection_skip_llm_on_insufficient_signal:
k = keyword_fallback_primary_stage(user_message)
return normalize_chat_stage(k, fb) if k else fb
return _keyword_fallback_stage(user_message, fb)
if not llm:
k = keyword_fallback_primary_stage(user_message)
return normalize_chat_stage(k, fb) if k else fb
return _keyword_fallback_stage(user_message, fb)
try:
prompt = get_chat_stage_detection_prompt(user_message, fb)
raw = await ainvoke_json_object(
llm,
prompt,
max_tokens=settings.chat_stage_detection_max_tokens,
agent="detect_primary_life_stage",
prompt = get_chat_stage_detection_prompt(user_message, fb)
def fallback_factory() -> StageDetectionOutput:
return StageDetectionOutput(
detected_stage=_keyword_fallback_stage(user_message, fb)
)
if not raw.strip():
k = keyword_fallback_primary_stage(user_message)
return normalize_chat_stage(k, fb) if k else fb
parsed = json.loads(extract_json_payload(raw))
detected = parsed.get("detected_stage", fb)
return normalize_chat_stage(str(detected) if detected is not None else "", fb)
except (json.JSONDecodeError, Exception) as e:
logger.warning("detect_primary_life_stage 解析失败,使用关键词回退: {}", e)
k = keyword_fallback_primary_stage(user_message)
return normalize_chat_stage(k, fb) if k else fb
result = await allm_json_call(
llm,
prompt,
StageDetectionOutput,
max_tokens=settings.chat_stage_detection_max_tokens,
agent="detect_primary_life_stage",
fallback_factory=fallback_factory,
)
return normalize_chat_stage(result.detected_stage, fb)
def life_stage_display_name(stage: str) -> str:

View File

@@ -26,8 +26,7 @@ def get_chat_stage_detection_prompt(user_message: str, current_stage: str) -> st
用户话语:
"{user_message}"
**JSON 输出**:只输出一个合法 JSON 对象,不要 markdown 或其它文字,例如:
{{"detected_stage":"education"}}
输出形状示例:{{"detected_stage":"education"}}
规则:
1. 根据**本轮**与人生故事相关的实质内容判断主阶段;不要因系统当前阶段而强行归类。
@@ -37,5 +36,4 @@ def get_chat_stage_detection_prompt(user_message: str, current_stage: str) -> st
5. 若主要是价值观、信念、人生感悟、遗憾与骄傲等 → belief。
6. 若主要是童年、幼年成长环境、小时候 → childhood。
7. 若本轮**没有**任何与人生经历相关的实质内容(纯寒暄、谢谢、指令、语气词),则 detected_stage 取 **{current_stage}**(保持不动)。
只返回 JSON。"""
"""