fix(chat): 重复追问被拦截时再多问一次模型
当防重复问句守卫把整段回复削成只剩「这一段我记住了。」这一句套话时,用带纠偏说明的 system 提示再调一次 LLM,尽量避免用户只看到一句干巴巴的确认(ack)。仍只重试一次,并记录日志、在 meta 中标记 duplicate_question_guard_llm_retry。
This commit is contained in:
@@ -13,6 +13,7 @@ from app.agents.chat.helpers import format_history_string, get_history_with_wind
|
||||
from app.agents.chat.interview_state_hints import (
|
||||
apply_duplicate_question_guard,
|
||||
extract_recent_questions,
|
||||
segments_are_only_duplicate_guard_fallback,
|
||||
update_recent_questions,
|
||||
)
|
||||
from app.agents.chat.interview_turn_plan import plan_interview_turn
|
||||
@@ -44,6 +45,45 @@ logger = get_logger(__name__)
|
||||
# Shown to the user when the LLM is unavailable or the call fails (does not expose exception details, does not trigger TTS).
|
||||
_FALLBACK_REPLY = "刚才网络不太稳,没接上。你可以再说一遍,或稍后再试。"
|
||||
|
||||
# Extra system text appended only when the duplicate-question guard has cut the reply down to its single-sentence fallback; the model is called just one more time.
|
||||
_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX = """## 二次生成(纠偏)
|
||||
上一版模型输出因包含与「最近已问过的问题」或「已确认事实」重复的问句,已被系统弃用。请**重新写一整条回复**:
|
||||
- 仍须遵守上文全部主规则;
|
||||
- 先贴着用户本轮原话承接半句到一两句(可有画面感);
|
||||
- **禁止**再用与刚才同义、仅换说法的确认型问句;
|
||||
- 若要提问,须换**全新角度**,并锚在用户刚说的具体细节里;也可以本轮**完全不提问**,只并肩承接;
|
||||
- **禁止**整段只有「这一段我记住了」或同类无信息套话。"""
|
||||
|
||||
|
||||
def _finalize_chat_segments_after_llm(
    response_text: str,
    *,
    max_segments: int,
    max_chars: int,
    memoir_state: MemoirStateSchema,
    recent_questions: list[str],
) -> tuple[list[str], bool]:
    """Turn one raw LLM reply into the final list of chat segments.

    The same post-processing is applied to the first model answer and to
    the retry answer, so it lives in one place.

    Steps, in order:
      1. Split ``response_text`` with ``segments_from_llm_response``; if
         that yields nothing, use the stripped text as a single segment.
      2. Trim with ``truncate_chat_segments``; if that yields nothing, use
         the first ``max_chars`` characters of the stripped text.
      3. Pass the result through ``nonempty_segments_or_fallback`` with
         ``_FALLBACK_REPLY`` as the fallback.
      4. Run ``apply_duplicate_question_guard`` against ``memoir_state``
         and ``recent_questions``.

    Args:
        response_text: Raw text returned by the model.
        max_segments: Segment limit forwarded to the split and trim helpers.
        max_chars: Per-segment character limit forwarded to the trim helper.
        memoir_state: Interview state handed to the duplicate-question guard.
        recent_questions: Recently asked questions handed to the guard.

    Returns:
        The two values produced by ``apply_duplicate_question_guard``: the
        guarded segments and the flag the caller reports as
        ``duplicate_question_guard_triggered``.
    """
    stripped_fallback = response_text.strip

    parsed_segments = segments_from_llm_response(
        response_text,
        max_segments=max_segments,
    ) or [stripped_fallback()]

    trimmed_segments = truncate_chat_segments(
        parsed_segments,
        max_segments=max_segments,
        max_chars_per_segment=max_chars,
    ) or [stripped_fallback()[:max_chars]]

    safe_segments = nonempty_segments_or_fallback(
        trimmed_segments,
        fallback=_FALLBACK_REPLY,
    )

    guarded_segments, guard_triggered = apply_duplicate_question_guard(
        safe_segments,
        state=memoir_state,
        recent_questions=recent_questions,
    )
    return guarded_segments, guard_triggered
|
||||
|
||||
|
||||
def _get_langchain_llm():
|
||||
try:
|
||||
@@ -219,29 +259,70 @@ class InterviewAgent:
|
||||
log_agent_payload(
|
||||
logger, "InterviewAgent.generate_response.raw_response", response_text
|
||||
)
|
||||
raw_list = segments_from_llm_response(
|
||||
rq_base = recent_questions or memoir_state.recent_questions
|
||||
out, deduped = _finalize_chat_segments_after_llm(
|
||||
response_text,
|
||||
max_segments=max_segments,
|
||||
max_chars=max_chars,
|
||||
memoir_state=memoir_state,
|
||||
recent_questions=rq_base,
|
||||
)
|
||||
if not raw_list:
|
||||
raw_list = [response_text.strip()]
|
||||
out = truncate_chat_segments(
|
||||
raw_list,
|
||||
max_segments=max_segments,
|
||||
max_chars_per_segment=max_chars,
|
||||
)
|
||||
if not out:
|
||||
out = [response_text.strip()[:max_chars]]
|
||||
out = nonempty_segments_or_fallback(out, fallback=_FALLBACK_REPLY)
|
||||
out, deduped = apply_duplicate_question_guard(
|
||||
out,
|
||||
state=memoir_state,
|
||||
recent_questions=recent_questions or memoir_state.recent_questions,
|
||||
)
|
||||
updated_recent_questions = update_recent_questions(
|
||||
recent_questions or memoir_state.recent_questions,
|
||||
out,
|
||||
)
|
||||
retry_used = False
|
||||
if deduped and segments_are_only_duplicate_guard_fallback(out):
|
||||
retry_system = (
|
||||
f"{system_prompt}\n\n{_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX}"
|
||||
)
|
||||
retry_messages: List[Any] = [
|
||||
SystemMessage(content=retry_system),
|
||||
*hw.window,
|
||||
HumanMessage(content=text_for_model),
|
||||
]
|
||||
log_agent_payload(
|
||||
logger,
|
||||
"InterviewAgent.generate_response.retry_prompt",
|
||||
format_history_string(
|
||||
retry_messages,
|
||||
omit_system_body=settings.agent_log_omit_system_message_body,
|
||||
),
|
||||
)
|
||||
llm_t1 = time.perf_counter()
|
||||
with agent_span(
|
||||
logger,
|
||||
"InterviewAgent.generate_response.llm_retry",
|
||||
conversation_id=conversation_id,
|
||||
stage=memoir_state.current_stage,
|
||||
):
|
||||
logger.info(
|
||||
"event=chat_prompt_built agent=InterviewAgent.duplicate_guard_retry "
|
||||
"prompt_chars={} conversation_id={}",
|
||||
_message_contents_char_count(retry_messages),
|
||||
conversation_id,
|
||||
)
|
||||
response_retry = await chat_llm.ainvoke(retry_messages)
|
||||
logger.info(
|
||||
"event=chat_llm_done agent=InterviewAgent.duplicate_guard_retry "
|
||||
"response_latency_ms={:.2f}",
|
||||
(time.perf_counter() - llm_t1) * 1000,
|
||||
)
|
||||
response_text_retry = (
|
||||
response_retry.content
|
||||
if hasattr(response_retry, "content")
|
||||
else str(response_retry)
|
||||
)
|
||||
log_agent_payload(
|
||||
logger,
|
||||
"InterviewAgent.generate_response.raw_response_retry",
|
||||
response_text_retry,
|
||||
)
|
||||
out, deduped = _finalize_chat_segments_after_llm(
|
||||
response_text_retry,
|
||||
max_segments=max_segments,
|
||||
max_chars=max_chars,
|
||||
memoir_state=memoir_state,
|
||||
recent_questions=rq_base,
|
||||
)
|
||||
retry_used = True
|
||||
updated_recent_questions = update_recent_questions(rq_base, out)
|
||||
log_agent_summary(
|
||||
logger,
|
||||
"InterviewAgent.generate_response segments={} conversation_id={} "
|
||||
@@ -256,6 +337,7 @@ class InterviewAgent:
|
||||
interview_state_meta={
|
||||
"recent_questions": updated_recent_questions,
|
||||
"duplicate_question_guard_triggered": deduped,
|
||||
"duplicate_question_guard_llm_retry": retry_used,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user