feat(api): 统一 LLM JSON 调用层 llm_json_call,按域 Schema 迁移 chat/memoir agents
This commit is contained in:
@@ -4,30 +4,22 @@ Phase1 批处理:一次 LLM 调用完成多段的抽取 + 章节分类(与
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from app.agents.memoir.prompts import get_batch_memoir_phase1_prep_prompt
|
||||
from app.agents.memoir.schemas import BatchPhase1LLMOutput
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.agents.stage_constants import STAGE_SLOT_KEYS
|
||||
from app.core.config import settings
|
||||
from app.core.json_utils import extract_json_payload
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.llm_call import LLMCallError, llm_json_call
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.models import Segment
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
STAGE_ALLOWED_SLOTS: Dict[str, frozenset[str]] = {
|
||||
"childhood": frozenset(
|
||||
{"place", "people", "daily_life", "emotion", "turning_event"}
|
||||
),
|
||||
"education": frozenset({"school", "city", "motivation", "challenge", "change"}),
|
||||
"career": frozenset({"job", "environment", "decision", "pressure", "growth"}),
|
||||
"family": frozenset(
|
||||
{"relationship", "conflict", "support", "responsibility", "change"}
|
||||
),
|
||||
"belief": frozenset({"value", "regret", "pride", "lesson"}),
|
||||
k: frozenset(v) for k, v in STAGE_SLOT_KEYS.items()
|
||||
}
|
||||
|
||||
|
||||
@@ -73,32 +65,35 @@ def run_batch_phase1_prep(
|
||||
slots_snapshot=_slots_snapshot(state),
|
||||
segment_items=items,
|
||||
)
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=int(settings.memoir_phase1_batch_llm_max_tokens),
|
||||
agent="BatchPhase1Prep.run",
|
||||
)
|
||||
parsed = json.loads(extract_json_payload(raw))
|
||||
rows = parsed.get("segments") or []
|
||||
if not isinstance(rows, list):
|
||||
raise ValueError("batch phase1: segments must be a list")
|
||||
try:
|
||||
parsed = llm_json_call(
|
||||
llm,
|
||||
prompt,
|
||||
BatchPhase1LLMOutput,
|
||||
max_tokens=int(settings.memoir_phase1_batch_llm_max_tokens),
|
||||
agent="BatchPhase1Prep.run",
|
||||
)
|
||||
except LLMCallError as e:
|
||||
logger.warning("batch phase1 LLM 解析失败: {}", e)
|
||||
raise ValueError("batch phase1: llm parse failed") from e
|
||||
|
||||
rows = parsed.segments
|
||||
if not rows:
|
||||
raise ValueError("batch phase1: segments must be a non-empty list")
|
||||
|
||||
by_id: Dict[str, BatchPhase1SegmentRow] = {}
|
||||
for row in rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
sid = str(row.get("id", "")).strip()
|
||||
sid = str(row.id).strip()
|
||||
if not sid:
|
||||
continue
|
||||
ds = str(row.get("detected_stage", "") or "").strip().lower()
|
||||
slots_raw = row.get("slots") or {}
|
||||
slots: Dict[str, str] = {}
|
||||
if isinstance(slots_raw, dict):
|
||||
for k, v in slots_raw.items():
|
||||
if k and isinstance(k, str):
|
||||
slots[k] = v if isinstance(v, str) else str(v)
|
||||
cat_raw = str(row.get("chapter_category", row.get("category", "")) or "")
|
||||
ds = str(row.detected_stage or "").strip().lower()
|
||||
slots_raw = row.slots or {}
|
||||
slots = {
|
||||
k: v if isinstance(v, str) else str(v)
|
||||
for k, v in slots_raw.items()
|
||||
if k and isinstance(k, str)
|
||||
}
|
||||
cat_raw = str(row.chapter_category or "")
|
||||
by_id[sid] = BatchPhase1SegmentRow(
|
||||
detected_stage=ds or (state.current_stage or "childhood"),
|
||||
slots=slots,
|
||||
|
||||
Reference in New Issue
Block a user