# life-echo/api/app/agents/memoir/classification_agent.py

"""
ClassificationAgent：将内容分类到 8 个章节类别，或判定无价值返回 None。
对应现有逻辑：_classify_chapter_category
"""

from __future__ import annotations

from typing import Any, Optional

from app.agents.memoir.prompts import (
    CHAPTER_CATEGORIES,
    get_chapter_classification_prompt,
)
from app.core.logging import get_logger

logger = get_logger(__name__)

# Keywords for each of the 5 coarse life stages (used as a fallback when the
# LLM call fails). _detect_stage walks this dict in insertion order and returns
# the first stage with any substring hit, so the order of entries is the
# priority order.
STAGE_KEYWORDS = {
    "childhood": ["童年", "小时候", "出生", "家乡", "小镇"],
    "education": ["上学", "学校", "老师", "同学", "教育", "大学"],
    "career": ["工作", "职业", "事业", "公司", "同事", "创业"],
    "family": ["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
    "belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}

# 5-stage name -> default 8-category chapter name (fallback when LLM
# classification fails).
_STAGE_TO_DEFAULT_CATEGORY = {
    "childhood": "childhood",
    "education": "education",
    "career": "career_early",
    "family": "family",
    "belief": "beliefs",
}


def _detect_stage(text: str, fallback_stage: str) -> str:
    """Pick the 5-stage name for *text* by keyword lookup.

    The text (``None``/empty treated as ``""``) is lower-cased and checked
    against ``STAGE_KEYWORDS`` in that dict's iteration order; the first stage
    with at least one keyword occurring as a substring is returned. When no
    stage matches, *fallback_stage* is returned unchanged.
    """
    haystack = (text or "").lower()
    matching_stages = (
        stage_name
        for stage_name, stage_words in STAGE_KEYWORDS.items()
        if any(keyword in haystack for keyword in stage_words)
    )
    # next() stops at the first match; the default covers the no-match case.
    return next(matching_stages, fallback_stage)


class ClassificationAgent:
    """Classify content into one of the 8 chapter categories, or return None
    when the LLM judges the content to have no memoir value."""

    # Characters LLMs commonly wrap around a one-word answer (whitespace,
    # quotes, backticks, markdown emphasis, ASCII and CJK punctuation/brackets).
    # Underscore is deliberately excluded because category names contain it.
    _ANSWER_WRAPPERS = " \t\r\n\"'`*.,;:!?()[]{}<>。，；：！？“”‘’「」『』《》【】"

    def classify(
        self,
        text: str,
        fallback_stage: str,
        llm: Any,
    ) -> Optional[str]:
        """Return the chapter category for *text*.

        Args:
            text: The content to classify.
            fallback_stage: 5-stage name used when neither the LLM nor the
                keyword lookup yields a result.
            llm: Object exposing a synchronous ``.invoke(prompt)``. The result
                may be an object with a string ``.content`` attribute or a
                plain string. A falsy ``llm`` skips the LLM step entirely.

        Returns:
            A member of ``CHAPTER_CATEGORIES``, or ``None`` when the LLM
            answers "none" (no memoir value). If the LLM is unavailable, fails,
            or answers something unrecognised, the keyword-based default
            category is returned instead.
        """
        if llm:
            try:
                prompt = get_chapter_classification_prompt(text)
                response = llm.invoke(prompt)
                # Accept both message-like objects (.content) and plain strings.
                raw_answer = getattr(response, "content", response)
                answer = (raw_answer or "").strip().lower()
                # Exact answer first (original behaviour), then the answer with
                # wrapper punctuation removed, so replies such as '"none"' or
                # 'family.' are honoured instead of silently falling through.
                for candidate in (answer, answer.strip(self._ANSWER_WRAPPERS)):
                    if candidate == "none":
                        logger.debug(
                            "LLM 判定内容无回忆录价值，跳过: text_len=%s text=%s",
                            len(text or ""),
                            text or "",
                        )
                        return None
                    if candidate in CHAPTER_CATEGORIES:
                        return candidate
                # Make the otherwise-invisible fallback observable.
                logger.warning(
                    "ClassificationAgent LLM 返回无法识别的类别，改用关键词兜底: %r",
                    answer[:80],
                )
            except Exception as e:
                # Best-effort: any LLM/prompt failure degrades to the keyword
                # fallback below rather than propagating to the caller.
                logger.warning("ClassificationAgent LLM 章节分类失败: %s", e)

        # Keyword fallback: detected stage -> default category; if the stage is
        # unknown, try fallback_stage, and finally default to "childhood".
        stage = _detect_stage(text, fallback_stage)
        return _STAGE_TO_DEFAULT_CATEGORY.get(
            stage,
            _STAGE_TO_DEFAULT_CATEGORY.get(fallback_stage, "childhood"),
        )