feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本(Alembic 0002) - Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整 - Memoir: 忠实度检查 agent,叙事与分类等链路更新 - Core: agent 日志、Alembic 启动、LangChain/日志/配置等 - Story: time_hints;Memory 检索与相关测试 - Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n - Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
This commit is contained in:
@@ -1,22 +1,37 @@
|
||||
"""
|
||||
loguru 统一日志配置 + InterceptHandler 拦截标准库 logging。
|
||||
loguru 统一日志配置 + InterceptHandler 拦截第三方库的标准库 logging。
|
||||
|
||||
日志约定:
|
||||
- INFO:面向运维的稳定摘要,避免敏感字段与高频噪音。
|
||||
- DEBUG:可记录完整上下文、用户内容、连接串、URL 等敏感信息;仅用于受控环境排查,
|
||||
生产环境勿长期开启 DEBUG。
|
||||
约定:
|
||||
- **业务代码**(`app` 包内):一律使用 ``get_logger(__name__)``,返回 **loguru** ``logger.bind(module=...)``,
|
||||
直接走 loguru sink;占位符用 **``{}``**(勿用 ``%s``,否则不会插值)。
|
||||
**禁止**用 ``import logging`` 取业务 logger(适配器层与第三方 SDK 除外)。
|
||||
- **第三方**(uvicorn、celery、httpx、langchain 等):仍用标准库 ``logging``,经 ``InterceptHandler`` 汇入 loguru。
|
||||
|
||||
由 ``Settings.log_level`` 控制(环境变量 ``LOG_LEVEL``,默认 ``INFO``);
|
||||
设为 ``DEBUG`` 时上述详细日志才会输出。
|
||||
级别:
|
||||
- INFO:面向运维的稳定摘要。
|
||||
- DEBUG:可含完整上下文、用户内容;仅受控环境长期开启。
|
||||
|
||||
由 ``Settings.log_level`` 控制 sink(``LOG_LEVEL``);``LOG_LEVEL=DEBUG`` 时业务 ``logger.debug`` 可见。
|
||||
|
||||
**实践说明**:开发/终端用「人类可读」单行格式;若上生产聚合(ELK、Loki、CloudWatch),建议**另加** JSON sink(``serialize=True`` 或自定义 ``format``)与现有 stderr 并存,便于检索与关联,而不是在控制台格式里硬塞结构化字段。
|
||||
|
||||
Agent / LLM 诊断见 ``app.core.agent_logging``;``LOG_AGENT_VERBOSE``、``AGENT_LOG_MAX_CHARS`` 见配置说明。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from loguru import Logger
|
||||
|
||||
|
||||
def _sink_min_level() -> str:
|
||||
raw = (settings.log_level or "INFO").strip().upper()
|
||||
@@ -25,65 +40,165 @@ def _sink_min_level() -> str:
|
||||
return "INFO"
|
||||
|
||||
|
||||
class InterceptHandler(logging.Handler):
|
||||
"""Route standard-library logging messages into loguru."""
|
||||
def _parse_stdlib_level(name: str) -> int | None:
    """Translate a textual log level into its stdlib ``logging`` numeric value.

    Args:
        name: A level name such as ``"info"`` or ``" WARNING "`` (case and
            surrounding whitespace are ignored), or a non-negative decimal
            number such as ``"20"``.

    Returns:
        The numeric level, or ``None`` when *name* is blank or not a known level.
    """
    key = name.strip().upper()
    if not key:
        return None
    # loguru's TRACE has no stdlib counterpart; DEBUG is the closest level.
    if key == "TRACE":
        return logging.DEBUG
    # Numeric overrides (e.g. CELERY_LOG_LEVEL=20) are accepted as-is.
    if key.isdecimal():
        return int(key)
    # Public API instead of the private ``logging._nameToLevel`` table: for a
    # registered level name ``getLevelName`` returns its number, otherwise it
    # returns a "Level <name>" string, which is treated as unknown here.
    resolved = logging.getLevelName(key)
    return resolved if isinstance(resolved, int) else None
|
||||
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
try:
|
||||
level = logger.level(record.levelname).name
|
||||
except ValueError:
|
||||
level = record.levelno
|
||||
|
||||
frame, depth = logging.currentframe(), 2
|
||||
while frame and frame.f_code.co_filename == logging.__file__:
|
||||
frame = frame.f_back
|
||||
depth += 1
|
||||
def _stdlib_logging_package_dir() -> str:
    """Return the absolute directory of the standard-library ``logging`` package.

    This is the parent directory of ``logging.__file__`` and is used for path checks.
    """
    package_init = os.path.abspath(logging.__file__)
    return os.path.dirname(package_init)
|
||||
|
||||
logger.opt(depth=depth, exception=record.exc_info).log(
|
||||
level, record.getMessage()
|
||||
|
||||
def _path_is_stdlib_logging_source(path: str) -> bool:
    """Tell whether *path* lies inside CPython's bundled ``logging`` package.

    The check compares against the package directory itself (exact match or a
    prefix followed by the path separator) instead of searching for a
    ``/logging/`` substring, so application directories that merely happen to be
    named ``logging`` are not misclassified.

    Returns ``False`` for an empty path or when path resolution raises ``OSError``.
    """
    if path:
        try:
            package_root = os.path.normcase(_stdlib_logging_package_dir())
            candidate = os.path.normcase(os.path.abspath(path))
        except OSError:
            return False
        return candidate == package_root or candidate.startswith(package_root + os.sep)
    return False
|
||||
|
||||
|
||||
# CPython exposes no API that tells the "real caller" apart from logging's own
# internal dispatch frames. On some paths the record's pathname already points
# at a non-logging file (e.g. celery/.../trace.py) while funcName is still
# recorded as a logging-internal entry point (callHandlers / Handler.emit, etc.).
# The names below mirror frame names commonly seen in ``Lib/logging``; if a
# future version renames them, extend the set based on stack samples.
_LOG_DISPATCH_FUNC_NAMES: frozenset[str] = frozenset(
    ("callHandlers", "emit", "handle", "makeRecord", "callWithContext")
)
|
||||
|
||||
|
||||
def _stdlib_emit_display(log_record: logging.LogRecord) -> tuple[str, int]:
    """Derive a readable ``(function, line)`` pair from a stdlib ``LogRecord``.

    Used by ``InterceptHandler`` when forwarding the record to loguru.

    Returns:
        ``("-", 0)`` when the record's pathname is inside the stdlib ``logging``
        package; ``(<file stem or "?">, lineno)`` when ``funcName`` is one of the
        known logging dispatch names; otherwise ``(funcName or "?", lineno)``.
    """
    source_path = log_record.pathname or ""
    if _path_is_stdlib_logging_source(source_path):
        return "-", 0

    func_name = log_record.funcName or "?"
    line_no = log_record.lineno
    if func_name not in _LOG_DISPATCH_FUNC_NAMES:
        return func_name, line_no

    # funcName is a logging-internal frame: show the source file's stem instead.
    file_name = os.path.basename(source_path)
    if file_name.endswith(".py"):
        file_name = file_name[:-3]
    return file_name or "?", line_no
|
||||
|
||||
|
||||
def _stderr_format(record: Any) -> str:
    """Build the loguru format template for the console sink.

    The request-id column is emitted only when the record carries a meaningful
    id (non-empty and not the ``-`` placeholder), which reduces noise on
    Celery/worker lines.
    """
    request_id = str(record["extra"].get("request_id") or "").strip()
    segments = [
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | ",
        "<level>{level.name: <8}</level> | ",
        "<cyan>{extra[module]}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | ",
    ]
    if request_id not in ("", "-"):
        segments.append("<dim>{extra[request_id]}</dim> | ")
    segments.append("<level>{message}</level>\n{exception}")
    return "".join(segments)
|
||||
|
||||
|
||||
def _resolve_third_party_level(override: str | None, fallback: int) -> int:
    """Return the stdlib level parsed from *override*, or *fallback* if it is blank or unparseable."""
    text = (override or "").strip()
    parsed = _parse_stdlib_level(text) if text else None
    return fallback if parsed is None else parsed


def _apply_third_party_log_levels() -> None:
    """Tone down Celery/httpx loggers when the global sink is at DEBUG/TRACE.

    With a DEBUG/TRACE sink the ``celery`` loggers default to INFO and the
    ``httpx``/``httpcore`` loggers default to WARNING; otherwise they are set to
    NOTSET. ``CELERY_LOG_LEVEL`` / ``HTTPX_LOG_LEVEL`` override these defaults
    when they contain a parseable level.
    """
    noisy_sink = _sink_min_level() in ("TRACE", "DEBUG")

    celery_fallback = logging.INFO if noisy_sink else logging.NOTSET
    celery_level = _resolve_third_party_level(settings.celery_log_level, celery_fallback)
    for logger_name in ("celery", "celery.worker"):
        logging.getLogger(logger_name).setLevel(celery_level)

    httpx_fallback = logging.WARNING if noisy_sink else logging.NOTSET
    httpx_level = _resolve_third_party_level(settings.httpx_log_level, httpx_fallback)
    for logger_name in ("httpx", "httpcore"):
        logging.getLogger(logger_name).setLevel(httpx_level)
|
||||
|
||||
|
||||
class InterceptHandler(logging.Handler):
    """Forward standard-library ``logging`` records to loguru.

    The loguru record's ``function`` / ``line`` are overwritten with values
    derived from the stdlib ``LogRecord``, and ``extra["module"]`` is set to the
    stdlib logger name (e.g. ``celery.app.trace``). When only logging internals
    can be resolved, the location is displayed as ``-:0``.
    """

    def emit(self, log_record: logging.LogRecord) -> None:
        """Re-emit *log_record* through loguru, keeping its exception info if any."""
        # Prefer the loguru level of the same name; fall back to the numeric level.
        try:
            level = logger.level(log_record.levelname).name
        except ValueError:
            level = log_record.levelno

        module_name = log_record.name or "logging"
        func_name, line_no = _stdlib_emit_display(log_record)

        def _inject_origin(record: object) -> None:
            # ``record`` is loguru's dict-like Record; overwrite its call-site fields.
            record["function"] = func_name  # type: ignore[index]
            record["line"] = line_no  # type: ignore[index]
            record["extra"]["module"] = module_name  # type: ignore[index]

        message = log_record.getMessage()
        target = logger.patch(_inject_origin)
        exc_info = log_record.exc_info
        if exc_info:
            target = target.opt(exception=exc_info)
        target.log(level, message)
|
||||
|
||||
|
||||
def setup_logging() -> None:
|
||||
"""Call once at process entry (API:`main`;Worker:`celery_app` 在首行调用)。
|
||||
"""Call once at process entry(API:`main`;Worker:`celery_app` 首行)。
|
||||
|
||||
Celery 需在 `app.tasks.celery_app` 中设置 `worker_hijack_root_logger=False`,否则
|
||||
会覆盖根 logger,无法与下方 InterceptHandler + loguru 格式对齐。
|
||||
Celery 需 ``worker_hijack_root_logger=False``,否则会覆盖根 logger。
|
||||
"""
|
||||
logger.remove()
|
||||
|
||||
logger.add(
|
||||
sys.stderr,
|
||||
level=_sink_min_level(),
|
||||
format=(
|
||||
"<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
|
||||
"<level>{level.name: <8}</level> | "
|
||||
"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
|
||||
"{extra[request_id]} | "
|
||||
"<level>{message}</level>"
|
||||
),
|
||||
format=_stderr_format,
|
||||
backtrace=True,
|
||||
diagnose=False,
|
||||
)
|
||||
|
||||
# 将根 logger 重定向到 loguru,不再使用 basicConfig(文档要求:统一走 loguru)
|
||||
logger.configure(extra={"request_id": "-", "module": "-"})
|
||||
|
||||
# 仅 root 挂 InterceptHandler,避免子 logger 与 root 各处理一次导致重复行
|
||||
root = logging.getLogger()
|
||||
root.handlers = [InterceptHandler()]
|
||||
root.setLevel(0)
|
||||
root.setLevel(logging.NOTSET)
|
||||
|
||||
for name in (
|
||||
"uvicorn",
|
||||
"uvicorn.error",
|
||||
"uvicorn.access",
|
||||
"sqlalchemy.engine",
|
||||
"celery",
|
||||
"celery.worker",
|
||||
):
|
||||
logging.getLogger(name).handlers = [InterceptHandler()]
|
||||
|
||||
logger.configure(extra={"request_id": "-"})
|
||||
_apply_third_party_log_levels()
|
||||
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""获取具名 logger,统一走 loguru 拦截。各模块应使用此函数而非直接 import logging。"""
|
||||
return logging.getLogger(name)
|
||||
def get_logger(name: str) -> Logger:
    """Return a loguru logger bound with ``module=name``.

    Application modules are expected to call ``get_logger(__name__)``.
    """
    bound_logger = logger.bind(module=name)
    return bound_logger
|
||||
|
||||
|
||||
# 供 middleware 等使用 ``contextualize`` 的同一 loguru 实例(与 get_logger 同源)
|
||||
__all__ = ["logger", "setup_logging", "get_logger", "InterceptHandler"]
|
||||
|
||||
Reference in New Issue
Block a user