Files
life-echo/api/app/core/logging.py
Sully 53e0065e3e refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)
配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
2026-05-22 13:44:50 +08:00

294 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
loguru 统一日志配置 + InterceptHandler 拦截第三方库的标准库 logging。
约定:
- **业务代码**(`app` 包内):一律使用 ``get_logger(__name__)``,返回 **loguru** ``logger.bind(module=...)``,
直接走 loguru sink;占位符用 **``{}``**(勿用 ``%s``,否则不会插值)。
**禁止**用 ``import logging`` 取业务 logger(适配器层与第三方 SDK 除外)。
- **第三方**(uvicorn、celery、httpx、langchain 等):仍用标准库 ``logging``,经 ``InterceptHandler`` 汇入 loguru。
默认将 ``celery*``、``httpx``/``httpcore`` 调到 WARNING,避免刷屏;任务边界见 ``app.tasks.celery_app`` 中 ``event=celery_task_*``。
级别:
- INFO:面向运维的稳定摘要(生产/预发推荐长期保持)。
- DEBUG:可含 prompt/响应预览或哈希;会显著增噪与体积,仅短时排障;可与 ``AGENT_LOG_MAX_CHARS`` / ``AGENT_LOG_PROMPT_MODE`` 配合。
由 ``Settings.log_level`` 控制 sink(``LOG_LEVEL``);``LOG_LEVEL=DEBUG`` 时业务 ``logger.debug`` 可见。
不打开全局 DEBUG 也可设 ``LOG_AGENT_VERBOSE=1`` 查看 Agent 单行耗时与规模(见 ``app.core.agent_logging``)。
**实践说明**:开发/终端用「人类可读」单行格式;若上生产聚合(ELK、Loki、CloudWatch),建议**另加** JSON sink(``serialize=True`` 或自定义 ``format``)与现有 stderr 并存,便于检索与关联,而不是在控制台格式里硬塞结构化字段。
**字段约定(可读性)**
- 机读键用英文 ``snake_case``:优先 ``event=...``,其余 ``key=value`` 空格分隔;与人相关的说明用 ``msg=中文短句``(可含空格),放在行尾或紧邻 ``event`` 后。
- **HTTP**:``request_id`` 由中间件 ``contextualize``;业务处可 ``logger.bind(**correlation_bind_kwargs(user_id=..., memoir_correlation_id=...))``(见 ``app.core.log_events``)。
- **Celery**:``task_prerun`` 会通过 ``app.core.celery_log_context`` 注入 ``user_id`` / ``correlation_id`` / ``task_id`` 等到 loguru ``extra``(不覆盖已有 ``bind``);``task_postrun`` 清除,避免串任务。
- **耗时**:业务里程碑的结束行带 ``duration_ms``(``perf_counter`` × 1000);LLM 细粒度见 ``app.core.agent_logging`` 的 ``agent_span`` / ``LOG_AGENT_VERBOSE``。
- **级别**:INFO=里程碑与任务起止;DEBUG=体积与路径;WARNING=可恢复失败与降级。
Agent / LLM 诊断见 ``app.core.agent_logging``;``LOG_AGENT_VERBOSE``、``AGENT_LOG_MAX_CHARS``、``AGENT_LOG_PROMPT_MODE``、``AGENT_LOG_PROMPT_DEDUP`` 见 ``api/.env.example`` 与 ``Settings``。
"""
from __future__ import annotations
import logging
import os
import sys
from typing import TYPE_CHECKING, Any
from loguru import logger
from app.core.config import settings
from app.core.log_events import (
celery_prerun_extras,
correlation_bind_kwargs,
format_log_event,
)
from app.core.runtime_constants import agent_log_defaults
if TYPE_CHECKING:
from loguru import Logger
def _sink_min_level() -> str:
    """Return the minimum level name used for the loguru sinks.

    The value of ``settings.log_level`` is stripped and upper-cased. Anything
    that is not one of the seven level names listed below (including an empty
    or ``None`` setting) falls back to ``"INFO"``.
    """
    known_levels = ("TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL")
    candidate = (settings.log_level or "INFO").strip().upper()
    return candidate if candidate in known_levels else "INFO"
def _parse_stdlib_level(name: str) -> int | None:
s = name.strip().upper()
if not s:
return None
if s == "TRACE":
return logging.DEBUG
return logging._nameToLevel.get(s) # type: ignore[attr-defined]
def _stdlib_logging_package_dir() -> str:
    """Return the absolute directory containing the imported ``logging`` package.

    This is the parent directory of ``logging.__file__`` and is used for
    path-prefix checks.
    """
    package_init = os.path.abspath(logging.__file__)
    return os.path.dirname(package_init)
def _path_is_stdlib_logging_source(path: str) -> bool:
    """Tell whether ``path`` lies inside the standard-library ``logging`` package.

    Case-normalized absolute paths are compared against the package directory
    instead of searching for a ``/logging/`` substring, so an application
    directory that merely happens to be named ``logging`` is not matched.

    Returns:
        ``False`` for an empty path or when path resolution raises ``OSError``.
    """
    if not path:
        return False
    try:
        package_root = os.path.normcase(_stdlib_logging_package_dir())
        candidate = os.path.normcase(os.path.abspath(path))
    except OSError:
        return False
    if candidate == package_root:
        return True
    return candidate.startswith(package_root + os.sep)
# CPython exposes no API that separates the "real caller" from logging's own
# dispatch frames. On some paths ``pathname`` already points at application
# code (e.g. celery/.../trace.py) while ``funcName`` is still recorded as a
# logging-internal entry point (callHandlers / Handler.emit, ...). The names
# below mirror the frame names commonly seen in ``Lib/logging``; if a future
# version renames them, extend the set based on stack samples.
_LOG_DISPATCH_FUNC_NAMES: frozenset[str] = frozenset(
    (
        "callHandlers",
        "callWithContext",
        "emit",
        "handle",
        "makeRecord",
    )
)
def _stdlib_emit_display(log_record: logging.LogRecord) -> tuple[str, int]:
    """Pick a readable ``(function, line)`` pair for a stdlib ``LogRecord``.

    Returns:
        * ``("-", 0)`` when the record's ``pathname`` is inside the stdlib
          ``logging`` package;
        * the source file name without its ``.py`` suffix (``"?"`` if empty)
          plus ``lineno`` when ``funcName`` is one of the logging dispatch
          names;
        * otherwise the record's ``funcName`` (``"?"`` if empty) and ``lineno``.
    """
    func_name = log_record.funcName or "?"
    line_no = log_record.lineno
    source_path = log_record.pathname or ""

    if _path_is_stdlib_logging_source(source_path):
        return "-", 0
    if func_name not in _LOG_DISPATCH_FUNC_NAMES:
        return func_name, line_no

    # funcName is a logging-internal frame name: show the file stem instead.
    file_name = os.path.basename(source_path)
    if file_name.endswith(".py"):
        file_name = file_name[:-3]
    return file_name or "?", line_no
def _merge_trace_context(record: Any) -> None:
    """Fill blank ``extra`` fields of a loguru record from the current trace context.

    Values come from ``app.core.telemetry.current_trace_context()``. A field is
    written only when the context value is truthy and the record has nothing
    meaningful for it yet (missing, ``None``, blank, or ``"-"``). If the import
    or the lookup raises, or the context is empty, the record is left untouched.
    """
    try:
        from app.core.telemetry import current_trace_context

        trace_ctx = current_trace_context()
        has_ctx = bool(trace_ctx)
    except Exception:
        return
    if not has_ctx:
        return

    extra = record["extra"]

    def _is_unset(current: Any) -> bool:
        return current is None or str(current).strip() in ("", "-")

    for key, value in trace_ctx.items():
        if value and _is_unset(extra.get(key)):
            extra[key] = value
def _stderr_format(record: Any) -> str:
    """Build the format template for the console (stderr) sink.

    The ``rid`` / ``tid`` / ``corr`` / ``uid`` columns are included only when
    the matching ``extra`` value is non-empty; ``request_id`` additionally
    treats the ``"-"`` placeholder as empty. The trace id is shortened to its
    first 12 characters.

    Args:
        record: loguru record (dict-like); only ``record["extra"]`` is read.

    Returns:
        A format *template* (with ``{...}`` fields and colour markup), not the
        final log line.
    """
    extra = record["extra"]

    def _text(key: str) -> str:
        return str(extra.get(key) or "").strip()

    rid = _text("request_id")
    rid_part = "<dim>rid={extra[request_id]}</dim> | " if rid and rid != "-" else ""

    # The trace id is the only value spliced into the template by value (it is
    # truncated first). Double its braces so that formatting the template later
    # cannot mistake them for replacement fields.
    tid = _text("trace_id")[:12].replace("{", "{{").replace("}", "}}")
    tid_part = f"<dim>tid={tid}</dim> | " if tid else ""

    cid_part = "<dim>corr={extra[correlation_id]}</dim> | " if _text("correlation_id") else ""
    uid_part = "<dim>uid={extra[user_id]}</dim> | " if _text("user_id") else ""

    return (
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
        "<level>{level.name: <8}</level> | "
        "<cyan>{extra[module]}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
        f"{rid_part}{tid_part}{cid_part}{uid_part}"
        "<level>{message}</level>\n{exception}"
    )
def _merge_celery_worker_extra(record: Any) -> None:
    """Fill blank ``extra`` fields of a loguru record from the Celery log context.

    Values come from ``app.core.celery_log_context.get_celery_log_extras()``.
    Existing meaningful values are never overwritten: a field is written only
    when the context value is truthy and the record's current value is missing,
    ``None``, blank, or ``"-"``. If the import or the lookup raises, or the
    context is empty, the record is left untouched.
    """
    try:
        from app.core.celery_log_context import get_celery_log_extras

        celery_ctx = get_celery_log_extras() or None
    except Exception:
        return
    if celery_ctx is None:
        return

    extra = record["extra"]
    for field, value in celery_ctx.items():
        if not value:
            continue
        existing = extra.get(field)
        if existing is not None and str(existing).strip() not in ("", "-"):
            continue
        extra[field] = value
def _resolve_third_party_level(raw: str | None, default: int) -> int:
    """Resolve one configured level override, falling back to ``default``."""
    text = (raw or "").strip()
    if not text:
        return default
    parsed = _parse_stdlib_level(text)
    # ``None`` means the name could not be parsed; ``0`` is NOTSET, which would
    # defer to the NOTSET root logger and let every record through. Both cases
    # use the hard-coded default instead.
    return parsed if parsed else default


def _apply_third_party_log_levels() -> None:
    """Quiet down Celery / httpx framework logging.

    The root logger is left at NOTSET, so a child logger that is also NOTSET
    ends up with an effective level of 0 and lets everything (including INFO)
    through. Explicit levels are therefore always set here; NOTSET is never
    applied, even when it is requested explicitly.

    Defaults, used when ``agent_log_defaults.celery_log_level`` /
    ``agent_log_defaults.httpx_log_level`` are empty, unparseable or NOTSET:

    - sink level TRACE/DEBUG: Celery -> INFO, httpx/httpcore -> WARNING
    - otherwise: Celery and httpx/httpcore -> WARNING

    Set the overrides (e.g. to ``INFO``) when the raw framework lines are
    needed.
    """
    verbose = _sink_min_level() in ("TRACE", "DEBUG")

    celery_level = _resolve_third_party_level(
        agent_log_defaults.celery_log_level,
        logging.INFO if verbose else logging.WARNING,
    )
    for name in ("celery", "celery.worker"):
        logging.getLogger(name).setLevel(celery_level)

    httpx_level = _resolve_third_party_level(
        agent_log_defaults.httpx_log_level,
        logging.WARNING,
    )
    for name in ("httpx", "httpcore"):
        logging.getLogger(name).setLevel(httpx_level)
class InterceptHandler(logging.Handler):
    """Route standard-library logging records into loguru.

    The loguru record is patched so that ``function`` / ``line`` come from the
    stdlib ``LogRecord`` (via ``_stdlib_emit_display``) and ``extra["module"]``
    is the stdlib logger name (e.g. ``celery.app.trace``). When the location
    resolves only to logging internals it is shown as ``-:0``.
    """

    def emit(self, log_record: logging.LogRecord) -> None:
        """Forward ``log_record`` to loguru without raising into the caller."""
        try:
            self._forward(log_record)
        except Exception:
            # Same contract as the stdlib handlers: a record that cannot be
            # rendered (e.g. "%d" given a str) must not crash the code that
            # logged it. ``handleError`` reports it per ``logging.raiseExceptions``.
            self.handleError(log_record)

    @staticmethod
    def _forward(log_record: logging.LogRecord) -> None:
        """Translate the stdlib record and hand it to loguru."""
        # Prefer the loguru level with the same name; fall back to the numeric
        # level when loguru does not know that name.
        try:
            level = logger.level(log_record.levelname).name
        except ValueError:
            level = log_record.levelno

        modname = log_record.name or "logging"
        fn, ln = _stdlib_emit_display(log_record)

        def patch_record(record: Any) -> None:
            # ``record`` is the dict-like loguru record; overwrite the location
            # loguru captured (this handler) with the stdlib record's location.
            record["function"] = fn
            record["line"] = ln
            record["extra"]["module"] = modname

        msg = log_record.getMessage()
        # ``exception=None`` (or any falsy exc_info) attaches nothing, so no
        # separate branch is needed for records without exception info.
        logger.patch(patch_record).opt(exception=log_record.exc_info).log(level, msg)
def setup_logging() -> None:
    """Configure the loguru sinks and route stdlib logging into loguru.

    Call once at process entry (API: ``main``; worker: first line of
    ``celery_app``). Celery needs ``worker_hijack_root_logger=False``,
    otherwise it overrides the root logger configured here.

    Side effects: removes all existing loguru sinks, rebinds the module-level
    ``logger`` to a patched instance, and replaces the root stdlib logger's
    handlers with a single ``InterceptHandler``.
    """
    global logger

    # Console sink: always installed, replacing whatever was registered before.
    logger.remove()
    console_options = {
        "level": _sink_min_level(),
        "format": _stderr_format,
        "backtrace": True,
        "diagnose": False,
    }
    logger.add(sys.stderr, **console_options)

    # Optional JSON file sink, enabled only when a path is configured.
    json_sink_path = (agent_log_defaults.log_json_file or "").strip()
    if json_sink_path:
        json_options = {
            "level": _sink_min_level(),
            "serialize": True,
            "rotation": "20 MB",
            "retention": "7 days",
            "encoding": "utf-8",
            "enqueue": True,
        }
        logger.add(json_sink_path, **json_options)

    default_extra = {"request_id": "-", "module": "-", "trace_id": "", "span_id": ""}
    logger.configure(extra=default_extra)
    for patcher in (_merge_celery_worker_extra, _merge_trace_context):
        logger = logger.patch(patcher)

    # Attach InterceptHandler to the root logger only, so a record is not
    # handled once by a child logger and again by root (duplicate lines).
    stdlib_root = logging.getLogger()
    stdlib_root.handlers = [InterceptHandler()]
    stdlib_root.setLevel(logging.NOTSET)
    _apply_third_party_log_levels()
def get_logger(name: str) -> Logger:
    """Return the shared loguru logger with ``module`` bound to ``name``.

    Application modules are expected to call ``get_logger(__name__)``.
    """
    bound_logger = logger.bind(module=name)
    return bound_logger
# Public API of this module. ``logger`` is exported so that middleware and
# similar code can call ``contextualize`` on the same loguru instance that
# ``get_logger`` binds from.
__all__ = [
    "logger",
    "setup_logging",
    "get_logger",
    "InterceptHandler",
    "format_log_event",
    "correlation_bind_kwargs",
    "celery_prerun_extras",
]