diff --git a/.cursor/settings.json b/.cursor/settings.json
index 9c2ee48..331de8f 100644
--- a/.cursor/settings.json
+++ b/.cursor/settings.json
@@ -5,6 +5,9 @@
},
"postman": {
"enabled": true
+ },
+ "grafana-assistant": {
+ "enabled": true
}
}
}
diff --git a/api/.env.example b/api/.env.example
index 35cd342..ed543ba 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -16,6 +16,37 @@
# LIFE_ECHO_API_HOST_PORT=8000
# 若 Caddy 跑在独立容器且非 host 网络,不要用 127.0.0.1,应把 Caddy 加入与本 compose 相同的 Docker 网络,并对 http://life-echo-api-prod:8000 做 reverse_proxy。
+# =============================================================================
+# OpenTelemetry(见 docs/observability.md;Settings 只读 .env,勿 shell export)
+# =============================================================================
+# docker-compose.observability.yml 宿主机端口(使用高位端口,避免与 3000/9090/4317 冲突)
+# GRAFANA_HOST_PORT=48300
+# PROMETHEUS_HOST_PORT=49090
+# OTEL_GRPC_HOST_PORT=48317
+# OTEL_HTTP_HOST_PORT=48318
+# OTEL_COLLECTOR_HEALTH_HOST_PORT=48333
+# TEMPO_HTTP_HOST_PORT=43200
+# LOKI_HTTP_HOST_PORT=43100
+#
+# --- development(.env.development):本机 uvicorn/celery ---
+# OTEL_ENABLED=true
+# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:48317
+# OTEL_TRACES_SAMPLER=always_on
+#
+# --- staging / production(.env.staging / .env.production):容器内 compose ---
+# OTEL_ENABLED=false
+# OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+# OTEL_TRACES_SAMPLER=parentbased_traceidratio
+# OTEL_TRACES_SAMPLER_ARG=0.1
+#
+OTEL_ENABLED=true
+OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:48317
+OTEL_EXPORTER_OTLP_INSECURE=true
+OTEL_SERVICE_NAME=life-echo-api
+OTEL_TRACES_SAMPLER=always_on
+# OTEL_TRACES_SAMPLER_ARG=0.1
+# OTEL_METRIC_EXPORT_INTERVAL_MS=10000
+
# =============================================================================
# Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL)
# =============================================================================
@@ -140,7 +171,7 @@ REDIS_SESSION_TTL=86400
# CELERY_MEMORY_ENRICHMENT_QUEUE=memory_idle
# =============================================================================
-# Internal evaluation API(internal_main / internal-eval.sh;与主 API 进程隔离)
+# Internal evaluation API(internal_main;development.sh 默认一并启动;与主 API 进程隔离)
# =============================================================================
# 本地:`openssl rand -hex 32`;不用 internal eval 时可留空
INTERNAL_EVAL_API_KEY=
diff --git a/api/.env.production b/api/.env.production
index c059d7b..6650d28 100644
--- a/api/.env.production
+++ b/api/.env.production
@@ -33,6 +33,18 @@ LOG_LEVEL=INFO
# CELERY_LOG_LEVEL=
# HTTPX_LOG_LEVEL=
+# =============================================================================
+# OpenTelemetry(生产;第二阶段 compose profile 接入后设 OTEL_ENABLED=true,见 docs/observability.md)
+# 容器内 API/Celery → http://otel-collector:4317;勿用 localhost
+# =============================================================================
+OTEL_ENABLED=false
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+OTEL_EXPORTER_OTLP_INSECURE=true
+OTEL_SERVICE_NAME=life-echo-api
+OTEL_TRACES_SAMPLER=parentbased_traceidratio
+OTEL_TRACES_SAMPLER_ARG=0.1
+# OTEL_METRIC_EXPORT_INTERVAL_MS=10000
+
# =============================================================================
# LLM / DeepSeek
# =============================================================================
diff --git a/api/.env.staging b/api/.env.staging
index 7a6dcc6..fa1ed8b 100644
--- a/api/.env.staging
+++ b/api/.env.staging
@@ -32,6 +32,18 @@ LOG_LEVEL=INFO
# CELERY_LOG_LEVEL=
# HTTPX_LOG_LEVEL=
+# =============================================================================
+# OpenTelemetry(预发;compose 接入 LGTM 后设 OTEL_ENABLED=true,见 docs/observability.md)
+# API/Celery 容器内 endpoint 用服务名;Grafana 宿主机端口见 observability compose(默认 48300 等)
+# =============================================================================
+OTEL_ENABLED=false
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+OTEL_EXPORTER_OTLP_INSECURE=true
+OTEL_SERVICE_NAME=life-echo-api
+OTEL_TRACES_SAMPLER=parentbased_traceidratio
+OTEL_TRACES_SAMPLER_ARG=0.1
+# OTEL_METRIC_EXPORT_INTERVAL_MS=10000
+
# =============================================================================
# LLM / DeepSeek
# =============================================================================
diff --git a/api/README.md b/api/README.md
index fa26cd1..715351f 100644
--- a/api/README.md
+++ b/api/README.md
@@ -32,6 +32,16 @@ Life Echo API 是一个智能对话系统,通过 WebSocket 实时连接,使
- **ASR/TTS**: OpenAI Whisper API
- **认证**: JWT (python-jose) + bcrypt
- **其他**: Pydantic, python-dotenv
+- **可观测性**: OpenTelemetry → Grafana LGTM(Tempo / Prometheus / Loki),见 [`docs/observability.md`](docs/observability.md)
+
+## 可观测性(本地)
+
+```bash
+docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d
+# Grafana: http://127.0.0.1:48300
+```
+
+在 `.env` 中配置 `OTEL_*`(见 [`.env.example`](.env.example)),与 Postgres/Redis 一样由 Settings 加载,无需 shell export。详见 [`docs/observability.md`](docs/observability.md)。
## 项目结构
diff --git a/api/app/adapters/asr/tencent_asr.py b/api/app/adapters/asr/tencent_asr.py
index 0620910..d80fccf 100644
--- a/api/app/adapters/asr/tencent_asr.py
+++ b/api/app/adapters/asr/tencent_asr.py
@@ -3,6 +3,7 @@
import asyncio
import base64
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.ports.asr import ASRTranscriptionError
@@ -39,6 +40,10 @@ class TencentASRProvider:
return bool(self._secret_id and self._secret_key and self._get_client())
async def transcribe(self, audio: bytes, format: str = "m4a") -> str:
+ """Transcribe ``audio`` by delegating to ``_transcribe_inner`` inside an ``asr.transcribe`` business span.
+
+ The span is tagged ``provider="tencent"``; the return value and any raised
+ exception are those of ``_transcribe_inner``.
+ """
+ with business_span("asr.transcribe", provider="tencent"):
+ return await self._transcribe_inner(audio, format)
+
+ async def _transcribe_inner(self, audio: bytes, format: str) -> str:
client = self._get_client()
if not client:
raise ASRTranscriptionError(
diff --git a/api/app/adapters/asr/whisper_local.py b/api/app/adapters/asr/whisper_local.py
index a64a0a5..dfe6bd9 100644
--- a/api/app/adapters/asr/whisper_local.py
+++ b/api/app/adapters/asr/whisper_local.py
@@ -8,6 +8,7 @@ import re
import tempfile
from typing import Any, Iterable
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.ports.asr import ASRTranscriptionError
@@ -102,6 +103,10 @@ class WhisperASRProvider:
return self._load_model()
async def transcribe(self, audio: bytes, format: str = "m4a") -> str:
+ """Transcribe ``audio`` by delegating to ``_transcribe_inner`` inside an ``asr.transcribe`` business span.
+
+ The span is tagged ``provider="whisper"``; the return value and any raised
+ exception are those of ``_transcribe_inner``.
+ """
+ with business_span("asr.transcribe", provider="whisper"):
+ return await self._transcribe_inner(audio, format)
+
+ async def _transcribe_inner(self, audio: bytes, format: str) -> str:
# 与 v1.1.0 相同的单次 transcribe;推理放线程池,避免阻塞 asyncio(tag 上为同步调用)。
self._load_model()
if not self._model:
diff --git a/api/app/adapters/embedding/zhipu.py b/api/app/adapters/embedding/zhipu.py
index cb018ad..249d415 100644
--- a/api/app/adapters/embedding/zhipu.py
+++ b/api/app/adapters/embedding/zhipu.py
@@ -6,6 +6,7 @@ import asyncio
from zai import ZhipuAiClient
+from app.core.business_telemetry import business_span
from app.core.embedding import MEMORY_EMBEDDING_DIMENSION
from app.core.logging import get_logger
@@ -57,12 +58,13 @@ class ZhipuEmbeddingProvider:
async def embed_texts(self, texts: list[str]) -> list[list[float]]:
+ """Embed ``texts`` in slices of ``_EMBED_BATCH_SIZE`` and return the vectors in input order.
+
+ Each slice is passed to ``_create_vectors_sync`` via ``asyncio.to_thread``.
+ Returns an empty list when no client is configured or ``texts`` is empty;
+ no span is opened in that case.
+ """
if not self._client or not texts:
return []
- out: list[list[float]] = []
- for i in range(0, len(texts), _EMBED_BATCH_SIZE):
- batch = texts[i : i + _EMBED_BATCH_SIZE]
- part = await asyncio.to_thread(self._create_vectors_sync, batch)
- out.extend(part)
- return out
+ # NOTE(review): ``batch_size`` here is the total number of texts in the call,
+ # not the per-request slice size (``_EMBED_BATCH_SIZE``) — confirm the name is intended.
+ with business_span("embedding.zhipu.embed", batch_size=len(texts)):
+ out: list[list[float]] = []
+ for i in range(0, len(texts), _EMBED_BATCH_SIZE):
+ batch = texts[i : i + _EMBED_BATCH_SIZE]
+ part = await asyncio.to_thread(self._create_vectors_sync, batch)
+ out.extend(part)
+ return out
def embed_text_sync(self, text: str) -> list[float]:
vecs = self.embed_texts_sync([text])
@@ -71,8 +73,9 @@ class ZhipuEmbeddingProvider:
def embed_texts_sync(self, texts: list[str]) -> list[list[float]]:
+ """Synchronous counterpart of ``embed_texts``: embed ``texts`` in slices of ``_EMBED_BATCH_SIZE``.
+
+ Calls ``_create_vectors_sync`` directly for each slice. Returns an empty list
+ when no client is configured or ``texts`` is empty; no span is opened in that case.
+ """
if not self._client or not texts:
return []
- out: list[list[float]] = []
- for i in range(0, len(texts), _EMBED_BATCH_SIZE):
- batch = texts[i : i + _EMBED_BATCH_SIZE]
- out.extend(self._create_vectors_sync(batch))
- return out
+ # NOTE(review): ``batch_size`` here is the total number of texts in the call,
+ # not the per-request slice size (``_EMBED_BATCH_SIZE``) — confirm the name is intended.
+ with business_span("embedding.zhipu.embed", batch_size=len(texts)):
+ out: list[list[float]] = []
+ for i in range(0, len(texts), _EMBED_BATCH_SIZE):
+ batch = texts[i : i + _EMBED_BATCH_SIZE]
+ out.extend(self._create_vectors_sync(batch))
+ return out
diff --git a/api/app/adapters/llm/deepseek.py b/api/app/adapters/llm/deepseek.py
index 244574c..0d6805d 100644
--- a/api/app/adapters/llm/deepseek.py
+++ b/api/app/adapters/llm/deepseek.py
@@ -4,6 +4,8 @@ from collections.abc import AsyncIterator
from langchain_openai import ChatOpenAI
+from app.core.llm_telemetry import langchain_invoke_span, observe_astream
+
class DeepSeekLLMProvider:
"""LangChain-based LLM adapter for DeepSeek and OpenAI-compatible APIs.
@@ -56,7 +58,15 @@ class DeepSeekLLMProvider:
) -> str:
llm = self._get_llm(temperature, model, max_tokens)
lc_messages = _to_langchain_messages(messages)
- result = await llm.ainvoke(lc_messages)
+ resolved_model = model or self._default_model
+ with langchain_invoke_span(
+ agent="deepseek.complete",
+ provider="deepseek",
+ model=resolved_model,
+ call_type="chat",
+ ) as tel:
+ result = await llm.ainvoke(lc_messages)
+ tel["response"] = result
return str(result.content)
async def stream(
@@ -69,7 +79,14 @@ class DeepSeekLLMProvider:
) -> AsyncIterator[str]:
llm = self._get_llm(temperature, model, max_tokens)
lc_messages = _to_langchain_messages(messages)
- async for chunk in llm.astream(lc_messages):
+ resolved_model = model or self._default_model
+ async for chunk in observe_astream(
+ llm,
+ lc_messages,
+ agent="deepseek.stream",
+ provider="deepseek",
+ model=resolved_model,
+ ):
if chunk.content:
yield str(chunk.content)
diff --git a/api/app/adapters/sms/tencent.py b/api/app/adapters/sms/tencent.py
index 357988b..e3f736d 100644
--- a/api/app/adapters/sms/tencent.py
+++ b/api/app/adapters/sms/tencent.py
@@ -7,6 +7,7 @@ from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
from tencentcloud.sms.v20210111 import models as sms_models
from tencentcloud.sms.v20210111 import sms_client
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -32,6 +33,10 @@ class TencentSmsSender:
self._template_param_count = template_param_count
def send_verification_code(self, phone: str, code: str) -> bool:
+ """Send a verification code by delegating to ``_send_verification_code_inner`` inside an ``sms.tencent.send`` span.
+
+ Neither ``phone`` nor ``code`` is passed to the span as an attribute.
+ Returns whatever ``_send_verification_code_inner`` returns.
+ """
+ with business_span("sms.tencent.send"):
+ return self._send_verification_code_inner(phone, code)
+
+ def _send_verification_code_inner(self, phone: str, code: str) -> bool:
if not self._secret_id or not self._secret_key:
logger.error("Tencent SMS credentials not configured")
return False
diff --git a/api/app/adapters/tts/openai_tts.py b/api/app/adapters/tts/openai_tts.py
index 6c2553a..55eaf64 100644
--- a/api/app/adapters/tts/openai_tts.py
+++ b/api/app/adapters/tts/openai_tts.py
@@ -5,6 +5,7 @@ from io import BytesIO
from openai import OpenAI
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -35,6 +36,10 @@ class OpenAITTSProvider:
*,
language: str = "zh", # noqa: ARG002 — OpenAI TTS auto-detects language
) -> bytes:
+ with business_span("tts.synthesize", provider="openai"):
+ return await self._synthesize_api(text, voice)
+
+ async def _synthesize_api(self, text: str, voice: str) -> bytes:
if not self._client:
return b""
try:
diff --git a/api/app/adapters/tts/tencent_tts.py b/api/app/adapters/tts/tencent_tts.py
index 2377fa3..c00fa15 100644
--- a/api/app/adapters/tts/tencent_tts.py
+++ b/api/app/adapters/tts/tencent_tts.py
@@ -5,6 +5,7 @@ import base64
import re
import uuid
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -180,6 +181,16 @@ class TencentTTSProvider:
voice: str = "alloy",
*,
language: str = "zh",
+ ) -> bytes:
+ with business_span("tts.synthesize", provider="tencent"):
+ return await self._synthesize_inner(text, voice, language=language)
+
+ async def _synthesize_inner(
+ self,
+ text: str,
+ voice: str = "alloy",
+ *,
+ language: str = "zh",
) -> bytes:
if not self._secret_id or not self._secret_key:
logger.error(
diff --git a/api/app/agents/chat/interview_agent.py b/api/app/agents/chat/interview_agent.py
index 105598a..15293c8 100644
--- a/api/app/agents/chat/interview_agent.py
+++ b/api/app/agents/chat/interview_agent.py
@@ -38,6 +38,7 @@ from app.agents.state_schema import (
interview_control_state,
narrative_coverage_state,
)
+from app.core.llm_telemetry import infer_provider_model, observe_ainvoke
from app.core.agent_logging import (
agent_span,
log_agent_payload,
@@ -331,7 +332,15 @@ class InterviewAgent:
conversation_turn_total,
history_pairs_windowed,
)
- response = await chat_llm.ainvoke(messages)
+ provider, model = infer_provider_model(chat_llm)
+ response = await observe_ainvoke(
+ chat_llm,
+ messages,
+ agent="InterviewAgent.generate_response",
+ provider=provider,
+ model=model,
+ call_type="chat",
+ )
response_ms = (time.perf_counter() - llm_t0) * 1000
logger.info(
"event=chat_llm_done agent=InterviewAgent.generate_response_with_state "
@@ -384,7 +393,15 @@ class InterviewAgent:
_message_contents_char_count(retry_messages),
conversation_id,
)
- response_retry = await chat_llm.ainvoke(retry_messages)
+ provider, model = infer_provider_model(chat_llm)
+ response_retry = await observe_ainvoke(
+ chat_llm,
+ retry_messages,
+ agent="InterviewAgent.duplicate_guard_retry",
+ provider=provider,
+ model=model,
+ call_type="chat",
+ )
logger.info(
"event=chat_llm_done agent=InterviewAgent.duplicate_guard_retry "
"response_latency_ms={:.2f}",
@@ -524,7 +541,15 @@ class InterviewAgent:
hw.turn_total,
len(hw.window) // 2,
)
- response = await opening_llm.ainvoke(messages)
+ provider, model = infer_provider_model(opening_llm)
+ response = await observe_ainvoke(
+ opening_llm,
+ messages,
+ agent="InterviewAgent.opening",
+ provider=provider,
+ model=model,
+ call_type="chat",
+ )
logger.info(
"event=chat_llm_done agent=InterviewAgent.generate_opening_message "
"response_latency_ms={:.2f}",
@@ -643,7 +668,15 @@ class InterviewAgent:
len(hw.window) // 2,
idle_hours,
)
- response = await re_greet_llm.ainvoke(messages)
+ provider, model = infer_provider_model(re_greet_llm)
+ response = await observe_ainvoke(
+ re_greet_llm,
+ messages,
+ agent="InterviewAgent.re_greeting",
+ provider=provider,
+ model=model,
+ call_type="chat",
+ )
logger.info(
"event=chat_llm_done agent=InterviewAgent.generate_re_greeting_message "
"response_latency_ms={:.2f}",
diff --git a/api/app/core/agent_logging.py b/api/app/core/agent_logging.py
index e2021e0..b468a99 100644
--- a/api/app/core/agent_logging.py
+++ b/api/app/core/agent_logging.py
@@ -24,7 +24,11 @@ import time
from contextlib import contextmanager
from typing import Any, Iterator
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
+
from app.core.config import settings
+from app.core.telemetry import get_tracer
_dedup_lock = threading.Lock()
_last_prompt_sha256_by_label: dict[str, str] = {}
@@ -97,15 +101,41 @@ def agent_span(
ctx = " ".join(f"{k}={v!r}" for k, v in context.items())
if agent_verbose_enabled():
logger.debug("agent_span_start {} {}", operation, ctx)
- try:
- yield
- finally:
- ms = (time.perf_counter() - t0) * 1000
+
+ def _log_end(ms: float) -> None:
if agent_verbose_enabled():
logger.debug("agent_span_end {} duration_ms={:.2f} {}", operation, ms, ctx)
elif settings.log_agent_verbose:
logger.info("agent_span {} duration_ms={:.2f} {}", operation, ms, ctx)
+ if settings.otel_enabled:
+ tracer = get_tracer("app.agent")
+ with tracer.start_as_current_span(
+ "agent.operation",
+ attributes={"agent.operation": operation},
+ ) as span:
+ failed = False
+ try:
+ yield
+ except Exception:
+ failed = True
+ if span.is_recording():
+ span.set_status(Status(StatusCode.ERROR))
+ raise
+ finally:
+ ms = (time.perf_counter() - t0) * 1000
+ if span.is_recording():
+ span.set_attribute("agent.duration_ms", round(ms, 2))
+ if not failed:
+ span.set_status(Status(StatusCode.OK))
+ _log_end(ms)
+ return
+
+ try:
+ yield
+ finally:
+ _log_end((time.perf_counter() - t0) * 1000)
+
def log_agent_payload(
logger: Any,
diff --git a/api/app/core/alembic_revision_repair.py b/api/app/core/alembic_revision_repair.py
index 69a3e87..2426348 100644
--- a/api/app/core/alembic_revision_repair.py
+++ b/api/app/core/alembic_revision_repair.py
@@ -9,6 +9,8 @@ _WITHDRAWN_0020_REVISIONS = frozenset(
"0020_add_tts_audio_urls_column",
"0020_backfill_missing_schema",
"0020_backfill_all_missing_columns",
+ # 曾本地试运行后从仓库撤回,仅 dev 库可能残留 stamp
+ "0020_chapters_book_id",
}
)
_REPAIR_TARGET_REVISION = "0018_users_language_preference"
diff --git a/api/app/core/business_telemetry.py b/api/app/core/business_telemetry.py
new file mode 100644
index 0000000..0a0488d
--- /dev/null
+++ b/api/app/core/business_telemetry.py
@@ -0,0 +1,81 @@
+"""
+业务链路 OpenTelemetry span(回忆录阶段、WS、外部依赖等)。
+"""
+
+from __future__ import annotations
+
+import time
+from contextlib import contextmanager
+from typing import Any, Iterator
+
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
+
+from app.core.config import settings
+from app.core.telemetry import get_meter, get_tracer
+
+# Metric instruments, created on first use by _ensure_instruments(); both stay
+# None until then (and permanently while settings.otel_enabled is false).
+_meter = None
+_duration_hist = None
+
+# Only low-cardinality fields may become span attributes
+# (user_id / conversation_id and similar identifiers are forbidden).
+# business_span() drops any keyword argument whose name is not listed here.
+_ALLOWED_SPAN_ATTRS = frozenset(
+ {"provider", "chapter_category", "segment_count", "batch_size", "hours"}
+)
+
+
+def _ensure_instruments() -> None:
+ """Create the ``app.business`` meter and its duration histogram on first call.
+
+ Does nothing when the meter already exists or when ``settings.otel_enabled``
+ is false, so repeated calls are cheap.
+ """
+ global _meter, _duration_hist
+ if _meter is not None or not settings.otel_enabled:
+ return
+ _meter = get_meter("app.business")
+ _duration_hist = _meter.create_histogram(
+ "business.operation.duration",
+ unit="ms",
+ description="Business operation wall time",
+ )
+
+
+def _normalize_attr_value(value: Any) -> str | int | float | bool:
+ """Return ``value`` unchanged if it is a str/int/float/bool, otherwise its ``str()`` form."""
+ if isinstance(value, (str, int, float, bool)):
+ return value
+ return str(value)
+
+
+@contextmanager
+def business_span(
+    name: str,
+    /,
+    **attributes: Any,
+) -> Iterator[trace.Span]:
+    """Wrap one business operation in an OpenTelemetry span and record its wall time.
+
+    When ``settings.otel_enabled`` is false this yields ``trace.INVALID_SPAN``
+    and does nothing else. Otherwise it starts a current span called ``name``
+    on the ``app.business`` tracer; on exit it sets ``business.duration_ms``
+    and an OK/ERROR status on the span, and records the duration in the
+    ``business.operation.duration`` histogram with ``operation`` and
+    ``outcome`` labels.
+
+    Args:
+        name: Span name; also used as the ``operation`` histogram label.
+        **attributes: Candidate span attributes. Only keys listed in
+            ``_ALLOWED_SPAN_ATTRS`` whose value is neither ``None`` nor ``""``
+            are attached (prefixed with ``business.``); all others are
+            silently dropped.
+
+    Yields:
+        The active span, or ``trace.INVALID_SPAN`` when telemetry is disabled.
+
+    Raises:
+        Whatever the wrapped body raises; exceptions are re-raised unchanged.
+    """
+    if not settings.otel_enabled:
+        yield trace.INVALID_SPAN
+        return
+
+    tracer = get_tracer("app.business")
+    otel_attrs = {
+        f"business.{k}": _normalize_attr_value(v)
+        for k, v in attributes.items()
+        if k in _ALLOWED_SPAN_ATTRS and v is not None and v != ""
+    }
+    t0 = time.perf_counter()
+    outcome = "ok"
+    with tracer.start_as_current_span(name, attributes=otel_attrs) as span:
+        try:
+            yield span
+        except BaseException:
+            # BaseException, not Exception: asyncio.CancelledError and
+            # KeyboardInterrupt do not derive from Exception, and this span
+            # wraps awaited calls. Catching only Exception would report a
+            # cancelled operation as outcome="ok" with status OK.
+            outcome = "error"
+            if span.is_recording():
+                span.set_status(Status(StatusCode.ERROR))
+            raise
+        finally:
+            duration_ms = (time.perf_counter() - t0) * 1000
+            if span.is_recording():
+                span.set_attribute("business.duration_ms", round(duration_ms, 2))
+                if outcome == "ok":
+                    span.set_status(Status(StatusCode.OK))
+            _ensure_instruments()
+            if _duration_hist is not None:
+                _duration_hist.record(
+                    duration_ms,
+                    {"operation": name, "outcome": outcome},
+                )
diff --git a/api/app/core/config.py b/api/app/core/config.py
index 6341f26..03a172a 100644
--- a/api/app/core/config.py
+++ b/api/app/core/config.py
@@ -223,6 +223,36 @@ class Settings(BaseSettings):
# 非空时额外写入 JSONL(serialize=True),便于 Loki/ELK;与 stderr 彩色控制台并存
log_json_file: str = ""
+ # ── OpenTelemetry ─────────────────────────────────────────
+ otel_enabled: bool = False
+ otel_exporter_otlp_endpoint: str = "http://localhost:48317"
+ otel_exporter_otlp_insecure: bool = True
+ otel_service_name: str = ""
+ otel_traces_sampler: str = Field(
+ default="always_on",
+ description="always_on | parentbased_traceidratio | always_off",
+ )
+ otel_traces_sampler_arg: float | None = Field(default=None, ge=0.0, le=1.0)
+ otel_metric_export_interval_ms: int = Field(default=10_000, ge=1000, le=300_000)
+
+ @field_validator("otel_enabled", mode="before")
+ @classmethod
+ def _coerce_otel_enabled(cls, v: object) -> bool:
+ """Coerce the raw ``otel_enabled`` value to a bool before field validation.
+
+ Booleans pass through, ``None`` becomes False, and any other value is
+ True only if its stripped, lower-cased string form is one of
+ "1", "true", "yes", "on".
+ """
+ if isinstance(v, bool):
+ return v
+ if v is None:
+ return False
+ return str(v).strip().lower() in ("1", "true", "yes", "on")
+
+ @field_validator("otel_exporter_otlp_insecure", mode="before")
+ @classmethod
+ def _coerce_otel_exporter_otlp_insecure(cls, v: object) -> bool:
+ """Coerce the raw ``otel_exporter_otlp_insecure`` value to a bool before field validation.
+
+ Booleans pass through, ``None`` becomes True, and any other value is
+ True only if its stripped, lower-cased string form is one of
+ "1", "true", "yes", "on".
+ """
+ if isinstance(v, bool):
+ return v
+ if v is None:
+ return True
+ # NOTE(review): any unrecognized string, including an empty one, yields False
+ # here even though None (and the field default) is True — confirm that an
+ # empty OTEL_EXPORTER_OTLP_INSECURE= entry is meant to turn insecure mode off.
+ return str(v).strip().lower() in ("1", "true", "yes", "on")
+
@field_validator("celery_purge_broker_on_startup", mode="before")
@classmethod
def _coerce_celery_purge_broker_on_startup(cls, v: object) -> bool:
diff --git a/api/app/core/langchain_llm.py b/api/app/core/langchain_llm.py
index 6c09ba7..b8fd33a 100644
--- a/api/app/core/langchain_llm.py
+++ b/api/app/core/langchain_llm.py
@@ -16,6 +16,7 @@ from app.core.agent_logging import (
agent_verbose_enabled,
log_agent_payload,
)
+from app.core.llm_telemetry import infer_provider_model, langchain_invoke_span
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -68,29 +69,41 @@ def invoke_json_object(
sha = _prompt_sha12(prompt_for_api)
attempts = 2 if retry_empty else 1
t0 = time.perf_counter()
+ provider, model = infer_provider_model(llm)
last_content = ""
- for attempt in range(attempts):
- response = bound.invoke(prompt_for_api)
- content = (getattr(response, "content", None) or "").strip()
- last_content = content
- if content:
- if attempt > 0:
- logger.info(
- "json_object 空内容重试成功 agent={} prompt_sha12={}",
+ with langchain_invoke_span(
+ agent=tag,
+ provider=provider,
+ model=model,
+ call_type="json",
+ prompt_sha12=sha,
+ max_tokens=max_tokens,
+ ) as tel:
+ for attempt in range(attempts):
+ response = bound.invoke(prompt_for_api)
+ tel["response"] = response
+ content = (getattr(response, "content", None) or "").strip()
+ last_content = content
+ if content:
+ if attempt > 0:
+ logger.info(
+ "json_object 空内容重试成功 agent={} prompt_sha12={}",
+ tag,
+ sha,
+ )
+ tel["outcome"] = "ok"
+ _log_json_object_done(
+ tag, sha, prompt_for_api, content, attempt + 1, t0, success=True
+ )
+ return content
+ if attempt == 0 and retry_empty:
+ logger.warning(
+ "json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
tag,
+ attempt,
sha,
)
- _log_json_object_done(
- tag, sha, prompt_for_api, content, attempt + 1, t0, success=True
- )
- return content
- if attempt == 0 and retry_empty:
- logger.warning(
- "json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
- tag,
- attempt,
- sha,
- )
+ tel["outcome"] = "error"
logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
_log_json_object_done(
tag, sha, prompt_for_api, last_content, attempts, t0, success=False
@@ -113,29 +126,41 @@ async def ainvoke_json_object(
sha = _prompt_sha12(prompt_for_api)
attempts = 2 if retry_empty else 1
t0 = time.perf_counter()
+ provider, model = infer_provider_model(llm)
last_content = ""
- for attempt in range(attempts):
- response = await bound.ainvoke(prompt_for_api)
- content = (getattr(response, "content", None) or "").strip()
- last_content = content
- if content:
- if attempt > 0:
- logger.info(
- "json_object 空内容重试成功 agent={} prompt_sha12={}",
+ with langchain_invoke_span(
+ agent=tag,
+ provider=provider,
+ model=model,
+ call_type="json",
+ prompt_sha12=sha,
+ max_tokens=max_tokens,
+ ) as tel:
+ for attempt in range(attempts):
+ response = await bound.ainvoke(prompt_for_api)
+ tel["response"] = response
+ content = (getattr(response, "content", None) or "").strip()
+ last_content = content
+ if content:
+ if attempt > 0:
+ logger.info(
+ "json_object 空内容重试成功 agent={} prompt_sha12={}",
+ tag,
+ sha,
+ )
+ tel["outcome"] = "ok"
+ _log_json_object_done(
+ tag, sha, prompt_for_api, content, attempt + 1, t0, success=True
+ )
+ return content
+ if attempt == 0 and retry_empty:
+ logger.warning(
+ "json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
tag,
+ attempt,
sha,
)
- _log_json_object_done(
- tag, sha, prompt_for_api, content, attempt + 1, t0, success=True
- )
- return content
- if attempt == 0 and retry_empty:
- logger.warning(
- "json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
- tag,
- attempt,
- sha,
- )
+ tel["outcome"] = "error"
logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
_log_json_object_done(
tag, sha, prompt_for_api, last_content, attempts, t0, success=False
diff --git a/api/app/core/llm_call.py b/api/app/core/llm_call.py
index 4b4ae7b..52cf767 100644
--- a/api/app/core/llm_call.py
+++ b/api/app/core/llm_call.py
@@ -30,6 +30,12 @@ from app.core.langchain_llm import (
)
from app.core.llm_errors import LlmHttpErrorVendor, format_llm_http_error_message
from app.core.llm_http_openai_chat_errors import should_log_openai_error_as_warning
+from app.core.llm_telemetry import (
+ extract_token_usage,
+ infer_provider_model,
+ llm_call_span,
+ record_llm_call,
+)
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -138,14 +144,16 @@ def _invoke_raw_sync(
max_tokens: int,
agent: str,
retry_empty: bool,
-) -> tuple[str, int]:
+) -> tuple[str, int, int, int]:
prompt_for_api = ensure_json_object_prompt_has_json_keyword(prompt)
bound = bind_json_object_mode(llm, max_tokens=max_tokens)
tag = agent or "json_object"
sha = _prompt_sha12(prompt_for_api)
attempts = 2 if retry_empty else 1
+ last_in, last_out = 0, 0
for attempt in range(attempts):
response = bound.invoke(prompt_for_api)
+ last_in, last_out = extract_token_usage(response)
content = (getattr(response, "content", None) or "").strip()
if content:
if attempt > 0:
@@ -154,7 +162,7 @@ def _invoke_raw_sync(
tag,
sha,
)
- return content, attempt + 1
+ return content, attempt + 1, last_in, last_out
if attempt == 0 and retry_empty:
logger.warning(
"json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
@@ -163,7 +171,7 @@ def _invoke_raw_sync(
sha,
)
logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
- return "", attempts
+ return "", attempts, last_in, last_out
async def _invoke_raw_async(
@@ -173,14 +181,16 @@ async def _invoke_raw_async(
max_tokens: int,
agent: str,
retry_empty: bool,
-) -> tuple[str, int]:
+) -> tuple[str, int, int, int]:
prompt_for_api = ensure_json_object_prompt_has_json_keyword(prompt)
bound = bind_json_object_mode(llm, max_tokens=max_tokens)
tag = agent or "json_object"
sha = _prompt_sha12(prompt_for_api)
attempts = 2 if retry_empty else 1
+ last_in, last_out = 0, 0
for attempt in range(attempts):
response = await bound.ainvoke(prompt_for_api)
+ last_in, last_out = extract_token_usage(response)
content = (getattr(response, "content", None) or "").strip()
if content:
if attempt > 0:
@@ -189,7 +199,7 @@ async def _invoke_raw_async(
tag,
sha,
)
- return content, attempt + 1
+ return content, attempt + 1, last_in, last_out
if attempt == 0 and retry_empty:
logger.warning(
"json_object 返回空 content,将重试 agent={} attempt={} prompt_sha12={}",
@@ -198,7 +208,7 @@ async def _invoke_raw_async(
sha,
)
logger.warning("json_object 仍为空 agent={} prompt_sha12={}", tag, sha)
- return "", attempts
+ return "", attempts, last_in, last_out
def _parse_and_validate(
@@ -252,6 +262,12 @@ def _emit_meta(
parse_ok: bool,
used_fallback: bool,
error_kind: str | None,
+ provider: str,
+ model: str,
+ prompt_sha12: str,
+ input_tokens: int = 0,
+ output_tokens: int = 0,
+ span: Any | None = None,
) -> None:
meta = LLMCallMeta(
agent=agent,
@@ -263,17 +279,35 @@ def _emit_meta(
used_fallback=used_fallback,
error_kind=error_kind,
)
- logger.bind(
- event="llm_json_call",
+ bind = {
+ "event": "llm_json_call",
+ "agent": meta.agent,
+ "schema": meta.schema_name,
+ "max_tokens": meta.max_tokens,
+ "duration_ms": round(meta.duration_ms, 2),
+ "attempts": meta.attempts,
+ "parse_ok": meta.parse_ok,
+ "used_fallback": meta.used_fallback,
+ "error_kind": meta.error_kind,
+ "provider": provider,
+ "prompt_sha12": prompt_sha12,
+ }
+ logger.bind(**bind).info("llm_json_call_done")
+ record_llm_call(
agent=meta.agent,
- schema=meta.schema_name,
- max_tokens=meta.max_tokens,
- duration_ms=round(meta.duration_ms, 2),
+ schema_name=meta.schema_name,
+ provider=provider,
+ model=model,
+ duration_ms=meta.duration_ms,
attempts=meta.attempts,
parse_ok=meta.parse_ok,
used_fallback=meta.used_fallback,
error_kind=meta.error_kind,
- ).info("llm_json_call_done")
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
+ )
def llm_json_call(
@@ -288,13 +322,59 @@ def llm_json_call(
http_error_vendor: LlmHttpErrorVendor = "deepseek",
) -> T:
"""同步:invoke → 解析 JSON → `schema.model_validate`;失败时 `fallback_factory` 或 `LLMCallError`。"""
- t0 = time.perf_counter()
schema_name = getattr(schema, "__name__", str(schema))
+ provider, model = infer_provider_model(llm, http_error_vendor=http_error_vendor)
+ prompt_sha12 = _prompt_sha12(prompt)
+
+ with llm_call_span(
+ agent=agent,
+ schema_name=schema_name,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ max_tokens=max_tokens,
+ ) as span:
+ return _llm_json_call_sync_body(
+ llm,
+ prompt,
+ schema,
+ max_tokens=max_tokens,
+ agent=agent,
+ fallback_factory=fallback_factory,
+ retry_empty=retry_empty,
+ http_error_vendor=http_error_vendor,
+ schema_name=schema_name,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ span=span,
+ )
+
+
+def _llm_json_call_sync_body(
+ llm: Any,
+ prompt: str,
+ schema: type[T],
+ *,
+ max_tokens: int,
+ agent: str,
+ fallback_factory: Callable[[], T] | None,
+ retry_empty: bool,
+ http_error_vendor: LlmHttpErrorVendor,
+ schema_name: str,
+ provider: str,
+ model: str,
+ prompt_sha12: str,
+ span: Any,
+) -> T:
+ t0 = time.perf_counter()
attempts_used = 0
+ input_tokens = 0
+ output_tokens = 0
raw = ""
try:
- raw, attempts_used = _invoke_raw_sync(
+ raw, attempts_used, input_tokens, output_tokens = _invoke_raw_sync(
llm,
prompt,
max_tokens=max_tokens,
@@ -311,6 +391,12 @@ def llm_json_call(
parse_ok=True,
used_fallback=False,
error_kind=None,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
@@ -331,6 +417,12 @@ def llm_json_call(
parse_ok=False,
used_fallback=used_fb,
error_kind=e.kind,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
@@ -354,6 +446,12 @@ def llm_json_call(
parse_ok=False,
used_fallback=used_fb,
error_kind="invoke",
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
@@ -383,13 +481,59 @@ async def allm_json_call(
http_error_vendor: LlmHttpErrorVendor = "deepseek",
) -> T:
"""异步版,语义与 `llm_json_call` 一致。"""
- t0 = time.perf_counter()
schema_name = getattr(schema, "__name__", str(schema))
+ provider, model = infer_provider_model(llm, http_error_vendor=http_error_vendor)
+ prompt_sha12 = _prompt_sha12(prompt)
+
+ with llm_call_span(
+ agent=agent,
+ schema_name=schema_name,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ max_tokens=max_tokens,
+ ) as span:
+ return await _allm_json_call_async_body(
+ llm,
+ prompt,
+ schema,
+ max_tokens=max_tokens,
+ agent=agent,
+ fallback_factory=fallback_factory,
+ retry_empty=retry_empty,
+ http_error_vendor=http_error_vendor,
+ schema_name=schema_name,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ span=span,
+ )
+
+
+async def _allm_json_call_async_body(
+ llm: Any,
+ prompt: str,
+ schema: type[T],
+ *,
+ max_tokens: int,
+ agent: str,
+ fallback_factory: Callable[[], T] | None,
+ retry_empty: bool,
+ http_error_vendor: LlmHttpErrorVendor,
+ schema_name: str,
+ provider: str,
+ model: str,
+ prompt_sha12: str,
+ span: Any,
+) -> T:
+ t0 = time.perf_counter()
attempts_used = 0
+ input_tokens = 0
+ output_tokens = 0
raw = ""
try:
- raw, attempts_used = await _invoke_raw_async(
+ raw, attempts_used, input_tokens, output_tokens = await _invoke_raw_async(
llm,
prompt,
max_tokens=max_tokens,
@@ -406,6 +550,12 @@ async def allm_json_call(
parse_ok=True,
used_fallback=False,
error_kind=None,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
@@ -426,6 +576,12 @@ async def allm_json_call(
parse_ok=False,
used_fallback=used_fb,
error_kind=e.kind,
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
@@ -449,6 +605,12 @@ async def allm_json_call(
parse_ok=False,
used_fallback=used_fb,
error_kind="invoke",
+ provider=provider,
+ model=model,
+ prompt_sha12=prompt_sha12,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ span=span,
)
if agent_verbose_enabled():
log_agent_payload(
diff --git a/api/app/core/llm_gateway.py b/api/app/core/llm_gateway.py
index 3e7c16d..29eadf1 100644
--- a/api/app/core/llm_gateway.py
+++ b/api/app/core/llm_gateway.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel
from app.core.dependencies import get_llm_provider, get_llm_provider_fast
from app.core.llm_call import allm_json_call, llm_json_call
+from app.core.llm_telemetry import langchain_invoke_span
T = TypeVar("T", bound=BaseModel)
@@ -58,16 +59,32 @@ class LlmGateway:
else 0.7
)
)
- return await provider.complete(
- messages,
+ resolved_model = model if model is not None else (use_case.model if use_case else None)
+ agent_name = use_case.name if use_case else "llm_gateway.chat"
+ kwargs = dict(
+ messages=messages,
temperature=resolved_temperature,
- model=model if model is not None else (use_case.model if use_case else None),
+ model=resolved_model,
max_tokens=(
max_tokens
if max_tokens is not None
else (use_case.max_tokens if use_case else None)
),
)
+ # DeepSeekProvider.complete 已包 langchain_invoke_span,避免双层 span
+ from app.adapters.llm.deepseek import DeepSeekLLMProvider
+
+ if isinstance(provider, DeepSeekLLMProvider):
+ return await provider.complete(**kwargs)
+
+ provider_label = type(provider).__name__.replace("Provider", "").lower() or "unknown"
+ with langchain_invoke_span(
+ agent=agent_name,
+ provider=provider_label,
+ model=resolved_model or "unknown",
+ call_type="chat",
+ ):
+ return await provider.complete(**kwargs)
async def json_object(
self,
diff --git a/api/app/core/llm_telemetry.py b/api/app/core/llm_telemetry.py
new file mode 100644
index 0000000..6d52d55
--- /dev/null
+++ b/api/app/core/llm_telemetry.py
@@ -0,0 +1,384 @@
+"""
+LLM 调用 OpenTelemetry span 与 metrics(低基数 attributes,不含 prompt/response 正文)。
+"""
+
+from __future__ import annotations
+
+import time
+from contextlib import contextmanager
+from typing import Any, Iterator, Literal
+
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
+
+from app.core.config import settings
+from app.core.telemetry import get_meter, get_tracer
+
+CallType = Literal["json", "chat", "stream"]
+
+_meter = None
+_duration_hist = None
+_call_counter = None
+_tokens_in_counter = None
+_tokens_out_counter = None
+
+
+def _ensure_instruments() -> None:
+    """Create the module-level LLM metric instruments on first use (no-op when OTel is off)."""
+    global _meter, _duration_hist, _call_counter, _tokens_in_counter, _tokens_out_counter
+    already_built = _meter is not None
+    if already_built or not settings.otel_enabled:
+        return
+    _meter = get_meter("app.llm")
+    # Wall-clock latency histogram, recorded in milliseconds.
+    _duration_hist = _meter.create_histogram(
+        "llm.call.duration", unit="ms", description="LLM call wall time"
+    )
+    # Call counter; its description says it is broken down by outcome.
+    _call_counter = _meter.create_counter(
+        "llm.call.total", description="LLM call count by outcome"
+    )
+    # Token counters, described as applying when the provider reports usage.
+    _tokens_in_counter = _meter.create_counter(
+        "llm.tokens.input", description="LLM input tokens when reported by provider"
+    )
+    _tokens_out_counter = _meter.create_counter(
+        "llm.tokens.output", description="LLM output tokens when reported by provider"
+    )
+
+
+def infer_provider_model(
+    llm: Any,
+    *,
+    http_error_vendor: str = "deepseek",
+) -> tuple[str, str]:
+    """Return ``(provider, model)`` labels for telemetry.
+
+    ``model`` is the first truthy ``model_name`` / ``model`` attribute of ``llm`` ("" if none);
+    ``provider`` is ``http_error_vendor`` stripped and lower-cased ("unknown" if empty)."""
+    candidates = (getattr(llm, field, None) for field in ("model_name", "model"))
+    found = next((value for value in candidates if value), None)
+    model_label = "" if found is None else str(found)
+    return (http_error_vendor or "unknown").strip().lower(), model_label
+
+
+def _outcome_label(*, parse_ok: bool, used_fallback: bool, error_kind: str | None) -> str:
+    """Collapse call flags into one label: "fallback", "ok", or the error kind ("error" if unset)."""
+    if used_fallback:  # a fallback is reported as such regardless of parse_ok
+        return "fallback"
+    failure_label = error_kind or "error"
+    return "ok" if parse_ok else failure_label
+
+
+def extract_token_usage(response: Any) -> tuple[int, int]:
+    """Read ``(input_tokens, output_tokens)`` from a LangChain AIMessage / chunk.
+
+    Prefers ``usage_metadata``, then ``response_metadata["token_usage"]`` / ``["usage"]``;
+    accepts input/prompt and output/completion key names; no usage yields ``(0, 0)``.
+    """
+    usage = getattr(response, "usage_metadata", None)
+    if usage is None and hasattr(response, "response_metadata"):
+        meta = getattr(response, "response_metadata", None) or {}
+        if isinstance(meta, dict):
+            usage = meta.get("token_usage") or meta.get("usage")
+    if usage is None:
+        return 0, 0
+
+    def _first(*names: str) -> int:
+        # Dicts are probed by key, anything else by attribute; the first truthy value wins.
+        probe = usage.get if isinstance(usage, dict) else (lambda key: getattr(usage, key, None))
+        return next((int(value) for value in map(probe, names) if value), 0)
+
+    return _first("input_tokens", "prompt_tokens"), _first("output_tokens", "completion_tokens")
+
+
+def record_llm_completion(
+    *,
+    agent: str,
+    provider: str,
+    model: str,
+    duration_ms: float,
+    call_type: CallType = "chat",
+    outcome: str = "ok",
+    input_tokens: int = 0,
+    output_tokens: int = 0,
+    span: trace.Span | None = None,
+    extra_span_attributes: dict[str, Any] | None = None,
+) -> None:
+    """Record one finished LLM call: duration/count/token metrics plus span attributes and status.
+
+    No-op when OTel is disabled. ``model`` is currently not read by this function.
+    """
+    if not settings.otel_enabled:
+        return
+
+    _ensure_instruments()
+    attrs = {"agent": agent, "provider": provider, "call_type": call_type, "outcome": outcome}
+    token_attrs = {"provider": provider, "agent": agent}
+    if _duration_hist is not None:
+        _duration_hist.record(duration_ms, attrs)
+    if _call_counter is not None:
+        _call_counter.add(1, attrs)
+    if input_tokens > 0 and _tokens_in_counter is not None:
+        _tokens_in_counter.add(input_tokens, token_attrs)
+    if output_tokens > 0 and _tokens_out_counter is not None:
+        _tokens_out_counter.add(output_tokens, token_attrs)
+
+    if span is None or not span.is_recording():
+        return
+    span.set_attribute("llm.duration_ms", round(duration_ms, 2))
+    span.set_attribute("llm.call_type", call_type)
+    span.set_attribute("llm.outcome", outcome)
+    if input_tokens:
+        span.set_attribute("llm.tokens.input", input_tokens)
+    if output_tokens:
+        span.set_attribute("llm.tokens.output", output_tokens)
+    for key, value in (extra_span_attributes or {}).items():
+        span.set_attribute(key, value)
+    if outcome in ("ok", "fallback"):
+        # A Status description is only valid with ERROR; passing one with OK makes the
+        # API log a warning and drop it. "fallback" stays visible via llm.outcome.
+        span.set_status(Status(StatusCode.OK))
+    else:
+        span.set_status(Status(StatusCode.ERROR, outcome))
+
+
+@contextmanager
+def langchain_invoke_span(
+    *,
+    agent: str,
+    provider: str,
+    model: str,
+    call_type: CallType,
+    prompt_sha12: str = "",
+    max_tokens: int | None = None,
+) -> Iterator[dict[str, Any]]:
+    """Wrap a LangChain invoke/ainvoke in a span; metrics are recorded when the block exits.
+
+    Yields a mutable dict (keys: response, outcome, input_tokens, output_tokens) for the
+    caller to fill in; token usage is parsed from ``response`` when no counts were set.
+    """
+    ctx: dict[str, Any] = {
+        "response": None,
+        "outcome": "ok",
+        "input_tokens": 0,
+        "output_tokens": 0,
+    }
+    if not settings.otel_enabled:
+        yield ctx
+        return
+
+    tracer = get_tracer("app.llm")
+    span_name = {
+        "json": "llm.json_invoke",
+        "chat": "llm.chat_invoke",
+        "stream": "llm.stream_invoke",
+    }.get(call_type, "llm.invoke")
+    attrs: dict[str, Any] = {
+        "llm.agent": agent,
+        "llm.provider": provider,
+        "llm.model": model or "unknown",
+        "llm.call_type": call_type,
+    }
+    if prompt_sha12:
+        attrs["llm.prompt_sha12"] = prompt_sha12
+    if max_tokens is not None:
+        attrs["llm.max_tokens"] = max_tokens
+
+    t0 = time.perf_counter()
+    with tracer.start_as_current_span(span_name, attributes=attrs) as span:
+        try:
+            yield ctx
+        except BaseException:
+            # BaseException: asyncio.CancelledError must not be recorded with the default "ok".
+            ctx["outcome"] = "error"
+            raise
+        finally:
+            duration_ms = (time.perf_counter() - t0) * 1000
+            resp = ctx.get("response")
+            if resp is not None and not ctx.get("input_tokens") and not ctx.get("output_tokens"):
+                ctx["input_tokens"], ctx["output_tokens"] = extract_token_usage(resp)
+            record_llm_completion(
+                agent=agent,
+                provider=provider,
+                model=model,
+                duration_ms=duration_ms,
+                call_type=call_type,
+                outcome=str(ctx.get("outcome") or "ok"),
+                input_tokens=int(ctx.get("input_tokens") or 0),
+                output_tokens=int(ctx.get("output_tokens") or 0),
+                span=span,
+            )
+
+
+@contextmanager
+def llm_call_span(
+    *,
+    agent: str,
+    schema_name: str,
+    provider: str,
+    model: str,
+    prompt_sha12: str,
+    max_tokens: int,
+) -> Iterator[trace.Span]:
+    """Open the ``llm.json_call`` span around a JSON LLM call and yield it
+    (``trace.INVALID_SPAN`` is yielded instead when OTel is disabled)."""
+    if settings.otel_enabled:
+        span_attributes = {
+            "llm.agent": agent,
+            "llm.schema_name": schema_name,
+            "llm.provider": provider,
+            "llm.model": model or "unknown",
+            "llm.prompt_sha12": prompt_sha12,
+            "llm.max_tokens": max_tokens,
+            "llm.call_type": "json",
+        }
+        tracer = get_tracer("app.llm")
+        with tracer.start_as_current_span("llm.json_call", attributes=span_attributes) as span:
+            yield span
+    else:
+        yield trace.INVALID_SPAN
+
+
+async def observe_ainvoke(
+    llm: Any,
+    messages: Any,
+    *,
+    agent: str,
+    provider: str = "deepseek",
+    model: str = "",
+    call_type: CallType = "chat",
+    extra_span_attributes: dict[str, Any] | None = None,
+    record_response_latency_ms: bool = True,
+) -> Any:
+    """Wrap ``ainvoke`` so every call goes through the same span + metrics path."""
+    t0 = time.perf_counter()
+    with langchain_invoke_span(
+        agent=agent,
+        provider=provider,
+        model=model,
+        call_type=call_type,
+    ) as tel:
+        result = await llm.ainvoke(messages)
+        tel["response"] = result  # lets langchain_invoke_span parse token usage on exit
+        span = trace.get_current_span()  # NOTE(review): assumed to sit inside the with-block, while the LLM span is current — confirm
+        if span.is_recording():
+            if record_response_latency_ms:
+                span.set_attribute(
+                    "llm.response_latency_ms",
+                    round((time.perf_counter() - t0) * 1000, 2),
+                )
+            if extra_span_attributes:
+                for key, value in extra_span_attributes.items():
+                    if value is not None:  # None-valued extras are skipped
+                        span.set_attribute(key, value)
+    return result
+
+
+async def observe_astream(
+    llm: Any,
+    prompt: Any,
+    *,
+    agent: str,
+    provider: str = "deepseek",
+    model: str = "",
+):
+    """Wrap ``astream``; records wall time and, once content arrives, time to first token."""
+    if not settings.otel_enabled:
+        async for chunk in llm.astream(prompt):  # plain pass-through: no span, no metrics
+            yield chunk
+        return
+
+    tracer = get_tracer("app.llm")
+    t0 = time.perf_counter()
+    ttft_ms: float | None = None
+    last_chunk: Any = None
+    with tracer.start_as_current_span(
+        "llm.stream_invoke",
+        attributes={
+            "llm.agent": agent,
+            "llm.provider": provider,
+            "llm.model": model or "unknown",
+            "llm.call_type": "stream",
+        },
+    ) as span:
+        try:
+            async for chunk in llm.astream(prompt):
+                if ttft_ms is None and getattr(chunk, "content", None):  # first chunk with non-empty content
+                    ttft_ms = (time.perf_counter() - t0) * 1000
+                last_chunk = chunk
+                yield chunk
+        except Exception:
+            duration_ms = (time.perf_counter() - t0) * 1000
+            record_llm_completion(
+                agent=agent,
+                provider=provider,
+                model=model,
+                duration_ms=duration_ms,
+                call_type="stream",
+                outcome="error",
+                span=span,
+                extra_span_attributes=(
+                    {"llm.ttft_ms": round(ttft_ms, 2)} if ttft_ms is not None else None
+                ),
+            )
+            raise
+        duration_ms = (time.perf_counter() - t0) * 1000  # NOTE(review): success path assumed to stay inside the with-block — confirm
+        inp, out = extract_token_usage(last_chunk) if last_chunk else (0, 0)  # usage is read from the last chunk only
+        extra: dict[str, Any] = {}
+        if ttft_ms is not None:
+            extra["llm.ttft_ms"] = round(ttft_ms, 2)
+        record_llm_completion(
+            agent=agent,
+            provider=provider,
+            model=model,
+            duration_ms=duration_ms,
+            call_type="stream",
+            outcome="ok",
+            input_tokens=inp,
+            output_tokens=out,
+            span=span,
+            extra_span_attributes=extra or None,
+        )
+
+
+def record_llm_call(
+    *,
+    agent: str,
+    schema_name: str,
+    provider: str,
+    model: str,
+    duration_ms: float,
+    attempts: int,
+    parse_ok: bool,
+    used_fallback: bool,
+    error_kind: str | None,
+    prompt_sha12: str,
+    input_tokens: int = 0,
+    output_tokens: int = 0,
+    span: trace.Span | None = None,
+) -> None:
+    """Report a finished JSON LLM call via ``record_llm_completion`` (call_type "json")."""
+    outcome = _outcome_label(parse_ok=parse_ok, used_fallback=used_fallback, error_kind=error_kind)
+    json_attrs: dict[str, Any] = {
+        "llm.schema_name": schema_name,
+        "llm.attempts": attempts,
+        "llm.parse_ok": parse_ok,
+        "llm.used_fallback": used_fallback,
+    }
+    if error_kind:
+        json_attrs["llm.error_kind"] = error_kind
+    if prompt_sha12:
+        json_attrs["llm.prompt_sha12"] = prompt_sha12
+    record_llm_completion(
+        agent=agent,
+        provider=provider,
+        model=model,
+        duration_ms=duration_ms,
+        call_type="json",
+        outcome=outcome,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        span=span,
+        extra_span_attributes=json_attrs,
+    )
diff --git a/api/app/core/logging.py b/api/app/core/logging.py
index 18175bd..b065ce9 100644
--- a/api/app/core/logging.py
+++ b/api/app/core/logging.py
@@ -108,10 +108,32 @@ def _stdlib_emit_display(log_record: logging.LogRecord) -> tuple[str, int]:
return fn, ln
+def _merge_trace_context(record: Any) -> None:
+    """Copy the current OTel trace/span ids into ``record["extra"]`` on every log record
+    (covers Celery/background work that never passes through the HTTP middleware)."""
+    try:
+        from app.core.telemetry import current_trace_context
+        trace_ids = current_trace_context()
+    except Exception:
+        return  # best effort: a telemetry failure leaves the record untouched
+    if not trace_ids:
+        return
+    extra = record["extra"]
+    for field, value in trace_ids.items():
+        if not value:
+            continue
+        existing = extra.get(field)
+        if existing is None or str(existing).strip() in ("", "-"):  # only blank slots are filled
+            extra[field] = value
+
+
def _stderr_format(record: Any) -> str:
- """控制台 sink:request_id / correlation_id / user_id 有值时才显示对应列。"""
+ """控制台 sink:request_id / correlation_id / user_id / trace_id 有值时才显示对应列。"""
rid = str(record["extra"].get("request_id") or "").strip()
rid_part = "rid={extra[request_id]} | " if rid and rid != "-" else ""
+ tid = str(record["extra"].get("trace_id") or "").strip()
+ tid_short = tid[:12] if len(tid) > 12 else tid
+ tid_part = f"tid={tid_short} | " if tid else ""
cid = str(record["extra"].get("correlation_id") or "").strip()
cid_part = "corr={extra[correlation_id]} | " if cid else ""
uid = str(record["extra"].get("user_id") or "").strip()
@@ -120,7 +142,7 @@ def _stderr_format(record: Any) -> str:
"{time:YYYY-MM-DD HH:mm:ss.SSS} | "
"{level.name: <8} | "
"{extra[module]}:{function}:{line} | "
- f"{rid_part}{cid_part}{uid_part}"
+ f"{rid_part}{tid_part}{cid_part}{uid_part}"
"{message}\n{exception}"
)
@@ -242,8 +264,8 @@ def setup_logging() -> None:
enqueue=True,
)
- logger.configure(extra={"request_id": "-", "module": "-"})
- logger = logger.patch(_merge_celery_worker_extra)
+ logger.configure(extra={"request_id": "-", "module": "-", "trace_id": "", "span_id": ""})
+ logger = logger.patch(_merge_celery_worker_extra).patch(_merge_trace_context)
# 仅 root 挂 InterceptHandler,避免子 logger 与 root 各处理一次导致重复行
root = logging.getLogger()
diff --git a/api/app/core/middleware.py b/api/app/core/middleware.py
index 9bdcdc0..c7a708d 100644
--- a/api/app/core/middleware.py
+++ b/api/app/core/middleware.py
@@ -8,6 +8,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from app.core.logging import logger
+from app.core.telemetry import current_trace_context
class RequestIdMiddleware(BaseHTTPMiddleware):
@@ -17,7 +18,8 @@ class RequestIdMiddleware(BaseHTTPMiddleware):
request_id = request.headers.get("X-Request-ID") or str(uuid.uuid4())
request.state.request_id = request_id
- with logger.contextualize(request_id=request_id):
+ bind = {"request_id": request_id, **current_trace_context()}
+ with logger.contextualize(**bind):
response = await call_next(request)
response.headers["X-Request-ID"] = request_id
diff --git a/api/app/core/telemetry.py b/api/app/core/telemetry.py
new file mode 100644
index 0000000..21b45a4
--- /dev/null
+++ b/api/app/core/telemetry.py
@@ -0,0 +1,146 @@
+"""
+OpenTelemetry 初始化:traces / metrics / logs 导出至 OTLP Collector。
+
+在 ``setup_logging()`` 之后、FastAPI / Celery 应用创建前调用 ``setup_telemetry(service_name=...)``。
+``OTEL_ENABLED=false`` 时无操作,便于测试与无 Collector 环境。
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from opentelemetry import metrics, trace
+from opentelemetry._logs import set_logger_provider
+from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.instrumentation.celery import CeleryInstrumentor
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
+from opentelemetry.instrumentation.logging import LoggingInstrumentor
+from opentelemetry.instrumentation.redis import RedisInstrumentor
+from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
+from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
+
+from app.core.config import settings
+
+if TYPE_CHECKING:
+ from fastapi import FastAPI
+
+_initialized = False
+_otel_logging_handler: LoggingHandler | None = None
+
+
+def _build_resource(service_name: str) -> Resource:
+    """Build the OTel ``Resource`` carrying service name, deployment environment and version."""
+    resource_attributes = {
+        "service.name": service_name,
+        "deployment.environment": settings.app_environment,
+        "service.version": "0.2.0",
+    }
+    return Resource.create(resource_attributes)
+
+
+def _build_sampler():
+    """Map ``settings.otel_traces_sampler`` to an SDK sampler; unknown names fall back to
+    a parent-based ratio sampler (ratio from ``otel_traces_sampler_arg``, default 0.1)."""
+    from opentelemetry.sdk.trace import sampling
+
+    name = (settings.otel_traces_sampler or "always_on").strip().lower()
+    if name in ("always_on", "alwayson"):
+        return sampling.ALWAYS_ON
+    if name in ("always_off", "alwaysoff"):
+        return sampling.ALWAYS_OFF
+    if name in ("parentbased_always_on", "parentbased_always_off"):
+        # Standard OTEL_TRACES_SAMPLER names; without this branch they get the ratio fallback.
+        return sampling.ParentBased(sampling.ALWAYS_ON if name.endswith("_on") else sampling.ALWAYS_OFF)
+    ratio = 0.1 if settings.otel_traces_sampler_arg is None else settings.otel_traces_sampler_arg
+    if name == "traceidratio":
+        return sampling.TraceIdRatioBased(ratio)
+    return ParentBasedTraceIdRatio(ratio)
+
+
+def setup_telemetry(*, service_name: str) -> None:
+    """Configure the OTLP exporters and auto-instrumentation (idempotent)."""
+    global _initialized, _otel_logging_handler
+    if _initialized or not settings.otel_enabled:  # already set up, or telemetry switched off
+        return
+
+    endpoint = settings.otel_exporter_otlp_endpoint.rstrip("/")
+    insecure = settings.otel_exporter_otlp_insecure
+
+    resource = _build_resource(service_name)  # shared by the trace, metric and log providers below
+
+    span_exporter = OTLPSpanExporter(endpoint=endpoint, insecure=insecure)
+    tracer_provider = TracerProvider(resource=resource, sampler=_build_sampler())
+    tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
+    trace.set_tracer_provider(tracer_provider)
+
+    metric_exporter = OTLPMetricExporter(endpoint=endpoint, insecure=insecure)
+    metric_reader = PeriodicExportingMetricReader(
+        metric_exporter,
+        export_interval_millis=settings.otel_metric_export_interval_ms,
+    )
+    meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
+    metrics.set_meter_provider(meter_provider)
+
+    log_exporter = OTLPLogExporter(endpoint=endpoint, insecure=insecure)
+    log_provider = LoggerProvider(resource=resource)
+    log_provider.add_log_record_processor(BatchLogRecordProcessor(log_exporter))
+    set_logger_provider(log_provider)
+
+    LoggingInstrumentor().instrument(set_logging_format=True)
+    _otel_logging_handler = LoggingHandler(
+        level=logging.NOTSET,
+        logger_provider=log_provider,
+    )
+    logging.getLogger().addHandler(_otel_logging_handler)  # attached to the stdlib root logger
+
+    HTTPXClientInstrumentor().instrument()
+    RedisInstrumentor().instrument()
+    SQLAlchemyInstrumentor().instrument()
+
+    _initialized = True  # set only after every step above has completed
+
+
+def instrument_fastapi_app(app: FastAPI) -> None:
+    """Instrument ``app`` with ``FastAPIInstrumentor`` when OTel is enabled.
+
+    ``excluded_urls="/health"`` is passed through, presumably to keep health checks untraced.
+    """
+    if settings.otel_enabled:
+        FastAPIInstrumentor.instrument_app(app, excluded_urls="/health")
+
+
+def instrument_celery() -> None:
+    """Enable OTel's Celery instrumentation; does nothing when OTel is disabled."""
+    if settings.otel_enabled:
+        CeleryInstrumentor().instrument()
+
+
+def get_tracer(name: str):  # thin wrapper: delegates to trace.get_tracer(name)
+    return trace.get_tracer(name)
+
+
+def get_meter(name: str):  # thin wrapper: delegates to metrics.get_meter(name)
+    return metrics.get_meter(name)
+
+
+def current_trace_context() -> dict[str, str]:
+    """Return the current span's ids as zero-padded lowercase hex strings.
+
+    Keys are ``trace_id`` (32 hex digits) and ``span_id`` (16); the dict is empty
+    when the current span context is not valid.
+    """
+    span_ctx = trace.get_current_span().get_span_context()
+    if span_ctx.is_valid:
+        return {"trace_id": f"{span_ctx.trace_id:032x}", "span_id": f"{span_ctx.span_id:016x}"}
+    return {}
diff --git a/api/app/features/conversation/ws/pipeline.py b/api/app/features/conversation/ws/pipeline.py
index bfeeca6..0745491 100644
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -20,6 +20,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.chat import ChatOrchestrator
from app.agents.chat.reply_limits import segments_from_llm_response
from app.core.agent_logging import agent_summary_enabled
+from app.core.business_telemetry import business_span
from app.core.config import settings
from app.core.cos_url_keys import (
TTS_PRESIGNED_EXPIRES_SEC,
@@ -634,6 +635,12 @@ def _split_audio_bytes(audio_bytes: bytes, fmt: str) -> list[bytes]:
async def _transcribe_long_audio(audio_bytes: bytes, fmt: str = "m4a") -> str:
"""超过 55 s 的音频自动切片后并行 ASR;短音频直接转写。"""
asr = get_asr_provider()
+ return await _transcribe_long_audio_inner(audio_bytes, fmt, asr)
+
+
+async def _transcribe_long_audio_inner(
+ audio_bytes: bytes, fmt: str, asr: Any
+) -> str:
try:
chunks = await asyncio.to_thread(_split_audio_bytes, audio_bytes, fmt)
except Exception as exc:
@@ -938,6 +945,32 @@ async def process_user_message(
tts_this_turn: Optional[bool] = None,
) -> None:
"""处理用户消息,生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
+ with business_span("conversation.ws.process_turn"):
+ await _process_user_message_inner(
+ conversation_id,
+ user_message,
+ conversation,
+ segment,
+ db,
+ user,
+ user_message_timestamp,
+ force_skip_tts=force_skip_tts,
+ tts_this_turn=tts_this_turn,
+ )
+
+
+async def _process_user_message_inner(
+ conversation_id: str,
+ user_message: str,
+ conversation: Conversation,
+ segment: Segment,
+ db: AsyncSession,
+ user: User = None,
+ user_message_timestamp: Optional[datetime] = None,
+ *,
+ force_skip_tts: bool = False,
+ tts_this_turn: Optional[bool] = None,
+) -> None:
store = ConversationHistoryStore(db)
tts_urls: list[str] = []
user_language = _resolve_user_language(user)
diff --git a/api/app/features/evaluation/judge_service.py b/api/app/features/evaluation/judge_service.py
index e4b89fe..d9f9929 100644
--- a/api/app/features/evaluation/judge_service.py
+++ b/api/app/features/evaluation/judge_service.py
@@ -445,7 +445,16 @@ class EvalJudgeService:
if hasattr(llm, "bind"):
llm = llm.bind(max_tokens=_COMPARE_STREAM_MAX)
try:
- async for chunk in llm.astream(prompt):
+ from app.core.llm_telemetry import infer_provider_model, observe_astream
+
+ provider, model = infer_provider_model(llm, http_error_vendor="zhipu")
+ async for chunk in observe_astream(
+ llm,
+ prompt,
+ agent="EvalJudge.stream_conversation_compare",
+ provider=provider,
+ model=model,
+ ):
piece = getattr(chunk, "content", None)
if piece:
yield piece
diff --git a/api/app/features/memoir/story_pipeline_sync.py b/api/app/features/memoir/story_pipeline_sync.py
index f4cc3f6..dc127d6 100644
--- a/api/app/features/memoir/story_pipeline_sync.py
+++ b/api/app/features/memoir/story_pipeline_sync.py
@@ -27,6 +27,7 @@ from app.agents.memoir.story_route_agent import (
StoryRouteAgent,
default_append_target_story_id,
)
+from app.core.business_telemetry import business_span
from app.agents.stage_constants import (
CATEGORY_TO_CHAT_STAGE,
CHAPTER_CATEGORIES,
@@ -996,6 +997,46 @@ def run_story_pipeline_for_category_batch(
返回 :class:`StoryPipelineResult`。低置信路由会被延迟而不创建 Story/Chapter。
"""
+ with business_span(
+ "memoir.story_pipeline.batch",
+ chapter_category=chapter_category,
+ segment_count=len(category_segments),
+ ):
+ return _run_story_pipeline_batch_inner(
+ session,
+ user_id=user_id,
+ chapter_category=chapter_category,
+ category_segments=category_segments,
+ state=state,
+ user_profile=user_profile,
+ user_birth_year=user_birth_year,
+ llm=llm,
+ background_voice=background_voice,
+ occupation=occupation,
+ memoir_correlation_id=memoir_correlation_id,
+ llm_fast=llm_fast,
+ memory_evidence=memory_evidence,
+ language=language,
+ )
+
+
+def _run_story_pipeline_batch_inner(
+ session: Session,
+ *,
+ user_id: str,
+ chapter_category: str,
+ category_segments: list,
+ state: MemoirStateSchema,
+ user_profile: str,
+ user_birth_year: int | None,
+ llm: Any,
+ background_voice: str = "default",
+ occupation: str = "",
+ memoir_correlation_id: str | None = None,
+ llm_fast: Any | None = None,
+ memory_evidence: dict | None = None,
+ language: str = "zh",
+) -> StoryPipelineResult:
pipeline_phase_timings: dict[str, float] = {}
narrative_agent = NarrativeAgent()
route_agent = StoryRouteAgent()
@@ -1013,9 +1054,10 @@ def run_story_pipeline_for_category_batch(
top_k = int(settings.evidence_top_k_large_batch)
def _oral_job() -> tuple[str, float]:
- t_oral = time.perf_counter()
- out = normalize_oral_for_memoir(combined_text, llm=llm)
- return out, time.perf_counter() - t_oral
+ with business_span("memoir.story_pipeline.oral_normalize"):
+ t_oral = time.perf_counter()
+ out = normalize_oral_for_memoir(combined_text, llm=llm)
+ return out, time.perf_counter() - t_oral
_t_parallel = time.perf_counter()
with ThreadPoolExecutor(max_workers=1) as pool:
@@ -1045,7 +1087,8 @@ def run_story_pipeline_for_category_batch(
top_k,
)
- evidence_text = format_evidence_chunks_for_prompt(evidence)
+ with business_span("memoir.story_pipeline.evidence_prep", chapter_category=chapter_category):
+ evidence_text = format_evidence_chunks_for_prompt(evidence)
ct_raw = (combined_text or "").strip()
om_norm = (oral_for_memoir or "").strip()
if ct_raw != om_norm:
@@ -1099,35 +1142,36 @@ def run_story_pipeline_for_category_batch(
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
_t0 = time.perf_counter()
- use_batch_plan = (
- llm_route
- and len(category_segments) >= 2
- and len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS
- )
- plan: StoryBatchPlan | None = None
- if use_batch_plan:
- segs = _route_segment_texts(category_segments)
- plan = route_agent.plan_batch(
- chapter_category=chapter_category,
- chapter_title=title,
- segments=segs,
- candidate_stories=candidates,
- llm=llm_route,
- valid_story_ids=valid_ids,
- story_meta=story_meta,
+ with business_span("memoir.story_pipeline.route", chapter_category=chapter_category):
+ use_batch_plan = (
+ llm_route
+ and len(category_segments) >= 2
+ and len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS
)
+ plan: StoryBatchPlan | None = None
+ if use_batch_plan:
+ segs = _route_segment_texts(category_segments)
+ plan = route_agent.plan_batch(
+ chapter_category=chapter_category,
+ chapter_title=title,
+ segments=segs,
+ candidate_stories=candidates,
+ llm=llm_route,
+ valid_story_ids=valid_ids,
+ story_meta=story_meta,
+ )
- single_route: Any = None
- if plan is None:
- single_route = route_agent.decide(
- chapter_category=chapter_category,
- chapter_title=title,
- batch_transcript=route_transcript,
- candidate_stories=candidates,
- llm=llm_route,
- valid_story_ids=valid_ids,
- story_meta=story_meta,
- )
+ single_route: Any = None
+ if plan is None:
+ single_route = route_agent.decide(
+ chapter_category=chapter_category,
+ chapter_title=title,
+ batch_transcript=route_transcript,
+ candidate_stories=candidates,
+ llm=llm_route,
+ valid_story_ids=valid_ids,
+ story_meta=story_meta,
+ )
pipeline_phase_timings["route"] = time.perf_counter() - _t0
if (
@@ -1166,89 +1210,91 @@ def run_story_pipeline_for_category_batch(
)
_t0 = time.perf_counter()
- if plan is not None:
- dispatch_ids = _run_batch_plan_writes(
- session,
- plan=plan,
- category_segments=category_segments,
- chapter=chapter,
- chapter_category=chapter_category,
- evidence_text=evidence_text,
- evidence=evidence,
- evidence_top_k=top_k,
- slot_snippets=slot_snippets,
- user_id=user_id,
- user_profile=user_profile,
- user_birth_year=user_birth_year,
- llm=llm,
- narrative_agent=narrative_agent,
- candidate_stories=candidates,
- story_meta=story_meta,
- background_voice=background_voice,
- occupation=occupation,
- memoir_correlation_id=memoir_correlation_id,
- fidelity_llm=llm_fidelity,
- language=language,
- )
- else:
- route = single_route
- decision_source = (
- route.reason
- if route.reason in FALLBACK_NEW_STORY_REASONS
- else ("fallback_no_llm" if not llm_route else "single_decide")
- )
- target_story_id, existing_for_narrative, decision_source = (
- _resolve_append_target(
+ with business_span("memoir.story_pipeline.narrative_writes", chapter_category=chapter_category):
+ if plan is not None:
+ dispatch_ids = _run_batch_plan_writes(
session,
- route_decision=route.decision,
- route_target_story_id=route.target_story_id,
- user_id=user_id,
+ plan=plan,
+ category_segments=category_segments,
+ chapter=chapter,
chapter_category=chapter_category,
- oral_norm=om_norm,
+ evidence_text=evidence_text,
+ evidence=evidence,
+ evidence_top_k=top_k,
+ slot_snippets=slot_snippets,
+ user_id=user_id,
+ user_profile=user_profile,
+ user_birth_year=user_birth_year,
+ llm=llm,
+ narrative_agent=narrative_agent,
candidate_stories=candidates,
story_meta=story_meta,
- decision_source=decision_source,
+ background_voice=background_voice,
+ occupation=occupation,
memoir_correlation_id=memoir_correlation_id,
+ fidelity_llm=llm_fidelity,
+ language=language,
+ )
+ else:
+ route = single_route
+ decision_source = (
+ route.reason
+ if route.reason in FALLBACK_NEW_STORY_REASONS
+ else ("fallback_no_llm" if not llm_route else "single_decide")
+ )
+ target_story_id, existing_for_narrative, decision_source = (
+ _resolve_append_target(
+ session,
+ route_decision=route.decision,
+ route_target_story_id=route.target_story_id,
+ user_id=user_id,
+ chapter_category=chapter_category,
+ oral_norm=om_norm,
+ candidate_stories=candidates,
+ story_meta=story_meta,
+ decision_source=decision_source,
+ memoir_correlation_id=memoir_correlation_id,
+ )
)
- )
- sid, _ = _execute_narrative_unit(
- session,
- oral_text=oral_for_memoir,
- evidence_text=evidence_text,
- evidence=evidence,
- evidence_top_k=top_k,
- chapter=chapter,
- chapter_category=chapter_category,
- slot_snippets=slot_snippets,
- user_id=user_id,
- user_profile=user_profile,
- user_birth_year=user_birth_year,
- llm=llm,
- narrative_agent=narrative_agent,
- target_story_id=target_story_id,
- existing_for_narrative=existing_for_narrative,
- decision_source=decision_source,
- route_decision=route.decision,
- route_type="single",
- segment_ids=[str(s.id) for s in category_segments],
- category_segments=category_segments,
- background_voice=background_voice,
- occupation=occupation,
- memoir_correlation_id=memoir_correlation_id,
- fidelity_llm=llm_fidelity,
- language=language,
- )
- if sid:
- dispatch_ids.add(sid)
+ sid, _ = _execute_narrative_unit(
+ session,
+ oral_text=oral_for_memoir,
+ evidence_text=evidence_text,
+ evidence=evidence,
+ evidence_top_k=top_k,
+ chapter=chapter,
+ chapter_category=chapter_category,
+ slot_snippets=slot_snippets,
+ user_id=user_id,
+ user_profile=user_profile,
+ user_birth_year=user_birth_year,
+ llm=llm,
+ narrative_agent=narrative_agent,
+ target_story_id=target_story_id,
+ existing_for_narrative=existing_for_narrative,
+ decision_source=decision_source,
+ route_decision=route.decision,
+ route_type="single",
+ segment_ids=[str(s.id) for s in category_segments],
+ category_segments=category_segments,
+ background_voice=background_voice,
+ occupation=occupation,
+ memoir_correlation_id=memoir_correlation_id,
+ fidelity_llm=llm_fidelity,
+ language=language,
+ )
+ if sid:
+ dispatch_ids.add(sid)
pipeline_phase_timings["narrative_writes"] = time.perf_counter() - _t0
_t0 = time.perf_counter()
- reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
- mark_chapter_dirty_sync(session, str(chapter.id))
- session.flush()
- refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id))
+ with business_span("memoir.story_pipeline.finalize", chapter_category=chapter_category):
+ reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
+ mark_chapter_dirty_sync(session, str(chapter.id))
+ session.flush()
+ refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id))
pipeline_phase_timings["finalize"] = time.perf_counter() - _t0
image_settings = MemoirImageSettings.from_env()
diff --git a/api/app/features/payment/alipay_client.py b/api/app/features/payment/alipay_client.py
index 8f4b0f7..dad2b0a 100644
--- a/api/app/features/payment/alipay_client.py
+++ b/api/app/features/payment/alipay_client.py
@@ -4,6 +4,7 @@
from typing import Dict
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.features.payment.payment_config import AlipayConfig
from app.features.payment.payment_exceptions import (
@@ -46,6 +47,15 @@ class AlipayClient:
out_trade_no: str,
total_amount: int,
subject: str,
+ ) -> PaymentResult:
+ with business_span("payment.alipay.create_app_order"):
+ return self._create_app_order_inner(out_trade_no, total_amount, subject)
+
+ def _create_app_order_inner(
+ self,
+ out_trade_no: str,
+ total_amount: int,
+ subject: str,
) -> PaymentResult:
self._ensure_client()
try:
@@ -100,6 +110,10 @@ class AlipayClient:
raise PaymentNotifyError(f"支付宝回调处理失败: {e}")
def query_order(self, out_trade_no: str) -> PaymentStatus:
+ with business_span("payment.alipay.query_order"):
+ return self._query_order_inner(out_trade_no)
+
+ def _query_order_inner(self, out_trade_no: str) -> PaymentStatus:
self._ensure_client()
try:
result = self._client.api_alipay_trade_query(out_trade_no=out_trade_no)
diff --git a/api/app/features/payment/wechat_client.py b/api/app/features/payment/wechat_client.py
index 3268628..89b9ec6 100644
--- a/api/app/features/payment/wechat_client.py
+++ b/api/app/features/payment/wechat_client.py
@@ -7,6 +7,7 @@ import os
import time
from typing import Dict
+from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.features.payment.payment_config import WeChatPayConfig
from app.features.payment.payment_exceptions import (
@@ -149,6 +150,15 @@ class WeChatPayClient:
out_trade_no: str,
total_amount: int,
description: str,
+ ) -> PaymentResult:
+ with business_span("payment.wechat.create_app_order"):
+ return self._create_app_order_inner(out_trade_no, total_amount, description)
+
+ def _create_app_order_inner(
+ self,
+ out_trade_no: str,
+ total_amount: int,
+ description: str,
) -> PaymentResult:
self._ensure_client()
try:
@@ -217,6 +227,10 @@ class WeChatPayClient:
raise PaymentNotifyError(f"微信支付回调处理失败: {e}")
def query_order(self, out_trade_no: str) -> PaymentStatus:
+ with business_span("payment.wechat.query_order"):
+ return self._query_order_inner(out_trade_no)
+
+ def _query_order_inner(self, out_trade_no: str) -> PaymentStatus:
self._ensure_client()
try:
code, message = self._client.query(out_trade_no=out_trade_no)
diff --git a/api/app/internal_main.py b/api/app/internal_main.py
index 179b4fb..55e8354 100644
--- a/api/app/internal_main.py
+++ b/api/app/internal_main.py
@@ -14,12 +14,18 @@ from app.core.logging import get_logger, setup_logging
setup_logging()
+from app.core.config import settings
+from app.core.telemetry import instrument_fastapi_app, setup_telemetry
+
+setup_telemetry(
+ service_name=settings.otel_service_name or "life-echo-internal-api",
+)
+
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
-from app.core.config import settings
from app.core.errors import register_exception_handlers
from app.core.middleware import RequestIdMiddleware
from app.features.evaluation import models as _eval_models # noqa: F401
@@ -35,6 +41,8 @@ internal_app = FastAPI(
openapi_url="/openapi.json" if settings.internal_eval_enable_docs else None,
)
+instrument_fastapi_app(internal_app)
+
internal_app.add_middleware(RequestIdMiddleware)
_origins = [
o.strip()
@@ -66,7 +74,7 @@ async def internal_eval_landing():
Life Echo · 内部回归评测 API
这里是 HTTP API(端口由启动命令决定),没有内置网页。
-浏览「回归评测台」请在仓库执行 ./internal-eval.sh 或 cd app-eval-web && npm run dev,
+浏览「回归评测台」请在仓库执行 ./development.sh 或 cd app-eval-web && npm run dev,
在终端里打开 Vite 给出的地址(一般为 http://127.0.0.1:5174/)。
健康检查:/health
{docs_hint}
diff --git a/api/app/main.py b/api/app/main.py
index a9ae9dd..18a7b65 100644
--- a/api/app/main.py
+++ b/api/app/main.py
@@ -8,11 +8,17 @@ from app.core.logging import get_logger, setup_logging
setup_logging()
+from app.core.config import settings
+from app.core.telemetry import instrument_fastapi_app, setup_telemetry
+
+setup_telemetry(
+ service_name=settings.otel_service_name or "life-echo-api",
+)
+
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
-from app.core.config import settings
from app.core.errors import register_exception_handlers
from app.core.middleware import RequestIdMiddleware
from app.core.openapi import custom_openapi
@@ -46,6 +52,8 @@ app = FastAPI(
openapi_url="/openapi.json" if settings.enable_docs else None,
)
+instrument_fastapi_app(app)
+
# OpenAPI 全局增强
app.openapi = lambda: custom_openapi(app) # type: ignore[assignment]
diff --git a/api/app/tasks/celery_app.py b/api/app/tasks/celery_app.py
index a8a8ad3..62a439c 100644
--- a/api/app/tasks/celery_app.py
+++ b/api/app/tasks/celery_app.py
@@ -14,11 +14,17 @@ from app.core.logging import get_logger, setup_logging
# 与 app.main 一致:先配置 loguru + InterceptHandler,再加载会打日志的依赖
setup_logging()
+from app.core.config import settings
+from app.core.telemetry import instrument_celery, setup_telemetry
+
+# Worker 与 API 共用 .env,固定 service.name,勿读 OTEL_SERVICE_NAME(留给主站 / internal)
+setup_telemetry(service_name="life-echo-celery-worker")
+instrument_celery()
+
from celery import Celery
from celery.signals import task_failure, task_postrun, task_prerun, task_success
from app.core.celery_log_context import clear_celery_log_extras, set_celery_log_extras
-from app.core.config import settings
from app.core.log_events import celery_prerun_extras
from app.features.asset import models as _asset_models # noqa: F401 - register Asset
from app.features.auth import models as _auth_models # noqa: F401
@@ -123,9 +129,12 @@ def _log_task_prerun(
**_: object,
) -> None:
name = getattr(task, "name", None) or "?"
+ from app.core.telemetry import current_trace_context
+
extras = celery_prerun_extras(name, tuple(args or ()), dict(kwargs or {}))
if task_id:
extras["task_id"] = str(task_id).strip()
+ extras.update(current_trace_context())
set_celery_log_extras(extras if extras else None)
_celery_lifecycle_log.info(
"event=celery_task_start task={} task_id={} msg=Celery 任务已开始",
diff --git a/api/app/tasks/memoir_tasks.py b/api/app/tasks/memoir_tasks.py
index 90f9963..0d6b6da 100644
--- a/api/app/tasks/memoir_tasks.py
+++ b/api/app/tasks/memoir_tasks.py
@@ -26,6 +26,7 @@ from app.core.chapter_pipeline_lock import (
from app.core.chapter_pipeline_lock import (
release_chapter_pipeline_lock as _release_chapter_lock,
)
+from app.core.business_telemetry import business_span
from app.core.config import settings
from app.core.db import AsyncSessionLocal, get_sync_db
from app.core.dependencies import get_embedding_provider
@@ -614,7 +615,10 @@ def process_memoir_phase2(
},
)
try:
- with get_sync_db() as db:
+ with business_span(
+ "memoir.phase2",
+ chapter_category=chapter_category,
+ ), get_sync_db() as db:
user_convs = select(Conversation.id).where(
Conversation.user_id == user_id,
Conversation.deleted_at.is_(None),
@@ -691,9 +695,13 @@ def process_memoir_phase2(
affected_chapter_ids: Set[str] = set()
lock_t0 = time.perf_counter()
- lock_handle = _acquire_chapter_lock(
- user_id, chapter_category, ttl_seconds=_chapter_lock_ttl()
- )
+ with business_span(
+ "memoir.phase2.lock",
+ chapter_category=chapter_category,
+ ):
+ lock_handle = _acquire_chapter_lock(
+ user_id, chapter_category, ttl_seconds=_chapter_lock_ttl()
+ )
lock_elapsed = time.perf_counter() - lock_t0
if lock_handle is None:
logger.warning(
@@ -746,22 +754,26 @@ def process_memoir_phase2(
"relevant_stories": [],
}
pipeline_t0 = time.perf_counter()
- pipeline_result = run_story_pipeline_for_category_batch(
- db,
- user_id=user_id,
+ with business_span(
+ "memoir.phase2.story_pipeline",
chapter_category=chapter_category,
- category_segments=category_segments,
- state=state,
- user_profile=user_profile,
- user_birth_year=user_birth_year,
- llm=llm,
- background_voice=background_voice,
- occupation=user_occupation,
- memoir_correlation_id=cid,
- llm_fast=llm_fast,
- memory_evidence=memory_evidence,
- language=user_language,
- )
+ ):
+ pipeline_result = run_story_pipeline_for_category_batch(
+ db,
+ user_id=user_id,
+ chapter_category=chapter_category,
+ category_segments=category_segments,
+ state=state,
+ user_profile=user_profile,
+ user_birth_year=user_birth_year,
+ llm=llm,
+ background_voice=background_voice,
+ occupation=user_occupation,
+ memoir_correlation_id=cid,
+ llm_fast=llm_fast,
+ memory_evidence=memory_evidence,
+ language=user_language,
+ )
pipeline_elapsed = time.perf_counter() - pipeline_t0
if pipeline_result.deferred:
@@ -939,7 +951,10 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
phase1_t0 = time.perf_counter()
try:
- with get_sync_db() as db:
+ with business_span(
+ "memoir.phase1",
+ segment_count=len(segment_ids),
+ ), get_sync_db() as db:
user_obj_for_lang = db.get(User, user_id)
user_language = (
"en"
@@ -986,47 +1001,48 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
},
)
ingest_t0 = time.perf_counter()
- ingest_items: list[tuple[str, str, dict | None]] = []
- non_empty_segments: list = []
- for seg in segments:
- text = (seg.user_input_text or "").strip()
- if not text:
- continue
- conv_id = getattr(seg, "conversation_id", None) or ""
- ln = getattr(seg, "lineage_json", None)
- lineage_payload = ln if isinstance(ln, dict) else None
- ingest_items.append((conv_id, text, lineage_payload))
- non_empty_segments.append(seg)
+ with business_span("memoir.phase1.ingest"):
+ ingest_items: list[tuple[str, str, dict | None]] = []
+ non_empty_segments: list = []
+ for seg in segments:
+ text = (seg.user_input_text or "").strip()
+ if not text:
+ continue
+ conv_id = getattr(seg, "conversation_id", None) or ""
+ ln = getattr(seg, "lineage_json", None)
+ lineage_payload = ln if isinstance(ln, dict) else None
+ ingest_items.append((conv_id, text, lineage_payload))
+ non_empty_segments.append(seg)
- ingested_source_ids: list[str] = []
- if ingest_items:
- try:
- ingested_source_ids = asyncio.run(
- _memory_ingest_transcripts_batch(
- user_id,
- ingest_items,
- memoir_correlation_id=memoir_correlation_id,
+ ingested_source_ids: list[str] = []
+ if ingest_items:
+ try:
+ ingested_source_ids = asyncio.run(
+ _memory_ingest_transcripts_batch(
+ user_id,
+ ingest_items,
+ memoir_correlation_id=memoir_correlation_id,
+ )
)
- )
- for seg, sid in zip(
- non_empty_segments, ingested_source_ids, strict=True
- ):
- logger.info(
- "event=memory_transcript_ingested user_id={} task_id={} "
- "source_id={} conversation_id={} segment_id={} transcript_chars={}",
- user_id,
- task_id,
- sid,
- getattr(seg, "conversation_id", None) or "",
- seg.id,
- len((seg.user_input_text or "").strip()),
+ for seg, sid in zip(
+ non_empty_segments, ingested_source_ids, strict=True
+ ):
+ logger.info(
+ "event=memory_transcript_ingested user_id={} task_id={} "
+ "source_id={} conversation_id={} segment_id={} transcript_chars={}",
+ user_id,
+ task_id,
+ sid,
+ getattr(seg, "conversation_id", None) or "",
+ seg.id,
+ len((seg.user_input_text or "").strip()),
+ )
+ except Exception as e:
+ logger.warning(
+ "Memory batch ingest 失败: {} exc_type={}",
+ e,
+ type(e).__name__,
)
- except Exception as e:
- logger.warning(
- "Memory batch ingest 失败: {} exc_type={}",
- e,
- type(e).__name__,
- )
ingest_elapsed = time.perf_counter() - ingest_t0
merge_pipeline_run(
memoir_correlation_id,
@@ -1050,31 +1066,32 @@ def process_memoir_phase1(self, user_id: str, segment_ids: List[str]):
)
prep_t0 = time.perf_counter()
- memoir_orchestrator = MemoirOrchestrator()
+ with business_span("memoir.phase1.prepare_batches"):
+ memoir_orchestrator = MemoirOrchestrator()
- def _phase1_chunk_cb(idx: int, total: int) -> None:
- merge_pipeline_run(
- memoir_correlation_id,
- {"phase1": {"detail": {"prepare_batches_chunk": [idx, total]}}},
+ def _phase1_chunk_cb(idx: int, total: int) -> None:
+ merge_pipeline_run(
+ memoir_correlation_id,
+ {"phase1": {"detail": {"prepare_batches_chunk": [idx, total]}}},
+ )
+
+ prepared = memoir_orchestrator.prepare_batches(
+ segments=list(segments),
+ llm=llm,
+ llm_fast=llm_fast,
+ get_or_create_state=lambda: get_or_create_state_sync(user_id, db),
+ update_slot=lambda stage, slot_name, snippet, seg_ids: update_slot_sync(
+ user_id,
+ stage,
+ slot_name,
+ snippet,
+ seg_ids,
+ db,
+ memoir_batch=True,
+ ),
+ on_phase1_chunk=_phase1_chunk_cb,
+ language=user_language,
)
-
- prepared = memoir_orchestrator.prepare_batches(
- segments=list(segments),
- llm=llm,
- llm_fast=llm_fast,
- get_or_create_state=lambda: get_or_create_state_sync(user_id, db),
- update_slot=lambda stage, slot_name, snippet, seg_ids: update_slot_sync(
- user_id,
- stage,
- slot_name,
- snippet,
- seg_ids,
- db,
- memoir_batch=True,
- ),
- on_phase1_chunk=_phase1_chunk_cb,
- language=user_language,
- )
prep_elapsed = time.perf_counter() - prep_t0
merge_pipeline_run(
memoir_correlation_id,
diff --git a/api/app/tasks/memory_compaction_tasks.py b/api/app/tasks/memory_compaction_tasks.py
index d906f49..4002cf7 100644
--- a/api/app/tasks/memory_compaction_tasks.py
+++ b/api/app/tasks/memory_compaction_tasks.py
@@ -9,6 +9,7 @@ from typing import Any
from celery import shared_task
+from app.core.business_telemetry import business_span
from app.core.config import settings
from app.core.db import AsyncSessionLocal
from app.core.logging import get_logger
@@ -49,7 +50,8 @@ def memory_compaction_sweep() -> dict[str, Any]:
if not settings.memory_compaction_enabled:
return {"skipped": True, "reason": "disabled"}
hours = int(settings.memory_compaction_sweep_recent_hours)
- user_ids = asyncio.run(_list_users_with_recent_chunks_async(hours))
+ with business_span("memory.compaction.sweep", hours=hours):
+ user_ids = asyncio.run(_list_users_with_recent_chunks_async(hours))
ctx_base: dict[str, Any] = {"trigger_source": "beat", "sweep_hours": hours}
for uid in user_ids:
schedule_memory_compaction_run(uid, dict(ctx_base))
@@ -100,7 +102,8 @@ def memory_compaction_run(
return out
try:
- out = asyncio.run(_run_memory_compaction_async(user_id, ctx))
+ with business_span("memory.compaction.run"):
+ out = asyncio.run(_run_memory_compaction_async(user_id, ctx))
if out.get("new_cursor_ts") and out.get("new_cursor_id") is not None:
set_incremental_cursor_pair(
diff --git a/api/app/tasks/memory_enrichment_tasks.py b/api/app/tasks/memory_enrichment_tasks.py
index 54434ef..2a70136 100644
--- a/api/app/tasks/memory_enrichment_tasks.py
+++ b/api/app/tasks/memory_enrichment_tasks.py
@@ -11,6 +11,7 @@ from typing import Any, cast
from celery import shared_task
+from app.core.business_telemetry import business_span
from app.core.config import settings
from app.core.db import AsyncSessionLocal
from app.core.dependencies import get_embedding_provider
@@ -166,7 +167,8 @@ def embed_memory_source(
status="running",
)
try:
- result = asyncio.run(_embed_memory_source_async(user_id, source_id))
+ with business_span("memory.embed_source"):
+ result = asyncio.run(_embed_memory_source_async(user_id, source_id))
ms = (time.perf_counter() - t0) * 1000
logger.info(
"event=memory_embedding_done user_id={} source_id={} duration_ms={:.1f} status={} vectors_written={} msg=记忆向量化完成",
@@ -241,7 +243,8 @@ def enrich_memory_source(
status="running",
)
try:
- asyncio.run(_enrich_memory_source_async(user_id, source_id))
+ with business_span("memory.enrich_source"):
+ asyncio.run(_enrich_memory_source_async(user_id, source_id))
ms = (time.perf_counter() - t0) * 1000
logger.info(
"event=memory_enrichment_done user_id={} source_id={} duration_ms={:.1f} "
diff --git a/api/deploy/observability/grafana/dashboards/life-echo-business.json b/api/deploy/observability/grafana/dashboards/life-echo-business.json
new file mode 100644
index 0000000..e14f545
--- /dev/null
+++ b/api/deploy/observability/grafana/dashboards/life-echo-business.json
@@ -0,0 +1,75 @@
+{
+ "annotations": { "list": [] },
+ "editable": true,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
+ "id": 1,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(business_operation_duration_milliseconds_bucket[5m])) by (le, operation))",
+ "legendFormat": "{{operation}} p95",
+ "refId": "A"
+ }
+ ],
+ "title": "Business operation duration p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
+ "id": 2,
+ "targets": [
+ {
+ "expr": "sum(rate(business_operation_duration_milliseconds_count[5m])) by (operation, outcome)",
+ "legendFormat": "{{operation}} / {{outcome}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Business operations rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
+ "id": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(business_operation_duration_milliseconds_bucket{operation=~\"conversation\\\\.ws\\\\..*|asr\\\\.transcribe|tts\\\\.synthesize\"}[5m])) by (le, operation))",
+ "legendFormat": "{{operation}}",
+ "refId": "A"
+ }
+ ],
+ "title": "WS / ASR / TTS p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
+ "id": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(business_operation_duration_milliseconds_bucket{operation=~\"memoir\\\\..*\"}[5m])) by (le, operation))",
+ "legendFormat": "{{operation}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Memoir pipeline phases p95",
+ "type": "timeseries"
+ }
+ ],
+ "schemaVersion": 39,
+ "tags": ["life-echo", "business"],
+ "templating": { "list": [] },
+ "time": { "from": "now-6h", "to": "now" },
+ "title": "Life Echo Business",
+ "uid": "life-echo-business",
+ "version": 1
+}
diff --git a/api/deploy/observability/grafana/dashboards/life-echo-llm.json b/api/deploy/observability/grafana/dashboards/life-echo-llm.json
new file mode 100644
index 0000000..3505ab2
--- /dev/null
+++ b/api/deploy/observability/grafana/dashboards/life-echo-llm.json
@@ -0,0 +1,79 @@
+{
+ "annotations": { "list": [] },
+ "editable": true,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
+ "id": 1,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(llm_call_duration_milliseconds_bucket[5m])) by (le, agent, call_type))",
+ "legendFormat": "{{agent}} / {{call_type}} p95",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM duration p95 by agent / call_type",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
+ "id": 2,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.50, sum(rate(llm_call_duration_milliseconds_bucket[5m])) by (le, call_type))",
+ "legendFormat": "{{call_type}} p50",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM duration p50 by call_type (json vs chat vs stream)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
+ "id": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(llm_call_total[5m])) by (outcome, call_type)",
+ "legendFormat": "{{outcome}} / {{call_type}}",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM calls by outcome",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
+ "id": 4,
+ "targets": [
+ {
+ "expr": "sum(rate(llm_tokens_input_total[5m])) by (agent)",
+ "legendFormat": "in {{agent}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(llm_tokens_output_total[5m])) by (agent)",
+ "legendFormat": "out {{agent}}",
+ "refId": "B"
+ }
+ ],
+ "title": "LLM tokens/s",
+ "type": "timeseries"
+ }
+ ],
+ "schemaVersion": 39,
+ "tags": ["life-echo", "llm"],
+ "templating": { "list": [] },
+ "time": { "from": "now-1h", "to": "now" },
+ "title": "Life Echo LLM",
+ "uid": "life-echo-llm",
+ "version": 1
+}
diff --git a/api/deploy/observability/grafana/dashboards/life-echo-logs.json b/api/deploy/observability/grafana/dashboards/life-echo-logs.json
new file mode 100644
index 0000000..3cd9ddc
--- /dev/null
+++ b/api/deploy/observability/grafana/dashboards/life-echo-logs.json
@@ -0,0 +1,69 @@
+{
+ "annotations": { "list": [] },
+ "editable": true,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": { "type": "loki", "uid": "loki" },
+ "gridPos": { "h": 10, "w": 24, "x": 0, "y": 0 },
+ "id": 1,
+ "options": { "showTime": true, "sortOrder": "Descending" },
+ "targets": [
+ {
+ "expr": "{compose_service=~\".+\"} |= \"event=llm_json_call\"",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM JSON calls (event=llm_json_call)",
+ "type": "logs"
+ },
+ {
+ "datasource": { "type": "loki", "uid": "loki" },
+ "gridPos": { "h": 10, "w": 24, "x": 0, "y": 10 },
+ "id": 2,
+ "options": { "showTime": true, "sortOrder": "Descending" },
+ "targets": [
+ {
+ "expr": "{compose_service=~\".+\"} |= \"event=celery_task_failed\"",
+ "refId": "A"
+ }
+ ],
+ "title": "Celery task failures",
+ "type": "logs"
+ },
+ {
+ "datasource": { "type": "loki", "uid": "loki" },
+ "gridPos": { "h": 10, "w": 24, "x": 0, "y": 20 },
+ "id": 3,
+ "options": { "showTime": true, "sortOrder": "Descending" },
+ "targets": [
+ {
+ "expr": "{compose_service=~\".+\"} |= \"$trace_id\"",
+ "refId": "A"
+ }
+ ],
+ "title": "Logs by trace_id",
+ "type": "logs"
+ }
+ ],
+ "schemaVersion": 39,
+ "tags": ["life-echo", "logs"],
+ "templating": {
+ "list": [
+ {
+ "current": { "text": "", "value": "" },
+ "label": "trace_id",
+ "name": "trace_id",
+ "options": [],
+ "query": "",
+ "type": "textbox"
+ }
+ ]
+ },
+ "time": { "from": "now-1h", "to": "now" },
+ "title": "Life Echo Logs",
+ "uid": "life-echo-logs",
+ "version": 1
+}
diff --git a/api/deploy/observability/grafana/dashboards/life-echo-overview.json b/api/deploy/observability/grafana/dashboards/life-echo-overview.json
new file mode 100644
index 0000000..f43e4ba
--- /dev/null
+++ b/api/deploy/observability/grafana/dashboards/life-echo-overview.json
@@ -0,0 +1,154 @@
+{
+ "annotations": { "list": [] },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 8, "x": 0, "y": 0 },
+ "id": 1,
+ "options": { "legend": { "displayMode": "list", "placement": "bottom" } },
+ "targets": [
+ {
+ "expr": "sum(rate(http_server_request_duration_seconds_count[5m]))",
+ "legendFormat": "HTTP requests/s",
+ "refId": "A"
+ }
+ ],
+ "title": "API request rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 8, "x": 8, "y": 0 },
+ "id": 2,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[5m])) by (le)) * 1000",
+ "legendFormat": "p95",
+ "refId": "A"
+ }
+ ],
+ "title": "API latency p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 8, "x": 16, "y": 0 },
+ "id": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(llm_call_duration_milliseconds_bucket[5m])) by (le, agent, provider))",
+ "legendFormat": "{{agent}} / {{provider}}",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM call duration p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
+ "id": 4,
+ "targets": [
+ {
+ "expr": "sum(rate(llm_call_total[5m])) by (outcome)",
+ "legendFormat": "{{outcome}}",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM calls by outcome",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "loki", "uid": "loki" },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
+ "id": 5,
+ "options": { "showTime": true, "sortOrder": "Descending" },
+ "targets": [
+ {
+ "expr": "{compose_service=~\".+\"} |= \"llm_json_call\"",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM JSON call logs",
+ "type": "logs"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
+ "id": 6,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(db_client_operation_duration_seconds_bucket[5m])) by (le)) * 1000",
+ "legendFormat": "DB p95",
+ "refId": "A"
+ }
+ ],
+ "title": "DB client latency p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
+ "id": 7,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(http_client_request_duration_seconds_bucket[5m])) by (le)) * 1000",
+ "legendFormat": "HTTP client p95",
+ "refId": "A"
+ }
+ ],
+ "title": "Outbound HTTP latency p95",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
+ "id": 8,
+ "targets": [
+ {
+ "expr": "sum(rate(http_server_request_duration_seconds_count{http_response_status_code=~\"5..\"}[5m])) / clamp_min(sum(rate(http_server_request_duration_seconds_count[5m])), 1e-9)",
+ "legendFormat": "5xx rate",
+ "refId": "A"
+ }
+ ],
+ "title": "HTTP 5xx error rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "Prometheus" },
+ "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
+ "id": 9,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(redis_client_operation_duration_seconds_bucket[5m])) by (le)) * 1000",
+ "legendFormat": "Redis p95",
+ "refId": "A"
+ }
+ ],
+ "title": "Redis client latency p95",
+ "type": "timeseries"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": ["life-echo"],
+ "templating": { "list": [] },
+ "time": { "from": "now-1h", "to": "now" },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Life Echo Overview",
+ "uid": "life-echo-overview",
+ "version": 1
+}
diff --git a/api/deploy/observability/grafana/provisioning/alerting/contact_points.yml b/api/deploy/observability/grafana/provisioning/alerting/contact_points.yml
new file mode 100644
index 0000000..96dcc4f
--- /dev/null
+++ b/api/deploy/observability/grafana/provisioning/alerting/contact_points.yml
@@ -0,0 +1,4 @@
+apiVersion: 1
+
+# 本地 dev 占位:不配置真实通知渠道。在 Grafana UI 中可绑定 Slack/Webhook。
+contactPoints: []
diff --git a/api/deploy/observability/grafana/provisioning/alerting/rules.yml b/api/deploy/observability/grafana/provisioning/alerting/rules.yml
new file mode 100644
index 0000000..5228f33
--- /dev/null
+++ b/api/deploy/observability/grafana/provisioning/alerting/rules.yml
@@ -0,0 +1,147 @@
+apiVersion: 1
+
+groups:
+ - orgId: 1
+ name: life-echo-alerts
+ folder: Life Echo
+ interval: 1m
+ rules:
+ - uid: life_echo_api_p95_high
+ title: API latency p95 > 2s
+ condition: C
+ data:
+ - refId: A
+ relativeTimeRange: { from: 300, to: 0 }
+ datasourceUid: Prometheus
+ model:
+ expr: histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[5m])) by (le)) * 1000
+ refId: A
+ - refId: B
+ datasourceUid: __expr__
+ model:
+ type: reduce
+ expression: A
+ reducer: last
+ refId: B
+ - refId: C
+ datasourceUid: __expr__
+ model:
+ type: threshold
+ expression: B
+ conditions:
+ - evaluator: { type: gt, params: [2000] }
+ operator: { type: and }
+ reducer: { type: last }
+ refId: C
+ noDataState: NoData
+ execErrState: Error
+ for: 5m
+ annotations:
+ summary: API p95 latency above 2s for 5 minutes
+ labels:
+ severity: warning
+
+ - uid: life_echo_llm_error_rate
+ title: LLM error rate > 5%
+ condition: C
+ data:
+ - refId: A
+ relativeTimeRange: { from: 300, to: 0 }
+ datasourceUid: Prometheus
+ model:
+ expr: sum(rate(llm_call_total{outcome="error"}[5m])) / clamp_min(sum(rate(llm_call_total[5m])), 1e-9)
+ refId: A
+ - refId: B
+ datasourceUid: __expr__
+ model:
+ type: reduce
+ expression: A
+ reducer: last
+ refId: B
+ - refId: C
+ datasourceUid: __expr__
+ model:
+ type: threshold
+ expression: B
+ conditions:
+ - evaluator: { type: gt, params: [0.05] }
+ operator: { type: and }
+ reducer: { type: last }
+ refId: C
+ noDataState: NoData
+ execErrState: Error
+ for: 5m
+ annotations:
+ summary: LLM call error rate above 5%
+ labels:
+ severity: warning
+
+ - uid: life_echo_otel_collector_down
+ title: OTel Collector scrape down
+ condition: C
+ data:
+ - refId: A
+ relativeTimeRange: { from: 120, to: 0 }
+ datasourceUid: Prometheus
+ model:
+ expr: up{job="otel-collector"}
+ refId: A
+ - refId: B
+ datasourceUid: __expr__
+ model:
+ type: reduce
+ expression: A
+ reducer: last
+ refId: B
+ - refId: C
+ datasourceUid: __expr__
+ model:
+ type: threshold
+ expression: B
+ conditions:
+ - evaluator: { type: lt, params: [1] }
+ operator: { type: and }
+ reducer: { type: last }
+ refId: C
+ noDataState: Alerting
+ execErrState: Error
+ for: 2m
+ annotations:
+ summary: Prometheus cannot scrape otel-collector
+ labels:
+ severity: critical
+
+ - uid: life_echo_celery_task_failed
+ title: Celery task failures detected
+ condition: C
+ data:
+ - refId: A
+ relativeTimeRange: { from: 300, to: 0 }
+ datasourceUid: loki
+ model:
+ expr: sum(count_over_time({compose_service=~".+"} |= "event=celery_task_failed" [5m]))
+ refId: A
+ - refId: B
+ datasourceUid: __expr__
+ model:
+ type: reduce
+ expression: A
+ reducer: last
+ refId: B
+ - refId: C
+ datasourceUid: __expr__
+ model:
+ type: threshold
+ expression: B
+ conditions:
+ - evaluator: { type: gt, params: [0] }
+ operator: { type: and }
+ reducer: { type: last }
+ refId: C
+ noDataState: NoData
+ execErrState: Error
+ for: 5m
+ annotations:
+ summary: Celery task failure logs in last 5 minutes
+ labels:
+ severity: warning
diff --git a/api/deploy/observability/grafana/provisioning/dashboards/dashboards.yml b/api/deploy/observability/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 0000000..c2f6cff
--- /dev/null
+++ b/api/deploy/observability/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,11 @@
+apiVersion: 1
+
+providers:
+ - name: Life Echo
+ orgId: 1
+ folder: Life Echo
+ type: file
+ disableDeletion: false
+ editable: true
+ options:
+ path: /etc/grafana/dashboards
diff --git a/api/deploy/observability/grafana/provisioning/datasources/datasources.yml b/api/deploy/observability/grafana/provisioning/datasources/datasources.yml
new file mode 100644
index 0000000..89fd49e
--- /dev/null
+++ b/api/deploy/observability/grafana/provisioning/datasources/datasources.yml
@@ -0,0 +1,48 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    # Fixed uid: the Tempo serviceMap setting below refers to it.
+    uid: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
+
+  - name: Tempo
+    type: tempo
+    # Fixed uid: the Loki derivedFields setting below refers to it.
+    uid: tempo
+    access: proxy
+    url: http://tempo:3200
+    editable: false
+    jsonData:
+      httpMethod: GET
+      tracesToLogsV2:
+        datasourceUid: loki
+        spanStartTimeShift: -1m
+        spanEndTimeShift: 1m
+        filterByTraceID: true
+        filterBySpanID: false
+        customQuery: true
+        # "$$" escapes "$" so provisioning does not expand it as an environment variable.
+        query: '{container=~".+"} | json | trace_id="$${__trace.traceId}"'
+      serviceMap:
+        datasourceUid: prometheus
+      nodeGraph:
+        enabled: true
+
+  - name: Loki
+    type: loki
+    uid: loki
+    access: proxy
+    url: http://loki:3100
+    editable: false
+    jsonData:
+      derivedFields:
+        - datasourceUid: tempo
+          matcherRegex: '"trace_id":"([a-f0-9]+)"'
+          name: TraceID
+          url: "$${__value.raw}"
+          urlDisplayLabel: View Trace
diff --git a/api/deploy/observability/loki-config.yaml b/api/deploy/observability/loki-config.yaml
new file mode 100644
index 0000000..4a09ace
--- /dev/null
+++ b/api/deploy/observability/loki-config.yaml
@@ -0,0 +1,39 @@
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+
+common:
+  instance_addr: 127.0.0.1
+  path_prefix: /loki
+  storage:
+    filesystem:
+      chunks_directory: /loki/chunks
+      rules_directory: /loki/rules
+  replication_factor: 1
+  ring:
+    kvstore:
+      store: inmemory
+
+schema_config:
+  configs:
+    - from: 2024-01-01
+      store: tsdb
+      object_store: filesystem
+      schema: v13
+      index:
+        prefix: index_
+        period: 24h
+
+limits_config:
+  retention_period: 168h
+
+# retention_period is only enforced when the compactor runs with retention
+# enabled; without this block expired chunks are never deleted.
+compactor:
+  working_directory: /loki/compactor
+  retention_enabled: true
+  delete_request_store: filesystem
+
+ruler:
+  alertmanager_url: http://localhost:9093
diff --git a/api/deploy/observability/otel-collector-config.yaml b/api/deploy/observability/otel-collector-config.yaml
new file mode 100644
index 0000000..d8fcef7
--- /dev/null
+++ b/api/deploy/observability/otel-collector-config.yaml
@@ -0,0 +1,56 @@
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+processors:
+  batch:
+    timeout: 5s
+    send_batch_size: 1024
+  memory_limiter:
+    check_interval: 1s
+    limit_mib: 512
+    spike_limit_mib: 128
+  # Sets deployment.environment=development on every resource passing through.
+  resource:
+    attributes:
+      - key: deployment.environment
+        value: development
+        action: upsert
+
+exporters:
+  otlp/tempo:
+    endpoint: tempo:4317
+    tls:
+      insecure: true
+  prometheus:
+    endpoint: 0.0.0.0:8889
+  loki:
+    endpoint: http://loki:3100/loki/api/v1/push
+    tls:
+      insecure: true
+
+extensions:
+  health_check:
+    endpoint: 0.0.0.0:13133
+
+service:
+  extensions: [health_check]
+  # A processor defined above only runs if a pipeline lists it, so "resource"
+  # must appear here; memory_limiter is kept as the first processor.
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [memory_limiter, resource, batch]
+      exporters: [otlp/tempo]
+    metrics:
+      receivers: [otlp]
+      processors: [memory_limiter, resource, batch]
+      exporters: [prometheus]
+    logs:
+      receivers: [otlp]
+      processors: [memory_limiter, resource, batch]
+      exporters: [loki]
diff --git a/api/deploy/observability/prometheus.yml b/api/deploy/observability/prometheus.yml
new file mode 100644
index 0000000..ea02974
--- /dev/null
+++ b/api/deploy/observability/prometheus.yml
@@ -0,0 +1,12 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  # Prometheus scraping its own endpoint.
+  - job_name: prometheus
+    static_configs:
+      - targets: ["localhost:9090"]
+
+  # Port 8889 is where the collector's "prometheus" exporter listens
+  # (see otel-collector-config.yaml).
+  - job_name: otel-collector
+    static_configs:
+      - targets: ["otel-collector:8889"]
diff --git a/api/deploy/observability/promtail-config.yaml b/api/deploy/observability/promtail-config.yaml
new file mode 100644
index 0000000..e09c808
--- /dev/null
+++ b/api/deploy/observability/promtail-config.yaml
@@ -0,0 +1,43 @@
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+
+positions:
+  filename: /tmp/positions.yaml
+
+clients:
+  - url: http://loki:3100/loki/api/v1/push
+
+scrape_configs:
+  - job_name: docker
+    docker_sd_configs:
+      - host: unix:///var/run/docker.sock
+        refresh_interval: 5s
+    relabel_configs:
+      - source_labels: ["__meta_docker_container_name"]
+        regex: "/(.*)"
+        target_label: container
+      - source_labels: ["__meta_docker_container_log_stream"]
+        target_label: stream
+      - source_labels: ["__meta_docker_container_label_com_docker_compose_service"]
+        target_label: compose_service
+    pipeline_stages:
+      # Capture groups must be named: the group name is the key under which the
+      # value is stored for the structured_metadata / labels stages below.
+      - regex:
+          expression: '(?:tid=|trace_id=)(?P<trace_id>[0-9a-f]{12,32})'
+      - regex:
+          expression: 'event=(?P<event>[a-zA-Z0-9_.-]+)'
+      - regex:
+          expression: 'duration_ms=(?P<duration_ms>[0-9.]+)'
+      - json:
+          expressions:
+            trace_id: trace_id
+            span_id: span_id
+            request_id: request_id
+            event: event
+      - structured_metadata:
+          trace_id:
+      - labels:
+          request_id:
+          event:
diff --git a/api/deploy/observability/tempo.yaml b/api/deploy/observability/tempo.yaml
new file mode 100644
index 0000000..9c0d969
--- /dev/null
+++ b/api/deploy/observability/tempo.yaml
@@ -0,0 +1,29 @@
+server:
+  http_listen_port: 3200
+
+distributor:
+  receivers:
+    # OTLP gRPC intake; the collector's "otlp/tempo" exporter sends to tempo:4317.
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317
+
+ingester:
+  max_block_duration: 5m
+
+compactor:
+  compaction:
+    block_retention: 48h
+
+storage:
+  trace:
+    backend: local
+    # Both paths live under /var/tempo, which the compose file backs with the
+    # tempo_data volume.
+    local:
+      path: /var/tempo/traces
+    wal:
+      path: /var/tempo/wal
+
+query_frontend:
+  search:
+    duration_slo: 5s
+    # 1.073741824e+09 bytes = 2^30.
+    throughput_bytes_slo: 1.073741824e+09
diff --git a/api/development.sh b/api/development.sh
index d64c9c9..ceb3576 100755
--- a/api/development.sh
+++ b/api/development.sh
@@ -25,10 +25,19 @@ API_PORT="${API_PORT:-8000}"
CELERY_POOL="${CELERY_POOL:-solo}"
SKIP_INSTALL="${SKIP_INSTALL:-0}"
SKIP_INFRA="${SKIP_INFRA:-0}"
+# 可观测性:空=若 .env 中 OTEL_ENABLED=true 则启动 compose;0=不启;1=强制启动
+START_OBSERVABILITY="${START_OBSERVABILITY:-}"
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-12}"
-# 由 internal-eval.sh 开启:在 main:app + Celery 之外再启 internal_main(:8001) 与 app-eval-web
-LIFE_ECHO_WITH_INTERNAL_EVAL="${LIFE_ECHO_WITH_INTERNAL_EVAL:-0}"
+# 与 docker-compose.observability.yml / .env.example 默认宿主机端口一致
+OTEL_GRPC_HOST_PORT="${OTEL_GRPC_HOST_PORT:-48317}"
+GRAFANA_HOST_PORT="${GRAFANA_HOST_PORT:-48300}"
+PROMETHEUS_HOST_PORT="${PROMETHEUS_HOST_PORT:-49090}"
+
+# 默认一并启动 internal_main + app-eval-web(设 0 可仅主站)
+LIFE_ECHO_WITH_INTERNAL_EVAL="${LIFE_ECHO_WITH_INTERNAL_EVAL:-1}"
+# 自动用 Google Chrome 打开 Grafana / 评测 Web(勿用 Vite --open,避免落到 Safari)
+OPEN_OBSERVABILITY_UI="${OPEN_OBSERVABILITY_UI:-1}"
# 若 :8000 已由其他 development 实例占用,仅附加 :8001 + 前端(需自备同一份 Celery/主站)
EVAL_ATTACH_ONLY="${EVAL_ATTACH_ONLY:-0}"
INTERNAL_EVAL_HOST="${INTERNAL_EVAL_HOST:-0.0.0.0}"
@@ -43,6 +52,9 @@ INTERNAL_EVAL_PID=""
EVAL_WEB_PID=""
CLEANED_UP=0
INFRA_STARTED=0
+OBSERVABILITY_STARTED=0
+OBSERVABILITY_BROWSER_SCHEDULED=0
+EVAL_WEB_BROWSER_SCHEDULED=0
print_header() {
echo -e "\n${BLUE}========================================${NC}"
@@ -62,6 +74,64 @@ print_err() {
echo -e "${RED}✗ $1${NC}"
}
+# Open a URL in Google Chrome (macOS) or the first Chrome/Chromium binary found
+# on PATH (other systems). There is no fallback to the default browser.
+# Args:    $1 - URL to open.
+# Returns: 0 when a browser was launched, 1 after printing a warning otherwise.
+open_browser_url() {
+  local target="$1"
+  local candidate
+
+  # macOS branch: taken only when "open" exists and uname reports Darwin.
+  if command -v open >/dev/null 2>&1 && [[ "$(uname -s)" == "Darwin" ]]; then
+    open -a "Google Chrome" "${target}" >/dev/null 2>&1 && return 0
+    print_warn "未找到 Google Chrome,请手动打开: ${target}"
+    return 1
+  fi
+
+  # Other systems: start the first available binary in the background.
+  for candidate in google-chrome chromium-browser chromium; do
+    if command -v "${candidate}" >/dev/null 2>&1; then
+      "${candidate}" "${target}" >/dev/null 2>&1 &
+      return 0
+    fi
+  done
+
+  print_warn "未找到 Chrome/Chromium,请手动打开: ${target}"
+  return 1
+}
+
+# Schedule a one-time, delayed opening of the Grafana UI.
+# Does nothing unless OPEN_OBSERVABILITY_UI is "1", and nothing on repeat calls
+# (tracked through OBSERVABILITY_BROWSER_SCHEDULED).
+schedule_observability_browser() {
+  [[ "${OPEN_OBSERVABILITY_UI}" == "1" ]] || return 0
+  [[ "${OBSERVABILITY_BROWSER_SCHEDULED}" != "1" ]] || return 0
+
+  OBSERVABILITY_BROWSER_SCHEDULED=1
+  local url="http://127.0.0.1:${GRAFANA_HOST_PORT}"
+  # Background subshell: wait 4 seconds, then hand the URL to open_browser_url.
+  ( sleep 4; open_browser_url "${url}" ) &
+  print_ok "将自动打开 Grafana: ${url}"
+}
+
+# Schedule a one-time opening of the eval web UI once its port is listening.
+# Does nothing unless OPEN_EVAL_WEB is "1", and nothing on repeat calls
+# (tracked through EVAL_WEB_BROWSER_SCHEDULED).
+# A background subshell polls EVAL_WEB_PORT once per second, up to 30 times.
+# If the port never comes up it prints a warning instead of opening a dead page.
+schedule_eval_web_browser() {
+  if [[ "${OPEN_EVAL_WEB}" != "1" ]] || [[ "${EVAL_WEB_BROWSER_SCHEDULED:-0}" == "1" ]]; then
+    return 0
+  fi
+  EVAL_WEB_BROWSER_SCHEDULED=1
+  local eval_url="http://127.0.0.1:${EVAL_WEB_PORT}/"
+  (
+    local i=0
+    local ready=0
+    while (( i < 30 )); do
+      if is_port_listening "${EVAL_WEB_PORT}"; then
+        ready=1
+        break
+      fi
+      sleep 1
+      i=$((i + 1))
+    done
+    if [[ "${ready}" == "1" ]]; then
+      open_browser_url "${eval_url}"
+    else
+      print_warn "评测 Web 端口 :${EVAL_WEB_PORT} 在 30 秒内未监听,请手动打开: ${eval_url}"
+    fi
+  ) &
+  print_ok "将自动打开评测 Web (Chrome): ${eval_url}"
+}
+
is_pid_alive() {
local pid="$1"
[[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null
@@ -147,11 +217,9 @@ cleanup() {
fi
if [[ "${INFRA_STARTED}" == "1" ]]; then
- print_warn "正在停止 PostgreSQL / Redis 容器..."
- (
- cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
- ) >/dev/null 2>&1 || true
- print_ok "PostgreSQL/Redis 容器已停止"
+ print_warn "正在停止 Docker 基础设施..."
+ docker_compose_cmd stop >/dev/null 2>&1 || true
+ print_ok "Docker 容器已停止"
fi
}
@@ -163,12 +231,107 @@ require_cmd() {
fi
}
+# Resolve a boolean setting, preferring the process environment over .env.
+# Args:
+#   $1 - variable name (for example OTEL_ENABLED).
+#   $2 - default used only when the variable is unset or empty everywhere:
+#        "1" means true, anything else means false (defaults to "0").
+# Returns: 0 for 1/true/TRUE/yes/YES/on/ON; 1 for any other non-empty value;
+#          the default when no value is found.
+read_env_bool() {
+  local key="$1"
+  local default="${2:-0}"
+  local val=""
+
+  if [[ -n "${!key:-}" ]]; then
+    # An exported or inherited variable wins over the file.
+    val="${!key}"
+  elif [[ -f "${ROOT_DIR}/.env" ]]; then
+    # Last assignment wins; strip CR and one pair of surrounding double quotes.
+    # "|| true": grep exits 1 when the key is absent, which would otherwise
+    # fail this assignment under "set -o pipefail".
+    val="$(grep -E "^${key}=" "${ROOT_DIR}/.env" | tail -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//' || true)"
+  fi
+
+  case "${val}" in
+    1 | true | TRUE | yes | YES | on | ON) return 0 ;;
+    "") [[ "${default}" == "1" ]] ;;
+    # An explicit non-true value (for example "false") must not fall back to the default.
+    *) return 1 ;;
+  esac
+}
+
+# Decide whether the observability compose overlay should be used.
+# START_OBSERVABILITY forces the answer when it holds a recognised off/on word;
+# any other value (including empty) defers to OTEL_ENABLED via read_env_bool.
+# Returns: 0 to start the overlay, non-zero otherwise.
+should_start_observability() {
+  if [[ "${START_OBSERVABILITY}" =~ ^(0|false|FALSE|no|NO|off|OFF)$ ]]; then
+    return 1
+  fi
+  if [[ "${START_OBSERVABILITY}" =~ ^(1|true|TRUE|yes|YES|on|ON)$ ]]; then
+    return 0
+  fi
+  read_env_bool "OTEL_ENABLED" "0"
+}
+
+# Run "docker compose" from ROOT_DIR with the right set of -f files.
+# Args:    "$@" - compose subcommand and its arguments (for example: up -d).
+# Returns: the exit status of docker compose.
+# Kept free of "local -n" and "local arr=(-f …)" so it runs on the bash 3.2
+# shipped with macOS.
+docker_compose_cmd() {
+  if ! should_start_observability; then
+    # Dev stack only. "up" additionally gets --remove-orphans
+    # (NOTE(review): presumably to drop overlay containers left by an earlier
+    # run with observability enabled; confirm).
+    if [[ "$1" == "up" ]]; then
+      (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@" --remove-orphans)
+    else
+      (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@")
+    fi
+    return
+  fi
+
+  # Dev stack plus the observability overlay.
+  (cd "${ROOT_DIR}" && docker compose \
+    -f docker-compose.dev.yml \
+    -f docker-compose.observability.yml \
+    "$@")
+}
+
+# Poll until OTEL_GRPC_HOST_PORT is listening.
+# Args:    $1 - maximum number of checks, one second apart (default 30).
+# Returns: 0 as soon as a check succeeds, 1 once all checks have failed.
+wait_otel_collector_ready() {
+  local attempts="${1:-30}"
+  local n
+  for (( n = 0; n < attempts; n += 1 )); do
+    is_port_listening "${OTEL_GRPC_HOST_PORT}" && return 0
+    sleep 1
+  done
+  return 1
+}
+
+# Check that the OTLP gRPC host port is listening when OTEL_ENABLED is on.
+# If this run started the observability stack (OBSERVABILITY_STARTED=1) and the
+# port is not up yet, waits through wait_otel_collector_ready 45 before giving up.
+# Returns: 0 when OTEL_ENABLED is off or the port is listening; 1 after
+#          printing troubleshooting hints.
+check_otel_collector_ready() {
+  read_env_bool "OTEL_ENABLED" "0" || return 0
+
+  local listening=0
+  if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
+    listening=1
+  elif [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
+    print_warn "等待 OTel Collector 端口 :${OTEL_GRPC_HOST_PORT} …"
+    if wait_otel_collector_ready 45; then
+      listening=1
+    fi
+  fi
+
+  if [[ "${listening}" == "1" ]]; then
+    print_ok "OTel Collector 端口已监听 (:${OTEL_GRPC_HOST_PORT})"
+    return 0
+  fi
+
+  print_warn "OTEL_ENABLED=true 但 :${OTEL_GRPC_HOST_PORT} 未监听"
+  print_warn "请确认本次启动日志中有「启动可观测性栈」;或手动执行:"
+  print_warn " docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d"
+  print_warn "不需要可观测性时在 .env.development 设 OTEL_ENABLED=false"
+  return 1
+}
+
start_infra() {
- print_header "启动 PostgreSQL 和 Redis"
- cd "${ROOT_DIR}"
- docker compose -f docker-compose.dev.yml up -d
+ if should_start_observability; then
+ print_header "启动 PostgreSQL、Redis 与可观测性栈 (OTel / Grafana LGTM)"
+ OBSERVABILITY_STARTED=1
+ else
+ print_header "启动 PostgreSQL 和 Redis"
+ fi
+ docker_compose_cmd up -d
INFRA_STARTED=1
print_ok "PostgreSQL 127.0.0.1:48291,Redis 127.0.0.1:48307(见 docker-compose.dev.yml / .env.example)"
+ if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
+ print_ok "Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} (admin/admin)"
+ print_ok "Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
+ print_ok "OTLP gRPC 127.0.0.1:${OTEL_GRPC_HOST_PORT}(应用读 .env 中 OTEL_*,无需 export)"
+ print_ok "详见 docs/observability.md"
+ schedule_observability_browser
+ fi
print_ok "基础设施已就绪"
}
@@ -467,19 +630,15 @@ start_eval_web() {
exit 1
fi
- local vite_extra=()
- if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
- vite_extra+=(--open)
- fi
-
(
cd "${EVAL_WEB_DIR}"
VITE_EVAL_API_KEY="${api_key}" \
VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}" \
- npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" "${vite_extra[@]}"
+ npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}"
) &
EVAL_WEB_PID=$!
print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
+ schedule_eval_web_browser
}
start_internal_eval_http() {
@@ -493,7 +652,8 @@ start_internal_eval_http() {
exit 1
fi
- "${UVICORN_BIN}" app.internal_main:internal_app --reload \
+ OTEL_SERVICE_NAME="${INTERNAL_EVAL_OTEL_SERVICE_NAME:-life-echo-internal-api}" \
+ "${UVICORN_BIN}" app.internal_main:internal_app --reload \
--reload-exclude 'alembic/**' \
--reload-exclude 'alembic.ini' \
--host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
@@ -547,7 +707,7 @@ start_services() {
fi
if [[ "${skip_main}" == "1" ]] && [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
- print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请使用 ./internal-eval.sh 或导出 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
+ print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请设置 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
exit 1
fi
@@ -601,14 +761,27 @@ start_services() {
echo "主站文档: http://localhost:${API_PORT}/docs"
echo "健康检查: http://localhost:${API_PORT}/health"
fi
+ if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
+ echo "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/"
+ echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
+ fi
+ if read_env_bool "OTEL_ENABLED" "0"; then
+ echo "可观测性: Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} | Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
+ if is_port_listening "${GRAFANA_HOST_PORT}"; then
+ schedule_observability_browser
+ fi
+ fi
+ if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]] && is_pid_alive "${EVAL_WEB_PID}"; then
+ schedule_eval_web_browser
+ fi
echo "按 Ctrl+C 停止所有进程"
}
main() {
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
- print_header "Life Echo 开发环境 + 内部评测(主站 + :${INTERNAL_EVAL_PORT} + Eval Web)"
+ print_header "Life Echo 开发环境(主站 + 内部评测 + 可观测性)"
else
- print_header "Life Echo 开发环境一键启动"
+ print_header "Life Echo 开发环境一键启动(无内部评测)"
fi
require_cmd "uv"
@@ -618,16 +791,22 @@ main() {
trap cleanup EXIT INT TERM
+ ensure_venv
+ # 必须在 start_infra 之前同步,否则 should_start_observability 读不到 .env.development 里的 OTEL_ENABLED
+ ensure_dotenv_from_development
+
if [[ "${SKIP_INFRA}" != "1" ]]; then
start_infra
wait_postgres_ready || true
else
print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
+ if should_start_observability; then
+ print_warn "SKIP_INFRA=1 未自动启动 observability;若需 LGTM 请手动 docker compose up observability overlay"
+ fi
fi
- ensure_venv
- ensure_dotenv_from_development
check_env_file
+ check_otel_collector_ready || true
wait_host_infra_ready
run_migrations
start_services
diff --git a/api/docker-compose.observability.yml b/api/docker-compose.observability.yml
new file mode 100644
index 0000000..a251fb2
--- /dev/null
+++ b/api/docker-compose.observability.yml
@@ -0,0 +1,122 @@
+# Local observability stack overlay (use together with docker-compose.dev.yml)
+#
+#   docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d
+#
+# Host ports deliberately avoid common ones such as 3000/9090/4317 and line up
+# with the OTEL_* / *_HOST_PORT entries in .env.example.
+#   Grafana: http://127.0.0.1:${GRAFANA_HOST_PORT:-48300} (admin / admin)
+#   OTLP:    127.0.0.1:${OTEL_GRPC_HOST_PORT:-48317} (gRPC) :${OTEL_HTTP_HOST_PORT:-48318} (HTTP)
+
+services:
+  # Receives OTLP from the application and forwards to Tempo / Prometheus / Loki
+  # as configured in deploy/observability/otel-collector-config.yaml.
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:0.120.0
+    container_name: life-echo-otel-collector
+    command: ["--config=/etc/otelcol/config.yaml"]
+    volumes:
+      - ./deploy/observability/otel-collector-config.yaml:/etc/otelcol/config.yaml:ro
+    ports:
+      # All host ports are bound to 127.0.0.1 only.
+      - "127.0.0.1:${OTEL_GRPC_HOST_PORT:-48317}:4317"
+      - "127.0.0.1:${OTEL_HTTP_HOST_PORT:-48318}:4318"
+      - "127.0.0.1:${OTEL_COLLECTOR_HEALTH_HOST_PORT:-48333}:13133"
+    depends_on:
+      tempo:
+        condition: service_started
+      loki:
+        condition: service_started
+    networks:
+      - default
+    restart: unless-stopped
+
+  tempo:
+    image: grafana/tempo:2.7.2
+    container_name: life-echo-tempo
+    command: ["-config.file=/etc/tempo.yaml"]
+    volumes:
+      - ./deploy/observability/tempo.yaml:/etc/tempo.yaml:ro
+      - tempo_data:/var/tempo
+    ports:
+      - "127.0.0.1:${TEMPO_HTTP_HOST_PORT:-43200}:3200"
+    networks:
+      - default
+    restart: unless-stopped
+
+  loki:
+    image: grafana/loki:3.4.2
+    container_name: life-echo-loki
+    command: ["-config.file=/etc/loki/loki-config.yaml"]
+    volumes:
+      - ./deploy/observability/loki-config.yaml:/etc/loki/loki-config.yaml:ro
+      - loki_data:/loki
+    ports:
+      - "127.0.0.1:${LOKI_HTTP_HOST_PORT:-43100}:3100"
+    networks:
+      - default
+    restart: unless-stopped
+
+  # Ships container logs to Loki; discovers containers through the Docker
+  # socket, mounted read-only.
+  promtail:
+    image: grafana/promtail:3.4.2
+    container_name: life-echo-promtail
+    command: ["-config.file=/etc/promtail/config.yml"]
+    volumes:
+      - ./deploy/observability/promtail-config.yaml:/etc/promtail/config.yml:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    depends_on:
+      loki:
+        condition: service_started
+    networks:
+      - default
+    restart: unless-stopped
+
+  prometheus:
+    image: prom/prometheus:v3.2.1
+    container_name: life-echo-prometheus
+    command:
+      - --config.file=/etc/prometheus/prometheus.yml
+      - --storage.tsdb.path=/prometheus
+      - --web.enable-lifecycle
+    volumes:
+      - ./deploy/observability/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus_data:/prometheus
+    ports:
+      - "127.0.0.1:${PROMETHEUS_HOST_PORT:-49090}:9090"
+    depends_on:
+      otel-collector:
+        condition: service_started
+    networks:
+      - default
+    restart: unless-stopped
+
+  grafana:
+    image: grafana/grafana:11.5.2
+    container_name: life-echo-grafana
+    environment:
+      # Fixed local credentials; sign-up and anonymous access are disabled.
+      GF_SECURITY_ADMIN_USER: admin
+      GF_SECURITY_ADMIN_PASSWORD: admin
+      GF_USERS_ALLOW_SIGN_UP: "false"
+      GF_AUTH_ANONYMOUS_ENABLED: "false"
+    volumes:
+      - ./deploy/observability/grafana/provisioning:/etc/grafana/provisioning:ro
+      - ./deploy/observability/grafana/dashboards:/etc/grafana/dashboards:ro
+      - grafana_data:/var/lib/grafana
+    ports:
+      - "127.0.0.1:${GRAFANA_HOST_PORT:-48300}:3000"
+    depends_on:
+      prometheus:
+        condition: service_started
+      tempo:
+        condition: service_started
+      loki:
+        condition: service_started
+    networks:
+      - default
+    restart: unless-stopped
+
+volumes:
+  tempo_data:
+  loki_data:
+  prometheus_data:
+  grafana_data:
+
+networks:
+  # Declared external, so compose expects a network named life-echo-dev to
+  # exist already and will not create it from this file.
+  default:
+    name: life-echo-dev
+    external: true
diff --git a/api/docs/internal-eval.md b/api/docs/internal-eval.md
index 7b16fce..d6824e1 100644
--- a/api/docs/internal-eval.md
+++ b/api/docs/internal-eval.md
@@ -4,29 +4,30 @@
## 启动
-**推荐一条命令**:`internal-eval.sh` 实际调用 `development.sh`,在同一进程树里启动主站 `main:app`(**8000**)、**一份** Celery、内部评测 `internal_app`(默认 **8001**)以及 `app-eval-web`(默认 **5174**)。不需要再并行执行两份启动脚本。
+**推荐一条命令**:`./development.sh` 默认启动主站(**8000**)、Celery、内部评测 API(默认 **7999**)、评测 Web(**5174**);`.env` 中 `OTEL_ENABLED=true` 时并起 Grafana 且自动打开浏览器。`./internal-eval.sh` 仅为兼容转发。
-| | 单一命令 `./internal-eval.sh` |
+| | `./development.sh`(默认) |
|---|-------------------------------|
-| HTTP | 主站 **8000** + internal **8001** |
-| Celery | 仅 **一个** worker(与主站共用队列) |
-| 前端 | 默认启动 `app-eval-web`(`START_EVAL_WEB=0` 可关) |
+| HTTP | 主站 **8000** + internal **7999** |
+| Celery | 仅 **一个** worker |
+| 评测 UI | 自动用 Google Chrome 打开 http://127.0.0.1:5174/(`OPEN_EVAL_WEB=0` 可关) |
+| 可观测性 | Grafana :48300(`OPEN_OBSERVABILITY_UI=0` 可关) |
若 **主站 + Celery 已在其他终端** 由 `./development.sh` 跑起来了,只在同一台机器上多开评测 HTTP 与前端、且 **不再起第二份 Worker**:
```bash
cd api
# 确保 .env.development / .env 含 INTERNAL_EVAL_API_KEY;:8000 已被主站监听
-SKIP_INFRA=1 SKIP_INSTALL=1 EVAL_ATTACH_ONLY=1 ./internal-eval.sh
+SKIP_INFRA=1 SKIP_INSTALL=1 EVAL_ATTACH_ONLY=1 ./development.sh
```
兼容旧写法:`SKIP_CELERY=1` 会映射为 `EVAL_ATTACH_ONLY=1`(仍要求 **8000 已在监听**)。
-仅主业务、不要评测台时照旧:`./development.sh`(不设置 `LIFE_ECHO_WITH_INTERNAL_EVAL`)。
+仅主业务、不要评测台:`LIFE_ECHO_WITH_INTERNAL_EVAL=0 ./development.sh`。
-若你只需要 **8001**、刻意不启主站 **8000**,请用下文「手动 uvicorn」配合既有 Celery,不要用 `./internal-eval.sh`(一键脚本会顺带拉起主站)。
+若只需 **7999**、不启主站 **8000**,见下文「手动 uvicorn」;不要用一键脚本。
-**默认会起 `app-eval-web`,并用 Vite `--open` 尝试打开浏览器**(`http://127.0.0.1:5174/`)。不要前端时设 `START_EVAL_WEB=0`;只要前端但不要弹窗时设 `OPEN_EVAL_WEB=0`。
+**默认会起 `app-eval-web`,并自动用 Google Chrome(Linux 下为 Chrome/Chromium)打开评测台**(`http://127.0.0.1:5174/`,与 Grafana 的打开方式相同;找不到浏览器时只提示手动打开)。不要前端时设 `START_EVAL_WEB=0`;只要前端但不要弹窗时设 `OPEN_EVAL_WEB=0`。
数据库与主服务共用;需配置环境变量后启动专用进程:
diff --git a/api/docs/observability.md b/api/docs/observability.md
new file mode 100644
index 0000000..4c67c29
--- /dev/null
+++ b/api/docs/observability.md
@@ -0,0 +1,139 @@
+# 可观测性(OpenTelemetry + Grafana LGTM)
+
+本地开发使用 **OpenTelemetry** 采集 traces / metrics / logs,经 **OTel Collector** 写入 **Tempo / Prometheus / Loki**,在 **Grafana** 统一查看。
+
+配置写在 **`.env`**(由 `.env.development` 经 `development.sh` 同步,或从 [`.env.example`](../.env.example) 复制),`app.core.config.settings` 启动时自动读取,**无需**在 shell 里 `export OTEL_*`。
+
+## 启动栈
+
+在 `api/` 目录:
+
+```bash
+# 1. 数据库与 Redis
+docker compose -f docker-compose.dev.yml up -d
+
+# 2. 可观测性(需已存在 life-echo-dev 网络;端口来自 .env 或下列默认)
+docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d
+```
+
+| 服务 | 默认宿主机地址 | compose 变量 |
+|------|----------------|--------------|
+| Grafana | http://127.0.0.1:48300 (admin / admin) | `GRAFANA_HOST_PORT` |
+| Prometheus | http://127.0.0.1:49090 | `PROMETHEUS_HOST_PORT` |
+| OTLP gRPC | http://127.0.0.1:48317 | `OTEL_GRPC_HOST_PORT` |
+| OTLP HTTP | http://127.0.0.1:48318 | `OTEL_HTTP_HOST_PORT` |
+| Collector health | http://127.0.0.1:48333 | `OTEL_COLLECTOR_HEALTH_HOST_PORT` |
+
+容器**内部**仍使用标准端口(如 Collector `4317`);宿主机映射统一使用高位端口:多数在 `48xxx` 段(与 Postgres `48291`、Redis `48307` 同一风格),另有 Prometheus `49090`、Tempo `43200`、Loki `43100`。
+
+预置 Dashboard(**Life Echo** 文件夹):
+
+| Dashboard | 用途 |
+|-----------|------|
+| Life Echo Overview | API RED、LLM 摘要、依赖延迟 |
+| Life Echo LLM | `call_type` / agent / tokens、outcome 分布 |
+| Life Echo Business | 回忆录阶段、WS/ASR/TTS、Celery 业务 span |
+| Life Echo Logs | Loki 按 `event` / `trace_id` 检索 |
+
+## 启用应用导出
+
+在 [`.env.example`](../.env.example) 已给出本地默认值,同步到 `.env` 即可,例如:
+
+```env
+OTEL_ENABLED=true
+OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:48317
+OTEL_TRACES_SAMPLER=always_on
+OTEL_SERVICE_NAME=life-echo-api
+```
+
+推荐与全栈一并启动(`./development.sh` 在 `.env` 里 `OTEL_ENABLED=true` 时会起 observability compose,并默认打开 Grafana 浏览器标签):
+
+```bash
+cd api
+./development.sh
+```
+
+仅手动起 API(不自动开 Grafana):
+
+```bash
+cd api
+uv run uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
+```
+
+Celery worker 同一 `.env`;未设 `OTEL_SERVICE_NAME` 时 worker 默认为 `life-echo-celery-worker`。
+
+若 API 跑在 **Docker compose** 里,应设 `OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317`(服务名 + 容器内端口),而不是 `localhost`。
+
+不需要可观测性时:`.env` 中 `OTEL_ENABLED=false`(或未启动 observability compose)。
+
+## 采集内容
+
+| 类型 | 来源 |
+|------|------|
+| HTTP | FastAPI 自动 instrumentation(`/health` 排除) |
+| DB | SQLAlchemy |
+| Redis | redis-py |
+| 出站 HTTP | httpx(DeepSeek 等) |
+| Celery | 任务 span + W3C trace 传播 |
+| LLM | `llm_telemetry`(LangChain / DeepSeek / `llm_call`)+ `llm.call.*` / `llm.tokens.*` metrics |
+| 业务 | `business_telemetry`:WS 回合、回忆录 phase、ASR/TTS、支付等子 span |
+| 日志 | loguru patcher 注入 `trace_id`;Promtail 解析 `event` / `tid=`;可选 `LOG_JSON_FILE` JSON sink |
+
+日志字段:`request_id`、`trace_id`、`span_id`。HTTP 由中间件 `contextualize`;**Celery / 后台**由 loguru **patcher** 从当前 OTel span 合并,无需经过 HTTP 中间件。
+
+## 常用排查
+
+1. **API 慢**:Grafana → Tempo,按 `service.name=life-echo-api` 查 trace;看 DB / httpx / `llm.*` / `conversation.ws.*` 子 span。
+2. **LLM 慢**:**Life Echo LLM** Dashboard,或 Loki:`{compose_service=~".+"} |= "event=llm_json_call"`。
+3. **回忆录卡阶段**:Tempo 搜 `memoir.phase1` / `memoir.phase2` / `memoir.story_pipeline.*`;**Life Echo Business** Dashboard 看 `business_operation_duration_milliseconds`。
+4. **日志 ↔ Trace**:在 Tempo 复制 `trace_id` → Loki:`{compose_service=~".+"} |= "tid=<前12位>"`(控制台短格式);Promtail 将 `trace_id` 写入 **structured metadata**(非高基数 label)。
+5. **Celery 堆积**:Tempo 过滤 `life-echo-celery-worker`;Loki `event=celery_task_failed`。
+6. **无数据**:`.env` 中 `OTEL_ENABLED=true`、`OTEL_EXPORTER_OTLP_ENDPOINT` 端口与 `OTEL_GRPC_HOST_PORT` 一致;Collector health `http://127.0.0.1:48333`;Prometheus target `otel-collector:8889` UP。
+
+### LOG_JSON_FILE 与 Promtail
+
+- **默认**:loguru 人类可读行 → Docker stdout → Promtail **regex** 提取 `tid` / `event` / `duration_ms`;`trace_id` 进 structured metadata,**不作为 Loki label**。
+- **可选**:`LOG_JSON_FILE=/path/to/app.jsonl` 开启 JSON sink(`serialize=true`),便于与 OTLP logs 或自建采集对齐;与 Promtail 可**并存**(同一容器 stdout 仍走 regex)。
+
+## 采样(staging/prod 第二阶段)
+
+| 环境 | 建议 |
+|------|------|
+| development | `OTEL_TRACES_SAMPLER=always_on` |
+| staging/production | `OTEL_TRACES_SAMPLER=parentbased_traceidratio`,`OTEL_TRACES_SAMPLER_ARG=0.1` |
+
+关闭 telemetry:`OTEL_ENABLED=false`,无 exporter 开销。
+
+## Prometheus 指标名(OTel → Prometheus)
+
+| OTel 仪器 | Prometheus 系列(histogram) |
+|-----------|------------------------------|
+| `llm.call.duration` (ms) | `llm_call_duration_milliseconds_bucket` |
+| `business.operation.duration` (ms) | `business_operation_duration_milliseconds_bucket` |
+| `http.server.request.duration` (s) | `http_server_request_duration_seconds_bucket` |
+| `db.client.operation.duration` (s) | `db_client_operation_duration_seconds_bucket` |
+| `http.client.request.duration` (s) | `http_client_request_duration_seconds_bucket` |
+
+Counter 示例:`llm_call_total`、`llm_tokens_input_total`。
+
+校验脚本(需 observability compose + 有流量):
+
+```bash
+chmod +x scripts/verify_observability_metrics.sh
+./scripts/verify_observability_metrics.sh
+```
+
+## 验收清单(本地 E2E)
+
+- [ ] `OTEL_ENABLED=true`,启动 compose + API + Celery worker
+- [ ] 跑一条 WS 对话;Tempo 可见 `conversation.ws.process_turn`、`llm.chat_invoke`
+- [ ] 触发 memoir phase1;Tempo 可见 `memoir.phase1.*`、`memoir.story_pipeline.*`
+- [ ] Prometheus:`call_type` label 存在;真实 LLM 后 `llm_tokens_input_total` > 0
+- [ ] Loki:`|= "tid="` 能查到同次请求日志
+- [ ] `./scripts/verify_observability_metrics.sh` 通过
+- [ ] Grafana Alerting 页无 provisioning 错误(通知渠道可空)
+
+## 配置目录
+
+- [`deploy/observability/`](../deploy/observability/):Collector、Tempo、Loki、Prometheus、Grafana provisioning
+- [`docker-compose.observability.yml`](../docker-compose.observability.yml):本地 overlay
diff --git a/api/docs/部署指南.md b/api/docs/部署指南.md
index dda4786..75f3a1d 100644
--- a/api/docs/部署指南.md
+++ b/api/docs/部署指南.md
@@ -305,11 +305,13 @@ sudo journalctl -u life-echo-api -f
### 8. 监控与告警
+本地开发与预发可观测性栈(OpenTelemetry + Grafana LGTM)见 **[可观测性指南](observability.md)**。staging/production 全量接入为第二阶段(`docker-compose` profile)。
+
#### 8.1 配置日志监控
建议使用以下工具:
+- **Grafana + Loki + Tempo + Prometheus**(仓库内 `deploy/observability/`,推荐)
- ELK Stack (Elasticsearch + Logstash + Kibana)
-- Grafana + Loki
- 云服务商的日志服务
#### 8.2 配置性能监控
diff --git a/api/internal-eval.sh b/api/internal-eval.sh
index 32130e2..3557df7 100755
--- a/api/internal-eval.sh
+++ b/api/internal-eval.sh
@@ -1,22 +1,18 @@
#!/usr/bin/env bash
-# 在 development.sh 全栈之上附加 internal_main(默认 :8001)与 app-eval-web。
-# 只需一条命令,无需再并行跑两份脚本;共用同一份 Postgres/Redis/Celery(本脚本只起一个 Worker)。
+# 已合并入 development.sh(默认启动评测台 + 自动打开 Grafana / 评测 UI)。
+# 本脚本保留为兼容入口,行为与 ./development.sh 相同。
#
-# 用法:cd api && ./internal-eval.sh
+# 若主站已在其他终端占用 :8000,仅附加评测 HTTP + 前端(不再起 Celery):
+# SKIP_INFRA=1 SKIP_INSTALL=1 EVAL_ATTACH_ONLY=1 ./development.sh
#
-# 若主站已在其他终端由 ./development.sh 占用 :8000,仅多开评测 HTTP + 前端(不再起第二份 Celery):
-# SKIP_INFRA=1 SKIP_INSTALL=1 EVAL_ATTACH_ONLY=1 ./internal-eval.sh
-#
-# 兼容旧环境变量:SKIP_CELERY=1 等价于 EVAL_ATTACH_ONLY=1(仍要求 :8000 已有监听)。
-#
-# 其他可选变量与 development.sh 一致,例如:
-# SKIP_INFRA=1 SKIP_INSTALL=1 START_EVAL_WEB=0 OPEN_EVAL_WEB=0
-# INTERNAL_EVAL_PORT EVAL_WEB_PORT INTERNAL_EVAL_API_KEY
+# 兼容旧变量:SKIP_CELERY=1 等价于 EVAL_ATTACH_ONLY=1
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+echo -e "\033[1;33m⚠ internal-eval.sh 已并入 development.sh,正在转发…\033[0m" >&2
+
export LIFE_ECHO_WITH_INTERNAL_EVAL=1
if [[ "${SKIP_CELERY:-}" == "1" ]]; then
diff --git a/api/pyproject.toml b/api/pyproject.toml
index cf056f0..95d2598 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -17,6 +17,15 @@ dependencies = [
"langchain-openai>=1.1.11",
"loguru>=0.7.3",
"openai>=2.26.0",
+ "opentelemetry-api>=1.42.0",
+ "opentelemetry-exporter-otlp-proto-grpc>=1.42.0",
+ "opentelemetry-instrumentation-celery>=0.63b0",
+ "opentelemetry-instrumentation-fastapi>=0.63b0",
+ "opentelemetry-instrumentation-httpx>=0.63b0",
+ "opentelemetry-instrumentation-logging>=0.63b0",
+ "opentelemetry-instrumentation-redis>=0.63b0",
+ "opentelemetry-instrumentation-sqlalchemy>=0.63b0",
+ "opentelemetry-sdk>=1.42.0",
"pgvector>=0.4.2",
"pillow>=12.1.1",
"psycopg[binary]>=3.2.0",
diff --git a/api/scripts/verify_observability_metrics.sh b/api/scripts/verify_observability_metrics.sh
new file mode 100755
index 0000000..1ea22c5
--- /dev/null
+++ b/api/scripts/verify_observability_metrics.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# Verify that the local Prometheus exposes the metrics exported through OTel
+# (requires the observability compose stack to be running).
+set -euo pipefail
+
+PROM_URL="${PROMETHEUS_URL:-http://127.0.0.1:49090}"
+QUERY_ENDPOINT="${PROM_URL}/api/v1/query"
+
+# Query Prometheus for one metric name and report whether any series exist.
+# Args:    $1 - metric name, sent URL-encoded as the instant-query expression.
+# Returns: 0 if at least one series is returned; 1 if none, or if Prometheus
+#          is unreachable or answers with something that is not JSON.
+check_metric() {
+  local name="$1"
+  local result
+  # "|| true": with pipefail a failed curl would make this assignment return
+  # non-zero; the outcome is decided from ${result} instead. The Python side
+  # prints "error" for an empty or invalid body rather than a traceback.
+  result="$(curl -sfG --data-urlencode "query=${name}" "${QUERY_ENDPOINT}" | python3 -c "
+import json, sys
+try:
+    data = json.load(sys.stdin)
+except ValueError:
+    print('error')
+else:
+    r = data.get('data', {}).get('result', [])
+    print('ok' if r else 'missing')
+" || true)"
+  case "${result}" in
+    ok)
+      echo "OK: ${name}"
+      ;;
+    missing)
+      echo "MISSING: ${name}"
+      return 1
+      ;;
+    *)
+      echo "ERROR: ${name} (Prometheus unreachable or invalid response at ${QUERY_ENDPOINT})"
+      return 1
+      ;;
+  esac
+}
+
+echo "Checking Prometheus at ${PROM_URL} ..."
+fail=0
+for m in \
+  "llm_call_duration_milliseconds_bucket" \
+  "llm_call_total" \
+  "business_operation_duration_milliseconds_bucket" \
+  "http_server_request_duration_seconds_bucket"
+do
+  check_metric "${m}" || fail=1
+done
+
+if [[ "${fail}" -ne 0 ]]; then
+  echo ""
+  echo "Some metrics missing. Ensure OTEL_ENABLED=true, API/worker running, and traffic generated."
+  exit 1
+fi
+echo "All required metrics present."
diff --git a/api/tests/core/test_business_telemetry.py b/api/tests/core/test_business_telemetry.py
new file mode 100644
index 0000000..9ed81da
--- /dev/null
+++ b/api/tests/core/test_business_telemetry.py
@@ -0,0 +1,64 @@
+"""Business telemetry helpers (no real Collector required)."""
+
+from __future__ import annotations
+
+import pytest
+from opentelemetry import trace
+
+from app.core.business_telemetry import business_span
+from app.core.config import settings
+
+
+class TestBusinessSpan:
+    """``business_span`` behaviour with telemetry switched off and on."""
+
+    @staticmethod
+    def _patch_tracer(monkeypatch: pytest.MonkeyPatch):
+        """Point ``business_telemetry.get_tracer`` at an in-memory exporter.
+
+        Returns the exporter so a test can inspect the finished spans.
+        """
+        from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+        from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+            InMemorySpanExporter,
+        )
+
+        memory_exporter = InMemorySpanExporter()
+        tracer_provider = TracerProvider()
+        tracer_provider.add_span_processor(SimpleSpanProcessor(memory_exporter))
+        monkeypatch.setattr(
+            "app.core.business_telemetry.get_tracer",
+            lambda _name: tracer_provider.get_tracer("test"),
+        )
+        return memory_exporter
+
+    def test_disabled_is_noop(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        # With telemetry off the context manager yields the invalid span.
+        monkeypatch.setattr(settings, "otel_enabled", False)
+        with business_span("memoir.phase1", user_id="u1") as span:
+            assert span == trace.INVALID_SPAN
+
+    def test_filters_high_cardinality_attrs(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setattr(settings, "otel_enabled", True)
+        memory_exporter = self._patch_tracer(monkeypatch)
+
+        span_kwargs = {"user_id": "user-123", "chapter_category": "childhood"}
+        with business_span("memoir.phase2", **span_kwargs):
+            pass
+
+        # chapter_category is recorded; user_id must not appear as an attribute.
+        finished = memory_exporter.get_finished_spans()
+        assert finished
+        recorded = dict(finished[0].attributes or {})
+        assert recorded.get("business.chapter_category") == "childhood"
+        assert "business.user_id" not in recorded
+
+    def test_enabled_yields_span(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setattr(settings, "otel_enabled", True)
+        self._patch_tracer(monkeypatch)
+
+        with business_span("conversation.ws.process_turn") as span:
+            assert span.is_recording()
diff --git a/api/tests/core/test_llm_telemetry.py b/api/tests/core/test_llm_telemetry.py
new file mode 100644
index 0000000..05a3658
--- /dev/null
+++ b/api/tests/core/test_llm_telemetry.py
@@ -0,0 +1,118 @@
+"""LLM telemetry helpers (no real Collector required)."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from app.core import llm_telemetry
+from app.core.config import settings
+
+
+class TestExtractTokenUsage:  # extract_token_usage returns an (input, output) token-count pair
+ def test_usage_metadata_object(self) -> None:  # counts read from usage_metadata.input_tokens / .output_tokens
+ msg = SimpleNamespace(usage_metadata=SimpleNamespace(input_tokens=10, output_tokens=4))
+ assert llm_telemetry.extract_token_usage(msg) == (10, 4)
+
+ def test_response_metadata_dict(self) -> None:  # usage_metadata is None -> counts come from response_metadata["token_usage"]
+ msg = SimpleNamespace(
+ usage_metadata=None,
+ response_metadata={"token_usage": {"prompt_tokens": 3, "completion_tokens": 7}},
+ )
+ assert llm_telemetry.extract_token_usage(msg) == (3, 7)  # prompt_tokens -> input, completion_tokens -> output
+
+ def test_missing_usage_returns_zero(self) -> None:  # object with neither attribute -> (0, 0), no exception
+ assert llm_telemetry.extract_token_usage(SimpleNamespace()) == (0, 0)
+
+
+class TestOtelDisabledNoOp:  # telemetry helpers must remain callable when settings.otel_enabled is False
+ def test_record_llm_completion_disabled(self, monkeypatch: pytest.MonkeyPatch) -> None:  # no assertion: passes if the call does not raise
+ monkeypatch.setattr(settings, "otel_enabled", False)
+ llm_telemetry.record_llm_completion(
+ agent="Test",
+ provider="mock",
+ model="m",
+ duration_ms=1.0,
+ input_tokens=5,
+ output_tokens=2,
+ )
+
+ def test_langchain_invoke_span_disabled(self, monkeypatch: pytest.MonkeyPatch) -> None:  # ctx must still support item access when disabled
+ monkeypatch.setattr(settings, "otel_enabled", False)
+ with llm_telemetry.langchain_invoke_span(
+ agent="Test",
+ provider="mock",
+ model="m",
+ call_type="chat",
+ ) as ctx:
+ ctx["response"] = SimpleNamespace(  # storing a response must be accepted on the disabled path
+ usage_metadata=SimpleNamespace(input_tokens=1, output_tokens=1)
+ )
+ assert ctx["outcome"] == "ok"  # outcome is expected to read "ok" when nothing raised
+
+
+class TestLangchainInvokeSpanRecordsTokens:  # token usage on ctx["response"] must reach record_llm_completion
+ def test_records_completion_with_tokens(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setattr(settings, "otel_enabled", True)
+ recorded: list[dict] = []  # kwargs of every record_llm_completion call made during the test
+
+ def _capture(**kwargs: object) -> None:  # stand-in for record_llm_completion that only stores its kwargs
+ recorded.append(kwargs)
+
+ with patch.object(llm_telemetry, "record_llm_completion", side_effect=_capture):  # swap in the capture stub
+ with llm_telemetry.langchain_invoke_span(
+ agent="TestAgent",
+ provider="mock",
+ model="m1",
+ call_type="chat",
+ ) as ctx:
+ ctx["response"] = SimpleNamespace(  # fake response carrying 11 input / 5 output tokens
+ usage_metadata=SimpleNamespace(input_tokens=11, output_tokens=5)
+ )
+
+ assert len(recorded) == 1  # exactly one completion must have been recorded
+ assert recorded[0]["input_tokens"] == 11
+ assert recorded[0]["output_tokens"] == 5
+ assert recorded[0]["agent"] == "TestAgent"  # the agent label is passed through unchanged
+
+
+class TestObserveAinvokeExtraAttributes:  # observe_ainvoke span must carry latency plus caller-supplied attributes
+ @pytest.mark.asyncio
+ async def test_response_latency_on_span(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setattr(settings, "otel_enabled", True)
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+ from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+ InMemorySpanExporter,
+ )
+ from opentelemetry import trace  # NOTE(review): not referenced anywhere in this test; looks removable
+
+ exporter = InMemorySpanExporter()  # keeps finished spans in memory so the test can inspect them
+ provider = TracerProvider()
+ provider.add_span_processor(SimpleSpanProcessor(exporter))
+ monkeypatch.setattr(  # make llm_telemetry's get_tracer return a tracer from the local provider
+ "app.core.llm_telemetry.get_tracer",
+ lambda _name: provider.get_tracer("test"),
+ )
+
+ class _LLM:  # minimal async stub: ainvoke ignores its messages and returns fixed 1/1 token usage
+ async def ainvoke(self, messages: list) -> SimpleNamespace:
+ return SimpleNamespace(
+ usage_metadata=SimpleNamespace(input_tokens=1, output_tokens=1)
+ )
+
+ await llm_telemetry.observe_ainvoke(
+ _LLM(),
+ [],
+ agent="Test",
+ provider="mock",
+ model="m",
+ extra_span_attributes={"llm.custom": "x"},
+ )
+ spans = exporter.get_finished_spans()
+ assert spans  # at least one span must have been exported
+ attrs = dict(spans[-1].attributes or {})  # inspect the last finished span; "or {}" guards against None
+ assert "llm.response_latency_ms" in attrs  # presence only: the latency value itself is not asserted
+ assert attrs.get("llm.custom") == "x"  # extra_span_attributes entry is expected on the span unchanged
diff --git a/api/uv.lock b/api/uv.lock
index 38daf95..1645a62 100644
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -88,6 +88,15 @@ dependencies = [
{ name = "langchain-openai" },
{ name = "loguru" },
{ name = "openai" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-exporter-otlp-proto-grpc" },
+ { name = "opentelemetry-instrumentation-celery" },
+ { name = "opentelemetry-instrumentation-fastapi" },
+ { name = "opentelemetry-instrumentation-httpx" },
+ { name = "opentelemetry-instrumentation-logging" },
+ { name = "opentelemetry-instrumentation-redis" },
+ { name = "opentelemetry-instrumentation-sqlalchemy" },
+ { name = "opentelemetry-sdk" },
{ name = "pgvector" },
{ name = "pillow" },
{ name = "psycopg", extra = ["binary"] },
@@ -129,6 +138,15 @@ requires-dist = [
{ name = "langchain-openai", specifier = ">=1.1.11" },
{ name = "loguru", specifier = ">=0.7.3" },
{ name = "openai", specifier = ">=2.26.0" },
+ { name = "opentelemetry-api", specifier = ">=1.42.0" },
+ { name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.42.0" },
+ { name = "opentelemetry-instrumentation-celery", specifier = ">=0.63b0" },
+ { name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.63b0" },
+ { name = "opentelemetry-instrumentation-httpx", specifier = ">=0.63b0" },
+ { name = "opentelemetry-instrumentation-logging", specifier = ">=0.63b0" },
+ { name = "opentelemetry-instrumentation-redis", specifier = ">=0.63b0" },
+ { name = "opentelemetry-instrumentation-sqlalchemy", specifier = ">=0.63b0" },
+ { name = "opentelemetry-sdk", specifier = ">=1.42.0" },
{ name = "pgvector", specifier = ">=0.4.2" },
{ name = "pillow", specifier = ">=12.1.1" },
{ name = "psycopg", extras = ["binary"], specifier = ">=3.2.0" },
@@ -156,6 +174,15 @@ dev = [
{ name = "ruff", specifier = ">=0.15.6" },
]
+[[package]]
+name = "asgiref"
+version = "3.11.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/63/40/f03da1264ae8f7cfdbf9146542e5e7e8100a4c66ab48e791df9a03d3f6c0/asgiref-3.11.1.tar.gz", hash = "sha256:5f184dc43b7e763efe848065441eac62229c9f7b0475f41f80e207a114eda4ce", size = 38550, upload-time = "2026-02-03T13:30:14.33Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5c/0a/a72d10ed65068e115044937873362e6e32fab1b7dce0046aeb224682c989/asgiref-3.11.1-py3-none-any.whl", hash = "sha256:e8667a091e69529631969fd45dc268fa79b99c92c5fcdda727757e52146ec133", size = 24345, upload-time = "2026-02-03T13:30:13.039Z" },
+]
+
[[package]]
name = "av"
version = "16.1.0"
@@ -920,6 +947,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" },
]
+[[package]]
+name = "googleapis-common-protos"
+version = "1.75.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b5/c8/f439cffde755cffa462bfbb156278fa6f9d09119719af9814b858fd4f81f/googleapis_common_protos-1.75.0.tar.gz", hash = "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", size = 151035, upload-time = "2026-05-07T08:04:49.423Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/c8/e2645aa8ed02fd4c7a2f59d68783b65b1f3cbdfe39a6308e156509d1fee8/googleapis_common_protos-1.75.0-py3-none-any.whl", hash = "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed", size = 300631, upload-time = "2026-05-07T08:03:30.345Z" },
+]
+
[[package]]
name = "greenlet"
version = "3.3.2"
@@ -954,6 +993,37 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" },
]
+[[package]]
+name = "grpcio"
+version = "1.80.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" },
+ { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" },
+ { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" },
+ { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" },
+ { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" },
+ { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" },
+ { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" },
+ { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" },
+ { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" },
+ { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" },
+ { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" },
+ { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" },
+]
+
[[package]]
name = "h11"
version = "0.16.0"
@@ -1523,6 +1593,218 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/2e/3f73e8ca53718952222cacd0cf7eecc9db439d020f0c1fe7ae717e4e199a/openai-2.26.0-py3-none-any.whl", hash = "sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f", size = 1136409, upload-time = "2026-03-05T23:17:34.072Z" },
]
+[[package]]
+name = "opentelemetry-api"
+version = "1.42.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/86/ca/25288069c399be6769159d9fb7b1190b603537d82aad2fa2746a0cc2c8c6/opentelemetry_api-1.42.0.tar.gz", hash = "sha256:ea84c893ad177791d138e0349d6ceebd8d3bf006440900400ce220008dafc372", size = 72300, upload-time = "2026-05-19T09:46:29.885Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1b/0b/be5daf659b82b525338fde371dfcfab09b606a19bb5620c37076964710ec/opentelemetry_api-1.42.0-py3-none-any.whl", hash = "sha256:558d88f88192a973579910ef6f2c13db47a268d5ec2e53e83e50e74a39a02922", size = 61310, upload-time = "2026-05-19T09:46:06.561Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-common"
+version = "1.42.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-proto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/76/a9/1496f27ecdfc7d504eac80f5e16474ee9d47cd08cda1f2917b58cf1c299c/opentelemetry_exporter_otlp_proto_common-1.42.0.tar.gz", hash = "sha256:c7a1a61f3a4c4dfa83127353edb1c75b873289d9ee42379db46eb835963b72e3", size = 21430, upload-time = "2026-05-19T09:46:32.838Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8b/7b/1542eb6e3d941a7dd93648d485b7c8495bc2841a2bb7dd5f394f370cf607/opentelemetry_exporter_otlp_proto_common-1.42.0-py3-none-any.whl", hash = "sha256:92de67f096c9200770f16fbdb63b96fb6061d604b4bc266726d8355caeb864e8", size = 17328, upload-time = "2026-05-19T09:46:11.291Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-grpc"
+version = "1.42.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "googleapis-common-protos" },
+ { name = "grpcio" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-exporter-otlp-proto-common" },
+ { name = "opentelemetry-proto" },
+ { name = "opentelemetry-sdk" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/01/6a/63812e4f67d3658b21e94bc890b67296951f3aa8f6950fdf735f763500e5/opentelemetry_exporter_otlp_proto_grpc-1.42.0.tar.gz", hash = "sha256:75eac4e9d0bd69bea8199d75dfeb585cce05a9baa8215d1f7aad9e3583bf5ef9", size = 27136, upload-time = "2026-05-19T09:46:33.594Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/4d/e9/308c4c03b536005a1443bee0d9f06de38aad8b94f59f58ac688ead7a8cf9/opentelemetry_exporter_otlp_proto_grpc-1.42.0-py3-none-any.whl", hash = "sha256:5d6d1691586f2e656fd14187f2f2f5fa06e94834e1acdce71edcbbe35730b31d", size = 19614, upload-time = "2026-05-19T09:46:12.331Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "packaging" },
+ { name = "wrapt" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2a/2d/322d464f4105966fb8555f871a84f43e821ce9aaf64ecae9586e9691c6a2/opentelemetry_instrumentation-0.63b0.tar.gz", hash = "sha256:80a339ef030a8d0fd1962375a9801dd31954e5063d74c00bc3d4e6581f43bab1", size = 41083, upload-time = "2026-05-19T09:47:06.194Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ae/45/a38e74da3f1b5c82c97289da91d978caa04321877f0ab170fc620a0753f2/opentelemetry_instrumentation-0.63b0-py3-none-any.whl", hash = "sha256:984b18763b652a881ac5a596098d89923f74cf53a658c2dde660387e018147ca", size = 35574, upload-time = "2026-05-19T09:46:07.257Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-asgi"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "asgiref" },
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "opentelemetry-util-http" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b2/ba/dd540189230d211898ccc4df899874bc0d84f5c54a1e07a13a2bde606a57/opentelemetry_instrumentation_asgi-0.63b0.tar.gz", hash = "sha256:e201eed7616f7da0840adf8ab8c5ea64db7ab19b920373b38983e2bac8d3645d", size = 26154, upload-time = "2026-05-19T09:47:10.023Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ad/4f/caa793347febb9dae45f3d03d8bac04bf0752170a19c53016a0a91a214a0/opentelemetry_instrumentation_asgi-0.63b0-py3-none-any.whl", hash = "sha256:4e89555c110677226b9ca1734eda248360916bccf0ebadf8db8baf0015c9efca", size = 15907, upload-time = "2026-05-19T09:46:13.675Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-celery"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/67/a7/82e696152b65178d13f9ee2241cadb72b7b908603c692a8519f0c0295e35/opentelemetry_instrumentation_celery-0.63b0.tar.gz", hash = "sha256:c02371fe46073b57ecf1287d833bfe00c02f79ba600549752ae7bd4fbcd8f06a", size = 15520, upload-time = "2026-05-19T09:47:15.445Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f7/9a/03f9168c0a07a0441129a9a426405f6b0efc3804f4c0c1e200f0a3a7c568/opentelemetry_instrumentation_celery-0.63b0-py3-none-any.whl", hash = "sha256:732d3a0b883cb777d8e0213ebbfa49fe8a8ee987ea49a6d45ec1351cb09e8b93", size = 13170, upload-time = "2026-05-19T09:46:21.78Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-fastapi"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-instrumentation-asgi" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "opentelemetry-util-http" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/51/73/6e44cd21b17d4affd41a621804421d476940b1dab352254b1a9c08a08df6/opentelemetry_instrumentation_fastapi-0.63b0.tar.gz", hash = "sha256:5117df842d0ce47e1fb9eb3c2ad2a7594bd139b129de9f3fa1ce5b28e970c046", size = 25387, upload-time = "2026-05-19T09:47:20.726Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/78/2d/f869b45eddbb7332cce7a863a4d1e758d58a9c890db6dbf0fe6aedd3eda1/opentelemetry_instrumentation_fastapi-0.63b0-py3-none-any.whl", hash = "sha256:ed43d2358164df83d811a8d69a7578cad3ab66fde4db027296c1ee20f703e3f0", size = 12797, upload-time = "2026-05-19T09:46:28.885Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-httpx"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "opentelemetry-util-http" },
+ { name = "wrapt" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d3/22/21c1d745b82eb28c41c4f0635be1d7b9d9d77bbe0b6c718d7e7d7fcc6f20/opentelemetry_instrumentation_httpx-0.63b0.tar.gz", hash = "sha256:aafb9e336be48b4c0c19ae1f003621e23d75b3560797d42baa656dcc3a555266", size = 23556, upload-time = "2026-05-19T09:47:22.997Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/af/f1/0c9ba71e48129390a9db60ec92ab0149cf97d1a983c11a77e1a04ec5dc7b/opentelemetry_instrumentation_httpx-0.63b0-py3-none-any.whl", hash = "sha256:e4359d317a3313fa8607b7ab4c47088a428856349363c754013fbd595f60fb23", size = 16338, upload-time = "2026-05-19T09:46:32.015Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-logging"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/11/a8/e5ae9bf71babc3589252d826ffd212c004582a42699ab24245ecf8004f4a/opentelemetry_instrumentation_logging-0.63b0.tar.gz", hash = "sha256:c4b875cdd712e01e2a0b904d9c9248f4f03a8f41a8acd64000984359841b98d8", size = 19824, upload-time = "2026-05-19T09:47:24.771Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d7/1c/f61d7aa67ecf4ecc04bba5a276f6dc67f0803f6d0a61eceb585f3bb2fcb9/opentelemetry_instrumentation_logging-0.63b0-py3-none-any.whl", hash = "sha256:8fe17ed310de42683dc585f1bf6af6ccaa3192c997c431c57177e15bee6885f5", size = 15992, upload-time = "2026-05-19T09:46:35.553Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-redis"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "wrapt" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9c/bc/98f3355db9dd0f2885f168a2544739783349df7ed495cba2c06dddb3c183/opentelemetry_instrumentation_redis-0.63b0.tar.gz", hash = "sha256:a369c140eb7cdd8b59192255eb4e361755dc5353be5aa0ff25a2cbf964fb993c", size = 16713, upload-time = "2026-05-19T09:47:32.264Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9c/6a/b9955b1e659793e9e5e787e90d6b203b17fcf2b88811794fe1efa584ee94/opentelemetry_instrumentation_redis-0.63b0-py3-none-any.whl", hash = "sha256:61e1c18f1f87d2ebec1ed69dd187e233c4482ae528e02929150ef2699d15120a", size = 14538, upload-time = "2026-05-19T09:46:46.242Z" },
+]
+
+[[package]]
+name = "opentelemetry-instrumentation-sqlalchemy"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-instrumentation" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "packaging" },
+ { name = "wrapt" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/72/0def937531c0e7a423af06cbffaf235caea7af0275082c6bca13a25701ec/opentelemetry_instrumentation_sqlalchemy-0.63b0.tar.gz", hash = "sha256:b854ac9fd5707a8f79dc9b252cdec6873217e5a6e7e5fdb43dca6858a26342cb", size = 18007, upload-time = "2026-05-19T09:47:34.518Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/91/98/eb7430900f683fd6cec4745736bc69ca7260442b6b20ad05194abe97a187/opentelemetry_instrumentation_sqlalchemy-0.63b0-py3-none-any.whl", hash = "sha256:6a31bf004798f8eabb74f75e1d90cf081c7d470933867be6a5c8c985925ddb3e", size = 14410, upload-time = "2026-05-19T09:46:49.328Z" },
+]
+
+[[package]]
+name = "opentelemetry-proto"
+version = "1.42.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/2c/7c56a19498b46da4c54dc4e765c95d17f8fec2ba86bec1817b41ae635360/opentelemetry_proto-1.42.0.tar.gz", hash = "sha256:5d56a9067b631ea931a135d7b86428ae99649f591d4db69b9fc8c8e0465fce65", size = 45841, upload-time = "2026-05-19T09:46:42.058Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2d/ad/ff5f619a04cddb4936ead0dd8f590c5b373c5b4b9f2eef555e9d3d951ccb/opentelemetry_proto-1.42.0-py3-none-any.whl", hash = "sha256:2c0716a37e5c12efef37cbd01906d649b7fb85c85ac687518d0bd28527c6498e", size = 71779, upload-time = "2026-05-19T09:46:24.536Z" },
+]
+
+[[package]]
+name = "opentelemetry-sdk"
+version = "1.42.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "opentelemetry-semantic-conventions" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b7/c9/dabaaf1c754a57b82b5a36aeca3806d92c1877ccfb12a697b65f88bf027c/opentelemetry_sdk-1.42.0.tar.gz", hash = "sha256:2479e462cc69357825c2c847ce4a601bc1b17e1279aa7f80d3490f0ae614d0e5", size = 239072, upload-time = "2026-05-19T09:46:42.992Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7b/7d/16bf9a9d42ebbd1679e0cda018d57a0712f3b6f6f1e7ae5ef3c7ee5927c0/opentelemetry_sdk-1.42.0-py3-none-any.whl", hash = "sha256:ec4a4f69e15220b3d7bccd93217aac745682bb6435b9381f7bb44cb7e07b4f2b", size = 170879, upload-time = "2026-05-19T09:46:25.871Z" },
+]
+
+[[package]]
+name = "opentelemetry-semantic-conventions"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "opentelemetry-api" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/20/f8/be4625838aae098c2f9fbdc062a1b3128ebb9e799b891b654ee8cad94897/opentelemetry_semantic_conventions-0.63b0.tar.gz", hash = "sha256:cfea295264654fa324fcef24aa56fb1836fdc0da27db128645dc6aa76115cc6c", size = 148333, upload-time = "2026-05-19T09:46:44.01Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8f/6f/8d0ce225b8fdbb72c97cf4130107d861eafcb3d8e5c3f5891e8556177316/opentelemetry_semantic_conventions-0.63b0-py3-none-any.whl", hash = "sha256:1f3962732b04f43e4fef28173c9a3615b8847b4b2d6386fdc085361b29875ab9", size = 203712, upload-time = "2026-05-19T09:46:27.569Z" },
+]
+
+[[package]]
+name = "opentelemetry-util-http"
+version = "0.63b0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/cf/0b53c5fe1113fb01e23c6c88b66d8289f979e61cece444576b286a3415fd/opentelemetry_util_http-0.63b0.tar.gz", hash = "sha256:401ddd686cd943ef801b9384b0722b904250f6bf3906951ce4f27bb6b63b04a3", size = 11101, upload-time = "2026-05-19T09:47:42.885Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6e/8c/7fd6f06139cca88a6341bebf2b01f3e97bb8fd8d12e7d3ad3d2ad88b8c49/opentelemetry_util_http-0.63b0-py3-none-any.whl", hash = "sha256:80536361b6348e57503cdae8c1b1be79574d14c30e879367336c5a076fd4f673", size = 8209, upload-time = "2026-05-19T09:47:01.712Z" },
+]
+
[[package]]
name = "orjson"
version = "3.11.7"
@@ -1693,17 +1975,17 @@ wheels = [
[[package]]
name = "protobuf"
-version = "7.34.0"
+version = "6.33.6"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f2/00/04a2ab36b70a52d0356852979e08b44edde0435f2115dc66e25f2100f3ab/protobuf-7.34.0.tar.gz", hash = "sha256:3871a3df67c710aaf7bb8d214cc997342e63ceebd940c8c7fc65c9b3d697591a", size = 454726, upload-time = "2026-02-27T00:30:25.421Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/13/c4/6322ab5c8f279c4c358bc14eb8aefc0550b97222a39f04eb3c1af7a830fa/protobuf-7.34.0-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e329966799f2c271d5e05e236459fe1cbfdb8755aaa3b0914fa60947ddea408", size = 429248, upload-time = "2026-02-27T00:30:14.924Z" },
- { url = "https://files.pythonhosted.org/packages/45/99/b029bbbc61e8937545da5b79aa405ab2d9cf307a728f8c9459ad60d7a481/protobuf-7.34.0-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:9d7a5005fb96f3c1e64f397f91500b0eb371b28da81296ae73a6b08a5b76cdd6", size = 325753, upload-time = "2026-02-27T00:30:17.247Z" },
- { url = "https://files.pythonhosted.org/packages/cc/79/09f02671eb75b251c5550a1c48e7b3d4b0623efd7c95a15a50f6f9fc1e2e/protobuf-7.34.0-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:4a72a8ec94e7a9f7ef7fe818ed26d073305f347f8b3b5ba31e22f81fd85fca02", size = 340200, upload-time = "2026-02-27T00:30:18.672Z" },
- { url = "https://files.pythonhosted.org/packages/b5/57/89727baef7578897af5ed166735ceb315819f1c184da8c3441271dbcfde7/protobuf-7.34.0-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:964cf977e07f479c0697964e83deda72bcbc75c3badab506fb061b352d991b01", size = 324268, upload-time = "2026-02-27T00:30:20.088Z" },
- { url = "https://files.pythonhosted.org/packages/1f/3e/38ff2ddee5cc946f575c9d8cc822e34bde205cf61acf8099ad88ef19d7d2/protobuf-7.34.0-cp310-abi3-win32.whl", hash = "sha256:f791ec509707a1d91bd02e07df157e75e4fb9fbdad12a81b7396201ec244e2e3", size = 426628, upload-time = "2026-02-27T00:30:21.555Z" },
- { url = "https://files.pythonhosted.org/packages/cb/71/7c32eaf34a61a1bae1b62a2ac4ffe09b8d1bb0cf93ad505f42040023db89/protobuf-7.34.0-cp310-abi3-win_amd64.whl", hash = "sha256:9f9079f1dde4e32342ecbd1c118d76367090d4aaa19da78230c38101c5b3dd40", size = 437901, upload-time = "2026-02-27T00:30:22.836Z" },
- { url = "https://files.pythonhosted.org/packages/a4/e7/14dc9366696dcb53a413449881743426ed289d687bcf3d5aee4726c32ebb/protobuf-7.34.0-py3-none-any.whl", hash = "sha256:e3b914dd77fa33fa06ab2baa97937746ab25695f389869afdf03e81f34e45dc7", size = 170716, upload-time = "2026-02-27T00:30:23.994Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" },
+ { url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" },
+ { url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" },
]
[[package]]
@@ -2869,6 +3151,59 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" },
]
+[[package]]
+name = "wrapt"
+version = "2.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2e/64/925f213fdcbb9baeb1530449ac71a4d57fc361c053d06bf78d0c5c7cd80c/wrapt-2.1.2.tar.gz", hash = "sha256:3996a67eecc2c68fd47b4e3c564405a5777367adfd9b8abb58387b63ee83b21e", size = 81678, upload-time = "2026-03-06T02:53:25.134Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/4c/7a/d936840735c828b38d26a854e85d5338894cda544cb7a85a9d5b8b9c4df7/wrapt-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787fd6f4d67befa6fe2abdffcbd3de2d82dfc6fb8a6d850407c53332709d030b", size = 61259, upload-time = "2026-03-06T02:53:41.922Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/88/9a9b9a90ac8ca11c2fdb6a286cb3a1fc7dd774c00ed70929a6434f6bc634/wrapt-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4bdf26e03e6d0da3f0e9422fd36bcebf7bc0eeb55fdf9c727a09abc6b9fe472e", size = 61851, upload-time = "2026-03-06T02:52:48.672Z" },
+ { url = "https://files.pythonhosted.org/packages/03/a9/5b7d6a16fd6533fed2756900fc8fc923f678179aea62ada6d65c92718c00/wrapt-2.1.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bbac24d879aa22998e87f6b3f481a5216311e7d53c7db87f189a7a0266dafffb", size = 121446, upload-time = "2026-03-06T02:54:14.013Z" },
+ { url = "https://files.pythonhosted.org/packages/45/bb/34c443690c847835cfe9f892be78c533d4f32366ad2888972c094a897e39/wrapt-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16997dfb9d67addc2e3f41b62a104341e80cac52f91110dece393923c0ebd5ca", size = 123056, upload-time = "2026-03-06T02:54:10.829Z" },
+ { url = "https://files.pythonhosted.org/packages/93/b9/ff205f391cb708f67f41ea148545f2b53ff543a7ac293b30d178af4d2271/wrapt-2.1.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:162e4e2ba7542da9027821cb6e7c5e068d64f9a10b5f15512ea28e954893a267", size = 117359, upload-time = "2026-03-06T02:53:03.623Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/3d/1ea04d7747825119c3c9a5e0874a40b33594ada92e5649347c457d982805/wrapt-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f29c827a8d9936ac320746747a016c4bc66ef639f5cd0d32df24f5eacbf9c69f", size = 121479, upload-time = "2026-03-06T02:53:45.844Z" },
+ { url = "https://files.pythonhosted.org/packages/78/cc/ee3a011920c7a023b25e8df26f306b2484a531ab84ca5c96260a73de76c0/wrapt-2.1.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:a9dd9813825f7ecb018c17fd147a01845eb330254dff86d3b5816f20f4d6aaf8", size = 116271, upload-time = "2026-03-06T02:54:46.356Z" },
+ { url = "https://files.pythonhosted.org/packages/98/fd/e5ff7ded41b76d802cf1191288473e850d24ba2e39a6ec540f21ae3b57cb/wrapt-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f8dbdd3719e534860d6a78526aafc220e0241f981367018c2875178cf83a413", size = 120573, upload-time = "2026-03-06T02:52:50.163Z" },
+ { url = "https://files.pythonhosted.org/packages/47/c5/242cae3b5b080cd09bacef0591691ba1879739050cc7c801ff35c8886b66/wrapt-2.1.2-cp313-cp313-win32.whl", hash = "sha256:5c35b5d82b16a3bc6e0a04349b606a0582bc29f573786aebe98e0c159bc48db6", size = 58205, upload-time = "2026-03-06T02:53:47.494Z" },
+ { url = "https://files.pythonhosted.org/packages/12/69/c358c61e7a50f290958809b3c61ebe8b3838ea3e070d7aac9814f95a0528/wrapt-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f8bc1c264d8d1cf5b3560a87bbdd31131573eb25f9f9447bb6252b8d4c44a3a1", size = 60452, upload-time = "2026-03-06T02:53:30.038Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/66/c8a6fcfe321295fd8c0ab1bd685b5a01462a9b3aa2f597254462fc2bc975/wrapt-2.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:3beb22f674550d5634642c645aba4c72a2c66fb185ae1aebe1e955fae5a13baf", size = 58842, upload-time = "2026-03-06T02:52:52.114Z" },
+ { url = "https://files.pythonhosted.org/packages/da/55/9c7052c349106e0b3f17ae8db4b23a691a963c334de7f9dbd60f8f74a831/wrapt-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0fc04bc8664a8bc4c8e00b37b5355cffca2535209fba1abb09ae2b7c76ddf82b", size = 63075, upload-time = "2026-03-06T02:53:19.108Z" },
+ { url = "https://files.pythonhosted.org/packages/09/a8/ce7b4006f7218248dd71b7b2b732d0710845a0e49213b18faef64811ffef/wrapt-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a9b9d50c9af998875a1482a038eb05755dfd6fe303a313f6a940bb53a83c3f18", size = 63719, upload-time = "2026-03-06T02:54:33.452Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/e5/2ca472e80b9e2b7a17f106bb8f9df1db11e62101652ce210f66935c6af67/wrapt-2.1.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2d3ff4f0024dd224290c0eabf0240f1bfc1f26363431505fb1b0283d3b08f11d", size = 152643, upload-time = "2026-03-06T02:52:42.721Z" },
+ { url = "https://files.pythonhosted.org/packages/36/42/30f0f2cefca9d9cbf6835f544d825064570203c3e70aa873d8ae12e23791/wrapt-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3278c471f4468ad544a691b31bb856374fbdefb7fee1a152153e64019379f015", size = 158805, upload-time = "2026-03-06T02:54:25.441Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/67/d08672f801f604889dcf58f1a0b424fe3808860ede9e03affc1876b295af/wrapt-2.1.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8914c754d3134a3032601c6984db1c576e6abaf3fc68094bb8ab1379d75ff92", size = 145990, upload-time = "2026-03-06T02:53:57.456Z" },
+ { url = "https://files.pythonhosted.org/packages/68/a7/fd371b02e73babec1de6ade596e8cd9691051058cfdadbfd62a5898f3295/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ff95d4264e55839be37bafe1536db2ab2de19da6b65f9244f01f332b5286cfbf", size = 155670, upload-time = "2026-03-06T02:54:55.309Z" },
+ { url = "https://files.pythonhosted.org/packages/86/2d/9fe0095dfdb621009f40117dcebf41d7396c2c22dca6eac779f4c007b86c/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:76405518ca4e1b76fbb1b9f686cff93aebae03920cc55ceeec48ff9f719c5f67", size = 144357, upload-time = "2026-03-06T02:54:24.092Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/b6/ec7b4a254abbe4cde9fa15c5d2cca4518f6b07d0f1b77d4ee9655e30280e/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c0be8b5a74c5824e9359b53e7e58bef71a729bacc82e16587db1c4ebc91f7c5a", size = 150269, upload-time = "2026-03-06T02:53:31.268Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/6b/2fabe8ebf148f4ee3c782aae86a795cc68ffe7d432ef550f234025ce0cfa/wrapt-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:f01277d9a5fc1862f26f7626da9cf443bebc0abd2f303f41c5e995b15887dabd", size = 59894, upload-time = "2026-03-06T02:54:15.391Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/fb/9ba66fc2dedc936de5f8073c0217b5d4484e966d87723415cc8262c5d9c2/wrapt-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:84ce8f1c2104d2f6daa912b1b5b039f331febfeee74f8042ad4e04992bd95c8f", size = 63197, upload-time = "2026-03-06T02:54:41.943Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/1c/012d7423c95d0e337117723eb8ecf73c622ce15a97847e84cf3f8f26cd7e/wrapt-2.1.2-cp313-cp313t-win_arm64.whl", hash = "sha256:a93cd767e37faeddbe07d8fc4212d5cba660af59bdb0f6372c93faaa13e6e679", size = 60363, upload-time = "2026-03-06T02:54:48.093Z" },
+ { url = "https://files.pythonhosted.org/packages/39/25/e7ea0b417db02bb796182a5316398a75792cd9a22528783d868755e1f669/wrapt-2.1.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:1370e516598854e5b4366e09ce81e08bfe94d42b0fd569b88ec46cc56d9164a9", size = 61418, upload-time = "2026-03-06T02:53:55.706Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/0f/fa539e2f6a770249907757eaeb9a5ff4deb41c026f8466c1c6d799088a9b/wrapt-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6de1a3851c27e0bd6a04ca993ea6f80fc53e6c742ee1601f486c08e9f9b900a9", size = 61914, upload-time = "2026-03-06T02:52:53.37Z" },
+ { url = "https://files.pythonhosted.org/packages/53/37/02af1867f5b1441aaeda9c82deed061b7cd1372572ddcd717f6df90b5e93/wrapt-2.1.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:de9f1a2bbc5ac7f6012ec24525bdd444765a2ff64b5985ac6e0692144838542e", size = 120417, upload-time = "2026-03-06T02:54:30.74Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/b7/0138a6238c8ba7476c77cf786a807f871672b37f37a422970342308276e7/wrapt-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:970d57ed83fa040d8b20c52fe74a6ae7e3775ae8cff5efd6a81e06b19078484c", size = 122797, upload-time = "2026-03-06T02:54:51.539Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/ad/819ae558036d6a15b7ed290d5b14e209ca795dd4da9c58e50c067d5927b0/wrapt-2.1.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3969c56e4563c375861c8df14fa55146e81ac11c8db49ea6fb7f2ba58bc1ff9a", size = 117350, upload-time = "2026-03-06T02:54:37.651Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/2d/afc18dc57a4600a6e594f77a9ae09db54f55ba455440a54886694a84c71b/wrapt-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:57d7c0c980abdc5f1d98b11a2aa3bb159790add80258c717fa49a99921456d90", size = 121223, upload-time = "2026-03-06T02:54:35.221Z" },
+ { url = "https://files.pythonhosted.org/packages/b9/5b/5ec189b22205697bc56eb3b62aed87a1e0423e9c8285d0781c7a83170d15/wrapt-2.1.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:776867878e83130c7a04237010463372e877c1c994d449ca6aaafeab6aab2586", size = 116287, upload-time = "2026-03-06T02:54:19.654Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/2d/f84939a7c9b5e6cdd8a8d0f6a26cabf36a0f7e468b967720e8b0cd2bdf69/wrapt-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fab036efe5464ec3291411fabb80a7a39e2dd80bae9bcbeeca5087fdfa891e19", size = 119593, upload-time = "2026-03-06T02:54:16.697Z" },
+ { url = "https://files.pythonhosted.org/packages/0b/fe/ccd22a1263159c4ac811ab9374c061bcb4a702773f6e06e38de5f81a1bdc/wrapt-2.1.2-cp314-cp314-win32.whl", hash = "sha256:e6ed62c82ddf58d001096ae84ce7f833db97ae2263bff31c9b336ba8cfe3f508", size = 58631, upload-time = "2026-03-06T02:53:06.498Z" },
+ { url = "https://files.pythonhosted.org/packages/65/0a/6bd83be7bff2e7efaac7b4ac9748da9d75a34634bbbbc8ad077d527146df/wrapt-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:467e7c76315390331c67073073d00662015bb730c566820c9ca9b54e4d67fd04", size = 60875, upload-time = "2026-03-06T02:53:50.252Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/c0/0b3056397fe02ff80e5a5d72d627c11eb885d1ca78e71b1a5c1e8c7d45de/wrapt-2.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:da1f00a557c66225d53b095a97eace0fc5349e3bfda28fa34ffae238978ee575", size = 59164, upload-time = "2026-03-06T02:53:59.128Z" },
+ { url = "https://files.pythonhosted.org/packages/71/ed/5d89c798741993b2371396eb9d4634f009ff1ad8a6c78d366fe2883ea7a6/wrapt-2.1.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:62503ffbc2d3a69891cf29beeaccdb4d5e0a126e2b6a851688d4777e01428dbb", size = 63163, upload-time = "2026-03-06T02:52:54.873Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/8c/05d277d182bf36b0a13d6bd393ed1dec3468a25b59d01fba2dd70fe4d6ae/wrapt-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7e6cd120ef837d5b6f860a6ea3745f8763805c418bb2f12eeb1fa6e25f22d22", size = 63723, upload-time = "2026-03-06T02:52:56.374Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/27/6c51ec1eff4413c57e72d6106bb8dec6f0c7cdba6503d78f0fa98767bcc9/wrapt-2.1.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3769a77df8e756d65fbc050333f423c01ae012b4f6731aaf70cf2bef61b34596", size = 152652, upload-time = "2026-03-06T02:53:23.79Z" },
+ { url = "https://files.pythonhosted.org/packages/db/4c/d7dd662d6963fc7335bfe29d512b02b71cdfa23eeca7ab3ac74a67505deb/wrapt-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a76d61a2e851996150ba0f80582dd92a870643fa481f3b3846f229de88caf044", size = 158807, upload-time = "2026-03-06T02:53:35.742Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/4d/1e5eea1a78d539d346765727422976676615814029522c76b87a95f6bcdd/wrapt-2.1.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6f97edc9842cf215312b75fe737ee7c8adda75a89979f8e11558dfff6343cc4b", size = 146061, upload-time = "2026-03-06T02:52:57.574Z" },
+ { url = "https://files.pythonhosted.org/packages/89/bc/62cabea7695cd12a288023251eeefdcb8465056ddaab6227cb78a2de005b/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4006c351de6d5007aa33a551f600404ba44228a89e833d2fadc5caa5de8edfbf", size = 155667, upload-time = "2026-03-06T02:53:39.422Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/99/6f2888cd68588f24df3a76572c69c2de28287acb9e1972bf0c83ce97dbc1/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a9372fc3639a878c8e7d87e1556fa209091b0a66e912c611e3f833e2c4202be2", size = 144392, upload-time = "2026-03-06T02:54:22.41Z" },
+ { url = "https://files.pythonhosted.org/packages/40/51/1dfc783a6c57971614c48e361a82ca3b6da9055879952587bc99fe1a7171/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3144b027ff30cbd2fca07c0a87e67011adb717eb5f5bd8496325c17e454257a3", size = 150296, upload-time = "2026-03-06T02:54:07.848Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/38/cbb8b933a0201076c1f64fc42883b0023002bdc14a4964219154e6ff3350/wrapt-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:3b8d15e52e195813efe5db8cec156eebe339aaf84222f4f4f051a6c01f237ed7", size = 60539, upload-time = "2026-03-06T02:54:00.594Z" },
+ { url = "https://files.pythonhosted.org/packages/82/dd/e5176e4b241c9f528402cebb238a36785a628179d7d8b71091154b3e4c9e/wrapt-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:08ffa54146a7559f5b8df4b289b46d963a8e74ed16ba3687f99896101a3990c5", size = 63969, upload-time = "2026-03-06T02:54:39Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/99/79f17046cf67e4a95b9987ea129632ba8bcec0bc81f3fb3d19bdb0bd60cd/wrapt-2.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:72aaa9d0d8e4ed0e2e98019cea47a21f823c9dd4b43c7b77bba6679ffcca6a00", size = 60554, upload-time = "2026-03-06T02:53:14.132Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/c7/8528ac2dfa2c1e6708f647df7ae144ead13f0a31146f43c7264b4942bf12/wrapt-2.1.2-py3-none-any.whl", hash = "sha256:b8fd6fa2b2c4e7621808f8c62e8317f4aae56e59721ad933bac5239d913cf0e8", size = 43993, upload-time = "2026-03-06T02:53:12.905Z" },
+]
+
[[package]]
name = "xmltodict"
version = "1.0.4"
diff --git a/app-expo/.env.example b/app-expo/.env.example
index f525d74..f952c6c 100644
--- a/app-expo/.env.example
+++ b/app-expo/.env.example
@@ -20,8 +20,12 @@
# EXPO_PUBLIC_API_URL=http://127.0.0.1:8000
# EXPO_PUBLIC_WS_URL=ws://127.0.0.1:8000
-# --- staging ---
+# --- staging(必填,无默认值;示例见 env/staging)---
# APP_VARIANT=staging
# EXPO_PUBLIC_APP_VARIANT=staging
-EXPO_PUBLIC_API_URL=https://your-api.example.com
-EXPO_PUBLIC_WS_URL=wss://your-api.example.com
+# EXPO_PUBLIC_API_URL=http://your-staging-host:8000
+# EXPO_PUBLIC_WS_URL=ws://your-staging-host:8000
+
+# --- production ---
+# EXPO_PUBLIC_API_URL=https://your-api.example.com
+# EXPO_PUBLIC_WS_URL=wss://your-api.example.com
diff --git a/app-expo/app.config.ts b/app-expo/app.config.ts
index 3931de8..602cfe1 100644
--- a/app-expo/app.config.ts
+++ b/app-expo/app.config.ts
@@ -28,7 +28,16 @@ const LOCALES: Record = {
const SUPPORTED_LOCALES = ['zh', 'en'] as const;
const PRIMARY_LOCALE = process.env.EXPO_PUBLIC_PRIMARY_LOCALE ?? 'zh';
-const API_BASE_URL = process.env.EXPO_PUBLIC_API_URL ?? '';
+const API_BASE_URL = process.env.EXPO_PUBLIC_API_URL?.trim() ?? '';
+const WS_BASE_URL = process.env.EXPO_PUBLIC_WS_URL?.trim() ?? '';
+
+if (!API_BASE_URL || !WS_BASE_URL) {
+ throw new Error(
+ '[app.config] Missing EXPO_PUBLIC_API_URL or EXPO_PUBLIC_WS_URL. ' +
+ 'Run `npm run use-env -- ` in app-expo before prebuild or Metro.',
+ );
+}
+
const ALLOW_INSECURE_HTTP = API_BASE_URL.startsWith('http://');
const APP_VARIANT =
@@ -176,7 +185,14 @@ export default ({ config }: ConfigContext): ExpoConfig => {
'./plugins/withAndroidCleartextTraffic',
{ enabled: ALLOW_INSECURE_HTTP },
],
- ['./plugins/withIosInsecureHttp', { enabled: ALLOW_INSECURE_HTTP }],
+ [
+ './plugins/withIosInsecureHttp',
+ {
+ enabled: ALLOW_INSECURE_HTTP,
+ apiUrl: API_BASE_URL,
+ wsUrl: WS_BASE_URL,
+ },
+ ],
'expo-router',
[
'expo-splash-screen',
diff --git a/app-expo/jest.config.js b/app-expo/jest.config.js
index 1082f91..7db9baa 100644
--- a/app-expo/jest.config.js
+++ b/app-expo/jest.config.js
@@ -1,5 +1,6 @@
module.exports = {
preset: 'jest-expo',
+ setupFiles: ['/tests/jest.setup.ts'],
clearMocks: true,
moduleNameMapper: {
'^@/(.*)$': '/src/$1',
diff --git a/app-expo/plugins/withIosInsecureHttp.js b/app-expo/plugins/withIosInsecureHttp.js
index 9e9810c..1203de5 100644
--- a/app-expo/plugins/withIosInsecureHttp.js
+++ b/app-expo/plugins/withIosInsecureHttp.js
@@ -1,43 +1,81 @@
// @ts-check
/**
- * Allow HTTP / WS to staging API host via App Transport Security exception.
+ * Allow HTTP / WS to staging API hosts via App Transport Security.
*
* Enabled when EXPO_PUBLIC_API_URL uses http:// (same rule as Android cleartext).
- * Host is parsed from the URL so IP:port staging endpoints work without hard-coding.
+ * Collects hosts from both API and WS URLs (IP:port staging often differs only by scheme).
*/
const { withInfoPlist } = require('@expo/config-plugins');
/**
+ * @param {string | undefined} raw
* @returns {string | null}
*/
-function getHttpExceptionHost() {
- const raw = process.env.EXPO_PUBLIC_API_URL ?? '';
- if (!raw.startsWith('http://')) {
+function insecureHttpHostFromUrl(raw) {
+ if (!raw || !raw.startsWith('http://')) {
return null;
}
try {
- return new URL(raw).hostname;
+ return new URL(raw).hostname || null;
} catch {
return null;
}
}
+/**
+ * Extract the hostname from a cleartext WebSocket URL.
+ *
+ * Only values that literally start with `ws://` qualify. Empty input, other
+ * schemes (including `wss://`), unparsable URLs, and URLs whose hostname is
+ * empty all yield `null`.
+ *
+ * @param {string | undefined} raw WebSocket base URL, e.g. `ws://10.0.0.5:8000`.
+ * @returns {string | null} Hostname without the port, or `null` when not applicable.
+ */
+function insecureWsHostFromUrl(raw) {
+  if (!raw) {
+    return null;
+  }
+  if (!raw.startsWith('ws://')) {
+    return null;
+  }
+
+  let parsed;
+  try {
+    parsed = new URL(raw);
+  } catch {
+    // Not a parsable URL: treat it the same as "no cleartext host".
+    return null;
+  }
+  return parsed.hostname === '' ? null : parsed.hostname;
+}
+
+/**
+ * Gather the distinct hostnames that need a cleartext exception.
+ *
+ * The API URL contributes its host via `insecureHttpHostFromUrl` and the
+ * WebSocket URL via `insecureWsHostFromUrl`. Null or empty results are dropped
+ * and duplicates collapse to a single entry, keeping first-seen order (API
+ * host first).
+ *
+ * @param {string | undefined} apiUrl REST base URL.
+ * @param {string | undefined} wsUrl WebSocket base URL.
+ * @returns {string[]} Unique hostnames; empty when neither URL yields a host.
+ */
+function collectInsecureHosts(apiUrl, wsUrl) {
+  /** @type {string[]} */
+  const uniqueHosts = [];
+  const candidates = [
+    insecureHttpHostFromUrl(apiUrl),
+    insecureWsHostFromUrl(wsUrl),
+  ];
+  for (const candidate of candidates) {
+    if (typeof candidate !== 'string' || candidate.length === 0) {
+      continue;
+    }
+    if (!uniqueHosts.includes(candidate)) {
+      uniqueHosts.push(candidate);
+    }
+  }
+  return uniqueHosts;
+}
+
+/**
+ * Report whether `host` is written as a dotted quad of decimal digit groups.
+ *
+ * The check is purely syntactic: exactly four dot-separated groups of one to
+ * three digits. Octet ranges are not validated, so `999.1.1.1` also matches.
+ *
+ * @param {string} host Hostname without scheme or port.
+ * @returns {boolean} `true` when `host` has the dotted-quad shape.
+ */
+function isIpv4Literal(host) {
+  const groups = host.split('.');
+  if (groups.length !== 4) {
+    return false;
+  }
+  return groups.every((group) => /^\d{1,3}$/u.test(group));
+}
+
/**
* @param {import('expo/config').ExpoConfig} config
- * @param {{ enabled?: boolean }} props
+ * @param {{ enabled?: boolean; apiUrl?: string; wsUrl?: string }} props
*/
function withIosInsecureHttp(config, props = {}) {
const enabled = props.enabled ?? false;
+ const apiUrl = props.apiUrl ?? process.env.EXPO_PUBLIC_API_URL ?? '';
+ const wsUrl = props.wsUrl ?? process.env.EXPO_PUBLIC_WS_URL ?? '';
return withInfoPlist(config, (mod) => {
if (!enabled) {
return mod;
}
- const host = getHttpExceptionHost();
- if (!host) {
+ const hosts = collectInsecureHosts(apiUrl, wsUrl);
+ if (hosts.length === 0) {
console.warn(
- '[withIosInsecureHttp] enabled but EXPO_PUBLIC_API_URL has no http host; skipping ATS exception.',
+ '[withIosInsecureHttp] enabled but no http/ws hosts found in apiUrl/wsUrl; skipping ATS exception.',
);
return mod;
}
@@ -45,17 +83,32 @@ function withIosInsecureHttp(config, props = {}) {
const existing = mod.modResults.NSAppTransportSecurity ?? {};
const existingDomains = existing.NSExceptionDomains ?? {};
+ /** @type {Record} */
+ const exceptionDomains = { ...existingDomains };
+
+ for (const host of hosts) {
+ exceptionDomains[host] = {
+ NSExceptionAllowsInsecureHTTPLoads: true,
+ // IP literals have no subdomains; false avoids odd ATS behavior on some iOS versions.
+ NSIncludesSubdomains: !isIpv4Literal(host),
+ NSExceptionRequiresForwardSecrecy: false,
+ };
+ }
+
mod.modResults.NSAppTransportSecurity = {
...existing,
- NSExceptionDomains: {
- ...existingDomains,
- [host]: {
- NSExceptionAllowsInsecureHTTPLoads: true,
- NSIncludesSubdomains: true,
- },
- },
+ /**
+ * Staging often uses bare IP:port HTTP. Domain exceptions alone can fail on
+ * newer iOS builds; allow cleartext while this plugin is enabled (http:// API only).
+ */
+ NSAllowsArbitraryLoads: true,
+ NSExceptionDomains: exceptionDomains,
};
+ console.log(
+ `[withIosInsecureHttp] ATS cleartext enabled for host(s): ${hosts.join(', ')}`,
+ );
+
return mod;
});
}
diff --git a/app-expo/src/app/(tabs)/memoir.tsx b/app-expo/src/app/(tabs)/memoir.tsx
index 1b6045e..266ce29 100644
--- a/app-expo/src/app/(tabs)/memoir.tsx
+++ b/app-expo/src/app/(tabs)/memoir.tsx
@@ -1,5 +1,5 @@
import { Image } from 'expo-image';
-import { router } from 'expo-router';
+import { router, useFocusEffect } from 'expo-router';
import React, {
useCallback,
useEffect,
@@ -17,22 +17,26 @@ import {
} from 'react-native';
import { SafeAreaView } from 'react-native-safe-area-context';
import { useTranslation } from 'react-i18next';
-import { FileText } from 'lucide-react-native';
+import { FileText, MessageCirclePlus } from 'lucide-react-native';
import { Icon } from '@/components/ui/icon';
import { Skeleton } from '@/components/ui/skeleton';
import { Text } from '@/components/ui/text';
import { ScreenGutter } from '@/constants/layout';
+import { ApiError, NetworkError } from '@/core/api/types';
+import { config, shouldShowAboutBackendUrl } from '@/core/config';
import { useTypography } from '@/core/typography-context';
import {
buildFrameworkChapterPlaceholders,
mergeFrameworkChaptersWithFetched,
} from '@/features/memoir/framework-chapter-keys';
import {
+ hasAnyMemoirDraftingActivity,
memoirDraftCharsRemaining,
memoirDraftHasStarted,
resolvedChapterCategory,
} from '@/features/memoir/draft-progress';
+import { useSession } from '@/features/auth/hooks';
import {
useChapters,
useCheckCoverGeneration,
@@ -286,13 +290,41 @@ function ChapterCard({
return null;
}
-function MemoirLoadError({ onRetry }: { onRetry: () => void }) {
+/**
+ * Build an optional diagnostic line for the chapter load-error view.
+ *
+ * Nothing is produced unless `shouldShowAboutBackendUrl()` is true. When it
+ * is, the hint depends on the error class: a `NetworkError` shows its message
+ * plus the configured API base URL, an `ApiError` shows the HTTP status and
+ * message, and any other `Error` shows its message. Non-Error values give `null`.
+ */
+function formatChapterLoadErrorHint(error: unknown): string | null {
+  let hint: string | null = null;
+  if (!shouldShowAboutBackendUrl()) {
+    return hint;
+  }
+
+  // Most specific classes are checked first; the plain Error branch is the fallback.
+  if (error instanceof NetworkError) {
+    hint = `${error.message}\n${config.apiBaseUrl}`;
+  } else if (error instanceof ApiError) {
+    hint = `HTTP ${error.status}: ${error.message}`;
+  } else if (error instanceof Error) {
+    hint = error.message;
+  }
+  return hint;
+}
+
+function MemoirLoadError({
+ error,
+ onRetry,
+}: {
+ error: unknown;
+ onRetry: () => void;
+}) {
const { t } = useTranslation('memoir');
+ const hint = formatChapterLoadErrorHint(error);
return (
{t('loadErrorMessage')}
+ {hint ? (
+
+ {hint}
+
+ ) : null}
void }) {
);
}
+function MemoirEmptyState({ onStartChat }: { onStartChat: () => void }) {
+ const { t } = useTranslation('memoir');
+ return (
+
+
+
+
+ {t('emptyTitle')}
+
+
+ {t('emptySubtitle')}
+
+
+
+ );
+}
+
export default function MemoirScreen() {
const { t } = useTranslation('memoir');
- const { viewModels: chapters, isLoading, isError, refetch } = useChapters();
- const { data: memoirState, refetch: refetchMemoirState } = useMemoirState();
+ const { isAuthenticated } = useSession();
+ const {
+ viewModels: chapters,
+ isLoading,
+ hasCompletedChapters,
+ isEmptyList,
+ showLoadError,
+ error: chaptersError,
+ refetch,
+ } = useChapters({ enabled: isAuthenticated });
+ const {
+ data: memoirState,
+ isLoading: isMemoirStateLoading,
+ refetch: refetchMemoirState,
+ } = useMemoirState({ enabled: isAuthenticated });
const checkCover = useCheckCoverGeneration();
const [refreshing, setRefreshing] = useState(false);
const didRunInitialCoverCheckRef = useRef(false);
@@ -327,6 +396,29 @@ export default function MemoirScreen() {
[frameworkPlaceholders, chapters],
);
+ const hasDraftingActivity = useMemo(() => {
+ if (hasCompletedChapters) return true;
+ if (chapters.some((ch) => !ch.isEmpty || ch.wordCount > 0)) return true;
+ return hasAnyMemoirDraftingActivity(memoirState?.slots);
+ }, [chapters, hasCompletedChapters, memoirState?.slots]);
+
+ const isBootstrapping =
+ isLoading || (isEmptyList && isMemoirStateLoading);
+
+ const isEmptyMemoir =
+ !isBootstrapping &&
+ !showLoadError &&
+ isEmptyList &&
+ !hasDraftingActivity;
+
+ useFocusEffect(
+ useCallback(() => {
+ if (!isAuthenticated) return;
+ void refetch();
+ void refetchMemoirState();
+ }, [isAuthenticated, refetch, refetchMemoirState]),
+ );
+
useEffect(() => {
if (didRunInitialCoverCheckRef.current) return;
didRunInitialCoverCheckRef.current = true;
@@ -336,7 +428,7 @@ export default function MemoirScreen() {
const handleRefresh = useCallback(async () => {
setRefreshing(true);
try {
- await checkCover.mutateAsync(undefined);
+ await checkCover.mutateAsync(undefined).catch(() => undefined);
await Promise.all([refetch(), refetchMemoirState()]);
} finally {
setRefreshing(false);
@@ -347,6 +439,10 @@ export default function MemoirScreen() {
router.push(`/(main)/chapter/${chapterId}`);
}, []);
+ const handleStartChat = useCallback(() => {
+ router.push('/(tabs)');
+ }, []);
+
return (
@@ -361,19 +457,27 @@ export default function MemoirScreen() {
paddingTop: 24,
paddingBottom: 96,
gap: 24,
- ...(!isLoading && isError
+ ...(!isBootstrapping && (showLoadError || isEmptyMemoir)
? { flexGrow: 1, justifyContent: 'center' }
: {}),
}}
>
- {isLoading ? (
+ {isBootstrapping ? (
<>
>
- ) : isError ? (
- void refetch()} />
+ ) : showLoadError ? (
+ {
+ void refetch();
+ void refetchMemoirState();
+ }}
+ />
+ ) : isEmptyMemoir ? (
+
) : (
displayChapters.map((item) => {
const variant = getChapterVariant(item);
diff --git a/app-expo/src/core/config.ts b/app-expo/src/core/config.ts
index f2349e6..7fc0f50 100644
--- a/app-expo/src/core/config.ts
+++ b/app-expo/src/core/config.ts
@@ -4,6 +4,57 @@ function trimTrailingSlashes(value: string): string {
export type AppVariant = 'development' | 'staging' | 'production';
+const MISSING_ENV_HINT =
+ 'Run `npm run use-env -- ` in app-expo, ' +
+ 'then restart Metro or re-run `expo prebuild` before building.';
+
+/**
+ * Return a required `EXPO_PUBLIC_*` value, trimmed, or throw if it is missing.
+ *
+ * EXPO_PUBLIC_* must be set at bundle time (Metro / EAS / Xcode Archive).
+ * Refuses silent fallbacks to a hard-coded LAN IP.
+ *
+ * Expo only inlines these variables where they are referenced statically with
+ * dot notation (`process.env.EXPO_PUBLIC_X`). A computed lookup such as
+ * `process.env[name]` is not inlined and may be undefined in a bundled app
+ * even though the variable was set at build time. In-bundle callers should
+ * therefore pass the statically referenced value as `value`; the computed
+ * lookup is kept only as the default so single-argument callers keep working.
+ *
+ * @param name Variable name; used in the error message and the fallback lookup.
+ * @param value Statically referenced value of the variable, when available.
+ * @returns The trimmed, non-empty value.
+ * @throws Error when the value is undefined, empty, or whitespace only.
+ */
+export function requirePublicEnv(
+  name: string,
+  value: string | undefined = process.env[name],
+): string {
+  const trimmed = value?.trim();
+  if (!trimmed) {
+    throw new Error(`[config] Missing ${name}. ${MISSING_ENV_HINT}`);
+  }
+  return trimmed;
+}
+
+/**
+ * Parse a backend URL, turning parse failures into a config error that names
+ * the offending environment variable.
+ *
+ * @param raw URL text to parse.
+ * @param envName Variable name used in error messages.
+ * @returns The parsed URL.
+ * @throws Error when `raw` cannot be parsed or the result has no scheme.
+ */
+function parseBackendUrl(raw: string, envName: string): URL {
+  let parsed: URL;
+  try {
+    parsed = new URL(raw);
+  } catch {
+    throw new Error(`[config] Invalid ${envName}: ${raw}`);
+  }
+  // Defensive: guards URL implementations that return an empty scheme instead of throwing.
+  if (!parsed.protocol || parsed.protocol === ':') {
+    throw new Error(`[config] ${envName} must include a scheme (http/https or ws/wss): ${raw}`);
+  }
+  return parsed;
+}
+
+/**
+ * Resolve the REST base URL from `EXPO_PUBLIC_API_URL`.
+ *
+ * @returns The configured URL passed through `trimTrailingSlashes`.
+ * @throws Error when the variable is missing, unparsable, or not http/https.
+ */
+function resolveApiBaseUrl(): string {
+  // Dot-notation reference so the bundler can inline the value at build time.
+  const raw = requirePublicEnv(
+    'EXPO_PUBLIC_API_URL',
+    process.env.EXPO_PUBLIC_API_URL,
+  );
+  const parsed = parseBackendUrl(raw, 'EXPO_PUBLIC_API_URL');
+  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
+    throw new Error(
+      `[config] EXPO_PUBLIC_API_URL must use http:// or https:// (got ${parsed.protocol})`,
+    );
+  }
+  return trimTrailingSlashes(raw);
+}
+
+/**
+ * Resolve the WebSocket base URL from `EXPO_PUBLIC_WS_URL`.
+ *
+ * @returns The configured URL passed through `trimTrailingSlashes`.
+ * @throws Error when the variable is missing, unparsable, or not ws/wss.
+ */
+function resolveWsBaseUrl(): string {
+  // Dot-notation reference so the bundler can inline the value at build time.
+  const raw = requirePublicEnv(
+    'EXPO_PUBLIC_WS_URL',
+    process.env.EXPO_PUBLIC_WS_URL,
+  );
+  const parsed = parseBackendUrl(raw, 'EXPO_PUBLIC_WS_URL');
+  if (parsed.protocol !== 'ws:' && parsed.protocol !== 'wss:') {
+    throw new Error(
+      `[config] EXPO_PUBLIC_WS_URL must use ws:// or wss:// (got ${parsed.protocol})`,
+    );
+  }
+  return trimTrailingSlashes(raw);
+}
+
function resolveAppVariant(): AppVariant {
const raw = process.env.EXPO_PUBLIC_APP_VARIANT;
if (raw === 'development' || raw === 'staging' || raw === 'production') {
@@ -33,12 +84,8 @@ export function shouldShowAboutBackendUrl(variant: AppVariant = appVariant): boo
export const appVariant = resolveAppVariant();
export const config = {
- apiBaseUrl: trimTrailingSlashes(
- process.env.EXPO_PUBLIC_API_URL ?? 'http://192.168.10.151:8000',
- ),
- wsBaseUrl: trimTrailingSlashes(
- process.env.EXPO_PUBLIC_WS_URL ?? 'ws://192.168.10.151:8000',
- ),
+ apiBaseUrl: resolveApiBaseUrl(),
+ wsBaseUrl: resolveWsBaseUrl(),
isDebugMode: __DEV__,
appVariant,
showAboutBackendUrl: shouldShowAboutBackendUrl(),
diff --git a/app-expo/src/features/auth/hooks.ts b/app-expo/src/features/auth/hooks.ts
index b6cfefc..7c06a4e 100644
--- a/app-expo/src/features/auth/hooks.ts
+++ b/app-expo/src/features/auth/hooks.ts
@@ -6,6 +6,8 @@ import { tokenManager } from '@/core/auth/token-manager';
import { clearLocalSessionAndReplayEntry } from '@/features/auth/clear-local-session-and-replay-entry';
import { getDeviceLanguage } from '@/i18n';
+import { memoirKeys } from '@/features/memoir/query-keys';
+
import { authApi } from './api';
import { authKeys } from './auth-query-keys';
import type {
@@ -126,7 +128,10 @@ function usePostAuthSetup() {
async (tokens: TokenResponse) => {
await tokenManager.setTokens(tokens.access_token, tokens.refresh_token);
queryClient.setQueryData(authKeys.tokenCheck, true);
- await queryClient.invalidateQueries({ queryKey: authKeys.session });
+ await Promise.all([
+ queryClient.invalidateQueries({ queryKey: authKeys.session }),
+ queryClient.invalidateQueries({ queryKey: memoirKeys.all }),
+ ]);
},
[queryClient],
);
diff --git a/app-expo/src/features/memoir/api.ts b/app-expo/src/features/memoir/api.ts
index 82266e9..d054e07 100644
--- a/app-expo/src/features/memoir/api.ts
+++ b/app-expo/src/features/memoir/api.ts
@@ -1,5 +1,9 @@
import { api } from '@/core/api/client';
+import {
+ isChapterListNotFoundError,
+ normalizeChapterList,
+} from './chapter-list-response';
import type {
Book,
Chapter,
@@ -32,10 +36,18 @@ export const memoirApi = {
return api.post('/api/books/export-pdf', { body });
},
- fetchChapters(isNew?: boolean) {
- return api.get('/api/chapters', {
- params: isNew !== undefined ? { is_new: isNew } : undefined,
- });
+ async fetchChapters(isNew?: boolean): Promise {
+ try {
+ const data = await api.get('/api/chapters', {
+ params: isNew !== undefined ? { is_new: isNew } : undefined,
+ });
+ return normalizeChapterList(data);
+ } catch (error) {
+ if (isChapterListNotFoundError(error)) {
+ return [];
+ }
+ throw error;
+ }
},
fetchChapterDetail(chapterId: string) {
diff --git a/app-expo/src/features/memoir/chapter-list-response.ts b/app-expo/src/features/memoir/chapter-list-response.ts
new file mode 100644
index 0000000..980d760
--- /dev/null
+++ b/app-expo/src/features/memoir/chapter-list-response.ts
@@ -0,0 +1,47 @@
+import { ApiError, AuthError } from '@/core/api/types';
+
+import type { Chapter } from './types';
+
+/**
+ * Coerce the GET /api/chapters payload into a chapter array.
+ *
+ * An array payload is returned as-is (same reference, elements unchecked).
+ * Anything else, including null and undefined, becomes an empty list so that
+ * a non-array body does not surface as a query error.
+ */
+export function normalizeChapterList(data: unknown): Chapter[] {
+  return Array.isArray(data) ? (data as Chapter[]) : [];
+}
+
+/** Report whether `error` is an `ApiError` carrying HTTP status 404. */
+export function isChapterListNotFoundError(error: unknown): boolean {
+  if (!(error instanceof ApiError)) {
+    return false;
+  }
+  return error.status === 404;
+}
+
+/**
+ * Report whether `error` is an authentication or authorization failure:
+ * an `AuthError`, or an `ApiError` with status 401 or 403.
+ *
+ * Signed-out / forbidden requests should not show "failed to load chapters"
+ * (the session layer handles them, or the framework placeholders are shown).
+ */
+export function isChapterListAuthError(error: unknown): boolean {
+  if (error instanceof AuthError) {
+    return true;
+  }
+  if (!(error instanceof ApiError)) {
+    return false;
+  }
+  const { status } = error;
+  return status === 401 || status === 403;
+}
+
+/**
+ * Report whether the chapter request succeeded yet produced no list items.
+ *
+ * This is distinct from transport or auth failures, where `isSuccess` is
+ * false. The caller chooses which list to pass (for example, one already
+ * filtered down to displayable chapters).
+ *
+ * @param isSuccess Whether the request completed successfully.
+ * @param chapters Chapter list to inspect.
+ * @returns `true` only when `isSuccess` is true and `chapters` is empty.
+ */
+export function isChapterListEmptySuccess(
+  isSuccess: boolean,
+  chapters: Chapter[],
+): boolean {
+  if (!isSuccess) {
+    return false;
+  }
+  return chapters.length === 0;
+}
+
+/**
+ * Decide whether the "Could not load chapters" state should be displayed.
+ *
+ * Only real failures qualify. A successful but empty result, the absence of
+ * an error, a 404 (see `isChapterListNotFoundError`), and auth failures (see
+ * `isChapterListAuthError`) all return `false`.
+ *
+ * @param error Error reported by the chapter query, if any.
+ * @param isSuccess Whether the query completed successfully.
+ * @param chapterCount Number of chapters in the result.
+ */
+export function shouldShowChapterListLoadError(
+  error: unknown,
+  isSuccess: boolean,
+  chapterCount: number,
+): boolean {
+  const succeededEmpty = isSuccess && chapterCount === 0;
+  if (succeededEmpty || error == null) {
+    return false;
+  }
+  const isBenign =
+    isChapterListNotFoundError(error) || isChapterListAuthError(error);
+  return !isBenign;
+}
diff --git a/app-expo/src/features/memoir/draft-progress.ts b/app-expo/src/features/memoir/draft-progress.ts
index 34d04ff..d74fe83 100644
--- a/app-expo/src/features/memoir/draft-progress.ts
+++ b/app-expo/src/features/memoir/draft-progress.ts
@@ -37,6 +37,16 @@ export function interviewStageHasSnippetMaterial(
);
}
+/** Whether any interview slot already holds an oral-narration snippet (still counts as "in progress" even when no finished chapter exists yet). */
+export function hasAnyMemoirDraftingActivity(
+ slots: MemoirState['slots'] | undefined,
+): boolean {
+ if (!slots) return false; // no slots object means no activity
+ return Object.keys(slots).some((stage) => // true as soon as one stage key reports snippet material
+ interviewStageHasSnippetMaterial(slots, stage),
+ );
+}
+
export function memoirDraftHasStarted(
slots: MemoirState['slots'] | undefined,
chapterCategory: string,
diff --git a/app-expo/src/features/memoir/hooks.ts b/app-expo/src/features/memoir/hooks.ts
index 1972816..c55667e 100644
--- a/app-expo/src/features/memoir/hooks.ts
+++ b/app-expo/src/features/memoir/hooks.ts
@@ -1,6 +1,9 @@
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
+import { AuthError } from '@/core/api/types';
+
import { memoirApi } from './api';
+import { shouldShowChapterListLoadError } from './chapter-list-response';
import { toChapterViewModels } from './mappers';
import { memoirKeys } from './query-keys';
import type { ExportPdfRequest, UpdateBookRequest } from './types';
@@ -38,15 +41,42 @@ export function useUpdateBookTitle() {
// ─── Chapters ───
-export function useChapters() {
+export function hasCompletedMemoirChapter(
+ chapters: { isEmpty: boolean }[],
+): boolean {
+ return chapters.some((ch) => !ch.isEmpty); // true when at least one entry has isEmpty === false; always false for an empty list
+}
+
+export function useChapters(options?: { enabled?: boolean }) {
+ const enabled = options?.enabled ?? true; // the query is enabled unless the caller passes a non-nullish `enabled`
const query = useQuery({
queryKey: memoirKeys.chapters(),
queryFn: () => memoirApi.fetchChapters(),
+ enabled,
+ retry: (failureCount, error) => {
+ if (error instanceof AuthError) return false; // never retry an AuthError
+ return failureCount < 1; // NOTE(review): ApiError 401/403/404 still get one retry here although isChapterListAuthError treats 401/403 as auth failures — confirm intended
+ },
});
+ const viewModels = query.data ? toChapterViewModels(query.data) : []; // no data yet (or falsy data) maps to an empty list
+ const hasCompletedChapters = hasCompletedMemoirChapter(viewModels);
+ const isEmptyList =
+ query.isSuccess && viewModels.length === 0 && !hasCompletedChapters; // last clause is redundant: hasCompletedChapters is always false when viewModels is empty
+ const showLoadError =
+ !query.isLoading && // never report a load error while query.isLoading is true
+ shouldShowChapterListLoadError(
+ query.error,
+ query.isSuccess,
+ viewModels.length, // count of mapped view models, not of raw chapters
+ );
+
return {
...query,
- viewModels: query.data ? toChapterViewModels(query.data) : [],
+ viewModels,
+ hasCompletedChapters,
+ isEmptyList,
+ showLoadError,
};
}
@@ -84,10 +114,12 @@ export function useCheckCoverGeneration() {
// ─── Memoir state ───
-export function useMemoirState() {
+export function useMemoirState(options?: { enabled?: boolean }) {
+ const enabled = options?.enabled ?? true; // the query is enabled unless the caller passes a non-nullish `enabled`
return useQuery({
queryKey: memoirKeys.state(),
queryFn: () => memoirApi.fetchMemoirState(),
+ enabled, // forwarded unchanged to useQuery
});
}
diff --git a/app-expo/tests/core/config.test.ts b/app-expo/tests/core/config.test.ts
index 6cffd7e..0e74766 100644
--- a/app-expo/tests/core/config.test.ts
+++ b/app-expo/tests/core/config.test.ts
@@ -1,10 +1,37 @@
import {
appVariant,
config,
+ requirePublicEnv,
shouldShowAboutBackendUrl,
type AppVariant,
} from '@/core/config';
+describe('requirePublicEnv', () => {
+ it('throws when variable is missing or blank', () => {
+ const key = 'EXPO_PUBLIC_API_URL';
+ const previous = process.env[key]; // remember the current value so it can be restored afterwards
+ try {
+ delete process.env[key]; // case 1: variable absent
+ expect(() => requirePublicEnv(key)).toThrow(/Missing EXPO_PUBLIC_API_URL/);
+ process.env[key] = ' '; // case 2: whitespace-only value
+ expect(() => requirePublicEnv(key)).toThrow(/Missing EXPO_PUBLIC_API_URL/);
+ } finally {
+ if (previous === undefined) {
+ process.env[key] = 'http://127.0.0.1:8000'; // NOTE(review): sets a default instead of restoring the unset state — confirm intended
+ } else {
+ process.env[key] = previous; // put back exactly what was there before the test
+ }
+ }
+ });
+});
+
+describe('config backend URLs', () => {
+ it('loads API and WS from EXPO_PUBLIC_* (jest.setup defaults)', () => {
+ expect(config.apiBaseUrl).toBe('http://127.0.0.1:8000'); // NOTE(review): presumably fails if the environment overrides EXPO_PUBLIC_API_URL, since jest.setup only supplies fallbacks — verify
+ expect(config.wsBaseUrl).toBe('ws://127.0.0.1:8000'); // same literal as the EXPO_PUBLIC_WS_URL fallback in jest.setup
+ });
+});
+
describe('shouldShowAboutBackendUrl', () => {
it('shows backend URL for development and staging', () => {
expect(shouldShowAboutBackendUrl('development')).toBe(true);
diff --git a/app-expo/tests/features/memoir/chapter-list-response.test.ts b/app-expo/tests/features/memoir/chapter-list-response.test.ts
new file mode 100644
index 0000000..b5136da
--- /dev/null
+++ b/app-expo/tests/features/memoir/chapter-list-response.test.ts
@@ -0,0 +1,71 @@
+import { ApiError, AuthError, NetworkError } from '@/core/api/types';
+import {
+ isChapterListAuthError,
+ isChapterListEmptySuccess,
+ isChapterListNotFoundError,
+ normalizeChapterList,
+ shouldShowChapterListLoadError,
+} from '@/features/memoir/chapter-list-response';
+import type { Chapter } from '@/features/memoir/types';
+
+describe('normalizeChapterList', () => {
+ it('returns empty array for nullish or non-array payloads', () => {
+ expect(normalizeChapterList(null)).toEqual([]);
+ expect(normalizeChapterList(undefined)).toEqual([]);
+ expect(normalizeChapterList({ items: [] })).toEqual([]); // an object wrapper is not unwrapped
+ });
+
+ it('passes through chapter arrays', () => {
+ const chapters = [{ id: 'ch-1' }] as Chapter[];
+ expect(normalizeChapterList(chapters)).toBe(chapters); // toBe: the same array instance comes back, not a copy
+ });
+});
+
+describe('isChapterListNotFoundError', () => {
+ it('detects ApiError 404', () => {
+ expect(isChapterListNotFoundError(new ApiError('missing', 404))).toBe(true);
+ expect(isChapterListNotFoundError(new ApiError('bad', 500))).toBe(false); // ApiError with a different status
+ expect(isChapterListNotFoundError(new Error('other'))).toBe(false); // a plain Error is not an ApiError
+ });
+});
+
+describe('isChapterListEmptySuccess', () => {
+ it('is true only for successful empty arrays', () => {
+ expect(isChapterListEmptySuccess(true, [])).toBe(true);
+ expect(isChapterListEmptySuccess(true, [{ id: 'x' } as never])).toBe( // success with one item
+ false,
+ );
+ expect(isChapterListEmptySuccess(false, [])).toBe(false); // an empty list without success does not count
+ });
+});
+
+describe('isChapterListAuthError', () => {
+ it('treats AuthError and 401/403 ApiError as auth errors', () => {
+ expect(isChapterListAuthError(new AuthError())).toBe(true); // matched by class alone
+ expect(isChapterListAuthError(new ApiError('unauthorized', 401))).toBe(true);
+ expect(isChapterListAuthError(new ApiError('forbidden', 403))).toBe(true);
+ expect(isChapterListAuthError(new ApiError('server', 500))).toBe(false); // other ApiError statuses are not auth errors
+ });
+});
+
+describe('shouldShowChapterListLoadError', () => {
+ it('hides load error for empty success, 404, and auth failures', () => {
+ expect(shouldShowChapterListLoadError(null, true, 0)).toBe(false); // successful empty list
+ expect(shouldShowChapterListLoadError(new ApiError('nope', 404), false, 0)).toBe(
+ false,
+ );
+ expect(shouldShowChapterListLoadError(new AuthError(), false, 0)).toBe(false);
+ expect(
+ shouldShowChapterListLoadError(new ApiError('unauthorized', 401), false, 0),
+ ).toBe(false); // NOTE(review): 403 is covered only in the isChapterListAuthError suite, not through this function
+ });
+
+ it('shows load error for network and server failures', () => {
+ expect(
+ shouldShowChapterListLoadError(new NetworkError('offline'), false, 0),
+ ).toBe(true); // NetworkError is neither a 404 nor an auth error
+ expect(
+ shouldShowChapterListLoadError(new ApiError('boom', 500), false, 0),
+ ).toBe(true);
+ });
+});
diff --git a/app-expo/tests/features/memoir/draft-progress.test.ts b/app-expo/tests/features/memoir/draft-progress.test.ts
index 7916f79..cabc32e 100644
--- a/app-expo/tests/features/memoir/draft-progress.test.ts
+++ b/app-expo/tests/features/memoir/draft-progress.test.ts
@@ -1,5 +1,6 @@
import {
chapterCategoryToInterviewStage,
+ hasAnyMemoirDraftingActivity,
memoirDraftCharsRemaining,
memoirDraftHasStarted,
MIN_CHAPTER_DISPLAY_CHARS,
@@ -23,6 +24,14 @@ describe('draft-progress', () => {
).toBe('career_early');
});
+ test('hasAnyMemoirDraftingActivity when any stage has snippet', () => {
+ const slots = {
+ childhood: { q1: { snippet: '小时候…', status: 'filled' } }, // one stage with one non-empty snippet
+ };
+ expect(hasAnyMemoirDraftingActivity(slots)).toBe(true);
+ expect(hasAnyMemoirDraftingActivity({})).toBe(false); // empty slots object: there are no stage keys to inspect
+ });
+
test('memoirDraftHasStarted when interview slots have snippet', () => {
const slots = {
childhood: { place: { snippet: '老家在小城', segment_ids: [] } },
diff --git a/app-expo/tests/jest.setup.ts b/app-expo/tests/jest.setup.ts
new file mode 100644
index 0000000..9179b0c
--- /dev/null
+++ b/app-expo/tests/jest.setup.ts
@@ -0,0 +1,9 @@
+/**
+ * Config is read at import time, so EXPO_PUBLIC_* must be set before any @/core/config import.
+ */
+process.env.EXPO_PUBLIC_API_URL =
+ process.env.EXPO_PUBLIC_API_URL ?? 'http://127.0.0.1:8000'; // ?? keeps a value already present in the environment; the literal is only a fallback
+process.env.EXPO_PUBLIC_WS_URL =
+ process.env.EXPO_PUBLIC_WS_URL ?? 'ws://127.0.0.1:8000'; // fallback only, same rule as above
+process.env.EXPO_PUBLIC_APP_VARIANT =
+ process.env.EXPO_PUBLIC_APP_VARIANT ?? 'development'; // fallback only, same rule as above