feat(api): 收敛对话与记忆流程边界,引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService;富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话;回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway(LlmUseCase),各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-30 09:17:01 +08:00
parent eddb2c3078
commit ac436b87a2
37 changed files with 1400 additions and 199 deletions

102
api/app/core/llm_gateway.py Normal file
View File

@@ -0,0 +1,102 @@
"""Use-case oriented LLM gateway.
This is a small compatibility layer over the existing provider and JSON helper
functions. It gives new code a stable place to request model capabilities while
older agents continue to use LangChain directly during the transition.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable, TypeVar
from pydantic import BaseModel
from app.core.dependencies import get_llm_provider, get_llm_provider_fast
from app.core.llm_call import allm_json_call, llm_json_call
T = TypeVar("T", bound=BaseModel)
@dataclass(frozen=True)
class LlmUseCase:
name: str
fast: bool = False
max_tokens: int | None = None
temperature: float | None = None
model: str | None = None
class LlmGateway:
"""Facade for text and JSON LLM calls."""
def provider_for(self, use_case: LlmUseCase | None = None):
if use_case and use_case.fast:
return get_llm_provider_fast()
return get_llm_provider()
def langchain_llm_for(self, use_case: LlmUseCase | None = None) -> Any | None:
provider = self.provider_for(use_case)
return getattr(provider, "langchain_llm", None)
async def chat_text(
self,
messages: list[dict],
*,
use_case: LlmUseCase | None = None,
temperature: float | None = None,
model: str | None = None,
max_tokens: int | None = None,
) -> str:
provider = self.provider_for(use_case)
return await provider.complete(
messages,
temperature=(
temperature
if temperature is not None
else (use_case.temperature if use_case else 0.7)
),
model=model if model is not None else (use_case.model if use_case else None),
max_tokens=(
max_tokens
if max_tokens is not None
else (use_case.max_tokens if use_case else None)
),
)
async def json_object(
self,
prompt: str,
schema: type[T],
*,
use_case: LlmUseCase,
fallback_factory: Callable[[], T] | None = None,
) -> T:
return await allm_json_call(
self.langchain_llm_for(use_case),
prompt,
schema,
max_tokens=use_case.max_tokens or 1024,
agent=use_case.name,
fallback_factory=fallback_factory,
)
def sync_json_object(
self,
prompt: str,
schema: type[T],
*,
use_case: LlmUseCase,
fallback_factory: Callable[[], T] | None = None,
) -> T:
return llm_json_call(
self.langchain_llm_for(use_case),
prompt,
schema,
max_tokens=use_case.max_tokens or 1024,
agent=use_case.name,
fallback_factory=fallback_factory,
)
__all__ = ["LlmGateway", "LlmUseCase"]