feat(api): 接入智谱 embedding-3(1024 维)并迁移 memory_chunks 向量列

This commit is contained in:
Kevin
2026-03-30 13:54:35 +08:00
parent aac484463d
commit 42ae2a5e91
11 changed files with 157 additions and 26 deletions

View File

@@ -1,21 +0,0 @@
"""OpenAI embedding adapter — implements EmbeddingProvider port."""
from openai import AsyncOpenAI
class OpenAIEmbeddingProvider:
    """Embedding adapter that talks to the OpenAI async client.

    If the adapter is built with an empty ``api_key`` no client is created,
    and both embedding methods return an empty list without calling the API.
    """

    def __init__(self, api_key: str, model: str = "text-embedding-3-small"):
        self._model = model
        self._client = None
        if api_key:
            self._client = AsyncOpenAI(api_key=api_key)

    async def embed_text(self, text: str) -> list[float]:
        """Return the embedding of ``text``, or ``[]`` when no client exists."""
        client = self._client
        if not client:
            return []
        response = await client.embeddings.create(input=[text], model=self._model)
        return response.data[0].embedding

    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding per entry of ``texts``, ordered by response index.

        Returns ``[]`` when no client exists or ``texts`` is empty.
        """
        if not (self._client and texts):
            return []
        response = await self._client.embeddings.create(
            input=texts, model=self._model
        )
        # Order by the index reported on each item before extracting vectors.
        entries = list(response.data)
        entries.sort(key=lambda entry: entry.index)
        return [entry.embedding for entry in entries]

View File

@@ -0,0 +1,56 @@
"""智谱 BigModel 国内 embedding API — 实现 EmbeddingProviderzai-sdk / ZhipuAiClient"""
from __future__ import annotations
import asyncio
from zai import ZhipuAiClient
from app.core.embedding import MEMORY_EMBEDDING_DIMENSION
# 单次请求最多 64 条文本(智谱 Embedding-3 文档)
_EMBED_BATCH_SIZE = 64
class ZhipuEmbeddingProvider:
    """EmbeddingProvider implementation backed by ``ZhipuAiClient``.

    If built with an empty ``api_key`` no client is created, and the public
    embedding methods return empty lists without calling the API.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str | None = None,
        model: str = "embedding-3",
    ) -> None:
        """Store the model name and build the client when a key is supplied.

        Args:
            api_key: API key for the client; empty disables the provider.
            base_url: Optional endpoint override; trailing slashes are removed.
            model: Embedding model name sent with every request.
        """
        self._model = model
        if not api_key:
            self._client = None
            return
        client_kwargs = {"api_key": api_key}
        if base_url:
            client_kwargs["base_url"] = base_url.rstrip("/")
        self._client = ZhipuAiClient(**client_kwargs)

    def _create_vectors_sync(self, texts: list[str]) -> list[list[float]]:
        """Request embeddings for one batch with the blocking client.

        Returns:
            One vector per response item, ordered by the item's ``index``
            (a missing index is treated as 0).

        Raises:
            RuntimeError: If the provider was built without an API key.
        """
        # Explicit check instead of ``assert``: asserts are removed under -O,
        # which would turn this into an opaque AttributeError on None.
        if self._client is None:
            raise RuntimeError(
                "Zhipu embedding client is not configured (empty api_key)"
            )
        resp = self._client.embeddings.create(
            input=texts,
            model=self._model,
            dimensions=MEMORY_EMBEDDING_DIMENSION,
        )
        ordered = sorted(resp.data, key=lambda d: d.index or 0)
        return [list(item.embedding) for item in ordered]

    async def embed_text(self, text: str) -> list[float]:
        """Return the embedding of ``text``, or ``[]`` if none was produced."""
        vectors = await self.embed_texts([text])
        return vectors[0] if vectors else []

    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Embed ``texts`` in consecutive batches of ``_EMBED_BATCH_SIZE``.

        Returns ``[]`` when no client exists or ``texts`` is empty. Batches
        are sent one after another; each blocking request runs through
        ``asyncio.to_thread`` so the event loop is not blocked.
        """
        if not self._client or not texts:
            return []
        out: list[list[float]] = []
        for start in range(0, len(texts), _EMBED_BATCH_SIZE):
            batch = texts[start : start + _EMBED_BATCH_SIZE]
            out.extend(await asyncio.to_thread(self._create_vectors_sync, batch))
        return out

View File

@@ -49,6 +49,11 @@ class Settings(BaseSettings):
llm_model: str = ""
llm_temperature: float = 0.7
# ── Memory 向量(智谱 BigModel 国内 embedding-3;与 LLM/DeepSeek 密钥分离)──
zhipu_api_key: str = ""
embedding_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
embedding_model: str = "embedding-3"
# ── Chat 访谈短回复:token 上限 + 代码截断,见 reply_limits ──
chat_interview_max_tokens: int = 320
chat_interview_max_segments: int = 2

View File

@@ -124,10 +124,13 @@ def get_object_storage() -> ObjectStorage:
@lru_cache
def get_embedding_provider() -> EmbeddingProvider:
    """Return the embedding provider, built once and cached by ``lru_cache``.

    The provider is a ``ZhipuEmbeddingProvider`` configured from
    ``settings.zhipu_api_key``, ``settings.embedding_base_url`` and
    ``settings.embedding_model``. An empty base URL is passed as ``None``.
    """
    # Only the Zhipu adapter is imported: the previous OpenAI import and its
    # early return made the Zhipu return below unreachable.
    from app.adapters.embedding.zhipu import ZhipuEmbeddingProvider

    return ZhipuEmbeddingProvider(
        api_key=settings.zhipu_api_key,
        base_url=settings.embedding_base_url or None,
        model=settings.embedding_model,
    )
# ── Auth dependencies ────────────────────────────────────────

View File

@@ -0,0 +1,6 @@
"""Memory chunk 向量维度(与智谱 embedding-3、pgvector 列一致)。
本期固定 1024若调整维度需独立迁移与排期勿仅改此处常量。
"""
MEMORY_EMBEDDING_DIMENSION = 1024

View File

@@ -10,12 +10,13 @@ from sqlalchemy import (
String,
Text,
)
from sqlalchemy.orm import relationship
from sqlalchemy.dialects.postgresql import TSVECTOR as TSVector
from sqlalchemy.orm import relationship
from app.core.db import Base, utc_now
from app.core.embedding import MEMORY_EMBEDDING_DIMENSION
# Sized from the shared constant; the earlier hard-coded Vector(1536) was a
# dead assignment immediately overwritten by this one.
pgvector_type = Vector(MEMORY_EMBEDDING_DIMENSION)
class MemorySource(Base):