配置 SSOT(TOML + .env) 统一错误契约 Auth 与事务边界 Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client 可观测性(OpenTelemetry + LGTM)
129 lines
5.0 KiB
Python
129 lines
5.0 KiB
Python
from pgvector.sqlalchemy import Vector
|
|
from sqlalchemy import (
|
|
JSON,
|
|
Boolean,
|
|
Column,
|
|
DateTime,
|
|
Float,
|
|
ForeignKey,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
)
|
|
from sqlalchemy.orm import relationship
|
|
|
|
from app.core.db import Base, utc_now
|
|
from app.core.embedding import MEMORY_EMBEDDING_DIMENSION
|
|
|
|
# Shared pgvector column type, sized by MEMORY_EMBEDDING_DIMENSION; used below
# for the MemoryChunk.embedding column.
pgvector_type = Vector(MEMORY_EMBEDDING_DIMENSION)
|
|
|
|
|
|
class MemorySource(Base):
    """ORM mapping for the ``memory_sources`` table.

    A source row references a user and is the parent of zero or more
    ``MemoryChunk`` rows, reachable through the ``chunks`` relationship.
    """

    __tablename__ = "memory_sources"

    # --- identity / ownership ---
    id = Column(String, primary_key=True)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # --- content ---
    # One of: transcript / note / draft
    source_type = Column(String, nullable=False)
    raw_text = Column(Text, nullable=True)
    storage_key = Column(String, nullable=True)
    speaker = Column(String, nullable=True)
    captured_at = Column(DateTime(timezone=True), nullable=True)

    # --- status fields (each *_error column is free-form text) ---
    status = Column(String, default="active")
    embedding_status = Column(String, default="pending")
    embedding_error = Column(Text, nullable=True)
    enrichment_status = Column(String, default="pending")
    enrichment_error = Column(Text, nullable=True)

    # --- links to other records ---
    conversation_id = Column(
        String,
        ForeignKey("conversations.id"),
        nullable=True,
    )
    # The FK asks the database to SET NULL when the referenced segment is deleted.
    segment_id = Column(
        String,
        ForeignKey("segments.id", ondelete="SET NULL"),
        nullable=True,
    )
    lineage_json = Column(JSON, nullable=True)
    # Plain string column; no foreign key is declared for it.
    primary_user_message_id = Column(String, nullable=True)

    # Defaults to utc_now() when no value is supplied on insert.
    created_at = Column(DateTime(timezone=True), default=utc_now)

    # ORM-level cascade: chunks are deleted together with their source, and
    # also when they are removed from this collection (delete-orphan).
    chunks = relationship(
        "MemoryChunk",
        back_populates="source",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class MemoryChunk(Base):
    """ORM mapping for the ``memory_chunks`` table.

    Each chunk references its parent ``MemorySource`` through ``source_id``
    and stores text content together with an optional pgvector embedding.
    """

    __tablename__ = "memory_chunks"

    # --- identity / ownership ---
    id = Column(String, primary_key=True)
    source_id = Column(
        String,
        ForeignKey("memory_sources.id"),
        nullable=False,
        index=True,
    )
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # --- content ---
    content = Column(Text, nullable=False)
    # pgvector embedding — the Alembic migration is responsible for
    # CREATE EXTENSION vector and for the column type.
    embedding = Column(pgvector_type, nullable=True)
    chunk_index = Column(Integer, nullable=False)
    speaker = Column(String, nullable=True)
    event_year = Column(Integer, nullable=True)
    metadata_json = Column(JSON, nullable=True)

    # --- status fields ---
    is_excluded = Column(Boolean, default=False)
    embedding_status = Column(String, default="pending")
    embedding_error = Column(Text, nullable=True)

    # Defaults to utc_now() when no value is supplied on insert.
    created_at = Column(DateTime(timezone=True), default=utc_now)

    # Reverse side of MemorySource.chunks.
    source = relationship("MemorySource", back_populates="chunks")
|
|
|
|
|
|
class MemorySummary(Base):
    """ORM mapping for the ``memory_summaries`` table.

    Stores a piece of summary text for a user, tagged with a summary type and
    an optional JSON value in ``source_chunk_ids``.
    """

    __tablename__ = "memory_summaries"

    id = Column(String, primary_key=True)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # One of: session / rolling / topic
    summary_type = Column(String, nullable=False)
    content = Column(Text, nullable=False)
    # Stored as JSON rather than as foreign keys.
    source_chunk_ids = Column(JSON, nullable=True)

    # Both timestamps default to utc_now() on insert; updated_at is also
    # refreshed with utc_now() whenever the row is updated.
    created_at = Column(DateTime(timezone=True), default=utc_now)
    updated_at = Column(
        DateTime(timezone=True),
        default=utc_now,
        onupdate=utc_now,
    )
|
|
|
|
|
|
class MemoryFact(Base):
    """ORM mapping for the ``memory_facts`` table.

    A fact is stored as ``subject`` / ``predicate`` / ``object_json`` with a
    confidence value, a review status, and an optional link to one chunk.
    """

    __tablename__ = "memory_facts"

    id = Column(String, primary_key=True)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # One of: person / event / relation / place / milestone
    fact_type = Column(String, nullable=False)

    # --- the fact itself ---
    subject = Column(String, nullable=True)
    predicate = Column(String, nullable=True)
    object_json = Column(JSON, nullable=True)
    confidence = Column(Float, default=0.0)

    # NOTE(review): no ``ondelete`` is declared on this FK, unlike
    # TimelineEvent.memory_source_id (SET NULL) — confirm how deleting a chunk
    # is meant to affect the facts that reference it.
    source_chunk_id = Column(
        String,
        ForeignKey("memory_chunks.id"),
        nullable=True,
    )

    # One of: candidate / confirmed / rejected / stale
    # (stale = chunk excluded / superseded)
    status = Column(String, default="candidate")

    lineage_json = Column(JSON, nullable=True)

    # Defaults to utc_now() when no value is supplied on insert.
    created_at = Column(DateTime(timezone=True), default=utc_now)
|
|
|
|
|
|
class TimelineEvent(Base):
    """ORM mapping for the ``timeline_events`` table.

    Holds a titled event for a user, with optional year/date, description,
    and JSON columns for referenced people, facts, and lineage.
    """

    __tablename__ = "timeline_events"

    id = Column(String, primary_key=True)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # Optional link to a memory source; the FK asks the database to SET NULL
    # when that source row is deleted.
    memory_source_id = Column(
        String,
        ForeignKey("memory_sources.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # --- when ---
    event_year = Column(Integer, nullable=True)
    # Kept as a string column, not a date type.
    event_date = Column(String, nullable=True)

    # --- what ---
    title = Column(String, nullable=False)
    description = Column(Text, nullable=True)

    # --- JSON-encoded references ---
    person_refs = Column(JSON, nullable=True)
    source_fact_ids = Column(JSON, nullable=True)
    lineage_json = Column(JSON, nullable=True)

    # Defaults to utc_now() when no value is supplied on insert.
    created_at = Column(DateTime(timezone=True), default=utc_now)
|
|
|
|
|
|
class MemoryCurationAction(Base):
    """ORM mapping for the ``memory_curation_actions`` table.

    Records one action of a given type applied by a user to a target that is
    identified by the ``target_type`` / ``target_id`` pair.
    """

    __tablename__ = "memory_curation_actions"

    id = Column(String, primary_key=True)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )

    # One of: exclude / restore / correct / merge / confirm / reject
    action_type = Column(String, nullable=False)

    # One of: chunk / fact / summary / timeline_event
    target_type = Column(String, nullable=False)
    # Plain string with no foreign key declared — presumably because the
    # target table depends on target_type; verify against callers.
    target_id = Column(String, nullable=False)

    details = Column(JSON, nullable=True)

    # Defaults to utc_now() when no value is supplied on insert.
    created_at = Column(DateTime(timezone=True), default=utc_now)
|