chore/ 精简展示AI活动的日志
This commit is contained in:
@@ -22,6 +22,11 @@ LOG_LEVEL=INFO
|
|||||||
# LOG_AGENT_VERBOSE=0
|
# LOG_AGENT_VERBOSE=0
|
||||||
# DEBUG 下 prompt/响应预览最大字符数
|
# DEBUG 下 prompt/响应预览最大字符数
|
||||||
# AGENT_LOG_MAX_CHARS=4096
|
# AGENT_LOG_MAX_CHARS=4096
|
||||||
|
# DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文
|
||||||
|
# AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1
|
||||||
|
# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过)
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000
|
||||||
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏)
|
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏)
|
||||||
# CELERY_LOG_LEVEL=
|
# CELERY_LOG_LEVEL=
|
||||||
# HTTPX_LOG_LEVEL=
|
# HTTPX_LOG_LEVEL=
|
||||||
|
|||||||
@@ -20,6 +20,11 @@ LOG_LEVEL=INFO
|
|||||||
# LOG_AGENT_VERBOSE=0
|
# LOG_AGENT_VERBOSE=0
|
||||||
# DEBUG 下 prompt/响应预览最大字符数
|
# DEBUG 下 prompt/响应预览最大字符数
|
||||||
# AGENT_LOG_MAX_CHARS=4096
|
# AGENT_LOG_MAX_CHARS=4096
|
||||||
|
# DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文
|
||||||
|
# AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1
|
||||||
|
# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过)
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000
|
||||||
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏)
|
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING,减少刷屏)
|
||||||
# CELERY_LOG_LEVEL=
|
# CELERY_LOG_LEVEL=
|
||||||
# HTTPX_LOG_LEVEL=
|
# HTTPX_LOG_LEVEL=
|
||||||
|
|||||||
@@ -9,6 +9,18 @@
|
|||||||
# Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL)
|
# Logging(loguru sink 最低级别:TRACE / DEBUG / INFO / WARNING / ERROR / CRITICAL)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
LOG_LEVEL=INFO
|
LOG_LEVEL=INFO
|
||||||
|
# Agent 单行 INFO 摘要(耗时、路由、段落规模);与 LOG_LEVEL 独立
|
||||||
|
# LOG_AGENT_VERBOSE=0
|
||||||
|
# DEBUG 下 prompt/响应预览最大字符数
|
||||||
|
# AGENT_LOG_MAX_CHARS=4096
|
||||||
|
# DEBUG 下访谈/资料:省略 SystemMessage 正文(仅 total_len+sha12);0/false=打出全文
|
||||||
|
# AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY=1
|
||||||
|
# DEBUG 下超长单段 *.prompt:总长超过下一项时,先跳过前 N 字符再预览(0=不跳过)
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_CHARS=0
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT=4000
|
||||||
|
# 第三方 stdlib logging(空=自动)
|
||||||
|
# CELERY_LOG_LEVEL=
|
||||||
|
# HTTPX_LOG_LEVEL=
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# LLM / DeepSeek
|
# LLM / DeepSeek
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""聊天 Agent 共享工具:历史获取、格式化、存储"""
|
"""聊天 Agent 共享工具:历史获取、格式化、存储"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
@@ -68,12 +69,28 @@ async def get_history_messages(conversation_id: str) -> List[Any]:
|
|||||||
return _lc_messages_from_rows(_human_ai_rows(history))
|
return _lc_messages_from_rows(_human_ai_rows(history))
|
||||||
|
|
||||||
|
|
||||||
def format_history_string(messages: List[Any]) -> str:
|
def _sha12_utf8(text: str) -> str:
|
||||||
|
return hashlib.sha256((text or "").encode("utf-8")).hexdigest()[:12]
|
||||||
|
|
||||||
|
|
||||||
|
def format_history_string(
|
||||||
|
messages: List[Any], *, omit_system_body: bool = False
|
||||||
|
) -> str:
|
||||||
"""将 LangChain 消息列表格式化为调试日志用多段文本(含 System,不静默跳过)。"""
|
"""将 LangChain 消息列表格式化为调试日志用多段文本(含 System,不静默跳过)。"""
|
||||||
history_parts: list[str] = []
|
history_parts: list[str] = []
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
if isinstance(msg, SystemMessage):
|
if isinstance(msg, SystemMessage):
|
||||||
history_parts.append(f"System: {msg.content}")
|
if omit_system_body:
|
||||||
|
c = (
|
||||||
|
(msg.content or "")
|
||||||
|
if isinstance(msg.content, str)
|
||||||
|
else str(msg.content)
|
||||||
|
)
|
||||||
|
history_parts.append(
|
||||||
|
f"System: <omitted total_len={len(c)} sha12={_sha12_utf8(c)}>"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
history_parts.append(f"System: {msg.content}")
|
||||||
elif isinstance(msg, HumanMessage):
|
elif isinstance(msg, HumanMessage):
|
||||||
history_parts.append(f"Human: {msg.content}")
|
history_parts.append(f"Human: {msg.content}")
|
||||||
elif isinstance(msg, AIMessage):
|
elif isinstance(msg, AIMessage):
|
||||||
|
|||||||
@@ -176,7 +176,10 @@ class InterviewAgent:
|
|||||||
log_agent_payload(
|
log_agent_payload(
|
||||||
logger,
|
logger,
|
||||||
"InterviewAgent.generate_response.prompt",
|
"InterviewAgent.generate_response.prompt",
|
||||||
format_history_string(messages),
|
format_history_string(
|
||||||
|
messages,
|
||||||
|
omit_system_body=settings.agent_log_omit_system_message_body,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
chat_llm = self.llm.bind(max_tokens=reply_plan.max_tokens)
|
chat_llm = self.llm.bind(max_tokens=reply_plan.max_tokens)
|
||||||
prompt_chars = _message_contents_char_count(messages)
|
prompt_chars = _message_contents_char_count(messages)
|
||||||
@@ -276,7 +279,10 @@ class InterviewAgent:
|
|||||||
log_agent_payload(
|
log_agent_payload(
|
||||||
logger,
|
logger,
|
||||||
"InterviewAgent.opening.prompt",
|
"InterviewAgent.opening.prompt",
|
||||||
format_history_string(messages),
|
format_history_string(
|
||||||
|
messages,
|
||||||
|
omit_system_body=settings.agent_log_omit_system_message_body,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
opening_llm = self.llm.bind(max_tokens=settings.chat_opening_max_tokens)
|
opening_llm = self.llm.bind(max_tokens=settings.chat_opening_max_tokens)
|
||||||
prompt_chars = _message_contents_char_count(messages)
|
prompt_chars = _message_contents_char_count(messages)
|
||||||
|
|||||||
@@ -58,38 +58,46 @@ async def _fetch_interview_memory_evidence(
|
|||||||
from app.features.memory.service import MemoryService
|
from app.features.memory.service import MemoryService
|
||||||
|
|
||||||
if not settings.chat_memory_retrieval_enabled:
|
if not settings.chat_memory_retrieval_enabled:
|
||||||
|
logger.debug(
|
||||||
|
"event=chat_memory_retrieval_skip reason=disabled user_id={}", user_id
|
||||||
|
)
|
||||||
return ""
|
return ""
|
||||||
msg = (user_message or "").strip()
|
msg = (user_message or "").strip()
|
||||||
if not msg:
|
if not msg:
|
||||||
|
logger.debug(
|
||||||
|
"event=chat_memory_retrieval_skip reason=empty user_id={}", user_id
|
||||||
|
)
|
||||||
return ""
|
return ""
|
||||||
if (
|
if (
|
||||||
settings.chat_memory_retrieval_require_substantive
|
settings.chat_memory_retrieval_require_substantive
|
||||||
and not should_run_chat_stage_memory_heavy_work(msg)
|
and not should_run_chat_stage_memory_heavy_work(msg)
|
||||||
):
|
):
|
||||||
|
logger.debug(
|
||||||
|
"event=chat_memory_retrieval_skip reason=not_substantive user_id={}",
|
||||||
|
user_id,
|
||||||
|
)
|
||||||
return ""
|
return ""
|
||||||
try:
|
try:
|
||||||
emb = get_embedding_provider()
|
emb = get_embedding_provider()
|
||||||
ms = MemoryService(db, embedding_provider=emb)
|
ms = MemoryService(db, embedding_provider=emb)
|
||||||
bundle = await ms.retrieve(user_id, msg, top_k=settings.chat_memory_top_k)
|
bundle = await ms.retrieve(user_id, msg, top_k=settings.chat_memory_top_k)
|
||||||
bd = bundle.model_dump()
|
bd = bundle.model_dump()
|
||||||
vector_ok = emb.is_available()
|
|
||||||
logger.info(
|
|
||||||
"memory_evidence_retrieved user_id={} chunks={} facts={} summaries={} timeline={} stories={} vector_ok={}",
|
|
||||||
user_id,
|
|
||||||
len(bd.get("relevant_chunks") or []),
|
|
||||||
len(bd.get("relevant_facts") or []),
|
|
||||||
len(bd.get("relevant_summaries") or []),
|
|
||||||
len(bd.get("timeline_hints") or []),
|
|
||||||
len(bd.get("relevant_stories") or []),
|
|
||||||
vector_ok,
|
|
||||||
)
|
|
||||||
text = format_evidence_chunks_for_prompt(bd)
|
text = format_evidence_chunks_for_prompt(bd)
|
||||||
t = (text or "").strip()
|
t = (text or "").strip()
|
||||||
if not t:
|
if not t:
|
||||||
|
logger.debug(
|
||||||
|
"event=memory_evidence_for_prompt user_id={} formatted_chars=0",
|
||||||
|
user_id,
|
||||||
|
)
|
||||||
return ""
|
return ""
|
||||||
max_c = settings.chat_memory_evidence_max_chars
|
max_c = settings.chat_memory_evidence_max_chars
|
||||||
if len(t) > max_c:
|
if len(t) > max_c:
|
||||||
return t[: max_c - 3] + "..."
|
t = t[: max_c - 3] + "..."
|
||||||
|
logger.info(
|
||||||
|
"event=memory_evidence_for_prompt user_id={} formatted_chars={}",
|
||||||
|
user_id,
|
||||||
|
len(t),
|
||||||
|
)
|
||||||
return t
|
return t
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -188,7 +188,10 @@ class ProfileAgent:
|
|||||||
log_agent_payload(
|
log_agent_payload(
|
||||||
logger,
|
logger,
|
||||||
"ProfileAgent.followup.prompt",
|
"ProfileAgent.followup.prompt",
|
||||||
format_history_string(messages),
|
format_history_string(
|
||||||
|
messages,
|
||||||
|
omit_system_body=settings.agent_log_omit_system_message_body,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
prompt_chars = _message_contents_char_count(messages)
|
prompt_chars = _message_contents_char_count(messages)
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -246,7 +249,12 @@ class ProfileAgent:
|
|||||||
else:
|
else:
|
||||||
messages.append(HumanMessage(content="(请说出资料收集开场白。)"))
|
messages.append(HumanMessage(content="(请说出资料收集开场白。)"))
|
||||||
log_agent_payload(
|
log_agent_payload(
|
||||||
logger, "ProfileAgent.greeting.prompt", format_history_string(messages)
|
logger,
|
||||||
|
"ProfileAgent.greeting.prompt",
|
||||||
|
format_history_string(
|
||||||
|
messages,
|
||||||
|
omit_system_body=settings.agent_log_omit_system_message_body,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
prompt_chars = _message_contents_char_count(messages)
|
prompt_chars = _message_contents_char_count(messages)
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -6,6 +6,10 @@ Agent / LLM 诊断日志:耗时、输入输出规模、截断预览。
|
|||||||
便于生产环境在不把全局日志调到 DEBUG 的情况下排查 Agent 性能与路径。
|
便于生产环境在不把全局日志调到 DEBUG 的情况下排查 Agent 性能与路径。
|
||||||
|
|
||||||
敏感内容:DEBUG 下会记录用户相关文本截断预览,生产环境请勿长期开启 DEBUG。
|
敏感内容:DEBUG 下会记录用户相关文本截断预览,生产环境请勿长期开启 DEBUG。
|
||||||
|
|
||||||
|
配置(节选):``AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY``(默认 true)省略聊天 System 正文,仅打 len+sha12;
|
||||||
|
``AGENT_LOG_JSON_PROMPT_PREFIX_CHARS`` + ``AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT`` 在 DEBUG 下跳过
|
||||||
|
超长单段 prompt 的前缀再预览。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -96,10 +100,23 @@ def log_agent_payload(
|
|||||||
"""在 DEBUG 下记录文本长度与截断预览。"""
|
"""在 DEBUG 下记录文本长度与截断预览。"""
|
||||||
if not agent_verbose_enabled():
|
if not agent_verbose_enabled():
|
||||||
return
|
return
|
||||||
preview = truncate_for_log(text, max_chars=max_chars)
|
raw = text or ""
|
||||||
|
total_len = len(raw)
|
||||||
|
preview_source = raw
|
||||||
|
extra_note = ""
|
||||||
|
if (
|
||||||
|
label.endswith(".prompt")
|
||||||
|
and settings.agent_log_json_prompt_prefix_chars > 0
|
||||||
|
and total_len > settings.agent_log_json_prompt_prefix_only_if_len_gt
|
||||||
|
):
|
||||||
|
skip = settings.agent_log_json_prompt_prefix_chars
|
||||||
|
preview_source = raw[skip:]
|
||||||
|
extra_note = f" skipped_prefix_chars={skip}"
|
||||||
|
preview = truncate_for_log(preview_source, max_chars=max_chars)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"agent_payload {} total_len={} preview={}",
|
"agent_payload {} total_len={}{} preview={}",
|
||||||
label,
|
label,
|
||||||
len(text or ""),
|
total_len,
|
||||||
|
extra_note,
|
||||||
preview,
|
preview,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -188,6 +188,14 @@ class Settings(BaseSettings):
|
|||||||
log_agent_verbose: bool = False
|
log_agent_verbose: bool = False
|
||||||
# AGENT_LOG_MAX_CHARS:DEBUG 下记录 prompt/响应预览时的最大字符数
|
# AGENT_LOG_MAX_CHARS:DEBUG 下记录 prompt/响应预览时的最大字符数
|
||||||
agent_log_max_chars: int = Field(default=4096, ge=256, le=100_000)
|
agent_log_max_chars: int = Field(default=4096, ge=256, le=100_000)
|
||||||
|
# AGENT_LOG_OMIT_SYSTEM_MESSAGE_BODY:DEBUG 下访谈/资料聊天日志省略 System 正文(仅 len+sha12)
|
||||||
|
agent_log_omit_system_message_body: bool = True
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_CHARS:DEBUG 下 *.prompt 总长超过下项时再跳过前 N 字符后预览(0=不跳过)
|
||||||
|
agent_log_json_prompt_prefix_chars: int = Field(default=0, ge=0, le=500_000)
|
||||||
|
# AGENT_LOG_JSON_PROMPT_PREFIX_ONLY_IF_LEN_GT:触发“跳过前缀”的最小 prompt 长度
|
||||||
|
agent_log_json_prompt_prefix_only_if_len_gt: int = Field(
|
||||||
|
default=4000, ge=0, le=2_000_000
|
||||||
|
)
|
||||||
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING)
|
# 第三方 stdlib logging(空=自动:LOG_LEVEL 为 DEBUG/TRACE 时 Celery→INFO、httpx/httpcore→WARNING)
|
||||||
celery_log_level: str = ""
|
celery_log_level: str = ""
|
||||||
httpx_log_level: str = ""
|
httpx_log_level: str = ""
|
||||||
@@ -201,6 +209,18 @@ class Settings(BaseSettings):
|
|||||||
return False
|
return False
|
||||||
return str(v).strip().lower() in ("1", "true", "yes", "on")
|
return str(v).strip().lower() in ("1", "true", "yes", "on")
|
||||||
|
|
||||||
|
@field_validator("agent_log_omit_system_message_body", mode="before")
|
||||||
|
@classmethod
|
||||||
|
def _coerce_agent_log_omit_system_message_body(cls, v: object) -> bool:
|
||||||
|
if isinstance(v, bool):
|
||||||
|
return v
|
||||||
|
if v is None:
|
||||||
|
return True
|
||||||
|
s = str(v).strip().lower()
|
||||||
|
if s in ("0", "false", "no", "off"):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
# ── Misc ─────────────────────────────────────────────────
|
# ── Misc ─────────────────────────────────────────────────
|
||||||
enable_test_subscription: int = 0
|
enable_test_subscription: int = 0
|
||||||
enable_test_plan: str = "" # "1" / "true" / "yes" 为 True
|
enable_test_plan: str = "" # "1" / "true" / "yes" 为 True
|
||||||
|
|||||||
@@ -70,16 +70,20 @@ class MemoryService:
|
|||||||
|
|
||||||
await self._db.flush()
|
await self._db.flush()
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
|
||||||
|
vectors_written = 0
|
||||||
# Embedding: 若有 provider 则写入
|
# Embedding: 若有 provider 则写入
|
||||||
if self._embedding and chunk_records:
|
if self._embedding and chunk_records:
|
||||||
texts = [c for _, c in chunk_records]
|
texts = [c for _, c in chunk_records]
|
||||||
embeddings = await self._embedding.embed_texts(texts)
|
embeddings = await self._embedding.embed_texts(texts)
|
||||||
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
||||||
if emb:
|
if emb:
|
||||||
|
vectors_written += 1
|
||||||
await update_chunk_embedding(self._db, chunk_id, emb)
|
await update_chunk_embedding(self._db, chunk_id, emb)
|
||||||
|
|
||||||
|
enrichment_ok: bool | None = None
|
||||||
try:
|
try:
|
||||||
from app.core.config import settings
|
|
||||||
from app.core.dependencies import get_llm_provider_fast
|
from app.core.dependencies import get_llm_provider_fast
|
||||||
from app.features.memory.enrichment import enrich_memory_after_ingest_async
|
from app.features.memory.enrichment import enrich_memory_after_ingest_async
|
||||||
|
|
||||||
@@ -88,12 +92,28 @@ class MemoryService:
|
|||||||
await enrich_memory_after_ingest_async(
|
await enrich_memory_after_ingest_async(
|
||||||
self._db, user_id, source.id, llm
|
self._db, user_id, source.id, llm
|
||||||
)
|
)
|
||||||
|
enrichment_ok = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if settings.memory_enrichment_enabled:
|
||||||
|
enrichment_ok = False
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"memory enrichment 跳过: {} exc_type={}", e, type(e).__name__
|
"memory enrichment 跳过: {} exc_type={}", e, type(e).__name__
|
||||||
)
|
)
|
||||||
|
|
||||||
await self._db.commit()
|
await self._db.commit()
|
||||||
|
emb_ok = self._embedding.is_available() if self._embedding else False
|
||||||
|
logger.info(
|
||||||
|
"event=memory_ingest_done user_id={} conversation_id={} source_id={} "
|
||||||
|
"chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_ok={}",
|
||||||
|
user_id,
|
||||||
|
conversation_id,
|
||||||
|
source.id,
|
||||||
|
len(chunk_records),
|
||||||
|
vectors_written,
|
||||||
|
emb_ok,
|
||||||
|
settings.memory_enrichment_enabled,
|
||||||
|
enrichment_ok,
|
||||||
|
)
|
||||||
return source.id
|
return source.id
|
||||||
|
|
||||||
async def retrieve(
|
async def retrieve(
|
||||||
@@ -104,7 +124,23 @@ class MemoryService:
|
|||||||
|
|
||||||
retriever = HybridRetriever(self._db, embedding_provider=self._embedding)
|
retriever = HybridRetriever(self._db, embedding_provider=self._embedding)
|
||||||
raw = await retriever.retrieve(user_id=user_id, query=query, top_k=top_k)
|
raw = await retriever.retrieve(user_id=user_id, query=query, top_k=top_k)
|
||||||
return EvidenceBundle.model_validate(raw)
|
bundle = EvidenceBundle.model_validate(raw)
|
||||||
|
bd = bundle.model_dump()
|
||||||
|
vec_ok = self._embedding.is_available() if self._embedding else False
|
||||||
|
logger.info(
|
||||||
|
"event=memory_retrieve_done user_id={} query_len={} top_k={} "
|
||||||
|
"chunks={} facts={} summaries={} timeline={} stories={} vector_ok={}",
|
||||||
|
user_id,
|
||||||
|
len((query or "").strip()),
|
||||||
|
top_k,
|
||||||
|
len(bd.get("relevant_chunks") or []),
|
||||||
|
len(bd.get("relevant_facts") or []),
|
||||||
|
len(bd.get("relevant_summaries") or []),
|
||||||
|
len(bd.get("timeline_hints") or []),
|
||||||
|
len(bd.get("relevant_stories") or []),
|
||||||
|
vec_ok,
|
||||||
|
)
|
||||||
|
return bundle
|
||||||
|
|
||||||
async def exclude_chunk(
|
async def exclude_chunk(
|
||||||
self, user_id: str, chunk_id: str, *, reason: str = ""
|
self, user_id: str, chunk_id: str, *, reason: str = ""
|
||||||
@@ -215,29 +251,51 @@ def ingest_transcript_sync(
|
|||||||
session.flush()
|
session.flush()
|
||||||
chunk_records.append((chunk.id, content))
|
chunk_records.append((chunk.id, content))
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
|
||||||
|
vectors_written = 0
|
||||||
|
embedding_available = False
|
||||||
try:
|
try:
|
||||||
embedding_provider = get_embedding_provider()
|
embedding_provider = get_embedding_provider()
|
||||||
|
if embedding_provider is not None:
|
||||||
|
embedding_available = embedding_provider.is_available()
|
||||||
if chunk_records and embedding_provider is not None:
|
if chunk_records and embedding_provider is not None:
|
||||||
texts = [content for _, content in chunk_records]
|
texts = [content for _, content in chunk_records]
|
||||||
embeddings = embedding_provider.embed_texts_sync(texts)
|
embeddings = embedding_provider.embed_texts_sync(texts)
|
||||||
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
||||||
if emb:
|
if emb:
|
||||||
|
vectors_written += 1
|
||||||
update_chunk_embedding_sync(session, chunk_id, emb)
|
update_chunk_embedding_sync(session, chunk_id, emb)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"memory embedding 跳过(sync): {} exc_type={}", e, type(e).__name__
|
"memory embedding 跳过(sync): {} exc_type={}", e, type(e).__name__
|
||||||
)
|
)
|
||||||
|
|
||||||
|
enrichment_ok: bool | None = None
|
||||||
try:
|
try:
|
||||||
from app.core.config import settings
|
|
||||||
from app.features.memory.enrichment import enrich_memory_after_ingest_sync
|
from app.features.memory.enrichment import enrich_memory_after_ingest_sync
|
||||||
|
|
||||||
if settings.memory_enrichment_enabled:
|
if settings.memory_enrichment_enabled:
|
||||||
enrich_memory_after_ingest_sync(session, user_id, source.id, llm=None)
|
enrich_memory_after_ingest_sync(session, user_id, source.id, llm=None)
|
||||||
|
enrichment_ok = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if settings.memory_enrichment_enabled:
|
||||||
|
enrichment_ok = False
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"memory enrichment 跳过(sync): {} exc_type={}", e, type(e).__name__
|
"memory enrichment 跳过(sync): {} exc_type={}", e, type(e).__name__
|
||||||
)
|
)
|
||||||
|
|
||||||
session.commit()
|
session.commit()
|
||||||
|
logger.info(
|
||||||
|
"event=memory_ingest_done user_id={} conversation_id={} source_id={} "
|
||||||
|
"chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_ok={} sync=1",
|
||||||
|
user_id,
|
||||||
|
conversation_id,
|
||||||
|
source.id,
|
||||||
|
len(chunk_records),
|
||||||
|
vectors_written,
|
||||||
|
embedding_available,
|
||||||
|
settings.memory_enrichment_enabled,
|
||||||
|
enrichment_ok,
|
||||||
|
)
|
||||||
return source.id
|
return source.id
|
||||||
|
|||||||
@@ -195,3 +195,17 @@ def test_format_history_string_includes_system_for_debug_logs() -> None:
|
|||||||
assert "System: SYS_INSTRUCTIONS" in s
|
assert "System: SYS_INSTRUCTIONS" in s
|
||||||
assert "Human: hi" in s
|
assert "Human: hi" in s
|
||||||
assert "Assistant: hello" in s
|
assert "Assistant: hello" in s
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_history_string_omit_system_body() -> None:
|
||||||
|
s = format_history_string(
|
||||||
|
[
|
||||||
|
SystemMessage(content="SYS_INSTRUCTIONS"),
|
||||||
|
HumanMessage(content="hi"),
|
||||||
|
],
|
||||||
|
omit_system_body=True,
|
||||||
|
)
|
||||||
|
assert "SYS_INSTRUCTIONS" not in s
|
||||||
|
assert "System: <omitted total_len=16" in s
|
||||||
|
assert "sha12=" in s
|
||||||
|
assert "Human: hi" in s
|
||||||
|
|||||||
Reference in New Issue
Block a user