api/app/features/memory/enrichment.py

"""
Transcript ingest 之后的记忆富化：单次 LLM 调用产出会话摘要 + 结构化事实。

由 async MemoryService 调用；失败仅打日志，不阻断主流程。
不再维护 ingest 路径上的 rolling 摘要与 timeline 物化。
"""

from __future__ import annotations

from typing import Any

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.langchain_llm import ainvoke_json_object
from app.core.llm_gateway import LlmGateway, LlmUseCase
from app.core.logging import get_logger
from app.features.memory.enrichment_pipeline import (
    dedupe_key,
    normalize_object_json,
    normalize_subject,
)
from app.features.memory.llm_schemas import (
    EnrichmentPayload,
    enrichment_payload_to_fact_dicts,
    parse_json_payload,
)
from app.features.memory.models import MemoryChunk, MemorySource
from app.features.memory.repo import (
    create_memory_fact,
    create_memory_summary,
)
from app.features.user.models import User

logger = get_logger(__name__)


def _lineage_snapshot_from_source(source: MemorySource | None) -> dict | None:
    raw = getattr(source, "lineage_json", None) if source else None
    return raw if isinstance(raw, dict) and raw else None


def _resolve_llm() -> Any | None:
    try:
        return LlmGateway().langchain_llm_for(
            LlmUseCase("memory.enrichment", fast=True)
        )
    except Exception as e:
        logger.warning("memory enrichment 无法获取 LLM: {}", e)
        return None


def _max_enrichment_chars() -> int:
    from app.core.config import settings

    return settings.memory_enrichment_max_chars


def _enrichment_prompt(numbered_blocks: str, narrator_label: str) -> str:
    """合并会话摘要说明与事实抽取规则，供单次 JSON 输出。"""
    text = numbered_blocks.strip()[: _max_enrichment_chars()]
    return (
        "你是回忆录记忆分析助手。用户正在口述人生回忆，所有内容默认是**过去发生的事**，"
        "而非当前或未来计划（除非原文明确说「现在」「打算」「准备将要」等）。\n\n"
        "请从下列口述内容中完成两件事：\n"
        "1) 用 2～8 句中文概括要点（不编造、不评价）\n"
        "2) 抽取结构化事实列表（见下方规则）\n\n"
        "## 事实抽取规则\n"
        "1. subject 必须用明确的人名或固定称谓：\n"
        f"   - 叙述者本人统一用「{narrator_label}」\n"
        "   - 其他人用全名或稳定专名（如「王伟」），禁止用「他」「她」「我」「我们大伙」等代词作 subject；"
        "若代词在上下文中可唯一解析为某人，则 subject 写该人姓名/专名\n"
        "2. 事件、职务变动、地点迁移等一律按**过去回忆**理解；travel/调动/命令类表述勿写成「即将要做」"
        "除非原文明确为未来时态\n"
        "3. 若可推断大约年代或人生阶段，将 approximate_era 写入 object_json（与 value 等字段并存），"
        '例如 "1990年代"、"2001年"、"退休后"、"30岁前后"\n'
        "4. fact_type: person|event|relation|place|milestone\n"
        "5. predicate：简短中文谓语（如「出生地」「担任职务」「调往」）\n"
        "6. object_json：字符串或对象；可含 value、approximate_era 等\n"
        "7. confidence 0..1；source_chunk_id 必须等于某段 [chunk_id=...] 中的 id\n\n"
        '只输出 JSON：{"summary":"...","facts":[...]}，无事实则 "facts":[]。\n\n'
        f"{text}"
    )


async def _run_enrichment_llm_async(
    llm: Any, numbered: str, narrator_label: str
) -> EnrichmentPayload | None:
    if not llm or not (numbered or "").strip():
        return None
    prompt = _enrichment_prompt(numbered, narrator_label)
    try:
        raw = await ainvoke_json_object(
            llm,
            prompt,
            max_tokens=8192,
            agent="memory.enrichment",
        )
        return parse_json_payload(raw, EnrichmentPayload)
    except (TypeError, ValueError) as e:
        logger.warning("enrichment LLM async 解析失败: {}", e)
        return None


async def enrich_memory_after_ingest_async(
    db: AsyncSession,
    user_id: str,
    source_id: str,
    llm: Any | None = None,
) -> None:
    from app.core.config import settings

    if not settings.memory_enrichment_enabled:
        return
    if llm is None:
        llm = _resolve_llm()
    if not llm:
        return
    narrator_name: str | None = None
    u_row = await db.get(User, user_id)
    if u_row and (u_row.nickname or "").strip():
        narrator_name = (u_row.nickname or "").strip()
    stmt = (
        select(MemoryChunk)
        .where(MemoryChunk.source_id == source_id)
        .order_by(MemoryChunk.chunk_index.asc())
    )
    result = await db.execute(stmt)
    chunks = list(result.unique().scalars().all())
    if not chunks:
        return
    src_row = await db.get(MemorySource, source_id)
    lineage_snapshot = _lineage_snapshot_from_source(src_row)
    chunk_ids = [c.id for c in chunks]
    chunk_texts = [c.content for c in chunks]
    numbered = "\n\n".join(
        f"[chunk_id={cid}]\n{txt}"
        for cid, txt in zip(chunk_ids, chunk_texts, strict=False)
    )
    narrator_label = (narrator_name or "").strip() or "叙述者"

    payload = await _run_enrichment_llm_async(llm, numbered, narrator_label)
    if payload is None:
        return

    session_summary_text = str(payload.summary or "").strip()
    if session_summary_text:
        await create_memory_summary(
            db,
            user_id=user_id,
            summary_type="session",
            content=session_summary_text,
            source_chunk_ids=chunk_ids,
        )

    raw_facts = enrichment_payload_to_fact_dicts(payload)
    seen: set[tuple] = set()
    for f in raw_facts:
        key = dedupe_key(f, narrator_name=narrator_name)
        if key in seen:
            continue
        seen.add(key)
        scid = f.get("source_chunk_id")
        if scid and scid not in chunk_ids:
            scid = chunk_ids[0] if chunk_ids else None
        await create_memory_fact(
            db,
            user_id=user_id,
            fact_type=f.get("fact_type") or "event",
            subject=normalize_subject(f.get("subject"), narrator_name),
            predicate=f.get("predicate"),
            object_json=normalize_object_json(f.get("object_json")),
            confidence=float(f.get("confidence") or 0.75),
            source_chunk_id=scid,
            status="confirmed",
            lineage_json=lineage_snapshot,
        )
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								"""
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								Transcript ingest 之后的记忆富化：单次 LLM 调用产出会话摘要 + 结构化事实。
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								由 async MemoryService 调用；失败仅打日志，不阻断主流程。
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								不再维护 ingest 路径上的 rolling 摘要与 timeline 物化。
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								"""
 								from __future__ import annotations
 								from typing import Any
 								from sqlalchemy import select
 								from sqlalchemy.ext.asyncio import AsyncSession
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								from app.core.langchain_llm import ainvoke_json_object
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								from app.core.llm_gateway import LlmGateway, LlmUseCase
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from app.core.logging import get_logger
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								from app.features.memory.enrichment_pipeline import (
 								    dedupe_key,
 								    normalize_object_json,
 								    normalize_subject,
 								)
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								from app.features.memory.llm_schemas import (
 								    EnrichmentPayload,
 								    enrichment_payload_to_fact_dicts,
 								    parse_json_payload,
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								)
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								from app.features.memory.models import MemoryChunk, MemorySource
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from app.features.memory.repo import (
 								    create_memory_fact,
 								    create_memory_summary,
 								)
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								from app.features.user.models import User
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
 								logger = get_logger(__name__)
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								def _lineage_snapshot_from_source(source: MemorySource | None) -> dict | None:
 								    raw = getattr(source, "lineage_json", None) if source else None
 								    return raw if isinstance(raw, dict) and raw else None
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								def _resolve_llm() -> Any | None:
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    try:
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								        return LlmGateway().langchain_llm_for(
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								            LlmUseCase("memory.enrichment", fast=True)
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								        )
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    except Exception as e:
 								        logger.warning("memory enrichment 无法获取 LLM: {}", e)
 								        return None
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								def _max_enrichment_chars() -> int:
 								    from app.core.config import settings
 								    return settings.memory_enrichment_max_chars
 								def _enrichment_prompt(numbered_blocks: str, narrator_label: str) -> str:
 								    """合并会话摘要说明与事实抽取规则，供单次 JSON 输出。"""
 								    text = numbered_blocks.strip()[: _max_enrichment_chars()]
 								    return (
 								        "你是回忆录记忆分析助手。用户正在口述人生回忆，所有内容默认是**过去发生的事**，"
 								        "而非当前或未来计划（除非原文明确说「现在」「打算」「准备将要」等）。\n\n"
 								        "请从下列口述内容中完成两件事：\n"
 								        "1) 用 2～8 句中文概括要点（不编造、不评价）\n"
 								        "2) 抽取结构化事实列表（见下方规则）\n\n"
 								        "## 事实抽取规则\n"
 								        "1. subject 必须用明确的人名或固定称谓：\n"
 								        f"   - 叙述者本人统一用「{narrator_label}」\n"
 								        "   - 其他人用全名或稳定专名（如「王伟」），禁止用「他」「她」「我」「我们大伙」等代词作 subject；"
 								        "若代词在上下文中可唯一解析为某人，则 subject 写该人姓名/专名\n"
 								        "2. 事件、职务变动、地点迁移等一律按**过去回忆**理解；travel/调动/命令类表述勿写成「即将要做」"
 								        "除非原文明确为未来时态\n"
 								        "3. 若可推断大约年代或人生阶段，将 approximate_era 写入 object_json（与 value 等字段并存），"
 								        '例如 "1990年代"、"2001年"、"退休后"、"30岁前后"\n'
 								        "4. fact_type: person|event|relation|place|milestone\n"
 								        "5. predicate：简短中文谓语（如「出生地」「担任职务」「调往」）\n"
 								        "6. object_json：字符串或对象；可含 value、approximate_era 等\n"
 								        "7. confidence 0..1；source_chunk_id 必须等于某段 [chunk_id=...] 中的 id\n\n"
 								        '只输出 JSON：{"summary":"...","facts":[...]}，无事实则 "facts":[]。\n\n'
 								        f"{text}"
 								    )
 								async def _run_enrichment_llm_async(
 								    llm: Any, numbered: str, narrator_label: str
 								) -> EnrichmentPayload | None:
 								    if not llm or not (numbered or "").strip():
 								        return None
 								    prompt = _enrichment_prompt(numbered, narrator_label)
 								    try:
 								        raw = await ainvoke_json_object(
 								            llm,
 								            prompt,
 								            max_tokens=8192,
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								            agent="memory.enrichment",
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								        )
 								        return parse_json_payload(raw, EnrichmentPayload)
 								    except (TypeError, ValueError) as e:
 								        logger.warning("enrichment LLM async 解析失败: {}", e)
 								        return None
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								async def enrich_memory_after_ingest_async(
 								    db: AsyncSession,
 								    user_id: str,
 								    source_id: str,
 								    llm: Any | None = None,
 								) -> None:
 								    from app.core.config import settings
 								    if not settings.memory_enrichment_enabled:
 								        return
 								    if llm is None:
-												feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor

											
										
										
											2026-04-30 14:11:46 +08:00
+								        llm = _resolve_llm()
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    if not llm:
 								        return
-												聊天和回忆录证据检索都走 pgvector，去掉 Postgres FTS/content_tsv，新迁移删掉 content_tsv 列（部署要先 alembic upgrade）。

Embedding 端口增加 is_available()，聊天和回忆录日志用统一方式表示向量是否真能调用。

记忆整理（compaction）支持 Beat 定期扫用户；

事实抽取提示与 subject 归一化，减少同一人多种称呼；

											
										
										
											2026-04-03 11:43:16 +08:00
+								    narrator_name: str | None = None
 								    u_row = await db.get(User, user_id)
 								    if u_row and (u_row.nickname or "").strip():
 								        narrator_name = (u_row.nickname or "").strip()
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    stmt = (
 								        select(MemoryChunk)
 								        .where(MemoryChunk.source_id == source_id)
 								        .order_by(MemoryChunk.chunk_index.asc())
 								    )
 								    result = await db.execute(stmt)
 								    chunks = list(result.unique().scalars().all())
 								    if not chunks:
 								        return
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								    src_row = await db.get(MemorySource, source_id)
 								    lineage_snapshot = _lineage_snapshot_from_source(src_row)
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    chunk_ids = [c.id for c in chunks]
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								    chunk_texts = [c.content for c in chunks]
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    numbered = "\n\n".join(
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								        f"[chunk_id={cid}]\n{txt}"
 								        for cid, txt in zip(chunk_ids, chunk_texts, strict=False)
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    )
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								    narrator_label = (narrator_name or "").strip() or "叙述者"
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								    payload = await _run_enrichment_llm_async(llm, numbered, narrator_label)
 								    if payload is None:
 								        return
 								    session_summary_text = str(payload.summary or "").strip()
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    if session_summary_text:
 								        await create_memory_summary(
 								            db,
 								            user_id=user_id,
 								            summary_type="session",
 								            content=session_summary_text,
 								            source_chunk_ids=chunk_ids,
 								        )
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								    raw_facts = enrichment_payload_to_fact_dicts(payload)
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								    seen: set[tuple] = set()
 								    for f in raw_facts:
-												聊天和回忆录证据检索都走 pgvector，去掉 Postgres FTS/content_tsv，新迁移删掉 content_tsv 列（部署要先 alembic upgrade）。

Embedding 端口增加 is_available()，聊天和回忆录日志用统一方式表示向量是否真能调用。

记忆整理（compaction）支持 Beat 定期扫用户；

事实抽取提示与 subject 归一化，减少同一人多种称呼；

											
										
										
											2026-04-03 11:43:16 +08:00
+								        key = dedupe_key(f, narrator_name=narrator_name)
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        if key in seen:
 								            continue
 								        seen.add(key)
 								        scid = f.get("source_chunk_id")
 								        if scid and scid not in chunk_ids:
 								            scid = chunk_ids[0] if chunk_ids else None
-												feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.

											
										
										
											2026-04-10 10:23:43 +08:00
+								        await create_memory_fact(
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								            db,
 								            user_id=user_id,
 								            fact_type=f.get("fact_type") or "event",
-												聊天和回忆录证据检索都走 pgvector，去掉 Postgres FTS/content_tsv，新迁移删掉 content_tsv 列（部署要先 alembic upgrade）。

Embedding 端口增加 is_available()，聊天和回忆录日志用统一方式表示向量是否真能调用。

记忆整理（compaction）支持 Beat 定期扫用户；

事实抽取提示与 subject 归一化，减少同一人多种称呼；

											
										
										
											2026-04-03 11:43:16 +08:00
+								            subject=normalize_subject(f.get("subject"), narrator_name),
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								            predicate=f.get("predicate"),
 								            object_json=normalize_object_json(f.get("object_json")),
 								            confidence=float(f.get("confidence") or 0.75),
 								            source_chunk_id=scid,
 								            status="confirmed",
-												feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组评审用的证据包。
内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/ judge 相关脚本与文档有配套调整。
app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。
工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了?

											
										
										
											2026-04-08 15:37:09 +08:00
+								            lineage_json=lineage_snapshot,
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        )