feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
Kevin
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions

View File

@@ -0,0 +1,82 @@
"""pg_trgm + composite indexes for memory evidence ILIKE and filters.
Revision ID: 0014_memory_evidence_indexes
Revises: 0013_playground_judge
"""
from typing import Sequence, Union
from alembic import op
# Alembic revision identifiers: this migration's own ID and the revision it
# directly follows in the migration chain.
revision: str = "0014_memory_evidence_indexes"
down_revision: Union[str, None] = "0013_playground_judge"
# No branch labels and no cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Install pg_trgm and create the search/filter indexes.

    Creates, in order:

    * the ``pg_trgm`` extension (needed by the ``gin_trgm_ops`` indexes);
    * a composite ``(user_id, status)`` index on ``memory_facts`` and on
      ``stories``;
    * GIN trigram indexes on the text columns of ``memory_facts``,
      ``timeline_events``, ``memory_summaries`` and ``stories``;
    * an HNSW index on ``memory_chunks.embedding``.

    Every statement is guarded with ``IF NOT EXISTS`` so the migration can
    be applied to a database where some of these objects already exist.
    """
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # --- memory_facts -----------------------------------------------------
    # Issued as guarded SQL rather than op.create_index so this statement is
    # as re-runnable as the trigram indexes below; an unguarded create would
    # abort the whole migration if the index were already present.
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_memory_facts_user_status "
        "ON memory_facts (user_id, status)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_memory_facts_subject_trgm "
        "ON memory_facts USING gin (subject gin_trgm_ops)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_memory_facts_predicate_trgm "
        "ON memory_facts USING gin (predicate gin_trgm_ops)"
    )

    # --- timeline_events --------------------------------------------------
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_timeline_events_title_trgm "
        "ON timeline_events USING gin (title gin_trgm_ops)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_timeline_events_description_trgm "
        "ON timeline_events USING gin (description gin_trgm_ops)"
    )

    # --- memory_summaries -------------------------------------------------
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_memory_summaries_content_trgm "
        "ON memory_summaries USING gin (content gin_trgm_ops)"
    )

    # --- stories ----------------------------------------------------------
    # Same guarded form as ix_memory_facts_user_status above.
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_stories_user_status "
        "ON stories (user_id, status)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_stories_title_trgm "
        "ON stories USING gin (title gin_trgm_ops)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_stories_summary_trgm "
        "ON stories USING gin (summary gin_trgm_ops)"
    )

    # --- memory_chunks ----------------------------------------------------
    # NOTE(review): the hnsw access method and vector_cosine_ops are not
    # provided by pg_trgm; this presumably relies on the pgvector extension
    # having been installed by an earlier revision -- confirm.
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_memory_chunks_embedding_hnsw "
        "ON memory_chunks USING hnsw (embedding vector_cosine_ops) "
        "WITH (m = 16, ef_construction = 64)"
    )
def downgrade() -> None:
    """Drop every index created by ``upgrade``, in reverse creation order.

    All drops are guarded with ``IF EXISTS`` so the downgrade still
    completes when some of the indexes are already gone. The ``pg_trgm``
    extension itself is not dropped here.
    """
    # memory_chunks
    op.execute("DROP INDEX IF EXISTS ix_memory_chunks_embedding_hnsw")

    # stories
    op.execute("DROP INDEX IF EXISTS ix_stories_summary_trgm")
    op.execute("DROP INDEX IF EXISTS ix_stories_title_trgm")
    # Guarded SQL instead of op.drop_index, matching the other drops: an
    # unguarded drop raises if the index is missing.
    op.execute("DROP INDEX IF EXISTS ix_stories_user_status")

    # memory_summaries
    op.execute("DROP INDEX IF EXISTS ix_memory_summaries_content_trgm")

    # timeline_events
    op.execute("DROP INDEX IF EXISTS ix_timeline_events_description_trgm")
    op.execute("DROP INDEX IF EXISTS ix_timeline_events_title_trgm")

    # memory_facts
    op.execute("DROP INDEX IF EXISTS ix_memory_facts_predicate_trgm")
    op.execute("DROP INDEX IF EXISTS ix_memory_facts_subject_trgm")
    op.execute("DROP INDEX IF EXISTS ix_memory_facts_user_status")