feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking
- Replay and memoir-submit responses include started/finished UTC timestamps and elapsed_ms; the Phase1 poll exposes the Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments into chunks of memoir_phase1_batch_llm_chunk_size, with a bisect fallback per chunk; the Playground shows server timings.

Made-with: Cursor
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
"""memoir-phase1-ready internal 路由(依赖注入替身)。"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
@@ -54,6 +56,60 @@ async def test_memoir_phase1_ready_returns_bundle(monkeypatch: pytest.MonkeyPatc
|
||||
assert body["pending_segment_ids"] == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_memoir_phase1_ready_includes_server_elapsed_fields(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The phase1-ready endpoint passes the server-side timing fields through.

    A stub readiness service returns a fixed submit timestamp, an elapsed
    value and a durations map; the test checks that all three appear in the
    JSON body of the HTTP response.
    """
    from fastapi import FastAPI

    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "secret",
        raising=False,
    )
    from app.features.evaluation.deps import get_memoir_readiness_service
    from app.features.evaluation.router import router

    submitted_at = datetime(2026, 4, 9, 8, 0, 0, tzinfo=timezone.utc)

    class _StubReadiness:
        # Stand-in for the readiness service injected through FastAPI deps.
        async def memoir_phase1_ready_for_segments(
            self, *, conversation_id: str, segment_ids: list[str]
        ) -> MemoirPhase1ReadyOut:
            return MemoirPhase1ReadyOut(
                ready=False,
                checked_segment_ids=list(segment_ids),
                pending_segment_ids=["pending-1"],
                job_submitted_at_utc=submitted_at,
                elapsed_ms_since_submit=12_000,
                durations_ms={"since_playground_submit": 12_000},
            )

    async def _stub_principal():
        from app.features.evaluation.internal_auth import InternalEvalPrincipal

        return InternalEvalPrincipal()

    def _stub_readiness_service() -> _StubReadiness:
        return _StubReadiness()

    test_app = FastAPI()
    test_app.include_router(router, prefix="/internal/api/evaluation")
    test_app.dependency_overrides[get_internal_eval_principal] = _stub_principal
    test_app.dependency_overrides[get_memoir_readiness_service] = (
        _stub_readiness_service
    )

    async with AsyncClient(
        transport=ASGITransport(app=test_app), base_url="http://t"
    ) as http:
        response = await http.get(
            "/internal/api/evaluation/sessions/cid-a/memoir-phase1-ready",
            headers={"X-Internal-Eval-Key": "secret"},
            params=[("segment_ids", "s1")],
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["elapsed_ms_since_submit"] == 12_000
    assert payload["durations_ms"]["since_playground_submit"] == 12_000
    assert payload["job_submitted_at_utc"] is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_memoir_phase1_ready_404_propagates(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
from fastapi import FastAPI
|
||||
|
||||
71
api/tests/evaluation/test_replay_timing_response.py
Normal file
71
api/tests/evaluation/test_replay_timing_response.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""replay/conversation 响应携带服务端 elapsed 字段。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from app.features.evaluation.internal_auth import get_internal_eval_principal
|
||||
from app.features.evaluation.replay_service import ReplayServerTiming
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_replay_conversation_includes_server_elapsed_ms(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The replay/conversation response carries the server timing fields.

    A stub replay service returns a fixed ``ReplayServerTiming``; the test
    checks that ``elapsed_ms`` and both timestamps appear in the JSON body.
    """
    from fastapi import FastAPI

    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "secret",
        raising=False,
    )
    from app.features.evaluation.deps import get_replay_conversation_service
    from app.features.evaluation.router import router

    t0 = datetime(2026, 4, 9, 10, 0, 0, tzinfo=timezone.utc)
    t1 = datetime(2026, 4, 9, 10, 0, 1, tzinfo=timezone.utc)

    class _StubReplay:
        # Stand-in for the replay service; ignores whatever it is called with.
        async def replay_utterances(self, **kwargs):
            timing = ReplayServerTiming(
                started_at_utc=t0,
                finished_at_utc=t1,
                elapsed_ms=150,
            )
            return (1, ["seg-a"], timing)

    async def _stub_principal():
        from app.features.evaluation.internal_auth import InternalEvalPrincipal

        return InternalEvalPrincipal()

    def _stub_replay_service() -> _StubReplay:
        return _StubReplay()

    test_app = FastAPI()
    test_app.include_router(router, prefix="/internal/api/evaluation")
    test_app.dependency_overrides[get_internal_eval_principal] = _stub_principal
    test_app.dependency_overrides[get_replay_conversation_service] = (
        _stub_replay_service
    )

    request_json = {
        "conversation_id": "00000000-0000-0000-0000-000000000099",
        "user_utterances": ["hi"],
        "flush_memoir_after": False,
        "skip_memoir": True,
        "skip_tts": True,
    }

    async with AsyncClient(
        transport=ASGITransport(app=test_app), base_url="http://t"
    ) as http:
        response = await http.post(
            "/internal/api/evaluation/replay/conversation",
            headers={"X-Internal-Eval-Key": "secret"},
            json=request_json,
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["elapsed_ms"] == 150
    assert payload["started_at_utc"] is not None
    assert payload["finished_at_utc"] is not None
|
||||
99
api/tests/test_batch_phase1_chunked.py
Normal file
99
api/tests/test_batch_phase1_chunked.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Phase1 批处理 LLM 分块:大量 segment 时拆多次请求并合并 by_id。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.memoir.batch_phase1_prep import (
|
||||
BatchPhase1SegmentRow,
|
||||
run_batch_phase1_prep_chunked,
|
||||
)
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
|
||||
|
||||
def _state() -> MemoirStateSchema:
    """Build a minimal memoir state with one stage and nothing covered."""
    only_stage = "childhood"
    return MemoirStateSchema(
        stage_order=[only_stage],
        current_stage=only_stage,
        covered_stages=[],
        slots={},
    )
|
||||
|
||||
|
||||
def test_run_batch_phase1_prep_chunked_splits_95_into_four_calls(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """95 segments with ``chunk_size=24`` yield calls of 24, 24, 24 and 23.

    The per-chunk prep function is replaced by a recorder that notes each
    chunk's length and returns one row per segment id.
    """
    observed_sizes: list[int] = []

    def _recording_prep(
        segments: list,
        state: MemoirStateSchema,
        llm: object,
    ) -> dict[str, BatchPhase1SegmentRow]:
        observed_sizes.append(len(segments))
        rows: dict[str, BatchPhase1SegmentRow] = {}
        for seg in segments:
            rows[str(seg.id)] = BatchPhase1SegmentRow(
                detected_stage="childhood",
                slots={},
                chapter_category_raw="summary",
            )
        return rows

    monkeypatch.setattr(
        "app.agents.memoir.batch_phase1_prep.run_batch_phase1_prep",
        _recording_prep,
    )

    all_segments = []
    for i in range(95):
        all_segments.append(SimpleNamespace(id=f"s{i}", user_input_text="hello"))

    merged = run_batch_phase1_prep_chunked(
        all_segments,
        _state(),
        MagicMock(),
        chunk_size=24,
    )

    assert len(merged) == 95
    assert observed_sizes == [24, 24, 24, 23]
|
||||
|
||||
|
||||
def test_chunked_bisect_on_value_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """When a chunk fails, retry by bisecting; all ids are still assembled."""
    observed_sizes: list[int] = []

    def _failing_on_four(
        segments: list,
        state: MemoirStateSchema,
        llm: object,
    ) -> dict[str, BatchPhase1SegmentRow]:
        size = len(segments)
        observed_sizes.append(size)
        # Only the full four-segment call fails, so smaller retries succeed.
        if size == 4:
            raise ValueError("simulate length limit")
        rows: dict[str, BatchPhase1SegmentRow] = {}
        for seg in segments:
            rows[str(seg.id)] = BatchPhase1SegmentRow(
                detected_stage="childhood",
                slots={},
                chapter_category_raw="summary",
            )
        return rows

    monkeypatch.setattr(
        "app.agents.memoir.batch_phase1_prep.run_batch_phase1_prep",
        _failing_on_four,
    )

    all_segments = []
    for i in range(4):
        all_segments.append(SimpleNamespace(id=f"b{i}", user_input_text="x"))

    merged = run_batch_phase1_prep_chunked(
        all_segments,
        _state(),
        MagicMock(),
        chunk_size=100,
    )

    assert len(merged) == 4
    assert observed_sizes[0] == 4
    assert any(size == 2 for size in observed_sizes)
|
||||
@@ -30,6 +30,7 @@ def test_phase1_batch_enabled_by_default() -> None:
|
||||
|
||||
s = Settings()
|
||||
assert s.memoir_phase1_batch_llm_enabled is True
|
||||
assert s.memoir_phase1_batch_llm_chunk_size >= 1
|
||||
|
||||
|
||||
def test_quality_pass_enabled_by_default() -> None:
|
||||
|
||||
@@ -131,6 +131,8 @@ def test_prepare_batches_batch_llm_path_matches_per_segment_skip_logic(
|
||||
segments: list,
|
||||
state: MemoirStateSchema,
|
||||
llm: object,
|
||||
*,
|
||||
chunk_size: int = 24,
|
||||
) -> dict:
|
||||
return {
|
||||
"mix-1": BatchPhase1SegmentRow(
|
||||
@@ -146,7 +148,7 @@ def test_prepare_batches_batch_llm_path_matches_per_segment_skip_logic(
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.agents.memoir.orchestrator.run_batch_phase1_prep",
|
||||
"app.agents.memoir.orchestrator.run_batch_phase1_prep_chunked",
|
||||
fake_batch,
|
||||
)
|
||||
orch = MemoirOrchestrator()
|
||||
|
||||
Reference in New Issue
Block a user