feat(evaluation): session catalog, user export import, and eval web UI
- Extend evaluation API: schemas, router, repo, admin and execution services
- Improve user export markdown importer; add fixtures and importer tests
- Session catalog repo/service updates; internal app wiring and docs
- Add internal-eval.sh helper; refresh app-eval-web (App, styles, Vite)
This commit is contained in:
@@ -26,16 +26,25 @@ from app.features.evaluation.models import (
|
||||
EvalRunTurn,
|
||||
EvalVersion,
|
||||
)
|
||||
from app.features.evaluation.presenters import run_out
|
||||
from app.features.evaluation.schemas import (
|
||||
CaseCreate,
|
||||
ExperimentCreate,
|
||||
ImportJsonCaseBody,
|
||||
ImportMarkdownBody,
|
||||
RegressionSetCreate,
|
||||
SessionEvalRunItem,
|
||||
SessionEvalRunsOut,
|
||||
SnapshotFromConversationBody,
|
||||
VersionCreate,
|
||||
)
|
||||
from app.features.evaluation.session_catalog_service import SessionCatalogService
|
||||
from app.features.evaluation.user_export_fixtures import (
|
||||
list_user_export_fixture_names as list_user_export_md_filenames,
|
||||
)
|
||||
from app.features.evaluation.user_export_fixtures import (
|
||||
read_user_export_fixture,
|
||||
)
|
||||
from app.tasks.evaluation_tasks import run_eval_experiment_task
|
||||
|
||||
|
||||
@@ -188,6 +197,23 @@ class EvaluationAdminService:
|
||||
async def list_experiments(self, *, limit: int) -> list[EvalExperiment]:
|
||||
return await eval_repo.list_experiments(self._db, limit=limit)
|
||||
|
||||
async def list_session_evaluation_runs(
    self, conversation_id: str
) -> SessionEvalRunsOut:
    """Collect the evaluation runs whose case was sourced from a conversation.

    Every row returned by the repository is paired with its turns and with
    the name of the experiment it belongs to.
    """
    matches = await eval_repo.list_runs_for_source_conversation(
        self._db, source_conversation_id=conversation_id
    )
    collected: list[SessionEvalRunItem] = []
    for eval_run, _unused_case, experiment in matches:
        # Turns are loaded per run; run_out() merges the row with its turns.
        run_turns = await eval_repo.list_turns(self._db, eval_run.id)
        entry = SessionEvalRunItem(
            experiment_name=experiment.name,
            run=run_out(eval_run, run_turns),
        )
        collected.append(entry)
    return SessionEvalRunsOut(conversation_id=conversation_id, items=collected)
|
||||
|
||||
async def create_experiment(self, body: ExperimentCreate) -> EvalExperiment:
|
||||
rs = await eval_repo.get_regression_set(self._db, body.regression_set_id)
|
||||
if not rs:
|
||||
@@ -232,7 +258,6 @@ class EvaluationAdminService:
|
||||
async def experiment_stream_snapshot(
|
||||
self, experiment_id: str
|
||||
) -> dict[str, Any] | None:
|
||||
from app.features.evaluation.presenters import run_out
|
||||
from app.features.evaluation.schemas import GateVerdictOut
|
||||
|
||||
exp = await eval_repo.get_experiment(self._db, experiment_id)
|
||||
@@ -250,3 +275,10 @@ class EvaluationAdminService:
|
||||
"runs": run_payload,
|
||||
"gate": GateVerdictOut.model_validate(gv).model_dump() if gv else None,
|
||||
}
|
||||
|
||||
def list_user_export_fixture_names(self) -> list[str]:
    """Return the fixture file names reported by the user-export fixture module."""
    names = list_user_export_md_filenames()
    return names
|
||||
|
||||
def load_user_export_fixture_turns(self, filename: str) -> list[tuple[str, str]]:
    """Return only the parsed turns of a fixture, discarding its raw text."""
    # read_user_export_fixture yields (turns, raw_text); index 0 is the turns.
    return read_user_export_fixture(filename)[0]
|
||||
|
||||
@@ -23,6 +23,17 @@ from app.features.evaluation.models import EvalCase, EvalRun, EvalVersion
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_MAX_JUDGE_MARKDOWN_CHARS = 20_000
|
||||
_MAX_EVAL_CHAPTERS = 30
|
||||
_MAX_EVAL_STORIES = 40
|
||||
|
||||
|
||||
def _clip_md_for_judge(text: str, max_chars: int = _MAX_JUDGE_MARKDOWN_CHARS) -> str:
|
||||
s = (text or "").strip()
|
||||
if len(s) <= max_chars:
|
||||
return s
|
||||
return f"{s[:max_chars]}\n\n…(已截断供评审)"
|
||||
|
||||
|
||||
def _composite(
|
||||
conv: float | None, mem: float | None, weights: dict[str, Any] | None
|
||||
@@ -149,7 +160,66 @@ async def execute_eval_run(
|
||||
|
||||
memoir_md = simple_memoir_from_transcript(utterances, replies)
|
||||
mem_out = await judge.judge_memoir(memoir_markdown=memoir_md)
|
||||
mem_total = mem_out.total_score if mem_out else None
|
||||
|
||||
chapter_entries: list[dict[str, Any]] = []
|
||||
story_entries: list[dict[str, Any]] = []
|
||||
uid = (case.source_user_id or "").strip()
|
||||
if uid:
|
||||
from app.features.memoir.repo import get_chapters_for_memoir_list
|
||||
from app.features.story.repo import get_stories_for_user
|
||||
|
||||
try:
|
||||
chapters = await get_chapters_for_memoir_list(
|
||||
uid, db, active_only=True, is_new_only=None
|
||||
)
|
||||
for ch in chapters[:_MAX_EVAL_CHAPTERS]:
|
||||
body = (ch.canonical_markdown or "").strip()
|
||||
if not body:
|
||||
continue
|
||||
md = f"# 章节:{ch.title}\n\n{_clip_md_for_judge(body)}"
|
||||
cj = await judge.judge_memoir(memoir_markdown=md)
|
||||
chapter_entries.append(
|
||||
{
|
||||
"id": ch.id,
|
||||
"title": ch.title,
|
||||
"order_index": ch.order_index,
|
||||
"judge": cj.model_dump() if cj else None,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("eval chapter judges skipped: {}", e)
|
||||
|
||||
try:
|
||||
stories = await get_stories_for_user(db, uid, status="active")
|
||||
for st in stories[:_MAX_EVAL_STORIES]:
|
||||
body = (st.canonical_markdown or "").strip()
|
||||
if not body:
|
||||
continue
|
||||
md = f"# 故事:{st.title}\n\n{_clip_md_for_judge(body)}"
|
||||
sj = await judge.judge_memoir(memoir_markdown=md)
|
||||
story_entries.append(
|
||||
{
|
||||
"id": st.id,
|
||||
"title": st.title,
|
||||
"stage": st.stage,
|
||||
"judge": sj.model_dump() if sj else None,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("eval story judges skipped: {}", e)
|
||||
|
||||
mem_parts: list[float] = []
|
||||
if mem_out is not None:
|
||||
mem_parts.append(float(mem_out.total_score))
|
||||
for row in chapter_entries:
|
||||
j = row.get("judge")
|
||||
if isinstance(j, dict) and j.get("total_score") is not None:
|
||||
mem_parts.append(float(j["total_score"]))
|
||||
for row in story_entries:
|
||||
j = row.get("judge")
|
||||
if isinstance(j, dict) and j.get("total_score") is not None:
|
||||
mem_parts.append(float(j["total_score"]))
|
||||
mem_total = sum(mem_parts) / len(mem_parts) if mem_parts else None
|
||||
|
||||
exp = await eval_repo.get_experiment(db, run.experiment_id)
|
||||
weights = exp.composite_weights_json if exp else None
|
||||
@@ -158,6 +228,8 @@ async def execute_eval_run(
|
||||
bundle: dict[str, Any] = {
|
||||
"conversation_judge": conv_out.model_dump() if conv_out else None,
|
||||
"memoir_judge": mem_out.model_dump() if mem_out else None,
|
||||
"chapters": chapter_entries,
|
||||
"stories": story_entries,
|
||||
}
|
||||
await eval_repo.update_run(
|
||||
db,
|
||||
|
||||
@@ -17,3 +17,30 @@ def extract_user_utterances_from_export_md(text: str) -> list[str]:
|
||||
if chunk and chunk != "(空)":
|
||||
out.append(chunk)
|
||||
return out
|
||||
|
||||
|
||||
def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]:
    """Extract ``(user, ai)`` text pairs, one per ``#### 轮次 N`` heading.

    The input is Markdown produced by the ``extract_sql_to_user_md`` export;
    the pairs are used for side-by-side comparison in the evaluation console.

    Args:
        text: Full Markdown export.

    Returns:
        One ``(user_text, ai_text)`` tuple per turn, in document order. Turns
        without a user section, or whose user text is empty or the ``(空)``
        placeholder, are skipped. ``ai_text`` is ``""`` when no AI text is found.
    """
    # A turn heading may be the very first line of the document, so accept the
    # start of the string as well as a preceding newline; otherwise the first
    # turn would end up in the discarded preamble chunk.
    chunks = re.split(r"(?:\A|\n)####\s*轮次\s*\d+[^\n]*", text)

    flags = re.DOTALL | re.IGNORECASE
    # User text runs up to the line that starts the AI section.
    user_re = re.compile(r"\*\*用户:\*\*\s*\n+(.+?)(?=\n\*\*AI:\*\*)", flags)
    # AI text runs up to the next level-3/4 heading or the end of the chunk.
    ai_re = re.compile(r"\*\*AI:\*\*\s*\n+(.+?)(?=\n####\s|\n###\s+[^#]|\Z)", flags)

    out: list[tuple[str, str]] = []
    # chunks[0] is whatever precedes the first turn heading.
    for chunk in chunks[1:]:
        user_m = user_re.search(chunk)
        if not user_m:
            continue
        u = (user_m.group(1) or "").strip()
        if not u or u == "(空)":
            continue
        ai_m = ai_re.search(chunk)
        a = ((ai_m.group(1) if ai_m else "") or "").strip()
        out.append((u, a))
    return out
|
||||
|
||||
@@ -21,5 +21,6 @@ def run_out(row, turns: list) -> EvalRunOut:
|
||||
conversation_score_total=row.conversation_score_total,
|
||||
memoir_score_total=row.memoir_score_total,
|
||||
composite_score=row.composite_score,
|
||||
judge_bundle_json=row.judge_bundle_json,
|
||||
turns=[RunTurnOut.model_validate(t) for t in turns],
|
||||
)
|
||||
|
||||
@@ -204,6 +204,27 @@ async def list_runs_for_experiment(
|
||||
return list(res.scalars().all())
|
||||
|
||||
|
||||
async def list_runs_for_source_conversation(
    db: AsyncSession,
    *,
    source_conversation_id: str,
    limit: int = 80,
) -> list[tuple[EvalRun, EvalCase, EvalExperiment]]:
    """Return runs, with their case and experiment, for one source conversation.

    Rows are joined through the run's case and experiment, filtered on the
    case's ``source_conversation_id``, ordered by completion time and then
    start time (both descending, NULLs last), and capped at ``limit``.
    """
    newest_first = (
        EvalRun.completed_at.desc().nulls_last(),
        EvalRun.started_at.desc().nulls_last(),
    )
    query = select(EvalRun, EvalCase, EvalExperiment)
    query = query.join(EvalCase, EvalRun.case_id == EvalCase.id)
    query = query.join(EvalExperiment, EvalRun.experiment_id == EvalExperiment.id)
    query = query.where(EvalCase.source_conversation_id == source_conversation_id)
    query = query.order_by(*newest_first).limit(limit)
    result = await db.execute(query)
    return list(result.all())
|
||||
|
||||
|
||||
async def update_run(
|
||||
db: AsyncSession,
|
||||
run: EvalRun,
|
||||
|
||||
@@ -28,10 +28,15 @@ from app.features.evaluation.schemas import (
|
||||
ImportMarkdownBody,
|
||||
RegressionSetCreate,
|
||||
RegressionSetOut,
|
||||
SessionDialogueOut,
|
||||
SessionEvalRunsOut,
|
||||
SessionListItem,
|
||||
SessionListResponse,
|
||||
SessionTranscriptOut,
|
||||
SnapshotFromConversationBody,
|
||||
UserExportFixtureDetailOut,
|
||||
UserExportFixtureListOut,
|
||||
UserExportFixtureTurnOut,
|
||||
VersionCreate,
|
||||
VersionOut,
|
||||
)
|
||||
@@ -40,6 +45,12 @@ from app.features.evaluation.session_catalog_service import SessionCatalogServic
|
||||
router = APIRouter(tags=["internal-evaluation"])
|
||||
|
||||
|
||||
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated probe: confirms this process is internal_main and the router is mounted."""
    payload: dict[str, str | bool] = dict(ok=True, service="life-echo-internal-eval")
    return payload
|
||||
|
||||
|
||||
def _eval_http_exc(
|
||||
e: EvaluationNotFoundError | EvaluationBadRequestError,
|
||||
) -> HTTPException:
|
||||
@@ -123,17 +134,23 @@ async def list_sessions(
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
user_id: str | None = Query(None),
|
||||
q: str | None = Query(None),
|
||||
status: str | None = Query(
|
||||
None,
|
||||
description="按会话 status 过滤,如 active",
|
||||
),
|
||||
):
|
||||
catalog = SessionCatalogService(db)
|
||||
rows, total = await catalog.list_sessions(
|
||||
offset=offset, limit=limit, user_id=user_id, q=q
|
||||
offset=offset, limit=limit, user_id=user_id, q=q, status=status
|
||||
)
|
||||
return SessionListResponse(
|
||||
items=[
|
||||
SessionListItem(
|
||||
id=r.id,
|
||||
user_id=r.user_id,
|
||||
user_phone=r.user_phone,
|
||||
started_at=r.started_at,
|
||||
last_message_at=r.last_message_at,
|
||||
conversation_stage=r.conversation_stage,
|
||||
current_topic=r.current_topic,
|
||||
status=r.status,
|
||||
@@ -144,6 +161,22 @@ async def list_sessions(
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the dialogue of one conversation, or respond 404 when the catalog has none."""
    dialogue = await SessionCatalogService(db).get_session_dialogue(conversation_id)
    if dialogue:
        return dialogue
    raise HTTPException(status_code=404, detail="conversation not found")
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions/{conversation_id}/transcript", response_model=SessionTranscriptOut
|
||||
)
|
||||
@@ -164,6 +197,52 @@ async def get_session_transcript(
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/sessions/{conversation_id}/evaluation-runs",
    response_model=SessionEvalRunsOut,
)
async def list_session_evaluation_runs(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """List the evaluation runs linked to a conversation via the admin service."""
    runs_for_session = await svc.list_session_evaluation_runs(conversation_id)
    return runs_for_session
|
||||
|
||||
|
||||
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """Return the names of the available user-export fixtures."""
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
|
||||
|
||||
|
||||
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """Return the parsed (user, AI) turns of one user-export fixture.

    A ``ValueError`` from the loader is reported as 400 and a
    ``FileNotFoundError`` as 404; the original exception context is dropped.
    """
    try:
        pairs = svc.load_user_export_fixture_turns(filename)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="invalid fixture filename"
        ) from None
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="fixture not found") from None

    turn_models = []
    for user_text, ai_text in pairs:
        turn_models.append(UserExportFixtureTurnOut(user=user_text, ai=ai_text))
    return UserExportFixtureDetailOut(filename=filename, turns=turn_models)
|
||||
|
||||
|
||||
@router.post("/regression-sets/{set_id}/import-markdown", response_model=CaseOut)
|
||||
async def import_markdown_case(
|
||||
set_id: str,
|
||||
|
||||
@@ -88,10 +88,25 @@ class ExperimentOut(BaseModel):
|
||||
completed_at: datetime | None
|
||||
|
||||
|
||||
# One message inside a session dialogue response.
class SessionDialogueMessageOut(BaseModel):
    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = ConfigDict(from_attributes=True)

    role: str  # role recorded on the message
    content: str  # message text
    created_at: datetime | None = None  # optional timestamp; defaults to None
|
||||
|
||||
|
||||
# Response body of the session dialogue endpoint.
class SessionDialogueOut(BaseModel):
    conversation_id: str  # id of the conversation the messages belong to
    messages: list[SessionDialogueMessageOut]  # messages of that conversation
|
||||
|
||||
|
||||
class SessionListItem(BaseModel):
|
||||
id: str
|
||||
user_id: str
|
||||
user_phone: str | None = Field(default=None, description="users.phone,列表展示用")
|
||||
started_at: datetime | None
|
||||
last_message_at: datetime | None = None
|
||||
conversation_stage: str | None
|
||||
current_topic: str | None
|
||||
status: str | None
|
||||
@@ -109,6 +124,20 @@ class SessionTranscriptOut(BaseModel):
|
||||
user_utterances_from_messages: list[str]
|
||||
|
||||
|
||||
# One (user, AI) exchange parsed from a user-export fixture.
class UserExportFixtureTurnOut(BaseModel):
    user: str  # user side of the turn
    ai: str  # AI side of the turn
|
||||
|
||||
|
||||
# Response body listing the available user-export fixtures.
class UserExportFixtureListOut(BaseModel):
    items: list[str]  # fixture file names
|
||||
|
||||
|
||||
# Response body for a single user-export fixture.
class UserExportFixtureDetailOut(BaseModel):
    filename: str  # name of the fixture that was requested
    turns: list[UserExportFixtureTurnOut]  # parsed turns of that fixture
|
||||
|
||||
|
||||
class SnapshotFromConversationBody(BaseModel):
|
||||
title: str | None = None
|
||||
use_messages: bool = False
|
||||
@@ -157,9 +186,20 @@ class EvalRunOut(BaseModel):
|
||||
conversation_score_total: float | None
|
||||
memoir_score_total: float | None
|
||||
composite_score: float | None
|
||||
judge_bundle_json: dict[str, Any] | None = None
|
||||
turns: list[RunTurnOut] = []
|
||||
|
||||
|
||||
# One evaluation run together with the name of its experiment.
class SessionEvalRunItem(BaseModel):
    experiment_name: str  # name of the experiment the run belongs to
    run: EvalRunOut  # the run itself, including its turns
|
||||
|
||||
|
||||
# Response body listing the evaluation runs tied to one conversation.
class SessionEvalRunsOut(BaseModel):
    conversation_id: str  # conversation the runs were looked up for
    items: list[SessionEvalRunItem]  # matching runs
|
||||
|
||||
|
||||
class GateVerdictOut(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.features.conversation.models import Conversation, ConversationMessage, Segment
|
||||
|
||||
@@ -12,12 +13,14 @@ class SessionCatalogRepo:
|
||||
def __init__(self, db: AsyncSession) -> None:
|
||||
self._db = db
|
||||
|
||||
async def count_conversations(self) -> int:
|
||||
async def count_conversations(self, *, status: str | None = None) -> int:
|
||||
q = (
|
||||
select(func.count())
|
||||
.select_from(Conversation)
|
||||
.where(Conversation.deleted_at.is_(None))
|
||||
)
|
||||
if status:
|
||||
q = q.where(Conversation.status == status)
|
||||
r = await self._db.execute(q)
|
||||
return int(r.scalar() or 0)
|
||||
|
||||
@@ -28,11 +31,20 @@ class SessionCatalogRepo:
|
||||
limit: int = 50,
|
||||
user_id: str | None = None,
|
||||
q_text: str | None = None,
|
||||
status: str | None = None,
|
||||
) -> list[Conversation]:
|
||||
stmt = select(Conversation).where(Conversation.deleted_at.is_(None))
|
||||
if user_id:
|
||||
stmt = stmt.where(Conversation.user_id == user_id)
|
||||
stmt = stmt.order_by(Conversation.started_at.desc().nullslast())
|
||||
if status:
|
||||
stmt = stmt.where(Conversation.status == status)
|
||||
if status == "active":
|
||||
stmt = stmt.order_by(
|
||||
Conversation.last_message_at.desc().nullslast(),
|
||||
Conversation.started_at.desc().nullslast(),
|
||||
)
|
||||
else:
|
||||
stmt = stmt.order_by(Conversation.started_at.desc().nullslast())
|
||||
stmt = stmt.offset(offset).limit(limit)
|
||||
# q_text: 简单按 topic 搜索(后续可扩展全文)
|
||||
if q_text:
|
||||
@@ -41,6 +53,7 @@ class SessionCatalogRepo:
|
||||
(Conversation.current_topic.isnot(None))
|
||||
& (Conversation.current_topic.ilike(like))
|
||||
)
|
||||
stmt = stmt.options(joinedload(Conversation.user))
|
||||
res = await self._db.execute(stmt)
|
||||
return list(res.scalars().unique().all())
|
||||
|
||||
|
||||
@@ -6,6 +6,10 @@ from dataclasses import dataclass
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.features.evaluation.schemas import (
|
||||
SessionDialogueMessageOut,
|
||||
SessionDialogueOut,
|
||||
)
|
||||
from app.features.evaluation.session_catalog_repo import SessionCatalogRepo
|
||||
|
||||
|
||||
@@ -13,7 +17,9 @@ from app.features.evaluation.session_catalog_repo import SessionCatalogRepo
|
||||
class SessionSummary:
|
||||
id: str
|
||||
user_id: str
|
||||
user_phone: str | None
|
||||
started_at: object | None
|
||||
last_message_at: object | None
|
||||
conversation_stage: str | None
|
||||
current_topic: str | None
|
||||
status: str | None
|
||||
@@ -38,16 +44,23 @@ class SessionCatalogService:
|
||||
limit: int = 50,
|
||||
user_id: str | None = None,
|
||||
q: str | None = None,
|
||||
status: str | None = None,
|
||||
) -> tuple[list[SessionSummary], int]:
|
||||
total = await self._repo.count_conversations()
|
||||
total = await self._repo.count_conversations(status=status)
|
||||
rows = await self._repo.list_conversations(
|
||||
offset=offset, limit=limit, user_id=user_id, q_text=q
|
||||
offset=offset,
|
||||
limit=limit,
|
||||
user_id=user_id,
|
||||
q_text=q,
|
||||
status=status,
|
||||
)
|
||||
out = [
|
||||
SessionSummary(
|
||||
id=c.id,
|
||||
user_id=c.user_id,
|
||||
user_phone=c.user.phone if c.user is not None else None,
|
||||
started_at=c.started_at,
|
||||
last_message_at=c.last_message_at,
|
||||
conversation_stage=c.conversation_stage,
|
||||
current_topic=c.current_topic,
|
||||
status=c.status,
|
||||
@@ -56,6 +69,25 @@ class SessionCatalogService:
|
||||
]
|
||||
return out, total
|
||||
|
||||
async def get_session_dialogue(
    self, conversation_id: str
) -> SessionDialogueOut | None:
    """Return the messages of a conversation as a dialogue payload.

    Returns ``None`` when the conversation does not exist or has a truthy
    ``deleted_at``.
    """
    conversation = await self._repo.get_conversation(conversation_id)
    is_available = bool(conversation) and not conversation.deleted_at
    if not is_available:
        return None

    stored_messages = await self._repo.list_messages_for_conversation(
        conversation_id
    )
    dialogue_messages = []
    for message in stored_messages:
        dialogue_messages.append(
            SessionDialogueMessageOut(
                role=message.role,
                content=message.content,
                created_at=message.created_at,
            )
        )
    return SessionDialogueOut(
        conversation_id=conversation_id,
        messages=dialogue_messages,
    )
|
||||
|
||||
async def get_transcript(self, conversation_id: str) -> SessionTranscript | None:
|
||||
c = await self._repo.get_conversation(conversation_id)
|
||||
if not c or c.deleted_at:
|
||||
|
||||
36
api/app/features/evaluation/user_export_fixtures.py
Normal file
36
api/app/features/evaluation/user_export_fixtures.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""只读加载 api/tests/user_exports/*.md,供内部评测台对照(非生产数据路径)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from app.features.evaluation.importers.user_export_markdown import (
|
||||
extract_dialogue_turns_from_export_md,
|
||||
)
|
||||
|
||||
_SAFE_MD = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.-]*\.md$")
|
||||
|
||||
|
||||
def user_exports_dir() -> Path:
    """Return the ``tests/user_exports`` directory under the ``api/`` root."""
    # api/app/features/evaluation/user_export_fixtures.py -> parents[3] is api/
    api_root = Path(__file__).resolve().parents[3]
    return api_root.joinpath("tests", "user_exports")
|
||||
|
||||
|
||||
def list_user_export_fixture_names() -> list[str]:
    """Return the sorted ``*.md`` file names in the fixture directory.

    An empty list is returned when the directory does not exist.
    """
    fixtures_root = user_exports_dir()
    if fixtures_root.is_dir():
        names = [entry.name for entry in fixtures_root.glob("*.md")]
        names.sort()
        return names
    return []
|
||||
|
||||
|
||||
def read_user_export_fixture(filename: str) -> tuple[list[tuple[str, str]], str]:
    """Load one user-export fixture and parse its dialogue turns.

    Args:
        filename: Bare file name; it must match ``_SAFE_MD`` in full.

    Returns:
        ``(turns, text)``: the parsed ``(user, ai)`` pairs and the raw Markdown.

    Raises:
        ValueError: If ``filename`` does not match the safe-name pattern.
        FileNotFoundError: If the resolved path is not a regular file located
            directly inside the fixture directory.
    """
    # fullmatch, not match: with match(), the pattern's trailing "$" also
    # accepts a name that ends in a newline ("x.md\n").
    if not _SAFE_MD.fullmatch(filename):
        raise ValueError("invalid fixture filename")
    root = user_exports_dir()
    path = (root / filename).resolve()
    # Second line of defence: after resolving, the file must still sit
    # directly inside the fixture directory.
    if path.parent != root.resolve() or not path.is_file():
        raise FileNotFoundError(filename)
    text = path.read_text(encoding="utf-8")
    turns = extract_dialogue_turns_from_export_md(text)
    return turns, text
|
||||
@@ -16,6 +16,7 @@ setup_logging()
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from app.core.config import settings
|
||||
@@ -53,6 +54,27 @@ internal_app.add_middleware(
|
||||
register_exception_handlers(internal_app)
|
||||
|
||||
|
||||
@internal_app.get("/", include_in_schema=False, response_class=HTMLResponse)
async def internal_eval_landing():
    """Landing hint for a browser opening the root path on :8001.

    The UI is served by Vite (default 5174); this process only serves the API.
    """
    # The docs paragraph depends on whether the OpenAPI docs are enabled.
    if settings.internal_eval_enable_docs:
        docs_hint = '<p><a href="/docs">OpenAPI 文档 /docs</a></p>'
    else:
        docs_hint = "<p>(未开启文档;设置 INTERNAL_EVAL_ENABLE_DOCS=1 后可访问 /docs)</p>"
    return f"""<!DOCTYPE html>
<html lang="zh-CN"><head><meta charset="utf-8"/><title>内部评测 API</title></head>
<body style="font-family:system-ui,sans-serif;max-width:44rem;margin:2rem auto;line-height:1.5">
<h1>Life Echo · 内部回归评测 API</h1>
<p>这里是 <strong>HTTP API</strong>(端口由启动命令决定),<strong>没有内置网页</strong>。
浏览「回归评测台」请在仓库执行 <code>./internal-eval.sh</code> 或 <code>cd app-eval-web && npm run dev</code>,
在终端里打开 Vite 给出的地址(一般为 <strong>http://127.0.0.1:5174/</strong>)。</p>
<p>健康检查:<a href="/health">/health</a></p>
{docs_hint}
<p>会话与对比接口前缀:<code>/internal/api/evaluation/</code></p>
</body></html>"""
|
||||
|
||||
|
||||
@internal_app.on_event("startup")
|
||||
async def _startup():
|
||||
import asyncio
|
||||
|
||||
@@ -4,6 +4,24 @@
|
||||
|
||||
## 启动
|
||||
|
||||
一键脚本 `internal-eval.sh` 与 `development.sh` **不是重复各启一套主站**:
|
||||
|
||||
| | `development.sh` | `internal-eval.sh` |
|
||||
|---|------------------|---------------------|
|
||||
| HTTP | 主站 `main:app`(默认 **8000**) | 仅评测 `internal_app`(默认 **8001**) |
|
||||
| Celery | 会起一个 worker | 默认也会起一个 worker(可与下面「瘦启动」二选一) |
|
||||
|
||||
评测分析只需要 **8001 上的 internal API**;若你已经在跑 `development.sh`(DB/Redis/主站/已有 Celery),不必再起第二份基础设施和 worker:
|
||||
|
||||
```bash
|
||||
cd api
|
||||
chmod +x internal-eval.sh
|
||||
# 确保 .env.development 或 .env 里有 INTERNAL_EVAL_API_KEY
|
||||
SKIP_INFRA=1 SKIP_INSTALL=1 SKIP_CELERY=1 ./internal-eval.sh # 推荐:只多开 8001
|
||||
```
|
||||
|
||||
全新机器、只跑评测栈时可直接 `./internal-eval.sh`(会起 docker、`uv sync`、迁移、8001 + Celery)。**默认会起 `app-eval-web`,并用 Vite `--open` 尝试打开浏览器**(`http://127.0.0.1:5174/`)。不要前端时设 `START_EVAL_WEB=0`;只要前端但不要弹窗时设 `OPEN_EVAL_WEB=0`。
|
||||
|
||||
数据库与主服务共用;需配置环境变量后启动专用进程:
|
||||
|
||||
```bash
|
||||
|
||||
374
api/internal-eval.sh
Executable file
374
api/internal-eval.sh
Executable file
@@ -0,0 +1,374 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# 仅启动「内部回归评测」栈(app/internal_main.py),不启动主站 consumer API。
|
||||
#
|
||||
# 与 development.sh 的区别:
|
||||
# - development.sh:main:app + Celery(通常 :8000),面向 App/主业务。
|
||||
# - internal-eval.sh:internal_app + Celery(:8001),仅评测/回放/GLM 打分/门禁。
|
||||
# 二者共用数据库与 Redis;不会拉起第二份 main:app。
|
||||
#
|
||||
# 若本机已在跑 ./development.sh,只想多开评测 HTTP(推荐,避免第二套 worker/docker):
|
||||
# SKIP_INFRA=1 SKIP_INSTALL=1 SKIP_CELERY=1 ./internal-eval.sh
|
||||
#
|
||||
# 用法:cd api && ./internal-eval.sh
|
||||
# 可选环境变量:
|
||||
# SKIP_INFRA=1 已起好 Postgres/Redis 时跳过 docker compose
|
||||
# SKIP_INSTALL=1 跳过 uv sync
|
||||
# SKIP_CELERY=1 仅起内部 API(别处已有 Celery worker 时)
|
||||
# START_EVAL_WEB=0 不起评测前端(默认会起 app-eval-web,需已 npm install)
|
||||
# OPEN_EVAL_WEB=0 起前端但不自动打开浏览器(默认 Vite --open)
|
||||
# EVAL_WEB_PORT 打印提示用,默认 5174(与 app-eval-web/vite.config.ts 一致)
|
||||
# INTERNAL_EVAL_PORT 默认 8001
|
||||
# CELERY_POOL 默认 solo(与 development.sh 一致)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
|
||||
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
|
||||
|
||||
VENV_DIR="${ROOT_DIR}/.venv"
|
||||
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
|
||||
CELERY_BIN="${VENV_DIR}/bin/celery"
|
||||
|
||||
INTERNAL_EVAL_HOST="${INTERNAL_EVAL_HOST:-0.0.0.0}"
|
||||
INTERNAL_EVAL_PORT="${INTERNAL_EVAL_PORT:-8001}"
|
||||
CELERY_POOL="${CELERY_POOL:-solo}"
|
||||
SKIP_INSTALL="${SKIP_INSTALL:-0}"
|
||||
SKIP_INFRA="${SKIP_INFRA:-0}"
|
||||
SKIP_CELERY="${SKIP_CELERY:-0}"
|
||||
START_EVAL_WEB="${START_EVAL_WEB:-1}"
|
||||
OPEN_EVAL_WEB="${OPEN_EVAL_WEB:-1}"
|
||||
EVAL_WEB_PORT="${EVAL_WEB_PORT:-5174}"
|
||||
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-12}"
|
||||
|
||||
API_PID=""
|
||||
CELERY_PID=""
|
||||
EVAL_WEB_PID=""
|
||||
CLEANED_UP=0
|
||||
INFRA_STARTED=0
|
||||
|
||||
print_header() {
    # Print a section title framed by two blue separator lines.
    local rule="========================================"
    echo -e "\n${BLUE}${rule}${NC}"
    echo -e "${BLUE}$1${NC}"
    echo -e "${BLUE}${rule}${NC}"
}
|
||||
|
||||
print_ok() {
    # Green success line prefixed with a check mark.
    local message="$1"
    echo -e "${GREEN}✓ ${message}${NC}"
}
|
||||
|
||||
print_warn() {
    # Yellow warning line prefixed with a warning sign.
    local message="$1"
    echo -e "${YELLOW}⚠ ${message}${NC}"
}
|
||||
|
||||
print_err() {
    # Red error line prefixed with a cross.
    local message="$1"
    echo -e "${RED}✗ ${message}${NC}"
}
|
||||
|
||||
is_pid_alive() {
    # Succeeds when $1 is non-empty and `kill -0` can signal that PID.
    local target="$1"
    if [[ -z "${target}" ]]; then
        return 1
    fi
    kill -0 "${target}" 2>/dev/null
}
|
||||
|
||||
wait_pid_exit() {
    # Poll once per second until PID $1 is gone (return 0) or $2 seconds
    # have been waited (return 1).
    local pid="$1"
    local timeout="$2"
    local elapsed=0

    while true; do
        if ! is_pid_alive "${pid}"; then
            return 0
        fi
        if (( elapsed >= timeout )); then
            return 1
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done
}
|
||||
|
||||
kill_children_term() {
    # Send SIGTERM to every descendant of PID $1, deepest processes first.
    local parent="$1"
    local child_pid

    # pgrep prints one PID per line; word splitting yields one PID per pass.
    for child_pid in $(pgrep -P "${parent}" 2>/dev/null || true); do
        kill_children_term "${child_pid}"
        kill -TERM "${child_pid}" 2>/dev/null || true
    done
}
|
||||
|
||||
stop_process_gracefully() {
    # Stop process $2 (display name $1): SIGTERM to its descendants and to
    # itself, wait up to $3 seconds (default 10), then fall back to SIGKILL.
    local name="$1" pid="$2" timeout="${3:-10}"

    if ! is_pid_alive "${pid}"; then
        print_ok "${name} 已退出"
        return 0
    fi

    print_warn "正在停止 ${name}(PID: ${pid})..."
    kill_children_term "${pid}"
    kill -TERM "${pid}" 2>/dev/null || true

    if wait_pid_exit "${pid}" "${timeout}"; then
        print_ok "${name} 已停止"
    else
        print_warn "${name} 在 ${timeout}s 内未退出,准备强制结束"
        kill -KILL "${pid}" 2>/dev/null || true
        wait_pid_exit "${pid}" 3 || true
        print_ok "${name} 已强制结束"
    fi
}
|
||||
|
||||
cleanup() {
    # Trap handler: stop the processes this script started, then the docker
    # containers if this run started them. CLEANED_UP makes it run only once.
    if [[ "${CLEANED_UP}" == "1" ]]; then
        return 0
    fi
    CLEANED_UP=1

    print_header "正在关闭内部评测环境"

    # "display name|pid" pairs, stopped in this order.
    local entry
    for entry in \
        "eval-web (Vite)|${EVAL_WEB_PID}" \
        "Internal Eval API|${API_PID}" \
        "Celery|${CELERY_PID}"; do
        if is_pid_alive "${entry##*|}"; then
            stop_process_gracefully "${entry%%|*}" "${entry##*|}" "${SHUTDOWN_TIMEOUT}"
        fi
    done

    if [[ "${INFRA_STARTED}" == "1" ]]; then
        print_warn "正在停止 PostgreSQL / Redis 容器..."
        (
            cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
        ) >/dev/null 2>&1 || true
        print_ok "PostgreSQL/Redis 容器已停止"
    fi
}
|
||||
|
||||
require_cmd() {
    # Exit the script with an error unless command $1 is on PATH.
    local cmd="$1"
    if command -v "${cmd}" >/dev/null 2>&1; then
        return 0
    fi
    print_err "未找到命令: ${cmd}"
    exit 1
}
|
||||
|
||||
start_infra() {
    # Bring up the docker-compose dev services and remember that this run
    # started them, so cleanup stops them again.
    local compose_file="docker-compose.dev.yml"

    print_header "启动 PostgreSQL 和 Redis"
    cd "${ROOT_DIR}"
    docker compose -f "${compose_file}" up -d
    INFRA_STARTED=1
    print_ok "基础设施已就绪"
}
|
||||
|
||||
wait_postgres_ready() {
    # Probe pg_isready inside the postgres container once per second, up to
    # 30 times. Returns 0 as soon as it succeeds, 1 after the last attempt.
    local retries=30
    local attempt=0

    print_header "等待 PostgreSQL 就绪"
    cd "${ROOT_DIR}"
    until (( attempt >= retries )); do
        if docker compose -f docker-compose.dev.yml exec -T postgres \
            pg_isready -U postgres >/dev/null 2>&1; then
            print_ok "PostgreSQL 已就绪"
            return 0
        fi
        sleep 1
        attempt=$((attempt + 1))
    done
    print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
    return 1
}
|
||||
|
||||
ensure_venv() {
    # Create .venv when missing and, unless SKIP_INSTALL=1, install the
    # Python dependencies with `uv sync`.
    print_header "检查 Python 虚拟环境"

    # Work from the API root explicitly, as the other steps do. With
    # SKIP_INFRA=1 no earlier step has changed directory, so `uv sync` would
    # otherwise run in whatever directory the script was invoked from.
    cd "${ROOT_DIR}"

    if [[ ! -d "${VENV_DIR}" ]]; then
        print_warn ".venv 不存在,正在创建"
        uv venv "${VENV_DIR}"
    fi

    if [[ "${SKIP_INSTALL}" != "1" ]]; then
        print_header "安装 Python 依赖"
        uv sync
        print_ok "依赖安装完成"
    else
        print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
    fi
}
|
||||
|
||||
ensure_dotenv_from_development() {
    # Copy .env.development over .env when it exists; otherwise keep
    # whatever .env is already there.
    local source_env="${ROOT_DIR}/.env.development"

    print_header "准备本地 .env"
    if [[ ! -f "${source_env}" ]]; then
        print_warn "未找到 .env.development,将使用现有 .env(若存在)"
        return 0
    fi
    cp "${source_env}" "${ROOT_DIR}/.env"
    print_ok "已从 .env.development 同步为 .env"
}
|
||||
|
||||
check_internal_eval_key() {
    # Require INTERNAL_EVAL_API_KEY either as a non-empty line in .env or as
    # an environment variable; exit 1 with instructions otherwise.
    local env_file="${ROOT_DIR}/.env"

    print_header "检查内部评测密钥"
    if [[ -f "${env_file}" ]] && grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${env_file}" 2>/dev/null; then
        print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    elif [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
        print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    else
        print_err "未配置 INTERNAL_EVAL_API_KEY:内部评测接口将返回 503。"
        print_err "请在 api/.env.development(或 .env)中加入一行,例如:"
        print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
        exit 1
    fi
    return 0
}
|
||||
|
||||
check_env_file() {
    # Report whether a .env file is present; a missing file is only a warning.
    print_header "检查环境变量文件"
    if [[ -f "${ROOT_DIR}/.env" ]]; then
        print_ok "检测到 .env"
    else
        print_warn "未找到 .env,应用可能因缺少配置启动失败"
    fi
}
|
||||
|
||||
run_migrations() {
    # Run `alembic upgrade head`. A failure is reported as a warning and does
    # not stop the script.
    print_header "执行数据库迁移"
    cd "${ROOT_DIR}"

    # Keep Alembic's stderr quiet on success, but capture it so the actual
    # failure reason can be shown instead of being thrown away.
    local err_log
    err_log="$(mktemp)"
    if uv run alembic upgrade head 2>"${err_log}"; then
        print_ok "Alembic 迁移已就绪"
    else
        print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
        cat "${err_log}" >&2 || true
    fi
    rm -f "${err_log}"
}
|
||||
|
||||
start_eval_web() {
    # Start the app-eval-web Vite dev server in the background and store its
    # PID in EVAL_WEB_PID. Exits the script when the directory, node_modules,
    # npm, or the API key is missing.
    print_header "启动 app-eval-web (Vite)"
    if [[ ! -d "${EVAL_WEB_DIR}" ]]; then
        print_err "未找到 ${EVAL_WEB_DIR}"
        exit 1
    fi
    if [[ ! -d "${EVAL_WEB_DIR}/node_modules" ]]; then
        print_err "请先执行: cd app-eval-web && npm install"
        exit 1
    fi
    require_cmd "npm"

    # Prefer the environment variable; fall back to the first matching line
    # in .env.
    local api_key="${INTERNAL_EVAL_API_KEY:-}"
    if [[ -z "${api_key}" ]] && [[ -f "${ROOT_DIR}/.env" ]]; then
        # `|| true` keeps a grep miss from aborting the script under
        # `set -o pipefail` before the explicit error below is printed.
        # Both double and single surrounding quotes are stripped.
        api_key="$(
            { grep -E '^INTERNAL_EVAL_API_KEY=' "${ROOT_DIR}/.env" || true; } \
                | head -1 | cut -d= -f2- | tr -d '\r' \
                | sed "s/^[\"']//;s/[\"']\$//"
        )"
    fi
    if [[ -z "${api_key}" ]]; then
        print_err "无法解析 INTERNAL_EVAL_API_KEY,无法为 Vite 注入 VITE_EVAL_API_KEY"
        exit 1
    fi

    local vite_extra=()
    if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
        vite_extra+=(--open)
    fi

    # VITE_EVAL_API_BASE is deliberately not set: the frontend goes through
    # the Vite proxy (app-eval-web/vite.config.ts) to :${INTERNAL_EVAL_PORT},
    # which avoids direct-connection/CORS problems and hitting the main site.
    # To target a remote API instead: export VITE_EVAL_API_BASE=https://...
    # and run `npm run dev` by hand.
    (
        cd "${EVAL_WEB_DIR}"
        # ${arr[@]+...} expands to nothing for an empty array; a plain
        # "${arr[@]}" is an "unbound variable" error under `set -u` on
        # bash older than 4.4 (e.g. the macOS system bash).
        VITE_EVAL_API_KEY="${api_key}" \
            npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" \
            ${vite_extra[@]+"${vite_extra[@]}"}
    ) &
    EVAL_WEB_PID=$!
    print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
}
|
||||
|
||||
start_services() {
    # Start the internal eval API (uvicorn), optionally a Celery worker and
    # the eval web UI, then print the addresses to use.
    print_header "启动 Internal Eval API 与 Celery"
    cd "${ROOT_DIR}"

    # Refuse to start when something already listens on the API port
    # (checked only when lsof is available).
    if command -v lsof >/dev/null 2>&1 \
        && lsof -nP -iTCP:"${INTERNAL_EVAL_PORT}" -sTCP:LISTEN >/dev/null 2>&1; then
        print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
        print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
        exit 1
    fi

    # Same as the main dev script: judge/production LLM settings come from
    # .env; docs are off by default (export INTERNAL_EVAL_ENABLE_DOCS=1 locally).
    local uvicorn_args=(
        app.internal_main:internal_app --reload
        --reload-exclude 'alembic/**'
        --reload-exclude 'alembic.ini'
        --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}"
    )
    "${UVICORN_BIN}" "${uvicorn_args[@]}" &
    API_PID=$!
    print_ok "Internal Eval API 已启动 (PID: ${API_PID})"

    if [[ "${SKIP_CELERY}" == "1" ]]; then
        print_warn "已跳过 Celery (SKIP_CELERY=1);实验 run 接口需要 worker 才能执行"
    else
        "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" &
        CELERY_PID=$!
        print_ok "Celery 已启动 (PID: ${CELERY_PID})"
    fi

    if [[ "${START_EVAL_WEB}" == "1" ]]; then
        start_eval_web
    fi

    local api_base="http://127.0.0.1:${INTERNAL_EVAL_PORT}"
    echo
    echo -e "${GREEN}内部评测环境启动完成${NC}"
    echo "【请用浏览器打开】评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ (/internal 会代理到 API :${INTERNAL_EVAL_PORT})"
    echo "内部评测 API: ${api_base}/health"
    echo "评测 REST 前缀: ${api_base}/internal/api/evaluation"
    if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] || grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
        echo "API 文档: ${api_base}/docs"
    fi
    echo "说明文档: api/docs/internal-eval.md"
    echo "按 Ctrl+C 停止所有进程"
}
|
||||
|
||||
main() {
    # Entry point: optional docker infrastructure, environment preparation,
    # service start-up, then block until the started processes exit.
    print_header "Life Echo 内部回归评测 — 一键启动"
    echo -e "${BLUE}说明:${NC} 不启动主站 API(main:app / 默认 8000);仅启动 internal_main(:${INTERNAL_EVAL_PORT})。"
    echo ""

    require_cmd "uv"

    trap cleanup EXIT INT TERM

    if [[ "${SKIP_INFRA}" == "1" ]]; then
        print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
    else
        require_cmd "docker"
        start_infra
        # A database that is not ready yet is tolerated at this point.
        wait_postgres_ready || true
    fi

    # Preparation steps, in order; each one is an ordinary function call.
    local step
    for step in \
        ensure_venv \
        ensure_dotenv_from_development \
        check_env_file \
        check_internal_eval_key \
        run_migrations \
        start_services; do
        "${step}"
    done

    # Wait on every background process that was actually started.
    local wait_pids=("${API_PID}")
    if [[ "${SKIP_CELERY}" != "1" ]]; then
        wait_pids+=("${CELERY_PID}")
    fi
    if [[ "${START_EVAL_WEB}" == "1" ]] && [[ -n "${EVAL_WEB_PID}" ]]; then
        wait_pids+=("${EVAL_WEB_PID}")
    fi
    wait "${wait_pids[@]}"
}
|
||||
|
||||
main "$@"
|
||||
@@ -1,5 +1,10 @@
|
||||
from app.features.evaluation.importers.script_json import parse_script_json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from app.features.evaluation.importers.user_export_markdown import (
|
||||
extract_dialogue_turns_from_export_md,
|
||||
extract_user_utterances_from_export_md,
|
||||
)
|
||||
|
||||
@@ -27,3 +32,43 @@ hello
|
||||
hi
|
||||
"""
|
||||
assert extract_user_utterances_from_export_md(md) == ["hello"]
|
||||
|
||||
|
||||
def test_extract_dialogue_turns_from_export_md() -> None:
    """Two numbered rounds in an export produce two (user, AI) pairs in order."""
    export_md = """
#### 轮次 1 — x

**用户:**

u1

**AI:**

a1

#### 轮次 2 — y

**用户:**

u2

**AI:**

a2
"""
    expected_turns = [("u1", "a1"), ("u2", "a2")]
    assert extract_dialogue_turns_from_export_md(export_md) == expected_turns
|
||||
|
||||
|
||||
def test_extract_dialogue_turns_from_repo_user_export() -> None:
    """Parse an on-disk user export and sanity-check the extracted turns.

    The export is looked up in the ``user_exports`` directory that sits beside
    this test file's own directory. The test is skipped when that file does
    not exist, so it only runs where the fixture is available.
    """
    export_path = Path(__file__).resolve().parents[1].joinpath(
        "user_exports",
        "13701020203_e27fcd97-fefa-43b8-a7a3-3ecd49ebf5f0.md",
    )
    if not export_path.is_file():
        pytest.skip("user export fixture not present")

    dialogue_turns = extract_dialogue_turns_from_export_md(
        export_path.read_text(encoding="utf-8")
    )

    # The export is expected to hold at least five rounds, opening with a greeting.
    assert len(dialogue_turns) >= 5
    first_user_text = dialogue_turns[0][0]
    assert "你好" in first_user_text
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,8 @@
|
||||
/* Document-wide defaults: system font stack and the dark colour scheme. */
:root {
  font-family: system-ui, sans-serif;
  line-height: 1.5;
  /* Light text on a near-black background. The earlier light-theme values
     (#1a1a1a on #f5f5f7) were overridden by these declarations in the same
     rule and had no effect, so they are dropped. */
  color: #e6edf3;
  background: #0f1419;
}
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
|
||||
@@ -1,9 +1,19 @@
|
||||
import react from "@vitejs/plugin-react";
|
||||
import { defineConfig } from "vite";
|
||||
|
||||
/**
 * During development VITE_EVAL_API_BASE can be left empty: the frontend then
 * requests /internal/..., which Vite forwards to port 8001. This avoids
 * wrong-port connections, CORS problems, or browser policies that would
 * otherwise show up as what looks like a 404.
 */
const devServerPort = 5174;

// Proxy settings for every request whose path starts with /internal.
const internalApiProxy = {
  target: "http://127.0.0.1:8001",
  changeOrigin: true,
};

export default defineConfig({
  plugins: [react()],
  server: {
    port: devServerPort,
    proxy: {
      "/internal": internalApiProxy,
    },
  },
});
|
||||
|
||||
Reference in New Issue
Block a user