106 lines
2.2 KiB
Python
106 lines
2.2 KiB
Python
from app.features.evaluation.importers.script_json import parse_script_json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from app.features.evaluation.importers.user_export_markdown import (
|
|
extract_dialogue_turns_from_export_md,
|
|
extract_memoir_chapter_sections_from_export_md,
|
|
extract_source_user_id_from_export_md,
|
|
extract_user_utterances_from_export_md,
|
|
)
|
|
|
|
|
|
def test_parse_script_json_list() -> None:
    """A bare JSON array is expected to parse into utterances plus empty metadata."""
    utterances, metadata = parse_script_json('["a", "b"]')

    assert utterances == ["a", "b"]
    # A plain list carries no extra keys, so the metadata mapping must be empty.
    assert metadata == {}
|
|
|
|
|
|
def test_parse_script_json_object() -> None:
    """A JSON object is expected to split into its utterances and the remaining keys."""
    payload = '{"utterances":["x"],"foo":1}'

    utterances, metadata = parse_script_json(payload)

    assert utterances == ["x"]
    # Everything other than "utterances" should come back as metadata.
    assert metadata == {"foo": 1}
|
|
|
|
|
|
def test_extract_user_lines_from_export_md() -> None:
    """Only the text under the user marker is expected back; the AI reply is excluded."""
    # Markdown is kept flush-left so the markers sit at the start of their lines.
    export_text = """
**用户:**

hello

**AI:**

hi
"""
    user_lines = extract_user_utterances_from_export_md(export_text)

    assert user_lines == ["hello"]
|
|
|
|
|
|
def test_extract_dialogue_turns_from_export_md() -> None:
    """Two round headings are expected to yield two (user, AI) pairs in document order."""
    # Markdown is kept flush-left so headings and markers sit at the start of their lines.
    export_text = """
#### 轮次 1 — x

**用户:**

u1

**AI:**

a1

#### 轮次 2 — y

**用户:**

u2

**AI:**

a2
"""
    expected_turns = [("u1", "a1"), ("u2", "a2")]

    turns = extract_dialogue_turns_from_export_md(export_text)

    assert turns == expected_turns
|
|
|
|
|
|
def test_extract_dialogue_turns_from_repo_user_export() -> None:
    """Run the turn extractor against an on-disk export file, skipping when it is absent."""
    # parents[1] is the directory one level above the folder holding this test file.
    base_dir = Path(__file__).resolve().parents[1]
    fixture_path = (
        base_dir
        / "user_exports"
        / "13701020203_e27fcd97-fefa-43b8-a7a3-3ecd49ebf5f0.md"
    )

    # The export file is optional; without it there is nothing to check.
    if not fixture_path.is_file():
        pytest.skip("user export fixture not present")

    export_text = fixture_path.read_text(encoding="utf-8")
    turns = extract_dialogue_turns_from_export_md(export_text)

    assert len(turns) >= 5
    # The first element of the first turn should contain the greeting.
    assert "你好" in turns[0][0]
|
|
|
|
|
|
def test_extract_source_user_id_from_export_md() -> None:
    """The id inside the backticks of the ``User ID`` bullet is expected back verbatim."""
    expected_id = "e27fcd97-fefa-43b8-a7a3-3ecd49ebf5f0"
    export_text = f"- **User ID:** `{expected_id}`\n"

    extracted_id = extract_source_user_id_from_export_md(export_text)

    assert extracted_id == expected_id
|
|
|
|
|
|
def test_extract_memoir_chapter_sections_from_export_md() -> None:
    """Each ``###`` heading under the memoir heading is expected to become one section.

    Sections are indexed as (title, body); the image placeholder must not
    appear in the first section's body.
    """
    # Markdown is kept flush-left so the headings sit at the start of their lines.
    export_text = """
## 回忆录章节(生成正文)

### First chapter

Line a.
{{IMAGE:foo}}

### Second title

Line b.
"""
    sections = extract_memoir_chapter_sections_from_export_md(export_text)

    assert len(sections) == 2

    first_section = sections[0]
    second_section = sections[1]

    assert first_section[0] == "First chapter"
    assert "Line a." in first_section[1]
    # The placeholder line should be gone from the chapter body.
    assert "{{IMAGE" not in first_section[1]
    assert second_section[0] == "Second title"
|