feat: align surgery API with schemas and extend client tooling

- Refactor app API and schemas; adjust surgery pipeline, repository, and session manager.

- Improve consumption TSV logging and consumable vision integration; trim voice resolution.

- Add Baidu Face 1:N search script, .env.example entries, and client API integration doc.

- Update demo client, staging checklist, surgery interface doc, and related tests; add sample face image.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-23 16:09:20 +08:00
parent 0c05463617
commit 69980d8073
20 changed files with 994 additions and 610 deletions

View File

@@ -698,9 +698,8 @@
{ key: "timestamp", label: "time" },
{ key: "item_id", label: "item_id" },
{ key: "item_name", label: "item_name" },
{ key: "quantity", label: "qty" },
{ key: "qty", label: "qty" },
{ key: "doctor_id", label: "doctor" },
{ key: "source", label: "source" },
]);
renderTable("汇总 summary[]", summary, [
{ key: "item_id", label: "item_id" },
@@ -710,10 +709,50 @@
};
// ============================================================
// §4.4 pending-confirmation + 可选 TTS
// §4.4 pending-confirmation(响应内带 Base64 MP3)+ 可选自动播报
// ============================================================
// Module-level state for pending-confirmation polling and prompt playback.
// NOTE(review): pollTimer is presumably the handle of the polling timer — its use is not visible in this section.
let pollTimer = null;
// Confirmation id most recently handed to the automatic TTS path.
let lastTtsConfirmationId = null;
/** Updated only after audio/TTS has actually been played successfully, so a failed playback is not skipped on the next poll. */
let lastSpokenConfirmationId = null;
// Most recent pending-confirmation response body; cleared when there is nothing pending or the surgery id changes.
let lastPendingPayload = null;
/** Approach 1: play a very short silent clip inside the first user gesture to unlock autoplay; afterwards pending-confirmation MP3s reuse the same Audio element. */
const SILENT_UNLOCK_DATA_URL =
"data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAE=";
// Single Audio element, created lazily by getSharedPromptAudio().
let sharedPromptAudio = null;
// Set to true once the silent clip has played successfully in the pointerdown handler.
let audioPlaybackUnlocked = false;
// Object URL currently assigned to the shared Audio element; revoked before being replaced.
let lastPromptBlobUrl = null;
/**
 * Return the page-wide Audio element used for prompt playback, creating it on
 * first use.
 *
 * The element is stored in the module-level `sharedPromptAudio` so that every
 * caller works with the same instance.
 *
 * @returns {HTMLAudioElement} The shared Audio element (preload "auto", volume 1).
 */
function getSharedPromptAudio() {
  if (sharedPromptAudio) {
    return sharedPromptAudio;
  }
  sharedPromptAudio = new Audio();
  Object.assign(sharedPromptAudio, { preload: "auto", volume: 1 });
  return sharedPromptAudio;
}
// Unlock audio autoplay on the first pointer gesture anywhere on the page:
// a tiny silent clip is played through the shared Audio element, then the
// element is paused and rewound so later prompt audio can reuse it.
document.addEventListener(
  "pointerdown",
  async function unlockAudioPlayback() {
    if (audioPlaybackUnlocked) {
      return;
    }
    try {
      const player = getSharedPromptAudio();
      // Release any blob URL still attached before swapping the source.
      if (lastPromptBlobUrl) {
        URL.revokeObjectURL(lastPromptBlobUrl);
        lastPromptBlobUrl = null;
      }
      player.src = SILENT_UNLOCK_DATA_URL;
      await player.play();
      player.pause();
      player.currentTime = 0;
      audioPlaybackUnlocked = true;
    } catch (unlockErr) {
      // Best effort only: the manual play button remains available.
      console.warn("[demo-client] 音频自动播放未解锁(可点「播放话术」)", unlockErr);
    }
  },
  { capture: true, once: true, passive: true },
);
function pickZhTtsVoice() {
if (!window.speechSynthesis) return null;
@@ -747,40 +786,63 @@
});
}
/** 优先 GET /prompt-audio 播放百度 MP3;失败时回退 speechSynthesis */
async function playPromptTts(surgeryId, confirmationId, textFallback) {
const path = `/client/surgeries/${surgeryId}/pending-confirmation/${encodeURIComponent(confirmationId)}/prompt-audio`;
const u = baseUrl() + path;
try {
const res = await fetch(u);
if (res.ok) {
const blob = await res.blob();
/** 解码 GET pending 的 prompt_audio_mp3_base64;优先用解锁后的单例 Audio,失败则回退 speechSynthesis */
async function playPromptAudioBase64(b64, textFallback) {
const t = (textFallback || "").trim();
const raw = typeof b64 === "string" ? b64.replace(/\s+/g, "") : "";
if (raw) {
try {
const bin = atob(raw);
const bytes = new Uint8Array(bin.length);
for (let i = 0; i < bin.length; i++) bytes[i] = bin.charCodeAt(i);
const blob = new Blob([bytes], { type: "audio/mpeg" });
const o = URL.createObjectURL(blob);
return new Promise((resolve, reject) => {
const a = new Audio();
a.preload = "auto";
a.src = o;
a.onended = () => {
const a = getSharedPromptAudio();
if (lastPromptBlobUrl) {
URL.revokeObjectURL(lastPromptBlobUrl);
lastPromptBlobUrl = null;
}
lastPromptBlobUrl = o;
a.pause();
a.currentTime = 0;
a.src = o;
try {
await new Promise((resolve, reject) => {
const cleanupBlob = () => {
if (lastPromptBlobUrl === o) {
URL.revokeObjectURL(o);
lastPromptBlobUrl = null;
}
};
a.onended = () => {
cleanupBlob();
resolve();
};
a.onerror = () => {
cleanupBlob();
reject(new Error("Audio 元素解码/播放失败"));
};
const p = a.play();
if (p && typeof p.catch === "function") {
p.catch((err) => {
cleanupBlob();
reject(err);
});
}
});
return;
} catch (playErr) {
if (lastPromptBlobUrl === o) {
URL.revokeObjectURL(o);
resolve();
};
a.onerror = () => {
URL.revokeObjectURL(o);
reject(new Error("Audio 元素播放失败"));
};
const p = a.play();
if (p && typeof p.catch === "function") {
p.catch((err) => {
URL.revokeObjectURL(o);
reject(err);
});
lastPromptBlobUrl = null;
}
});
console.warn("[demo-client] MP3 play() 被拒或失败,尝试浏览器朗读", playErr);
}
} catch (e) {
console.warn("[demo-client] Base64 MP3 解码失败,尝试浏览器朗读", e);
}
} catch (e) {
console.warn("[demo-client] prompt-audio 不可用,回退浏览器 TTS", e);
}
return speakTextPromise((textFallback || "").trim());
if (t) await speakTextPromise(t);
}
if (window.speechSynthesis) {
@@ -788,9 +850,22 @@
}
// When the surgery id field is edited, reset the remembered confirmation ids
// and the cached pending payload.
$("surgery-id").addEventListener("input", () => {
lastTtsConfirmationId = null;
lastSpokenConfirmationId = null;
lastPendingPayload = null;
});
/**
 * Replay the prompt of the most recently cached pending confirmation on demand.
 *
 * Does nothing when no payload is cached or the payload has no
 * `confirmation_id`. On successful playback the confirmation id is recorded in
 * `lastSpokenConfirmationId`; a playback failure is logged and otherwise ignored.
 *
 * @returns {Promise<void>} Resolves once playback has finished or failed.
 */
async function playLastPendingManually() {
  const pending = lastPendingPayload;
  if (!(pending && pending.confirmation_id)) {
    return;
  }
  const fallbackText = (pending.prompt_text || "").trim();
  try {
    await playPromptAudioBase64(pending.prompt_audio_mp3_base64, fallbackText);
    // Mark as spoken only after playback actually completed.
    lastSpokenConfirmationId = pending.confirmation_id;
  } catch (playbackErr) {
    console.warn("[demo-client] 手动播放失败", playbackErr);
  }
}
async function fetchPendingOnce() {
const sid = surgeryId();
if (!/^\d{6}$/.test(sid)) return;
@@ -818,6 +893,7 @@
const box = $("pending-render");
if (res.status === 200 && body && body.confirmation_id) {
box.hidden = false;
lastPendingPayload = body;
$("confirmation-id").value = body.confirmation_id;
const opts = (body.options || [])
.map(o => `<div class="option-row"><span>${o.label}</span><span class="muted">${(o.confidence * 100).toFixed(1)}%</span></div>`)
@@ -826,14 +902,27 @@
<div><strong>confirmation_id:</strong> <span class="kv">${body.confirmation_id}</span></div>
<div style="margin-top:4px"><strong>prompt_text:</strong> ${body.prompt_text || ""}</div>
<div style="margin-top:4px"><strong>Top1:</strong> ${body.model_top1_label} <span class="muted">(${(body.model_top1_confidence * 100).toFixed(1)}%)</span></div>
<div style="margin-top:6px"><strong>options:</strong>${opts || '<div class="muted">(无)</div>'}</div>`;
<div style="margin-top:6px"><strong>options:</strong>${opts || '<div class="muted">(无)</div>'}</div>
<div style="margin-top:10px">
<button type="button" class="secondary" id="btn-play-pending">▶ 播放话术MP3 或浏览器朗读)</button>
<span class="small muted" style="margin-left:8px">首次在页面任意处点按可解锁自动播报;仍失败时点此处</span>
</div>`;
const btnPlay = $("btn-play-pending");
if (btnPlay) btnPlay.onclick = () => void playLastPendingManually();
const pt = (body.prompt_text || "").trim();
const ttsOn = $("tts-pending") && $("tts-pending").checked;
if (ttsOn && pt && body.confirmation_id !== lastTtsConfirmationId) {
lastTtsConfirmationId = body.confirmation_id;
void playPromptTts(sid, body.confirmation_id, pt).catch((e) => console.warn(e));
if (ttsOn && pt && body.confirmation_id !== lastSpokenConfirmationId) {
void (async () => {
try {
await playPromptAudioBase64(body.prompt_audio_mp3_base64, pt);
lastSpokenConfirmationId = body.confirmation_id;
} catch (e) {
console.warn("[demo-client] 自动播报未完成(可点「播放话术」)", e);
}
})();
}
} else if (res.status === 404) {
lastPendingPayload = null;
box.hidden = false;
box.innerHTML = '<span class="muted">暂无待确认项。</span>';
} else {
@@ -1020,7 +1109,7 @@
$("btn-resolve").disabled = true;
$("audio-preview").hidden = true;
$("btn-download").style.display = "none";
lastTtsConfirmationId = null;
lastSpokenConfirmationId = null;
$("rec-info").textContent = "已提交,正在拉取下一条待确认…";
$("rec-info").className = "ok small";
await fetchPendingOnce();