添加API服务模块

This commit is contained in:
iammm0
2026-01-07 11:56:46 +08:00
parent 9ca3a3a89a
commit c634cb2daa
8 changed files with 241 additions and 0 deletions

11
api/services/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
"""
Service modules.
"""
from .asr_service import asr_service
from .tts_service import tts_service
# Names re-exported as this package's public API.
# NOTE(review): pdf_service (api/services/pdf_service.py) is not re-exported
# here — confirm that this is intentional.
__all__ = [
"asr_service",
"tts_service",
]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,64 @@
"""
ASR 服务:语音转文字
"""
import os
import base64
from typing import Optional, Any, Coroutine
from openai import OpenAI
class ASRService:
"""ASR 服务(语音转文字)"""
def __init__(self):
api_key = os.getenv("OPENAI_API_KEY", "")
if api_key:
self.client = OpenAI(api_key=api_key)
else:
self.client = None
async def transcribe(self, audio_base64: str) -> str | None:
"""
转写音频为文字
Args:
audio_base64: Base64 编码的音频数据
Returns:
转写文本
"""
if not self.client:
# 如果没有配置 API Key返回模拟数据
return "这是模拟的转写文本(请配置 OPENAI_API_KEY 以使用实际 ASR 功能)"
try:
# 解码 Base64 音频
audio_bytes = base64.b64decode(audio_base64)
# 保存临时文件
import tempfile
with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as tmp_file:
tmp_file.write(audio_bytes)
tmp_file_path = tmp_file.name
try:
# 调用 OpenAI Whisper API
with open(tmp_file_path, "rb") as audio_file:
transcript = self.client.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
language="zh" # 中文
)
return transcript.text
finally:
# 清理临时文件
import os
if os.path.exists(tmp_file_path):
os.remove(tmp_file_path)
except Exception as e:
# 出错时返回错误信息
return f"转写失败: {str(e)}"
# Module-level shared instance, created at import time.
asr_service = ASRService()

107
api/services/pdf_service.py Normal file
View File

@@ -0,0 +1,107 @@
"""
PDF 生成服务
"""
from typing import List
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from io import BytesIO
import os
class PDFService:
    """PDF generation service built on ReportLab.

    Renders a book as a cover page, a table of contents and one section per
    chapter. Titles and chapter content are treated as plain text: they are
    XML-escaped before being handed to ReportLab's ``Paragraph``, which
    otherwise interprets ``<``, ``>`` and ``&`` as markup.
    """

    def __init__(self):
        """Register a Chinese-capable font, falling back to Helvetica."""
        try:
            # Use ReportLab's built-in CID font for Chinese text.
            pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
            self.chinese_font = 'STSong-Light'
        except Exception:
            # Registration failed: fall back to the default font
            # (which may not be able to render Chinese).
            self.chinese_font = 'Helvetica'

    @staticmethod
    def _markup_safe(text) -> str:
        """Return *text* as a string escaped for use inside a Paragraph."""
        from xml.sax.saxutils import escape

        return escape("" if text is None else str(text))

    @classmethod
    def _split_paragraphs(cls, content) -> List[str]:
        """Split *content* on blank lines into escaped, non-empty paragraphs."""
        if not content:
            return []
        pieces = (piece.strip() for piece in str(content).split('\n\n'))
        return [cls._markup_safe(piece) for piece in pieces if piece]

    async def generate_pdf(self, book, chapters: List) -> bytes:
        """Generate a PDF for a book.

        Args:
            book: Book (memoir) object; only its ``title`` attribute is read.
            chapters: Chapter objects; ``title`` and ``content`` are read from
                each. A ``content`` of ``None`` is treated as empty.

        Returns:
            The PDF document as bytes.
        """
        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)

        # Shared style settings. 'CJK' word wrapping lets Chinese text, which
        # has no spaces between words, break across lines.
        common = {'fontName': self.chinese_font, 'wordWrap': 'CJK'}
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            spaceAfter=30,
            alignment=1,  # centered
            **common,
        )
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading1'],
            fontSize=18,
            spaceAfter=12,
            **common,
        )
        normal_style = ParagraphStyle(
            'CustomNormal',
            parent=styles['Normal'],
            fontSize=12,
            leading=18,
            **common,
        )

        story = []

        # Cover page.
        story.append(Paragraph(self._markup_safe(book.title), title_style))
        story.append(Spacer(1, 0.5 * inch))
        story.append(PageBreak())

        # Table of contents.
        story.append(Paragraph("目录", heading_style))
        story.append(Spacer(1, 0.2 * inch))
        for i, chapter in enumerate(chapters, 1):
            entry = f"{i}. {self._markup_safe(chapter.title)}"
            story.append(Paragraph(entry, normal_style))
        story.append(PageBreak())

        # Chapter bodies: one heading per chapter, one Paragraph per
        # blank-line-separated block of content, then a page break.
        for chapter in chapters:
            story.append(
                Paragraph(self._markup_safe(chapter.title), heading_style)
            )
            story.append(Spacer(1, 0.2 * inch))
            for para in self._split_paragraphs(chapter.content):
                story.append(Paragraph(para, normal_style))
                story.append(Spacer(1, 0.1 * inch))
            story.append(PageBreak())

        # Render the document into the in-memory buffer.
        doc.build(story)
        return buffer.getvalue()
# Module-level shared instance, created at import time.
pdf_service = PDFService()

View File

@@ -0,0 +1,59 @@
"""
TTS 服务:文字转语音
"""
import base64
import os
from io import BytesIO
from openai import OpenAI
class TTSService:
    """Text-to-speech (TTS) service backed by the OpenAI speech API.

    The OpenAI client is created only when the ``OPENAI_API_KEY`` environment
    variable is set. Otherwise ``client`` is ``None`` and ``synthesize``
    returns an empty string.
    """

    def __init__(self):
        """Create the OpenAI client if ``OPENAI_API_KEY`` is configured."""
        api_key = os.getenv("OPENAI_API_KEY", "")
        # No key configured -> the service is disabled (client stays None).
        self.client = OpenAI(api_key=api_key) if api_key else None

    async def synthesize(self, text: str) -> str:
        """Convert text to speech.

        Args:
            text: The text to convert.

        Returns:
            Base64-encoded audio data, or an empty string when no client is
            configured, when *text* is blank, or when synthesis fails.
        """
        if not self.client:
            # Without an API key, return an empty string.
            return ""
        if not text or not text.strip():
            # Nothing to speak: skip the API round-trip, which could only
            # end in the same empty result.
            return ""

        # Imported locally so this block does not depend on a file-level import.
        import asyncio

        try:
            # The OpenAI client used here is synchronous; run the blocking
            # HTTP call in a worker thread so the event loop stays responsive.
            return await asyncio.to_thread(self._synthesize_blocking, text)
        except Exception as e:
            # On failure, report it and return an empty string.
            print(f"TTS 生成失败: {str(e)}")
            return ""

    def _synthesize_blocking(self, text: str) -> str:
        """Call the speech API and return the audio as Base64 text (blocking)."""
        response = self.client.audio.speech.create(
            model="tts-1",
            voice="alloy",  # options: alloy, echo, fable, onyx, nova, shimmer
            input=text,
        )
        # Collect the streamed chunks into a single bytes object.
        audio_data = b"".join(response.iter_bytes())
        return base64.b64encode(audio_data).decode('utf-8')
# Module-level shared instance, created at import time.
tts_service = TTSService()