diff --git a/api/services/__init__.py b/api/services/__init__.py
new file mode 100644
index 0000000..6609a19
--- /dev/null
+++ b/api/services/__init__.py
@@ -0,0 +1,10 @@
+"""
+Service package: exposes the shared ASR and TTS service instances.
+"""
+from .asr_service import asr_service
+from .tts_service import tts_service
+
+__all__ = [
+    "asr_service",
+    "tts_service",
+]
diff --git a/api/services/__pycache__/__init__.cpython-312.pyc b/api/services/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..088e8f9
Binary files /dev/null and b/api/services/__pycache__/__init__.cpython-312.pyc differ
diff --git a/api/services/__pycache__/asr_service.cpython-312.pyc b/api/services/__pycache__/asr_service.cpython-312.pyc
new file mode 100644
index 0000000..4a9d9ea
Binary files /dev/null and b/api/services/__pycache__/asr_service.cpython-312.pyc differ
diff --git a/api/services/__pycache__/pdf_service.cpython-312.pyc b/api/services/__pycache__/pdf_service.cpython-312.pyc
new file mode 100644
index 0000000..4b14da5
Binary files /dev/null and b/api/services/__pycache__/pdf_service.cpython-312.pyc differ
diff --git a/api/services/__pycache__/tts_service.cpython-312.pyc b/api/services/__pycache__/tts_service.cpython-312.pyc
new file mode 100644
index 0000000..781f521
Binary files /dev/null and b/api/services/__pycache__/tts_service.cpython-312.pyc differ
diff --git a/api/services/asr_service.py b/api/services/asr_service.py
new file mode 100644
index 0000000..9ea2ed3
--- /dev/null
+++ b/api/services/asr_service.py
@@ -0,0 +1,78 @@
+"""
+ASR service: speech-to-text.
+"""
+import asyncio
+import base64
+import logging
+import os
+import tempfile
+from typing import Optional, Any, Coroutine
+from openai import OpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class ASRService:
+    """Speech-to-text service backed by the OpenAI Whisper API."""
+
+    def __init__(self):
+        # Without OPENAI_API_KEY the client stays None and transcribe()
+        # returns a fixed placeholder instead of calling the API.
+        api_key = os.getenv("OPENAI_API_KEY", "")
+        if api_key:
+            self.client = OpenAI(api_key=api_key)
+        else:
+            self.client = None
+
+    async def transcribe(self, audio_base64: str) -> str | None:
+        """
+        Transcribe audio to text.
+
+        Args:
+            audio_base64: Base64-encoded audio data.
+
+        Returns:
+            The transcribed text. If no API key is configured, a fixed
+            placeholder string. If decoding or the API call fails, a
+            string beginning with "转写失败: " followed by the error text.
+        """
+        if not self.client:
+            # No API key configured: return mock data.
+            return "这是模拟的转写文本(请配置 OPENAI_API_KEY 以使用实际 ASR 功能)"
+
+        try:
+            audio_bytes = base64.b64decode(audio_base64)
+            # The OpenAI client used here is synchronous; run the file I/O
+            # and HTTP request in a worker thread so the event loop is not
+            # blocked for the duration of the upload.
+            return await asyncio.to_thread(self._transcribe_bytes, audio_bytes)
+        except Exception as e:
+            # Existing contract: failures are reported as text, not raised.
+            logger.exception("ASR transcription failed")
+            return f"转写失败: {str(e)}"
+
+    def _transcribe_bytes(self, audio_bytes: bytes) -> str:
+        """Write the audio to a temporary .m4a file and send it to Whisper."""
+        tmp_file_path = None
+        try:
+            with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as tmp_file:
+                # Record the path before writing so a failed write is
+                # still cleaned up in the finally block.
+                tmp_file_path = tmp_file.name
+                tmp_file.write(audio_bytes)
+
+            with open(tmp_file_path, "rb") as audio_file:
+                transcript = self.client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file,
+                    language="zh",  # Chinese
+                )
+            return transcript.text
+        finally:
+            # Remove the temporary file whether or not the call succeeded.
+            if tmp_file_path and os.path.exists(tmp_file_path):
+                os.remove(tmp_file_path)
+
+
+# Global instance
+asr_service = ASRService()
diff --git a/api/services/pdf_service.py b/api/services/pdf_service.py
new file mode 100644
index 0000000..0f43167
--- /dev/null
+++ b/api/services/pdf_service.py
@@ -0,0 +1,118 @@
+"""
+PDF generation service.
+"""
+from typing import List
+from xml.sax.saxutils import escape
+from reportlab.lib.pagesizes import letter, A4
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.units import inch
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase.cidfonts import UnicodeCIDFont
+from io import BytesIO
+import os
+
+
+def _markup_safe(text) -> str:
+    """Return text escaped for ReportLab Paragraph markup ('' for None)."""
+    if text is None:
+        return ""
+    return escape(str(text))
+
+
+class PDFService:
+    """PDF generation service."""
+
+    def __init__(self):
+        # Try to register a font that can render Chinese text.
+        try:
+            # UnicodeCIDFont needs no font file on disk.
+            pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
+            self.chinese_font = 'STSong-Light'
+        except Exception:
+            # Registration failed: fall back to the default font, which
+            # may not be able to render Chinese.
+            self.chinese_font = 'Helvetica'
+
+    async def generate_pdf(self, book, chapters: List) -> bytes:
+        """
+        Generate a PDF with a cover page, a table of contents and one
+        section per chapter.
+
+        Args:
+            book: Memoir object; its ``title`` attribute is read.
+            chapters: Chapter objects; ``title`` and ``content`` are read.
+
+        Returns:
+            The PDF as bytes.
+        """
+        buffer = BytesIO()
+        doc = SimpleDocTemplate(buffer, pagesize=A4)
+
+        # Styles
+        styles = getSampleStyleSheet()
+        title_style = ParagraphStyle(
+            'CustomTitle',
+            parent=styles['Heading1'],
+            fontSize=24,
+            spaceAfter=30,
+            alignment=1,  # centered
+            fontName=self.chinese_font
+        )
+
+        heading_style = ParagraphStyle(
+            'CustomHeading',
+            parent=styles['Heading1'],
+            fontSize=18,
+            spaceAfter=12,
+            fontName=self.chinese_font
+        )
+
+        normal_style = ParagraphStyle(
+            'CustomNormal',
+            parent=styles['Normal'],
+            fontSize=12,
+            leading=18,
+            fontName=self.chinese_font
+        )
+
+        # Paragraph parses its text as XML-like markup, so user-supplied
+        # titles and content are escaped; otherwise characters such as "<"
+        # and "&" are read as markup and can corrupt the text or raise.
+        story = []
+
+        # Cover page
+        story.append(Paragraph(_markup_safe(book.title), title_style))
+        story.append(Spacer(1, 0.5*inch))
+        story.append(PageBreak())
+
+        # Table of contents
+        story.append(Paragraph("目录", heading_style))
+        story.append(Spacer(1, 0.2*inch))
+        for i, chapter in enumerate(chapters, 1):
+            story.append(Paragraph(f"{i}. {_markup_safe(chapter.title)}", normal_style))
+        story.append(PageBreak())
+
+        # Chapter bodies
+        for chapter in chapters:
+            story.append(Paragraph(_markup_safe(chapter.title), heading_style))
+            story.append(Spacer(1, 0.2*inch))
+
+            # Blank lines separate paragraphs; a missing body is treated
+            # as empty instead of raising AttributeError.
+            paragraphs = (chapter.content or "").split('\n\n')
+            for para in paragraphs:
+                if para.strip():
+                    story.append(Paragraph(_markup_safe(para.strip()), normal_style))
+                    story.append(Spacer(1, 0.1*inch))
+
+            story.append(PageBreak())
+
+        # Build the PDF
+        doc.build(story)
+        return buffer.getvalue()
+
+
+# Global instance
+pdf_service = PDFService()
diff --git a/api/services/tts_service.py b/api/services/tts_service.py
new file mode 100644
index 0000000..4eb158b
--- /dev/null
+++ b/api/services/tts_service.py
@@ -0,0 +1,71 @@
+"""
+TTS service: text-to-speech.
+"""
+import asyncio
+import base64
+import logging
+import os
+from io import BytesIO
+
+from openai import OpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class TTSService:
+    """Text-to-speech service backed by the OpenAI speech API."""
+
+    def __init__(self):
+        # Without OPENAI_API_KEY the client stays None and synthesize()
+        # returns an empty string.
+        api_key = os.getenv("OPENAI_API_KEY", "")
+        if api_key:
+            self.client = OpenAI(api_key=api_key)
+        else:
+            self.client = None
+
+    async def synthesize(self, text: str) -> str:
+        """
+        Convert text to speech.
+
+        Args:
+            text: The text to convert.
+
+        Returns:
+            Base64-encoded audio data, or an empty string when no API key
+            is configured, the text is blank, or synthesis fails.
+        """
+        if not self.client:
+            # No API key configured.
+            return ""
+
+        if not text or not text.strip():
+            # Nothing to synthesize; skip the API round trip.
+            return ""
+
+        try:
+            # The OpenAI client used here is synchronous; run the request
+            # in a worker thread so the event loop is not blocked.
+            audio_data = await asyncio.to_thread(self._synthesize_bytes, text)
+            return base64.b64encode(audio_data).decode('utf-8')
+        except Exception as e:
+            # Existing contract: failures yield an empty string.
+            logger.error("TTS 生成失败: %s", e)
+            return ""
+
+    def _synthesize_bytes(self, text: str) -> bytes:
+        """Call the speech endpoint and return the raw audio bytes."""
+        response = self.client.audio.speech.create(
+            model="tts-1",
+            voice="alloy",  # options: alloy, echo, fable, onyx, nova, shimmer
+            input=text,
+        )
+
+        audio_bytes = BytesIO()
+        for chunk in response.iter_bytes():
+            audio_bytes.write(chunk)
+        return audio_bytes.getvalue()
+
+
+# Global instance
+tts_service = TTSService()