From d12d55020f3849f8d7277d29dbd6718b14c97dba Mon Sep 17 00:00:00 2001
From: Satsuki Akiba
Date: Sat, 7 Jun 2025 19:43:33 +0900
Subject: [PATCH] Close #3

---
 services/tts.ts | 383 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 308 insertions(+), 75 deletions(-)

diff --git a/services/tts.ts b/services/tts.ts
index 386a9e7..1dd3394 100644
--- a/services/tts.ts
+++ b/services/tts.ts
@@ -3,6 +3,114 @@ import path from "path";
 import ffmpegPath from "ffmpeg-static";
 import { config } from "./config.js";
 
+/**
+ * Split text into chunks small enough for one TTS request each.
+ *
+ * Text is cut after sentence terminators (Japanese/ASCII punctuation and line
+ * breaks) and the pieces are packed greedily up to `maxLength` UTF-16 units.
+ * A piece that is still too long is handed to `splitLongSentence`.
+ *
+ * @param text - Script text to split.
+ * @param maxLength - Target upper bound for a chunk's length (default 50).
+ * @returns Chunks in their original order; text that already fits is returned as-is.
+ */
+function splitTextIntoChunks(text: string, maxLength: number = 50): string[] {
+  if (text.length <= maxLength) {
+    return [text];
+  }
+
+  const chunks: string[] = [];
+  let currentChunk = "";
+
+  // Split *after* each terminator (lookbehind) so the punctuation stays with
+  // its sentence instead of being orphaned at the start of the next chunk.
+  const sentences = text.split(/(?<=[。！？!?\n])/);
+
+  for (let i = 0; i < sentences.length; i++) {
+    const sentence = sentences[i];
+    if (!sentence) continue;
+
+    if (currentChunk.length + sentence.length <= maxLength) {
+      currentChunk += sentence;
+    } else {
+      if (currentChunk.trim()) {
+        chunks.push(currentChunk.trim());
+      }
+
+      // If single sentence is too long, split further
+      if (sentence.length > maxLength) {
+        const subChunks = splitLongSentence(sentence, maxLength);
+        chunks.push(...subChunks);
+        currentChunk = "";
+      } else {
+        currentChunk = sentence;
+      }
+    }
+  }
+
+  if (currentChunk.trim()) {
+    chunks.push(currentChunk.trim());
+  }
+
+  return chunks.filter(chunk => chunk.length > 0);
+}
+
+/**
+ * Split an over-long sentence at natural break points.
+ *
+ * Cuts after commas and after the characters は/が/で/を/に/と, packs the
+ * pieces greedily, then hard-splits anything still longer than `maxLength`.
+ * NOTE(review): the particle match is purely character-based, so it can also
+ * cut inside a word that happens to contain one of those characters.
+ *
+ * @param sentence - Sentence to split.
+ * @param maxLength - Target upper bound for a chunk's length.
+ * @returns Chunks in their original order; a sentence that already fits is returned as-is.
+ */
+function splitLongSentence(sentence: string, maxLength: number): string[] {
+  if (sentence.length <= maxLength) {
+    return [sentence];
+  }
+
+  const chunks: string[] = [];
+  let currentChunk = "";
+
+  // Split *after* commas and common Japanese particles so each stays attached
+  // to the text it follows.
+  const parts = sentence.split(/(?<=[、，,はがでをにと])/);
+
+  for (const part of parts) {
+    if (currentChunk.length + part.length <= maxLength) {
+      currentChunk += part;
+    } else {
+      if (currentChunk.trim()) {
+        chunks.push(currentChunk.trim());
+      }
+      currentChunk = part;
+    }
+  }
+
+  if (currentChunk.trim()) {
+    chunks.push(currentChunk.trim());
+  }
+
+  // If still too long, force split. Slice by code point, not UTF-16 unit: a
+  // lone surrogate would make encodeURIComponent throw URIError downstream.
+  const finalChunks: string[] = [];
+  for (const chunk of chunks) {
+    if (chunk.length > maxLength) {
+      const codePoints = Array.from(chunk);
+      for (let i = 0; i < codePoints.length; i += maxLength) {
+        finalChunks.push(codePoints.slice(i, i + maxLength).join(""));
+      }
+    } else {
+      finalChunks.push(chunk);
+    }
+  }
+
+  return finalChunks.filter(chunk => chunk.length > 0);
+}
+
 interface VoiceStyle {
   styleId: number;
 }
@@ -12,6 +120,146 @@ const defaultVoiceStyle: VoiceStyle = {
   styleId: config.voicevox.styleId,
 };
 
+/**
+ * Generate audio for a single text chunk.
+ *
+ * POSTs the text to VOICEVOX `/audio_query`, POSTs the returned query to
+ * `/synthesis`, and writes the WAV bytes to `<itemId>_chunk_<chunkIndex>.wav`
+ * in the podcast audio directory.
+ *
+ * @param chunkText - Text to synthesize.
+ * @param chunkIndex - Zero-based chunk position; used in the file name and logs.
+ * @param itemId - Item identifier used as the file name prefix.
+ * @returns Absolute path of the WAV file that was written.
+ * @throws Error if either VOICEVOX request returns a non-OK status.
+ */
+async function generateAudioForChunk(
+  chunkText: string,
+  chunkIndex: number,
+  itemId: string,
+): Promise<string> {
+  const encodedText = encodeURIComponent(chunkText);
+  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
+  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
+
+  console.log(`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`);
+
+  const queryResponse = await fetch(queryUrl, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    },
+  });
+
+  if (!queryResponse.ok) {
+    const errorText = await queryResponse.text();
+    throw new Error(
+      `VOICEVOX audio query failed for chunk ${chunkIndex + 1} (${queryResponse.status}): ${errorText}`,
+    );
+  }
+
+  const audioQuery = await queryResponse.json();
+
+  console.log(`チャンク${chunkIndex + 1}の音声合成開始: ${itemId}`);
+  const audioResponse = await fetch(synthesisUrl, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify(audioQuery),
+    signal: AbortSignal.timeout(300000), // 5-minute timeout (per chunk)
+  });
+
+  if (!audioResponse.ok) {
+    const errorText = await audioResponse.text();
+    throw new Error(
+      `VOICEVOX synthesis failed for chunk ${chunkIndex + 1} (${audioResponse.status}): ${errorText}`,
+    );
+  }
+
+  const audioArrayBuffer = await audioResponse.arrayBuffer();
+  const audioBuffer = Buffer.from(audioArrayBuffer);
+
+  // Prepare the output directory
+  const outputDir = config.paths.podcastAudioDir;
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir, { recursive: true });
+  }
+
+  const chunkWavPath = path.resolve(outputDir, `${itemId}_chunk_${chunkIndex}.wav`);
+  fs.writeFileSync(chunkWavPath, audioBuffer);
+
+  console.log(`チャンク${chunkIndex + 1}のWAVファイル保存完了: ${chunkWavPath}`);
+
+  return chunkWavPath;
+}
+
+/**
+ * Concatenate multiple WAV files into a single MP3 file.
+ *
+ * Writes a temporary FFmpeg concat list into the podcast audio directory and
+ * runs FFmpeg's concat demuxer with libmp3lame. The list file and every input
+ * WAV are deleted afterwards, whether or not FFmpeg succeeded.
+ *
+ * @param wavFiles - WAV paths to join, in playback order.
+ * @param outputMp3Path - Destination MP3 path (overwritten if present).
+ * @throws Error if FFmpeg exits with a non-zero code.
+ */
+async function concatenateAudioFiles(
+  wavFiles: string[],
+  outputMp3Path: string,
+): Promise<void> {
+  const ffmpegCmd = ffmpegPath || "ffmpeg";
+
+  // Create a temporary file list for FFmpeg concat
+  const tempDir = config.paths.podcastAudioDir;
+  const listFilePath = path.resolve(tempDir, `concat_list_${Date.now()}.txt`);
+
+  try {
+    // Write file list in FFmpeg concat format; a single quote inside a quoted
+    // path must be written as '\'' or the list is mis-parsed.
+    const fileList = wavFiles
+      .map(file => `file '${path.resolve(file).replace(/'/g, "'\\''")}'`)
+      .join('\n');
+    fs.writeFileSync(listFilePath, fileList);
+
+    console.log(`音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`);
+
+    const result = Bun.spawnSync([
+      ffmpegCmd,
+      "-f", "concat",
+      "-safe", "0",
+      "-i", listFilePath,
+      "-codec:a", "libmp3lame",
+      "-qscale:a", "2",
+      "-y", // Overwrite output file
+      outputMp3Path,
+    ]);
+
+    if (result.exitCode !== 0) {
+      const stderr = result.stderr
+        ? new TextDecoder().decode(result.stderr)
+        : "Unknown error";
+      throw new Error(`FFmpeg concatenation failed: ${stderr}`);
+    }
+
+    console.log(`音声ファイル結合完了: ${outputMp3Path}`);
+  } finally {
+    // Clean up temporary files
+    if (fs.existsSync(listFilePath)) {
+      fs.unlinkSync(listFilePath);
+    }
+
+    // Clean up individual WAV files
+    for (const wavFile of wavFiles) {
+      if (fs.existsSync(wavFile)) {
+        fs.unlinkSync(wavFile);
+      }
+    }
+  }
+}
+
 /**
  * Generate TTS without adding to retry queue on failure
  * Used for retry queue processing to avoid infinite loops
@@ -29,95 +277,80 @@ export async function generateTTSWithoutQueue(
     throw new Error("Script text is required for TTS generation");
   }
 
-  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1})`);
-  const encodedText = encodeURIComponent(scriptText);
+  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`);
 
-  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
-  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
-
-  const queryResponse = await fetch(queryUrl, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-      Accept: "application/json",
-    },
-  });
-
-  if (!queryResponse.ok) {
-    const errorText = await queryResponse.text();
-    throw new Error(
-      `VOICEVOX audio query failed (${queryResponse.status}): ${errorText}`,
-    );
+  // Split text into chunks
+  const chunks = splitTextIntoChunks(scriptText.trim());
+  console.log(`テキストを${chunks.length}個のチャンクに分割: ${itemId}`);
+
+  if (chunks.length === 0) {
+    throw new Error("No valid text chunks generated");
   }
 
-  const audioQuery = await queryResponse.json();
-
-  console.log(`音声合成開始: ${itemId}`);
-  const audioResponse = await fetch(synthesisUrl, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify(audioQuery),
-    signal: AbortSignal.timeout(600000), // 10分のタイムアウト
-  });
-
-  if (!audioResponse.ok) {
-    const errorText = await audioResponse.text();
-    console.error(`音声合成失敗: ${itemId}`);
-    throw new Error(
-      `VOICEVOX synthesis failed (${audioResponse.status}): ${errorText}`,
-    );
-  }
-
-  const audioArrayBuffer = await audioResponse.arrayBuffer();
-  const audioBuffer = Buffer.from(audioArrayBuffer);
-
-  // 出力ディレクトリの準備
   const outputDir = config.paths.podcastAudioDir;
   if (!fs.existsSync(outputDir)) {
     fs.mkdirSync(outputDir, { recursive: true });
   }
 
-  const wavFilePath = path.resolve(outputDir, `${itemId}.wav`);
   const mp3FilePath = path.resolve(outputDir, `${itemId}.mp3`);
+  const generatedWavFiles: string[] = [];
 
-  console.log(`WAVファイル保存開始: ${wavFilePath}`);
-  fs.writeFileSync(wavFilePath, audioBuffer);
-  console.log(`WAVファイル保存完了: ${wavFilePath}`);
+  try {
+    // Generate audio for each chunk
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      if (!chunk) continue;
+      console.log(`チャンク${i + 1}/${chunks.length}処理中: "${chunk.substring(0, 30)}${chunk.length > 30 ? '...' : ''}"`);
+
+      const wavPath = await generateAudioForChunk(chunk, i, itemId);
+      generatedWavFiles.push(wavPath);
+    }
 
-  console.log(`MP3変換開始: ${wavFilePath} -> ${mp3FilePath}`);
+    // Concatenate all audio files
+    if (generatedWavFiles.length === 1) {
+      // Single chunk - just convert to MP3
+      const ffmpegCmd = ffmpegPath || "ffmpeg";
+      const firstWavFile = generatedWavFiles[0];
+      if (!firstWavFile) {
+        throw new Error("No WAV files generated");
+      }
+
+      const result = Bun.spawnSync([
+        ffmpegCmd,
+        "-i", firstWavFile,
+        "-codec:a", "libmp3lame",
+        "-qscale:a", "2",
+        "-y",
+        mp3FilePath,
+      ]);
 
-  const ffmpegCmd = ffmpegPath || "ffmpeg";
-  const result = Bun.spawnSync({
-    cmd: [
-      ffmpegCmd,
-      "-i",
-      wavFilePath,
-      "-codec:a",
-      "libmp3lame",
-      "-qscale:a",
-      "2",
-      "-y", // Overwrite output file
-      mp3FilePath,
-    ],
-  });
+      if (result.exitCode !== 0) {
+        const stderr = result.stderr
+          ? new TextDecoder().decode(result.stderr)
+          : "Unknown error";
+        throw new Error(`FFmpeg conversion failed: ${stderr}`);
+      }
+
+      // Clean up WAV file
+      fs.unlinkSync(firstWavFile);
+    } else {
+      // Multiple chunks - concatenate them
+      await concatenateAudioFiles(generatedWavFiles, mp3FilePath);
+    }
 
-  if (result.exitCode !== 0) {
-    const stderr = result.stderr
-      ? new TextDecoder().decode(result.stderr)
-      : "Unknown error";
-    throw new Error(`FFmpeg conversion failed: ${stderr}`);
+    console.log(`TTS生成完了: ${itemId} (${chunks.length}チャンク)`);
+    return path.basename(mp3FilePath);
+
+  } catch (error) {
+    // Clean up any generated files on error
+    for (const wavFile of generatedWavFiles) {
+      if (fs.existsSync(wavFile)) {
+        fs.unlinkSync(wavFile);
+      }
+    }
+
+    throw error;
   }
-
-  // Wavファイルを削除
-  if (fs.existsSync(wavFilePath)) {
-    fs.unlinkSync(wavFilePath);
-  }
-
-  console.log(`TTS生成完了: ${itemId}`);
-
-  return path.basename(mp3FilePath);
 }
 
 export async function generateTTS(