From d12d55020f3849f8d7277d29dbd6718b14c97dba Mon Sep 17 00:00:00 2001
From: Satsuki Akiba <anosatsuk124@gmail.com>
Date: Sat, 7 Jun 2025 19:43:33 +0900
Subject: [PATCH] Close #3

---
 services/tts.ts | 342 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 267 insertions(+), 75 deletions(-)
diff --git a/services/tts.ts b/services/tts.ts
index 386a9e7..1dd3394 100644
--- a/services/tts.ts
+++ b/services/tts.ts
@@ -3,6 +3,94 @@ import path from "path";
 import ffmpegPath from "ffmpeg-static";
 import { config } from "./config.js";
 
+/**
+ * Split text into natural chunks for TTS processing
+ * Aims for approximately 50 characters per chunk, breaking at natural points
+ */
+function splitTextIntoChunks(text: string, maxLength: number = 50): string[] {
+  if (text.length <= maxLength) {
+    return [text];
+  }
+
+  const chunks: string[] = [];
+  let currentChunk = "";
+
+  // Split by sentences first (Japanese periods and line breaks)
+  const sentences = text.split(/([。！？\n])/);
+  
+  for (let i = 0; i < sentences.length; i++) {
+    const sentence = sentences[i];
+    if (!sentence) continue;
+    
+    if (currentChunk.length + sentence.length <= maxLength) {
+      currentChunk += sentence;
+    } else {
+      if (currentChunk.trim()) {
+        chunks.push(currentChunk.trim());
+      }
+      
+      // If single sentence is too long, split further
+      if (sentence.length > maxLength) {
+        const subChunks = splitLongSentence(sentence, maxLength);
+        chunks.push(...subChunks);
+        currentChunk = "";
+      } else {
+        currentChunk = sentence;
+      }
+    }
+  }
+  
+  if (currentChunk.trim()) {
+    chunks.push(currentChunk.trim());
+  }
+  
+  return chunks.filter(chunk => chunk.length > 0);
+}
+
+/**
+ * Split a long sentence at natural break points (commas, particles, etc.)
+ */
+function splitLongSentence(sentence: string, maxLength: number): string[] {
+  if (sentence.length <= maxLength) {
+    return [sentence];
+  }
+
+  const chunks: string[] = [];
+  let currentChunk = "";
+  
+  // Split by commas and common Japanese particles
+  const parts = sentence.split(/([、，,]|[はがでをにと])/);
+  
+  for (const part of parts) {
+    if (currentChunk.length + part.length <= maxLength) {
+      currentChunk += part;
+    } else {
+      if (currentChunk.trim()) {
+        chunks.push(currentChunk.trim());
+      }
+      currentChunk = part;
+    }
+  }
+  
+  if (currentChunk.trim()) {
+    chunks.push(currentChunk.trim());
+  }
+  
+  // If still too long, force split by character limit
+  const finalChunks: string[] = [];
+  for (const chunk of chunks) {
+    if (chunk.length > maxLength) {
+      for (let i = 0; i < chunk.length; i += maxLength) {
+        finalChunks.push(chunk.slice(i, i + maxLength));
+      }
+    } else {
+      finalChunks.push(chunk);
+    }
+  }
+  
+  return finalChunks.filter(chunk => chunk.length > 0);
+}
+
 interface VoiceStyle {
   styleId: number;
 }
@@ -12,6 +100,125 @@ const defaultVoiceStyle: VoiceStyle = {
   styleId: config.voicevox.styleId,
 };
 
+/**
+ * Generate audio for a single text chunk
+ */
+async function generateAudioForChunk(
+  chunkText: string,
+  chunkIndex: number,
+  itemId: string,
+): Promise<string> {
+  const encodedText = encodeURIComponent(chunkText);
+  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
+  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
+
+  console.log(`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`);
+
+  const queryResponse = await fetch(queryUrl, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    },
+  });
+
+  if (!queryResponse.ok) {
+    const errorText = await queryResponse.text();
+    throw new Error(
+      `VOICEVOX audio query failed for chunk ${chunkIndex + 1} (${queryResponse.status}): ${errorText}`,
+    );
+  }
+
+  const audioQuery = await queryResponse.json();
+
+  console.log(`チャンク${chunkIndex + 1}の音声合成開始: ${itemId}`);
+  const audioResponse = await fetch(synthesisUrl, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify(audioQuery),
+    signal: AbortSignal.timeout(300000), // 5分のタイムアウト (チャンクごと)
+  });
+
+  if (!audioResponse.ok) {
+    const errorText = await audioResponse.text();
+    throw new Error(
+      `VOICEVOX synthesis failed for chunk ${chunkIndex + 1} (${audioResponse.status}): ${errorText}`,
+    );
+  }
+
+  const audioArrayBuffer = await audioResponse.arrayBuffer();
+  const audioBuffer = Buffer.from(audioArrayBuffer);
+
+  // 出力ディレクトリの準備
+  const outputDir = config.paths.podcastAudioDir;
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir, { recursive: true });
+  }
+
+  const chunkWavPath = path.resolve(outputDir, `${itemId}_chunk_${chunkIndex}.wav`);
+  fs.writeFileSync(chunkWavPath, audioBuffer);
+  
+  console.log(`チャンク${chunkIndex + 1}のWAVファイル保存完了: ${chunkWavPath}`);
+  
+  return chunkWavPath;
+}
+
+/**
+ * Concatenate multiple WAV files into a single MP3 file
+ */
+async function concatenateAudioFiles(
+  wavFiles: string[],
+  outputMp3Path: string,
+): Promise<void> {
+  const ffmpegCmd = ffmpegPath || "ffmpeg";
+  
+  // Create a temporary file list for FFmpeg concat
+  const tempDir = config.paths.podcastAudioDir;
+  const listFilePath = path.resolve(tempDir, `concat_list_${Date.now()}.txt`);
+  
+  try {
+    // Write file list in FFmpeg concat format
+    const fileList = wavFiles.map(file => `file '${path.resolve(file)}'`).join('\n');
+    fs.writeFileSync(listFilePath, fileList);
+
+    console.log(`音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`);
+
+    const result = Bun.spawnSync([
+      ffmpegCmd,
+      "-f", "concat",
+      "-safe", "0",
+      "-i", listFilePath,
+      "-codec:a", "libmp3lame",
+      "-qscale:a", "2",
+      "-y", // Overwrite output file
+      outputMp3Path,
+    ]);
+
+    if (result.exitCode !== 0) {
+      const stderr = result.stderr
+        ? new TextDecoder().decode(result.stderr)
+        : "Unknown error";
+      throw new Error(`FFmpeg concatenation failed: ${stderr}`);
+    }
+
+    console.log(`音声ファイル結合完了: ${outputMp3Path}`);
+  } finally {
+    // Clean up temporary files
+    if (fs.existsSync(listFilePath)) {
+      fs.unlinkSync(listFilePath);
+    }
+    
+    // Clean up individual WAV files
+    for (const wavFile of wavFiles) {
+      if (fs.existsSync(wavFile)) {
+        fs.unlinkSync(wavFile);
+      }
+    }
+  }
+}
+
 /**
  * Generate TTS without adding to retry queue on failure
  * Used for retry queue processing to avoid infinite loops
@@ -29,95 +236,80 @@ export async function generateTTSWithoutQueue(
     throw new Error("Script text is required for TTS generation");
   }
 
-  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1})`);
-  const encodedText = encodeURIComponent(scriptText);
+  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`);
 
-  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
-  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
-
-  const queryResponse = await fetch(queryUrl, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-      Accept: "application/json",
-    },
-  });
-
-  if (!queryResponse.ok) {
-    const errorText = await queryResponse.text();
-    throw new Error(
-      `VOICEVOX audio query failed (${queryResponse.status}): ${errorText}`,
-    );
+  // Split text into chunks
+  const chunks = splitTextIntoChunks(scriptText.trim());
+  console.log(`テキストを${chunks.length}個のチャンクに分割: ${itemId}`);
+  
+  if (chunks.length === 0) {
+    throw new Error("No valid text chunks generated");
   }
 
-  const audioQuery = await queryResponse.json();
-
-  console.log(`音声合成開始: ${itemId}`);
-  const audioResponse = await fetch(synthesisUrl, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify(audioQuery),
-    signal: AbortSignal.timeout(600000), // 10分のタイムアウト
-  });
-
-  if (!audioResponse.ok) {
-    const errorText = await audioResponse.text();
-    console.error(`音声合成失敗: ${itemId}`);
-    throw new Error(
-      `VOICEVOX synthesis failed (${audioResponse.status}): ${errorText}`,
-    );
-  }
-
-  const audioArrayBuffer = await audioResponse.arrayBuffer();
-  const audioBuffer = Buffer.from(audioArrayBuffer);
-
-  // 出力ディレクトリの準備
   const outputDir = config.paths.podcastAudioDir;
   if (!fs.existsSync(outputDir)) {
     fs.mkdirSync(outputDir, { recursive: true });
   }
 
-  const wavFilePath = path.resolve(outputDir, `${itemId}.wav`);
   const mp3FilePath = path.resolve(outputDir, `${itemId}.mp3`);
+  const generatedWavFiles: string[] = [];
 
-  console.log(`WAVファイル保存開始: ${wavFilePath}`);
-  fs.writeFileSync(wavFilePath, audioBuffer);
-  console.log(`WAVファイル保存完了: ${wavFilePath}`);
+  try {
+    // Generate audio for each chunk
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      if (!chunk) continue;
+      console.log(`チャンク${i + 1}/${chunks.length}処理中: "${chunk.substring(0, 30)}${chunk.length > 30 ? '...' : ''}"`);
+      
+      const wavPath = await generateAudioForChunk(chunk, i, itemId);
+      generatedWavFiles.push(wavPath);
+    }
 
-  console.log(`MP3変換開始: ${wavFilePath} -> ${mp3FilePath}`);
+    // Concatenate all audio files
+    if (generatedWavFiles.length === 1) {
+      // Single chunk - just convert to MP3
+      const ffmpegCmd = ffmpegPath || "ffmpeg";
+      const firstWavFile = generatedWavFiles[0];
+      if (!firstWavFile) {
+        throw new Error("No WAV files generated");
+      }
+      
+      const result = Bun.spawnSync([
+        ffmpegCmd,
+        "-i", firstWavFile,
+        "-codec:a", "libmp3lame",
+        "-qscale:a", "2",
+        "-y",
+        mp3FilePath,
+      ]);
 
-  const ffmpegCmd = ffmpegPath || "ffmpeg";
-  const result = Bun.spawnSync({
-    cmd: [
-      ffmpegCmd,
-      "-i",
-      wavFilePath,
-      "-codec:a",
-      "libmp3lame",
-      "-qscale:a",
-      "2",
-      "-y", // Overwrite output file
-      mp3FilePath,
-    ],
-  });
+      if (result.exitCode !== 0) {
+        const stderr = result.stderr
+          ? new TextDecoder().decode(result.stderr)
+          : "Unknown error";
+        throw new Error(`FFmpeg conversion failed: ${stderr}`);
+      }
+      
+      // Clean up WAV file
+      fs.unlinkSync(firstWavFile);
+    } else {
+      // Multiple chunks - concatenate them
+      await concatenateAudioFiles(generatedWavFiles, mp3FilePath);
+    }
 
-  if (result.exitCode !== 0) {
-    const stderr = result.stderr
-      ? new TextDecoder().decode(result.stderr)
-      : "Unknown error";
-    throw new Error(`FFmpeg conversion failed: ${stderr}`);
+    console.log(`TTS生成完了: ${itemId} (${chunks.length}チャンク)`);
+    return path.basename(mp3FilePath);
+
+  } catch (error) {
+    // Clean up any generated files on error
+    for (const wavFile of generatedWavFiles) {
+      if (fs.existsSync(wavFile)) {
+        fs.unlinkSync(wavFile);
+      }
+    }
+    
+    throw error;
   }
-
-  // Wavファイルを削除
-  if (fs.existsSync(wavFilePath)) {
-    fs.unlinkSync(wavFilePath);
-  }
-
-  console.log(`TTS生成完了: ${itemId}`);
-
-  return path.basename(mp3FilePath);
 }
 
 export async function generateTTS(