Close #3
This commit is contained in:
		
							
								
								
									
										342
									
								
								services/tts.ts
									
									
									
									
									
								
							
							
						
						
									
										342
									
								
								services/tts.ts
									
									
									
									
									
								
							@@ -3,6 +3,94 @@ import path from "path";
 | 
			
		||||
import ffmpegPath from "ffmpeg-static";
 | 
			
		||||
import { config } from "./config.js";
 | 
			
		||||
 | 
			
		||||
/**
 * Validate a chunk-size limit and normalise it to a whole number.
 *
 * @param maxLength - Requested maximum chunk length in UTF-16 code units.
 * @returns `maxLength` rounded down to an integer (Infinity is passed through).
 * @throws RangeError when `maxLength` is NaN or smaller than 1; such a limit
 *   can never be satisfied and would otherwise make the forced split loop forever.
 */
function normalizeChunkLimit(maxLength: number): number {
  if (Number.isNaN(maxLength) || maxLength < 1) {
    throw new RangeError(`maxLength must be a number >= 1 (received ${maxLength})`);
  }
  return Math.floor(maxLength);
}

/**
 * Cut `text` into pieces no longer than `limit` when no natural break exists.
 *
 * The pieces are balanced (e.g. 51 characters with a limit of 50 become
 * 26 + 25, not 50 + 1) so that a lone trailing character or punctuation mark
 * is not emitted as its own chunk. A cut is never placed between the two
 * halves of a surrogate pair.
 */
function forceSplitByLength(text: string, limit: number): string[] {
  if (text.length <= limit) {
    return [text];
  }

  const pieceCount = Math.ceil(text.length / limit);
  const pieceSize = Math.ceil(text.length / pieceCount);
  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;

  const pieces: string[] = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + pieceSize, text.length);
    if (
      end < text.length &&
      isHighSurrogate(text.charCodeAt(end - 1)) &&
      isLowSurrogate(text.charCodeAt(end))
    ) {
      // Step back so the pair stays together; step forward only when stepping
      // back would produce an empty piece (pieceSize of 1).
      end = end - 1 > start ? end - 1 : end + 1;
    }
    pieces.push(text.slice(start, end));
    start = end;
  }
  return pieces;
}

/**
 * Greedily pack consecutive segments into chunks of at most `limit` characters.
 *
 * @param segments - Ordered pieces of the source text.
 * @param limit - Maximum chunk length (already validated).
 * @param splitOversized - Called for any single segment longer than `limit`.
 * @returns Trimmed, non-empty chunks in source order.
 */
function packSegments(
  segments: readonly string[],
  limit: number,
  splitOversized: (segment: string, limit: number) => string[],
): string[] {
  const chunks: string[] = [];
  let current = "";

  const flush = (): void => {
    const trimmed = current.trim();
    if (trimmed) {
      chunks.push(trimmed);
    }
    current = "";
  };

  for (const segment of segments) {
    if (segment.length > limit) {
      // Emit what has been collected first so source order is preserved.
      flush();
      chunks.push(...splitOversized(segment, limit).map((piece) => piece.trim()));
    } else if (current.length + segment.length <= limit) {
      current += segment;
    } else {
      flush();
      current = segment;
    }
  }
  flush();

  return chunks.filter((chunk) => chunk.length > 0);
}

/**
 * Split text into natural chunks for TTS processing.
 *
 * Text is first divided into sentences ending at a Japanese full stop, an
 * exclamation or question mark (ASCII or full-width) or a line break. Each
 * terminator stays attached to the sentence it closes, so a chunk never
 * consists of (or starts with) stray punctuation. Sentences are then packed
 * into chunks of at most `maxLength` characters; a sentence that is too long
 * on its own is handed to `splitLongSentence`.
 *
 * @param text - Text to split.
 * @param maxLength - Maximum chunk length in UTF-16 code units (default 50).
 * @returns Trimmed, non-empty chunks in source order; empty for blank input.
 * @throws RangeError when `maxLength` is NaN or smaller than 1.
 */
function splitTextIntoChunks(text: string, maxLength: number = 50): string[] {
  const limit = normalizeChunkLimit(maxLength);

  const trimmed = text.trim();
  if (!trimmed) {
    return [];
  }
  if (trimmed.length <= limit) {
    return [trimmed];
  }

  // \u3002 = ideographic full stop, \uFF01 / \uFF1F = full-width "!" / "?".
  // Written as escapes so the pattern survives width normalisation of the file.
  const sentences =
    trimmed.match(
      /[^\u3002!?\uFF01\uFF1F\n]*[\u3002!?\uFF01\uFF1F\n]+|[^\u3002!?\uFF01\uFF1F\n]+/g,
    ) ?? [];

  return packSegments(sentences, limit, splitLongSentence);
}

/**
 * Split a long sentence at natural break points (commas and common particles).
 *
 * A break is placed after a comma (ideographic, ASCII or full-width) or after
 * one of the particles は/が/で/を/に/と, so the comma or particle stays with
 * the words before it. The particle match is purely character based and can
 * therefore also hit the same character inside a word. Any segment that is
 * still longer than `maxLength` is cut by length as a last resort.
 *
 * @param sentence - Sentence to split.
 * @param maxLength - Maximum chunk length in UTF-16 code units.
 * @returns Trimmed, non-empty chunks in source order; empty for blank input.
 * @throws RangeError when `maxLength` is NaN or smaller than 1.
 */
function splitLongSentence(sentence: string, maxLength: number): string[] {
  const limit = normalizeChunkLimit(maxLength);

  const trimmed = sentence.trim();
  if (!trimmed) {
    return [];
  }
  if (trimmed.length <= limit) {
    return [trimmed];
  }

  // \u3001 = ideographic comma, \uFF0C = full-width comma.
  const segments =
    trimmed.match(
      /[^\u3001,\uFF0Cはがでをにと]*[\u3001,\uFF0Cはがでをにと]+|[^\u3001,\uFF0Cはがでをにと]+/g,
    ) ?? [];

  return packSegments(segments, limit, forceSplitByLength);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Voice selection used for VOICEVOX requests.
 */
interface VoiceStyle {
  /** VOICEVOX style id; sent as the `speaker` query parameter. */
  styleId: number;
}
 | 
			
		||||
@@ -12,6 +100,125 @@ const defaultVoiceStyle: VoiceStyle = {
 | 
			
		||||
  styleId: config.voicevox.styleId,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 * Generate audio for a single text chunk.
 *
 * Sends the chunk to VOICEVOX in two steps (`/audio_query`, then `/synthesis`
 * with the returned query as JSON body) and writes the synthesized audio to
 * `<podcastAudioDir>/<itemId>_chunk_<chunkIndex>.wav`.
 *
 * @param chunkText - Text to synthesize.
 * @param chunkIndex - Zero-based chunk index; used in the file name and
 *   (one-based) in log and error messages.
 * @param itemId - Identifier of the item the chunk belongs to; used as the
 *   file name prefix.
 * @returns Absolute path of the written WAV file.
 * @throws Error when either VOICEVOX request answers with a non-2xx status,
 *   or when a request exceeds the per-request timeout.
 */
async function generateAudioForChunk(
  chunkText: string,
  chunkIndex: number,
  itemId: string,
): Promise<string> {
  // Per-request budget of 5 minutes. It is applied to the query request as
  // well, so a stalled engine cannot block the pipeline indefinitely.
  const requestTimeoutMs = 300000;
  const chunkNumber = chunkIndex + 1;
  const speaker = defaultVoiceStyle.styleId;

  const encodedText = encodeURIComponent(chunkText);
  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${speaker}`;
  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${speaker}`;

  console.log(`チャンク${chunkNumber}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`);

  const queryResponse = await fetch(queryUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json",
    },
    signal: AbortSignal.timeout(requestTimeoutMs),
  });

  if (!queryResponse.ok) {
    const errorText = await queryResponse.text();
    throw new Error(
      `VOICEVOX audio query failed for chunk ${chunkNumber} (${queryResponse.status}): ${errorText}`,
    );
  }

  // The query is forwarded to /synthesis untouched, so it is kept opaque.
  const audioQuery: unknown = await queryResponse.json();

  console.log(`チャンク${chunkNumber}の音声合成開始: ${itemId}`);
  const audioResponse = await fetch(synthesisUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(audioQuery),
    signal: AbortSignal.timeout(requestTimeoutMs),
  });

  if (!audioResponse.ok) {
    const errorText = await audioResponse.text();
    throw new Error(
      `VOICEVOX synthesis failed for chunk ${chunkNumber} (${audioResponse.status}): ${errorText}`,
    );
  }

  const audioBuffer = Buffer.from(await audioResponse.arrayBuffer());

  // Prepare the output directory. With `recursive: true` the call is a no-op
  // for an existing directory, so no separate existence check is needed.
  const outputDir = config.paths.podcastAudioDir;
  fs.mkdirSync(outputDir, { recursive: true });

  // NOTE(review): itemId is interpolated into the file name as-is; confirm
  // callers never pass values containing path separators.
  const chunkWavPath = path.resolve(outputDir, `${itemId}_chunk_${chunkIndex}.wav`);
  fs.writeFileSync(chunkWavPath, audioBuffer);

  console.log(`チャンク${chunkNumber}のWAVファイル保存完了: ${chunkWavPath}`);

  return chunkWavPath;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Concatenate multiple WAV files into a single MP3 file.
 *
 * Writes a temporary FFmpeg concat list into the podcast audio directory,
 * runs FFmpeg (concat demuxer, libmp3lame, `-qscale:a 2`, overwriting the
 * output) and afterwards removes the list file and every input WAV file,
 * whether or not FFmpeg succeeded.
 *
 * @param wavFiles - WAV files to join, in playback order. They are deleted
 *   when this function returns or throws.
 * @param outputMp3Path - Path of the MP3 file to create.
 * @throws Error when `wavFiles` is empty or FFmpeg exits with a non-zero code.
 */
async function concatenateAudioFiles(
  wavFiles: string[],
  outputMp3Path: string,
): Promise<void> {
  if (wavFiles.length === 0) {
    throw new Error("FFmpeg concatenation failed: no input WAV files were provided");
  }

  const ffmpegCmd = ffmpegPath || "ffmpeg";

  // The random suffix keeps concurrent calls within the same millisecond
  // from sharing (and deleting) each other's list file.
  const tempDir = config.paths.podcastAudioDir;
  const uniqueSuffix = `${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
  const listFilePath = path.resolve(tempDir, `concat_list_${uniqueSuffix}.txt`);

  // Best-effort removal: a cleanup failure is logged rather than thrown so it
  // cannot replace the FFmpeg error raised inside the try block.
  const removeQuietly = (filePath: string): void => {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
      }
    } catch (cleanupError: unknown) {
      console.warn(`Failed to remove temporary file ${filePath}:`, cleanupError);
    }
  };

  try {
    // Write the file list in FFmpeg concat format. Inside single quotes a
    // literal quote must be written as '\'' (close, escaped quote, reopen).
    const fileList = wavFiles
      .map((file) => `file '${path.resolve(file).replace(/'/g, "'\\''")}'`)
      .join("\n");
    fs.writeFileSync(listFilePath, fileList);

    console.log(`音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`);

    // NOTE(review): spawnSync blocks until FFmpeg exits.
    const result = Bun.spawnSync([
      ffmpegCmd,
      "-f", "concat",
      "-safe", "0",
      "-i", listFilePath,
      "-codec:a", "libmp3lame",
      "-qscale:a", "2",
      "-y", // Overwrite output file
      outputMp3Path,
    ]);

    if (result.exitCode !== 0) {
      const stderr = result.stderr
        ? new TextDecoder().decode(result.stderr)
        : "Unknown error";
      throw new Error(`FFmpeg concatenation failed: ${stderr}`);
    }

    console.log(`音声ファイル結合完了: ${outputMp3Path}`);
  } finally {
    // Clean up the temporary list file and the individual WAV files.
    removeQuietly(listFilePath);
    for (const wavFile of wavFiles) {
      removeQuietly(wavFile);
    }
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Generate TTS without adding to retry queue on failure
 | 
			
		||||
 * Used for retry queue processing to avoid infinite loops
 | 
			
		||||
@@ -29,95 +236,80 @@ export async function generateTTSWithoutQueue(
 | 
			
		||||
    throw new Error("Script text is required for TTS generation");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1})`);
 | 
			
		||||
  const encodedText = encodeURIComponent(scriptText);
 | 
			
		||||
  console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`);
 | 
			
		||||
 | 
			
		||||
  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
 | 
			
		||||
  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
 | 
			
		||||
 | 
			
		||||
  const queryResponse = await fetch(queryUrl, {
 | 
			
		||||
    method: "POST",
 | 
			
		||||
    headers: {
 | 
			
		||||
      "Content-Type": "application/json",
 | 
			
		||||
      Accept: "application/json",
 | 
			
		||||
    },
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  if (!queryResponse.ok) {
 | 
			
		||||
    const errorText = await queryResponse.text();
 | 
			
		||||
    throw new Error(
 | 
			
		||||
      `VOICEVOX audio query failed (${queryResponse.status}): ${errorText}`,
 | 
			
		||||
    );
 | 
			
		||||
  // Split text into chunks
 | 
			
		||||
  const chunks = splitTextIntoChunks(scriptText.trim());
 | 
			
		||||
  console.log(`テキストを${chunks.length}個のチャンクに分割: ${itemId}`);
 | 
			
		||||
  
 | 
			
		||||
  if (chunks.length === 0) {
 | 
			
		||||
    throw new Error("No valid text chunks generated");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const audioQuery = await queryResponse.json();
 | 
			
		||||
 | 
			
		||||
  console.log(`音声合成開始: ${itemId}`);
 | 
			
		||||
  const audioResponse = await fetch(synthesisUrl, {
 | 
			
		||||
    method: "POST",
 | 
			
		||||
    headers: {
 | 
			
		||||
      "Content-Type": "application/json",
 | 
			
		||||
    },
 | 
			
		||||
    body: JSON.stringify(audioQuery),
 | 
			
		||||
    signal: AbortSignal.timeout(600000), // 10分のタイムアウト
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  if (!audioResponse.ok) {
 | 
			
		||||
    const errorText = await audioResponse.text();
 | 
			
		||||
    console.error(`音声合成失敗: ${itemId}`);
 | 
			
		||||
    throw new Error(
 | 
			
		||||
      `VOICEVOX synthesis failed (${audioResponse.status}): ${errorText}`,
 | 
			
		||||
    );
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const audioArrayBuffer = await audioResponse.arrayBuffer();
 | 
			
		||||
  const audioBuffer = Buffer.from(audioArrayBuffer);
 | 
			
		||||
 | 
			
		||||
  // 出力ディレクトリの準備
 | 
			
		||||
  const outputDir = config.paths.podcastAudioDir;
 | 
			
		||||
  if (!fs.existsSync(outputDir)) {
 | 
			
		||||
    fs.mkdirSync(outputDir, { recursive: true });
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const wavFilePath = path.resolve(outputDir, `${itemId}.wav`);
 | 
			
		||||
  const mp3FilePath = path.resolve(outputDir, `${itemId}.mp3`);
 | 
			
		||||
  const generatedWavFiles: string[] = [];
 | 
			
		||||
 | 
			
		||||
  console.log(`WAVファイル保存開始: ${wavFilePath}`);
 | 
			
		||||
  fs.writeFileSync(wavFilePath, audioBuffer);
 | 
			
		||||
  console.log(`WAVファイル保存完了: ${wavFilePath}`);
 | 
			
		||||
  try {
 | 
			
		||||
    // Generate audio for each chunk
 | 
			
		||||
    for (let i = 0; i < chunks.length; i++) {
 | 
			
		||||
      const chunk = chunks[i];
 | 
			
		||||
      if (!chunk) continue;
 | 
			
		||||
      console.log(`チャンク${i + 1}/${chunks.length}処理中: "${chunk.substring(0, 30)}${chunk.length > 30 ? '...' : ''}"`);
 | 
			
		||||
      
 | 
			
		||||
      const wavPath = await generateAudioForChunk(chunk, i, itemId);
 | 
			
		||||
      generatedWavFiles.push(wavPath);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  console.log(`MP3変換開始: ${wavFilePath} -> ${mp3FilePath}`);
 | 
			
		||||
    // Concatenate all audio files
 | 
			
		||||
    if (generatedWavFiles.length === 1) {
 | 
			
		||||
      // Single chunk - just convert to MP3
 | 
			
		||||
      const ffmpegCmd = ffmpegPath || "ffmpeg";
 | 
			
		||||
      const firstWavFile = generatedWavFiles[0];
 | 
			
		||||
      if (!firstWavFile) {
 | 
			
		||||
        throw new Error("No WAV files generated");
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      const result = Bun.spawnSync([
 | 
			
		||||
        ffmpegCmd,
 | 
			
		||||
        "-i", firstWavFile,
 | 
			
		||||
        "-codec:a", "libmp3lame",
 | 
			
		||||
        "-qscale:a", "2",
 | 
			
		||||
        "-y",
 | 
			
		||||
        mp3FilePath,
 | 
			
		||||
      ]);
 | 
			
		||||
 | 
			
		||||
  const ffmpegCmd = ffmpegPath || "ffmpeg";
 | 
			
		||||
  const result = Bun.spawnSync({
 | 
			
		||||
    cmd: [
 | 
			
		||||
      ffmpegCmd,
 | 
			
		||||
      "-i",
 | 
			
		||||
      wavFilePath,
 | 
			
		||||
      "-codec:a",
 | 
			
		||||
      "libmp3lame",
 | 
			
		||||
      "-qscale:a",
 | 
			
		||||
      "2",
 | 
			
		||||
      "-y", // Overwrite output file
 | 
			
		||||
      mp3FilePath,
 | 
			
		||||
    ],
 | 
			
		||||
  });
 | 
			
		||||
      if (result.exitCode !== 0) {
 | 
			
		||||
        const stderr = result.stderr
 | 
			
		||||
          ? new TextDecoder().decode(result.stderr)
 | 
			
		||||
          : "Unknown error";
 | 
			
		||||
        throw new Error(`FFmpeg conversion failed: ${stderr}`);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Clean up WAV file
 | 
			
		||||
      fs.unlinkSync(firstWavFile);
 | 
			
		||||
    } else {
 | 
			
		||||
      // Multiple chunks - concatenate them
 | 
			
		||||
      await concatenateAudioFiles(generatedWavFiles, mp3FilePath);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  if (result.exitCode !== 0) {
 | 
			
		||||
    const stderr = result.stderr
 | 
			
		||||
      ? new TextDecoder().decode(result.stderr)
 | 
			
		||||
      : "Unknown error";
 | 
			
		||||
    throw new Error(`FFmpeg conversion failed: ${stderr}`);
 | 
			
		||||
    console.log(`TTS生成完了: ${itemId} (${chunks.length}チャンク)`);
 | 
			
		||||
    return path.basename(mp3FilePath);
 | 
			
		||||
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    // Clean up any generated files on error
 | 
			
		||||
    for (const wavFile of generatedWavFiles) {
 | 
			
		||||
      if (fs.existsSync(wavFile)) {
 | 
			
		||||
        fs.unlinkSync(wavFile);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    throw error;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Wavファイルを削除
 | 
			
		||||
  if (fs.existsSync(wavFilePath)) {
 | 
			
		||||
    fs.unlinkSync(wavFilePath);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  console.log(`TTS生成完了: ${itemId}`);
 | 
			
		||||
 | 
			
		||||
  return path.basename(mp3FilePath);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export async function generateTTS(
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user