Files

395 lines
11 KiB
TypeScript

import fs from "fs";
import path from "path";
import ffmpegPath from "ffmpeg-static";
import { config } from "./config.js";
import {
convertEnglishWordsOnly,
initializeTextConverter,
} from "./text-converter.js";
/**
 * Split text into chunks suitable for TTS processing.
 *
 * The text is cut after sentence terminators (`。`, `!`, `?`, their
 * full-width forms `！` / `？`, and line breaks) and the resulting sentences
 * are packed greedily into chunks of at most `maxLength` UTF-16 code units.
 * A terminator (or run of terminators) always stays attached to the sentence
 * it ends, so a chunk never starts with punctuation left over from the
 * previous sentence. A sentence that is longer than `maxLength` on its own is
 * handed to `splitLongSentence`.
 *
 * @param text - Text to split.
 * @param maxLength - Maximum chunk length (default 100). Must be at least 1.
 * @returns Trimmed, non-empty chunks in reading order; an empty array when
 *   `text` contains nothing but whitespace.
 * @throws RangeError if `maxLength` is smaller than 1 or is NaN.
 */
function splitTextIntoChunks(text: string, maxLength = 100): string[] {
  // A non-positive limit can never be satisfied and would make the
  // character-level fallback in splitLongSentence loop forever.
  if (!(maxLength >= 1)) {
    throw new RangeError(`maxLength must be at least 1, got ${maxLength}`);
  }

  if (text.length <= maxLength) {
    // Same contract as the multi-chunk path: trimmed and never empty.
    const trimmedText = text.trim();
    return trimmedText ? [trimmedText] : [];
  }

  const chunks: string[] = [];
  let currentChunk = "";
  const flushCurrentChunk = (): void => {
    const trimmedChunk = currentChunk.trim();
    if (trimmedChunk) {
      chunks.push(trimmedChunk);
    }
    currentChunk = "";
  };

  // With a capturing group, split() yields
  // [body, terminators, body, terminators, ..., body].
  const parts = text.split(/([。!?！？\n]+)/);
  for (let i = 0; i < parts.length; i += 2) {
    // Re-attach the terminator run to the sentence body it closes.
    const sentence = (parts[i] ?? "") + (parts[i + 1] ?? "");
    if (!sentence) continue;

    if (currentChunk.length + sentence.length <= maxLength) {
      currentChunk += sentence;
      continue;
    }

    flushCurrentChunk();

    // Surrounding whitespace is trimmed on output anyway, so it must not be
    // the reason a sentence is treated as oversized.
    const trimmedSentence = sentence.trim();
    if (trimmedSentence.length > maxLength) {
      const subChunks = splitLongSentence(trimmedSentence, maxLength)
        .map((subChunk) => subChunk.trim())
        .filter((subChunk) => subChunk.length > 0);
      chunks.push(...subChunks);
    } else {
      currentChunk = sentence;
    }
  }
  flushCurrentChunk();

  return chunks;
}
/**
 * Split a sentence that exceeds `maxLength` into smaller chunks.
 *
 * Break points are placed after commas (`、`, `,`, `，`) and after the
 * characters `は` `が` `で` `を` `に` `と`, so the delimiter stays at the end
 * of the chunk it belongs to instead of opening the next one. The match is
 * purely character based: those characters are treated as break points even
 * when they occur inside a word.
 *
 * A segment that is still longer than `maxLength` is cut at the length limit.
 * That cut is made between code points, never inside a surrogate pair, so the
 * result contains no lone surrogates (which would make `encodeURIComponent`
 * throw later on).
 *
 * @param sentence - Sentence to split.
 * @param maxLength - Maximum chunk length in UTF-16 code units; at least 1.
 *   A single code point wider than `maxLength` is emitted as its own chunk.
 * @returns `[sentence]` unchanged when it already fits; otherwise trimmed,
 *   non-empty chunks in reading order.
 * @throws RangeError if `maxLength` is smaller than 1 or is NaN.
 */
function splitLongSentence(sentence: string, maxLength: number): string[] {
  // With a non-positive limit the length-based fallback could never advance.
  if (!(maxLength >= 1)) {
    throw new RangeError(`maxLength must be at least 1, got ${maxLength}`);
  }
  if (sentence.length <= maxLength) {
    return [sentence];
  }

  // Cut `text` into pieces of at most `maxLength` code units, iterating by
  // code point so surrogate pairs are kept together.
  const splitAtLimit = (text: string): string[] => {
    const pieces: string[] = [];
    let piece = "";
    for (const char of text) {
      if (piece.length > 0 && piece.length + char.length > maxLength) {
        pieces.push(piece);
        piece = "";
      }
      piece += char;
    }
    if (piece) {
      pieces.push(piece);
    }
    return pieces;
  };

  const chunks: string[] = [];
  let currentChunk = "";
  const flushCurrentChunk = (): void => {
    const trimmedChunk = currentChunk.trim();
    if (trimmedChunk) {
      chunks.push(trimmedChunk);
    }
    currentChunk = "";
  };

  // With a capturing group, split() yields
  // [text, delimiter, text, delimiter, ..., text].
  const parts = sentence.split(/([、,，]|[はがでをにと])/);
  for (let i = 0; i < parts.length; i += 2) {
    // Keep each delimiter glued to the text in front of it.
    const segment = (parts[i] ?? "") + (parts[i + 1] ?? "");
    if (!segment) continue;

    if (currentChunk.length + segment.length <= maxLength) {
      currentChunk += segment;
      continue;
    }

    flushCurrentChunk();

    if (segment.length <= maxLength) {
      currentChunk = segment;
      continue;
    }

    // No natural break point left: fall back to cutting at the limit. The
    // last piece stays open so that following segments can still join it.
    const pieces = splitAtLimit(segment);
    const lastPiece = pieces.pop() ?? "";
    for (const piece of pieces) {
      const trimmedPiece = piece.trim();
      if (trimmedPiece) {
        chunks.push(trimmedPiece);
      }
    }
    currentChunk = lastPiece;
  }
  flushCurrentChunk();

  return chunks;
}
/** Voice selection used when talking to VOICEVOX. */
interface VoiceStyle {
// Sent as the `speaker` query parameter of the VOICEVOX requests in this file.
styleId: number;
}
// Default voice settings, read from config.voicevox.styleId.
// NOTE(review): the original comment says the value comes from environment
// variables; that depends on how ./config.js is populated — confirm there.
const defaultVoiceStyle: VoiceStyle = {
styleId: config.voicevox.styleId,
};
/**
 * Generate audio for a single text chunk and save it as a WAV file.
 *
 * Steps:
 * 1. Convert English words to katakana via `convertEnglishWordsOnly`; if the
 *    conversion throws, the original text is used instead.
 * 2. POST `/audio_query` to VOICEVOX to obtain the synthesis parameters.
 * 3. POST those parameters to `/synthesis` to obtain the audio data.
 * 4. Write the audio to `<podcastAudioDir>/<itemId>_chunk_<chunkIndex>.wav`.
 *
 * @param chunkText - Text of this chunk.
 * @param chunkIndex - Zero-based chunk index; used as-is in the file name and
 *   as `chunkIndex + 1` in log and error messages.
 * @param itemId - Item identifier; used as the file name prefix and in logs.
 * @returns Absolute path of the WAV file that was written.
 * @throws Error when either VOICEVOX request answers with a non-OK status.
 *   Rejections from `fetch` (including timeouts) and file system errors
 *   propagate unchanged.
 */
async function generateAudioForChunk(
  chunkText: string,
  chunkIndex: number,
  itemId: string,
): Promise<string> {
  // 5 minutes per request. Applied to BOTH VOICEVOX calls so that an
  // unresponsive engine cannot stall the query step indefinitely.
  const requestTimeoutMs = 300000;

  // Convert English words to katakana before TTS processing
  let processedText: string;
  try {
    processedText = await convertEnglishWordsOnly(chunkText);
    if (processedText !== chunkText) {
      console.log(`チャンク${chunkIndex + 1}で英語をカタカナに変換: ${itemId}`);
      console.log(`変換前: "${chunkText}"`);
      console.log(`変換後: "${processedText}"`);
    }
  } catch (error) {
    // Best effort: a failed conversion must not prevent synthesis.
    console.warn(
      `チャンク${chunkIndex + 1}の英語変換に失敗、元のテキストを使用: ${itemId}`,
      error,
    );
    processedText = chunkText;
  }

  const encodedText = encodeURIComponent(processedText);
  const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
  const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;

  console.log(
    `チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${processedText.length}文字)`,
  );
  const queryResponse = await fetch(queryUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json",
    },
    signal: AbortSignal.timeout(requestTimeoutMs),
  });
  if (!queryResponse.ok) {
    const errorText = await queryResponse.text();
    throw new Error(
      `VOICEVOX audio query failed for chunk ${chunkIndex + 1} (${queryResponse.status}): ${errorText}`,
    );
  }
  // The query result is only forwarded to /synthesis, never inspected here.
  const audioQuery: unknown = await queryResponse.json();

  console.log(`チャンク${chunkIndex + 1}の音声合成開始: ${itemId}`);
  const audioResponse = await fetch(synthesisUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(audioQuery),
    signal: AbortSignal.timeout(requestTimeoutMs),
  });
  if (!audioResponse.ok) {
    const errorText = await audioResponse.text();
    throw new Error(
      `VOICEVOX synthesis failed for chunk ${chunkIndex + 1} (${audioResponse.status}): ${errorText}`,
    );
  }
  const audioBuffer = Buffer.from(await audioResponse.arrayBuffer());

  // Prepare the output directory. A recursive mkdir succeeds when the
  // directory already exists, so no separate existence check is needed.
  const outputDir = config.paths.podcastAudioDir;
  await fs.promises.mkdir(outputDir, { recursive: true });

  const chunkWavPath = path.resolve(
    outputDir,
    `${itemId}_chunk_${chunkIndex}.wav`,
  );
  await fs.promises.writeFile(chunkWavPath, audioBuffer);
  console.log(
    `チャンク${chunkIndex + 1}のWAVファイル保存完了: ${chunkWavPath}`,
  );
  return chunkWavPath;
}
/**
 * Concatenate multiple WAV files into a single MP3 file.
 *
 * A temporary list file in FFmpeg concat format is written to
 * `config.paths.podcastAudioDir`, and FFmpeg is run synchronously with the
 * concat demuxer and the libmp3lame encoder. Afterwards the list file AND all
 * input WAV files are deleted, whether or not FFmpeg succeeded.
 *
 * @param wavFiles - Paths of the WAV files, in playback order.
 * @param outputMp3Path - Destination MP3 path; an existing file is overwritten.
 * @throws Error if `wavFiles` is empty or FFmpeg exits with a non-zero code
 *   (FFmpeg's stderr is included in the message).
 */
async function concatenateAudioFiles(
  wavFiles: string[],
  outputMp3Path: string,
): Promise<void> {
  if (wavFiles.length === 0) {
    throw new Error("No WAV files to concatenate");
  }

  const ffmpegCmd = ffmpegPath || "ffmpeg";

  // Quote a path for the concat list file. Inside single quotes nothing is
  // special to FFmpeg except the quote itself, which has to be written as
  // '\'' (close quote, escaped quote, reopen quote).
  const quoteForConcatList = (filePath: string): string =>
    `'${filePath.replace(/'/g, "'\\''")}'`;

  // Create a temporary file list for FFmpeg concat. The random suffix keeps
  // two calls started in the same millisecond from sharing one list file.
  const tempDir = config.paths.podcastAudioDir;
  const uniqueSuffix = `${Date.now()}_${Math.random().toString(36).slice(2)}`;
  const listFilePath = path.resolve(tempDir, `concat_list_${uniqueSuffix}.txt`);

  try {
    // Write file list in FFmpeg concat format
    const fileList = wavFiles
      .map((file) => `file ${quoteForConcatList(path.resolve(file))}`)
      .join("\n");
    fs.writeFileSync(listFilePath, fileList);

    console.log(
      `音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`,
    );
    const result = Bun.spawnSync([
      ffmpegCmd,
      "-f",
      "concat",
      "-safe",
      "0", // the list contains absolute paths
      "-i",
      listFilePath,
      "-codec:a",
      "libmp3lame",
      "-qscale:a",
      "8",
      "-y", // Overwrite output file
      outputMp3Path,
    ]);
    if (result.exitCode !== 0) {
      const stderr = result.stderr
        ? new TextDecoder().decode(result.stderr)
        : "Unknown error";
      throw new Error(`FFmpeg concatenation failed: ${stderr}`);
    }
    console.log(`音声ファイル結合完了: ${outputMp3Path}`);
  } finally {
    // Clean up the temporary list and the individual WAV files.
    // `force: true` makes a missing file a no-op.
    fs.rmSync(listFilePath, { force: true });
    for (const wavFile of wavFiles) {
      fs.rmSync(wavFile, { force: true });
    }
  }
}
/**
 * Run the whole TTS pipeline for one item without touching the retry queue.
 *
 * The script is split into chunks, each chunk is synthesized to a WAV file,
 * and the WAV files are turned into `<itemId>.mp3` inside
 * `config.paths.podcastAudioDir`. Because a failure here is never queued,
 * the retry-queue processor can call this function without creating an
 * endless retry loop.
 *
 * @param itemId - Item identifier; also the base name of the MP3 file.
 * @param scriptText - Text to synthesize.
 * @param retryCount - Number of earlier attempts; only used for logging.
 * @returns File name (not the full path) of the generated MP3.
 * @throws Error when an argument is blank, no chunk could be produced, or
 *   synthesis / FFmpeg fails. WAV files created so far are removed first.
 */
export async function generateTTSWithoutQueue(
  itemId: string,
  scriptText: string,
  retryCount = 0,
): Promise<string> {
  const isBlank = (value: string): boolean => !value || value.trim() === "";
  if (isBlank(itemId)) {
    throw new Error("Item ID is required for TTS generation");
  }
  if (isBlank(scriptText)) {
    throw new Error("Script text is required for TTS generation");
  }

  // The converter is optional: when it cannot be initialized, carry on.
  try {
    await initializeTextConverter();
  } catch (initError) {
    console.warn(
      "テキストコンバーターの初期化に失敗しました。英語変換をスキップします:",
      initError,
    );
  }

  console.log(
    `TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`,
  );

  const textChunks = splitTextIntoChunks(scriptText.trim());
  console.log(`テキストを${textChunks.length}個のチャンクに分割: ${itemId}`);
  if (textChunks.length === 0) {
    throw new Error("No valid text chunks generated");
  }

  const audioDir = config.paths.podcastAudioDir;
  if (!fs.existsSync(audioDir)) {
    fs.mkdirSync(audioDir, { recursive: true });
  }
  const mp3FilePath = path.resolve(audioDir, `${itemId}.mp3`);

  // Every WAV written so far, so that a failure can remove them again.
  const wavPaths: string[] = [];
  try {
    // Synthesize the chunks one after another, in order.
    for (const [index, chunkText] of textChunks.entries()) {
      if (!chunkText) continue;
      const preview = chunkText.substring(0, 30);
      const ellipsis = chunkText.length > 30 ? "..." : "";
      console.log(
        `チャンク${index + 1}/${textChunks.length}処理中: "${preview}${ellipsis}"`,
      );
      const wavPath = await generateAudioForChunk(chunkText, index, itemId);
      wavPaths.push(wavPath);
    }

    if (wavPaths.length !== 1) {
      // Several WAV files: join them into one MP3.
      await concatenateAudioFiles(wavPaths, mp3FilePath);
    } else {
      // Exactly one WAV file: a plain WAV -> MP3 conversion is enough.
      const onlyWavFile = wavPaths[0];
      if (!onlyWavFile) {
        throw new Error("No WAV files generated");
      }
      const ffmpegCmd = ffmpegPath || "ffmpeg";
      const conversion = Bun.spawnSync([
        ffmpegCmd,
        "-i",
        onlyWavFile,
        "-codec:a",
        "libmp3lame",
        "-qscale:a",
        "8",
        "-y",
        mp3FilePath,
      ]);
      if (conversion.exitCode !== 0) {
        const stderr = conversion.stderr
          ? new TextDecoder().decode(conversion.stderr)
          : "Unknown error";
        throw new Error(`FFmpeg conversion failed: ${stderr}`);
      }
      // The intermediate WAV is no longer needed.
      fs.unlinkSync(onlyWavFile);
    }

    console.log(`TTS生成完了: ${itemId} (${textChunks.length}チャンク)`);
    return path.basename(mp3FilePath);
  } catch (error) {
    // Remove whatever WAV files are still on disk, then report the failure.
    for (const leftover of wavPaths) {
      if (!fs.existsSync(leftover)) continue;
      fs.unlinkSync(leftover);
    }
    throw error;
  }
}
/**
 * Generate TTS audio for an item and, on failure, register the item in the
 * retry queue.
 *
 * Delegates to `generateTTSWithoutQueue`. When that throws and `retryCount`
 * is below the retry limit (2), `addToQueue(itemId, scriptText, retryCount)`
 * from `../services/database.js` is called. The failure is reported to the
 * caller in every case; the error message states whether the item was queued,
 * could not be queued, or has exhausted its attempts.
 *
 * @param itemId - Item identifier.
 * @param scriptText - Text to synthesize.
 * @param retryCount - Number of earlier attempts (0 for the first one).
 * @returns File name of the generated MP3.
 * @throws Error whenever generation fails.
 */
export async function generateTTS(
  itemId: string,
  scriptText: string,
  retryCount = 0,
): Promise<string> {
  const maxRetries = 2;
  try {
    return await generateTTSWithoutQueue(itemId, scriptText, retryCount);
  } catch (error) {
    console.error(
      `TTS生成エラー: ${itemId} (試行回数: ${retryCount + 1})`,
      error,
    );

    if (retryCount >= maxRetries) {
      throw new Error(
        `TTS generation failed after ${maxRetries + 1} attempts: ${error}`,
      );
    }

    // Attempts remain: hand the item to the retry queue. A failure while
    // queueing must not replace the TTS error, which is the root cause the
    // caller needs to see.
    try {
      const { addToQueue } = await import("../services/database.js");
      await addToQueue(itemId, scriptText, retryCount);
    } catch (queueError) {
      console.error(`TTS再試行キューへの追加に失敗: ${itemId}`, queueError);
      throw new Error(
        `TTS generation failed and could not be added to retry queue: ${error}`,
      );
    }
    throw new Error(`TTS generation failed, added to retry queue: ${error}`);
  }
}