Apply formatting

This commit is contained in:
2025-06-08 15:21:58 +09:00
parent b5ff912fcb
commit a728ebb66c
28 changed files with 1809 additions and 1137 deletions

View File

@ -16,7 +16,7 @@ class BatchScheduler {
isRunning: false,
canForceStop: false,
};
private currentAbortController?: AbortController;
private readonly SIX_HOURS_MS = 6 * 60 * 60 * 1000; // 6 hours in milliseconds
@ -58,7 +58,7 @@ class BatchScheduler {
this.state.nextRun = new Date(nextRunTime).toISOString();
console.log(
`🕕 Next batch process scheduled for: ${new Date(nextRunTime).toLocaleString()}`
`🕕 Next batch process scheduled for: ${new Date(nextRunTime).toLocaleString()}`,
);
this.state.intervalId = setTimeout(async () => {
@ -78,7 +78,7 @@ class BatchScheduler {
this.state.isRunning = true;
this.state.canForceStop = true;
this.state.lastRun = new Date().toISOString();
// Create new AbortController for this batch run
this.currentAbortController = new AbortController();
@ -87,7 +87,7 @@ class BatchScheduler {
await batchProcess(this.currentAbortController.signal);
console.log("✅ Scheduled batch process completed");
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
if (error instanceof Error && error.name === "AbortError") {
console.log("🛑 Batch process was forcefully stopped");
} else {
console.error("❌ Error during scheduled batch process:", error);
@ -162,4 +162,4 @@ class BatchScheduler {
// Export singleton instance
export const batchScheduler = new BatchScheduler();
export type { BatchSchedulerState };
export type { BatchSchedulerState };

View File

@ -7,13 +7,13 @@ interface Config {
endpoint: string;
modelName: string;
};
// VOICEVOX Configuration
voicevox: {
host: string;
styleId: number;
};
// Podcast Configuration
podcast: {
title: string;
@ -25,19 +25,19 @@ interface Config {
ttl: string;
baseUrl: string;
};
// Admin Panel Configuration
admin: {
port: number;
username?: string;
password?: string;
};
// Batch Processing Configuration
batch: {
disableInitialRun: boolean;
};
// File paths
paths: {
projectRoot: string;
@ -64,43 +64,52 @@ function getOptionalEnv(key: string, defaultValue: string): string {
}
function createConfig(): Config {
const projectRoot = import.meta.dirname ? path.dirname(import.meta.dirname) : process.cwd();
const projectRoot = import.meta.dirname
? path.dirname(import.meta.dirname)
: process.cwd();
const dataDir = path.join(projectRoot, "data");
const publicDir = path.join(projectRoot, "public");
return {
openai: {
apiKey: getRequiredEnv("OPENAI_API_KEY"),
endpoint: getOptionalEnv("OPENAI_API_ENDPOINT", "https://api.openai.com/v1"),
endpoint: getOptionalEnv(
"OPENAI_API_ENDPOINT",
"https://api.openai.com/v1",
),
modelName: getOptionalEnv("OPENAI_MODEL_NAME", "gpt-4o-mini"),
},
voicevox: {
host: getOptionalEnv("VOICEVOX_HOST", "http://localhost:50021"),
styleId: parseInt(getOptionalEnv("VOICEVOX_STYLE_ID", "0")),
},
podcast: {
title: getOptionalEnv("PODCAST_TITLE", "自動生成ポッドキャスト"),
link: getOptionalEnv("PODCAST_LINK", "https://your-domain.com/podcast"),
description: getOptionalEnv("PODCAST_DESCRIPTION", "RSSフィードから自動生成された音声ポッドキャスト"),
description: getOptionalEnv(
"PODCAST_DESCRIPTION",
"RSSフィードから自動生成された音声ポッドキャスト",
),
language: getOptionalEnv("PODCAST_LANGUAGE", "ja"),
author: getOptionalEnv("PODCAST_AUTHOR", "管理者"),
categories: getOptionalEnv("PODCAST_CATEGORIES", "Technology"),
ttl: getOptionalEnv("PODCAST_TTL", "60"),
baseUrl: getOptionalEnv("PODCAST_BASE_URL", "https://your-domain.com"),
},
admin: {
port: parseInt(getOptionalEnv("ADMIN_PORT", "3001")),
username: import.meta.env["ADMIN_USERNAME"],
password: import.meta.env["ADMIN_PASSWORD"],
},
batch: {
disableInitialRun: getOptionalEnv("DISABLE_INITIAL_BATCH", "false") === "true",
disableInitialRun:
getOptionalEnv("DISABLE_INITIAL_BATCH", "false") === "true",
},
paths: {
projectRoot,
dataDir,
@ -109,7 +118,10 @@ function createConfig(): Config {
podcastAudioDir: path.join(publicDir, "podcast_audio"),
frontendBuildDir: path.join(projectRoot, "frontend", "dist"),
adminBuildDir: path.join(projectRoot, "admin-panel", "dist"),
feedUrlsFile: path.join(projectRoot, getOptionalEnv("FEED_URLS_FILE", "feed_urls.txt")),
feedUrlsFile: path.join(
projectRoot,
getOptionalEnv("FEED_URLS_FILE", "feed_urls.txt"),
),
},
};
}
@ -121,21 +133,21 @@ export function validateConfig(): void {
if (!config.openai.apiKey) {
throw new Error("OPENAI_API_KEY is required");
}
if (isNaN(config.voicevox.styleId)) {
throw new Error("VOICEVOX_STYLE_ID must be a valid number");
}
// Validate URLs
try {
new URL(config.voicevox.host);
} catch {
throw new Error("VOICEVOX_HOST must be a valid URL");
}
try {
new URL(config.openai.endpoint);
} catch {
throw new Error("OPENAI_API_ENDPOINT must be a valid URL");
}
}
}

View File

@ -1,4 +1,4 @@
import * as cheerio from 'cheerio';
import * as cheerio from "cheerio";
export interface ExtractedContent {
title?: string;
@ -8,17 +8,21 @@ export interface ExtractedContent {
error?: string;
}
export async function extractArticleContent(url: string): Promise<ExtractedContent> {
export async function extractArticleContent(
url: string,
): Promise<ExtractedContent> {
try {
// Fetch the HTML content
const response = await fetch(url, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'ja,en-US;q=0.7,en;q=0.3',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "ja,en-US;q=0.7,en;q=0.3",
"Accept-Encoding": "gzip, deflate",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
},
signal: AbortSignal.timeout(30000), // 30 second timeout
});
@ -31,52 +35,56 @@ export async function extractArticleContent(url: string): Promise<ExtractedConte
const $ = cheerio.load(html);
// Remove unwanted elements
$('script, style, nav, header, footer, aside, .advertisement, .ads, .sidebar, .menu, .navigation, .social-share, .comments').remove();
$(
"script, style, nav, header, footer, aside, .advertisement, .ads, .sidebar, .menu, .navigation, .social-share, .comments",
).remove();
let content = '';
let title = '';
let description = '';
let content = "";
let title = "";
let description = "";
// Extract title
title = $('title').text().trim() ||
$('h1').first().text().trim() ||
$('meta[property="og:title"]').attr('content') ||
'';
title =
$("title").text().trim() ||
$("h1").first().text().trim() ||
$('meta[property="og:title"]').attr("content") ||
"";
// Extract description
description = $('meta[name="description"]').attr('content') ||
$('meta[property="og:description"]').attr('content') ||
'';
description =
$('meta[name="description"]').attr("content") ||
$('meta[property="og:description"]').attr("content") ||
"";
// Try multiple content extraction strategies
const contentSelectors = [
// Common article selectors
'article',
"article",
'[role="main"]',
'.article-content',
'.post-content',
'.entry-content',
'.content',
'.main-content',
'.article-body',
'.post-body',
'.story-body',
'.news-content',
".article-content",
".post-content",
".entry-content",
".content",
".main-content",
".article-body",
".post-body",
".story-body",
".news-content",
// Japanese news site specific selectors
'.article',
'.news-article',
'.post',
'.entry',
'#content',
'#main',
'.main',
".article",
".news-article",
".post",
".entry",
"#content",
"#main",
".main",
// Fallback to common containers
'.container',
'#container',
'main',
'body'
".container",
"#container",
"main",
"body",
];
for (const selector of contentSelectors) {
@ -84,11 +92,11 @@ export async function extractArticleContent(url: string): Promise<ExtractedConte
if (element.length > 0) {
// Get text content and clean it up
let extractedText = element.text().trim();
// Remove extra whitespace and normalize
extractedText = extractedText
.replace(/\s+/g, ' ')
.replace(/\n\s*\n/g, '\n')
.replace(/\s+/g, " ")
.replace(/\n\s*\n/g, "\n")
.trim();
// Only use if we found substantial content
@ -101,50 +109,49 @@ export async function extractArticleContent(url: string): Promise<ExtractedConte
// If still no content, try paragraph extraction
if (!content) {
const paragraphs = $('p').map((_, el) => $(el).text().trim()).get();
const paragraphs = $("p")
.map((_, el) => $(el).text().trim())
.get();
content = paragraphs
.filter(p => p.length > 50) // Filter out short paragraphs
.join('\n\n');
.filter((p) => p.length > 50) // Filter out short paragraphs
.join("\n\n");
}
// Final fallback: use body text
if (!content || content.length < 100) {
content = $('body').text()
.replace(/\s+/g, ' ')
.trim();
content = $("body").text().replace(/\s+/g, " ").trim();
}
// Validate extracted content
if (!content || content.length < 50) {
return {
title,
content: '',
content: "",
description,
success: false,
error: 'Insufficient content extracted'
error: "Insufficient content extracted",
};
}
// Limit content length to avoid token limits
const maxLength = 5000;
if (content.length > maxLength) {
content = content.substring(0, maxLength) + '...';
content = content.substring(0, maxLength) + "...";
}
return {
title,
content,
description,
success: true
success: true,
};
} catch (error) {
return {
title: '',
content: '',
description: '',
title: "",
content: "",
description: "",
success: false,
error: error instanceof Error ? error.message : 'Unknown error occurred'
error: error instanceof Error ? error.message : "Unknown error occurred",
};
}
}
@ -153,30 +160,30 @@ export async function enhanceArticleContent(
originalTitle: string,
originalLink: string,
originalContent?: string,
originalDescription?: string
originalDescription?: string,
): Promise<{ content?: string; description?: string }> {
// If we already have substantial content, use it
const existingContent = originalContent || originalDescription || '';
const existingContent = originalContent || originalDescription || "";
if (existingContent.length > 500) {
return {
content: originalContent,
description: originalDescription
description: originalDescription,
};
}
// Try to extract content from the URL
const extracted = await extractArticleContent(originalLink);
if (extracted.success && extracted.content) {
return {
content: extracted.content,
description: extracted.description || originalDescription
description: extracted.description || originalDescription,
};
}
// Return original content if extraction failed
return {
content: originalContent,
description: originalDescription
description: originalDescription,
};
}
}

View File

@ -59,7 +59,12 @@ export async function openAI_ClassifyFeed(title: string): Promise<string> {
export async function openAI_GeneratePodcastContent(
title: string,
items: Array<{ title: string; link: string; content?: string; description?: string }>,
items: Array<{
title: string;
link: string;
content?: string;
description?: string;
}>,
): Promise<string> {
if (!title || title.trim() === "") {
throw new Error("Feed title is required for podcast content generation");
@ -78,22 +83,25 @@ export async function openAI_GeneratePodcastContent(
}
// Build detailed article information including content
const articleDetails = validItems.map((item, i) => {
let articleInfo = `${i + 1}. タイトル: ${item.title}\nURL: ${item.link}`;
// Add content if available
const content = item.content || item.description;
if (content && content.trim()) {
// Limit content length to avoid token limits
const maxContentLength = 2000;
const truncatedContent = content.length > maxContentLength
? content.substring(0, maxContentLength) + "..."
: content;
articleInfo += `\n内容: ${truncatedContent}`;
}
return articleInfo;
}).join("\n\n");
const articleDetails = validItems
.map((item, i) => {
let articleInfo = `${i + 1}. タイトル: ${item.title}\nURL: ${item.link}`;
// Add content if available
const content = item.content || item.description;
if (content && content.trim()) {
// Limit content length to avoid token limits
const maxContentLength = 2000;
const truncatedContent =
content.length > maxContentLength
? content.substring(0, maxContentLength) + "..."
: content;
articleInfo += `\n内容: ${truncatedContent}`;
}
return articleInfo;
})
.join("\n\n");
const prompt = `
あなたはプロのポッドキャスタです。以下に示すフィードタイトルに基づき、そのトピックに関する詳細なポッドキャスト原稿を作成してください。

View File

@ -17,18 +17,18 @@ function splitTextIntoChunks(text: string, maxLength: number = 50): string[] {
// Split by sentences first (Japanese periods and line breaks)
const sentences = text.split(/([。!?\n])/);
for (let i = 0; i < sentences.length; i++) {
const sentence = sentences[i];
if (!sentence) continue;
if (currentChunk.length + sentence.length <= maxLength) {
currentChunk += sentence;
} else {
if (currentChunk.trim()) {
chunks.push(currentChunk.trim());
}
// If single sentence is too long, split further
if (sentence.length > maxLength) {
const subChunks = splitLongSentence(sentence, maxLength);
@ -39,12 +39,12 @@ function splitTextIntoChunks(text: string, maxLength: number = 50): string[] {
}
}
}
if (currentChunk.trim()) {
chunks.push(currentChunk.trim());
}
return chunks.filter(chunk => chunk.length > 0);
return chunks.filter((chunk) => chunk.length > 0);
}
/**
@ -57,10 +57,10 @@ function splitLongSentence(sentence: string, maxLength: number): string[] {
const chunks: string[] = [];
let currentChunk = "";
// Split by commas and common Japanese particles
const parts = sentence.split(/([、,,]|[はがでをにと])/);
for (const part of parts) {
if (currentChunk.length + part.length <= maxLength) {
currentChunk += part;
@ -71,11 +71,11 @@ function splitLongSentence(sentence: string, maxLength: number): string[] {
currentChunk = part;
}
}
if (currentChunk.trim()) {
chunks.push(currentChunk.trim());
}
// If still too long, force split by character limit
const finalChunks: string[] = [];
for (const chunk of chunks) {
@ -87,8 +87,8 @@ function splitLongSentence(sentence: string, maxLength: number): string[] {
finalChunks.push(chunk);
}
}
return finalChunks.filter(chunk => chunk.length > 0);
return finalChunks.filter((chunk) => chunk.length > 0);
}
interface VoiceStyle {
@ -112,7 +112,9 @@ async function generateAudioForChunk(
const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
console.log(`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`);
console.log(
`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`,
);
const queryResponse = await fetch(queryUrl, {
method: "POST",
@ -157,11 +159,16 @@ async function generateAudioForChunk(
fs.mkdirSync(outputDir, { recursive: true });
}
const chunkWavPath = path.resolve(outputDir, `${itemId}_chunk_${chunkIndex}.wav`);
const chunkWavPath = path.resolve(
outputDir,
`${itemId}_chunk_${chunkIndex}.wav`,
);
fs.writeFileSync(chunkWavPath, audioBuffer);
console.log(`チャンク${chunkIndex + 1}のWAVファイル保存完了: ${chunkWavPath}`);
console.log(
`チャンク${chunkIndex + 1}のWAVファイル保存完了: ${chunkWavPath}`,
);
return chunkWavPath;
}
@ -173,25 +180,34 @@ async function concatenateAudioFiles(
outputMp3Path: string,
): Promise<void> {
const ffmpegCmd = ffmpegPath || "ffmpeg";
// Create a temporary file list for FFmpeg concat
const tempDir = config.paths.podcastAudioDir;
const listFilePath = path.resolve(tempDir, `concat_list_${Date.now()}.txt`);
try {
// Write file list in FFmpeg concat format
const fileList = wavFiles.map(file => `file '${path.resolve(file)}'`).join('\n');
const fileList = wavFiles
.map((file) => `file '${path.resolve(file)}'`)
.join("\n");
fs.writeFileSync(listFilePath, fileList);
console.log(`音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`);
console.log(
`音声ファイル結合開始: ${wavFiles.length}個のファイルを結合 -> ${outputMp3Path}`,
);
const result = Bun.spawnSync([
ffmpegCmd,
"-f", "concat",
"-safe", "0",
"-i", listFilePath,
"-codec:a", "libmp3lame",
"-qscale:a", "2",
"-f",
"concat",
"-safe",
"0",
"-i",
listFilePath,
"-codec:a",
"libmp3lame",
"-qscale:a",
"2",
"-y", // Overwrite output file
outputMp3Path,
]);
@ -209,7 +225,7 @@ async function concatenateAudioFiles(
if (fs.existsSync(listFilePath)) {
fs.unlinkSync(listFilePath);
}
// Clean up individual WAV files
for (const wavFile of wavFiles) {
if (fs.existsSync(wavFile)) {
@ -236,12 +252,14 @@ export async function generateTTSWithoutQueue(
throw new Error("Script text is required for TTS generation");
}
console.log(`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`);
console.log(
`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`,
);
// Split text into chunks
const chunks = splitTextIntoChunks(scriptText.trim());
console.log(`テキストを${chunks.length}個のチャンクに分割: ${itemId}`);
if (chunks.length === 0) {
throw new Error("No valid text chunks generated");
}
@ -259,8 +277,10 @@ export async function generateTTSWithoutQueue(
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
if (!chunk) continue;
console.log(`チャンク${i + 1}/${chunks.length}処理中: "${chunk.substring(0, 30)}${chunk.length > 30 ? '...' : ''}"`);
console.log(
`チャンク${i + 1}/${chunks.length}処理中: "${chunk.substring(0, 30)}${chunk.length > 30 ? "..." : ""}"`,
);
const wavPath = await generateAudioForChunk(chunk, i, itemId);
generatedWavFiles.push(wavPath);
}
@ -273,12 +293,15 @@ export async function generateTTSWithoutQueue(
if (!firstWavFile) {
throw new Error("No WAV files generated");
}
const result = Bun.spawnSync([
ffmpegCmd,
"-i", firstWavFile,
"-codec:a", "libmp3lame",
"-qscale:a", "2",
"-i",
firstWavFile,
"-codec:a",
"libmp3lame",
"-qscale:a",
"2",
"-y",
mp3FilePath,
]);
@ -289,7 +312,7 @@ export async function generateTTSWithoutQueue(
: "Unknown error";
throw new Error(`FFmpeg conversion failed: ${stderr}`);
}
// Clean up WAV file
fs.unlinkSync(firstWavFile);
} else {
@ -299,7 +322,6 @@ export async function generateTTSWithoutQueue(
console.log(`TTS生成完了: ${itemId} (${chunks.length}チャンク)`);
return path.basename(mp3FilePath);
} catch (error) {
// Clean up any generated files on error
for (const wavFile of generatedWavFiles) {
@ -307,7 +329,7 @@ export async function generateTTSWithoutQueue(
fs.unlinkSync(wavFile);
}
}
throw error;
}
}
@ -318,19 +340,24 @@ export async function generateTTS(
retryCount: number = 0,
): Promise<string> {
const maxRetries = 2;
try {
return await generateTTSWithoutQueue(itemId, scriptText, retryCount);
} catch (error) {
console.error(`TTS生成エラー: ${itemId} (試行回数: ${retryCount + 1})`, error);
console.error(
`TTS生成エラー: ${itemId} (試行回数: ${retryCount + 1})`,
error,
);
if (retryCount < maxRetries) {
// Add to queue for retry only on initial failure
const { addToQueue } = await import("../services/database.js");
await addToQueue(itemId, scriptText, retryCount);
throw new Error(`TTS generation failed, added to retry queue: ${error}`);
} else {
throw new Error(`TTS generation failed after ${maxRetries + 1} attempts: ${error}`);
throw new Error(
`TTS generation failed after ${maxRetries + 1} attempts: ${error}`,
);
}
}
}