import Parser from "rss-parser"; import { openAI_ClassifyFeed, openAI_GeneratePodcastContent } from "./llm"; import { generateTTS } from "./tts"; import { saveEpisode, markAsProcessed } from "./database"; import { updatePodcastRSS } from "./podcast"; import crypto from "crypto"; interface FeedItem { id: string; title: string; link: string; pubDate: string; contentSnippet?: string; } import fs from "fs/promises"; import path from "path"; import { fileURLToPath } from "url"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); export async function batchProcess() { const feedUrlsFile = import.meta.env["FEED_URLS_FILE"] ?? "feed_urls.txt"; const feedUrlsPath = path.resolve(__dirname, "..", feedUrlsFile); let feedUrls: string[]; try { const data = await fs.readFile(feedUrlsPath, "utf-8"); feedUrls = data .split("\n") .map((url) => url.trim()) .filter((url) => url.length > 0); } catch (err) { console.warn(`フィードURLファイルの読み込みに失敗: ${feedUrlsFile}`); feedUrls = []; } // フィードごとに処理 for (const url of feedUrls) { try { await processFeedUrl(url); } finally { await updatePodcastRSS(); } } console.log("処理完了:", new Date().toISOString()); } const processFeedUrl = async (url: string) => { const parser = new Parser(); const feed = await parser.parseURL(url); // フィードのカテゴリ分類 const feedTitle = feed.title || url; const category = await openAI_ClassifyFeed(feedTitle); console.log(`フィード分類完了: ${feedTitle} - ${category}`); // 昨日の記事のみフィルタリング const yesterday = new Date(); yesterday.setDate(yesterday.getDate() - 1); const yesterdayItems = feed.items.filter((item) => { const pub = new Date(item.pubDate || ""); return ( pub.getFullYear() === yesterday.getFullYear() && pub.getMonth() === yesterday.getMonth() && pub.getDate() === yesterday.getDate() ); }); if (yesterdayItems.length === 0) { console.log(`昨日の記事が見つかりません: ${feedTitle}`); return; } // ポッドキャスト原稿生成 console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`); const validItems = yesterdayItems.filter((item): item is FeedItem => { return !!item.title && !!item.link; }); const podcastContent = await openAI_GeneratePodcastContent( feedTitle, validItems, ); // トピックごとの統合音声生成 const feedUrlHash = crypto.createHash("md5").update(url).digest("hex"); const categoryHash = crypto.createHash("md5").update(category).digest("hex"); const uniqueId = `${feedUrlHash}-${categoryHash}`; const audioFilePath = await generateTTS(uniqueId, podcastContent); console.log(`音声ファイル生成完了: ${audioFilePath}`); // エピソードとして保存(各フィードにつき1つの統合エピソード) const firstItem = yesterdayItems[0]; if (!firstItem) { console.warn("アイテムが空です"); return; } const pub = new Date(firstItem.pubDate || ""); await saveEpisode({ id: uniqueId, title: `${category}: ${feedTitle}`, pubDate: pub.toISOString(), audioPath: audioFilePath, sourceLink: url, }); console.log(`エピソード保存完了: ${category} - ${feedTitle}`); // 個別記事の処理記録 for (const item of yesterdayItems) { const itemId = item["id"] as string | undefined; const fallbackId = item.link || item.title || JSON.stringify(item); const finalItemId = itemId && typeof itemId === "string" && itemId.trim() !== "" ? itemId : `fallback-${Buffer.from(fallbackId).toString("base64")}`; if (!finalItemId || finalItemId.trim() === "") { console.warn(`フィードアイテムのIDを生成できませんでした`, { feedUrl: url, itemTitle: item.title, itemLink: item.link, }); continue; } const already = await markAsProcessed(url, finalItemId); if (already) { console.log(`既に処理済み: ${finalItemId}`); continue; } } };