/**
 * services/fetch_and_generate.ts
 *
 * Batch job that reads a list of feed URLs, turns yesterday's articles of each
 * feed into a single podcast episode (script -> audio -> stored episode), and
 * records the articles as processed.
 */
import Parser from "rss-parser";
import { openAI_ClassifyFeed, openAI_GeneratePodcastContent } from "./llm";
import { generateTTS } from "./tts";
import { saveEpisode, markAsProcessed } from "./database";
import { updatePodcastRSS } from "./podcast";
import crypto from "crypto";
import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";

/** Shape of a feed article as it is handed to the podcast script generator. */
interface FeedItem {
  id: string;
  title: string;
  link: string;
  pubDate: string;
  contentSnippet?: string;
}

// ES modules have no __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Processes every feed listed in the feed URL file.
 *
 * The file name comes from `import.meta.env["FEED_URLS_FILE"]` (default
 * `feed_urls.txt`) and is resolved against the parent of this module's
 * directory. It is read as one URL per line; surrounding whitespace and blank
 * lines are ignored. If the file cannot be read, a warning is logged and no
 * feed is processed.
 *
 * `updatePodcastRSS()` runs after each feed, whether that feed succeeded or
 * not. A failing feed no longer prevents the remaining feeds from being
 * processed: each failure is logged, and once all feeds have been attempted
 * the first failure is re-thrown so callers still observe a rejection.
 *
 * @throws The first error raised while processing a feed (or while updating
 *   the podcast RSS for it), after all feeds have been attempted.
 */
export async function batchProcess(): Promise<void> {
  const feedUrlsFile = import.meta.env["FEED_URLS_FILE"] ?? "feed_urls.txt";
  const feedUrlsPath = path.resolve(__dirname, "..", feedUrlsFile);

  let feedUrls: string[];
  try {
    const data = await fs.readFile(feedUrlsPath, "utf-8");
    feedUrls = data
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 0);
  } catch (err: unknown) {
    // Best effort: a missing or unreadable list means "nothing to do", but
    // the cause is logged so it can be diagnosed.
    console.warn(`フィードURLファイルの読み込みに失敗: ${feedUrlsFile}`, err);
    feedUrls = [];
  }

  // Process feed by feed; remember failures instead of aborting the batch.
  const failures: unknown[] = [];
  for (const url of feedUrls) {
    try {
      try {
        await processFeedUrl(url);
      } finally {
        await updatePodcastRSS();
      }
    } catch (err: unknown) {
      console.error(`フィード処理に失敗: ${url}`, err);
      failures.push(err);
    }
  }

  if (failures.length > 0) {
    // Surface the same error the caller would have seen from the first
    // failing feed, now that the other feeds have had their turn.
    throw failures[0];
  }

  console.log("処理完了:", new Date().toISOString());
}

/** Returns true when both dates fall on the same calendar day in local time. */
const isSameLocalDay = (a: Date, b: Date): boolean =>
  a.getFullYear() === b.getFullYear() &&
  a.getMonth() === b.getMonth() &&
  a.getDate() === b.getDate();

/**
 * Derives the identifier under which a feed item is recorded as processed:
 * the item's own non-blank string `id` when present, otherwise `fallback-`
 * followed by the base64 of its link, its title, or its JSON form.
 */
const resolveItemId = (item: {
  id?: unknown;
  link?: string;
  title?: string;
}): string => {
  const rawId = item.id;
  if (typeof rawId === "string" && rawId.trim() !== "") {
    return rawId;
  }
  const fallbackSource = item.link || item.title || JSON.stringify(item);
  return `fallback-${Buffer.from(fallbackSource).toString("base64")}`;
};

/**
 * Builds one combined episode from the articles a feed published yesterday.
 *
 * Steps: parse the feed, keep items whose `pubDate` is yesterday (local
 * time), classify the feed, generate the podcast script from the items that
 * have both a title and a link, synthesize audio, save the episode, and
 * finally record every one of yesterday's items via `markAsProcessed`.
 *
 * Returns without generating anything when the feed has no item from
 * yesterday, or none of those items has both a title and a link.
 *
 * @param url Feed URL to fetch and process.
 */
const processFeedUrl = async (url: string): Promise<void> => {
  const parser = new Parser();
  const feed = await parser.parseURL(url);
  const feedTitle = feed.title || url;

  // Keep only yesterday's articles. Items with a missing or unparsable
  // pubDate yield an invalid Date and never match.
  const yesterday = new Date();
  yesterday.setDate(yesterday.getDate() - 1);
  const yesterdayItems = feed.items.filter((item) =>
    isSameLocalDay(new Date(item.pubDate || ""), yesterday),
  );

  const firstItem = yesterdayItems[0];
  if (!firstItem) {
    console.log(`昨日の記事が見つかりません: ${feedTitle}`);
    return;
  }

  // NOTE(review): this predicate only checks title and link, yet narrows to
  // FeedItem (which also declares id and pubDate) — confirm what
  // openAI_GeneratePodcastContent actually reads from each item.
  const validItems = yesterdayItems.filter((item): item is FeedItem => {
    return !!item.title && !!item.link;
  });
  if (validItems.length === 0) {
    // Nothing usable to narrate: skip the script, audio and episode entirely.
    console.log(`タイトルとリンクを持つ記事がありません: ${feedTitle}`);
    return;
  }

  // Classify only once we know there is something to publish, so feeds with
  // no new articles do not cost a classification call.
  const category = await openAI_ClassifyFeed(feedTitle);
  console.log(`フィード分類完了: ${feedTitle} - ${category}`);

  // Generate the podcast script.
  console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`);
  const podcastContent = await openAI_GeneratePodcastContent(
    feedTitle,
    validItems,
  );

  // One combined audio file per feed, keyed by feed URL and category.
  // NOTE(review): the id depends only on the URL and the category, so
  // presumably later runs for the same feed reuse it — confirm how
  // generateTTS and saveEpisode treat a repeated id.
  const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
  const categoryHash = crypto.createHash("md5").update(category).digest("hex");
  const uniqueId = `${feedUrlHash}-${categoryHash}`;

  const audioFilePath = await generateTTS(uniqueId, podcastContent);
  console.log(`音声ファイル生成完了: ${audioFilePath}`);

  // Save a single combined episode for the feed, dated by the first of
  // yesterday's items (its pubDate parsed successfully in the filter above,
  // so toISOString cannot throw here).
  const episodeDate = new Date(firstItem.pubDate || "");
  await saveEpisode({
    id: uniqueId,
    title: `${category}: ${feedTitle}`,
    pubDate: episodeDate.toISOString(),
    audioPath: audioFilePath,
    sourceLink: url,
  });

  console.log(`エピソード保存完了: ${category} - ${feedTitle}`);

  // Record each of yesterday's articles as processed.
  // NOTE(review): the result is only logged; because this happens after the
  // episode was generated it cannot prevent regeneration — confirm whether
  // de-duplication is meant to happen before the script is generated.
  for (const item of yesterdayItems) {
    const itemId = resolveItemId(item);
    const already = await markAsProcessed(url, itemId);
    if (already) {
      console.log(`既に処理済み: ${itemId}`);
    }
  }
};