From 95043f0ea530c2ae375c1716a34e8e184ba19cfd Mon Sep 17 00:00:00 2001 From: Satsuki Akiba Date: Wed, 4 Jun 2025 16:06:10 +0900 Subject: [PATCH] feat: add fluent-ffmpeg and refactor batch process --- bun.lock | 9 +++ package.json | 1 + scripts/fetch_and_generate.ts | 145 ---------------------------------- server.ts | 15 +++- 4 files changed, 22 insertions(+), 148 deletions(-) delete mode 100644 scripts/fetch_and_generate.ts diff --git a/bun.lock b/bun.lock index a954d1d..b657d22 100644 --- a/bun.lock +++ b/bun.lock @@ -6,6 +6,7 @@ "dependencies": { "@aws-sdk/client-polly": "^3.823.0", "@hono/node-server": "^1.14.3", + "fluent-ffmpeg": "^2.1.3", "hono": "^4.7.11", "openai": "^4.104.0", "react": "^19.1.0", @@ -327,6 +328,8 @@ "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], + "async": ["async@0.2.10", "", {}, "sha512-eAkdoKxU6/LkKDBzLpT+t6Ff5EtfSF4wx1WfJiPEEV7WNLnDaRXk0oVysiEPm262roaachGexwUv94WhSgN5TQ=="], + "asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="], "bowser": ["bowser@2.11.0", "", {}, "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="], @@ -373,6 +376,8 @@ "fdir": ["fdir@6.4.5", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-4BG7puHpVsIYxZUbiUE3RqGloLaSSwzYie5jvasC4LWuBWzZawynvYouhjbQKw2JuIGYdm0DzIxl8iVidKlUEw=="], + "fluent-ffmpeg": ["fluent-ffmpeg@2.1.3", "", { "dependencies": { "async": "^0.2.9", "which": "^1.1.1" } }, "sha512-Be3narBNt2s6bsaqP6Jzq91heDgOEaDCJAXcE3qcma/EJBSy5FB4cvO31XBInuAuKBx8Kptf8dkhjK0IOru39Q=="], + "form-data": ["form-data@4.0.2", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "mime-types": "^2.1.12" } }, "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w=="], "form-data-encoder": ["form-data-encoder@1.7.2", "", {}, "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="], @@ -403,6 +408,8 @@ "humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="], + "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], + "js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], @@ -477,6 +484,8 @@ "whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], + "which": ["which@1.3.1", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "which": "./bin/which" } }, "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ=="], + "xml2js": ["xml2js@0.5.0", "", { "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" } }, "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA=="], "xmlbuilder": ["xmlbuilder@11.0.1", "", {}, "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="], diff --git a/package.json b/package.json index a7dfc6d..2223444 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "dependencies": { "@aws-sdk/client-polly": "^3.823.0", "@hono/node-server": "^1.14.3", + "fluent-ffmpeg": "^2.1.3", "hono": "^4.7.11", "openai": "^4.104.0", "react": "^19.1.0", diff --git a/scripts/fetch_and_generate.ts b/scripts/fetch_and_generate.ts deleted file mode 100644 index dd00849..0000000 --- a/scripts/fetch_and_generate.ts +++ /dev/null @@ -1,145 +0,0 @@ -import Parser from "rss-parser"; -import { - openAI_ClassifyFeed, - openAI_GeneratePodcastContent, -} from "../services/llm"; -import { generateTTS } from "../services/tts"; -import { saveEpisode, markAsProcessed } from "../services/database"; -import { updatePodcastRSS } from "../services/podcast"; -import crypto from "crypto"; - -interface FeedItem { - id: string; - title: string; - link: string; - pubDate: string; - contentSnippet?: string; -} - -import fs from "fs/promises"; -import path from "path"; -import { fileURLToPath } from "url"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -async function main() { - const feedUrlsFile = import.meta.env["FEED_URLS_FILE"] ?? "feed_urls.txt"; - const feedUrlsPath = path.resolve(__dirname, "..", feedUrlsFile); - let feedUrls: string[]; - try { - const data = await fs.readFile(feedUrlsPath, "utf-8"); - feedUrls = data - .split("\n") - .map((url) => url.trim()) - .filter((url) => url.length > 0); - } catch (err) { - console.warn(`フィードURLファイルの読み込みに失敗: ${feedUrlsFile}`); - feedUrls = []; - } - - // フィードごとに処理 - for (const url of feedUrls) { - try { - await processFeedUrl(url); - } finally { - await updatePodcastRSS(); - } - } - - console.log("処理完了:", new Date().toISOString()); -} - -const processFeedUrl = async (url: string) => { - const parser = new Parser(); - const feed = await parser.parseURL(url); - - // フィードのカテゴリ分類 - const feedTitle = feed.title || url; - const category = await openAI_ClassifyFeed(feedTitle); - console.log(`フィード分類完了: ${feedTitle} - ${category}`); - - // 昨日の記事のみフィルタリング - const yesterday = new Date(); - yesterday.setDate(yesterday.getDate() - 1); - - const yesterdayItems = feed.items.filter((item) => { - const pub = new Date(item.pubDate || ""); - return ( - pub.getFullYear() === yesterday.getFullYear() && - pub.getMonth() === yesterday.getMonth() && - pub.getDate() === yesterday.getDate() - ); - }); - - if (yesterdayItems.length === 0) { - console.log(`昨日の記事が見つかりません: ${feedTitle}`); - return; - } - - // ポッドキャスト原稿生成 - console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`); - const validItems = yesterdayItems.filter((item): item is FeedItem => { - return !!item.title && !!item.link; - }); - const podcastContent = await openAI_GeneratePodcastContent( - feedTitle, - validItems, - ); - - // トピックごとの統合音声生成 - const feedUrlHash = crypto.createHash("md5").update(url).digest("hex"); - const categoryHash = crypto.createHash("md5").update(category).digest("hex"); - const uniqueId = `${feedUrlHash}-${categoryHash}`; - - const audioFilePath = await generateTTS(uniqueId, podcastContent); - console.log(`音声ファイル生成完了: ${audioFilePath}`); - - // エピソードとして保存(各フィードにつき1つの統合エピソード) - const firstItem = yesterdayItems[0]; - if (!firstItem) { - console.warn("アイテムが空です"); - return; - } - const pub = new Date(firstItem.pubDate || ""); - - await saveEpisode({ - id: uniqueId, - title: `${category}: ${feedTitle}`, - pubDate: pub.toISOString(), - audioPath: audioFilePath, - sourceLink: url, - }); - - console.log(`エピソード保存完了: ${category} - ${feedTitle}`); - - // 個別記事の処理記録 - for (const item of yesterdayItems) { - const itemId = item["id"] as string | undefined; - const fallbackId = item.link || item.title || JSON.stringify(item); - const finalItemId = - itemId && typeof itemId === "string" && itemId.trim() !== "" - ? itemId - : `fallback-${Buffer.from(fallbackId).toString("base64")}`; - - if (!finalItemId || finalItemId.trim() === "") { - console.warn(`フィードアイテムのIDを生成できませんでした`, { - feedUrl: url, - itemTitle: item.title, - itemLink: item.link, - }); - continue; - } - - const already = await markAsProcessed(url, finalItemId); - if (already) { - console.log(`既に処理済み: ${finalItemId}`); - continue; - } - } -}; - -main().catch((err) => { - console.error("エラー発生:", err); - process.exit(1); -}); diff --git a/server.ts b/server.ts index 9b41007..8b3d927 100644 --- a/server.ts +++ b/server.ts @@ -3,7 +3,7 @@ import { serve } from "@hono/node-server"; import fs from "fs"; import path from "path"; import { Database } from "bun:sqlite"; -import { batchProcess } from "./scripts/fetch_and_generate"; +import { batchProcess } from "./services/fetch_and_generate"; import { setInterval } from "timers"; @@ -198,10 +198,19 @@ function scheduleFirstBatchProcess() { function scheduleDailyBatchProcess() { const now = new Date(); - const nextRun = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1, 0, 0, 0); + const nextRun = new Date( + now.getFullYear(), + now.getMonth(), + now.getDate() + 1, + 0, + 0, + 0, + ); const delay = nextRun.getTime() - now.getTime(); - console.log(`Next daily batch process scheduled in ${delay / 1000 / 60} minutes`); + console.log( + `Next daily batch process scheduled in ${delay / 1000 / 60} minutes`, + ); setTimeout(async () => { try {