feat: add fluent-ffmpeg and refactor batch process
This commit is contained in:
9
bun.lock
9
bun.lock
@ -6,6 +6,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/client-polly": "^3.823.0",
|
"@aws-sdk/client-polly": "^3.823.0",
|
||||||
"@hono/node-server": "^1.14.3",
|
"@hono/node-server": "^1.14.3",
|
||||||
|
"fluent-ffmpeg": "^2.1.3",
|
||||||
"hono": "^4.7.11",
|
"hono": "^4.7.11",
|
||||||
"openai": "^4.104.0",
|
"openai": "^4.104.0",
|
||||||
"react": "^19.1.0",
|
"react": "^19.1.0",
|
||||||
@ -327,6 +328,8 @@
|
|||||||
|
|
||||||
"agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="],
|
"agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="],
|
||||||
|
|
||||||
|
"async": ["async@0.2.10", "", {}, "sha512-eAkdoKxU6/LkKDBzLpT+t6Ff5EtfSF4wx1WfJiPEEV7WNLnDaRXk0oVysiEPm262roaachGexwUv94WhSgN5TQ=="],
|
||||||
|
|
||||||
"asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="],
|
"asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="],
|
||||||
|
|
||||||
"bowser": ["bowser@2.11.0", "", {}, "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="],
|
"bowser": ["bowser@2.11.0", "", {}, "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="],
|
||||||
@ -373,6 +376,8 @@
|
|||||||
|
|
||||||
"fdir": ["fdir@6.4.5", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-4BG7puHpVsIYxZUbiUE3RqGloLaSSwzYie5jvasC4LWuBWzZawynvYouhjbQKw2JuIGYdm0DzIxl8iVidKlUEw=="],
|
"fdir": ["fdir@6.4.5", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-4BG7puHpVsIYxZUbiUE3RqGloLaSSwzYie5jvasC4LWuBWzZawynvYouhjbQKw2JuIGYdm0DzIxl8iVidKlUEw=="],
|
||||||
|
|
||||||
|
"fluent-ffmpeg": ["fluent-ffmpeg@2.1.3", "", { "dependencies": { "async": "^0.2.9", "which": "^1.1.1" } }, "sha512-Be3narBNt2s6bsaqP6Jzq91heDgOEaDCJAXcE3qcma/EJBSy5FB4cvO31XBInuAuKBx8Kptf8dkhjK0IOru39Q=="],
|
||||||
|
|
||||||
"form-data": ["form-data@4.0.2", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "mime-types": "^2.1.12" } }, "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w=="],
|
"form-data": ["form-data@4.0.2", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "mime-types": "^2.1.12" } }, "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w=="],
|
||||||
|
|
||||||
"form-data-encoder": ["form-data-encoder@1.7.2", "", {}, "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="],
|
"form-data-encoder": ["form-data-encoder@1.7.2", "", {}, "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="],
|
||||||
@ -403,6 +408,8 @@
|
|||||||
|
|
||||||
"humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="],
|
"humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="],
|
||||||
|
|
||||||
|
"isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="],
|
||||||
|
|
||||||
"js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="],
|
"js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="],
|
||||||
|
|
||||||
"jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="],
|
"jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="],
|
||||||
@ -477,6 +484,8 @@
|
|||||||
|
|
||||||
"whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="],
|
"whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="],
|
||||||
|
|
||||||
|
"which": ["which@1.3.1", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "which": "./bin/which" } }, "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ=="],
|
||||||
|
|
||||||
"xml2js": ["xml2js@0.5.0", "", { "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" } }, "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA=="],
|
"xml2js": ["xml2js@0.5.0", "", { "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" } }, "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA=="],
|
||||||
|
|
||||||
"xmlbuilder": ["xmlbuilder@11.0.1", "", {}, "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="],
|
"xmlbuilder": ["xmlbuilder@11.0.1", "", {}, "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="],
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/client-polly": "^3.823.0",
|
"@aws-sdk/client-polly": "^3.823.0",
|
||||||
"@hono/node-server": "^1.14.3",
|
"@hono/node-server": "^1.14.3",
|
||||||
|
"fluent-ffmpeg": "^2.1.3",
|
||||||
"hono": "^4.7.11",
|
"hono": "^4.7.11",
|
||||||
"openai": "^4.104.0",
|
"openai": "^4.104.0",
|
||||||
"react": "^19.1.0",
|
"react": "^19.1.0",
|
||||||
|
@ -1,145 +0,0 @@
|
|||||||
import Parser from "rss-parser";
|
|
||||||
import {
|
|
||||||
openAI_ClassifyFeed,
|
|
||||||
openAI_GeneratePodcastContent,
|
|
||||||
} from "../services/llm";
|
|
||||||
import { generateTTS } from "../services/tts";
|
|
||||||
import { saveEpisode, markAsProcessed } from "../services/database";
|
|
||||||
import { updatePodcastRSS } from "../services/podcast";
|
|
||||||
import crypto from "crypto";
|
|
||||||
|
|
||||||
interface FeedItem {
|
|
||||||
id: string;
|
|
||||||
title: string;
|
|
||||||
link: string;
|
|
||||||
pubDate: string;
|
|
||||||
contentSnippet?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
import fs from "fs/promises";
|
|
||||||
import path from "path";
|
|
||||||
import { fileURLToPath } from "url";
|
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
|
||||||
const __dirname = path.dirname(__filename);
|
|
||||||
|
|
||||||
async function main() {
|
|
||||||
const feedUrlsFile = import.meta.env["FEED_URLS_FILE"] ?? "feed_urls.txt";
|
|
||||||
const feedUrlsPath = path.resolve(__dirname, "..", feedUrlsFile);
|
|
||||||
let feedUrls: string[];
|
|
||||||
try {
|
|
||||||
const data = await fs.readFile(feedUrlsPath, "utf-8");
|
|
||||||
feedUrls = data
|
|
||||||
.split("\n")
|
|
||||||
.map((url) => url.trim())
|
|
||||||
.filter((url) => url.length > 0);
|
|
||||||
} catch (err) {
|
|
||||||
console.warn(`フィードURLファイルの読み込みに失敗: ${feedUrlsFile}`);
|
|
||||||
feedUrls = [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// フィードごとに処理
|
|
||||||
for (const url of feedUrls) {
|
|
||||||
try {
|
|
||||||
await processFeedUrl(url);
|
|
||||||
} finally {
|
|
||||||
await updatePodcastRSS();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log("処理完了:", new Date().toISOString());
|
|
||||||
}
|
|
||||||
|
|
||||||
const processFeedUrl = async (url: string) => {
|
|
||||||
const parser = new Parser<FeedItem>();
|
|
||||||
const feed = await parser.parseURL(url);
|
|
||||||
|
|
||||||
// フィードのカテゴリ分類
|
|
||||||
const feedTitle = feed.title || url;
|
|
||||||
const category = await openAI_ClassifyFeed(feedTitle);
|
|
||||||
console.log(`フィード分類完了: ${feedTitle} - ${category}`);
|
|
||||||
|
|
||||||
// 昨日の記事のみフィルタリング
|
|
||||||
const yesterday = new Date();
|
|
||||||
yesterday.setDate(yesterday.getDate() - 1);
|
|
||||||
|
|
||||||
const yesterdayItems = feed.items.filter((item) => {
|
|
||||||
const pub = new Date(item.pubDate || "");
|
|
||||||
return (
|
|
||||||
pub.getFullYear() === yesterday.getFullYear() &&
|
|
||||||
pub.getMonth() === yesterday.getMonth() &&
|
|
||||||
pub.getDate() === yesterday.getDate()
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (yesterdayItems.length === 0) {
|
|
||||||
console.log(`昨日の記事が見つかりません: ${feedTitle}`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ポッドキャスト原稿生成
|
|
||||||
console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`);
|
|
||||||
const validItems = yesterdayItems.filter((item): item is FeedItem => {
|
|
||||||
return !!item.title && !!item.link;
|
|
||||||
});
|
|
||||||
const podcastContent = await openAI_GeneratePodcastContent(
|
|
||||||
feedTitle,
|
|
||||||
validItems,
|
|
||||||
);
|
|
||||||
|
|
||||||
// トピックごとの統合音声生成
|
|
||||||
const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
|
|
||||||
const categoryHash = crypto.createHash("md5").update(category).digest("hex");
|
|
||||||
const uniqueId = `${feedUrlHash}-${categoryHash}`;
|
|
||||||
|
|
||||||
const audioFilePath = await generateTTS(uniqueId, podcastContent);
|
|
||||||
console.log(`音声ファイル生成完了: ${audioFilePath}`);
|
|
||||||
|
|
||||||
// エピソードとして保存(各フィードにつき1つの統合エピソード)
|
|
||||||
const firstItem = yesterdayItems[0];
|
|
||||||
if (!firstItem) {
|
|
||||||
console.warn("アイテムが空です");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const pub = new Date(firstItem.pubDate || "");
|
|
||||||
|
|
||||||
await saveEpisode({
|
|
||||||
id: uniqueId,
|
|
||||||
title: `${category}: ${feedTitle}`,
|
|
||||||
pubDate: pub.toISOString(),
|
|
||||||
audioPath: audioFilePath,
|
|
||||||
sourceLink: url,
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`エピソード保存完了: ${category} - ${feedTitle}`);
|
|
||||||
|
|
||||||
// 個別記事の処理記録
|
|
||||||
for (const item of yesterdayItems) {
|
|
||||||
const itemId = item["id"] as string | undefined;
|
|
||||||
const fallbackId = item.link || item.title || JSON.stringify(item);
|
|
||||||
const finalItemId =
|
|
||||||
itemId && typeof itemId === "string" && itemId.trim() !== ""
|
|
||||||
? itemId
|
|
||||||
: `fallback-${Buffer.from(fallbackId).toString("base64")}`;
|
|
||||||
|
|
||||||
if (!finalItemId || finalItemId.trim() === "") {
|
|
||||||
console.warn(`フィードアイテムのIDを生成できませんでした`, {
|
|
||||||
feedUrl: url,
|
|
||||||
itemTitle: item.title,
|
|
||||||
itemLink: item.link,
|
|
||||||
});
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const already = await markAsProcessed(url, finalItemId);
|
|
||||||
if (already) {
|
|
||||||
console.log(`既に処理済み: ${finalItemId}`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
main().catch((err) => {
|
|
||||||
console.error("エラー発生:", err);
|
|
||||||
process.exit(1);
|
|
||||||
});
|
|
15
server.ts
15
server.ts
@ -3,7 +3,7 @@ import { serve } from "@hono/node-server";
|
|||||||
import fs from "fs";
|
import fs from "fs";
|
||||||
import path from "path";
|
import path from "path";
|
||||||
import { Database } from "bun:sqlite";
|
import { Database } from "bun:sqlite";
|
||||||
import { batchProcess } from "./scripts/fetch_and_generate";
|
import { batchProcess } from "./services/fetch_and_generate";
|
||||||
|
|
||||||
import { setInterval } from "timers";
|
import { setInterval } from "timers";
|
||||||
|
|
||||||
@ -198,10 +198,19 @@ function scheduleFirstBatchProcess() {
|
|||||||
|
|
||||||
function scheduleDailyBatchProcess() {
|
function scheduleDailyBatchProcess() {
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
const nextRun = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1, 0, 0, 0);
|
const nextRun = new Date(
|
||||||
|
now.getFullYear(),
|
||||||
|
now.getMonth(),
|
||||||
|
now.getDate() + 1,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
|
||||||
const delay = nextRun.getTime() - now.getTime();
|
const delay = nextRun.getTime() - now.getTime();
|
||||||
console.log(`Next daily batch process scheduled in ${delay / 1000 / 60} minutes`);
|
console.log(
|
||||||
|
`Next daily batch process scheduled in ${delay / 1000 / 60} minutes`,
|
||||||
|
);
|
||||||
|
|
||||||
setTimeout(async () => {
|
setTimeout(async () => {
|
||||||
try {
|
try {
|
||||||
|
Reference in New Issue
Block a user