feat: implement feed classification and podcast content generation
This commit is contained in:
@ -1,5 +1,8 @@
|
||||
import Parser from "rss-parser";
|
||||
import { openAI_GenerateScript } from "../services/llm";
|
||||
import {
|
||||
openAI_ClassifyFeed,
|
||||
openAI_GeneratePodcastContent
|
||||
} from "../services/llm";
|
||||
import { generateTTS } from "../services/tts";
|
||||
import { saveEpisode, markAsProcessed } from "../services/database";
|
||||
import { updatePodcastRSS } from "../services/podcast";
|
||||
@ -36,68 +39,81 @@ async function main() {
|
||||
feedUrls = [];
|
||||
}
|
||||
|
||||
// フィードごとに処理
|
||||
for (const url of feedUrls) {
|
||||
const feed = await parser.parseURL(url);
|
||||
for (const item of feed.items) {
|
||||
|
||||
// フィードのカテゴリ分類
|
||||
const feedTitle = feed.title || url;
|
||||
const category = await openAI_ClassifyFeed(feedTitle);
|
||||
console.log(`フィード分類完了: ${feedTitle} - ${category}`);
|
||||
|
||||
// 昨日の記事のみフィルタリング
|
||||
const yesterday = new Date();
|
||||
yesterday.setDate(yesterday.getDate() - 1);
|
||||
|
||||
const yesterdayItems = feed.items.filter(item => {
|
||||
const pub = new Date(item.pubDate || "");
|
||||
const today = new Date();
|
||||
const yesterday = new Date(today);
|
||||
yesterday.setDate(today.getDate() - 1);
|
||||
|
||||
if (
|
||||
return (
|
||||
pub.getFullYear() === yesterday.getFullYear() &&
|
||||
pub.getMonth() === yesterday.getMonth() &&
|
||||
pub.getDate() === yesterday.getDate()
|
||||
) {
|
||||
// Use item.id if available, otherwise generate fallback ID from title or link
|
||||
const itemId = item["id"] as string | undefined;
|
||||
const fallbackId = item.link || item.title || JSON.stringify(item);
|
||||
const finalItemId =
|
||||
itemId && typeof itemId === "string" && itemId.trim() !== ""
|
||||
? itemId
|
||||
: `fallback-${Buffer.from(fallbackId).toString("base64")}`;
|
||||
|
||||
// Skip if even the fallback ID is missing (should be rare)
|
||||
if (!finalItemId || finalItemId.trim() === "") {
|
||||
console.warn(`フィードアイテムのIDを生成できませんでした`, {
|
||||
feedUrl: url,
|
||||
itemTitle: item.title,
|
||||
itemLink: item.link,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const already = await markAsProcessed(url, finalItemId);
|
||||
if (already) {
|
||||
console.log(`既に処理済み: ${finalItemId}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log(`スクリプト生成開始: ${finalItemId}`);
|
||||
const scriptText = await openAI_GenerateScript({
|
||||
title: item.title ?? "",
|
||||
link: item.link ?? "",
|
||||
contentSnippet: item.contentSnippet ?? "",
|
||||
);
|
||||
});
|
||||
|
||||
if (yesterdayItems.length === 0) {
|
||||
console.log(`昨日の記事が見つかりません: ${feedTitle}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// ポッドキャスト原稿生成
|
||||
console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`);
|
||||
const podcastContent = await openAI_GeneratePodcastContent(feedTitle, yesterdayItems);
|
||||
|
||||
// トピックごとの統合音声生成
|
||||
const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
|
||||
const categoryHash = crypto.createHash("md5").update(category).digest("hex");
|
||||
const uniqueFilename = `${feedUrlHash}-${categoryHash}.mp3`;
|
||||
|
||||
const audioFilePath = await generateTTS(uniqueFilename, podcastContent);
|
||||
console.log(`音声ファイル生成完了: ${audioFilePath}`);
|
||||
|
||||
// エピソードとして保存(各フィードにつき1つの統合エピソード)
|
||||
const firstItem = yesterdayItems[0];
|
||||
const pub = new Date(firstItem.pubDate || "");
|
||||
|
||||
await saveEpisode({
|
||||
id: `topic-${categoryHash}`,
|
||||
title: `${category}: ${feedTitle}`,
|
||||
pubDate: pub.toISOString(),
|
||||
audioPath: audioFilePath,
|
||||
sourceLink: url,
|
||||
});
|
||||
|
||||
console.log(`エピソード保存完了: ${category} - ${feedTitle}`);
|
||||
|
||||
// 個別記事の処理記録
|
||||
for (const item of yesterdayItems) {
|
||||
const itemId = item["id"] as string | undefined;
|
||||
const fallbackId = item.link || item.title || JSON.stringify(item);
|
||||
const finalItemId =
|
||||
itemId && typeof itemId === "string" && itemId.trim() !== ""
|
||||
? itemId
|
||||
: `fallback-${Buffer.from(fallbackId).toString("base64")}`;
|
||||
|
||||
if (!finalItemId || finalItemId.trim() === "") {
|
||||
console.warn(`フィードアイテムのIDを生成できませんでした`, {
|
||||
feedUrl: url,
|
||||
itemTitle: item.title,
|
||||
itemLink: item.link,
|
||||
});
|
||||
|
||||
// Generate a unique filename using the feed URL hash and item ID
|
||||
const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
|
||||
const itemIdHash = crypto.createHash("md5").update(finalItemId).digest("hex");
|
||||
const uniqueFilename = `${feedUrlHash}-${itemIdHash}.mp3`;
|
||||
|
||||
const audioFilePath = await generateTTS(uniqueFilename, scriptText);
|
||||
|
||||
console.log(`音声ファイル生成完了: ${audioFilePath}`);
|
||||
|
||||
await saveEpisode({
|
||||
id: finalItemId,
|
||||
title: item.title ?? "",
|
||||
pubDate: pub.toISOString(),
|
||||
audioPath: audioFilePath,
|
||||
sourceLink: item.link ?? "",
|
||||
});
|
||||
|
||||
console.log(`エピソード保存完了: ${finalItemId}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const already = await markAsProcessed(url, finalItemId);
|
||||
if (already) {
|
||||
console.log(`既に処理済み: ${finalItemId}`);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user