feat: add service to fetch and generate podcast content
This commit is contained in:
		
							
								
								
									
										137
									
								
								services/fetch_and_generate.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								services/fetch_and_generate.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,137 @@
 | 
				
			|||||||
 | 
					import Parser from "rss-parser";
 | 
				
			||||||
 | 
					import { openAI_ClassifyFeed, openAI_GeneratePodcastContent } from "./llm";
 | 
				
			||||||
 | 
					import { generateTTS } from "./tts";
 | 
				
			||||||
 | 
					import { saveEpisode, markAsProcessed } from "./database";
 | 
				
			||||||
 | 
					import { updatePodcastRSS } from "./podcast";
 | 
				
			||||||
 | 
					import crypto from "crypto";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					interface FeedItem {
 | 
				
			||||||
 | 
					  id: string;
 | 
				
			||||||
 | 
					  title: string;
 | 
				
			||||||
 | 
					  link: string;
 | 
				
			||||||
 | 
					  pubDate: string;
 | 
				
			||||||
 | 
					  contentSnippet?: string;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import fs from "fs/promises";
 | 
				
			||||||
 | 
					import path from "path";
 | 
				
			||||||
 | 
					import { fileURLToPath } from "url";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const __filename = fileURLToPath(import.meta.url);
 | 
				
			||||||
 | 
					const __dirname = path.dirname(__filename);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export async function batchProcess() {
 | 
				
			||||||
 | 
					  const feedUrlsFile = import.meta.env["FEED_URLS_FILE"] ?? "feed_urls.txt";
 | 
				
			||||||
 | 
					  const feedUrlsPath = path.resolve(__dirname, "..", feedUrlsFile);
 | 
				
			||||||
 | 
					  let feedUrls: string[];
 | 
				
			||||||
 | 
					  try {
 | 
				
			||||||
 | 
					    const data = await fs.readFile(feedUrlsPath, "utf-8");
 | 
				
			||||||
 | 
					    feedUrls = data
 | 
				
			||||||
 | 
					      .split("\n")
 | 
				
			||||||
 | 
					      .map((url) => url.trim())
 | 
				
			||||||
 | 
					      .filter((url) => url.length > 0);
 | 
				
			||||||
 | 
					  } catch (err) {
 | 
				
			||||||
 | 
					    console.warn(`フィードURLファイルの読み込みに失敗: ${feedUrlsFile}`);
 | 
				
			||||||
 | 
					    feedUrls = [];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // フィードごとに処理
 | 
				
			||||||
 | 
					  for (const url of feedUrls) {
 | 
				
			||||||
 | 
					    try {
 | 
				
			||||||
 | 
					      await processFeedUrl(url);
 | 
				
			||||||
 | 
					    } finally {
 | 
				
			||||||
 | 
					      await updatePodcastRSS();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  console.log("処理完了:", new Date().toISOString());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const processFeedUrl = async (url: string) => {
 | 
				
			||||||
 | 
					  const parser = new Parser<FeedItem>();
 | 
				
			||||||
 | 
					  const feed = await parser.parseURL(url);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // フィードのカテゴリ分類
 | 
				
			||||||
 | 
					  const feedTitle = feed.title || url;
 | 
				
			||||||
 | 
					  const category = await openAI_ClassifyFeed(feedTitle);
 | 
				
			||||||
 | 
					  console.log(`フィード分類完了: ${feedTitle} - ${category}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // 昨日の記事のみフィルタリング
 | 
				
			||||||
 | 
					  const yesterday = new Date();
 | 
				
			||||||
 | 
					  yesterday.setDate(yesterday.getDate() - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const yesterdayItems = feed.items.filter((item) => {
 | 
				
			||||||
 | 
					    const pub = new Date(item.pubDate || "");
 | 
				
			||||||
 | 
					    return (
 | 
				
			||||||
 | 
					      pub.getFullYear() === yesterday.getFullYear() &&
 | 
				
			||||||
 | 
					      pub.getMonth() === yesterday.getMonth() &&
 | 
				
			||||||
 | 
					      pub.getDate() === yesterday.getDate()
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (yesterdayItems.length === 0) {
 | 
				
			||||||
 | 
					    console.log(`昨日の記事が見つかりません: ${feedTitle}`);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // ポッドキャスト原稿生成
 | 
				
			||||||
 | 
					  console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`);
 | 
				
			||||||
 | 
					  const validItems = yesterdayItems.filter((item): item is FeedItem => {
 | 
				
			||||||
 | 
					    return !!item.title && !!item.link;
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					  const podcastContent = await openAI_GeneratePodcastContent(
 | 
				
			||||||
 | 
					    feedTitle,
 | 
				
			||||||
 | 
					    validItems,
 | 
				
			||||||
 | 
					  );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // トピックごとの統合音声生成
 | 
				
			||||||
 | 
					  const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
 | 
				
			||||||
 | 
					  const categoryHash = crypto.createHash("md5").update(category).digest("hex");
 | 
				
			||||||
 | 
					  const uniqueId = `${feedUrlHash}-${categoryHash}`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const audioFilePath = await generateTTS(uniqueId, podcastContent);
 | 
				
			||||||
 | 
					  console.log(`音声ファイル生成完了: ${audioFilePath}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // エピソードとして保存(各フィードにつき1つの統合エピソード)
 | 
				
			||||||
 | 
					  const firstItem = yesterdayItems[0];
 | 
				
			||||||
 | 
					  if (!firstItem) {
 | 
				
			||||||
 | 
					    console.warn("アイテムが空です");
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const pub = new Date(firstItem.pubDate || "");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  await saveEpisode({
 | 
				
			||||||
 | 
					    id: uniqueId,
 | 
				
			||||||
 | 
					    title: `${category}: ${feedTitle}`,
 | 
				
			||||||
 | 
					    pubDate: pub.toISOString(),
 | 
				
			||||||
 | 
					    audioPath: audioFilePath,
 | 
				
			||||||
 | 
					    sourceLink: url,
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  console.log(`エピソード保存完了: ${category} - ${feedTitle}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // 個別記事の処理記録
 | 
				
			||||||
 | 
					  for (const item of yesterdayItems) {
 | 
				
			||||||
 | 
					    const itemId = item["id"] as string | undefined;
 | 
				
			||||||
 | 
					    const fallbackId = item.link || item.title || JSON.stringify(item);
 | 
				
			||||||
 | 
					    const finalItemId =
 | 
				
			||||||
 | 
					      itemId && typeof itemId === "string" && itemId.trim() !== ""
 | 
				
			||||||
 | 
					        ? itemId
 | 
				
			||||||
 | 
					        : `fallback-${Buffer.from(fallbackId).toString("base64")}`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!finalItemId || finalItemId.trim() === "") {
 | 
				
			||||||
 | 
					      console.warn(`フィードアイテムのIDを生成できませんでした`, {
 | 
				
			||||||
 | 
					        feedUrl: url,
 | 
				
			||||||
 | 
					        itemTitle: item.title,
 | 
				
			||||||
 | 
					        itemLink: item.link,
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					      continue;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const already = await markAsProcessed(url, finalItemId);
 | 
				
			||||||
 | 
					    if (already) {
 | 
				
			||||||
 | 
					      console.log(`既に処理済み: ${finalItemId}`);
 | 
				
			||||||
 | 
					      continue;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
		Reference in New Issue
	
	Block a user