feat: implement feed classification and podcast content generation
This commit is contained in:
		@@ -1,5 +1,8 @@
 | 
			
		||||
import Parser from "rss-parser";
 | 
			
		||||
import { openAI_GenerateScript } from "../services/llm";
 | 
			
		||||
import { 
 | 
			
		||||
  openAI_ClassifyFeed, 
 | 
			
		||||
  openAI_GeneratePodcastContent 
 | 
			
		||||
} from "../services/llm";
 | 
			
		||||
import { generateTTS } from "../services/tts";
 | 
			
		||||
import { saveEpisode, markAsProcessed } from "../services/database";
 | 
			
		||||
import { updatePodcastRSS } from "../services/podcast";
 | 
			
		||||
@@ -36,68 +39,81 @@ async function main() {
 | 
			
		||||
    feedUrls = [];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // フィードごとに処理
 | 
			
		||||
  for (const url of feedUrls) {
 | 
			
		||||
    const feed = await parser.parseURL(url);
 | 
			
		||||
    for (const item of feed.items) {
 | 
			
		||||
    
 | 
			
		||||
    // フィードのカテゴリ分類
 | 
			
		||||
    const feedTitle = feed.title || url;
 | 
			
		||||
    const category = await openAI_ClassifyFeed(feedTitle);
 | 
			
		||||
    console.log(`フィード分類完了: ${feedTitle} - ${category}`);
 | 
			
		||||
    
 | 
			
		||||
    // 昨日の記事のみフィルタリング
 | 
			
		||||
    const yesterday = new Date();
 | 
			
		||||
    yesterday.setDate(yesterday.getDate() - 1);
 | 
			
		||||
    
 | 
			
		||||
    const yesterdayItems = feed.items.filter(item => {
 | 
			
		||||
      const pub = new Date(item.pubDate || "");
 | 
			
		||||
      const today = new Date();
 | 
			
		||||
      const yesterday = new Date(today);
 | 
			
		||||
      yesterday.setDate(today.getDate() - 1);
 | 
			
		||||
 | 
			
		||||
      if (
 | 
			
		||||
      return (
 | 
			
		||||
        pub.getFullYear() === yesterday.getFullYear() &&
 | 
			
		||||
        pub.getMonth() === yesterday.getMonth() &&
 | 
			
		||||
        pub.getDate() === yesterday.getDate()
 | 
			
		||||
      ) {
 | 
			
		||||
        // Use item.id if available, otherwise generate fallback ID from title or link
 | 
			
		||||
        const itemId = item["id"] as string | undefined;
 | 
			
		||||
        const fallbackId = item.link || item.title || JSON.stringify(item);
 | 
			
		||||
        const finalItemId =
 | 
			
		||||
          itemId && typeof itemId === "string" && itemId.trim() !== ""
 | 
			
		||||
            ? itemId
 | 
			
		||||
            : `fallback-${Buffer.from(fallbackId).toString("base64")}`;
 | 
			
		||||
 | 
			
		||||
        // Skip if even the fallback ID is missing (should be rare)
 | 
			
		||||
        if (!finalItemId || finalItemId.trim() === "") {
 | 
			
		||||
          console.warn(`フィードアイテムのIDを生成できませんでした`, {
 | 
			
		||||
            feedUrl: url,
 | 
			
		||||
            itemTitle: item.title,
 | 
			
		||||
            itemLink: item.link,
 | 
			
		||||
          });
 | 
			
		||||
          continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const already = await markAsProcessed(url, finalItemId);
 | 
			
		||||
        if (already) {
 | 
			
		||||
          console.log(`既に処理済み: ${finalItemId}`);
 | 
			
		||||
          continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        console.log(`スクリプト生成開始: ${finalItemId}`);
 | 
			
		||||
        const scriptText = await openAI_GenerateScript({
 | 
			
		||||
          title: item.title ?? "",
 | 
			
		||||
          link: item.link ?? "",
 | 
			
		||||
          contentSnippet: item.contentSnippet ?? "",
 | 
			
		||||
      );
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    if (yesterdayItems.length === 0) {
 | 
			
		||||
      console.log(`昨日の記事が見つかりません: ${feedTitle}`);
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // ポッドキャスト原稿生成
 | 
			
		||||
    console.log(`ポッドキャスト原稿生成開始: ${feedTitle}`);
 | 
			
		||||
    const podcastContent = await openAI_GeneratePodcastContent(feedTitle, yesterdayItems);
 | 
			
		||||
    
 | 
			
		||||
    // トピックごとの統合音声生成
 | 
			
		||||
    const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
 | 
			
		||||
    const categoryHash = crypto.createHash("md5").update(category).digest("hex");
 | 
			
		||||
    const uniqueFilename = `${feedUrlHash}-${categoryHash}.mp3`;
 | 
			
		||||
    
 | 
			
		||||
    const audioFilePath = await generateTTS(uniqueFilename, podcastContent);
 | 
			
		||||
    console.log(`音声ファイル生成完了: ${audioFilePath}`);
 | 
			
		||||
    
 | 
			
		||||
    // エピソードとして保存(各フィードにつき1つの統合エピソード)
 | 
			
		||||
    const firstItem = yesterdayItems[0];
 | 
			
		||||
    const pub = new Date(firstItem.pubDate || "");
 | 
			
		||||
    
 | 
			
		||||
    await saveEpisode({
 | 
			
		||||
      id: `topic-${categoryHash}`,
 | 
			
		||||
      title: `${category}: ${feedTitle}`,
 | 
			
		||||
      pubDate: pub.toISOString(),
 | 
			
		||||
      audioPath: audioFilePath,
 | 
			
		||||
      sourceLink: url,
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    console.log(`エピソード保存完了: ${category} - ${feedTitle}`);
 | 
			
		||||
    
 | 
			
		||||
    // 個別記事の処理記録
 | 
			
		||||
    for (const item of yesterdayItems) {
 | 
			
		||||
      const itemId = item["id"] as string | undefined;
 | 
			
		||||
      const fallbackId = item.link || item.title || JSON.stringify(item);
 | 
			
		||||
      const finalItemId =
 | 
			
		||||
        itemId && typeof itemId === "string" && itemId.trim() !== ""
 | 
			
		||||
          ? itemId
 | 
			
		||||
          : `fallback-${Buffer.from(fallbackId).toString("base64")}`;
 | 
			
		||||
          
 | 
			
		||||
      if (!finalItemId || finalItemId.trim() === "") {
 | 
			
		||||
        console.warn(`フィードアイテムのIDを生成できませんでした`, {
 | 
			
		||||
          feedUrl: url,
 | 
			
		||||
          itemTitle: item.title,
 | 
			
		||||
          itemLink: item.link,
 | 
			
		||||
        });
 | 
			
		||||
        
 | 
			
		||||
        // Generate a unique filename using the feed URL hash and item ID
 | 
			
		||||
        const feedUrlHash = crypto.createHash("md5").update(url).digest("hex");
 | 
			
		||||
        const itemIdHash = crypto.createHash("md5").update(finalItemId).digest("hex");
 | 
			
		||||
        const uniqueFilename = `${feedUrlHash}-${itemIdHash}.mp3`;
 | 
			
		||||
        
 | 
			
		||||
        const audioFilePath = await generateTTS(uniqueFilename, scriptText);
 | 
			
		||||
 | 
			
		||||
        console.log(`音声ファイル生成完了: ${audioFilePath}`);
 | 
			
		||||
        
 | 
			
		||||
        await saveEpisode({
 | 
			
		||||
          id: finalItemId,
 | 
			
		||||
          title: item.title ?? "",
 | 
			
		||||
          pubDate: pub.toISOString(),
 | 
			
		||||
          audioPath: audioFilePath,
 | 
			
		||||
          sourceLink: item.link ?? "",
 | 
			
		||||
        });
 | 
			
		||||
        
 | 
			
		||||
        console.log(`エピソード保存完了: ${finalItemId}`);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      const already = await markAsProcessed(url, finalItemId);
 | 
			
		||||
      if (already) {
 | 
			
		||||
        console.log(`既に処理済み: ${finalItemId}`);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user