Update category management and RSS endpoint handling

This commit is contained in:
2025-06-08 21:50:31 +09:00
parent 4aa1b5c56a
commit cd0e4065fc
13 changed files with 1171 additions and 70 deletions

View File

@ -86,24 +86,39 @@ class BatchScheduler {
try {
console.log("🔄 Running scheduled batch process...");
// Run migration for feeds without categories (only once)
// Run migrations (only once per startup)
if (!this.migrationCompleted) {
try {
// Feed category migration
const { migrateFeedsWithCategories, getFeedCategoryMigrationStatus } =
await import("./database.js");
const migrationStatus = await getFeedCategoryMigrationStatus();
const feedMigrationStatus = await getFeedCategoryMigrationStatus();
if (!migrationStatus.migrationComplete) {
if (!feedMigrationStatus.migrationComplete) {
console.log("🔄 Running feed category migration...");
await migrateFeedsWithCategories();
this.migrationCompleted = true;
console.log("✅ Feed category migration completed");
} else {
console.log("✅ Feed category migration already complete");
this.migrationCompleted = true;
}
// Episode category migration
const { migrateEpisodesWithCategories, getEpisodeCategoryMigrationStatus } =
await import("./database.js");
const episodeMigrationStatus = await getEpisodeCategoryMigrationStatus();
if (!episodeMigrationStatus.migrationComplete) {
console.log("🔄 Running episode category migration...");
await migrateEpisodesWithCategories();
console.log("✅ Episode category migration completed");
} else {
console.log("✅ Episode category migration already complete");
}
this.migrationCompleted = true;
} catch (migrationError) {
console.error(
"❌ Error during feed category migration:",
"❌ Error during category migrations:",
migrationError,
);
// Don't fail the entire batch process due to migration error

View File

@ -230,7 +230,7 @@ export async function extractArticleContent(
}
export async function enhanceArticleContent(
originalTitle: string,
_originalTitle: string,
originalLink: string,
originalContent?: string,
originalDescription?: string,

View File

@ -159,6 +159,7 @@ function initializeDatabase(): Database {
audio_path TEXT NOT NULL,
duration INTEGER,
file_size INTEGER,
category TEXT,
created_at TEXT NOT NULL,
FOREIGN KEY(article_id) REFERENCES articles(id)
);
@ -207,14 +208,22 @@ function initializeDatabase(): Database {
// ALTER
// ALTER TABLE feeds ADD COLUMN category TEXT DEFAULT NULL;
// Ensure the category column exists
const infos = db.prepare("PRAGMA table_info(feeds);").all();
const hasCategory = infos.some((col: any) => col.name === "category");
// Ensure the category column exists in feeds
const feedInfos = db.prepare("PRAGMA table_info(feeds);").all();
const hasFeedCategory = feedInfos.some((col: any) => col.name === "category");
if (!hasCategory) {
if (!hasFeedCategory) {
db.exec("ALTER TABLE feeds ADD COLUMN category TEXT DEFAULT NULL;");
}
// Ensure the category column exists in episodes
const episodeInfos = db.prepare("PRAGMA table_info(episodes);").all();
const hasEpisodeCategory = episodeInfos.some((col: any) => col.name === "category");
if (!hasEpisodeCategory) {
db.exec("ALTER TABLE episodes ADD COLUMN category TEXT DEFAULT NULL;");
}
return db;
}
@ -251,6 +260,7 @@ export interface Episode {
audioPath: string;
duration?: number;
fileSize?: number;
category?: string;
createdAt: string;
}
@ -271,6 +281,7 @@ export interface EpisodeWithFeedInfo {
audioPath: string;
duration?: number;
fileSize?: number;
category?: string;
createdAt: string;
articleId: string;
articleTitle: string;
@ -415,6 +426,7 @@ export async function fetchEpisodesWithFeedInfo(): Promise<
e.audio_path as audioPath,
e.duration,
e.file_size as fileSize,
e.category,
e.created_at as createdAt,
e.article_id as articleId,
a.title as articleTitle,
@ -440,6 +452,7 @@ export async function fetchEpisodesWithFeedInfo(): Promise<
audioPath: row.audioPath,
duration: row.duration,
fileSize: row.fileSize,
category: row.category,
createdAt: row.createdAt,
articleId: row.articleId,
articleTitle: row.articleTitle,
@ -469,6 +482,7 @@ export async function fetchEpisodesByFeedId(
e.audio_path as audioPath,
e.duration,
e.file_size as fileSize,
e.category,
e.created_at as createdAt,
e.article_id as articleId,
a.title as articleTitle,
@ -494,6 +508,7 @@ export async function fetchEpisodesByFeedId(
audioPath: row.audioPath,
duration: row.duration,
fileSize: row.fileSize,
category: row.category,
createdAt: row.createdAt,
articleId: row.articleId,
articleTitle: row.articleTitle,
@ -523,6 +538,7 @@ export async function fetchEpisodeWithSourceInfo(
e.audio_path as audioPath,
e.duration,
e.file_size as fileSize,
e.category,
e.created_at as createdAt,
e.article_id as articleId,
a.title as articleTitle,
@ -548,6 +564,7 @@ export async function fetchEpisodeWithSourceInfo(
audioPath: row.audioPath,
duration: row.duration,
fileSize: row.fileSize,
category: row.category,
createdAt: row.createdAt,
articleId: row.articleId,
articleTitle: row.articleTitle,
@ -823,7 +840,7 @@ export async function saveEpisode(
try {
const stmt = db.prepare(
"INSERT INTO episodes (id, article_id, title, description, audio_path, duration, file_size, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
"INSERT INTO episodes (id, article_id, title, description, audio_path, duration, file_size, category, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
);
stmt.run(
id,
@ -833,6 +850,7 @@ export async function saveEpisode(
episode.audioPath,
episode.duration || null,
episode.fileSize || null,
episode.category || null,
createdAt,
);
return id;
@ -876,6 +894,7 @@ export async function fetchAllEpisodes(): Promise<Episode[]> {
e.audio_path as audioPath,
e.duration,
e.file_size as fileSize,
e.category,
e.created_at as createdAt
FROM episodes e
ORDER BY e.created_at DESC
@ -1289,6 +1308,278 @@ export async function deleteEpisode(episodeId: string): Promise<boolean> {
}
}
// Episode category management functions
/**
 * Fetches episodes joined with their article and feed, limited to feeds
 * whose `active` flag is 1.
 *
 * @param category - When truthy, only episodes whose `category` column equals
 *   this value are returned. When omitted (or an empty string), episodes of
 *   every category are returned.
 * @returns The matching episodes ordered by `created_at`, newest first.
 * @throws Rethrows any error raised while querying, after logging it.
 */
export async function getEpisodesByCategory(category?: string): Promise<EpisodeWithFeedInfo[]> {
  try {
    // Both modes share one query text; only this predicate differs. The
    // interpolated fragment is one of two fixed literals, never caller input,
    // and the category value itself is still bound as a parameter.
    const categoryPredicate = category ? "AND e.category = ?" : "";

    const stmt = db.prepare(`
      SELECT
        e.id,
        e.title,
        e.description,
        e.audio_path as audioPath,
        e.duration,
        e.file_size as fileSize,
        e.category,
        e.created_at as createdAt,
        e.article_id as articleId,
        a.title as articleTitle,
        a.link as articleLink,
        a.pub_date as articlePubDate,
        f.id as feedId,
        f.title as feedTitle,
        f.url as feedUrl,
        f.category as feedCategory
      FROM episodes e
      JOIN articles a ON e.article_id = a.id
      JOIN feeds f ON a.feed_id = f.id
      WHERE f.active = 1 ${categoryPredicate}
      ORDER BY e.created_at DESC
    `);

    const rows = (category ? stmt.all(category) : stmt.all()) as any[];

    return rows.map((row) => ({
      id: row.id,
      title: row.title,
      description: row.description,
      audioPath: row.audioPath,
      duration: row.duration,
      fileSize: row.fileSize,
      category: row.category,
      createdAt: row.createdAt,
      articleId: row.articleId,
      articleTitle: row.articleTitle,
      articleLink: row.articleLink,
      articlePubDate: row.articlePubDate,
      feedId: row.feedId,
      feedTitle: row.feedTitle,
      feedUrl: row.feedUrl,
      feedCategory: row.feedCategory,
    }));
  } catch (error) {
    console.error("Error getting episodes by category:", error);
    throw error;
  }
}
/**
 * Lists the distinct episode categories stored in the `episodes` table,
 * in the order produced by the query's `ORDER BY category`.
 *
 * @returns Category values, excluding NULL and any other falsy value
 *   (such as an empty string).
 * @throws Rethrows any error raised while querying, after logging it.
 */
export async function getAllEpisodeCategories(): Promise<string[]> {
  try {
    const distinctRows = db
      .prepare(
        "SELECT DISTINCT category FROM episodes WHERE category IS NOT NULL ORDER BY category",
      )
      .all() as any[];

    const categories: string[] = [];
    for (const row of distinctRows) {
      // The SQL only excludes NULL; this truthiness check also drops "".
      if (row.category) {
        categories.push(row.category);
      }
    }
    return categories;
  } catch (error) {
    console.error("Error getting all episode categories:", error);
    throw error;
  }
}
/**
 * Groups every episode returned by `fetchEpisodesWithFeedInfo()` by its
 * category.
 *
 * Episodes whose category is missing or empty are placed under "未分類".
 *
 * @returns An object mapping each category name to its episodes, with the
 *   episodes in each list kept in the order they were fetched.
 * @throws Rethrows any error raised while fetching, after logging it.
 */
export async function getEpisodesGroupedByCategory(): Promise<{
  [category: string]: EpisodeWithFeedInfo[];
}> {
  try {
    const episodes = await fetchEpisodesWithFeedInfo();

    // Category names are free text, so accumulate in a Map rather than a plain
    // object: a name like "constructor" or "__proto__" would otherwise resolve
    // to an inherited Object.prototype member instead of a fresh bucket.
    const buckets = new Map<string, EpisodeWithFeedInfo[]>();

    for (const episode of episodes) {
      const category = episode.category || "未分類";
      const bucket = buckets.get(category);
      if (bucket) {
        bucket.push(episode);
      } else {
        buckets.set(category, [episode]);
      }
    }

    // Object.fromEntries defines own data properties, so every key is safe.
    return Object.fromEntries(buckets);
  } catch (error) {
    console.error("Error getting episodes grouped by category:", error);
    throw error;
  }
}
/**
 * Counts episodes per category, considering only episodes whose feed has
 * `active = 1`.
 *
 * Episodes with a NULL or empty category are counted together under "未分類".
 *
 * @returns An object mapping each category label to its episode count, with
 *   keys inserted in descending count order.
 * @throws Rethrows any error raised while querying, after logging it.
 */
export async function getEpisodeCategoryStats(): Promise<{
  [category: string]: number;
}> {
  try {
    // Group by the same expression that produces the label. Grouping by the raw
    // column would emit separate rows for NULL, '' and a literal '未分類', and
    // rows sharing a label would then overwrite each other in the result.
    const stmt = db.prepare(`
      SELECT
        COALESCE(NULLIF(e.category, ''), '未分類') as category,
        COUNT(*) as count
      FROM episodes e
      JOIN articles a ON e.article_id = a.id
      JOIN feeds f ON a.feed_id = f.id
      WHERE f.active = 1
      GROUP BY COALESCE(NULLIF(e.category, ''), '未分類')
      ORDER BY count DESC
    `);

    const rows = stmt.all() as any[];

    // fromEntries defines own properties, so unusual labels such as
    // "__proto__" cannot touch the prototype chain.
    const stats: { [category: string]: number } = Object.fromEntries(
      rows.map((row): [string, number] => [row.category, row.count]),
    );
    return stats;
  } catch (error) {
    console.error("Error getting episode category stats:", error);
    throw error;
  }
}
/**
 * Sets the category of a single episode.
 *
 * @param episodeId - Value matched against the `id` column of `episodes`.
 * @param category - New category value to store.
 * @returns `true` when at least one row was changed, otherwise `false`.
 * @throws Rethrows any error raised while updating, after logging it.
 */
export async function updateEpisodeCategory(episodeId: string, category: string): Promise<boolean> {
  try {
    const { changes } = db
      .prepare("UPDATE episodes SET category = ? WHERE id = ?")
      .run(category, episodeId);
    return changes > 0;
  } catch (error) {
    console.error("Error updating episode category:", error);
    throw error;
  }
}
// Migration function to classify existing episodes without categories
/**
 * Assigns a category to every episode whose `category` is NULL or empty.
 *
 * Each such episode is classified with `openAI_ClassifyEpisode` using its
 * title and description. When classification (or the subsequent update)
 * throws, the episode is given the default category "その他" instead, so one
 * failure does not stop the remaining episodes from being processed.
 *
 * @throws Rethrows errors raised outside the per-episode handling (for
 *   example while selecting the episodes, importing the LLM module, or
 *   writing the default category), after logging them.
 */
export async function migrateEpisodesWithCategories(): Promise<void> {
  try {
    console.log("🔄 Starting episode category migration...");

    // Only the columns used below are fetched.
    const episodesWithoutCategories = db
      .prepare(
        "SELECT id, title, description FROM episodes WHERE category IS NULL OR category = ''",
      )
      .all() as any[];

    if (episodesWithoutCategories.length === 0) {
      console.log("✅ All episodes already have categories assigned");
      return;
    }

    console.log(
      `📋 Found ${episodesWithoutCategories.length} episodes without categories`,
    );

    // Import LLM service lazily, only when there is work to do.
    const { openAI_ClassifyEpisode } = await import("./llm.js");

    // The UPDATE is identical for every episode and for both the success and
    // fallback paths, so prepare it once instead of on every iteration.
    const updateStmt = db.prepare(
      "UPDATE episodes SET category = ? WHERE id = ?",
    );
    const defaultCategory = "その他";

    let processedCount = 0;
    let errorCount = 0;

    for (const episode of episodesWithoutCategories) {
      try {
        console.log(`🔍 Classifying episode: ${episode.title}`);

        const category = await openAI_ClassifyEpisode(
          episode.title,
          episode.description,
        );
        updateStmt.run(category, episode.id);

        console.log(
          `✅ Assigned category "${category}" to episode: ${episode.title}`,
        );
        processedCount++;

        // Pause between successful requests to avoid rate limiting.
        await new Promise((resolve) => setTimeout(resolve, 1000));
      } catch (error) {
        console.error(
          `❌ Failed to classify episode ${episode.title}:`,
          error,
        );
        errorCount++;

        // Fall back to the default category so the episode is not left blank.
        updateStmt.run(defaultCategory, episode.id);
        console.log(
          `! Assigned default category "${defaultCategory}" to episode: ${episode.title}`,
        );
      }
    }

    console.log(`✅ Episode category migration completed`);
    console.log(
      `📊 Processed: ${processedCount}, Errors: ${errorCount}, Total: ${episodesWithoutCategories.length}`,
    );
  } catch (error) {
    console.error("❌ Error during episode category migration:", error);
    throw error;
  }
}
// Function to get episode migration status
/**
 * Reports how many episodes already have a category.
 *
 * @returns The total episode count, the count with a non-NULL, non-empty
 *   category, the difference between the two, and `migrationComplete`, which
 *   is `true` when that difference is zero.
 * @throws Rethrows any error raised while querying, after logging it.
 */
export async function getEpisodeCategoryMigrationStatus(): Promise<{
  totalEpisodes: number;
  episodesWithCategories: number;
  episodesWithoutCategories: number;
  migrationComplete: boolean;
}> {
  try {
    // Runs a single-row COUNT query and returns its `count` column.
    const countRows = (sql: string) => (db.prepare(sql).get() as any).count;

    const totalEpisodes = countRows("SELECT COUNT(*) as count FROM episodes");
    const episodesWithCategories = countRows(
      "SELECT COUNT(*) as count FROM episodes WHERE category IS NOT NULL AND category != ''",
    );
    const episodesWithoutCategories = totalEpisodes - episodesWithCategories;

    return {
      totalEpisodes,
      episodesWithCategories,
      episodesWithoutCategories,
      migrationComplete: episodesWithoutCategories === 0,
    };
  } catch (error) {
    console.error("Error getting episode migration status:", error);
    throw error;
  }
}
/**
 * Closes the `db` handle used by this module's query functions by calling
 * its `close()` method.
 */
export function closeDatabase(): void {
db.close();
}

View File

@ -143,3 +143,69 @@ ${articleDetails}
);
}
}
/**
 * Classifies a podcast episode into one topic category using the chat
 * completions API.
 *
 * @param title - Episode title; must be non-blank.
 * @param description - Optional description, added to the prompt when non-blank.
 * @param content - Optional body text, added to the prompt when non-blank and
 *   truncated to 1500 characters.
 * @returns One of the categories listed in the prompt. "その他" is returned
 *   when the model's reply is empty or names none of them.
 * @throws Error when `title` is blank, or when the API request fails (the
 *   original failure message is included in the thrown error's message).
 */
export async function openAI_ClassifyEpisode(
  title: string,
  description?: string,
  content?: string,
): Promise<string> {
  if (!title || title.trim() === "") {
    throw new Error("Episode title is required for classification");
  }

  // Single source of truth for both the prompt and the reply validation.
  const fallbackCategory = "その他";
  const allowedCategories = [
    "テクノロジー",
    "ビジネス",
    "エンターテインメント",
    "スポーツ",
    "科学",
    "健康",
    "政治",
    "環境",
    "教育",
    fallbackCategory,
  ];

  // Build the text for classification based on available data
  let textForClassification = `タイトル: ${title}`;

  if (description && description.trim()) {
    textForClassification += `\n説明: ${description}`;
  }

  if (content && content.trim()) {
    const maxContentLength = 1500;
    const truncatedContent =
      content.length > maxContentLength
        ? content.substring(0, maxContentLength) + "..."
        : content;
    textForClassification += `\n内容: ${truncatedContent}`;
  }

  const categoryList = allowedCategories.map((name) => `- ${name}`).join("\n");
  const prompt = `
以下のポッドキャストエピソードの情報を見て、適切なトピックカテゴリに分類してください。
${textForClassification}
以下のカテゴリから1つを選択してください:
${categoryList}
エピソードの内容に最も適合するカテゴリを上記から1つだけ返してください。
`;

  try {
    const response = await openai.chat.completions.create({
      model: config.openai.modelName,
      messages: [{ role: "user", content: prompt.trim() }],
      temperature: 0.3,
    });

    const reply = response.choices[0]?.message?.content?.trim();
    if (!reply) {
      console.warn("OpenAI returned empty episode category, using default");
      return fallbackCategory;
    }

    // The model may wrap the answer in extra text ("カテゴリ: 科学", quotes,
    // a trailing sentence). Prefer an exact match, then the first listed
    // category mentioned anywhere in the reply, so that only known category
    // names are ever returned to callers.
    const matched =
      allowedCategories.find((name) => name === reply) ??
      allowedCategories.find((name) => reply.includes(name));

    if (!matched) {
      console.warn(
        `OpenAI returned unexpected episode category "${reply}", using default`,
      );
      return fallbackCategory;
    }

    return matched;
  } catch (error) {
    console.error("Error classifying episode:", error);
    throw new Error(
      `Failed to classify episode: ${error instanceof Error ? error.message : "Unknown error"}`,
    );
  }
}

View File

@ -3,7 +3,11 @@ import fsSync from "node:fs";
import path from "node:path";
import { dirname } from "path";
import { config } from "./config.js";
import { fetchEpisodesWithFeedInfo } from "./database.js";
import {
fetchEpisodesWithFeedInfo,
getEpisodesByCategory,
fetchEpisodesByFeedId
} from "./database.js";
function escapeXml(text: string): string {
return text
@ -64,40 +68,38 @@ function createItemXml(episode: any): string {
</item>`;
}
export async function updatePodcastRSS(): Promise<void> {
try {
// Use episodes with feed info for enhanced descriptions
const episodesWithFeedInfo = await fetchEpisodesWithFeedInfo();
// Filter episodes to only include those with valid audio files
/**
 * Keeps only the episodes whose audio file exists on disk.
 *
 * Each episode's `audioPath` is resolved against `config.paths.podcastAudioDir`.
 * Episodes whose file is missing, or whose path cannot be built, are dropped
 * and a warning naming the episode title is logged.
 *
 * @param episodes - Episode records carrying `audioPath` and `title`.
 * @returns A new array containing the episodes whose audio file was found.
 */
function filterValidEpisodes(episodes: any[]): any[] {
  return episodes.filter((episode) => {
    try {
      const audioPath = path.join(
        config.paths.podcastAudioDir,
        episode.audioPath,
      );
      if (fsSync.existsSync(audioPath)) {
        return true;
      }
    } catch {
      // path.join throws when audioPath is not a string; treat that the same
      // as a missing file and fall through to the warning below.
    }
    // existsSync reports a missing file by returning false rather than
    // throwing, so the warning has to be emitted here to ever be seen.
    console.warn(`Audio file not found for episode: ${episode.title}`);
    return false;
  });
}
// Filter episodes to only include those with valid audio files
const validEpisodes = episodesWithFeedInfo.filter((episode) => {
try {
const audioPath = path.join(
config.paths.podcastAudioDir,
episode.audioPath,
);
return fsSync.existsSync(audioPath);
} catch (error) {
console.warn(`Audio file not found for episode: ${episode.title}`);
return false;
}
});
console.log(
`Found ${episodesWithFeedInfo.length} episodes, ${validEpisodes.length} with valid audio files`,
);
const lastBuildDate = new Date().toUTCString();
const itemsXml = validEpisodes.map(createItemXml).join("\n");
const outputPath = path.join(config.paths.publicDir, "podcast.xml");
// Create RSS XML content
const rssXml = `<?xml version="1.0" encoding="UTF-8"?>
// Generate RSS XML from episodes
function generateRSSXml(
episodes: any[],
title: string,
description: string,
link?: string
): string {
const lastBuildDate = new Date().toUTCString();
const itemsXml = episodes.map(createItemXml).join("\n");
return `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>${escapeXml(config.podcast.title)}</title>
<link>${escapeXml(config.podcast.link)}</link>
<description><![CDATA[${escapeXml(config.podcast.description)}]]></description>
<title>${escapeXml(title)}</title>
<link>${escapeXml(link || config.podcast.link)}</link>
<description><![CDATA[${escapeXml(description)}]]></description>
<language>${config.podcast.language}</language>
<lastBuildDate>${lastBuildDate}</lastBuildDate>
<ttl>${config.podcast.ttl}</ttl>
@ -105,6 +107,24 @@ export async function updatePodcastRSS(): Promise<void> {
<category>${escapeXml(config.podcast.categories)}</category>${itemsXml}
</channel>
</rss>`;
}
export async function updatePodcastRSS(): Promise<void> {
try {
// Use episodes with feed info for enhanced descriptions
const episodesWithFeedInfo = await fetchEpisodesWithFeedInfo();
const validEpisodes = filterValidEpisodes(episodesWithFeedInfo);
console.log(
`Found ${episodesWithFeedInfo.length} episodes, ${validEpisodes.length} with valid audio files`,
);
const outputPath = path.join(config.paths.publicDir, "podcast.xml");
const rssXml = generateRSSXml(
validEpisodes,
config.podcast.title,
config.podcast.description
);
// Ensure directory exists
await fs.mkdir(dirname(outputPath), { recursive: true });
@ -118,3 +138,78 @@ export async function updatePodcastRSS(): Promise<void> {
throw error;
}
}
/**
 * Builds the RSS XML for one episode category.
 *
 * Episodes are loaded with `getEpisodesByCategory`, reduced with
 * `filterValidEpisodes`, and rendered by `generateRSSXml` using the podcast
 * title and description from `config`, each suffixed with the category.
 *
 * @param category - Category to build the feed for.
 * @returns The RSS document as a string.
 * @throws Rethrows any error from the steps above, after logging it.
 */
export async function generateCategoryRSS(category: string): Promise<string> {
  try {
    const categoryEpisodes = await getEpisodesByCategory(category);
    const playableEpisodes = filterValidEpisodes(categoryEpisodes);

    console.log(
      `Found ${categoryEpisodes.length} episodes for category "${category}", ${playableEpisodes.length} with valid audio files`,
    );

    return generateRSSXml(
      playableEpisodes,
      `${config.podcast.title} - ${category}`,
      `${config.podcast.description} カテゴリ: ${category}`,
    );
  } catch (error) {
    console.error(`Error generating category RSS for "${category}":`, error);
    throw error;
  }
}
/**
 * Generates the RSS XML for a category and writes it under
 * `config.paths.publicDir` as `podcast_category_<category>.xml`.
 *
 * Every character of the category outside ASCII letters, digits and the
 * Unicode ranges listed in the pattern below is replaced with `_` in the
 * file name.
 *
 * @param category - Category to generate and save the feed for.
 * @throws Rethrows any generation or file-system error, after logging it.
 */
export async function saveCategoryRSS(category: string): Promise<void> {
  try {
    const xml = await generateCategoryRSS(category);

    const fileSafeCategory = category.replace(
      /[^a-zA-Z0-9\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FAF]/g,
      "_",
    );
    const fileName = `podcast_category_${fileSafeCategory}.xml`;
    const outputPath = path.join(config.paths.publicDir, fileName);

    // Create the target directory first so the write cannot fail on a missing path.
    await fs.mkdir(dirname(outputPath), { recursive: true });
    await fs.writeFile(outputPath, xml);

    console.log(`Category RSS saved for "${category}" at ${outputPath}`);
  } catch (error) {
    console.error(`Error saving category RSS for "${category}":`, error);
    throw error;
  }
}
/**
 * Builds the RSS XML for the episodes of one feed.
 *
 * Episodes are loaded with `fetchEpisodesByFeedId`, reduced with
 * `filterValidEpisodes`, and rendered by `generateRSSXml`. The channel title
 * and description are the podcast's values from `config`, suffixed with the
 * feed title.
 *
 * @param feedId - Identifier of the feed whose episodes are included.
 * @returns The RSS document as a string.
 * @throws Rethrows any error from the steps above, after logging it.
 */
export async function generateFeedRSS(feedId: string): Promise<string> {
  try {
    // Get episodes for the specific feed
    const episodesWithFeedInfo = await fetchEpisodesByFeedId(feedId);
    const validEpisodes = filterValidEpisodes(episodesWithFeedInfo);

    console.log(
      `Found ${episodesWithFeedInfo.length} episodes for feed "${feedId}", ${validEpisodes.length} with valid audio files`,
    );

    // Take the feed title from a playable episode when there is one, otherwise
    // from any fetched episode: the title is still known when every audio file
    // is missing. `??` also covers a null/undefined title on the row itself.
    const titleSource = validEpisodes[0] ?? episodesWithFeedInfo[0];
    const feedTitle = titleSource?.feedTitle ?? "Unknown Feed";

    const title = `${config.podcast.title} - ${feedTitle}`;
    const description = `${config.podcast.description} フィード: ${feedTitle}`;

    return generateRSSXml(validEpisodes, title, description);
  } catch (error) {
    console.error(`Error generating feed RSS for "${feedId}":`, error);
    throw error;
  }
}
/**
 * Generates the RSS XML for a feed and writes it under
 * `config.paths.publicDir` as `podcast_feed_<feedId>.xml`.
 *
 * Characters in `feedId` other than ASCII letters, digits, `_` and `-` are
 * replaced with `_` in the file name, so the written path cannot contain
 * separators or `..` segments. IDs made only of those characters (for
 * example UUIDs) produce the same file name as before.
 *
 * @param feedId - Identifier of the feed to generate and save.
 * @throws Rethrows any generation or file-system error, after logging it.
 */
export async function saveFeedRSS(feedId: string): Promise<void> {
  try {
    const rssXml = await generateFeedRSS(feedId);

    // Sanitise before building the file name, as the category feed writer does.
    const safeFeedId = feedId.replace(/[^a-zA-Z0-9_-]/g, "_");
    const outputPath = path.join(
      config.paths.publicDir,
      `podcast_feed_${safeFeedId}.xml`,
    );

    // Ensure directory exists
    await fs.mkdir(dirname(outputPath), { recursive: true });
    await fs.writeFile(outputPath, rssXml);

    console.log(`Feed RSS saved for feed "${feedId}" at ${outputPath}`);
  } catch (error) {
    console.error(`Error saving feed RSS for "${feedId}":`, error);
    throw error;
  }
}