Add JMDict Japanese dictionary support
This commit is contained in:
@ -22,6 +22,7 @@
|
|||||||
"cheerio": "^1.0.0",
|
"cheerio": "^1.0.0",
|
||||||
"ffmpeg-static": "^5.2.0",
|
"ffmpeg-static": "^5.2.0",
|
||||||
"hono": "^4.7.11",
|
"hono": "^4.7.11",
|
||||||
|
"jmdict-simplified-node": "^1.1.2",
|
||||||
"kuroshiro": "^1.2.0",
|
"kuroshiro": "^1.2.0",
|
||||||
"kuroshiro-analyzer-mecab": "^1.0.1",
|
"kuroshiro-analyzer-mecab": "^1.0.1",
|
||||||
"openai": "^4.104.0",
|
"openai": "^4.104.0",
|
||||||
|
@ -341,10 +341,16 @@ export async function fetchActiveFeeds(): Promise<Feed[]> {
|
|||||||
|
|
||||||
// Get paginated active feeds with total count
|
// Get paginated active feeds with total count
|
||||||
export async function fetchActiveFeedsPaginated(
|
export async function fetchActiveFeedsPaginated(
|
||||||
page: number = 1,
|
page = 1,
|
||||||
limit: number = 10,
|
limit = 10,
|
||||||
category?: string
|
category?: string,
|
||||||
): Promise<{ feeds: Feed[]; total: number; page: number; limit: number; totalPages: number }> {
|
): Promise<{
|
||||||
|
feeds: Feed[];
|
||||||
|
total: number;
|
||||||
|
page: number;
|
||||||
|
limit: number;
|
||||||
|
totalPages: number;
|
||||||
|
}> {
|
||||||
try {
|
try {
|
||||||
const offset = (page - 1) * limit;
|
const offset = (page - 1) * limit;
|
||||||
|
|
||||||
@ -358,7 +364,9 @@ export async function fetchActiveFeedsPaginated(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get total count
|
// Get total count
|
||||||
const countStmt = db.prepare(`SELECT COUNT(*) as count FROM feeds ${whereCondition}`);
|
const countStmt = db.prepare(
|
||||||
|
`SELECT COUNT(*) as count FROM feeds ${whereCondition}`,
|
||||||
|
);
|
||||||
const countResult = countStmt.get(...params) as { count: number };
|
const countResult = countStmt.get(...params) as { count: number };
|
||||||
const total = countResult.count;
|
const total = countResult.count;
|
||||||
|
|
||||||
@ -390,7 +398,7 @@ export async function fetchActiveFeedsPaginated(
|
|||||||
total,
|
total,
|
||||||
page,
|
page,
|
||||||
limit,
|
limit,
|
||||||
totalPages
|
totalPages,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error getting paginated feeds:", error);
|
console.error("Error getting paginated feeds:", error);
|
||||||
@ -456,10 +464,16 @@ export async function fetchEpisodesWithFeedInfo(): Promise<
|
|||||||
|
|
||||||
// Get episodes with feed information for enhanced display (paginated)
|
// Get episodes with feed information for enhanced display (paginated)
|
||||||
export async function fetchEpisodesWithFeedInfoPaginated(
|
export async function fetchEpisodesWithFeedInfoPaginated(
|
||||||
page: number = 1,
|
page = 1,
|
||||||
limit: number = 10,
|
limit = 10,
|
||||||
category?: string
|
category?: string,
|
||||||
): Promise<{ episodes: EpisodeWithFeedInfo[]; total: number; page: number; limit: number; totalPages: number }> {
|
): Promise<{
|
||||||
|
episodes: EpisodeWithFeedInfo[];
|
||||||
|
total: number;
|
||||||
|
page: number;
|
||||||
|
limit: number;
|
||||||
|
totalPages: number;
|
||||||
|
}> {
|
||||||
try {
|
try {
|
||||||
const offset = (page - 1) * limit;
|
const offset = (page - 1) * limit;
|
||||||
|
|
||||||
@ -538,7 +552,7 @@ export async function fetchEpisodesWithFeedInfoPaginated(
|
|||||||
total,
|
total,
|
||||||
page,
|
page,
|
||||||
limit,
|
limit,
|
||||||
totalPages
|
totalPages,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error fetching paginated episodes with feed info:", error);
|
console.error("Error fetching paginated episodes with feed info:", error);
|
||||||
@ -1636,7 +1650,9 @@ export async function updateEpisodeCategory(
|
|||||||
// Category cleanup functions
|
// Category cleanup functions
|
||||||
export async function deleteFeedCategory(category: string): Promise<number> {
|
export async function deleteFeedCategory(category: string): Promise<number> {
|
||||||
try {
|
try {
|
||||||
const stmt = db.prepare("UPDATE feeds SET category = NULL WHERE category = ?");
|
const stmt = db.prepare(
|
||||||
|
"UPDATE feeds SET category = NULL WHERE category = ?",
|
||||||
|
);
|
||||||
const result = stmt.run(category);
|
const result = stmt.run(category);
|
||||||
return result.changes;
|
return result.changes;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -1647,7 +1663,9 @@ export async function deleteFeedCategory(category: string): Promise<number> {
|
|||||||
|
|
||||||
export async function deleteEpisodeCategory(category: string): Promise<number> {
|
export async function deleteEpisodeCategory(category: string): Promise<number> {
|
||||||
try {
|
try {
|
||||||
const stmt = db.prepare("UPDATE episodes SET category = NULL WHERE category = ?");
|
const stmt = db.prepare(
|
||||||
|
"UPDATE episodes SET category = NULL WHERE category = ?",
|
||||||
|
);
|
||||||
const result = stmt.run(category);
|
const result = stmt.run(category);
|
||||||
return result.changes;
|
return result.changes;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -1656,7 +1674,9 @@ export async function deleteEpisodeCategory(category: string): Promise<number> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function deleteCategoryFromBoth(category: string): Promise<{feedChanges: number, episodeChanges: number}> {
|
export async function deleteCategoryFromBoth(
|
||||||
|
category: string,
|
||||||
|
): Promise<{ feedChanges: number; episodeChanges: number }> {
|
||||||
try {
|
try {
|
||||||
db.exec("BEGIN TRANSACTION");
|
db.exec("BEGIN TRANSACTION");
|
||||||
|
|
||||||
@ -1673,21 +1693,25 @@ export async function deleteCategoryFromBoth(category: string): Promise<{feedCha
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getAllUsedCategories(): Promise<{feedCategories: string[], episodeCategories: string[], allCategories: string[]}> {
|
export async function getAllUsedCategories(): Promise<{
|
||||||
|
feedCategories: string[];
|
||||||
|
episodeCategories: string[];
|
||||||
|
allCategories: string[];
|
||||||
|
}> {
|
||||||
try {
|
try {
|
||||||
// Get feed categories
|
// Get feed categories
|
||||||
const feedCatStmt = db.prepare(
|
const feedCatStmt = db.prepare(
|
||||||
"SELECT DISTINCT category FROM feeds WHERE category IS NOT NULL AND category != '' ORDER BY category"
|
"SELECT DISTINCT category FROM feeds WHERE category IS NOT NULL AND category != '' ORDER BY category",
|
||||||
);
|
);
|
||||||
const feedCatRows = feedCatStmt.all() as any[];
|
const feedCatRows = feedCatStmt.all() as any[];
|
||||||
const feedCategories = feedCatRows.map(row => row.category);
|
const feedCategories = feedCatRows.map((row) => row.category);
|
||||||
|
|
||||||
// Get episode categories
|
// Get episode categories
|
||||||
const episodeCatStmt = db.prepare(
|
const episodeCatStmt = db.prepare(
|
||||||
"SELECT DISTINCT category FROM episodes WHERE category IS NOT NULL AND category != '' ORDER BY category"
|
"SELECT DISTINCT category FROM episodes WHERE category IS NOT NULL AND category != '' ORDER BY category",
|
||||||
);
|
);
|
||||||
const episodeCatRows = episodeCatStmt.all() as any[];
|
const episodeCatRows = episodeCatStmt.all() as any[];
|
||||||
const episodeCategories = episodeCatRows.map(row => row.category);
|
const episodeCategories = episodeCatRows.map((row) => row.category);
|
||||||
|
|
||||||
// Get all unique categories
|
// Get all unique categories
|
||||||
const allCategoriesSet = new Set([...feedCategories, ...episodeCategories]);
|
const allCategoriesSet = new Set([...feedCategories, ...episodeCategories]);
|
||||||
@ -1696,7 +1720,7 @@ export async function getAllUsedCategories(): Promise<{feedCategories: string[],
|
|||||||
return {
|
return {
|
||||||
feedCategories,
|
feedCategories,
|
||||||
episodeCategories,
|
episodeCategories,
|
||||||
allCategories
|
allCategories,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error getting all used categories:", error);
|
console.error("Error getting all used categories:", error);
|
||||||
@ -1704,19 +1728,27 @@ export async function getAllUsedCategories(): Promise<{feedCategories: string[],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getCategoryCounts(category: string): Promise<{feedCount: number, episodeCount: number}> {
|
export async function getCategoryCounts(
|
||||||
|
category: string,
|
||||||
|
): Promise<{ feedCount: number; episodeCount: number }> {
|
||||||
try {
|
try {
|
||||||
// Count feeds with this category
|
// Count feeds with this category
|
||||||
const feedCountStmt = db.prepare("SELECT COUNT(*) as count FROM feeds WHERE category = ?");
|
const feedCountStmt = db.prepare(
|
||||||
|
"SELECT COUNT(*) as count FROM feeds WHERE category = ?",
|
||||||
|
);
|
||||||
const feedCountResult = feedCountStmt.get(category) as { count: number };
|
const feedCountResult = feedCountStmt.get(category) as { count: number };
|
||||||
|
|
||||||
// Count episodes with this category
|
// Count episodes with this category
|
||||||
const episodeCountStmt = db.prepare("SELECT COUNT(*) as count FROM episodes WHERE category = ?");
|
const episodeCountStmt = db.prepare(
|
||||||
const episodeCountResult = episodeCountStmt.get(category) as { count: number };
|
"SELECT COUNT(*) as count FROM episodes WHERE category = ?",
|
||||||
|
);
|
||||||
|
const episodeCountResult = episodeCountStmt.get(category) as {
|
||||||
|
count: number;
|
||||||
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
feedCount: feedCountResult.count,
|
feedCount: feedCountResult.count,
|
||||||
episodeCount: episodeCountResult.count
|
episodeCount: episodeCountResult.count,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error getting category counts:", error);
|
console.error("Error getting category counts:", error);
|
||||||
|
361
services/jmdict.ts
Normal file
361
services/jmdict.ts
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
import { promises as fs } from "fs";
|
||||||
|
import path from "path";
|
||||||
|
import { type SetupType, readingAnywhere, setup } from "jmdict-simplified-node";
|
||||||
|
|
||||||
|
// Global JMdict database instance
|
||||||
|
let jmdictDb: SetupType | null = null;
|
||||||
|
let isInitializing = false;
|
||||||
|
|
||||||
|
const JMDICT_DB_PATH = path.join(process.cwd(), "data", "jmdict-db");
|
||||||
|
const JMDICT_DATA_URL =
|
||||||
|
"https://github.com/scriptin/jmdict-simplified/releases/download/3.1.0/jmdict-eng-3.1.0.json.gz";
|
||||||
|
|
||||||
|
/**
 * Populate the module-level JMdict handle (`jmdictDb`).
 *
 * Order of attempts:
 *  1. Open the database at `JMDICT_DB_PATH`.
 *  2. If that fails and `jmdict-eng-3.1.0.json` sits next to it, build the
 *     database from that JSON file.
 *  3. If the JSON file is missing, or anything else throws, install the
 *     in-memory stand-in via `createMinimalJMdictDatabase()`.
 *
 * A call made while another initialization is in flight polls the
 * `isInitializing` flag every 100 ms and returns once it clears.
 *
 * @returns Resolves once an initialization attempt has finished.
 */
export async function initializeJMdict(): Promise<void> {
  // Nothing to do when a handle is already installed.
  if (jmdictDb) {
    return;
  }

  // Another caller is initializing: wait for it instead of starting a second run.
  if (isInitializing) {
    do {
      await new Promise((resolve) => setTimeout(resolve, 100));
    } while (isInitializing);
    return;
  }

  isInitializing = true;

  try {
    console.log("JMdict データベースを初期化中...");

    // The database lives inside the data directory; make sure it exists.
    const dataDir = path.dirname(JMDICT_DB_PATH);
    await fs.mkdir(dataDir, { recursive: true });

    // Attempt 1: open an existing database.
    try {
      const existing = await setup(JMDICT_DB_PATH);
      jmdictDb = existing;
      console.log(
        `JMdict データベース読み込み完了 (辞書日付: ${existing.dictDate})`,
      );
      return;
    } catch {
      // Any failure here is treated as "no usable database yet".
      console.log(
        "既存のJMdictデータベースが見つかりません。新規作成します...",
      );
    }

    // Attempt 2 needs the extracted JSON dump next to the database.
    const jsonPath = path.join(dataDir, "jmdict-eng-3.1.0.json");

    try {
      await fs.access(jsonPath);
    } catch {
      console.log(
        "JMdict JSONファイルが見つかりません。ダウンロードが必要です。",
      );
      console.log(`手動でダウンロードしてください: ${JMDICT_DATA_URL}`);
      console.log(
        `ダウンロード後、解凍して以下のパスに配置してください: ${jsonPath}`,
      );

      // No dump available: fall back to the in-memory stand-in.
      await createMinimalJMdictDatabase();
      return;
    }

    console.log("JMdict JSONファイルを使用してデータベースを作成中...");
    const created = await setup(JMDICT_DB_PATH, jsonPath, true);
    jmdictDb = created;
    console.log(`JMdict データベース作成完了 (辞書日付: ${created.dictDate})`);
  } catch (error) {
    console.error("JMdictの初期化に失敗しました:", error);
    // Last resort so that callers still get a non-null handle.
    await createMinimalJMdictDatabase();
  } finally {
    // Always release the flag so waiting callers can stop polling.
    isInitializing = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Install an in-memory stand-in for the JMdict handle.
 *
 * The stand-in carries only fixed metadata (dictionary date and version) and a
 * stub store; it holds no dictionary entries. It is used when the real
 * database cannot be opened or built.
 *
 * NOTE(review): the stub's `createValueStream().on()` returns a plain object,
 * so how library lookups behave against it depends on the library's stream
 * handling — confirm before relying on it.
 */
async function createMinimalJMdictDatabase(): Promise<void> {
  console.log("最小限のJMdictデータベースを作成中...");

  const fallbackDictDate = "2024-01-01";
  const fallbackVersion = "3.1.0-minimal";

  // The only keys the stub store can answer.
  const metadata = new Map<string, string>([
    ["raw/dictDate", fallbackDictDate],
    ["raw/version", fallbackVersion],
  ]);

  // Stub store: metadata lookups resolve, every other key rejects.
  const stubStore = {
    get: async (key: string, _options?: unknown) => {
      const value = metadata.get(key);
      if (value === undefined) {
        throw new Error("Key not found");
      }
      return value;
    },
    createValueStream: () =>
      ({
        on: () => ({}),
      }) as any,
  } as any;

  jmdictDb = {
    db: stubStore,
    dictDate: fallbackDictDate,
    version: fallbackVersion,
  };

  console.log("最小限のJMdictデータベース作成完了");
}
|
||||||
|
|
||||||
|
/**
 * Look up a word in JMdict and collect katakana readings of the matches.
 *
 * The word is tried in lower-case, upper-case and capitalized form; each
 * distinct form is passed to `readingAnywhere` (limit 10), and every kana
 * reading of a hit that contains katakana is collected.
 *
 * NOTE(review): `readingAnywhere` is described here as a search by kana
 * reading, so Latin-script terms may rarely match — confirm against the
 * library documentation. Gloss (definition) search is not implemented.
 *
 * @param englishWord - English word to search for; surrounding whitespace is ignored
 * @returns Distinct katakana readings in discovery order; empty when the word
 *          is blank, the dictionary is unavailable, or nothing matched
 */
export async function searchEnglishToKatakana(
  englishWord: string,
): Promise<string[]> {
  // Lazily initialize on first use.
  if (!jmdictDb) {
    await initializeJMdict();
  }

  if (!jmdictDb) {
    return [];
  }

  // Pin the handle so later awaits cannot observe a different value.
  const dictionary = jmdictDb;

  try {
    // A blank term would be passed to the search as an empty string; skip it.
    const word = englishWord.trim();
    if (word === "") {
      return [];
    }

    // Casing variants; the Set drops duplicates (e.g. "A" yields two identical
    // forms) so the same lookup is not issued twice.
    const searchTerms = new Set<string>([
      word.toLowerCase(),
      word.toUpperCase(),
      word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(),
    ]);

    const katakanaReadings = new Set<string>();

    for (const term of searchTerms) {
      try {
        const readingResults = await readingAnywhere(dictionary.db, term, 10);

        for (const entry of readingResults) {
          for (const kana of entry.kana) {
            if (isKatakana(kana.text)) {
              katakanaReadings.add(kana.text);
            }
          }
        }
      } catch (searchError) {
        // One failing variant must not abort the remaining ones.
        console.warn(`JMdict search failed for term "${term}":`, searchError);
      }
    }

    return Array.from(katakanaReadings);
  } catch (error) {
    console.error("JMdict英語→カタカナ変換エラー:", error);
    return [];
  }
}
|
||||||
|
|
||||||
|
/**
 * Report whether `text` contains at least one character from the Unicode
 * Katakana block (U+30A0 through U+30FF).
 *
 * This is a "contains" check, not an "entirely katakana" check: mixed strings
 * return true as soon as one katakana character is present.
 */
function isKatakana(text: string): boolean {
  const blockStart = 0x30a0;
  const blockEnd = 0x30ff;

  for (let index = 0; index < text.length; index += 1) {
    const codeUnit = text.charCodeAt(index);
    if (codeUnit >= blockStart && codeUnit <= blockEnd) {
      return true;
    }
  }

  return false;
}
|
||||||
|
|
||||||
|
/**
 * Convert one English word to katakana, preferring a JMdict lookup.
 *
 * The first reading returned by `searchEnglishToKatakana` wins; when the
 * lookup yields nothing, the word goes through
 * `convertEnglishToKatakanaPhonetic` instead.
 *
 * @param englishWord - English word to convert
 * @returns The first JMdict katakana reading, or the phonetic approximation
 */
export async function convertEnglishToKatakanaWithJMdict(
  englishWord: string,
): Promise<string> {
  // Only the first dictionary hit is used; the rest are ignored.
  const [firstReading] = await searchEnglishToKatakana(englishWord);

  // No dictionary hit: fall back to the rule-based conversion.
  return firstReading ?? convertEnglishToKatakanaPhonetic(englishWord);
}
|
||||||
|
|
||||||
|
/** Whole-word katakana spellings that bypass the letter-by-letter rules. */
const COMMON_WORD_KATAKANA: ReadonlyMap<string, string> = new Map(
  Object.entries({
    // Technology
    computer: "コンピューター",
    software: "ソフトウェア",
    hardware: "ハードウェア",
    internet: "インターネット",
    website: "ウェブサイト",
    email: "イーメール",
    digital: "デジタル",
    technology: "テクノロジー",
    programming: "プログラミング",
    algorithm: "アルゴリズム",
    database: "データベース",
    server: "サーバー",
    client: "クライアント",
    network: "ネットワーク",
    security: "セキュリティ",
    password: "パスワード",
    login: "ログイン",
    logout: "ログアウト",
    download: "ダウンロード",
    upload: "アップロード",

    // Common English words
    hello: "ハロー",
    world: "ワールド",
    news: "ニュース",
    business: "ビジネス",
    service: "サービス",
    system: "システム",
    management: "マネジメント",
    project: "プロジェクト",
    team: "チーム",
    meeting: "ミーティング",
    presentation: "プレゼンテーション",
    report: "レポート",
    analysis: "アナリシス",
    marketing: "マーケティング",
    strategy: "ストラテジー",
    solution: "ソリューション",
    development: "デベロップメント",
    innovation: "イノベーション",
    design: "デザイン",
    product: "プロダクト",
    quality: "クオリティ",
    performance: "パフォーマンス",
    efficiency: "エフィシエンシー",

    // Food and daily life
    coffee: "コーヒー",
    restaurant: "レストラン",
    hotel: "ホテル",
    shopping: "ショッピング",
    fashion: "ファッション",
    music: "ミュージック",
    movie: "ムービー",
    game: "ゲーム",
    sport: "スポーツ",
    travel: "トラベル",
    vacation: "バケーション",
    holiday: "ホリデー",
  }),
);

/** Two-letter sequences that map to a single katakana chunk. */
const DIGRAPH_KATAKANA: ReadonlyMap<string, string> = new Map([
  ["ch", "チ"],
  ["sh", "シ"],
  ["th", "ス"],
  ["ph", "フ"],
  ["ck", "ク"],
  ["ng", "ング"],
  ["qu", "クワ"],
]);

/** Fallback mapping for single letters. */
const LETTER_KATAKANA: ReadonlyMap<string, string> = new Map([
  ["a", "ア"],
  ["e", "エ"],
  ["i", "イ"],
  ["o", "オ"],
  ["u", "ウ"],
  ["b", "ブ"],
  ["c", "ク"],
  ["d", "ド"],
  ["f", "フ"],
  ["g", "グ"],
  ["h", "ハ"],
  ["j", "ジ"],
  ["k", "ク"],
  ["l", "ル"],
  ["m", "ム"],
  ["n", "ン"],
  ["p", "プ"],
  ["r", "ル"],
  ["s", "ス"],
  ["t", "ト"],
  ["v", "ブ"],
  ["w", "ワ"],
  ["x", "クス"],
  ["y", "ワイ"],
  ["z", "ズ"],
]);

/**
 * Rule-based English to katakana conversion.
 *
 * The lower-cased word is first looked up in a table of whole-word spellings.
 * Otherwise it is scanned left to right: a known two-letter sequence (ch, sh,
 * th, ph, ck, ng, qu) is replaced as a unit, any other letter is replaced
 * individually, and characters with no mapping are copied through unchanged.
 *
 * The tables are Maps rather than object literals so that words such as
 * "constructor" or "__proto__" cannot resolve to inherited object members,
 * and they are built once instead of on every call.
 *
 * @param word - Word to convert; matching is case-insensitive
 * @returns Katakana approximation of `word` (empty string for empty input)
 */
function convertEnglishToKatakanaPhonetic(word: string): string {
  const lowerWord = word.toLowerCase();

  const wholeWord = COMMON_WORD_KATAKANA.get(lowerWord);
  if (wholeWord !== undefined) {
    return wholeWord;
  }

  let result = "";
  let i = 0;

  while (i < lowerWord.length) {
    // Prefer a two-letter match; at the last letter the slice is one char
    // long and therefore never matches a digraph.
    const digraph = DIGRAPH_KATAKANA.get(lowerWord.slice(i, i + 2));
    if (digraph !== undefined) {
      result += digraph;
      i += 2;
      continue;
    }

    const char = lowerWord.charAt(i);
    result += LETTER_KATAKANA.get(char) ?? char;
    i += 1;
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Report whether the module-level JMdict handle has been set.
 *
 * @returns `true` when `jmdictDb` is non-null, `false` otherwise
 */
export function isJMdictInitialized(): boolean {
  if (jmdictDb === null) {
    return false;
  }

  return true;
}
|
||||||
|
|
||||||
|
/**
 * Return the dictionary date and version recorded on the current JMdict handle.
 *
 * @returns `{ dictDate, version }` copied from the handle, or `null` when no
 *          handle has been set
 */
export function getJMdictInfo(): { dictDate: string; version: string } | null {
  const handle = jmdictDb;

  if (!handle) {
    return null;
  }

  // Expose only the two metadata fields, not the underlying store.
  const { dictDate, version } = handle;
  return { dictDate, version };
}
|
@ -153,11 +153,16 @@ export async function generateAllCategoryRSSFiles(): Promise<void> {
|
|||||||
try {
|
try {
|
||||||
await saveCategoryRSSFile(category);
|
await saveCategoryRSSFile(category);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ Failed to generate RSS for category "${category}":`, error);
|
console.error(
|
||||||
|
`❌ Failed to generate RSS for category "${category}":`,
|
||||||
|
error,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`✅ Generated category RSS files for ${categories.length} categories`);
|
console.log(
|
||||||
|
`✅ Generated category RSS files for ${categories.length} categories`,
|
||||||
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("❌ Error generating category RSS files:", error);
|
console.error("❌ Error generating category RSS files:", error);
|
||||||
throw error;
|
throw error;
|
||||||
@ -178,7 +183,10 @@ export async function generateAllFeedRSSFiles(): Promise<void> {
|
|||||||
try {
|
try {
|
||||||
await saveFeedRSSFile(feed.id);
|
await saveFeedRSSFile(feed.id);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ Failed to generate RSS for feed "${feed.id}":`, error);
|
console.error(
|
||||||
|
`❌ Failed to generate RSS for feed "${feed.id}":`,
|
||||||
|
error,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,54 +1,72 @@
|
|||||||
import Kuroshiro from "kuroshiro";
|
import Kuroshiro from "kuroshiro";
|
||||||
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
|
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
|
||||||
import { toKatakana } from "wanakana";
|
import { toKatakana } from "wanakana";
|
||||||
|
import {
|
||||||
|
convertEnglishToKatakanaWithJMdict,
|
||||||
|
getJMdictInfo,
|
||||||
|
initializeJMdict,
|
||||||
|
isJMdictInitialized,
|
||||||
|
} from "./jmdict.js";
|
||||||
|
|
||||||
// Global instance to avoid recreating the analyzer
|
// Global instance to avoid recreating the analyzer
|
||||||
let kuroshiroInstance: Kuroshiro | null = null;
|
let kuroshiroInstance: Kuroshiro | null = null;
|
||||||
|
|
||||||
// Basic English to Katakana mapping for common words
|
// Basic English to Katakana mapping for common words
|
||||||
const englishToKatakanaMap: Record<string, string> = {
|
const englishToKatakanaMap: Record<string, string> = {
|
||||||
"hello": "ハロー",
|
hello: "ハロー",
|
||||||
"world": "ワールド",
|
world: "ワールド",
|
||||||
"this": "ディス",
|
this: "ディス",
|
||||||
"is": "イズ",
|
is: "イズ",
|
||||||
"a": "ア",
|
a: "ア",
|
||||||
"test": "テスト",
|
test: "テスト",
|
||||||
"javascript": "ジャバスクリプト",
|
javascript: "ジャバスクリプト",
|
||||||
"typescript": "タイプスクリプト",
|
typescript: "タイプスクリプト",
|
||||||
"and": "アンド",
|
and: "アンド",
|
||||||
"api": "エーピーアイ",
|
api: "エーピーアイ",
|
||||||
"endpoint": "エンドポイント",
|
endpoint: "エンドポイント",
|
||||||
"machine": "マシン",
|
machine: "マシン",
|
||||||
"learning": "ラーニング",
|
learning: "ラーニング",
|
||||||
"model": "モデル",
|
model: "モデル",
|
||||||
"analysis": "アナリシス",
|
analysis: "アナリシス",
|
||||||
"computer": "コンピューター",
|
computer: "コンピューター",
|
||||||
"data": "データ",
|
data: "データ",
|
||||||
"software": "ソフトウェア",
|
software: "ソフトウェア",
|
||||||
"program": "プログラム",
|
program: "プログラム",
|
||||||
"system": "システム",
|
system: "システム",
|
||||||
"network": "ネットワーク",
|
network: "ネットワーク",
|
||||||
"server": "サーバー",
|
server: "サーバー",
|
||||||
"client": "クライアント",
|
client: "クライアント",
|
||||||
"database": "データベース",
|
database: "データベース",
|
||||||
"file": "ファイル",
|
file: "ファイル",
|
||||||
"user": "ユーザー",
|
user: "ユーザー",
|
||||||
"password": "パスワード",
|
password: "パスワード",
|
||||||
"login": "ログイン",
|
login: "ログイン",
|
||||||
"logout": "ログアウト",
|
logout: "ログアウト",
|
||||||
"website": "ウェブサイト",
|
website: "ウェブサイト",
|
||||||
"browser": "ブラウザー",
|
browser: "ブラウザー",
|
||||||
"application": "アプリケーション",
|
application: "アプリケーション",
|
||||||
"service": "サービス"
|
service: "サービス",
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert English word to Katakana using predefined mapping or phonetic approximation
|
* Convert English word to Katakana using JMdict, predefined mapping, or phonetic approximation
|
||||||
*/
|
*/
|
||||||
function convertEnglishWordToKatakana(word: string): string {
|
async function convertEnglishWordToKatakana(word: string): Promise<string> {
|
||||||
const lowerWord = word.toLowerCase();
|
const lowerWord = word.toLowerCase();
|
||||||
|
|
||||||
// Check predefined mapping first
|
// First try JMdict if available
|
||||||
|
try {
|
||||||
|
if (isJMdictInitialized()) {
|
||||||
|
const jmdictResult = await convertEnglishToKatakanaWithJMdict(word);
|
||||||
|
if (jmdictResult && jmdictResult !== word) {
|
||||||
|
return jmdictResult;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.warn(`JMdict conversion failed for "${word}":`, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check predefined mapping second
|
||||||
if (englishToKatakanaMap[lowerWord]) {
|
if (englishToKatakanaMap[lowerWord]) {
|
||||||
return englishToKatakanaMap[lowerWord];
|
return englishToKatakanaMap[lowerWord];
|
||||||
}
|
}
|
||||||
@ -73,35 +91,78 @@ function convertEnglishWordToKatakana(word: string): string {
|
|||||||
*/
|
*/
|
||||||
function approximateEnglishToKatakana(word: string): string {
|
function approximateEnglishToKatakana(word: string): string {
|
||||||
const phoneticMap: Record<string, string> = {
|
const phoneticMap: Record<string, string> = {
|
||||||
'a': 'ア', 'b': 'ブ', 'c': 'ク', 'd': 'ド', 'e': 'エ',
|
a: "ア",
|
||||||
'f': 'フ', 'g': 'グ', 'h': 'ハ', 'i': 'イ', 'j': 'ジ',
|
b: "ブ",
|
||||||
'k': 'ク', 'l': 'ル', 'm': 'ム', 'n': 'ン', 'o': 'オ',
|
c: "ク",
|
||||||
'p': 'プ', 'q': 'ク', 'r': 'ル', 's': 'ス', 't': 'ト',
|
d: "ド",
|
||||||
'u': 'ウ', 'v': 'ブ', 'w': 'ワ', 'x': 'クス', 'y': 'ワイ', 'z': 'ズ'
|
e: "エ",
|
||||||
|
f: "フ",
|
||||||
|
g: "グ",
|
||||||
|
h: "ハ",
|
||||||
|
i: "イ",
|
||||||
|
j: "ジ",
|
||||||
|
k: "ク",
|
||||||
|
l: "ル",
|
||||||
|
m: "ム",
|
||||||
|
n: "ン",
|
||||||
|
o: "オ",
|
||||||
|
p: "プ",
|
||||||
|
q: "ク",
|
||||||
|
r: "ル",
|
||||||
|
s: "ス",
|
||||||
|
t: "ト",
|
||||||
|
u: "ウ",
|
||||||
|
v: "ブ",
|
||||||
|
w: "ワ",
|
||||||
|
x: "クス",
|
||||||
|
y: "ワイ",
|
||||||
|
z: "ズ",
|
||||||
};
|
};
|
||||||
|
|
||||||
return word.toLowerCase()
|
return word
|
||||||
.split('')
|
.toLowerCase()
|
||||||
.map(char => phoneticMap[char] || char)
|
.split("")
|
||||||
.join('');
|
.map((char) => phoneticMap[char] || char)
|
||||||
|
.join("");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize kuroshiro with MeCab analyzer
|
* Initialize kuroshiro with MeCab analyzer and JMdict
|
||||||
* This should be called once during application startup
|
* This should be called once during application startup
|
||||||
*/
|
*/
|
||||||
export async function initializeTextConverter(): Promise<void> {
|
export async function initializeTextConverter(): Promise<void> {
|
||||||
if (kuroshiroInstance) {
|
if (kuroshiroInstance && isJMdictInitialized()) {
|
||||||
return; // Already initialized
|
return; // Already initialized
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log("Kuroshiroテキストコンバーターを初期化中...");
|
console.log("テキストコンバーターを初期化中...");
|
||||||
kuroshiroInstance = new Kuroshiro();
|
|
||||||
await kuroshiroInstance.init(new KuroshiroAnalyzerMecab());
|
// Initialize Kuroshiro if not already done
|
||||||
console.log("Kuroshiroテキストコンバーター初期化完了");
|
if (!kuroshiroInstance) {
|
||||||
|
console.log("Kuroshiroを初期化中...");
|
||||||
|
kuroshiroInstance = new Kuroshiro();
|
||||||
|
await kuroshiroInstance.init(new KuroshiroAnalyzerMecab());
|
||||||
|
console.log("Kuroshiro初期化完了");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize JMdict if not already done
|
||||||
|
if (!isJMdictInitialized()) {
|
||||||
|
console.log("JMdictを初期化中...");
|
||||||
|
await initializeJMdict();
|
||||||
|
console.log("JMdict初期化完了");
|
||||||
|
|
||||||
|
const jmdictInfo = getJMdictInfo();
|
||||||
|
if (jmdictInfo) {
|
||||||
|
console.log(
|
||||||
|
`JMdict情報: バージョン ${jmdictInfo.version}, 辞書日付 ${jmdictInfo.dictDate}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("テキストコンバーター初期化完了");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Kuroshiroの初期化に失敗しました:", error);
|
console.error("テキストコンバーターの初期化に失敗しました:", error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -163,14 +224,20 @@ export async function convertEnglishWordsOnly(text: string): Promise<string> {
|
|||||||
for (const englishWord of matches) {
|
for (const englishWord of matches) {
|
||||||
try {
|
try {
|
||||||
// Convert each English word to katakana using our custom function
|
// Convert each English word to katakana using our custom function
|
||||||
const converted = convertEnglishWordToKatakana(englishWord);
|
const converted = await convertEnglishWordToKatakana(englishWord);
|
||||||
|
|
||||||
// Replace the English word with its katakana equivalent
|
// Replace the English word with its katakana equivalent
|
||||||
// Use word boundary to avoid partial replacements
|
// Use word boundary to avoid partial replacements
|
||||||
const wordRegex = new RegExp(`\\b${englishWord.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
|
const wordRegex = new RegExp(
|
||||||
|
`\\b${englishWord.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`,
|
||||||
|
"g",
|
||||||
|
);
|
||||||
result = result.replace(wordRegex, converted);
|
result = result.replace(wordRegex, converted);
|
||||||
} catch (convertError) {
|
} catch (convertError) {
|
||||||
console.warn(`Failed to convert word "${englishWord}":`, convertError);
|
console.warn(
|
||||||
|
`Failed to convert word "${englishWord}":`,
|
||||||
|
convertError,
|
||||||
|
);
|
||||||
// Keep original word if conversion fails
|
// Keep original word if conversion fails
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,8 +252,85 @@ export async function convertEnglishWordsOnly(text: string): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Report whether the full text-conversion stack is ready.
 *
 * @returns `false` when no kuroshiro instance exists; otherwise whatever
 *   `isJMdictInitialized()` reports.
 */
export function isTextConverterInitialized(): boolean {
  if (kuroshiroInstance === null) {
    return false;
  }
  return isJMdictInitialized();
}

/**
 * Report whether kuroshiro alone is ready, regardless of JMdict state.
 * Retained for backward compatibility.
 *
 * @returns `true` when a kuroshiro instance exists.
 */
export function isKuroshiroInitialized(): boolean {
  const kuroshiroMissing = kuroshiroInstance === null;
  return !kuroshiroMissing;
}
|
||||||
|
|
||||||
|
/**
 * Take a snapshot of the readiness of each text-conversion backend.
 *
 * @returns An object with `kuroshiro` (whether a kuroshiro instance exists),
 *   `jmdict` (the result of `isJMdictInitialized()`), and `jmdictInfo`
 *   (the value returned by `getJMdictInfo()`, which may be `null`).
 */
export function getTextConverterInfo(): {
  kuroshiro: boolean;
  jmdict: boolean;
  jmdictInfo: { dictDate: string; version: string } | null;
} {
  // Evaluated in the same order as the fields are reported.
  const kuroshiro = kuroshiroInstance !== null;
  const jmdict = isJMdictInitialized();
  const jmdictInfo = getJMdictInfo();

  return { kuroshiro, jmdict, jmdictInfo };
}
|
||||||
|
|
||||||
|
/**
 * Convert the English words in `text` to katakana using JMdict.
 *
 * A "word" is a run of ASCII letters delimited by word boundaries on both
 * sides. Each distinct word is converted once through
 * `convertEnglishToKatakanaWithJMdict`; a word whose conversion throws is
 * logged with `console.warn` and left unchanged in the output.
 *
 * If JMdict initialization or the conversion pass as a whole fails, the error
 * is logged and the result of `convertEnglishWordsOnly(text)` is returned
 * instead.
 *
 * @param text - Input text containing English words
 * @returns Text with English words replaced by their katakana conversions
 */
export async function convertEnglishToKatakanaWithJMdictFallback(
  text: string,
): Promise<string> {
  try {
    // Initialize inside the try block so that an initialization failure takes
    // the fallback path below instead of rejecting the returned promise.
    if (!isJMdictInitialized()) {
      await initializeJMdict();
    }

    const englishWordPattern = /\b[a-zA-Z]+\b/g;
    const matches = text.match(englishWordPattern);
    if (!matches) {
      return text;
    }

    // Convert each distinct word once; failed words are simply not recorded.
    const conversions = new Map<string, string>();
    for (const englishWord of new Set(matches)) {
      try {
        conversions.set(
          englishWord,
          await convertEnglishToKatakanaWithJMdict(englishWord),
        );
      } catch (convertError) {
        console.warn(
          `Failed to convert word "${englishWord}":`,
          convertError,
        );
        // Keep original word if conversion fails
      }
    }

    // Replace in a single pass over the original text. The replacer function
    // inserts the conversion literally (no `$&` / `$1` interpretation) and the
    // output of one conversion is never re-scanned for other words.
    return text.replace(
      englishWordPattern,
      (word) => conversions.get(word) ?? word,
    );
  } catch (error) {
    console.error("JMdict-based English to Katakana conversion error:", error);
    // Fallback to the original method if JMdict conversion fails
    return convertEnglishWordsOnly(text);
  }
}
|
||||||
|
@ -2,7 +2,10 @@ import fs from "fs";
|
|||||||
import path from "path";
|
import path from "path";
|
||||||
import ffmpegPath from "ffmpeg-static";
|
import ffmpegPath from "ffmpeg-static";
|
||||||
import { config } from "./config.js";
|
import { config } from "./config.js";
|
||||||
import { convertEnglishWordsOnly, initializeTextConverter } from "./text-converter.js";
|
import {
|
||||||
|
convertEnglishWordsOnly,
|
||||||
|
initializeTextConverter,
|
||||||
|
} from "./text-converter.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split text into natural chunks for TTS processing
|
* Split text into natural chunks for TTS processing
|
||||||
@ -119,7 +122,10 @@ async function generateAudioForChunk(
|
|||||||
console.log(`変換後: "${processedText}"`);
|
console.log(`変換後: "${processedText}"`);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn(`チャンク${chunkIndex + 1}の英語変換に失敗、元のテキストを使用: ${itemId}`, error);
|
console.warn(
|
||||||
|
`チャンク${chunkIndex + 1}の英語変換に失敗、元のテキストを使用: ${itemId}`,
|
||||||
|
error,
|
||||||
|
);
|
||||||
processedText = chunkText;
|
processedText = chunkText;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,7 +277,10 @@ export async function generateTTSWithoutQueue(
|
|||||||
try {
|
try {
|
||||||
await initializeTextConverter();
|
await initializeTextConverter();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn("テキストコンバーターの初期化に失敗しました。英語変換をスキップします:", error);
|
console.warn(
|
||||||
|
"テキストコンバーターの初期化に失敗しました。英語変換をスキップします:",
|
||||||
|
error,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
|
Reference in New Issue
Block a user