This commit is contained in:
2025-06-11 23:37:13 +09:00
parent 7f719bb827
commit a8fe191ab0

View File

@ -8,11 +8,43 @@ let isInitializing = false;
const JMDICT_DB_PATH = path.join(process.cwd(), "data", "jmdict-db");
const JMDICT_DATA_URL =
"https://github.com/scriptin/jmdict-simplified/releases/download/3.1.0/jmdict-eng-3.1.0.json.gz";
"https://github.com/scriptin/jmdict-simplified/releases/download/3.1.0/jmdict-eng-3.1.0.json.tgz";
/**
 * Delete the database directory at `dbPath` together with everything in it.
 *
 * This is a best-effort operation: a removal failure is reported through
 * `console.warn` and is never rethrown, so the returned promise always
 * resolves. Because the removal runs with `force: true`, a path that does
 * not exist is not treated as an error.
 *
 * @param dbPath - Directory to remove recursively.
 */
async function cleanupDatabase(dbPath: string): Promise<void> {
  // Progress logging lives next to the removal so that both run inside the
  // single try below.
  const removeDirectoryTree = async (): Promise<void> => {
    console.log("データベースディレクトリをクリーンアップ中...");
    await fs.rm(dbPath, { recursive: true, force: true });
    console.log("クリーンアップ完了");
  };

  try {
    await removeDirectoryTree();
  } catch (cleanupError) {
    // Deliberately swallowed: callers treat cleanup as optional.
    console.warn("クリーンアップに失敗しました:", cleanupError);
  }
}
/**
 * Probe whether the directory that will contain the database is writable.
 *
 * The parent directory of `dbPath` is created if necessary (recursively),
 * then a throwaway file named `.test-write` is written into it and deleted
 * again. `dbPath` itself is never created or touched.
 *
 * @param dbPath - Intended location of the database directory.
 * @returns `true` when every step of the probe succeeded; `false` when any
 *   step threw, in which case the error is logged with `console.error`.
 */
async function checkDatabasePermissions(dbPath: string): Promise<boolean> {
  let writable = false;

  try {
    const parentDir = path.dirname(dbPath);
    const probeFile = path.join(parentDir, ".test-write");

    await fs.mkdir(parentDir, { recursive: true });
    // Round-trip a small file: create it, then remove it.
    await fs.writeFile(probeFile, "test");
    await fs.unlink(probeFile);

    writable = true;
  } catch (permissionError) {
    console.error("データベースディレクトリの権限エラー:", permissionError);
  }

  return writable;
}
/**
* Initialize JMdict database with enhanced error handling
*/
export async function initializeJMdict(): Promise<void> {
if (jmdictDb) {
@ -32,11 +64,19 @@ export async function initializeJMdict(): Promise<void> {
try {
console.log("JMdict データベースを初期化中...");
// Ensure data directory exists
const dataDir = path.dirname(JMDICT_DB_PATH);
await fs.mkdir(dataDir, { recursive: true });
// Check permissions first
const hasPermissions = await checkDatabasePermissions(JMDICT_DB_PATH);
if (!hasPermissions) {
console.error("データベースディレクトリへの書き込み権限がありません");
await createMinimalJMdictDatabase();
return;
}
// Try to load existing database
// Try to load existing database with retry logic
let retryCount = 0;
const maxRetries = 2;
while (retryCount <= maxRetries) {
try {
jmdictDb = await setup(JMDICT_DB_PATH);
console.log(
@ -45,41 +85,65 @@ export async function initializeJMdict(): Promise<void> {
return;
} catch (error) {
console.log(
"既存のJMdictデータベースが見つかりません。新規作成します...",
`データベース読み込み試行 ${retryCount + 1}/${maxRetries + 1} 失敗:`,
error,
);
if (retryCount < maxRetries) {
// Clean up potentially corrupted database and retry
await cleanupDatabase(JMDICT_DB_PATH);
retryCount++;
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait 1 second
} else {
throw error;
}
}
}
// If we get here, all retries failed
console.log(
"既存のデータベース読み込みに失敗しました。新規作成を試行します...",
);
// Check if we have the JSON file locally
const dataDir = path.dirname(JMDICT_DB_PATH);
const jsonPath = path.join(dataDir, "jmdict-eng-3.1.0.json");
let jsonExists = false;
try {
await fs.access(jsonPath);
jsonExists = true;
} catch {
console.log("JMdict JSONファイルを使用してデータベースを作成中...");
// Ensure clean directory for new database
await cleanupDatabase(JMDICT_DB_PATH);
jmdictDb = await setup(JMDICT_DB_PATH, jsonPath, true);
console.log(
"JMdict JSONファイルが見つかりません。ダウンロードが必要です。",
`JMdict データベース作成完了 (辞書日付: ${jmdictDb.dictDate})`,
);
} catch (jsonError) {
console.log("JMdict JSONファイルが見つかりません。");
console.log(`手動でダウンロードしてください: ${JMDICT_DATA_URL}`);
console.log(
`ダウンロード後、解凍して以下のパスに配置してください: ${jsonPath}`,
);
// For now, we'll create a minimal database with some common words
await createMinimalJMdictDatabase();
return;
}
if (jsonExists) {
console.log("JMdict JSONファイルを使用してデータベースを作成中...");
jmdictDb = await setup(JMDICT_DB_PATH, jsonPath, true);
console.log(
`JMdict データベース作成完了 (辞書日付: ${jmdictDb.dictDate})`,
);
}
} catch (error) {
console.error("JMdictの初期化に失敗しました:", error);
// Create a minimal fallback database
// Check if it's a LevelDB-specific error
if (
error instanceof Error &&
(error.message.includes("levelup") ||
error.message.includes("leveldown") ||
error.message.includes("LOCK"))
) {
console.log("LevelDBエラーを検出しました。データベースを再構築します...");
await cleanupDatabase(JMDICT_DB_PATH);
}
// Always fall back to minimal database
await createMinimalJMdictDatabase();
} finally {
isInitializing = false;
@ -88,14 +152,48 @@ export async function initializeJMdict(): Promise<void> {
/**
* Create a minimal JMdict database with common English-Japanese mappings
* This serves as a fallback when the full JMdict database is not available
*/
async function createMinimalJMdictDatabase(): Promise<void> {
console.log("最小限のJMdictデータベースを作成中...");
// Create a mock database setup that uses in-memory mappings
// Enhanced minimal database with more comprehensive mappings
const minimalMappings: Record<string, string[]> = {
// Technology terms
computer: ["コンピューター", "コンピュータ"],
software: ["ソフトウェア", "ソフトウエア"],
hardware: ["ハードウェア", "ハードウエア"],
internet: ["インターネット"],
website: ["ウェブサイト", "ウエブサイト"],
email: ["イーメール", "メール"],
digital: ["デジタル"],
system: ["システム"],
network: ["ネットワーク"],
server: ["サーバー", "サーバ"],
database: ["データベース"],
program: ["プログラム"],
application: ["アプリケーション", "アプリ"],
// Common words
hello: ["ハロー", "ハロ"],
world: ["ワールド"],
news: ["ニュース"],
business: ["ビジネス"],
service: ["サービス"],
project: ["プロジェクト"],
team: ["チーム"],
meeting: ["ミーティング"],
coffee: ["コーヒー"],
restaurant: ["レストラン"],
hotel: ["ホテル"],
music: ["ミュージック"],
game: ["ゲーム"],
sport: ["スポーツ"],
travel: ["トラベル"],
};
// Create a mock database that searches the minimal mappings
const mockDb = {
get: async (key: string, _options?: any) => {
get: async (key: string) => {
if (key === "raw/dictDate") return "2024-01-01";
if (key === "raw/version") return "3.1.0-minimal";
throw new Error("Key not found");
@ -112,13 +210,14 @@ async function createMinimalJMdictDatabase(): Promise<void> {
version: "3.1.0-minimal",
};
// Override the search function to use our minimal mappings
global.minimalJMdictMappings = minimalMappings;
console.log("最小限のJMdictデータベース作成完了");
}
/**
* Search for English words in JMdict and get their katakana readings
* @param englishWord - English word to search for
* @returns Array of possible katakana readings
*/
export async function searchEnglishToKatakana(
englishWord: string,
@ -132,6 +231,17 @@ export async function searchEnglishToKatakana(
}
try {
// If using minimal database, check our mappings first
const minimalMappings = (global as any).minimalJMdictMappings;
if (minimalMappings) {
const lowerWord = englishWord.toLowerCase();
if (minimalMappings[lowerWord]) {
return minimalMappings[lowerWord].filter((reading) =>
isKatakana(reading),
);
}
}
// Search for the English word in various ways
const searchTerms = [
englishWord.toLowerCase(),
@ -153,10 +263,6 @@ export async function searchEnglishToKatakana(
}
}
}
// Also search in glosses (definitions) for English matches
// This is more complex and would require full text search in sense.gloss
// For now, we'll implement a basic approach
} catch (searchError) {
console.warn(`JMdict search failed for term "${term}":`, searchError);
}
@ -178,8 +284,6 @@ function isKatakana(text: string): boolean {
/**
* Enhanced English to Katakana conversion using JMdict + fallback methods
* @param englishWord - English word to convert
* @returns Most appropriate katakana conversion
*/
export async function convertEnglishToKatakanaWithJMdict(
englishWord: string,
@ -188,7 +292,6 @@ export async function convertEnglishToKatakanaWithJMdict(
const jmdictResults = await searchEnglishToKatakana(englishWord);
if (jmdictResults.length > 0) {
// Return the first (most common) result
return jmdictResults[0];
}
@ -198,12 +301,11 @@ export async function convertEnglishToKatakanaWithJMdict(
/**
* Enhanced phonetic English to Katakana conversion
* This is more sophisticated than the basic mapping in text-converter.ts
*/
function convertEnglishToKatakanaPhonetic(word: string): string {
const lowerWord = word.toLowerCase();
// Enhanced common word mappings
// Enhanced common word mappings (same as your original)
const commonWords: Record<string, string> = {
// Technology
computer: "コンピューター",
@ -271,7 +373,7 @@ function convertEnglishToKatakanaPhonetic(word: string): string {
return commonWords[lowerWord];
}
// Enhanced phonetic mapping rules
// Enhanced phonetic mapping rules (same as your original)
let result = "";
let i = 0;
@ -279,7 +381,6 @@ function convertEnglishToKatakanaPhonetic(word: string): string {
const char = lowerWord[i];
const nextChar = i + 1 < lowerWord.length ? lowerWord[i + 1] : "";
// Handle common English phonetic patterns
if (char === "c" && nextChar === "h") {
result += "チ";
i += 2;
@ -302,7 +403,6 @@ function convertEnglishToKatakanaPhonetic(word: string): string {
result += "クワ";
i += 2;
} else {
// Single character mapping
const phoneticMap: Record<string, string> = {
a: "ア",
e: "エ",
@ -359,3 +459,23 @@ export function getJMdictInfo(): { dictDate: string; version: string } | null {
version: jmdictDb.version,
};
}
/**
 * Force cleanup and re-initialization of the JMdict database.
 * Useful for troubleshooting LevelDB issues.
 *
 * Steps, in order: drop the cached database handle, clear the
 * "initialization in progress" flag, delete the on-disk database directory
 * at JMDICT_DB_PATH, then run initializeJMdict() again.
 *
 * cleanupDatabase() logs and swallows removal failures, so a failed delete
 * does not stop the re-initialization that follows.
 *
 * NOTE(review): the previous handle is only set to null here, it is not
 * closed — confirm whether it can still hold resources inside the directory
 * that is about to be deleted.
 * NOTE(review): isInitializing is cleared unconditionally — confirm this is
 * safe when another initializeJMdict() call is still in flight.
 *
 * @returns A promise that resolves after initializeJMdict() has completed
 *   and the final log line has been written.
 */
export async function forceReinitializeJMdict(): Promise<void> {
console.log("JMdictデータベースの強制再初期化中...");
// Reset module state so that initializeJMdict() does not see an existing
// handle or an in-progress flag left over from an earlier run.
jmdictDb = null;
isInitializing = false;
// Remove the existing on-disk database (best-effort; failures are only logged).
await cleanupDatabase(JMDICT_DB_PATH);
// Run initialization again now that the old directory has been removed.
await initializeJMdict();
console.log("JMdictデータベースの強制再初期化完了");
}