// JMdict-backed English-to-katakana lookup helpers.
import { promises as fs } from "fs";
import path from "path";
import { type SetupType, readingAnywhere, setup } from "jmdict-simplified-node";

// On-disk location of the dictionary database, resolved against the process working directory.
const JMDICT_DB_PATH = path.join(process.cwd(), "data", "jmdict-db");

// Release archive the operator is pointed at when the JSON dump is missing.
// In this file the URL is only printed in a log message.
const JMDICT_DATA_URL =
  "https://github.com/scriptin/jmdict-simplified/releases/download/3.1.0/jmdict-eng-3.1.0.json.tgz";

// Module-wide dictionary handle; null until an initialization run assigns it.
let jmdictDb: SetupType | null = null;

// Raised for the duration of initializeJMdict() so overlapping callers wait instead of re-entering.
let isInitializing = false;
/**
 * Recursively delete the database directory at `dbPath`.
 *
 * Best-effort: a failure is reported with `console.warn` and never rethrown,
 * so callers can always continue with their fallback path.
 *
 * @param dbPath Directory to remove; a missing path is not an error (`force: true`).
 */
async function cleanupDatabase(dbPath: string): Promise<void> {
  console.log("データベースディレクトリをクリーンアップ中...");

  try {
    await fs.rm(dbPath, { recursive: true, force: true });
  } catch (cause) {
    console.warn("クリーンアップに失敗しました:", cause);
    return;
  }

  console.log("クリーンアップ完了");
}
/**
 * Probe whether the directory that will contain the database is writable.
 *
 * Creates the parent directory of `dbPath` if needed, then writes and removes
 * a `.test-write` marker file inside it.
 *
 * @param dbPath Path of the database directory; only its parent is touched.
 * @returns `true` when every step succeeded, `false` (after logging) otherwise.
 */
async function checkDatabasePermissions(dbPath: string): Promise<boolean> {
  let writable = false;

  try {
    const parentDir = path.dirname(dbPath);
    const probeFile = path.join(parentDir, ".test-write");

    await fs.mkdir(parentDir, { recursive: true });
    // A real write is the probe: create the marker, then remove it again.
    await fs.writeFile(probeFile, "test");
    await fs.unlink(probeFile);

    writable = true;
  } catch (cause) {
    console.error("データベースディレクトリの権限エラー:", cause);
  }

  return writable;
}
/** Best-effort text for a caught value, which is not guaranteed to be an Error instance. */
function jmdictErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Initialize the shared JMdict handle, degrading to the built-in minimal
 * dictionary whenever the real database cannot be used.
 *
 * Order of attempts:
 *  1. verify the data directory is writable (otherwise use the minimal dictionary);
 *  2. open an existing, non-empty database directory, deleting it if it cannot be opened;
 *  3. build the database from `jmdict-eng-3.1.0.json` when that file is present;
 *  4. fall back to the minimal dictionary and, if the JSON is missing, print
 *     manual download instructions.
 *
 * A call made while another initialization is running polls every 100 ms until
 * that run finishes and then returns. Failures of the steps above are logged
 * here rather than rethrown.
 */
export async function initializeJMdict(): Promise<void> {
  if (jmdictDb) {
    return; // Already initialized
  }

  if (isInitializing) {
    // Another caller is mid-initialization: wait for it instead of opening the store twice.
    while (isInitializing) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    return;
  }

  isInitializing = true;

  try {
    console.log("JMdict データベースを初期化中...");

    // Drop fallback mappings left behind by an earlier minimal-dictionary run so
    // they cannot shadow a real database loaded below. The minimal fallback
    // registers them again if it ends up being used.
    Reflect.deleteProperty(globalThis, "minimalJMdictMappings");

    // Check permissions first
    const hasPermissions = await checkDatabasePermissions(JMDICT_DB_PATH);
    if (!hasPermissions) {
      console.error("データベースディレクトリへの書き込み権限がありません");
      await createMinimalJMdictDatabase();
      return;
    }

    // The JSON dump is expected next to the database directory.
    const dataDir = path.dirname(JMDICT_DB_PATH);
    const jsonPath = path.join(dataDir, "jmdict-eng-3.1.0.json");

    let hasJsonFile = false;
    try {
      await fs.access(jsonPath);
      hasJsonFile = true;
      console.log("JMdict JSONファイルが見つかりました");
    } catch {
      console.log("JMdict JSONファイルが見つかりません");
    }

    // Try to open an existing database, but only if its directory has content.
    let databaseLoaded = false;
    try {
      const dbStats = await fs.stat(JMDICT_DB_PATH);
      if (dbStats.isDirectory()) {
        const dbContents = await fs.readdir(JMDICT_DB_PATH);
        if (dbContents.length > 0) {
          console.log(
            "既存のデータベースディレクトリが見つかりました。読み込みを試行します...",
          );

          try {
            const opened = await setup(JMDICT_DB_PATH);
            jmdictDb = opened;
            console.log(
              `JMdict データベース読み込み完了 (辞書日付: ${opened.dictDate})`,
            );
            databaseLoaded = true;
          } catch (loadError) {
            // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
            console.log(
              "既存データベースの読み込みに失敗:",
              jmdictErrorMessage(loadError),
            );
            // An unreadable directory is treated as corrupt and removed so it can be rebuilt.
            await cleanupDatabase(JMDICT_DB_PATH);
          }
        }
      }
    } catch {
      // stat/readdir failed: there is no usable database directory yet, which is fine.
      console.log("既存のデータベースが見つかりません");
    }

    // Nothing loaded: rebuild from the JSON dump when it is available.
    if (!databaseLoaded && hasJsonFile) {
      try {
        console.log("JMdict JSONファイルからデータベースを作成中...");

        // Start from an empty directory so no partial state is reused.
        await cleanupDatabase(JMDICT_DB_PATH);

        const created = await setup(JMDICT_DB_PATH, jsonPath, true);
        jmdictDb = created;
        console.log(
          `JMdict データベース作成完了 (辞書日付: ${created.dictDate})`,
        );
        databaseLoaded = true;
      } catch (createError) {
        console.error(
          "JSONからのデータベース作成に失敗:",
          jmdictErrorMessage(createError),
        );
      }
    }

    // Still nothing: tell the operator how to obtain the data and use the minimal dictionary.
    if (!databaseLoaded) {
      if (!hasJsonFile) {
        console.log("JMdict JSONファイルが見つかりません。");
        console.log(`手動でダウンロードしてください: ${JMDICT_DATA_URL}`);
        console.log(
          `ダウンロード後、解凍して以下のパスに配置してください: ${jsonPath}`,
        );
      }

      console.log("最小限のデータベースを使用します。");
      await createMinimalJMdictDatabase();
    }
  } catch (error) {
    console.error("JMdictの初期化に失敗しました:", error);

    // LevelDB-looking failures (matched on the message text) also get the on-disk store removed.
    if (
      error instanceof Error &&
      (error.message.includes("levelup") ||
        error.message.includes("leveldown") ||
        error.message.includes("LOCK"))
    ) {
      console.log("LevelDBエラーを検出しました。データベースを再構築します...");
      await cleanupDatabase(JMDICT_DB_PATH);
    }

    // Always fall back to minimal database
    await createMinimalJMdictDatabase();
  } finally {
    // Release waiting callers whether initialization succeeded or not.
    isInitializing = false;
  }
}
/**
 * Install an in-memory stand-in for the JMdict database.
 *
 * Assigns the module-level handle to a stub whose `version` is
 * `"3.1.0-minimal"`, and registers a small English → katakana table on the
 * global object under `minimalJMdictMappings`, where
 * `searchEnglishToKatakana` looks it up.
 */
async function createMinimalJMdictDatabase(): Promise<void> {
  console.log("最小限のJMdictデータベースを作成中...");

  // Curated loanword table used in place of real dictionary data.
  const minimalMappings: Record<string, string[]> = {
    // Technology terms
    computer: ["コンピューター", "コンピュータ"],
    software: ["ソフトウェア", "ソフトウエア"],
    hardware: ["ハードウェア", "ハードウエア"],
    internet: ["インターネット"],
    website: ["ウェブサイト", "ウエブサイト"],
    email: ["イーメール", "メール"],
    digital: ["デジタル"],
    system: ["システム"],
    network: ["ネットワーク"],
    server: ["サーバー", "サーバ"],
    database: ["データベース"],
    program: ["プログラム"],
    application: ["アプリケーション", "アプリ"],

    // Common words
    hello: ["ハロー", "ハロ"],
    world: ["ワールド"],
    news: ["ニュース"],
    business: ["ビジネス"],
    service: ["サービス"],
    project: ["プロジェクト"],
    team: ["チーム"],
    meeting: ["ミーティング"],
    coffee: ["コーヒー"],
    restaurant: ["レストラン"],
    hotel: ["ホテル"],
    music: ["ミュージック"],
    game: ["ゲーム"],
    sport: ["スポーツ"],
    travel: ["トラベル"],
  };

  // Stub store: answers only the two metadata keys and streams no entries.
  // It does not implement the real store interface, hence the single, local `any`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const mockDb = {
    get: async (key: string) => {
      if (key === "raw/dictDate") return "2024-01-01";
      if (key === "raw/version") return "3.1.0-minimal";
      throw new Error("Key not found");
    },
    createValueStream: () => ({
      on: () => ({}),
    }),
  } as any;

  jmdictDb = {
    db: mockDb,
    dictDate: "2024-01-01",
    version: "3.1.0-minimal",
  };

  // Publish the table where searchEnglishToKatakana reads it. Reflect.set avoids
  // assigning to an undeclared property of the global type (no declaration for
  // it is visible in this file).
  Reflect.set(globalThis, "minimalJMdictMappings", minimalMappings);

  console.log("最小限のJMdictデータベース作成完了");
}
/** Returns the curated fallback readings registered for `word`, or undefined when there are none. */
function lookupMinimalMapping(word: string): string[] | undefined {
  const table: unknown = Reflect.get(globalThis, "minimalJMdictMappings");
  if (typeof table !== "object" || table === null) {
    return undefined;
  }

  // Own-property check: a word such as "constructor" must not resolve to an
  // inherited Object.prototype member.
  if (!Object.prototype.hasOwnProperty.call(table, word)) {
    return undefined;
  }

  const entry: unknown = (table as Record<string, unknown>)[word];
  return Array.isArray(entry)
    ? entry.filter((item): item is string => typeof item === "string")
    : undefined;
}

/**
 * Look up katakana readings for an English word.
 *
 * Initializes the dictionary on first use. The curated fallback table (when
 * one is registered) is consulted first using the lower-cased word; otherwise
 * the dictionary is queried with lower-case, upper-case and capitalized
 * variants of the word.
 *
 * @param englishWord Word to look up; surrounding whitespace is ignored.
 * @returns Distinct katakana readings, or an empty array when the input is
 *   blank, nothing matches, or the lookup fails (failures are logged).
 */
export async function searchEnglishToKatakana(
  englishWord: string,
): Promise<string[]> {
  if (!jmdictDb) {
    await initializeJMdict();
  }

  const dictionary = jmdictDb;
  if (!dictionary) {
    return [];
  }

  try {
    const normalized = englishWord.trim();
    if (normalized === "") {
      // An empty term would otherwise be sent to the dictionary as a match-anything query.
      return [];
    }

    const lowerWord = normalized.toLowerCase();

    // Curated mappings take precedence whenever they are registered.
    const curated = lookupMinimalMapping(lowerWord);
    if (curated) {
      return curated.filter((reading) => isKatakana(reading));
    }

    // The minimal stand-in (version suffix "-minimal") has no entries behind it,
    // so querying its stub store can only fail or come back empty.
    if (dictionary.version.endsWith("-minimal")) {
      return [];
    }

    // Case variants of the word; the Set removes duplicates (e.g. "A" upper-cased and capitalized).
    const searchTerms = new Set([
      lowerWord,
      normalized.toUpperCase(),
      normalized.charAt(0).toUpperCase() + normalized.slice(1).toLowerCase(),
    ]);

    const katakanaReadings = new Set<string>();

    for (const term of searchTerms) {
      try {
        // NOTE(review): readingAnywhere presumably matches against kana readings;
        // confirm that a Latin-script term can ever produce a hit here.
        const readingResults = await readingAnywhere(dictionary.db, term, 10);
        for (const word of readingResults) {
          for (const kana of word.kana) {
            if (isKatakana(kana.text)) {
              katakanaReadings.add(kana.text);
            }
          }
        }
      } catch (searchError) {
        // One failing variant must not abort the remaining ones.
        console.warn(`JMdict search failed for term "${term}":`, searchError);
      }
    }

    return Array.from(katakanaReadings);
  } catch (error) {
    console.error("JMdict英語→カタカナ変換エラー:", error);
    return [];
  }
}
/**
 * Report whether `text` is written entirely in katakana.
 *
 * Every character must fall in the Unicode Katakana block (U+30A0–U+30FF),
 * which includes the long-vowel mark "ー" and the middle dot "・". Mixed-script
 * strings such as "サボる" and the empty string are rejected, so callers that
 * collect "katakana readings" no longer pick up readings that merely contain
 * one katakana character.
 *
 * @param text String to classify.
 * @returns `true` only for a non-empty, all-katakana string.
 */
function isKatakana(text: string): boolean {
  return /^[\u30A0-\u30FF]+$/.test(text);
}
/**
 * Convert an English word to katakana via the dictionary lookup.
 *
 * Returns the first reading reported by `searchEnglishToKatakana`. When the
 * lookup yields nothing, the input word is returned unchanged.
 *
 * NOTE(review): no phonetic conversion is applied on the fallback path, even
 * though this file defines `convertEnglishToKatakanaPhonetic` — confirm
 * whether the pass-through is intentional.
 *
 * @param englishWord Word to convert.
 * @returns A katakana reading, or `englishWord` itself when none was found.
 */
export async function convertEnglishToKatakanaWithJMdict(
  englishWord: string,
): Promise<string> {
  const [firstReading] = await searchEnglishToKatakana(englishWord);

  // `undefined` here means the result list was empty.
  return firstReading ?? englishWord;
}
/** Whole-word overrides, keyed by lower-case English word. */
const COMMON_WORD_KATAKANA: ReadonlyMap<string, string> = new Map(
  Object.entries({
    // Technology
    computer: "コンピューター",
    software: "ソフトウェア",
    hardware: "ハードウェア",
    internet: "インターネット",
    website: "ウェブサイト",
    email: "イーメール",
    digital: "デジタル",
    technology: "テクノロジー",
    programming: "プログラミング",
    algorithm: "アルゴリズム",
    database: "データベース",
    server: "サーバー",
    client: "クライアント",
    network: "ネットワーク",
    security: "セキュリティ",
    password: "パスワード",
    login: "ログイン",
    logout: "ログアウト",
    download: "ダウンロード",
    upload: "アップロード",

    // Common English words
    hello: "ハロー",
    world: "ワールド",
    news: "ニュース",
    business: "ビジネス",
    service: "サービス",
    system: "システム",
    management: "マネジメント",
    project: "プロジェクト",
    team: "チーム",
    meeting: "ミーティング",
    presentation: "プレゼンテーション",
    report: "レポート",
    analysis: "アナリシス",
    marketing: "マーケティング",
    strategy: "ストラテジー",
    solution: "ソリューション",
    development: "デベロップメント",
    innovation: "イノベーション",
    design: "デザイン",
    product: "プロダクト",
    quality: "クオリティ",
    performance: "パフォーマンス",
    efficiency: "エフィシエンシー",

    // Food and daily life
    coffee: "コーヒー",
    restaurant: "レストラン",
    hotel: "ホテル",
    shopping: "ショッピング",
    fashion: "ファッション",
    music: "ミュージック",
    movie: "ムービー",
    game: "ゲーム",
    sport: "スポーツ",
    travel: "トラベル",
    vacation: "バケーション",
    holiday: "ホリデー",
  }),
);

/** Two-letter sequences that map to a single katakana chunk. */
const DIGRAPH_KATAKANA: ReadonlyMap<string, string> = new Map([
  ["ch", "チ"],
  ["sh", "シ"],
  ["th", "ス"],
  ["ph", "フ"],
  ["ck", "ク"],
  ["ng", "ング"],
  ["qu", "クワ"],
]);

/** Single-letter fallback; characters without an entry are copied through unchanged. */
const LETTER_KATAKANA: ReadonlyMap<string, string> = new Map([
  ["a", "ア"],
  ["e", "エ"],
  ["i", "イ"],
  ["o", "オ"],
  ["u", "ウ"],
  ["b", "ブ"],
  ["c", "ク"],
  ["d", "ド"],
  ["f", "フ"],
  ["g", "グ"],
  ["h", "ハ"],
  ["j", "ジ"],
  ["k", "ク"],
  ["l", "ル"],
  ["m", "ム"],
  ["n", "ン"],
  ["p", "プ"],
  ["r", "ル"],
  ["s", "ス"],
  ["t", "ト"],
  ["v", "ブ"],
  ["w", "ワ"],
  ["x", "クス"],
  ["y", "ワイ"],
  ["z", "ズ"],
]);

/**
 * Letter-by-letter English → katakana approximation.
 *
 * The lower-cased word is first looked up in a table of whole-word overrides.
 * Otherwise it is scanned left to right: a known two-letter sequence
 * (`ch`, `sh`, `th`, `ph`, `ck`, `ng`, `qu`) is converted as a unit, any other
 * letter is converted on its own, and characters without a mapping (digits,
 * punctuation, a lone `q`) are copied through unchanged.
 *
 * The tables are Maps, so words that collide with inherited object keys
 * ("constructor", "__proto__") are transliterated like any other word.
 *
 * @param word English word in any letter case.
 * @returns The katakana approximation; an empty string for empty input.
 */
function convertEnglishToKatakanaPhonetic(word: string): string {
  const lowerWord = word.toLowerCase();

  const override = COMMON_WORD_KATAKANA.get(lowerWord);
  if (override !== undefined) {
    return override;
  }

  let result = "";
  let i = 0;

  while (i < lowerWord.length) {
    // At the last character the slice has length 1 and never matches a digraph.
    const digraph = DIGRAPH_KATAKANA.get(lowerWord.slice(i, i + 2));
    if (digraph !== undefined) {
      result += digraph;
      i += 2;
      continue;
    }

    const char = lowerWord.charAt(i);
    result += LETTER_KATAKANA.get(char) ?? char;
    i += 1;
  }

  return result;
}
/**
 * Tell whether a dictionary handle is currently installed.
 *
 * The minimal fallback dictionary also counts, because it is stored in the
 * same module-level handle.
 *
 * @returns `true` when the handle is non-null.
 */
export function isJMdictInitialized(): boolean {
  if (jmdictDb === null) {
    return false;
  }
  return true;
}
/**
 * Report the date and version of the currently installed dictionary.
 *
 * @returns A fresh `{ dictDate, version }` object copied from the handle, or
 *   `null` when no dictionary has been initialized.
 */
export function getJMdictInfo(): { dictDate: string; version: string } | null {
  const current = jmdictDb;
  if (!current) {
    return null;
  }

  const { dictDate, version } = current;
  return { dictDate, version };
}
/**
 * Discard the current dictionary, delete the on-disk database and initialize again.
 * Useful for troubleshooting LevelDB issues.
 *
 * The previous store is closed before its directory is removed and reopened;
 * an open store keeps its lock, so reopening the same location without
 * closing it first is expected to fail. Fallback mappings registered by an
 * earlier minimal-dictionary run are also dropped so they cannot shadow the
 * rebuilt database.
 */
export async function forceReinitializeJMdict(): Promise<void> {
  console.log("JMdictデータベースの強制再初期化中...");

  // Reset state, keeping a reference to the old handle so it can be closed.
  const previous = jmdictDb;
  jmdictDb = null;
  isInitializing = false;

  if (previous) {
    try {
      // The minimal stand-in store has no close(); a real store returns a
      // promise when close() is called without a callback.
      await previous.db.close?.();
    } catch (closeError) {
      // Closing is best-effort: continue with cleanup and re-initialization regardless.
      console.warn("データベースのクローズに失敗しました:", closeError);
    }
  }

  // Forget fallback mappings from a previous minimal-dictionary run.
  Reflect.deleteProperty(globalThis, "minimalJMdictMappings");

  // Clean up existing database
  await cleanupDatabase(JMDICT_DB_PATH);

  // Re-initialize
  await initializeJMdict();

  console.log("JMdictデータベースの強制再初期化完了");
}