import Kuroshiro from "kuroshiro";
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
import { toKatakana } from "wanakana";
import {
  convertEnglishToKatakanaWithJMdict,
  getJMdictInfo,
  initializeJMdict,
  isJMdictInitialized,
} from "./jmdict.js";

// Shared Kuroshiro instance, reused to avoid recreating the analyzer.
// It stays null until init() has completed successfully.
let kuroshiroInstance: Kuroshiro | null = null;

// Runs of ASCII letters delimited by word boundaries.
// `match` and `replace` both reset lastIndex, so sharing the /g regex is safe.
const englishWordPattern = /\b[a-zA-Z]+\b/g;

// Basic English to Katakana mapping for common words (keys are lower-case).
const englishToKatakanaMap: Record<string, string> = {
  hello: "ハロー",
  world: "ワールド",
  this: "ディス",
  is: "イズ",
  a: "ア",
  test: "テスト",
  javascript: "ジャバスクリプト",
  typescript: "タイプスクリプト",
  and: "アンド",
  api: "エーピーアイ",
  endpoint: "エンドポイント",
  machine: "マシン",
  learning: "ラーニング",
  model: "モデル",
  analysis: "アナリシス",
  computer: "コンピューター",
  data: "データ",
  software: "ソフトウェア",
  program: "プログラム",
  system: "システム",
  network: "ネットワーク",
  server: "サーバー",
  client: "クライアント",
  database: "データベース",
  file: "ファイル",
  user: "ユーザー",
  password: "パスワード",
  login: "ログイン",
  logout: "ログアウト",
  website: "ウェブサイト",
  browser: "ブラウザー",
  application: "アプリケーション",
  service: "サービス",
};

// Letter-by-letter katakana used by the last-resort phonetic approximation.
const phoneticMap: Record<string, string> = {
  a: "ア",
  b: "ブ",
  c: "ク",
  d: "ド",
  e: "エ",
  f: "フ",
  g: "グ",
  h: "ハ",
  i: "イ",
  j: "ジ",
  k: "ク",
  l: "ル",
  m: "ム",
  n: "ン",
  o: "オ",
  p: "プ",
  q: "ク",
  r: "ル",
  s: "ス",
  t: "ト",
  u: "ウ",
  v: "ブ",
  w: "ワ",
  x: "クス",
  y: "ワイ",
  z: "ズ",
};

/**
 * Convert a single English word to Katakana.
 *
 * Strategies are tried in order: JMdict (only when already initialized),
 * the predefined mapping, wanakana's romaji conversion, and finally a
 * letter-by-letter phonetic approximation.
 *
 * @param word - The English word to convert
 * @returns The Katakana rendering produced by the first strategy that succeeds
 */
async function convertEnglishWordToKatakana(word: string): Promise<string> {
  const lowerWord = word.toLowerCase();

  // First try JMdict if available. A failure here is logged and the
  // remaining strategies are still attempted.
  try {
    if (isJMdictInitialized()) {
      const jmdictResult = await convertEnglishToKatakanaWithJMdict(word);
      if (jmdictResult && jmdictResult !== word) {
        return jmdictResult;
      }
    }
  } catch (error) {
    console.warn(`JMdict conversion failed for "${word}":`, error);
  }

  // Check the predefined mapping second. The own-property check keeps words
  // such as "constructor" from resolving to members of Object.prototype.
  if (Object.prototype.hasOwnProperty.call(englishToKatakanaMap, lowerWord)) {
    const mapped = englishToKatakanaMap[lowerWord];
    if (mapped) {
      return mapped;
    }
  }

  // Try wanakana, treating the word as romaji. The result is accepted only
  // when no Latin letters remain, so a partially converted word falls
  // through to the phonetic approximation instead of being returned mixed.
  try {
    const katakana = toKatakana(lowerWord);
    if (katakana && katakana !== lowerWord && !/[a-z]/i.test(katakana)) {
      return katakana;
    }
  } catch {
    // Deliberate best-effort: fall through to the approximation below.
  }

  // Fallback: simple phonetic approximation
  return approximateEnglishToKatakana(word);
}

/**
 * Simple phonetic approximation for English to Katakana.
 *
 * Each lower-cased character is looked up in `phoneticMap`; characters
 * without an entry are kept unchanged.
 *
 * @param word - The word to approximate
 * @returns The concatenated per-character Katakana
 */
function approximateEnglishToKatakana(word: string): string {
  return word
    .toLowerCase()
    .split("")
    .map((char) => phoneticMap[char] || char)
    .join("");
}

/**
 * Return the shared Kuroshiro instance, initializing the text converter
 * first when no instance exists yet.
 *
 * @throws Error when no instance is available after initialization
 */
async function requireKuroshiro(): Promise<Kuroshiro> {
  if (!kuroshiroInstance) {
    await initializeTextConverter();
  }
  if (!kuroshiroInstance) {
    throw new Error("Failed to initialize kuroshiro");
  }
  return kuroshiroInstance;
}

/**
 * Replace every English word in `text` using `convertWord`.
 *
 * Each distinct word is converted once, then all occurrences are substituted
 * in a single pass over the original text. Already-converted output is never
 * re-scanned and the replacement is inserted literally. Words whose
 * conversion throws (a warning is logged) or yields a nullish result are
 * left unchanged.
 *
 * @param text - Input text
 * @param convertWord - Asynchronous converter applied to each distinct word
 * @returns The text with converted words substituted
 */
async function replaceEnglishWords(
  text: string,
  convertWord: (word: string) => Promise<string | null | undefined>,
): Promise<string> {
  const matches = text.match(englishWordPattern);
  if (!matches) {
    return text;
  }

  const conversions = new Map<string, string>();
  for (const englishWord of new Set(matches)) {
    try {
      const converted = await convertWord(englishWord);
      if (converted != null) {
        conversions.set(englishWord, converted);
      }
    } catch (convertError) {
      console.warn(`Failed to convert word "${englishWord}":`, convertError);
      // Keep original word if conversion fails
    }
  }

  return text.replace(
    englishWordPattern,
    (word) => conversions.get(word) ?? word,
  );
}

/**
 * Initialize kuroshiro with MeCab analyzer and JMdict.
 * This should be called once during application startup.
 *
 * Returns immediately when both parts are already initialized. The shared
 * Kuroshiro instance is published only after `init` succeeds, so a failed
 * initialization is retried on the next call instead of leaving an unusable
 * instance behind.
 *
 * @throws Rethrows any initialization error after logging it
 */
export async function initializeTextConverter(): Promise<void> {
  if (kuroshiroInstance && isJMdictInitialized()) {
    return; // Already initialized
  }

  try {
    console.log("テキストコンバーターを初期化中...");

    // Initialize Kuroshiro if not already done
    if (!kuroshiroInstance) {
      console.log("Kuroshiroを初期化中...");
      const instance = new Kuroshiro();
      await instance.init(new KuroshiroAnalyzerMecab());
      kuroshiroInstance = instance;
      console.log("Kuroshiro初期化完了");
    }

    // Initialize JMdict if not already done
    if (!isJMdictInitialized()) {
      console.log("JMdictを初期化中...");
      await initializeJMdict();
      console.log("JMdict初期化完了");

      const jmdictInfo = getJMdictInfo();
      if (jmdictInfo) {
        console.log(
          `JMdict情報: バージョン ${jmdictInfo.version}, 辞書日付 ${jmdictInfo.dictDate}`,
        );
      }
    }

    console.log("テキストコンバーター初期化完了");
  } catch (error) {
    console.error("テキストコンバーターの初期化に失敗しました:", error);
    throw error;
  }
}

/**
 * Convert the whole text to katakana with Kuroshiro
 * (`to: "katakana"`, `mode: "normal"`).
 *
 * NOTE(review): the original comment said this also converts English words.
 * Nothing in this file establishes how Kuroshiro treats Latin text; confirm
 * against the Kuroshiro documentation.
 *
 * @param text - Input text (may contain Japanese, English, and other characters)
 * @returns The converted text, or the original text if conversion fails
 * @throws Error when Kuroshiro cannot be initialized
 */
export async function convertEnglishToKatakana(text: string): Promise<string> {
  const kuroshiro = await requireKuroshiro();

  try {
    return await kuroshiro.convert(text, {
      to: "katakana",
      mode: "normal",
    });
  } catch (error) {
    console.error("テキスト変換エラー:", error);
    // Return original text if conversion fails
    return text;
  }
}

/**
 * Convert only English words to katakana while preserving Japanese text.
 * Each English word goes through `convertEnglishWordToKatakana`; everything
 * else in the text is left untouched.
 *
 * @param text - Input text
 * @returns Text with only English words converted to katakana
 * @throws Error when Kuroshiro cannot be initialized
 */
export async function convertEnglishWordsOnly(text: string): Promise<string> {
  // Initialization is still required up front: it also loads JMdict and
  // provides the full-conversion fallback used below.
  await requireKuroshiro();

  try {
    return await replaceEnglishWords(text, convertEnglishWordToKatakana);
  } catch (error) {
    console.error("選択的テキスト変換エラー:", error);
    // Fallback to full conversion
    return convertEnglishToKatakana(text);
  }
}

/**
 * Check if text converter (kuroshiro and JMdict) is fully initialized
 */
export function isTextConverterInitialized(): boolean {
  return kuroshiroInstance !== null && isJMdictInitialized();
}

/**
 * Check if only kuroshiro is initialized (backward compatibility)
 */
export function isKuroshiroInitialized(): boolean {
  return kuroshiroInstance !== null;
}

/**
 * Get text converter status information
 *
 * @returns Whether Kuroshiro and JMdict are initialized, plus the value
 *   reported by `getJMdictInfo()`
 */
export function getTextConverterInfo(): {
  kuroshiro: boolean;
  jmdict: boolean;
  jmdictInfo: { dictDate: string; version: string } | null;
} {
  return {
    kuroshiro: kuroshiroInstance !== null,
    jmdict: isJMdictInitialized(),
    jmdictInfo: getJMdictInfo(),
  };
}

/**
 * Convert English words to Katakana using JMdict.
 *
 * JMdict is initialized on demand. Each distinct English word is passed to
 * `convertEnglishToKatakanaWithJMdict`; a word whose conversion throws is
 * left unchanged. If the replacement as a whole fails, the text is handed
 * to `convertEnglishWordsOnly` instead.
 *
 * @param text - Input text containing English words
 * @returns Text with English words converted to Katakana using JMdict
 */
export async function convertEnglishToKatakanaWithJMdictFallback(
  text: string,
): Promise<string> {
  if (!isJMdictInitialized()) {
    await initializeJMdict();
  }

  try {
    return await replaceEnglishWords(text, (word) =>
      convertEnglishToKatakanaWithJMdict(word),
    );
  } catch (error) {
    console.error("JMdict-based English to Katakana conversion error:", error);
    // Fallback to the original method if JMdict conversion fails
    return convertEnglishWordsOnly(text);
  }
}