import Kuroshiro from "kuroshiro";
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
import { toKatakana } from "wanakana";

// Shared, fully initialized converter. It is published only after init()
// has succeeded, so a non-null value always means "ready to use".
let kuroshiroInstance: Kuroshiro | null = null;

// In-flight initialization, shared so that concurrent callers wait on the
// same init() instead of racing or starting a second one.
let initializationPromise: Promise<void> | null = null;

// Fixed English -> Katakana spellings for common words (keys are lowercase).
// A Map is used instead of an object literal so that words which collide with
// Object.prototype members ("constructor", "valueOf", ...) are not treated as
// dictionary hits.
const englishToKatakanaMap: ReadonlyMap<string, string> = new Map<string, string>([
  ["hello", "ハロー"],
  ["world", "ワールド"],
  ["this", "ディス"],
  ["is", "イズ"],
  ["a", "ア"],
  ["test", "テスト"],
  ["javascript", "ジャバスクリプト"],
  ["typescript", "タイプスクリプト"],
  ["and", "アンド"],
  ["api", "エーピーアイ"],
  ["endpoint", "エンドポイント"],
  ["machine", "マシン"],
  ["learning", "ラーニング"],
  ["model", "モデル"],
  ["analysis", "アナリシス"],
  ["computer", "コンピューター"],
  ["data", "データ"],
  ["software", "ソフトウェア"],
  ["program", "プログラム"],
  ["system", "システム"],
  ["network", "ネットワーク"],
  ["server", "サーバー"],
  ["client", "クライアント"],
  ["database", "データベース"],
  ["file", "ファイル"],
  ["user", "ユーザー"],
  ["password", "パスワード"],
  ["login", "ログイン"],
  ["logout", "ログアウト"],
  ["website", "ウェブサイト"],
  ["browser", "ブラウザー"],
  ["application", "アプリケーション"],
  ["service", "サービス"],
]);

// Letter-by-letter fallback table used when no better conversion is available.
const phoneticMap: ReadonlyMap<string, string> = new Map<string, string>([
  ["a", "ア"], ["b", "ブ"], ["c", "ク"], ["d", "ド"], ["e", "エ"],
  ["f", "フ"], ["g", "グ"], ["h", "ハ"], ["i", "イ"], ["j", "ジ"],
  ["k", "ク"], ["l", "ル"], ["m", "ム"], ["n", "ン"], ["o", "オ"],
  ["p", "プ"], ["q", "ク"], ["r", "ル"], ["s", "ス"], ["t", "ト"],
  ["u", "ウ"], ["v", "ブ"], ["w", "ワ"], ["x", "クス"], ["y", "ワイ"],
  ["z", "ズ"],
]);

// Runs of ASCII letters delimited by word boundaries. The global flag is safe
// on a shared instance because String.prototype.replace resets lastIndex.
const ENGLISH_WORD_PATTERN = /\b[a-zA-Z]+\b/g;

/**
 * Convert a single English word to Katakana.
 *
 * Resolution order:
 *  1. the predefined dictionary (case-insensitive);
 *  2. wanakana's `toKatakana`, which reads the lowercase spelling as if it
 *     were romaji;
 *  3. the letter-by-letter approximation.
 *
 * @param word - A single word, any letter case
 * @returns The Katakana rendering of the word
 */
function convertEnglishWordToKatakana(word: string): string {
  const lowerWord = word.toLowerCase();

  const predefined = englishToKatakanaMap.get(lowerWord);
  if (predefined) {
    return predefined;
  }

  try {
    const katakana = toKatakana(lowerWord);
    // Accept the result only if wanakana actually changed something.
    if (katakana && katakana !== lowerWord) {
      return katakana;
    }
  } catch {
    // Deliberate best-effort: fall through to the approximation below.
  }

  return approximateEnglishToKatakana(word);
}

/**
 * Letter-by-letter approximation of an English word in Katakana.
 *
 * Each lowercase ASCII letter is replaced by a fixed Katakana string; any
 * other character is passed through unchanged.
 *
 * @param word - Word to approximate, any letter case
 * @returns The approximated Katakana string
 */
function approximateEnglishToKatakana(word: string): string {
  return Array.from(word.toLowerCase(), (char) => phoneticMap.get(char) ?? char).join("");
}

/**
 * Return the ready-to-use converter, initializing it on first use.
 *
 * @throws If initialization fails, or the instance is still missing afterwards
 */
async function requireConverter(): Promise<Kuroshiro> {
  if (!kuroshiroInstance) {
    await initializeTextConverter();
  }
  if (!kuroshiroInstance) {
    throw new Error("Failed to initialize kuroshiro");
  }
  return kuroshiroInstance;
}

/**
 * Initialize kuroshiro with the MeCab analyzer.
 *
 * Safe to call repeatedly: returns immediately once initialized, and
 * concurrent calls share a single in-flight initialization. If initialization
 * fails the error is logged and rethrown, no instance is kept, and a later
 * call will try again.
 *
 * @throws Whatever error the kuroshiro/analyzer initialization raised
 */
export async function initializeTextConverter(): Promise<void> {
  if (kuroshiroInstance) {
    return; // Already initialized
  }

  if (!initializationPromise) {
    initializationPromise = (async (): Promise<void> => {
      try {
        console.log("Kuroshiroテキストコンバーターを初期化中...");
        const instance = new Kuroshiro();
        await instance.init(new KuroshiroAnalyzerMecab());
        // Publish only after init() succeeded so nobody sees a half-built instance.
        kuroshiroInstance = instance;
        console.log("Kuroshiroテキストコンバーター初期化完了");
      } catch (error) {
        console.error("Kuroshiroの初期化に失敗しました:", error);
        throw error;
      } finally {
        // Clear the in-flight marker so a failed attempt can be retried.
        initializationPromise = null;
      }
    })();
  }

  await initializationPromise;
}

/**
 * Convert text to katakana by passing the whole string through kuroshiro
 * (`to: "katakana"`, `mode: "normal"`).
 *
 * NOTE(review): how Latin-script words in the input are treated is decided by
 * kuroshiro and is not visible here — confirm before relying on this for
 * English input.
 *
 * @param text - Input text (may contain Japanese, English, and other characters)
 * @returns The converted text, or the original text if conversion fails
 * @throws If the converter cannot be initialized
 */
export async function convertEnglishToKatakana(text: string): Promise<string> {
  const converter = await requireConverter();

  try {
    const convertedText = await converter.convert(text, {
      to: "katakana",
      mode: "normal",
    });
    return convertedText;
  } catch (error) {
    console.error("テキスト変換エラー:", error);
    // Return original text if conversion fails
    return text;
  }
}

/**
 * Convert only the English words in a text to katakana, leaving every other
 * character untouched.
 *
 * Words are runs of ASCII letters delimited by word boundaries. Each one is
 * replaced in a single pass over the input, so text produced by one
 * substitution is never re-scanned by another.
 *
 * @param text - Input text
 * @returns Text with only English words converted to katakana
 * @throws If the converter cannot be initialized
 */
export async function convertEnglishWordsOnly(text: string): Promise<string> {
  // The converter is only needed by the fallback path, but it is ensured up
  // front to keep the existing contract (lazy init; throws when unavailable).
  await requireConverter();

  try {
    return text.replace(ENGLISH_WORD_PATTERN, (englishWord: string): string => {
      try {
        return convertEnglishWordToKatakana(englishWord);
      } catch (convertError) {
        console.warn(`Failed to convert word "${englishWord}":`, convertError);
        // Keep original word if conversion fails
        return englishWord;
      }
    });
  } catch (error) {
    console.error("選択的テキスト変換エラー:", error);
    // Fallback to full conversion
    return convertEnglishToKatakana(text);
  }
}

/**
 * Check whether kuroshiro has been successfully initialized.
 *
 * @returns `true` once a converter instance has completed initialization
 */
export function isTextConverterInitialized(): boolean {
  return kuroshiroInstance !== null;
}