337 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
			
		
		
	
	
			337 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
import Kuroshiro from "kuroshiro";
 | 
						|
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
 | 
						|
import { toKatakana } from "wanakana";
 | 
						|
import {
 | 
						|
  convertEnglishToKatakanaWithJMdict,
 | 
						|
  getJMdictInfo,
 | 
						|
  initializeJMdict,
 | 
						|
  isJMdictInitialized,
 | 
						|
} from "./jmdict.js";
 | 
						|
 | 
						|
// Global instance to avoid recreating the analyzer
 | 
						|
// Assigned by initializeTextConverter(); null until that has run.
let kuroshiroInstance: Kuroshiro | null = null;
 | 
						|
 | 
						|
/**
 * Hand-curated katakana renderings for common English words, keyed by the
 * lower-cased word.
 *
 * The table lives on a prototype-less object so that a lookup with an
 * arbitrary word can only ever hit an entry listed here. On a plain object
 * literal, `englishToKatakanaMap["constructor"]` would resolve to the
 * inherited `Object.prototype.constructor` function instead of `undefined`.
 */
const englishToKatakanaMap: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    hello: "ハロー",
    world: "ワールド",
    this: "ディス",
    is: "イズ",
    a: "ア",
    test: "テスト",
    javascript: "ジャバスクリプト",
    typescript: "タイプスクリプト",
    and: "アンド",
    api: "エーピーアイ",
    endpoint: "エンドポイント",
    machine: "マシン",
    learning: "ラーニング",
    model: "モデル",
    analysis: "アナリシス",
    computer: "コンピューター",
    data: "データ",
    software: "ソフトウェア",
    program: "プログラム",
    system: "システム",
    network: "ネットワーク",
    server: "サーバー",
    client: "クライアント",
    database: "データベース",
    file: "ファイル",
    user: "ユーザー",
    password: "パスワード",
    login: "ログイン",
    logout: "ログアウト",
    website: "ウェブサイト",
    browser: "ブラウザー",
    application: "アプリケーション",
    service: "サービス",
  },
);
 | 
						|
 | 
						|
/**
 * Convert a single English word to katakana.
 *
 * Strategies are tried in order and the first one that yields a result wins:
 *  1. JMdict lookup (only when JMdict reports itself initialized),
 *  2. the predefined `englishToKatakanaMap` table, matched case-insensitively,
 *  3. wanakana's `toKatakana` applied to the lower-cased word,
 *  4. the letter-by-letter `approximateEnglishToKatakana` fallback.
 *
 * @param word - The word to convert.
 * @returns The rendering produced by the first strategy that succeeds.
 */
async function convertEnglishWordToKatakana(word: string): Promise<string> {
  const lowerWord = word.toLowerCase();

  // First try JMdict if available. A failure here is logged and the
  // remaining strategies are still attempted.
  try {
    if (isJMdictInitialized()) {
      const jmdictResult = await convertEnglishToKatakanaWithJMdict(word);
      // A falsy result, or the word echoed back unchanged, counts as no match.
      if (jmdictResult && jmdictResult !== word) {
        return jmdictResult;
      }
    }
  } catch (error) {
    console.warn(`JMdict conversion failed for "${word}":`, error);
  }

  // Check predefined mapping second. Only own entries count: a bare
  // truthiness test on `englishToKatakanaMap[lowerWord]` would accept
  // inherited members such as `constructor` and return a function.
  if (Object.prototype.hasOwnProperty.call(englishToKatakanaMap, lowerWord)) {
    const mapped = englishToKatakanaMap[lowerWord];
    if (mapped) {
      return mapped;
    }
  }

  // Try wanakana, treating the word as if it were romanized Japanese.
  // NOTE(review): the result is accepted whenever it differs from the input,
  // even if some Latin letters are left unconverted - confirm that is intended.
  try {
    const katakana = toKatakana(lowerWord);
    if (katakana && katakana !== lowerWord) {
      return katakana;
    }
  } catch {
    // wanakana failed; fall through to the approximation below.
  }

  // Fallback: simple phonetic approximation
  return approximateEnglishToKatakana(word);
}
 | 
						|
 | 
						|
/**
 * Last-resort transliteration: replace each ASCII letter of the lower-cased
 * word with a fixed katakana stand-in, one character at a time.
 *
 * Characters without an entry in the table (digits, punctuation, non-Latin
 * text) are copied through unchanged.
 *
 * @param word - The word to approximate.
 * @returns The word with every mapped letter replaced by its katakana stand-in.
 */
function approximateEnglishToKatakana(word: string): string {
  const kanaByLetter = new Map<string, string>([
    ["a", "ア"],
    ["b", "ブ"],
    ["c", "ク"],
    ["d", "ド"],
    ["e", "エ"],
    ["f", "フ"],
    ["g", "グ"],
    ["h", "ハ"],
    ["i", "イ"],
    ["j", "ジ"],
    ["k", "ク"],
    ["l", "ル"],
    ["m", "ム"],
    ["n", "ン"],
    ["o", "オ"],
    ["p", "プ"],
    ["q", "ク"],
    ["r", "ル"],
    ["s", "ス"],
    ["t", "ト"],
    ["u", "ウ"],
    ["v", "ブ"],
    ["w", "ワ"],
    ["x", "クス"],
    ["y", "ワイ"],
    ["z", "ズ"],
  ]);

  let approximation = "";
  for (const unit of word.toLowerCase().split("")) {
    approximation += kanaByLetter.get(unit) || unit;
  }
  return approximation;
}
 | 
						|
 | 
						|
/**
 * Initialize kuroshiro (with the MeCab analyzer) and JMdict.
 *
 * Intended to be called once during application startup. Calling it again
 * is a no-op once both parts are ready, and after a partial failure it only
 * retries the part that is still missing.
 *
 * @throws Re-throws whatever error kuroshiro or JMdict initialization raised,
 *   after logging it.
 */
export async function initializeTextConverter(): Promise<void> {
  if (kuroshiroInstance && isJMdictInitialized()) {
    return; // Already initialized
  }

  try {
    console.log("テキストコンバーターを初期化中...");

    // Initialize Kuroshiro if not already done
    if (!kuroshiroInstance) {
      console.log("Kuroshiroを初期化中...");
      // Publish the instance only after init() has resolved. Assigning it
      // first would leave a non-null but unusable instance behind when
      // init() rejects, and every later call would treat it as ready.
      const instance = new Kuroshiro();
      await instance.init(new KuroshiroAnalyzerMecab());
      kuroshiroInstance = instance;
      console.log("Kuroshiro初期化完了");
    }

    // Initialize JMdict if not already done
    if (!isJMdictInitialized()) {
      console.log("JMdictを初期化中...");
      await initializeJMdict();
      console.log("JMdict初期化完了");

      const jmdictInfo = getJMdictInfo();
      if (jmdictInfo) {
        console.log(
          `JMdict情報: バージョン ${jmdictInfo.version}, 辞書日付 ${jmdictInfo.dictDate}`,
        );
      }
    }

    console.log("テキストコンバーター初期化完了");
  } catch (error) {
    console.error("テキストコンバーターの初期化に失敗しました:", error);
    throw error;
  }
}
 | 
						|
 | 
						|
/**
 * Run the whole input through kuroshiro with katakana as the target script.
 *
 * The converter is initialized on demand when no kuroshiro instance exists
 * yet. If the conversion itself fails, the error is logged and the input is
 * handed back untouched.
 *
 * NOTE(review): whether Latin-script words are actually transliterated here
 * depends on kuroshiro and its analyzer - confirm before relying on it.
 *
 * @param text - Input text (may contain Japanese, English, and other characters)
 * @returns The text as converted by kuroshiro, or the original text on failure
 * @throws Error when no kuroshiro instance is available after initialization
 */
export async function convertEnglishToKatakana(text: string): Promise<string> {
  if (kuroshiroInstance === null) {
    await initializeTextConverter();
  }

  const converter = kuroshiroInstance;
  if (converter === null) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    // Whole-text conversion in kuroshiro's "normal" mode.
    return await converter.convert(text, { to: "katakana", mode: "normal" });
  } catch (error) {
    console.error("テキスト変換エラー:", error);
    // Best effort: give the caller back what they passed in.
    return text;
  }
}
 | 
						|
 | 
						|
/**
 * Convert only the English words in a text to katakana, leaving everything
 * else (Japanese text, digits, punctuation) exactly as it was.
 *
 * An "English word" is a run of ASCII letters delimited by word boundaries.
 * Each distinct word is converted once with `convertEnglishWordToKatakana`;
 * the text is then rewritten in a single pass. A word whose conversion throws
 * is logged and left unchanged.
 *
 * @param text - Input text
 * @returns Text with only English words converted to katakana
 * @throws Error when no kuroshiro instance is available after initialization
 */
export async function convertEnglishWordsOnly(text: string): Promise<string> {
  if (!kuroshiroInstance) {
    await initializeTextConverter();
  }

  if (!kuroshiroInstance) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    const englishWordPattern = /\b[a-zA-Z]+\b/g;
    const matches = text.match(englishWordPattern);
    if (!matches) {
      return text;
    }

    // Convert each distinct word once, however often it occurs in the text.
    const conversions = new Map<string, string>();
    for (const englishWord of new Set(matches)) {
      try {
        conversions.set(
          englishWord,
          await convertEnglishWordToKatakana(englishWord),
        );
      } catch (convertError) {
        console.warn(
          `Failed to convert word "${englishWord}":`,
          convertError,
        );
        // Keep original word if conversion fails
      }
    }

    // One pass over the original text with a replacer function: the
    // converted text is inserted literally (no `$&` / `$1` interpretation)
    // and already-converted output is never scanned again.
    return text.replace(
      englishWordPattern,
      (match) => conversions.get(match) ?? match,
    );
  } catch (error) {
    console.error("選択的テキスト変換エラー:", error);
    // Fallback to full conversion
    return convertEnglishToKatakana(text);
  }
}
 | 
						|
 | 
						|
/**
 * Report whether the converter is fully ready, i.e. a kuroshiro instance
 * exists and JMdict reports itself initialized.
 *
 * @returns `true` only when both parts are available
 */
export function isTextConverterInitialized(): boolean {
  if (kuroshiroInstance === null) {
    return false;
  }
  return isJMdictInitialized();
}
 | 
						|
 | 
						|
/**
 * Report whether a kuroshiro instance exists, regardless of JMdict state
 * (kept for backward compatibility).
 *
 * @returns `true` when the module-level kuroshiro instance is set
 */
export function isKuroshiroInitialized(): boolean {
  const hasInstance = kuroshiroInstance !== null;
  return hasInstance;
}
 | 
						|
 | 
						|
/**
 * Snapshot of the converter's current state.
 *
 * @returns Whether a kuroshiro instance exists, whether JMdict reports itself
 *   initialized, and whatever `getJMdictInfo()` currently returns
 */
export function getTextConverterInfo(): {
  kuroshiro: boolean;
  jmdict: boolean;
  jmdictInfo: { dictDate: string; version: string } | null;
} {
  const kuroshiro = kuroshiroInstance !== null;
  const jmdict = isJMdictInitialized();
  const jmdictInfo = getJMdictInfo();

  return { kuroshiro, jmdict, jmdictInfo };
}
 | 
						|
 | 
						|
/**
 * Convert the English words in a text to katakana using the JMdict-backed
 * converter, leaving all other text untouched.
 *
 * JMdict is initialized on demand; an error from that initialization
 * propagates to the caller. Each distinct run of ASCII letters is converted
 * once with `convertEnglishToKatakanaWithJMdict`, and the text is rewritten
 * in a single pass. A word whose conversion throws is logged and kept as is.
 * If the rewrite as a whole fails, `convertEnglishWordsOnly` is used instead.
 *
 * @param text - Input text containing English words
 * @returns Text with English words converted to Katakana using JMdict
 */
export async function convertEnglishToKatakanaWithJMdictFallback(
  text: string,
): Promise<string> {
  if (!isJMdictInitialized()) {
    await initializeJMdict();
  }

  try {
    const englishWordPattern = /\b[a-zA-Z]+\b/g;
    const matches = text.match(englishWordPattern);
    if (!matches) {
      return text;
    }

    // Process each unique word to avoid duplicate conversions
    const conversions = new Map<string, string>();
    for (const englishWord of new Set(matches)) {
      try {
        conversions.set(
          englishWord,
          await convertEnglishToKatakanaWithJMdict(englishWord),
        );
      } catch (convertError) {
        console.warn(
          `Failed to convert word "${englishWord}":`,
          convertError,
        );
        // Keep original word if conversion fails
      }
    }

    // One pass over the original text with a replacer function: dictionary
    // output is inserted literally (no `$&` / `$1` interpretation), no
    // per-word RegExp is built, and converted output is never rescanned.
    return text.replace(
      englishWordPattern,
      (match) => conversions.get(match) ?? match,
    );
  } catch (error) {
    console.error("JMdict-based English to Katakana conversion error:", error);
    // Fallback to the original method if JMdict conversion fails
    return convertEnglishWordsOnly(text);
  }
}
 |