192 lines
5.8 KiB
TypeScript
192 lines
5.8 KiB
TypeScript
import Kuroshiro from "kuroshiro";
|
|
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
|
|
import { toKatakana } from "wanakana";
|
|
|
|
// Global instance to avoid recreating the analyzer
|
|
// Stays null until initializeTextConverter() assigns a Kuroshiro instance.
let kuroshiroInstance: Kuroshiro | null = null;
|
|
|
|
/**
 * Curated katakana spellings for common English words.
 *
 * Keys are lowercase English words; values are the katakana strings used in
 * place of those words. Lookups elsewhere in this file lowercase the word
 * before indexing into this table.
 */
const englishToKatakanaMap: Record<string, string> = {
  hello: "ハロー",
  world: "ワールド",
  this: "ディス",
  is: "イズ",
  a: "ア",
  test: "テスト",
  javascript: "ジャバスクリプト",
  typescript: "タイプスクリプト",
  and: "アンド",
  api: "エーピーアイ",
  endpoint: "エンドポイント",
  machine: "マシン",
  learning: "ラーニング",
  model: "モデル",
  analysis: "アナリシス",
  computer: "コンピューター",
  data: "データ",
  software: "ソフトウェア",
  program: "プログラム",
  system: "システム",
  network: "ネットワーク",
  server: "サーバー",
  client: "クライアント",
  database: "データベース",
  file: "ファイル",
  user: "ユーザー",
  password: "パスワード",
  login: "ログイン",
  logout: "ログアウト",
  website: "ウェブサイト",
  browser: "ブラウザー",
  application: "アプリケーション",
  service: "サービス",
};
|
|
|
|
/**
 * Convert a single English word to katakana.
 *
 * Resolution order:
 * 1. the curated `englishToKatakanaMap`, looked up by the lowercased word;
 * 2. wanakana's `toKatakana`, accepted only when the result is non-empty,
 *    differs from the input and contains no leftover Latin letters;
 * 3. the letter-by-letter `approximateEnglishToKatakana` fallback.
 *
 * @param word - Word to convert; it is lowercased before lookup and conversion.
 * @returns The katakana rendering produced by the first step that succeeds.
 */
function convertEnglishWordToKatakana(word: string): string {
  const lowerWord = word.toLowerCase();

  // Own-property check: a bare index lookup would also resolve inherited
  // Object.prototype members (e.g. "constructor", "toString") and hand back a
  // function where a string is expected.
  if (Object.prototype.hasOwnProperty.call(englishToKatakanaMap, lowerWord)) {
    const mapped = englishToKatakanaMap[lowerWord];
    if (mapped) {
      return mapped;
    }
  }

  try {
    const katakana = toKatakana(lowerWord);
    // An arbitrary English spelling is not necessarily valid romaji, so the
    // result may be only partially converted. Accept it only when no Latin
    // letters remain; otherwise fall through to the approximation below so
    // the output is not a mix of scripts.
    const hasLatinResidue = /[a-z]/i.test(katakana);
    if (katakana && katakana !== lowerWord && !hasLatinResidue) {
      return katakana;
    }
  } catch {
    // Best effort: if wanakana throws, use the approximation below instead.
  }

  // Fallback: simple phonetic approximation
  return approximateEnglishToKatakana(word);
}
|
|
|
|
/**
 * Last-resort transliteration that swaps each Latin letter for a fixed
 * katakana string.
 *
 * The word is lowercased first; every character `a`-`z` is replaced using the
 * table below and any other character is copied through unchanged.
 *
 * @param word - Text to transliterate.
 * @returns The input with each ASCII letter replaced by its table entry.
 */
function approximateEnglishToKatakana(word: string): string {
  const kanaByLetter: Record<string, string> = {
    a: 'ア', b: 'ブ', c: 'ク', d: 'ド', e: 'エ', f: 'フ', g: 'グ',
    h: 'ハ', i: 'イ', j: 'ジ', k: 'ク', l: 'ル', m: 'ム', n: 'ン',
    o: 'オ', p: 'プ', q: 'ク', r: 'ル', s: 'ス', t: 'ト', u: 'ウ',
    v: 'ブ', w: 'ワ', x: 'クス', y: 'ワイ', z: 'ズ',
  };

  let katakana = '';
  for (const symbol of word.toLowerCase()) {
    const kana = kanaByLetter[symbol];
    // Characters without a table entry are kept as they are.
    katakana += kana ? kana : symbol;
  }
  return katakana;
}
|
|
|
|
/**
 * Initialization attempt currently in flight, or null when none is running.
 * Shared so that concurrent callers wait for the same attempt instead of
 * starting their own or using a half-initialized instance.
 */
let textConverterInitPromise: Promise<void> | null = null;

/**
 * Initialize kuroshiro with the MeCab analyzer.
 *
 * Intended to be called once during application startup; later calls return
 * immediately once an instance has been initialized successfully.
 *
 * The shared `kuroshiroInstance` is assigned only after `init()` has resolved,
 * so a failed attempt leaves it null and a later call can retry. Calls made
 * while an attempt is in flight share that attempt's outcome.
 *
 * @returns A promise that resolves once the shared instance is ready.
 * @throws Rethrows whatever error the Kuroshiro constructor or `init()` raised,
 *   after logging it.
 */
export async function initializeTextConverter(): Promise<void> {
  if (kuroshiroInstance) {
    return; // Already initialized
  }

  if (textConverterInitPromise) {
    // Another caller is already initializing; share its result.
    return textConverterInitPromise;
  }

  const attempt = (async (): Promise<void> => {
    try {
      console.log("Kuroshiroテキストコンバーターを初期化中...");
      const instance = new Kuroshiro();
      await instance.init(new KuroshiroAnalyzerMecab());
      // Publish only a fully initialized instance.
      kuroshiroInstance = instance;
      console.log("Kuroshiroテキストコンバーター初期化完了");
    } catch (error) {
      console.error("Kuroshiroの初期化に失敗しました:", error);
      throw error;
    }
  })();

  textConverterInitPromise = attempt;
  try {
    await attempt;
  } finally {
    // Clear the marker so a failed attempt can be retried by a later call.
    textConverterInitPromise = null;
  }
}
|
|
|
|
/**
 * Run the whole input through kuroshiro with katakana as the target script.
 *
 * The shared converter is initialized on first use. If the conversion itself
 * throws, the error is logged and the input is returned unchanged.
 *
 * NOTE(review): the original comments state that this converts English words
 * as well as Japanese text; how kuroshiro treats Latin-script words is not
 * visible in this file — confirm against the kuroshiro documentation.
 *
 * @param text - Input text (may contain Japanese, English, and other characters).
 * @returns The text as converted by kuroshiro, or the original text if the
 *   conversion failed.
 * @throws Error if no converter instance is available after initialization;
 *   errors thrown by `initializeTextConverter` also propagate.
 */
export async function convertEnglishToKatakana(text: string): Promise<string> {
  if (kuroshiroInstance === null) {
    await initializeTextConverter();
  }

  const converter = kuroshiroInstance;
  if (converter === null) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    return await converter.convert(text, { to: "katakana", mode: "normal" });
  } catch (conversionError) {
    console.error("テキスト変換エラー:", conversionError);
    // Conversion failed: hand the caller back what they passed in.
    return text;
  }
}
|
|
|
|
/**
 * Convert only the English words in a text to katakana, leaving everything
 * else (Japanese text, digits, punctuation) untouched.
 *
 * Words are runs of ASCII letters delimited by regex word boundaries. Each one
 * is converted with `convertEnglishWordToKatakana` in a single pass over the
 * original text, so converted output is never re-scanned and a word's
 * conversion cannot be altered by a later word's replacement.
 *
 * The kuroshiro converter is still initialized up front, as before, because
 * the error fallback delegates to `convertEnglishToKatakana`.
 *
 * @param text - Input text.
 * @returns Text with only English words converted to katakana.
 * @throws Error if no converter instance is available after initialization;
 *   errors thrown by `initializeTextConverter` also propagate.
 */
export async function convertEnglishWordsOnly(text: string): Promise<string> {
  if (!kuroshiroInstance) {
    await initializeTextConverter();
  }

  if (!kuroshiroInstance) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    const englishWordPattern = /\b[a-zA-Z]+\b/g;

    // A replacer callback is used instead of a replacement string so that any
    // "$" in the converted text is inserted literally.
    return text.replace(englishWordPattern, (englishWord) => {
      try {
        return convertEnglishWordToKatakana(englishWord);
      } catch (convertError) {
        console.warn(`Failed to convert word "${englishWord}":`, convertError);
        // Keep the original word if conversion fails
        return englishWord;
      }
    });
  } catch (error) {
    console.error("選択的テキスト変換エラー:", error);
    // Fallback to full conversion
    return convertEnglishToKatakana(text);
  }
}
|
|
|
|
/**
 * Report whether the shared kuroshiro instance variable currently holds an
 * instance (i.e. is not null).
 *
 * @returns `true` when `kuroshiroInstance` is non-null, `false` otherwise.
 */
export function isTextConverterInitialized(): boolean {
  const converter = kuroshiroInstance;
  return !(converter === null);
}