Add English to Katakana and Romaji conversion support
This commit is contained in:
@ -22,6 +22,12 @@ FROM oven/bun:latest AS runtime
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install MeCab for English to Katakana conversion
|
||||
RUN apt-get update && \
|
||||
apt-get install -y mecab mecab-ipadic-utf8 libmecab-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy built application from builder stage
|
||||
COPY --from=builder /app .
|
||||
|
||||
|
34
README.md
34
README.md
@ -15,22 +15,46 @@ Voice RSS Summary automatically collects RSS feeds, summarizes new articles usin
|
||||
- [Bun](https://bun.sh/) 1.x (if running locally)
|
||||
- VOICEVOX engine
|
||||
- An OpenAI API key
|
||||
- MeCab (for English to Katakana conversion)
|
||||
|
||||
## Installation
|
||||
|
||||
1. Install dependencies:
|
||||
1. Install MeCab (for English to Katakana conversion):
|
||||
|
||||
**macOS (using Homebrew):**
|
||||
```bash
|
||||
brew install mecab mecab-ipadic
|
||||
```
|
||||
|
||||
**Ubuntu/Debian:**
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install mecab mecab-ipadic-utf8 libmecab-dev
|
||||
```
|
||||
|
||||
**CentOS/RHEL/Fedora:**
|
||||
```bash
|
||||
sudo yum install mecab mecab-ipadic mecab-devel
|
||||
# or, on releases that ship dnf instead of yum:
|
||||
sudo dnf install mecab mecab-ipadic mecab-devel
|
||||
```
|
||||
|
||||
**Manual installation:**
|
||||
See the setup script: `scripts/setup-mecab.sh`
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
bun install
|
||||
```
|
||||
|
||||
2. Build the front end:
|
||||
3. Build the front end:
|
||||
|
||||
```bash
|
||||
bun run build:frontend
|
||||
```
|
||||
|
||||
3. Create a `.env` file and set the following variables:
|
||||
4. Create a `.env` file and set the following variables:
|
||||
|
||||
```env
|
||||
OPENAI_API_KEY=your-api-key
|
||||
@ -49,9 +73,9 @@ Voice RSS Summary automatically collects RSS feeds, summarizes new articles usin
|
||||
FEED_URLS_FILE=feed_urls.txt
|
||||
```
|
||||
|
||||
4. Prepare `feed_urls.txt` with one RSS feed URL per line.
|
||||
5. Prepare `feed_urls.txt` with one RSS feed URL per line.
|
||||
|
||||
5. Start the server:
|
||||
6. Start the server:
|
||||
|
||||
```bash
|
||||
bun run server.ts
|
||||
|
15
bun.lock
15
bun.lock
@ -10,6 +10,8 @@
|
||||
"cheerio": "^1.0.0",
|
||||
"ffmpeg-static": "^5.2.0",
|
||||
"hono": "^4.7.11",
|
||||
"kuroshiro": "^1.2.0",
|
||||
"kuroshiro-analyzer-mecab": "^1.0.1",
|
||||
"openai": "^4.104.0",
|
||||
"puppeteer": "^24.10.0",
|
||||
"react": "^19.1.0",
|
||||
@ -17,6 +19,7 @@
|
||||
"react-helmet-async": "^2.0.5",
|
||||
"react-router-dom": "^7.6.2",
|
||||
"rss-parser": "^3.13.0",
|
||||
"wanakana": "^5.3.1",
|
||||
"xml2js": "^0.6.2",
|
||||
},
|
||||
"devDependencies": {
|
||||
@ -122,6 +125,8 @@
|
||||
|
||||
"@babel/plugin-transform-react-jsx-source": ["@babel/plugin-transform-react-jsx-source@7.27.1", "", { "dependencies": { "@babel/helper-plugin-utils": "^7.27.1" }, "peerDependencies": { "@babel/core": "^7.0.0-0" } }, "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw=="],
|
||||
|
||||
"@babel/runtime": ["@babel/runtime@7.27.6", "", {}, "sha512-vbavdySgbTTrmFE+EsiqUTzlOr5bzlnJtUv9PynGCAKvfQqjIXbvFdumPM/GxMDfyuGMJaJAU6TO4zc1Jf1i8Q=="],
|
||||
|
||||
"@babel/template": ["@babel/template@7.27.2", "", { "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/parser": "^7.27.2", "@babel/types": "^7.27.1" } }, "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw=="],
|
||||
|
||||
"@babel/traverse": ["@babel/traverse@7.27.4", "", { "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.27.3", "@babel/parser": "^7.27.4", "@babel/template": "^7.27.2", "@babel/types": "^7.27.3", "debug": "^4.3.1", "globals": "^11.1.0" } }, "sha512-oNcu2QbHqts9BtOWJosOVJapWjBDSxGCpFvikNR5TGDYDQf3JwpIoMzIKrvfoti93cLfPJEG4tH9SPVeyCGgdA=="],
|
||||
@ -578,6 +583,10 @@
|
||||
|
||||
"json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="],
|
||||
|
||||
"kuroshiro": ["kuroshiro@1.2.0", "", { "dependencies": { "@babel/runtime": "^7.14.0" } }, "sha512-yBGCK9oDOY3LGZ/KXaN9m7ADcAuSczOR2FoMRYwHLUlis3/o/uxdMVROAjENFO0NQJgALhIdWxI/vIBVrMCk9w=="],
|
||||
|
||||
"kuroshiro-analyzer-mecab": ["kuroshiro-analyzer-mecab@1.0.1", "", { "dependencies": { "mecab-async": "^0.1.2" } }, "sha512-8QUDoWnRW4VEh1DqnCz6XsRe4/w9GIRco9K/T7ZskhhZGuwMjKQsRuCUF9LDJTuleUJR+bV1iKwctOZ13ajfxA=="],
|
||||
|
||||
"lines-and-columns": ["lines-and-columns@1.2.4", "", {}, "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="],
|
||||
|
||||
"loose-envify": ["loose-envify@1.4.0", "", { "dependencies": { "js-tokens": "^3.0.0 || ^4.0.0" }, "bin": { "loose-envify": "cli.js" } }, "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q=="],
|
||||
@ -586,6 +595,8 @@
|
||||
|
||||
"math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="],
|
||||
|
||||
"mecab-async": ["mecab-async@0.1.2", "", { "dependencies": { "shell-quote": "*" } }, "sha512-/hruCkDWB+jM1bYMFM53HLzG6ENKVtC3n45qD1qxQUW99pz3rrsU4HKEqlYPVCr+/wv0ErBR0vNlVwAv92f2Wg=="],
|
||||
|
||||
"mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="],
|
||||
|
||||
"mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="],
|
||||
@ -684,6 +695,8 @@
|
||||
|
||||
"shallowequal": ["shallowequal@1.1.0", "", {}, "sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ=="],
|
||||
|
||||
"shell-quote": ["shell-quote@1.8.3", "", {}, "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw=="],
|
||||
|
||||
"smart-buffer": ["smart-buffer@4.2.0", "", {}, "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg=="],
|
||||
|
||||
"socks": ["socks@2.8.4", "", { "dependencies": { "ip-address": "^9.0.5", "smart-buffer": "^4.2.0" } }, "sha512-D3YaD0aRxR3mEcqnidIs7ReYJFVzWdd6fXJYUM8ixcQcJRGTka/b3saV0KflYhyVJXKhb947GndU35SxYNResQ=="],
|
||||
@ -736,6 +749,8 @@
|
||||
|
||||
"vite": ["vite@6.3.5", "", { "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", "picomatch": "^4.0.2", "postcss": "^8.5.3", "rollup": "^4.34.9", "tinyglobby": "^0.2.13" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ=="],
|
||||
|
||||
"wanakana": ["wanakana@5.3.1", "", {}, "sha512-OSDqupzTlzl2LGyqTdhcXcl6ezMiFhcUwLBP8YKaBIbMYW1wAwDvupw2T9G9oVaKT9RmaSpyTXjxddFPUcFFIw=="],
|
||||
|
||||
"web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="],
|
||||
|
||||
"webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="],
|
||||
|
@ -22,6 +22,8 @@
|
||||
"cheerio": "^1.0.0",
|
||||
"ffmpeg-static": "^5.2.0",
|
||||
"hono": "^4.7.11",
|
||||
"kuroshiro": "^1.2.0",
|
||||
"kuroshiro-analyzer-mecab": "^1.0.1",
|
||||
"openai": "^4.104.0",
|
||||
"puppeteer": "^24.10.0",
|
||||
"react": "^19.1.0",
|
||||
@ -29,6 +31,7 @@
|
||||
"react-helmet-async": "^2.0.5",
|
||||
"react-router-dom": "^7.6.2",
|
||||
"rss-parser": "^3.13.0",
|
||||
"wanakana": "^5.3.1",
|
||||
"xml2js": "^0.6.2"
|
||||
},
|
||||
"type": "module",
|
||||
|
81
scripts/setup-mecab.sh
Executable file
81
scripts/setup-mecab.sh
Executable file
@ -0,0 +1,81 @@
|
||||
#!/bin/bash

# MeCab Setup Script for Voice RSS Summary
#
# Installs MeCab and the IPA dictionary required for English to Katakana
# conversion. Supported platforms:
#   - Debian/Ubuntu            (apt-get)
#   - Red Hat/CentOS/Fedora    (dnf when available, otherwise yum)
#   - macOS                    (Homebrew)
# Any other platform exits with status 1 and a hint to install manually.

# Abort on the first failing command so a broken install is never reported
# as a success by the final banner.
set -e

# Run a command with root privileges.
# When the script is already running as root (typical inside containers,
# where sudo is often not installed) the command is executed directly;
# otherwise it is run through sudo. Exits with status 1 if neither is possible.
run_privileged() {
    if [ "$(id -u)" -eq 0 ]; then
        "$@"
    elif command -v sudo &> /dev/null; then
        sudo "$@"
    else
        echo "Root privileges are required, but sudo is not installed." >&2
        echo "Re-run this script as root or install sudo first." >&2
        exit 1
    fi
}

# Smoke-test the installation by analysing a short mixed Japanese/English
# sentence. Exits with status 1 if the mecab binary is not on PATH.
verify_mecab() {
    echo "Testing MeCab installation..."
    if ! command -v mecab &> /dev/null; then
        echo "mecab was not found on PATH after installation." >&2
        exit 1
    fi
    echo "こんにちは Hello" | mecab
}

echo "=== MeCab Setup Script ==="
echo "This script will install MeCab and IPA dictionary for English to Katakana conversion."
echo

# Detect OS
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
    echo "Detected Linux OS"

    # Detect distribution
    if [ -f /etc/debian_version ]; then
        echo "Detected Debian/Ubuntu"

        echo "Updating package list..."
        run_privileged apt-get update

        echo "Installing MeCab and IPA dictionary..."
        run_privileged apt-get install -y mecab mecab-ipadic-utf8 libmecab-dev

        verify_mecab

    elif [ -f /etc/redhat-release ]; then
        echo "Detected Red Hat/CentOS/Fedora"

        # Check if dnf exists (newer versions)
        if command -v dnf &> /dev/null; then
            echo "Installing MeCab using dnf..."
            run_privileged dnf install -y mecab mecab-ipadic mecab-devel
        else
            echo "Installing MeCab using yum..."
            run_privileged yum install -y mecab mecab-ipadic mecab-devel
        fi

        verify_mecab

    else
        echo "Unsupported Linux distribution. Please install MeCab manually."
        echo "Required packages: mecab, mecab-ipadic (or mecab-ipadic-utf8), mecab-devel (or libmecab-dev)"
        exit 1
    fi

elif [[ "$OSTYPE" == "darwin"* ]]; then
    echo "Detected macOS"

    # Check if Homebrew is installed
    if ! command -v brew &> /dev/null; then
        echo "Homebrew is not installed. Please install Homebrew first:"
        echo "https://brew.sh/"
        exit 1
    fi

    # Homebrew is run as the invoking user, so no privilege escalation here.
    echo "Installing MeCab using Homebrew..."
    brew install mecab mecab-ipadic

    verify_mecab

else
    echo "Unsupported operating system: $OSTYPE"
    echo "Please install MeCab manually for your operating system."
    exit 1
fi

echo
echo "=== MeCab installation completed successfully! ==="
echo
echo "You can now run the Voice RSS Summary application with English to Katakana conversion support."
echo
echo "To start the application:"
echo " bun install"
echo " bun run build:frontend"
echo " bun run start"
echo
|
@ -107,8 +107,6 @@ export async function openAI_GeneratePodcastContent(
|
||||
あなたはプロのポッドキャスタです。以下に示すフィードタイトルに基づき、そのトピックに関する詳細なポッドキャスト原稿を作成してください。
|
||||
|
||||
以下の要件を満たしてください:
|
||||
- もし英単語が含まれている場合は、**必ずすべてカタカナに変換**してください (例: "Google" → "グーグル")
|
||||
- もし英語の文が含まれている場合は、すべて日本語に翻訳してください
|
||||
- 各ニュース記事の具体的な内容を基に詳細な要約と解説を行ってください
|
||||
- 約1000文字〜5000文字程度の長さにしてください
|
||||
- 自然な日本語の口語表現を使ってください
|
||||
|
192
services/text-converter.ts
Normal file
192
services/text-converter.ts
Normal file
@ -0,0 +1,192 @@
|
||||
import Kuroshiro from "kuroshiro";
|
||||
import KuroshiroAnalyzerMecab from "kuroshiro-analyzer-mecab";
|
||||
import { toKatakana } from "wanakana";
|
||||
|
||||
// Global instance to avoid recreating the analyzer
|
||||
let kuroshiroInstance: Kuroshiro | null = null;
|
||||
|
||||
/**
 * Hand-curated katakana readings for common English words, keyed by the
 * lower-cased word.
 *
 * The table is built on a prototype-less object so that a plain index lookup
 * can only ever see the entries listed here. With an ordinary object literal,
 * `englishToKatakanaMap["constructor"]` resolves to the inherited
 * `Object.prototype.constructor` function instead of `undefined`.
 */
const englishToKatakanaMap: Record<string, string> = Object.assign(
  Object.create(null),
  {
    hello: "ハロー",
    world: "ワールド",
    this: "ディス",
    is: "イズ",
    a: "ア",
    test: "テスト",
    javascript: "ジャバスクリプト",
    typescript: "タイプスクリプト",
    and: "アンド",
    api: "エーピーアイ",
    endpoint: "エンドポイント",
    machine: "マシン",
    learning: "ラーニング",
    model: "モデル",
    analysis: "アナリシス",
    computer: "コンピューター",
    data: "データ",
    software: "ソフトウェア",
    program: "プログラム",
    system: "システム",
    network: "ネットワーク",
    server: "サーバー",
    client: "クライアント",
    database: "データベース",
    file: "ファイル",
    user: "ユーザー",
    password: "パスワード",
    login: "ログイン",
    logout: "ログアウト",
    website: "ウェブサイト",
    browser: "ブラウザー",
    application: "アプリケーション",
    service: "サービス",
  },
);
|
||||
|
||||
/**
 * Convert a single English word to a katakana reading.
 *
 * Resolution order:
 * 1. the curated `englishToKatakanaMap` table (looked up case-insensitively),
 * 2. wanakana's `toKatakana`, accepted only when no Latin letters remain,
 * 3. the letter-by-letter `approximateEnglishToKatakana` fallback.
 *
 * @param word - The word to convert; the caller in this file passes runs of
 *   ASCII letters.
 * @returns The reading produced by the first step that succeeds.
 */
function convertEnglishWordToKatakana(word: string): string {
  const lowerWord = word.toLowerCase();

  // Consult only the table's own entries. A bare truthiness test on
  // `englishToKatakanaMap[lowerWord]` also matches inherited object members
  // (e.g. "constructor") and would hand back a function instead of a string.
  if (Object.prototype.hasOwnProperty.call(englishToKatakanaMap, lowerWord)) {
    const mapped = englishToKatakanaMap[lowerWord];
    if (mapped) {
      return mapped;
    }
  }

  try {
    const katakana = toKatakana(lowerWord);
    // toKatakana treats its input as romaji. If Latin letters are still
    // present in the output, the word was only partially transliterated and
    // using it would send mixed-script text to the TTS engine, so fall
    // through to the letter-by-letter approximation instead.
    const fullyConverted = !/[a-z]/i.test(katakana);
    if (katakana && katakana !== lowerWord && fullyConverted) {
      return katakana;
    }
  } catch {
    // Best effort: a wanakana failure is not fatal, the fallback below
    // still produces a reading.
  }

  return approximateEnglishToKatakana(word);
}
|
||||
|
||||
/**
 * Last-resort transliteration that swaps each ASCII letter for a fixed
 * katakana stand-in, one character at a time.
 *
 * The input is lower-cased first. Any character without an entry in the
 * letter table (digits, punctuation, non-Latin text) is copied through
 * unchanged.
 *
 * @param word - Text to transliterate.
 * @returns The input with every a–z letter replaced by its katakana stand-in.
 */
function approximateEnglishToKatakana(word: string): string {
  const letterKana: Record<string, string> = {
    a: "ア",
    b: "ブ",
    c: "ク",
    d: "ド",
    e: "エ",
    f: "フ",
    g: "グ",
    h: "ハ",
    i: "イ",
    j: "ジ",
    k: "ク",
    l: "ル",
    m: "ム",
    n: "ン",
    o: "オ",
    p: "プ",
    q: "ク",
    r: "ル",
    s: "ス",
    t: "ト",
    u: "ウ",
    v: "ブ",
    w: "ワ",
    x: "クス",
    y: "ワイ",
    z: "ズ",
  };

  const lowered = word.toLowerCase();
  let output = "";
  for (const unit of lowered.split("")) {
    const kana = letterKana[unit];
    // Unmapped characters pass through as-is.
    output += kana || unit;
  }
  return output;
}
|
||||
|
||||
// Initialization currently in flight, shared so that concurrent callers wait
// on the same attempt instead of each starting their own. Null when idle.
let textConverterInitPromise: Promise<void> | null = null;

/**
 * Initialize kuroshiro with the MeCab analyzer.
 *
 * Safe to call repeatedly: it returns immediately once an instance has been
 * published, and concurrent calls share a single in-flight attempt.
 *
 * The module-level `kuroshiroInstance` is assigned only after `init()` has
 * resolved. A failed attempt therefore leaves the module uninitialized, so
 * `isTextConverterInitialized()` stays false and a later call can retry,
 * rather than leaving behind an instance whose `init()` never completed.
 *
 * @throws Rethrows whatever error kuroshiro construction or `init()` raised.
 */
export async function initializeTextConverter(): Promise<void> {
  if (kuroshiroInstance) {
    return; // Already initialized
  }

  if (!textConverterInitPromise) {
    textConverterInitPromise = (async () => {
      try {
        console.log("Kuroshiroテキストコンバーターを初期化中...");
        const candidate = new Kuroshiro();
        await candidate.init(new KuroshiroAnalyzerMecab());
        // Publish only a fully initialized instance.
        kuroshiroInstance = candidate;
        console.log("Kuroshiroテキストコンバーター初期化完了");
      } catch (error) {
        console.error("Kuroshiroの初期化に失敗しました:", error);
        throw error;
      }
    })();
  }

  const pending = textConverterInitPromise;
  try {
    await pending;
  } finally {
    // Clear the shared promise once this attempt has settled so that a
    // failure does not block future retries. The identity check avoids
    // clearing a newer attempt started by another caller.
    if (textConverterInitPromise === pending) {
      textConverterInitPromise = null;
    }
  }
}
|
||||
|
||||
/**
 * Run the whole input through kuroshiro with katakana as the target script.
 *
 * The converter is initialized on demand if no instance exists yet. A
 * conversion failure is logged and the original text is returned unchanged.
 *
 * NOTE(review): how kuroshiro treats Latin-script words is not visible from
 * this file; confirm before relying on this for English input.
 *
 * @param text - Input text (may contain Japanese, English, and other characters)
 * @returns The text as returned by kuroshiro's `convert`, or `text` itself if
 *   conversion threw.
 * @throws Error if no kuroshiro instance exists after the initialization
 *   attempt; errors thrown by `initializeTextConverter` also propagate.
 */
export async function convertEnglishToKatakana(text: string): Promise<string> {
  if (kuroshiroInstance === null) {
    await initializeTextConverter();
  }

  const converter = kuroshiroInstance;
  if (converter === null) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    return await converter.convert(text, { to: "katakana", mode: "normal" });
  } catch (error) {
    console.error("テキスト変換エラー:", error);
    // Hand the caller back what it gave us rather than failing the pipeline.
    return text;
  }
}
|
||||
|
||||
/**
 * Convert only the English words in `text` to katakana, leaving every other
 * character (Japanese text, digits, punctuation, whitespace) untouched.
 *
 * A "word" is a run of ASCII letters delimited by regex word boundaries, so
 * letters glued to digits or underscores (e.g. `API2`, `foo_bar`) are not
 * matched. Each word is converted exactly once, in a single pass over the
 * original text, so converted output is never re-scanned.
 *
 * The kuroshiro converter is still initialized up front, as before, so an
 * initialization failure surfaces to the caller.
 *
 * @param text - Input text
 * @returns Text with only English words converted to katakana
 * @throws Error if no kuroshiro instance exists after the initialization
 *   attempt; errors thrown by `initializeTextConverter` also propagate.
 */
export async function convertEnglishWordsOnly(text: string): Promise<string> {
  if (!kuroshiroInstance) {
    await initializeTextConverter();
  }

  if (!kuroshiroInstance) {
    throw new Error("Failed to initialize kuroshiro");
  }

  try {
    // A replacer callback avoids building one RegExp per word and also keeps
    // `$`-patterns in a replacement from being interpreted by `replace`.
    return text.replace(/\b[a-zA-Z]+\b/g, (englishWord) => {
      try {
        const converted: unknown = convertEnglishWordToKatakana(englishWord);
        // Runtime guard: the callback result is stringified by `replace`, so
        // anything that is not a real string (for instance an inherited
        // object member picked up by a table lookup) must not be emitted.
        return typeof converted === "string" ? converted : englishWord;
      } catch (convertError) {
        console.warn(`Failed to convert word "${englishWord}":`, convertError);
        // Keep original word if conversion fails
        return englishWord;
      }
    });
  } catch (error) {
    console.error("選択的テキスト変換エラー:", error);
    // Fallback to full conversion
    return convertEnglishToKatakana(text);
  }
}
|
||||
|
||||
/**
 * Report whether this module currently holds a kuroshiro instance.
 *
 * @returns `true` when the module-level `kuroshiroInstance` is non-null,
 *   `false` otherwise.
 */
export function isTextConverterInitialized(): boolean {
  const hasInstance = kuroshiroInstance !== null;
  return hasInstance;
}
|
@ -2,6 +2,7 @@ import fs from "fs";
|
||||
import path from "path";
|
||||
import ffmpegPath from "ffmpeg-static";
|
||||
import { config } from "./config.js";
|
||||
import { convertEnglishWordsOnly, initializeTextConverter } from "./text-converter.js";
|
||||
|
||||
/**
|
||||
* Split text into natural chunks for TTS processing
|
||||
@ -108,12 +109,26 @@ async function generateAudioForChunk(
|
||||
chunkIndex: number,
|
||||
itemId: string,
|
||||
): Promise<string> {
|
||||
const encodedText = encodeURIComponent(chunkText);
|
||||
// Convert English words to katakana before TTS processing
|
||||
let processedText: string;
|
||||
try {
|
||||
processedText = await convertEnglishWordsOnly(chunkText);
|
||||
if (processedText !== chunkText) {
|
||||
console.log(`チャンク${chunkIndex + 1}で英語をカタカナに変換: ${itemId}`);
|
||||
console.log(`変換前: "${chunkText}"`);
|
||||
console.log(`変換後: "${processedText}"`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(`チャンク${chunkIndex + 1}の英語変換に失敗、元のテキストを使用: ${itemId}`, error);
|
||||
processedText = chunkText;
|
||||
}
|
||||
|
||||
const encodedText = encodeURIComponent(processedText);
|
||||
const queryUrl = `${config.voicevox.host}/audio_query?text=${encodedText}&speaker=${defaultVoiceStyle.styleId}`;
|
||||
const synthesisUrl = `${config.voicevox.host}/synthesis?speaker=${defaultVoiceStyle.styleId}`;
|
||||
|
||||
console.log(
|
||||
`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${chunkText.length}文字)`,
|
||||
`チャンク${chunkIndex + 1}の音声クエリ開始: ${itemId} (${processedText.length}文字)`,
|
||||
);
|
||||
|
||||
const queryResponse = await fetch(queryUrl, {
|
||||
@ -252,6 +267,13 @@ export async function generateTTSWithoutQueue(
|
||||
throw new Error("Script text is required for TTS generation");
|
||||
}
|
||||
|
||||
// Initialize text converter if not already initialized
|
||||
try {
|
||||
await initializeTextConverter();
|
||||
} catch (error) {
|
||||
console.warn("テキストコンバーターの初期化に失敗しました。英語変換をスキップします:", error);
|
||||
}
|
||||
|
||||
console.log(
|
||||
`TTS生成開始: ${itemId} (試行回数: ${retryCount + 1}, ${scriptText.length}文字)`,
|
||||
);
|
||||
|
@ -27,8 +27,9 @@
|
||||
"noPropertyAccessFromIndexSignature": true,
|
||||
|
||||
// Next.js specific settings
|
||||
"types": ["react", "bun"]
|
||||
"types": ["react", "bun"],
|
||||
"typeRoots": ["./node_modules/@types", "./types"]
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx"],
|
||||
"include": ["**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx", "types/**/*.ts"],
|
||||
"exclude": ["node_modules", ".next", "out", "public", "styles"]
|
||||
}
|
||||
|
25
types/kuroshiro.d.ts
vendored
Normal file
25
types/kuroshiro.d.ts
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
/**
 * Minimal ambient typings for `kuroshiro`, covering only the members this
 * project uses.
 */
declare module "kuroshiro" {
  /** Options accepted by `Kuroshiro#convert`. */
  interface ConvertOptions {
    to?: "hiragana" | "katakana" | "romaji";
    mode?: "normal" | "spaced" | "okurigana" | "furigana";
    romajiSystem?: "nippon" | "passport" | "hepburn";
    delimiter_start?: string;
    delimiter_end?: string;
  }

  /**
   * Structural shape of an analyzer that can be handed to `Kuroshiro#init`.
   * Token contents are left as `unknown` because this project never reads
   * them directly.
   */
  interface KuroshiroAnalyzer {
    init(): Promise<void>;
    parse(text?: string): Promise<unknown[]>;
  }

  class Kuroshiro {
    constructor();
    /** Prepare the converter with the given analyzer; must resolve before `convert` is used. */
    init(analyzer: KuroshiroAnalyzer): Promise<void>;
    /** Convert `text` according to `options`. */
    convert(text: string, options?: ConvertOptions): Promise<string>;
  }

  export = Kuroshiro;
}

/**
 * Minimal ambient typings for `kuroshiro-analyzer-mecab`. The `init`/`parse`
 * members are declared so that an instance satisfies the analyzer shape
 * required by `Kuroshiro#init`.
 */
declare module "kuroshiro-analyzer-mecab" {
  class KuroshiroAnalyzerMecab {
    constructor();
    init(): Promise<void>;
    parse(text?: string): Promise<unknown[]>;
  }

  export = KuroshiroAnalyzerMecab;
}
|
Reference in New Issue
Block a user