Compare commits

..

17 Commits

Author SHA1 Message Date
ba3cc41e34 Update
Some checks failed
CI / lint-and-test (push) Failing after 1m41s
CI / docker-test (push) Has been skipped
CI / security-scan (push) Has been skipped
Build and Publish Docker Images / build (push) Failing after 1m55s
2025-06-13 17:12:32 +09:00
4f158508d0 Update 2025-06-13 13:07:18 +09:00
8a42413e86 Update 2025-06-13 13:05:41 +09:00
6049a05776 Fix
Some checks failed
CI / security-scan (push) Has been skipped
Build and Publish Docker Images / build (push) Failing after 58s
CI / lint-and-test (push) Failing after 10m46s
CI / docker-test (push) Has been cancelled
2025-06-13 13:04:20 +09:00
bb4b4e42ef Update
Some checks failed
CI / lint-and-test (push) Failing after 1m45s
CI / docker-test (push) Has been skipped
CI / security-scan (push) Has been skipped
Build and Publish Docker Images / build (push) Failing after 1m26s
2025-06-13 12:14:29 +09:00
1edafad270 Update
Some checks failed
CI / docker-test (push) Has been cancelled
CI / lint-and-test (push) Failing after 4m42s
CI / security-scan (push) Has been cancelled
Build and Publish Docker Images / build (push) Has been cancelled
2025-06-13 12:10:36 +09:00
99af4d11b1 Fix
Some checks failed
CI / lint-and-test (push) Failing after 4m2s
CI / docker-test (push) Has been skipped
CI / security-scan (push) Has been skipped
Build and Publish Docker Images / build (push) Failing after 2m8s
2025-06-12 13:09:38 +09:00
886dc94429 Update
Some checks failed
Build and Publish Docker Images / build (push) Failing after 4m16s
CI / security-scan (push) Failing after 12m2s
CI / lint-and-test (push) Failing after 12m8s
CI / docker-test (push) Has been cancelled
2025-06-12 12:20:25 +09:00
0760909de1 Update 2025-06-12 10:03:12 +09:00
9349c6e95b Update docker-build.yml 2025-06-12 09:35:04 +09:00
e189618038 Update 2025-06-12 09:19:16 +09:00
18a3b5312e Add ci 2025-06-12 09:00:16 +09:00
4226d6ccd6 Update 2025-06-12 08:45:27 +09:00
3a37299aed Update 2025-06-12 08:35:37 +09:00
e66621906e Update Dockerfile 2025-06-12 08:19:39 +09:00
1235e78ec6 Fix 2025-06-12 07:53:37 +09:00
ca9e18d12f Add dependency 2025-06-12 07:45:09 +09:00
10 changed files with 1182 additions and 81 deletions

111
.github/workflows/ci.yml vendored Normal file

@@ -0,0 +1,111 @@
name: CI
on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main ]
jobs:
lint-and-test:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v1
with:
bun-version: latest
- name: Install dependencies
run: bun install
- name: Install admin panel dependencies
run: cd admin-panel && bun install
- name: Type check
run: bunx tsc --noEmit
- name: Lint
run: bun run lint
- name: Format check
run: bun run format:check
- name: Build frontend
run: bun run build:frontend
- name: Build admin panel
run: bun run build:admin
- name: Test build artifacts
run: |
ls -la frontend/dist/
ls -la admin-panel/dist/
echo "✅ Build artifacts created successfully"
docker-test:
runs-on: ubuntu-latest
needs: lint-and-test
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build Docker image (test only)
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64
push: false
tags: voice-rss-summary:test
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Test Docker image
run: |
echo "Testing Docker image functionality..."
# Create minimal test environment
mkdir -p test-data test-public
echo "# Test feed" > feed_urls.txt
echo "OPENAI_API_KEY=test" > .env
echo "VOICEVOX_HOST=http://localhost:50021" >> .env
# Run container for a short time to test startup
docker run --rm --name test-container \
-v "$(pwd)/feed_urls.txt:/app/feed_urls.txt:ro" \
-v "$(pwd)/.env:/app/.env:ro" \
-v "$(pwd)/test-public:/app/public" \
-v "$(pwd)/test-data:/app/data" \
voice-rss-summary:test \
timeout 30 bun run server.ts || true
echo "✅ Docker image test completed"
security-scan:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
scan-type: 'fs'
scan-ref: '.'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
if: always()
with:
sarif_file: 'trivy-results.sarif'
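
The same checks can be reproduced locally before pushing. A minimal sketch, assuming Bun is installed, the package.json scripts referenced above exist, and (optionally) Trivy is available for the scan step:

  bun install && (cd admin-panel && bun install)
  bunx tsc --noEmit                                 # type check
  bun run lint && bun run format:check              # lint and formatting
  bun run build:frontend && bun run build:admin     # build artifacts
  trivy fs --format sarif --output trivy-results.sarif .   # optional local security scan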

116
.github/workflows/docker-build.yml vendored Normal file

@@ -0,0 +1,116 @@
name: Build and Publish Docker Images
on:
push:
branches: [main, develop]
tags: ["v*"]
pull_request:
branches: [main]
workflow_dispatch:
inputs:
platforms:
description: "Platforms to build (comma-separated)"
required: false
default: "linux/amd64,linux/arm64"
type: string
push_to_registry:
description: "Push to registry"
required: false
default: true
type: boolean
permissions:
contents: read # needed to clone the source code
packages: write # needed to push images to GHCR
env:
REGISTRY: ghcr.io
IMAGE_NAME_1: ${{ github.repository_owner }}/voice-rss-summary
jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME_1 }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}
type=raw,value=latest,enable={{is_default_branch}}
- name: Determine platforms
id: platforms
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "platforms=${{ github.event.inputs.platforms }}" >> $GITHUB_OUTPUT
elif [ "${{ github.event_name }}" = "pull_request" ]; then
echo "platforms=linux/amd64" >> $GITHUB_OUTPUT
else
echo "platforms=linux/amd64,linux/arm64" >> $GITHUB_OUTPUT
fi
- name: Determine push setting
id: push
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "push=${{ github.event.inputs.push_to_registry }}" >> $GITHUB_OUTPUT
elif [ "${{ github.event_name }}" = "pull_request" ]; then
echo "push=false" >> $GITHUB_OUTPUT
else
echo "push=true" >> $GITHUB_OUTPUT
fi
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
platforms: ${{ steps.platforms.outputs.platforms }}
push: ${{ steps.push.outputs.push }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: |
BUILDKIT_INLINE_CACHE=1
- name: Generate summary
if: always()
run: |
echo "## Build Summary" >> $GITHUB_STEP_SUMMARY
echo "- **Event**: ${{ github.event_name }}" >> $GITHUB_STEP_SUMMARY
echo "- **Ref**: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY
echo "- **Platforms**: ${{ steps.platforms.outputs.platforms }}" >> $GITHUB_STEP_SUMMARY
echo "- **Push to registry**: ${{ steps.push.outputs.push }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Images built:" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
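
The workflow_dispatch inputs can be exercised from the GitHub CLI. A sketch, assuming gh is authenticated for this repository and the workflow file is docker-build.yml as committed above:

  # One-off single-platform build that skips the registry push
  gh workflow run docker-build.yml -f platforms=linux/amd64 -f push_to_registry=false
  gh run watch    # follow the triggered run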

138
.github/workflows/release.yml vendored Normal file

@@ -0,0 +1,138 @@
name: Release
on:
push:
tags:
- 'v*'
workflow_dispatch:
inputs:
tag:
description: 'Tag to release'
required: true
type: string
env:
REGISTRY: ghcr.io
jobs:
create-release:
runs-on: ubuntu-latest
permissions:
contents: write
packages: read
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
release_id: ${{ steps.create_release.outputs.id }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get tag
id: get_tag
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
else
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
fi
- name: Generate changelog
id: changelog
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
PREVIOUS_TAG=$(git describe --tags --abbrev=0 "${{ github.event.inputs.tag }}^" 2>/dev/null || echo "")
else
PREVIOUS_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
fi
if [ -n "$PREVIOUS_TAG" ]; then
echo "## Changes since $PREVIOUS_TAG" > changelog.md
git log --pretty=format:"- %s (%h)" "$PREVIOUS_TAG"..HEAD >> changelog.md
else
echo "## Initial Release" > changelog.md
echo "First release of Voice RSS Summary" >> changelog.md
fi
echo "" >> changelog.md
echo "## Docker Images" >> changelog.md
echo "- \`ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${{ steps.get_tag.outputs.tag }}\`" >> changelog.md
echo "- \`ghcr.io/${{ github.repository_owner }}/voicersssummary:${{ steps.get_tag.outputs.tag }}\`" >> changelog.md
echo "" >> changelog.md
echo "## Usage" >> changelog.md
echo "\`\`\`bash" >> changelog.md
echo "# Pull and run the latest image" >> changelog.md
echo "docker run -p 3000:3000 -p 3001:3001 ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${{ steps.get_tag.outputs.tag }}" >> changelog.md
echo "" >> changelog.md
echo "# Or clone the repository and run locally" >> changelog.md
echo "git clone https://github.com/${{ github.repository }}.git" >> changelog.md
echo "cd VoiceRSSSummary" >> changelog.md
echo "git checkout ${{ steps.get_tag.outputs.tag }}" >> changelog.md
echo "./run-docker.sh container-name ${{ steps.get_tag.outputs.tag }} --from-ghcr" >> changelog.md
echo "\`\`\`" >> changelog.md
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ steps.get_tag.outputs.tag }}
release_name: Release ${{ steps.get_tag.outputs.tag }}
body_path: changelog.md
draft: false
prerelease: ${{ contains(steps.get_tag.outputs.tag, '-') }}
wait-for-docker:
runs-on: ubuntu-latest
needs: create-release
permissions:
packages: read
steps:
- name: Get tag
id: get_tag
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
else
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
fi
- name: Wait for Docker images
run: |
echo "Waiting for Docker images to be available..."
TAG="${{ steps.get_tag.outputs.tag }}"
for i in {1..30}; do
echo "Attempt $i: Checking if images are available..."
if docker manifest inspect ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG} >/dev/null 2>&1; then
echo "✅ Docker images are available!"
exit 0
fi
echo "Images not yet available, waiting 30 seconds..."
sleep 30
done
echo "❌ Timeout waiting for Docker images"
exit 1
- name: Test Docker image
run: |
TAG="${{ steps.get_tag.outputs.tag }}"
echo "Testing Docker image: ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG}"
# Pull the image
docker pull ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG}
# Run a quick test
docker run --rm --name test-container \
ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG} \
timeout 10 bun --version || true
echo "✅ Docker image test completed"

Dockerfile

@@ -22,18 +22,28 @@ FROM oven/bun:latest AS runtime
WORKDIR /app
# Install MeCab for English to Katakana conversion
# Install MeCab for English to Katakana conversion and Chrome dependencies for Puppeteer
RUN apt-get update && \
apt-get install -y mecab mecab-ipadic-utf8 libmecab-dev && \
apt-get install -y \
mecab mecab-ipadic-utf8 libmecab-dev \
wget gnupg ca-certificates \
fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
libdrm2 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libxss1 \
libgtk-3-0 libnspr4 libnss3 libxdamage1 libxfixes3 libxrandr2 \
libgconf-2-4 libxss1 libasound2 libxtst6 libatspi2.0-0 libdrm2 \
libxcomposite1 libxcursor1 libxi6 libxtst6 xdg-utils lsb-release \
libglib2.0-0 libnss3-dev libgconf-2-4 libxrandr2 libasound2-dev \
libpangocairo-1.0-0 libatk1.0-dev libcairo-gobject2 libgtk-3-dev \
libgdk-pixbuf2.0-dev \
--no-install-recommends && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN bunx puppeteer browsers install chrome
# Copy built application from builder stage
COPY --from=builder /app .
# Create necessary directories with proper permissions
RUN mkdir -p data public/podcast_audio
# Expose ports
EXPOSE 3000 3001
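
To verify the added Chrome/Puppeteer dependencies locally, the image can be built and started the same way the CI docker-test job does. A sketch, assuming feed_urls.txt and .env exist in the working directory:

  docker build -t voice-rss-summary:dev .
  docker run --rm -p 3000:3000 -p 3001:3001 \
    -v "$(pwd)/feed_urls.txt:/app/feed_urls.txt:ro" \
    -v "$(pwd)/.env:/app/.env:ro" \
    voice-rss-summary:dev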

20
build-amd64.sh Executable file

@@ -0,0 +1,20 @@
#!/bin/bash
set -euo pipefail
# Quick script to build AMD64 image and publish to GHCR
# Usage: ./build-amd64.sh [tag]
TAG="${1:-latest}"
echo "🔨 Building and publishing AMD64 image..."
echo "Tag: ${TAG}"
echo ""
# Build and push AMD64 image in one command
./publish-docker.sh "${TAG}" --platform=linux/amd64 --build-and-push
echo ""
echo "✅ AMD64 image built and published successfully!"
echo ""
echo "To run on AMD64 systems:"
echo " docker run --platform linux/amd64 -p 3000:3000 -p 3001:3001 ghcr.io/anosatsuk124/voice-rss-summary:${TAG}"

build-docker-image.sh

@@ -2,14 +2,47 @@
set -euo pipefail
# Build Docker image for Voice RSS Summary project
# Usage: ./build-docker-image.sh [tag] [build-args...]
# Usage: ./build-docker-image.sh [tag] [--platform=platform] [build-args...]
IMAGE_NAME="voice-rss-summary"
TAG="${1:-latest}"
TAG="latest"
PLATFORM=""
BUILD_ARGS=()
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--platform=*)
PLATFORM="${1#*=}"
shift
;;
--platform)
PLATFORM="$2"
shift 2
;;
-*)
BUILD_ARGS+=("$1")
shift
;;
*)
if [[ -z "${TAG_SET:-}" ]]; then
TAG="$1"
TAG_SET=true
else
BUILD_ARGS+=("$1")
fi
shift
;;
esac
done
FULL_TAG="${IMAGE_NAME}:${TAG}"
echo "Building Docker image: ${FULL_TAG}"
echo "Build context: $(pwd)"
if [[ -n "$PLATFORM" ]]; then
echo "Target platform: ${PLATFORM}"
fi
# Check if Dockerfile exists
if [[ ! -f "Dockerfile" ]]; then
@@ -18,15 +51,34 @@ if [[ ! -f "Dockerfile" ]]; then
fi
# Build with build cache and progress output
exec docker build \
--tag "${FULL_TAG}" \
--progress=plain \
--build-arg BUILDKIT_INLINE_CACHE=1 \
"${@:2}" \
.
DOCKER_CMD=(docker build --tag "${FULL_TAG}" --progress=plain --build-arg BUILDKIT_INLINE_CACHE=1)
# Add platform if specified
if [[ -n "$PLATFORM" ]]; then
DOCKER_CMD+=(--platform "$PLATFORM")
fi
# Add any additional build args
if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
DOCKER_CMD+=("${BUILD_ARGS[@]}")
fi
# Add build context
DOCKER_CMD+=(.)
echo "Running: ${DOCKER_CMD[*]}"
"${DOCKER_CMD[@]}"
# Display image info
echo "\nBuild completed successfully!"
echo ""
echo "Build completed successfully!"
echo "Image: ${FULL_TAG}"
if [[ -n "$PLATFORM" ]]; then
echo "Platform: ${PLATFORM}"
fi
echo "Size: $(docker images --format 'table {{.Size}}' "${FULL_TAG}" | tail -n +2)"
echo "\nTo run the container, use: ./run-docker.sh"
echo ""
echo "To run the container, use: ./run-docker.sh"
if [[ -n "$PLATFORM" && "$PLATFORM" != "linux/amd64" ]]; then
echo "Note: Cross-platform image built. May need to push to registry for deployment."
fi
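
With the new argument parsing, a tag, a target platform, and extra docker build flags can be combined. A sketch with placeholder values:

  ./build-docker-image.sh                              # voice-rss-summary:latest for the host platform
  ./build-docker-image.sh v1.0.0 --platform=linux/arm64
  ./build-docker-image.sh v1.0.0 --no-cache            # unrecognized flags are passed through to docker build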

145
publish-docker.sh Executable file

@@ -0,0 +1,145 @@
#!/bin/bash
set -euo pipefail
# Publish Docker image to GitHub Container Registry
# Usage: ./publish-docker.sh [tag] [username] [--platform=platform] [--build-and-push]
GITHUB_USERNAME="anosatsuk124"
TAG="latest"
PLATFORM=""
BUILD_AND_PUSH=false
BUILD_ARGS=()
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--platform=*)
PLATFORM="${1#*=}"
shift
;;
--platform)
PLATFORM="$2"
shift 2
;;
--build-and-push)
BUILD_AND_PUSH=true
shift
;;
--username=*)
GITHUB_USERNAME="${1#*=}"
shift
;;
-*)
BUILD_ARGS+=("$1")
shift
;;
*)
if [[ -z "${TAG_SET:-}" ]]; then
TAG="$1"
TAG_SET=true
elif [[ -z "${USERNAME_SET:-}" ]]; then
GITHUB_USERNAME="$1"
USERNAME_SET=true
else
BUILD_ARGS+=("$1")
fi
shift
;;
esac
done
LOCAL_IMAGE="voice-rss-summary:${TAG}"
GHCR_IMAGE_1="ghcr.io/${GITHUB_USERNAME}/voice-rss-summary:${TAG}"
GHCR_IMAGE_2="ghcr.io/${GITHUB_USERNAME}/voicersssummary:${TAG}"
echo "Publishing Docker image to GitHub Container Registry"
echo "Local image: ${LOCAL_IMAGE}"
echo "GHCR images: ${GHCR_IMAGE_1}, ${GHCR_IMAGE_2}"
if [[ -n "$PLATFORM" ]]; then
echo "Target platform: ${PLATFORM}"
fi
# Build image if requested
if [[ "$BUILD_AND_PUSH" == "true" ]]; then
echo "Building image first..."
BUILD_CMD=(./build-docker-image.sh "$TAG")
if [[ -n "$PLATFORM" ]]; then
BUILD_CMD+=(--platform "$PLATFORM")
fi
if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
BUILD_CMD+=("${BUILD_ARGS[@]}")
fi
echo "Running: ${BUILD_CMD[*]}"
if ! "${BUILD_CMD[@]}"; then
echo "Error: Failed to build image"
exit 1
fi
fi
# Check if local image exists
if ! docker image inspect "${LOCAL_IMAGE}" >/dev/null 2>&1; then
echo "Error: Local Docker image '${LOCAL_IMAGE}' not found"
if [[ -n "$PLATFORM" ]]; then
echo "Build it first with: ./build-docker-image.sh ${TAG} --platform=${PLATFORM}"
else
echo "Build it first with: ./build-docker-image.sh ${TAG}"
fi
echo "Or use --build-and-push flag to build and push in one command"
exit 1
fi
# Check if user is logged in to GHCR
if ! docker system info | grep -q "ghcr.io"; then
echo "Checking GHCR authentication..."
if ! echo "test" | docker login ghcr.io --username "${GITHUB_USERNAME}" --password-stdin >/dev/null 2>&1; then
echo "Please authenticate with GitHub Container Registry first:"
echo "1. Create a personal access token with 'write:packages' scope at:"
echo " https://github.com/settings/tokens"
echo "2. Login with: echo \$GITHUB_TOKEN | docker login ghcr.io -u ${GITHUB_USERNAME} --password-stdin"
echo " or: docker login ghcr.io -u ${GITHUB_USERNAME}"
exit 1
fi
fi
# Tag images for GHCR
echo "Tagging images for GHCR..."
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_1}"
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_2}"
# Push to GHCR
echo "Pushing ${GHCR_IMAGE_1}..."
if docker push "${GHCR_IMAGE_1}"; then
echo "✅ Successfully pushed ${GHCR_IMAGE_1}"
else
echo "❌ Failed to push ${GHCR_IMAGE_1}"
exit 1
fi
echo "Pushing ${GHCR_IMAGE_2}..."
if docker push "${GHCR_IMAGE_2}"; then
echo "✅ Successfully pushed ${GHCR_IMAGE_2}"
else
echo "❌ Failed to push ${GHCR_IMAGE_2}"
exit 1
fi
echo ""
echo "🎉 Successfully published Docker images to GitHub Container Registry!"
echo ""
echo "Images available at:"
echo " - ${GHCR_IMAGE_1}"
echo " - ${GHCR_IMAGE_2}"
if [[ -n "$PLATFORM" ]]; then
echo "Platform: ${PLATFORM}"
fi
echo ""
echo "To run from GHCR:"
if [[ -n "$PLATFORM" && "$PLATFORM" != "$(uname -m)" ]]; then
echo " docker run --platform ${PLATFORM} -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
else
echo " docker run -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
fi
echo ""
echo "To use with run-docker.sh:"
echo " ./run-docker.sh container-name ${TAG} --from-ghcr"

run-docker.sh

@@ -2,21 +2,46 @@
set -euo pipefail
# Run Docker container for Voice RSS Summary project
# Usage: ./run-docker.sh [container-name] [image-tag]
# Usage: ./run-docker.sh [container-name] [image-tag] [--from-ghcr]
GITHUB_USERNAME="anosatsuk124"
IMAGE_NAME="voice-rss-summary"
CONTAINER_NAME="${1:-voice-rss-summary}"
IMAGE_TAG="${2:-latest}"
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
FROM_GHCR=false
# Check for --from-ghcr flag
for arg in "$@"; do
if [[ "$arg" == "--from-ghcr" ]]; then
FROM_GHCR=true
break
fi
done
if [[ "$FROM_GHCR" == "true" ]]; then
FULL_IMAGE="ghcr.io/${GITHUB_USERNAME}/${IMAGE_NAME}:${IMAGE_TAG}"
else
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
fi
echo "Starting Docker container: ${CONTAINER_NAME}"
echo "Using image: ${FULL_IMAGE}"
# Check if image exists
# Check if image exists or pull from GHCR
if ! docker image inspect "${FULL_IMAGE}" >/dev/null 2>&1; then
echo "Error: Docker image '${FULL_IMAGE}' not found"
echo "Build it first with: ./build-docker-image.sh"
exit 1
if [[ "$FROM_GHCR" == "true" ]]; then
echo "Pulling image from GitHub Container Registry..."
if ! docker pull "${FULL_IMAGE}"; then
echo "Error: Failed to pull Docker image '${FULL_IMAGE}' from GHCR"
echo "Make sure the image exists and you have access to it"
exit 1
fi
else
echo "Error: Docker image '${FULL_IMAGE}' not found"
echo "Build it first with: ./build-docker-image.sh"
echo "Or use --from-ghcr flag to pull from GitHub Container Registry"
exit 1
fi
fi
# Stop and remove existing container if it exists
@@ -57,10 +82,13 @@ exec docker run \
--health-retries=3 \
"${FULL_IMAGE}"
echo "\nContainer started successfully!"
echo ""
echo "Container started successfully!"
echo "Container name: ${CONTAINER_NAME}"
echo "Image: ${FULL_IMAGE}"
echo "Web UI: http://localhost:3000"
echo "Admin panel: http://localhost:3001"
echo "\nTo view logs: docker logs -f ${CONTAINER_NAME}"
echo ""
echo "To view logs: docker logs -f ${CONTAINER_NAME}"
echo "To stop: docker stop ${CONTAINER_NAME}"
echo "To remove: docker rm ${CONTAINER_NAME}"

View File

@@ -1,4 +1,6 @@
import puppeteer, { type Browser } from "puppeteer";
import * as cheerio from "cheerio";
import type { CheerioAPI } from "cheerio";
export interface ExtractedContent {
title?: string;
@@ -8,9 +10,28 @@ export interface ExtractedContent {
error?: string;
}
interface RetryOptions {
maxRetries: number;
baseDelay: number;
maxDelay: number;
backoffMultiplier: number;
}
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
maxRetries: 3,
baseDelay: 1000,
maxDelay: 10000,
backoffMultiplier: 2
};
// Singleton browser instance for reuse
let sharedBrowser: Browser | null = null;
// Helper function to replace page.waitForTimeout
async function waitForTimeout(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
// Dynamic content handling function
async function handleDynamicContent(page: any): Promise<void> {
try {
@@ -90,7 +111,7 @@ async function handleDynamicContent(page: any): Promise<void> {
const button = await page.$(selector);
if (button) {
await button.click();
await page.waitForTimeout(2000);
await waitForTimeout(2000);
break;
}
} catch (e) {
@@ -117,12 +138,12 @@ async function handleDynamicContent(page: any): Promise<void> {
]);
// Final wait for any remaining dynamic content
await page.waitForTimeout(2000);
await waitForTimeout(2000);
} catch (error) {
console.log('Dynamic content handling failed, using basic timeout:', error);
// If dynamic content handling fails, continue with basic timeout
await page.waitForTimeout(3000);
await waitForTimeout(3000);
}
console.log('Dynamic content handling completed.');
}
@@ -141,12 +162,74 @@ async function getBrowser(): Promise<Browser> {
"--disable-gpu",
"--disable-web-security",
"--disable-features=VizDisplayCompositor",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-renderer-backgrounding",
"--disable-field-trial-config",
"--disable-ipc-flooding-protection",
"--enable-automation",
"--force-device-scale-factor=1",
"--ignore-certificate-errors",
"--ignore-ssl-errors",
"--ignore-certificate-errors-spki-list",
"--allow-running-insecure-content",
"--disable-extensions",
"--no-default-browser-check",
"--disable-default-apps",
"--disable-sync",
"--metrics-recording-only",
"--no-pings",
"--mute-audio"
],
});
}
return sharedBrowser;
}
// Helper function for exponential backoff retry
async function retryWithBackoff<T>(
operation: () => Promise<T>,
options: RetryOptions = DEFAULT_RETRY_OPTIONS,
attempt: number = 1
): Promise<T> {
try {
return await operation();
} catch (error) {
if (attempt >= options.maxRetries) {
throw error;
}
const isRetryableError = error instanceof Error && (
error.message.includes('ERR_SOCKET_NOT_CONNECTED') ||
error.message.includes('ERR_CONNECTION_REFUSED') ||
error.message.includes('ERR_CONNECTION_RESET') ||
error.message.includes('ERR_NETWORK_CHANGED') ||
error.message.includes('ERR_INTERNET_DISCONNECTED') ||
error.message.includes('ERR_NAME_NOT_RESOLVED') ||
error.message.includes('ERR_TIMED_OUT') ||
error.message.includes('Protocol error') ||
error.message.includes('Navigation timeout') ||
error.message.includes('net::') ||
error.message.includes('Target closed') ||
error.message.includes('Session closed')
);
if (!isRetryableError) {
throw error;
}
const delay = Math.min(
options.baseDelay * Math.pow(options.backoffMultiplier, attempt - 1),
options.maxDelay
);
console.log(`Attempt ${attempt} failed, retrying in ${delay}ms:`, error.message);
await waitForTimeout(delay);
return retryWithBackoff(operation, options, attempt + 1);
}
}
export async function closeBrowser(): Promise<void> {
if (sharedBrowser && sharedBrowser.isConnected()) {
await sharedBrowser.close();
@@ -154,49 +237,373 @@ export async function closeBrowser(): Promise<void> {
}
}
export async function extractArticleContent(
url: string,
): Promise<ExtractedContent> {
console.log(`Starting content extraction for: ${url}`);
let page = null;
// Fallback content extraction using fetch + cheerio
async function extractWithFetchFallback(url: string): Promise<ExtractedContent> {
console.log(`Using fetch fallback for: ${url}`);
try {
const browser = await getBrowser();
page = await browser.newPage();
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
// Set user agent and viewport
await page.setUserAgent(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
);
await page.setViewport({ width: 1280, height: 720 });
// Set navigation timeout and disable images for faster loading
page.setDefaultNavigationTimeout(45000);
page.setDefaultTimeout(45000);
// Block unnecessary resources to speed up loading
await page.setRequestInterception(true);
page.on('request', (req) => {
const resourceType = req.resourceType();
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
req.abort();
} else {
req.continue();
}
const response = await fetch(url, {
headers: {
'User-Agent': userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Cache-Control': 'no-cache'
},
signal: AbortSignal.timeout(30000) // 30 second timeout
});
// Navigate to the page with better waiting strategy
const response = await page.goto(url, {
waitUntil: "domcontentloaded",
timeout: 45000,
});
if (!response || !response.ok()) {
throw new Error(`HTTP ${response?.status()}: Failed to load page`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
// Enhanced dynamic content handling
console.log('Handling dynamic content...');
await handleDynamicContent(page);
const html = await response.text();
const $ = cheerio.load(html);
// Remove unwanted elements first
const unwantedSelectors = [
"script", "style", "noscript", "iframe", "embed", "object",
"nav", "header", "footer", "aside", "form",
".advertisement", ".ads", ".ad", ".adsbygoogle", "[class*='ad-']", "[id*='ad-']",
".sidebar", ".menu", ".navigation", ".nav", ".breadcrumb",
".social-share", ".share", ".social", ".sns",
".comments", ".comment", ".disqus",
".cookie-banner", ".cookie", ".gdpr",
".popup", ".modal", ".overlay", ".lightbox",
".related", ".recommended", ".more-stories",
".tags", ".categories", ".metadata",
".author-bio", ".author-info",
".newsletter", ".subscribe", ".signup",
"[role='complementary']", "[role='banner']", "[role='contentinfo']",
"[aria-label*='advertisement']", "[aria-label*='sidebar']"
];
unwantedSelectors.forEach((selector) => {
$(selector).remove();
});
// Extract title
let title = "";
const titleSources = [
$('meta[property="og:title"]').attr('content'),
$('meta[name="twitter:title"]').attr('content'),
$('h1').first().text().trim(),
$('.article-title, .post-title, .entry-title').first().text().trim(),
$('title').text().trim(),
$('[itemprop="headline"]').first().text().trim()
];
for (const titleSource of titleSources) {
if (titleSource && titleSource.length > 0) {
title = titleSource;
break;
}
}
// Extract description
let description = "";
const descriptionSources = [
$('meta[property="og:description"]').attr('content'),
$('meta[name="description"]').attr('content'),
$('meta[name="twitter:description"]').attr('content'),
$('[itemprop="description"]').first().text().trim()
];
for (const descSource of descriptionSources) {
if (descSource && descSource.length > 0) {
description = descSource;
break;
}
}
// Content selectors (same as in Puppeteer version)
const contentSelectors = [
'[itemtype*="Article"] [itemprop="articleBody"]',
'[itemtype*="NewsArticle"] [itemprop="articleBody"]',
'[itemtype*="BlogPosting"] [itemprop="articleBody"]',
'article[role="main"]',
'main article',
'[role="main"] article',
'article',
'.post-content', '.entry-content', '.article-content', '.content-area',
'.article-body', '.post-body', '.entry-body', '.story-body',
'.main-content', '.primary-content', '.page-content',
'.news-content', '.blog-content', '.editorial-content',
'.wp-content', '.entry', '.post',
'.section-content', '.postArticle-content', '.post-full-content',
'.markup', '.section--body', '.section-divider + .section-content',
'.honbun', '.main_text', '.article_body', '.news_body',
'.entry_text', '.blog_text', '.content_text',
'.kiji', '.news', '.article',
'main', '[role="main"]',
'#content', '#main', '#article', '#post', '#entry',
'#main-content', '#primary', '#content-area',
'.content', '.main', '.wrapper', '.container'
];
// Function to calculate content quality score
const calculateContentScore = (element: cheerio.Cheerio<any>): number => {
const text = element.text() || '';
if (text.length < 100) return 0;
let score = 0;
// Base score from text length (diminishing returns)
score += Math.min(text.length / 100, 50);
// Paragraph density
const paragraphs = element.find('p');
const avgParagraphLength = paragraphs.length > 0 ?
paragraphs.toArray().reduce((sum, p) => sum + ($(p).text().length || 0), 0) / paragraphs.length : 0;
if (avgParagraphLength > 100) score += 20;
if (paragraphs.length > 3) score += 10;
// Link density penalty
const links = element.find('a');
const linkText = links.toArray().reduce((sum, link) => sum + ($(link).text().length || 0), 0);
const linkDensity = text.length > 0 ? linkText / text.length : 0;
if (linkDensity < 0.2) score += 15;
else if (linkDensity < 0.4) score += 5;
else score -= 10;
// Bonus for article-like structure
if (element.prop('tagName') === 'ARTICLE') score += 25;
if (element.attr('role') === 'main') score += 20;
if (element.find('h1, h2, h3').length > 0) score += 10;
// Bonus for semantic elements
const semanticElements = element.find('p, h1, h2, h3, h4, h5, h6, blockquote, ul, ol');
if (semanticElements.length > 5) score += 15;
// Penalty for navigation-like content
const navWords = ['メニュー', 'ナビ', 'カテゴリ', 'タグ', 'menu', 'navigation', 'nav', 'sidebar'];
const className = (element.attr('class') || '').toLowerCase();
const id = (element.attr('id') || '').toLowerCase();
if (navWords.some(word => className.includes(word) || id.includes(word))) {
score -= 20;
}
return Math.max(score, 0);
};
// Function to clean and normalize text
const cleanText = (text: string): string => {
return text
.replace(/\s+/g, ' ')
.replace(/\n\s*\n\s*\n/g, '\n\n')
.replace(/^\s+|\s+$/g, '')
.replace(/[\u200B-\u200D\uFEFF]/g, '')
.trim();
};
// Collect and score all content candidates
interface ContentCandidate {
element: cheerio.Cheerio<any>;
score: number;
content: string;
selector: string;
}
const candidates: ContentCandidate[] = [];
for (const selector of contentSelectors) {
try {
const elements = $(selector);
elements.each((index, element) => {
const $element = $(element);
const text = $element.text() || '';
if (text.length > 200) {
const score = calculateContentScore($element);
candidates.push({
element: $element,
score,
content: cleanText(text),
selector: `${selector}[${index}]`
});
}
});
} catch (e) {
continue;
}
}
// Sort candidates by score (highest first)
candidates.sort((a, b) => b.score - a.score);
console.log(`Found ${candidates.length} content candidates`);
if (candidates.length > 0) {
console.log(`Best candidate score: ${candidates[0]!.score}, selector: ${candidates[0]!.selector}`);
}
// Get the best content
let content = "";
if (candidates.length > 0) {
content = candidates[0]!.content;
// If the best candidate is still short, try combining top candidates
if (content.length < 500 && candidates.length > 1) {
const topCandidates = candidates.slice(0, 3).filter(c => c.score > 10);
const combinedContent = topCandidates.map(c => c.content).join('\n\n');
if (combinedContent.length > content.length) {
content = cleanText(combinedContent);
}
}
}
// Fallback strategies if still no good content
if (!content || content.length < 200) {
console.log('Using paragraph aggregation fallback...');
const paragraphs = $('p').toArray()
.map(p => $(p).text().trim())
.filter(p => p.length > 50)
.filter(p => {
const lowerP = p.toLowerCase();
return !lowerP.includes('cookie') &&
!lowerP.includes('privacy') &&
!lowerP.includes('terms of service') &&
!lowerP.includes('subscribe') &&
!lowerP.includes('newsletter');
});
if (paragraphs.length > 0) {
content = cleanText(paragraphs.join('\n\n'));
}
}
// Final fallback: structured data
if (!content || content.length < 200) {
console.log('Trying structured data fallback...');
try {
const jsonLd = $('script[type="application/ld+json"]').first().html();
if (jsonLd) {
const data = JSON.parse(jsonLd);
if (data.articleBody) {
content = cleanText(data.articleBody);
} else if (data.text) {
content = cleanText(data.text);
}
}
} catch (e) {
// Ignore JSON parsing errors
}
}
// Limit content length to avoid token limits
const maxLength = 50000;
if (content.length > maxLength) {
content = content.substring(0, maxLength) + "...";
}
console.log(`Fetch fallback extracted content: ${content.length} characters`);
if (!content || content.length < 100) {
return {
title: title || '',
content: '',
description: description || '',
success: false,
error: `Insufficient content extracted via fetch fallback (${content?.length || 0} characters)`,
};
}
return {
title: title || '',
content,
description: description || '',
success: true,
};
} catch (error) {
console.error(`Fetch fallback failed:`, error);
return {
title: '',
content: '',
description: '',
success: false,
error: error instanceof Error ? error.message : 'Unknown error in fetch fallback',
};
}
}
async function extractWithRetry(url: string): Promise<ExtractedContent> {
const userAgents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
];
return retryWithBackoff(async () => {
let page = null;
try {
const browser = await getBrowser();
page = await browser.newPage();
// Randomize user agent to avoid detection
const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0];
await page.setUserAgent(userAgent!);
await page.setViewport({ width: 1280, height: 720 });
// Set longer timeout for problematic sites
page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(60000);
// Block unnecessary resources to speed up loading
await page.setRequestInterception(true);
page.on('request', (req) => {
const resourceType = req.resourceType();
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
req.abort();
} else {
req.continue();
}
});
// Add extra headers to appear more like a real browser
await page.setExtraHTTPHeaders({
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
});
// Navigate with multiple wait strategies
let response;
try {
response = await page.goto(url, {
waitUntil: ["domcontentloaded", "networkidle0"],
timeout: 60000,
});
} catch (networkError) {
// Fallback to more basic wait strategy
const errorMessage = networkError instanceof Error ? networkError.message : 'Unknown error';
console.log('Network idle failed, trying domcontentloaded only:', errorMessage);
response = await page.goto(url, {
waitUntil: "domcontentloaded",
timeout: 60000,
});
}
if (!response) {
throw new Error('No response received from server');
}
const status = response?.status();
if (status && status >= 400) {
throw new Error(`HTTP ${status}: ${response?.statusText() || 'Unknown error'}`);
}
// Enhanced dynamic content handling
console.log('Handling dynamic content...');
await handleDynamicContent(page);
// Extract content using advanced multi-strategy approach
console.log('Extracting content using multi-strategy approach...');
@@ -415,13 +822,13 @@ export async function extractArticleContent(
console.log(`Found ${candidates.length} content candidates`);
if (candidates.length > 0) {
console.log(`Best candidate score: ${candidates[0].score}, selector: ${candidates[0].selector}`);
console.log(`Best candidate score: ${candidates[0]!.score}, selector: ${candidates[0]!.selector}`);
}
// Get the best content
let content = "";
if (candidates.length > 0) {
content = candidates[0].content;
content = candidates[0]!.content;
// If the best candidate is still short, try combining top candidates
if (content.length < 500 && candidates.length > 1) {
@@ -508,9 +915,9 @@ export async function extractArticleContent(
extractedData.content = fallbackData;
} else {
return {
title: extractedData.title,
title: extractedData.title || '',
content: extractedData.content || "",
description: extractedData.description,
description: extractedData.description || '',
success: false,
error: `Insufficient content extracted (${extractedData.content?.length || 0} characters)`,
};
@@ -524,26 +931,97 @@ export async function extractArticleContent(
content = content.substring(0, maxLength) + "...";
}
console.log(`Successfully extracted content: ${content.length} characters`);
return {
title: extractedData.title,
content,
description: extractedData.description,
success: true,
};
console.log(`Successfully extracted content: ${content.length} characters`);
return {
title: extractedData.title,
content,
description: extractedData.description,
success: true,
};
} catch (error) {
console.error(`Content extraction attempt failed:`, error);
throw error; // Let retry logic handle this
} finally {
if (page) {
try {
await page.close();
} catch (closeError) {
console.warn('Failed to close page:', closeError);
}
}
}
});
}
export async function extractArticleContent(
url: string,
): Promise<ExtractedContent> {
console.log(`Starting content extraction for: ${url}`);
try {
return await extractWithRetry(url);
} catch (error) {
console.error(`Content extraction failed for ${url}:`, error);
console.error(`Content extraction failed after all retries for ${url}:`, error);
// Check if this is a Puppeteer launch/browser failure that should trigger fallback
const shouldUseFallback = error instanceof Error && (
error.message.includes('TimeoutError') ||
error.message.includes('Timed out after') ||
error.message.includes('waiting for the WS endpoint URL') ||
error.message.includes('Browser closed') ||
error.message.includes('Target closed') ||
error.message.includes('Session closed') ||
error.message.includes('Protocol error') ||
error.message.includes('Connection terminated') ||
error.message.includes('spawn') || // Process spawn errors
error.message.includes('ECONNRESET') ||
error.message.includes('ECONNREFUSED') ||
error.message.includes('ENOTFOUND')
);
if (shouldUseFallback) {
console.log(`Puppeteer failed, trying fetch fallback for ${url}`);
try {
const fallbackResult = await extractWithFetchFallback(url);
if (fallbackResult.success) {
console.log(`Fetch fallback succeeded for ${url}`);
return fallbackResult;
}
console.log(`Fetch fallback also failed for ${url}:`, fallbackResult.error);
} catch (fallbackError) {
console.error(`Fetch fallback threw error for ${url}:`, fallbackError);
}
}
// Provide more specific error messages
let errorMessage = "Unknown error occurred";
if (error instanceof Error) {
if (error.message.includes('ERR_SOCKET_NOT_CONNECTED')) {
errorMessage = "Network connection failed - server may be unreachable";
} else if (error.message.includes('ERR_CONNECTION_REFUSED')) {
errorMessage = "Connection refused by server";
} else if (error.message.includes('ERR_NAME_NOT_RESOLVED')) {
errorMessage = "DNS resolution failed - domain may not exist";
} else if (error.message.includes('ERR_TIMED_OUT')) {
errorMessage = "Request timed out - server too slow";
} else if (error.message.includes('HTTP 4')) {
errorMessage = `Client error: ${error.message}`;
} else if (error.message.includes('HTTP 5')) {
errorMessage = `Server error: ${error.message}`;
} else if (error.message.includes('TimeoutError')) {
errorMessage = "Puppeteer browser launch timeout - both Puppeteer and fetch fallback failed";
} else {
errorMessage = error.message;
}
}
return {
title: "",
content: "",
description: "",
success: false,
error: error instanceof Error ? error.message : "Unknown error occurred",
error: errorMessage,
};
} finally {
if (page) {
await page.close();
}
}
}

View File

@@ -39,7 +39,8 @@ export async function openAI_ClassifyFeed(title: string): Promise<string> {
const response = await openai.chat.completions.create({
model: config.openai.modelName,
messages: [{ role: "user", content: prompt.trim() }],
temperature: 0.3,
temperature: 0.2,
reasoning_effort: "low",
});
const category = response.choices[0]?.message?.content?.trim();
@@ -152,6 +153,7 @@ ${articleDetails}
{ role: "user", content: sendContent.trim() },
],
temperature: 0.4,
reasoning_effort: "high",
});
const scriptText = response.choices[0]?.message?.content?.trim();
@@ -218,6 +220,7 @@ ${textForClassification}
model: config.openai.modelName,
messages: [{ role: "user", content: prompt.trim() }],
temperature: 0.2,
reasoning_effort: "low",
});
const category = response.choices[0]?.message?.content?.trim();