Compare commits

...

10 Commits

Author SHA1 Message Date
886dc94429 Update
Some checks failed
Build and Publish Docker Images / build (push) Failing after 4m16s
CI / security-scan (push) Failing after 12m2s
CI / lint-and-test (push) Failing after 12m8s
CI / docker-test (push) Has been cancelled
2025-06-12 12:20:25 +09:00
0760909de1 Update 2025-06-12 10:03:12 +09:00
9349c6e95b Update docker-build.yml 2025-06-12 09:35:04 +09:00
e189618038 Update 2025-06-12 09:19:16 +09:00
18a3b5312e Add ci 2025-06-12 09:00:16 +09:00
4226d6ccd6 Update 2025-06-12 08:45:27 +09:00
3a37299aed Update 2025-06-12 08:35:37 +09:00
e66621906e Update Dockerfile 2025-06-12 08:19:39 +09:00
1235e78ec6 Fix 2025-06-12 07:53:37 +09:00
ca9e18d12f Add dependency 2025-06-12 07:45:09 +09:00
9 changed files with 850 additions and 80 deletions

111
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,111 @@
# CI workflow: type-check, lint and build the Bun project, smoke-test the
# Docker image, and scan pull requests for vulnerabilities with Trivy.
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

# Read-only token by default; jobs that need more request it explicitly.
permissions:
  contents: read

jobs:
  # Static checks plus production builds of the frontend and admin panel.
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Bun
        uses: oven-sh/setup-bun@v1
        with:
          bun-version: latest

      - name: Install dependencies
        run: bun install

      - name: Install admin panel dependencies
        run: cd admin-panel && bun install

      - name: Type check
        run: bunx tsc --noEmit

      - name: Lint
        run: bun run lint

      - name: Format check
        run: bun run format:check

      - name: Build frontend
        run: bun run build:frontend

      - name: Build admin panel
        run: bun run build:admin

      - name: Test build artifacts
        run: |
          ls -la frontend/dist/
          ls -la admin-panel/dist/
          echo "✅ Build artifacts created successfully"

  # Builds the image without publishing it and checks that a container starts.
  docker-test:
    runs-on: ubuntu-latest
    needs: lint-and-test
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image (test only)
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64
          push: false
          # Export the result into the local Docker image store; without this
          # the Buildx builder keeps the image to itself and the `docker run`
          # in the next step cannot find `voice-rss-summary:test`.
          load: true
          tags: voice-rss-summary:test
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Test Docker image
        run: |
          echo "Testing Docker image functionality..."
          # Create minimal test environment
          mkdir -p test-data test-public
          echo "# Test feed" > feed_urls.txt
          echo "OPENAI_API_KEY=test" > .env
          echo "VOICEVOX_HOST=http://localhost:50021" >> .env
          # Run container for a short time to test startup. The server is
          # expected to be killed by `timeout` (or to exit on its own because
          # no backing services exist), so application exit codes are tolerated.
          status=0
          docker run --rm --name test-container \
            -v "$(pwd)/feed_urls.txt:/app/feed_urls.txt:ro" \
            -v "$(pwd)/.env:/app/.env:ro" \
            -v "$(pwd)/test-public:/app/public" \
            -v "$(pwd)/test-data:/app/data" \
            voice-rss-summary:test \
            timeout 30 bun run server.ts || status=$?
          echo "Container exited with status ${status}"
          # 125/126/127 are reported by Docker itself (image missing, command
          # not invocable, command not found): the image is unusable.
          case "${status}" in
            125|126|127)
              echo "❌ Docker image could not be started"
              exit 1
              ;;
          esac
          echo "✅ Docker image test completed"

  # Filesystem vulnerability scan; results go to the repository Security tab.
  security-scan:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    permissions:
      contents: read
      # Required by upload-sarif to publish code-scanning results.
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run Trivy vulnerability scanner
        # Pinned to a release tag instead of the moving `master` branch.
        uses: aquasecurity/trivy-action@0.28.0
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v3
        # Upload even when the scan step failed, but only if it got far enough
        # to write a report; otherwise this step would fail on a missing file.
        if: always() && hashFiles('trivy-results.sarif') != ''
        with:
          sarif_file: 'trivy-results.sarif'

116
.github/workflows/docker-build.yml vendored Normal file
View File

@ -0,0 +1,116 @@
# Builds the project's Docker image and publishes it to GHCR.
# - push to main/develop or a v* tag: multi-arch build, pushed
# - pull request: amd64-only build, never pushed
# - manual dispatch: platforms and push behaviour come from the inputs
name: Build and Publish Docker Images

on:
  push:
    branches: [main, develop]
    tags: ["v*"]
  pull_request:
    branches: [main]
  workflow_dispatch:
    inputs:
      platforms:
        description: "Platforms to build (comma-separated)"
        required: false
        default: "linux/amd64,linux/arm64"
        type: string
      push_to_registry:
        description: "Push to registry"
        required: false
        default: true
        type: boolean

permissions:
  contents: read  # needed to clone the source code
  packages: write  # needed to push images to GHCR

env:
  REGISTRY: ghcr.io
  IMAGE_NAME_1: ${{ github.repository_owner }}/voice-rss-summary

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # QEMU provides emulation for the non-native (arm64) build.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Derives image tags/labels from the triggering ref (branch, PR, semver
      # tag) and adds `latest` on the default branch.
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_1 }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      # User-supplied inputs are passed through `env:` rather than expanded
      # inside the script text, so they cannot inject shell commands.
      - name: Determine platforms
        id: platforms
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_PLATFORMS: ${{ github.event.inputs.platforms }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            platforms="${REQUESTED_PLATFORMS:-linux/amd64,linux/arm64}"
          elif [ "$EVENT_NAME" = "pull_request" ]; then
            platforms="linux/amd64"
          else
            platforms="linux/amd64,linux/arm64"
          fi
          echo "platforms=${platforms}" >> "$GITHUB_OUTPUT"

      - name: Determine push setting
        id: push
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_PUSH: ${{ github.event.inputs.push_to_registry }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            push="${REQUESTED_PUSH:-true}"
          elif [ "$EVENT_NAME" = "pull_request" ]; then
            push="false"
          else
            push="true"
          fi
          echo "push=${push}" >> "$GITHUB_OUTPUT"

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: ${{ steps.platforms.outputs.platforms }}
          push: ${{ steps.push.outputs.push }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            BUILDKIT_INLINE_CACHE=1

      # Runs even after a failed build so the job summary always shows what
      # was attempted.
      - name: Generate summary
        if: always()
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref }}
          PLATFORMS: ${{ steps.platforms.outputs.platforms }}
          PUSH: ${{ steps.push.outputs.push }}
          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          {
            echo "## Build Summary"
            echo "- **Event**: ${EVENT_NAME}"
            echo "- **Ref**: ${REF_NAME}"
            echo "- **Platforms**: ${PLATFORMS}"
            echo "- **Push to registry**: ${PUSH}"
            echo ""
            echo "### Images built:"
            echo '```'
            echo "${IMAGE_TAGS}"
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"

138
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,138 @@
# Release workflow: creates a GitHub release (with generated changelog) for a
# v* tag, then waits for the matching Docker image on GHCR and smoke-tests it.
name: Release

on:
  push:
    tags:
      - 'v*'
  workflow_dispatch:
    inputs:
      tag:
        description: 'Tag to release'
        required: true
        type: string

env:
  REGISTRY: ghcr.io

jobs:
  create-release:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: read
    outputs:
      upload_url: ${{ steps.create_release.outputs.upload_url }}
      release_id: ${{ steps.create_release.outputs.id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history and tags are needed to find the previous tag.
          fetch-depth: 0

      # The manually supplied tag goes through `env:` instead of being expanded
      # into the script text, so it cannot inject shell commands.
      - name: Get tag
        id: get_tag
        env:
          EVENT_NAME: ${{ github.event_name }}
          INPUT_TAG: ${{ github.event.inputs.tag }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            tag="$INPUT_TAG"
          else
            tag="${GITHUB_REF#refs/tags/}"
          fi
          echo "tag=${tag}" >> "$GITHUB_OUTPUT"

      - name: Generate changelog
        id: changelog
        env:
          TAG: ${{ steps.get_tag.outputs.tag }}
          OWNER: ${{ github.repository_owner }}
        run: |
          # The release commit is the tag when it already exists; otherwise the
          # checked-out commit. The same ref bounds both the previous-tag
          # lookup and the log range, so manual runs list the right commits.
          if git rev-parse --quiet --verify "refs/tags/${TAG}" >/dev/null; then
            RELEASE_REF="refs/tags/${TAG}"
          else
            RELEASE_REF="HEAD"
          fi
          PREVIOUS_TAG=$(git describe --tags --abbrev=0 "${RELEASE_REF}^" 2>/dev/null || echo "")

          # Docker references must be lower-case.
          OWNER_LC="${OWNER,,}"
          REPO_NAME="${GITHUB_REPOSITORY#*/}"

          if [ -n "$PREVIOUS_TAG" ]; then
            echo "## Changes since $PREVIOUS_TAG" > changelog.md
            git log --pretty=format:"- %s (%h)" "${PREVIOUS_TAG}..${RELEASE_REF}" >> changelog.md
          else
            echo "## Initial Release" > changelog.md
            echo "First release of Voice RSS Summary" >> changelog.md
          fi

          {
            echo ""
            echo "## Docker Images"
            echo "- \`ghcr.io/${OWNER_LC}/voice-rss-summary:${TAG}\`"
            echo "- \`ghcr.io/${OWNER_LC}/voicersssummary:${TAG}\`"
            echo ""
            echo "## Usage"
            echo "\`\`\`bash"
            echo "# Pull and run the latest image"
            echo "docker run -p 3000:3000 -p 3001:3001 ghcr.io/${OWNER_LC}/voice-rss-summary:${TAG}"
            echo ""
            echo "# Or clone the repository and run locally"
            echo "git clone https://github.com/${GITHUB_REPOSITORY}.git"
            echo "cd ${REPO_NAME}"
            echo "git checkout ${TAG}"
            echo "./run-docker.sh container-name ${TAG} --from-ghcr"
            echo "\`\`\`"
          } >> changelog.md

      # NOTE(review): actions/create-release is archived upstream; kept here
      # because the job outputs above depend on its `upload_url`/`id` outputs.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.get_tag.outputs.tag }}
          release_name: Release ${{ steps.get_tag.outputs.tag }}
          body_path: changelog.md
          draft: false
          # Tags with a hyphen (e.g. v1.2.0-rc1) are marked as pre-releases.
          prerelease: ${{ contains(steps.get_tag.outputs.tag, '-') }}

  wait-for-docker:
    runs-on: ubuntu-latest
    needs: create-release
    permissions:
      packages: read
    steps:
      - name: Get tag
        id: get_tag
        env:
          EVENT_NAME: ${{ github.event_name }}
          INPUT_TAG: ${{ github.event.inputs.tag }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            tag="$INPUT_TAG"
          else
            tag="${GITHUB_REF#refs/tags/}"
          fi
          echo "tag=${tag}" >> "$GITHUB_OUTPUT"

      # Authenticate so the manifest check and pull also work when the
      # package is private.
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Wait for Docker images
        env:
          TAG: ${{ steps.get_tag.outputs.tag }}
          OWNER: ${{ github.repository_owner }}
        run: |
          echo "Waiting for Docker images to be available..."
          IMAGE="${REGISTRY}/${OWNER,,}/voice-rss-summary:${TAG}"
          # Poll for up to 30 x 30s = 15 minutes.
          for i in {1..30}; do
            echo "Attempt $i: Checking if images are available..."
            if docker manifest inspect "${IMAGE}" >/dev/null 2>&1; then
              echo "✅ Docker images are available!"
              exit 0
            fi
            echo "Images not yet available, waiting 30 seconds..."
            sleep 30
          done
          echo "❌ Timeout waiting for Docker images"
          exit 1

      - name: Test Docker image
        env:
          TAG: ${{ steps.get_tag.outputs.tag }}
          OWNER: ${{ github.repository_owner }}
        run: |
          IMAGE="${REGISTRY}/${OWNER,,}/voice-rss-summary:${TAG}"
          echo "Testing Docker image: ${IMAGE}"
          # Pull the image
          docker pull "${IMAGE}"
          # Run a quick test. Best-effort: the exit status is reported but
          # does not fail the release.
          status=0
          docker run --rm --name test-container \
            "${IMAGE}" \
            timeout 10 bun --version || status=$?
          echo "Container exited with status ${status}"
          echo "✅ Docker image test completed"

View File

@ -22,18 +22,28 @@ FROM oven/bun:latest AS runtime
WORKDIR /app
# Install MeCab for English to Katakana conversion
# Install MeCab for English to Katakana conversion and Chrome dependencies for Puppeteer
RUN apt-get update && \
apt-get install -y mecab mecab-ipadic-utf8 libmecab-dev && \
apt-get install -y \
mecab mecab-ipadic-utf8 libmecab-dev \
wget gnupg ca-certificates \
fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
libdrm2 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libxss1 \
libgtk-3-0 libnspr4 libnss3 libxdamage1 libxfixes3 libxrandr2 \
libgconf-2-4 libxss1 libasound2 libxtst6 libatspi2.0-0 libdrm2 \
libxcomposite1 libxcursor1 libxi6 libxtst6 xdg-utils lsb-release \
libglib2.0-0 libnss3-dev libgconf-2-4 libxrandr2 libasound2-dev \
libpangocairo-1.0-0 libatk1.0-dev libcairo-gobject2 libgtk-3-dev \
libgdk-pixbuf2.0-dev \
--no-install-recommends && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN bunx puppeteer browsers install chrome
# Copy built application from builder stage
COPY --from=builder /app .
# Create necessary directories with proper permissions
RUN mkdir -p data public/podcast_audio
# Expose ports
EXPOSE 3000 3001

20
build-amd64.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
set -euo pipefail

# Convenience wrapper: build the linux/amd64 image and publish it to GHCR by
# delegating to publish-docker.sh in the current directory.
# Usage: ./build-amd64.sh [tag]
#   tag  image tag to build and push (default: latest)

image_tag="${1:-latest}"

printf '%s\n' \
    "🔨 Building and publishing AMD64 image..." \
    "Tag: ${image_tag}" \
    ""

# Single step: publish-docker.sh builds first, then pushes.
./publish-docker.sh "${image_tag}" --platform=linux/amd64 --build-and-push

printf '%s\n' \
    "" \
    "✅ AMD64 image built and published successfully!" \
    "" \
    "To run on AMD64 systems:" \
    " docker run --platform linux/amd64 -p 3000:3000 -p 3001:3001 ghcr.io/anosatsuk124/voice-rss-summary:${image_tag}"

View File

@ -2,14 +2,47 @@
set -euo pipefail
# Build Docker image for Voice RSS Summary project
# Usage: ./build-docker-image.sh [tag] [build-args...]
# Usage: ./build-docker-image.sh [tag] [--platform=platform] [build-args...]
IMAGE_NAME="voice-rss-summary"
TAG="${1:-latest}"
TAG="latest"
PLATFORM=""
BUILD_ARGS=()
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--platform=*)
PLATFORM="${1#*=}"
shift
;;
--platform)
PLATFORM="$2"
shift 2
;;
-*)
BUILD_ARGS+=("$1")
shift
;;
*)
if [[ -z "${TAG_SET:-}" ]]; then
TAG="$1"
TAG_SET=true
else
BUILD_ARGS+=("$1")
fi
shift
;;
esac
done
FULL_TAG="${IMAGE_NAME}:${TAG}"
echo "Building Docker image: ${FULL_TAG}"
echo "Build context: $(pwd)"
if [[ -n "$PLATFORM" ]]; then
echo "Target platform: ${PLATFORM}"
fi
# Check if Dockerfile exists
if [[ ! -f "Dockerfile" ]]; then
@ -18,15 +51,34 @@ if [[ ! -f "Dockerfile" ]]; then
fi
# Build with build cache and progress output
exec docker build \
--tag "${FULL_TAG}" \
--progress=plain \
--build-arg BUILDKIT_INLINE_CACHE=1 \
"${@:2}" \
.
DOCKER_CMD=(docker build --tag "${FULL_TAG}" --progress=plain --build-arg BUILDKIT_INLINE_CACHE=1)
# Add platform if specified
if [[ -n "$PLATFORM" ]]; then
DOCKER_CMD+=(--platform "$PLATFORM")
fi
# Add any additional build args
if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
DOCKER_CMD+=("${BUILD_ARGS[@]}")
fi
# Add build context
DOCKER_CMD+=(.)
echo "Running: ${DOCKER_CMD[*]}"
"${DOCKER_CMD[@]}"
# Display image info
echo "\nBuild completed successfully!"
echo ""
echo "Build completed successfully!"
echo "Image: ${FULL_TAG}"
if [[ -n "$PLATFORM" ]]; then
echo "Platform: ${PLATFORM}"
fi
echo "Size: $(docker images --format 'table {{.Size}}' "${FULL_TAG}" | tail -n +2)"
echo "\nTo run the container, use: ./run-docker.sh"
echo ""
echo "To run the container, use: ./run-docker.sh"
if [[ -n "$PLATFORM" && "$PLATFORM" != "linux/amd64" ]]; then
echo "Note: Cross-platform image built. May need to push to registry for deployment."
fi

145
publish-docker.sh Executable file
View File

@ -0,0 +1,145 @@
#!/bin/bash
set -euo pipefail

# Publish Docker image to GitHub Container Registry
# Usage: ./publish-docker.sh [tag] [username] [--platform=platform] [--build-and-push]
#
#   tag               image tag to publish (default: latest)
#   username          GHCR namespace (default: anosatsuk124); also --username=NAME
#   --platform VALUE  target platform, forwarded to build-docker-image.sh
#   --build-and-push  build the local image first via ./build-docker-image.sh
#   any other args    forwarded to build-docker-image.sh
#
# Authentication: an existing `docker login ghcr.io` is reused. If none is
# found and GITHUB_TOKEN is set, the script logs in with that token.

GITHUB_USERNAME="anosatsuk124"
TAG="latest"
PLATFORM=""
BUILD_AND_PUSH=false
BUILD_ARGS=()

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --platform=*)
            PLATFORM="${1#*=}"
            shift
            ;;
        --platform)
            # Without this guard `set -u` aborts with an opaque
            # "unbound variable" message when the value is missing.
            if [[ $# -lt 2 ]]; then
                echo "Error: --platform requires a value" >&2
                exit 1
            fi
            PLATFORM="$2"
            shift 2
            ;;
        --build-and-push)
            BUILD_AND_PUSH=true
            shift
            ;;
        --username=*)
            GITHUB_USERNAME="${1#*=}"
            shift
            ;;
        -*)
            BUILD_ARGS+=("$1")
            shift
            ;;
        *)
            # Positional arguments: first is the tag, second the username,
            # anything further is passed through to the build.
            if [[ -z "${TAG_SET:-}" ]]; then
                TAG="$1"
                TAG_SET=true
            elif [[ -z "${USERNAME_SET:-}" ]]; then
                GITHUB_USERNAME="$1"
                USERNAME_SET=true
            else
                BUILD_ARGS+=("$1")
            fi
            shift
            ;;
    esac
done

LOCAL_IMAGE="voice-rss-summary:${TAG}"
GHCR_IMAGE_1="ghcr.io/${GITHUB_USERNAME}/voice-rss-summary:${TAG}"
GHCR_IMAGE_2="ghcr.io/${GITHUB_USERNAME}/voicersssummary:${TAG}"

echo "Publishing Docker image to GitHub Container Registry"
echo "Local image: ${LOCAL_IMAGE}"
echo "GHCR images: ${GHCR_IMAGE_1}, ${GHCR_IMAGE_2}"
if [[ -n "$PLATFORM" ]]; then
    echo "Target platform: ${PLATFORM}"
fi

# Build image if requested
if [[ "$BUILD_AND_PUSH" == "true" ]]; then
    echo "Building image first..."
    BUILD_CMD=(./build-docker-image.sh "$TAG")
    if [[ -n "$PLATFORM" ]]; then
        BUILD_CMD+=(--platform "$PLATFORM")
    fi
    if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
        BUILD_CMD+=("${BUILD_ARGS[@]}")
    fi
    echo "Running: ${BUILD_CMD[*]}"
    if ! "${BUILD_CMD[@]}"; then
        echo "Error: Failed to build image"
        exit 1
    fi
fi

# Check if local image exists
if ! docker image inspect "${LOCAL_IMAGE}" >/dev/null 2>&1; then
    echo "Error: Local Docker image '${LOCAL_IMAGE}' not found"
    if [[ -n "$PLATFORM" ]]; then
        echo "Build it first with: ./build-docker-image.sh ${TAG} --platform=${PLATFORM}"
    else
        echo "Build it first with: ./build-docker-image.sh ${TAG}"
    fi
    echo "Or use --build-and-push flag to build and push in one command"
    exit 1
fi

# Check if user is logged in to GHCR.
# `docker login` records each registry in the client config file (also when a
# credential helper stores the secret), so look for a ghcr.io entry there.
# Probing with a dummy password can never succeed and would reject users who
# are already authenticated.
DOCKER_CONFIG_FILE="${DOCKER_CONFIG:-${HOME:-}/.docker}/config.json"
if ! grep -q '"ghcr.io"' "${DOCKER_CONFIG_FILE}" 2>/dev/null; then
    echo "Checking GHCR authentication..."
    # Fall back to a token from the environment when one is provided.
    if [[ -z "${GITHUB_TOKEN:-}" ]] ||
        ! printf '%s' "${GITHUB_TOKEN}" | docker login ghcr.io --username "${GITHUB_USERNAME}" --password-stdin >/dev/null 2>&1; then
        echo "Please authenticate with GitHub Container Registry first:"
        echo "1. Create a personal access token with 'write:packages' scope at:"
        echo " https://github.com/settings/tokens"
        echo "2. Login with: echo \$GITHUB_TOKEN | docker login ghcr.io -u ${GITHUB_USERNAME} --password-stdin"
        echo " or: docker login ghcr.io -u ${GITHUB_USERNAME}"
        exit 1
    fi
fi

# Tag images for GHCR
echo "Tagging images for GHCR..."
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_1}"
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_2}"

# Push to GHCR
echo "Pushing ${GHCR_IMAGE_1}..."
if docker push "${GHCR_IMAGE_1}"; then
    echo "✅ Successfully pushed ${GHCR_IMAGE_1}"
else
    echo "❌ Failed to push ${GHCR_IMAGE_1}"
    exit 1
fi

echo "Pushing ${GHCR_IMAGE_2}..."
if docker push "${GHCR_IMAGE_2}"; then
    echo "✅ Successfully pushed ${GHCR_IMAGE_2}"
else
    echo "❌ Failed to push ${GHCR_IMAGE_2}"
    exit 1
fi

echo ""
echo "🎉 Successfully published Docker images to GitHub Container Registry!"
echo ""
echo "Images available at:"
echo " - ${GHCR_IMAGE_1}"
echo " - ${GHCR_IMAGE_2}"
if [[ -n "$PLATFORM" ]]; then
    echo "Platform: ${PLATFORM}"
fi

# Translate the machine architecture into Docker's platform notation so it can
# be compared with --platform (`uname -m` reports e.g. x86_64, never
# linux/amd64).
case "$(uname -m)" in
    x86_64 | amd64) HOST_PLATFORM="linux/amd64" ;;
    aarch64 | arm64) HOST_PLATFORM="linux/arm64" ;;
    *) HOST_PLATFORM="" ;;
esac

echo ""
echo "To run from GHCR:"
if [[ -n "$PLATFORM" && "$PLATFORM" != "$HOST_PLATFORM" ]]; then
    echo " docker run --platform ${PLATFORM} -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
else
    echo " docker run -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
fi

echo ""
echo "To use with run-docker.sh:"
echo " ./run-docker.sh container-name ${TAG} --from-ghcr"

View File

@ -2,21 +2,46 @@
set -euo pipefail
# Run Docker container for Voice RSS Summary project
# Usage: ./run-docker.sh [container-name] [image-tag]
# Usage: ./run-docker.sh [container-name] [image-tag] [--from-ghcr]
GITHUB_USERNAME="anosatsuk124"
IMAGE_NAME="voice-rss-summary"
CONTAINER_NAME="${1:-voice-rss-summary}"
IMAGE_TAG="${2:-latest}"
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
FROM_GHCR=false
# Check for --from-ghcr flag
for arg in "$@"; do
if [[ "$arg" == "--from-ghcr" ]]; then
FROM_GHCR=true
break
fi
done
if [[ "$FROM_GHCR" == "true" ]]; then
FULL_IMAGE="ghcr.io/${GITHUB_USERNAME}/${IMAGE_NAME}:${IMAGE_TAG}"
else
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
fi
echo "Starting Docker container: ${CONTAINER_NAME}"
echo "Using image: ${FULL_IMAGE}"
# Check if image exists
# Check if image exists or pull from GHCR
if ! docker image inspect "${FULL_IMAGE}" >/dev/null 2>&1; then
echo "Error: Docker image '${FULL_IMAGE}' not found"
echo "Build it first with: ./build-docker-image.sh"
exit 1
if [[ "$FROM_GHCR" == "true" ]]; then
echo "Pulling image from GitHub Container Registry..."
if ! docker pull "${FULL_IMAGE}"; then
echo "Error: Failed to pull Docker image '${FULL_IMAGE}' from GHCR"
echo "Make sure the image exists and you have access to it"
exit 1
fi
else
echo "Error: Docker image '${FULL_IMAGE}' not found"
echo "Build it first with: ./build-docker-image.sh"
echo "Or use --from-ghcr flag to pull from GitHub Container Registry"
exit 1
fi
fi
# Stop and remove existing container if it exists
@ -57,10 +82,13 @@ exec docker run \
--health-retries=3 \
"${FULL_IMAGE}"
echo "\nContainer started successfully!"
echo ""
echo "Container started successfully!"
echo "Container name: ${CONTAINER_NAME}"
echo "Image: ${FULL_IMAGE}"
echo "Web UI: http://localhost:3000"
echo "Admin panel: http://localhost:3001"
echo "\nTo view logs: docker logs -f ${CONTAINER_NAME}"
echo ""
echo "To view logs: docker logs -f ${CONTAINER_NAME}"
echo "To stop: docker stop ${CONTAINER_NAME}"
echo "To remove: docker rm ${CONTAINER_NAME}"

View File

@ -8,9 +8,28 @@ export interface ExtractedContent {
error?: string;
}
// Tuning knobs for retryWithBackoff.
interface RetryOptions {
// Attempt number at which retrying stops; the attempt counter starts at 1.
maxRetries: number;
// Wait before the first retry, in milliseconds.
baseDelay: number;
// Upper bound for any single wait, in milliseconds.
maxDelay: number;
// Factor the wait is multiplied by after each failed attempt.
backoffMultiplier: number;
}
// Defaults: stop at attempt 3, start at a 1s wait, double each time, cap at 10s.
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
maxRetries: 3,
baseDelay: 1000,
maxDelay: 10000,
backoffMultiplier: 2
};
// Singleton browser instance for reuse
let sharedBrowser: Browser | null = null;
/**
 * Stand-in for `page.waitForTimeout`: resolves after roughly `ms` milliseconds.
 *
 * @param ms - how long to wait, in milliseconds
 */
async function waitForTimeout(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
// Dynamic content handling function
async function handleDynamicContent(page: any): Promise<void> {
try {
@ -90,7 +109,7 @@ async function handleDynamicContent(page: any): Promise<void> {
const button = await page.$(selector);
if (button) {
await button.click();
await page.waitForTimeout(2000);
await waitForTimeout(2000);
break;
}
} catch (e) {
@ -117,12 +136,12 @@ async function handleDynamicContent(page: any): Promise<void> {
]);
// Final wait for any remaining dynamic content
await page.waitForTimeout(2000);
await waitForTimeout(2000);
} catch (error) {
console.log('Dynamic content handling failed, using basic timeout:', error);
// If dynamic content handling fails, continue with basic timeout
await page.waitForTimeout(3000);
await waitForTimeout(3000);
}
console.log('Dynamic content handling completed.');
}
@ -141,12 +160,74 @@ async function getBrowser(): Promise<Browser> {
"--disable-gpu",
"--disable-web-security",
"--disable-features=VizDisplayCompositor",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-renderer-backgrounding",
"--disable-field-trial-config",
"--disable-ipc-flooding-protection",
"--enable-automation",
"--force-device-scale-factor=1",
"--ignore-certificate-errors",
"--ignore-ssl-errors",
"--ignore-certificate-errors-spki-list",
"--allow-running-insecure-content",
"--disable-extensions",
"--no-default-browser-check",
"--disable-default-apps",
"--disable-sync",
"--metrics-recording-only",
"--no-pings",
"--mute-audio"
],
});
}
return sharedBrowser;
}
/**
 * Runs `operation` and retries it with exponential backoff when it fails with
 * an error whose message looks like a transient network/browser problem.
 *
 * @param operation - async work to attempt
 * @param options - retry limits and delays (defaults to DEFAULT_RETRY_OPTIONS)
 * @param attempt - number of the first attempt; counting starts at 1
 * @returns whatever `operation` resolves to
 * @throws the last error once `options.maxRetries` attempts are used up, or
 *         immediately when the error is not an `Error` or not retryable
 */
async function retryWithBackoff<T>(
  operation: () => Promise<T>,
  options: RetryOptions = DEFAULT_RETRY_OPTIONS,
  attempt: number = 1
): Promise<T> {
  // Message fragments that mark a failure as worth retrying.
  const transientMarkers = [
    'ERR_SOCKET_NOT_CONNECTED',
    'ERR_CONNECTION_REFUSED',
    'ERR_CONNECTION_RESET',
    'ERR_NETWORK_CHANGED',
    'ERR_INTERNET_DISCONNECTED',
    'ERR_NAME_NOT_RESOLVED',
    'ERR_TIMED_OUT',
    'Protocol error',
    'Navigation timeout',
    'net::',
    'Target closed',
    'Session closed'
  ];

  for (let currentAttempt = attempt; ; currentAttempt += 1) {
    try {
      return await operation();
    } catch (error) {
      // Attempt budget exhausted: surface the failure unchanged.
      if (currentAttempt >= options.maxRetries) {
        throw error;
      }

      // Only genuine Error objects with a known transient message are retried.
      if (!(error instanceof Error)) {
        throw error;
      }
      const failureText = error.message;
      if (!transientMarkers.some((marker) => failureText.includes(marker))) {
        throw error;
      }

      // baseDelay * multiplier^(attempt - 1), capped at maxDelay.
      const uncapped = options.baseDelay * Math.pow(options.backoffMultiplier, currentAttempt - 1);
      const delay = Math.min(uncapped, options.maxDelay);

      console.log(`Attempt ${currentAttempt} failed, retrying in ${delay}ms:`, error.message);
      await waitForTimeout(delay);
    }
  }
}
export async function closeBrowser(): Promise<void> {
if (sharedBrowser && sharedBrowser.isConnected()) {
await sharedBrowser.close();
@ -154,49 +235,79 @@ export async function closeBrowser(): Promise<void> {
}
}
export async function extractArticleContent(
url: string,
): Promise<ExtractedContent> {
console.log(`Starting content extraction for: ${url}`);
let page = null;
try {
const browser = await getBrowser();
page = await browser.newPage();
async function extractWithRetry(url: string): Promise<ExtractedContent> {
const userAgents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
];
// Set user agent and viewport
await page.setUserAgent(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
);
await page.setViewport({ width: 1280, height: 720 });
return retryWithBackoff(async () => {
let page = null;
try {
const browser = await getBrowser();
page = await browser.newPage();
// Set navigation timeout and disable images for faster loading
page.setDefaultNavigationTimeout(45000);
page.setDefaultTimeout(45000);
// Block unnecessary resources to speed up loading
await page.setRequestInterception(true);
page.on('request', (req) => {
const resourceType = req.resourceType();
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
req.abort();
} else {
req.continue();
// Randomize user agent to avoid detection
const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0];
await page.setUserAgent(userAgent!);
await page.setViewport({ width: 1280, height: 720 });
// Set longer timeout for problematic sites
page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(60000);
// Block unnecessary resources to speed up loading
await page.setRequestInterception(true);
page.on('request', (req) => {
const resourceType = req.resourceType();
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
req.abort();
} else {
req.continue();
}
});
// Add extra headers to appear more like a real browser
await page.setExtraHTTPHeaders({
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
});
// Navigate with multiple wait strategies
let response;
try {
response = await page.goto(url, {
waitUntil: ["domcontentloaded", "networkidle0"],
timeout: 60000,
});
} catch (networkError) {
// Fallback to more basic wait strategy
const errorMessage = networkError instanceof Error ? networkError.message : 'Unknown error';
console.log('Network idle failed, trying domcontentloaded only:', errorMessage);
response = await page.goto(url, {
waitUntil: "domcontentloaded",
timeout: 60000,
});
}
});
// Navigate to the page with better waiting strategy
const response = await page.goto(url, {
waitUntil: "domcontentloaded",
timeout: 45000,
});
if (!response) {
throw new Error('No response received from server');
}
if (!response || !response.ok()) {
throw new Error(`HTTP ${response?.status()}: Failed to load page`);
}
const status = response?.status();
if (status && status >= 400) {
throw new Error(`HTTP ${status}: ${response?.statusText() || 'Unknown error'}`);
}
// Enhanced dynamic content handling
console.log('Handling dynamic content...');
await handleDynamicContent(page);
// Enhanced dynamic content handling
console.log('Handling dynamic content...');
await handleDynamicContent(page);
// Extract content using advanced multi-strategy approach
console.log('Extracting content using multi-strategy approach...');
@ -415,13 +526,13 @@ export async function extractArticleContent(
console.log(`Found ${candidates.length} content candidates`);
if (candidates.length > 0) {
console.log(`Best candidate score: ${candidates[0].score}, selector: ${candidates[0].selector}`);
console.log(`Best candidate score: ${candidates[0]!.score}, selector: ${candidates[0]!.selector}`);
}
// Get the best content
let content = "";
if (candidates.length > 0) {
content = candidates[0].content;
content = candidates[0]!.content;
// If the best candidate is still short, try combining top candidates
if (content.length < 500 && candidates.length > 1) {
@ -508,9 +619,9 @@ export async function extractArticleContent(
extractedData.content = fallbackData;
} else {
return {
title: extractedData.title,
title: extractedData.title || '',
content: extractedData.content || "",
description: extractedData.description,
description: extractedData.description || '',
success: false,
error: `Insufficient content extracted (${extractedData.content?.length || 0} characters)`,
};
@ -524,26 +635,65 @@ export async function extractArticleContent(
content = content.substring(0, maxLength) + "...";
}
console.log(`Successfully extracted content: ${content.length} characters`);
return {
title: extractedData.title,
content,
description: extractedData.description,
success: true,
};
console.log(`Successfully extracted content: ${content.length} characters`);
return {
title: extractedData.title,
content,
description: extractedData.description,
success: true,
};
} catch (error) {
console.error(`Content extraction attempt failed:`, error);
throw error; // Let retry logic handle this
} finally {
if (page) {
try {
await page.close();
} catch (closeError) {
console.warn('Failed to close page:', closeError);
}
}
}
});
}
export async function extractArticleContent(
url: string,
): Promise<ExtractedContent> {
console.log(`Starting content extraction for: ${url}`);
try {
return await extractWithRetry(url);
} catch (error) {
console.error(`Content extraction failed for ${url}:`, error);
console.error(`Content extraction failed after all retries for ${url}:`, error);
// Provide more specific error messages
let errorMessage = "Unknown error occurred";
if (error instanceof Error) {
if (error.message.includes('ERR_SOCKET_NOT_CONNECTED')) {
errorMessage = "Network connection failed - server may be unreachable";
} else if (error.message.includes('ERR_CONNECTION_REFUSED')) {
errorMessage = "Connection refused by server";
} else if (error.message.includes('ERR_NAME_NOT_RESOLVED')) {
errorMessage = "DNS resolution failed - domain may not exist";
} else if (error.message.includes('ERR_TIMED_OUT')) {
errorMessage = "Request timed out - server too slow";
} else if (error.message.includes('HTTP 4')) {
errorMessage = `Client error: ${error.message}`;
} else if (error.message.includes('HTTP 5')) {
errorMessage = `Server error: ${error.message}`;
} else {
errorMessage = error.message;
}
}
return {
title: "",
content: "",
description: "",
success: false,
error: error instanceof Error ? error.message : "Unknown error occurred",
error: errorMessage,
};
} finally {
if (page) {
await page.close();
}
}
}