Compare commits
10 Commits
8ddd5ad103
...
886dc94429
Author | SHA1 | Date | |
---|---|---|---|
886dc94429 | |||
0760909de1 | |||
9349c6e95b | |||
e189618038 | |||
18a3b5312e | |||
4226d6ccd6 | |||
3a37299aed | |||
e66621906e | |||
1235e78ec6 | |||
ca9e18d12f |
111
.github/workflows/ci.yml
vendored
Normal file
111
.github/workflows/ci.yml
vendored
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main, develop ]
|
||||||
|
pull_request:
|
||||||
|
branches: [ main ]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
lint-and-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Bun
|
||||||
|
uses: oven-sh/setup-bun@v1
|
||||||
|
with:
|
||||||
|
bun-version: latest
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: bun install
|
||||||
|
|
||||||
|
- name: Install admin panel dependencies
|
||||||
|
run: cd admin-panel && bun install
|
||||||
|
|
||||||
|
- name: Type check
|
||||||
|
run: bunx tsc --noEmit
|
||||||
|
|
||||||
|
- name: Lint
|
||||||
|
run: bun run lint
|
||||||
|
|
||||||
|
- name: Format check
|
||||||
|
run: bun run format:check
|
||||||
|
|
||||||
|
- name: Build frontend
|
||||||
|
run: bun run build:frontend
|
||||||
|
|
||||||
|
- name: Build admin panel
|
||||||
|
run: bun run build:admin
|
||||||
|
|
||||||
|
- name: Test build artifacts
|
||||||
|
run: |
|
||||||
|
ls -la frontend/dist/
|
||||||
|
ls -la admin-panel/dist/
|
||||||
|
echo "✅ Build artifacts created successfully"
|
||||||
|
|
||||||
|
docker-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: lint-and-test
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Build Docker image (test only)
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: false
|
||||||
|
tags: voice-rss-summary:test
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
- name: Test Docker image
|
||||||
|
run: |
|
||||||
|
echo "Testing Docker image functionality..."
|
||||||
|
|
||||||
|
# Create minimal test environment
|
||||||
|
mkdir -p test-data test-public
|
||||||
|
echo "# Test feed" > feed_urls.txt
|
||||||
|
echo "OPENAI_API_KEY=test" > .env
|
||||||
|
echo "VOICEVOX_HOST=http://localhost:50021" >> .env
|
||||||
|
|
||||||
|
# Run container for a short time to test startup
|
||||||
|
docker run --rm --name test-container \
|
||||||
|
-v "$(pwd)/feed_urls.txt:/app/feed_urls.txt:ro" \
|
||||||
|
-v "$(pwd)/.env:/app/.env:ro" \
|
||||||
|
-v "$(pwd)/test-public:/app/public" \
|
||||||
|
-v "$(pwd)/test-data:/app/data" \
|
||||||
|
voice-rss-summary:test \
|
||||||
|
timeout 30 bun run server.ts || true
|
||||||
|
|
||||||
|
echo "✅ Docker image test completed"
|
||||||
|
|
||||||
|
# Scans the repository file system with Trivy on pull requests and publishes
# the findings to the GitHub Security tab as a SARIF report.
security-scan:
  runs-on: ubuntu-latest
  if: github.event_name == 'pull_request'
  # Declaring permissions at job level resets everything not listed to "none",
  # so every scope the steps rely on is spelled out here.
  permissions:
    contents: read # actions/checkout
    actions: read # upload-sarif reads workflow run metadata (private repositories)
    security-events: write # upload-sarif publishes code scanning results

  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Run Trivy vulnerability scanner
      # NOTE(review): `@master` is a moving target; consider pinning to a
      # release tag or commit SHA for reproducible runs.
      uses: aquasecurity/trivy-action@master
      with:
        scan-type: 'fs'
        scan-ref: '.'
        format: 'sarif'
        output: 'trivy-results.sarif'

    - name: Upload Trivy scan results to GitHub Security tab
      # v2 of the CodeQL action is deprecated; v3 is the supported line.
      uses: github/codeql-action/upload-sarif@v3
      # Run regardless of the scan step's outcome so findings are still reported.
      if: always()
      with:
        sarif_file: 'trivy-results.sarif'
|
116
.github/workflows/docker-build.yml
vendored
Normal file
116
.github/workflows/docker-build.yml
vendored
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
name: Build and Publish Docker Images
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main, develop]
|
||||||
|
tags: ["v*"]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
platforms:
|
||||||
|
description: "Platforms to build (comma-separated)"
|
||||||
|
required: false
|
||||||
|
default: "linux/amd64,linux/arm64"
|
||||||
|
type: string
|
||||||
|
push_to_registry:
|
||||||
|
description: "Push to registry"
|
||||||
|
required: false
|
||||||
|
default: true
|
||||||
|
type: boolean
|
||||||
|
|
||||||
|
# Workflow-level GITHUB_TOKEN permissions.
permissions:
|
||||||
|
contents: read # Required to clone the source code
|
||||||
|
packages: write # Required to push images to GHCR
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
IMAGE_NAME_1: ${{ github.repository_owner }}/voice-rss-summary
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to Container Registry
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Extract metadata
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
${{ env.REGISTRY }}/${{ env.IMAGE_NAME_1 }}
|
||||||
|
tags: |
|
||||||
|
type=ref,event=branch
|
||||||
|
type=ref,event=pr
|
||||||
|
type=semver,pattern={{version}}
|
||||||
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
|
type=semver,pattern={{major}}
|
||||||
|
type=raw,value=latest,enable={{is_default_branch}}
|
||||||
|
|
||||||
|
- name: Determine platforms
|
||||||
|
id: platforms
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "platforms=${{ github.event.inputs.platforms }}" >> $GITHUB_OUTPUT
|
||||||
|
elif [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||||
|
echo "platforms=linux/amd64" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "platforms=linux/amd64,linux/arm64" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Determine push setting
|
||||||
|
id: push
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "push=${{ github.event.inputs.push_to_registry }}" >> $GITHUB_OUTPUT
|
||||||
|
elif [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||||
|
echo "push=false" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "push=true" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Build and push Docker image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
platforms: ${{ steps.platforms.outputs.platforms }}
|
||||||
|
push: ${{ steps.push.outputs.push }}
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
build-args: |
|
||||||
|
BUILDKIT_INLINE_CACHE=1
|
||||||
|
|
||||||
|
- name: Generate summary
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
echo "## Build Summary" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **Event**: ${{ github.event_name }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **Ref**: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **Platforms**: ${{ steps.platforms.outputs.platforms }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- **Push to registry**: ${{ steps.push.outputs.push }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Images built:" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
138
.github/workflows/release.yml
vendored
Normal file
138
.github/workflows/release.yml
vendored
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- 'v*'
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Tag to release'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
create-release:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
packages: read
|
||||||
|
|
||||||
|
outputs:
|
||||||
|
upload_url: ${{ steps.create_release.outputs.upload_url }}
|
||||||
|
release_id: ${{ steps.create_release.outputs.id }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Get tag
|
||||||
|
id: get_tag
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Generate changelog
|
||||||
|
id: changelog
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
PREVIOUS_TAG=$(git describe --tags --abbrev=0 "${{ github.event.inputs.tag }}^" 2>/dev/null || echo "")
|
||||||
|
else
|
||||||
|
PREVIOUS_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$PREVIOUS_TAG" ]; then
|
||||||
|
echo "## Changes since $PREVIOUS_TAG" > changelog.md
|
||||||
|
git log --pretty=format:"- %s (%h)" "$PREVIOUS_TAG"..HEAD >> changelog.md
|
||||||
|
else
|
||||||
|
echo "## Initial Release" > changelog.md
|
||||||
|
echo "First release of Voice RSS Summary" >> changelog.md
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "" >> changelog.md
|
||||||
|
echo "## Docker Images" >> changelog.md
|
||||||
|
echo "- \`ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${{ steps.get_tag.outputs.tag }}\`" >> changelog.md
|
||||||
|
echo "- \`ghcr.io/${{ github.repository_owner }}/voicersssummary:${{ steps.get_tag.outputs.tag }}\`" >> changelog.md
|
||||||
|
echo "" >> changelog.md
|
||||||
|
echo "## Usage" >> changelog.md
|
||||||
|
echo "\`\`\`bash" >> changelog.md
|
||||||
|
echo "# Pull and run the latest image" >> changelog.md
|
||||||
|
echo "docker run -p 3000:3000 -p 3001:3001 ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${{ steps.get_tag.outputs.tag }}" >> changelog.md
|
||||||
|
echo "" >> changelog.md
|
||||||
|
echo "# Or clone the repository and run locally" >> changelog.md
|
||||||
|
echo "git clone https://github.com/${{ github.repository }}.git" >> changelog.md
|
||||||
|
echo "cd VoiceRSSSummary" >> changelog.md
|
||||||
|
echo "git checkout ${{ steps.get_tag.outputs.tag }}" >> changelog.md
|
||||||
|
echo "./run-docker.sh container-name ${{ steps.get_tag.outputs.tag }} --from-ghcr" >> changelog.md
|
||||||
|
echo "\`\`\`" >> changelog.md
|
||||||
|
|
||||||
|
- name: Create Release
|
||||||
|
id: create_release
|
||||||
|
uses: actions/create-release@v1
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
with:
|
||||||
|
tag_name: ${{ steps.get_tag.outputs.tag }}
|
||||||
|
release_name: Release ${{ steps.get_tag.outputs.tag }}
|
||||||
|
body_path: changelog.md
|
||||||
|
draft: false
|
||||||
|
prerelease: ${{ contains(steps.get_tag.outputs.tag, '-') }}
|
||||||
|
|
||||||
|
wait-for-docker:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: create-release
|
||||||
|
permissions:
|
||||||
|
packages: read
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Get tag
|
||||||
|
id: get_tag
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Wait for Docker images
|
||||||
|
run: |
|
||||||
|
echo "Waiting for Docker images to be available..."
|
||||||
|
TAG="${{ steps.get_tag.outputs.tag }}"
|
||||||
|
|
||||||
|
for i in {1..30}; do
|
||||||
|
echo "Attempt $i: Checking if images are available..."
|
||||||
|
|
||||||
|
if docker manifest inspect ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG} >/dev/null 2>&1; then
|
||||||
|
echo "✅ Docker images are available!"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Images not yet available, waiting 30 seconds..."
|
||||||
|
sleep 30
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "❌ Timeout waiting for Docker images"
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
- name: Test Docker image
|
||||||
|
run: |
|
||||||
|
TAG="${{ steps.get_tag.outputs.tag }}"
|
||||||
|
echo "Testing Docker image: ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG}"
|
||||||
|
|
||||||
|
# Pull the image
|
||||||
|
docker pull ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG}
|
||||||
|
|
||||||
|
# Run a quick test
|
||||||
|
docker run --rm --name test-container \
|
||||||
|
ghcr.io/${{ github.repository_owner }}/voice-rss-summary:${TAG} \
|
||||||
|
timeout 10 bun --version || true
|
||||||
|
|
||||||
|
echo "✅ Docker image test completed"
|
20
Dockerfile
20
Dockerfile
@ -22,18 +22,28 @@ FROM oven/bun:latest AS runtime
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install MeCab for English to Katakana conversion
|
# Install MeCab for English to Katakana conversion and Chrome dependencies for Puppeteer
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y mecab mecab-ipadic-utf8 libmecab-dev && \
|
apt-get install -y \
|
||||||
|
mecab mecab-ipadic-utf8 libmecab-dev \
|
||||||
|
wget gnupg ca-certificates \
|
||||||
|
fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
|
||||||
|
libdrm2 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libxss1 \
|
||||||
|
libgtk-3-0 libnspr4 libnss3 libxdamage1 libxfixes3 libxrandr2 \
|
||||||
|
libgconf-2-4 libxss1 libasound2 libxtst6 libatspi2.0-0 libdrm2 \
|
||||||
|
libxcomposite1 libxcursor1 libxi6 libxtst6 xdg-utils lsb-release \
|
||||||
|
libglib2.0-0 libnss3-dev libgconf-2-4 libxrandr2 libasound2-dev \
|
||||||
|
libpangocairo-1.0-0 libatk1.0-dev libcairo-gobject2 libgtk-3-dev \
|
||||||
|
libgdk-pixbuf2.0-dev \
|
||||||
|
--no-install-recommends && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN bunx puppeteer browsers install chrome
|
||||||
|
|
||||||
# Copy built application from builder stage
|
# Copy built application from builder stage
|
||||||
COPY --from=builder /app .
|
COPY --from=builder /app .
|
||||||
|
|
||||||
# Create necessary directories with proper permissions
|
|
||||||
RUN mkdir -p data public/podcast_audio
|
|
||||||
|
|
||||||
# Expose ports
|
# Expose ports
|
||||||
EXPOSE 3000 3001
|
EXPOSE 3000 3001
|
||||||
|
|
||||||
|
20
build-amd64.sh
Executable file
20
build-amd64.sh
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Quick script to build AMD64 image and publish to GHCR
|
||||||
|
# Usage: ./build-amd64.sh [tag]
|
||||||
|
|
||||||
|
TAG="${1:-latest}"
|
||||||
|
|
||||||
|
echo "🔨 Building and publishing AMD64 image..."
|
||||||
|
echo "Tag: ${TAG}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Build and push AMD64 image in one command
|
||||||
|
./publish-docker.sh "${TAG}" --platform=linux/amd64 --build-and-push
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "✅ AMD64 image built and published successfully!"
|
||||||
|
echo ""
|
||||||
|
echo "To run on AMD64 systems:"
|
||||||
|
echo " docker run --platform linux/amd64 -p 3000:3000 -p 3001:3001 ghcr.io/anosatsuk124/voice-rss-summary:${TAG}"
|
@ -2,14 +2,47 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Build Docker image for Voice RSS Summary project
|
# Build Docker image for Voice RSS Summary project
|
||||||
# Usage: ./build-docker-image.sh [tag] [build-args...]
|
# Usage: ./build-docker-image.sh [tag] [--platform=platform] [build-args...]
|
||||||
|
|
||||||
IMAGE_NAME="voice-rss-summary"
|
IMAGE_NAME="voice-rss-summary"
|
||||||
TAG="${1:-latest}"
|
TAG="latest"
|
||||||
|
PLATFORM=""
|
||||||
|
BUILD_ARGS=()
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
--platform=*)
|
||||||
|
PLATFORM="${1#*=}"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--platform)
|
||||||
|
PLATFORM="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-*)
|
||||||
|
BUILD_ARGS+=("$1")
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
if [[ -z "${TAG_SET:-}" ]]; then
|
||||||
|
TAG="$1"
|
||||||
|
TAG_SET=true
|
||||||
|
else
|
||||||
|
BUILD_ARGS+=("$1")
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
FULL_TAG="${IMAGE_NAME}:${TAG}"
|
FULL_TAG="${IMAGE_NAME}:${TAG}"
|
||||||
|
|
||||||
echo "Building Docker image: ${FULL_TAG}"
|
echo "Building Docker image: ${FULL_TAG}"
|
||||||
echo "Build context: $(pwd)"
|
echo "Build context: $(pwd)"
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
echo "Target platform: ${PLATFORM}"
|
||||||
|
fi
|
||||||
|
|
||||||
# Check if Dockerfile exists
|
# Check if Dockerfile exists
|
||||||
if [[ ! -f "Dockerfile" ]]; then
|
if [[ ! -f "Dockerfile" ]]; then
|
||||||
@ -18,15 +51,34 @@ if [[ ! -f "Dockerfile" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Build with build cache and progress output
|
# Build with build cache and progress output
|
||||||
exec docker build \
|
DOCKER_CMD=(docker build --tag "${FULL_TAG}" --progress=plain --build-arg BUILDKIT_INLINE_CACHE=1)
|
||||||
--tag "${FULL_TAG}" \
|
|
||||||
--progress=plain \
|
# Add platform if specified
|
||||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
"${@:2}" \
|
DOCKER_CMD+=(--platform "$PLATFORM")
|
||||||
.
|
fi
|
||||||
|
|
||||||
|
# Add any additional build args
|
||||||
|
if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
|
||||||
|
DOCKER_CMD+=("${BUILD_ARGS[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add build context
|
||||||
|
DOCKER_CMD+=(.)
|
||||||
|
|
||||||
|
echo "Running: ${DOCKER_CMD[*]}"
|
||||||
|
"${DOCKER_CMD[@]}"
|
||||||
|
|
||||||
# Display image info
|
# Display image info
|
||||||
echo "\nBuild completed successfully!"
|
echo ""
|
||||||
|
echo "Build completed successfully!"
|
||||||
echo "Image: ${FULL_TAG}"
|
echo "Image: ${FULL_TAG}"
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
echo "Platform: ${PLATFORM}"
|
||||||
|
fi
|
||||||
echo "Size: $(docker images --format 'table {{.Size}}' "${FULL_TAG}" | tail -n +2)"
|
echo "Size: $(docker images --format 'table {{.Size}}' "${FULL_TAG}" | tail -n +2)"
|
||||||
echo "\nTo run the container, use: ./run-docker.sh"
|
echo ""
|
||||||
|
echo "To run the container, use: ./run-docker.sh"
|
||||||
|
if [[ -n "$PLATFORM" && "$PLATFORM" != "linux/amd64" ]]; then
|
||||||
|
echo "Note: Cross-platform image built. May need to push to registry for deployment."
|
||||||
|
fi
|
||||||
|
145
publish-docker.sh
Executable file
145
publish-docker.sh
Executable file
@ -0,0 +1,145 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Publish Docker image to GitHub Container Registry
|
||||||
|
# Usage: ./publish-docker.sh [tag] [username] [--platform=platform] [--build-and-push]
|
||||||
|
|
||||||
|
GITHUB_USERNAME="anosatsuk124"
|
||||||
|
TAG="latest"
|
||||||
|
PLATFORM=""
|
||||||
|
BUILD_AND_PUSH=false
|
||||||
|
BUILD_ARGS=()
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
--platform=*)
|
||||||
|
PLATFORM="${1#*=}"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--platform)
|
||||||
|
PLATFORM="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--build-and-push)
|
||||||
|
BUILD_AND_PUSH=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--username=*)
|
||||||
|
GITHUB_USERNAME="${1#*=}"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-*)
|
||||||
|
BUILD_ARGS+=("$1")
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
if [[ -z "${TAG_SET:-}" ]]; then
|
||||||
|
TAG="$1"
|
||||||
|
TAG_SET=true
|
||||||
|
elif [[ -z "${USERNAME_SET:-}" ]]; then
|
||||||
|
GITHUB_USERNAME="$1"
|
||||||
|
USERNAME_SET=true
|
||||||
|
else
|
||||||
|
BUILD_ARGS+=("$1")
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
LOCAL_IMAGE="voice-rss-summary:${TAG}"
|
||||||
|
GHCR_IMAGE_1="ghcr.io/${GITHUB_USERNAME}/voice-rss-summary:${TAG}"
|
||||||
|
GHCR_IMAGE_2="ghcr.io/${GITHUB_USERNAME}/voicersssummary:${TAG}"
|
||||||
|
|
||||||
|
echo "Publishing Docker image to GitHub Container Registry"
|
||||||
|
echo "Local image: ${LOCAL_IMAGE}"
|
||||||
|
echo "GHCR images: ${GHCR_IMAGE_1}, ${GHCR_IMAGE_2}"
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
echo "Target platform: ${PLATFORM}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build image if requested
|
||||||
|
if [[ "$BUILD_AND_PUSH" == "true" ]]; then
|
||||||
|
echo "Building image first..."
|
||||||
|
BUILD_CMD=(./build-docker-image.sh "$TAG")
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
BUILD_CMD+=(--platform "$PLATFORM")
|
||||||
|
fi
|
||||||
|
if [[ ${#BUILD_ARGS[@]} -gt 0 ]]; then
|
||||||
|
BUILD_CMD+=("${BUILD_ARGS[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Running: ${BUILD_CMD[*]}"
|
||||||
|
if ! "${BUILD_CMD[@]}"; then
|
||||||
|
echo "Error: Failed to build image"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if local image exists
|
||||||
|
if ! docker image inspect "${LOCAL_IMAGE}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: Local Docker image '${LOCAL_IMAGE}' not found"
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
echo "Build it first with: ./build-docker-image.sh ${TAG} --platform=${PLATFORM}"
|
||||||
|
else
|
||||||
|
echo "Build it first with: ./build-docker-image.sh ${TAG}"
|
||||||
|
fi
|
||||||
|
echo "Or use --build-and-push flag to build and push in one command"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if user is logged in to GHCR.
# `docker login` records the registry host in the Docker client config (under
# "auths", or "credHelpers" when a per-registry helper is configured), so look
# for it there. The previous probe piped a placeholder password into
# `docker login`, which the registry always rejects, so the script aborted here
# even for users who were already authenticated.
DOCKER_CONFIG_FILE="${DOCKER_CONFIG:-${HOME:-}/.docker}/config.json"
echo "Checking GHCR authentication..."
if ! grep -q '"ghcr.io"' "${DOCKER_CONFIG_FILE}" 2>/dev/null; then
    echo "Please authenticate with GitHub Container Registry first:"
    echo "1. Create a personal access token with 'write:packages' scope at:"
    echo "   https://github.com/settings/tokens"
    echo "2. Login with: echo \$GITHUB_TOKEN | docker login ghcr.io -u ${GITHUB_USERNAME} --password-stdin"
    echo "   or: docker login ghcr.io -u ${GITHUB_USERNAME}"
    exit 1
fi
|
||||||
|
|
||||||
|
# Tag images for GHCR
|
||||||
|
echo "Tagging images for GHCR..."
|
||||||
|
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_1}"
|
||||||
|
docker tag "${LOCAL_IMAGE}" "${GHCR_IMAGE_2}"
|
||||||
|
|
||||||
|
# Push to GHCR
|
||||||
|
echo "Pushing ${GHCR_IMAGE_1}..."
|
||||||
|
if docker push "${GHCR_IMAGE_1}"; then
|
||||||
|
echo "✅ Successfully pushed ${GHCR_IMAGE_1}"
|
||||||
|
else
|
||||||
|
echo "❌ Failed to push ${GHCR_IMAGE_1}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Pushing ${GHCR_IMAGE_2}..."
|
||||||
|
if docker push "${GHCR_IMAGE_2}"; then
|
||||||
|
echo "✅ Successfully pushed ${GHCR_IMAGE_2}"
|
||||||
|
else
|
||||||
|
echo "❌ Failed to push ${GHCR_IMAGE_2}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "🎉 Successfully published Docker images to GitHub Container Registry!"
|
||||||
|
echo ""
|
||||||
|
echo "Images available at:"
|
||||||
|
echo " - ${GHCR_IMAGE_1}"
|
||||||
|
echo " - ${GHCR_IMAGE_2}"
|
||||||
|
if [[ -n "$PLATFORM" ]]; then
|
||||||
|
echo "Platform: ${PLATFORM}"
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
echo "To run from GHCR:"
|
||||||
|
if [[ -n "$PLATFORM" && "$PLATFORM" != "$(uname -m)" ]]; then
|
||||||
|
echo " docker run --platform ${PLATFORM} -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
|
||||||
|
else
|
||||||
|
echo " docker run -p 3000:3000 -p 3001:3001 ${GHCR_IMAGE_1}"
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
echo "To use with run-docker.sh:"
|
||||||
|
echo " ./run-docker.sh container-name ${TAG} --from-ghcr"
|
@ -2,21 +2,46 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Run Docker container for Voice RSS Summary project
|
# Run Docker container for Voice RSS Summary project
|
||||||
# Usage: ./run-docker.sh [container-name] [image-tag]
|
# Usage: ./run-docker.sh [container-name] [image-tag] [--from-ghcr]
|
||||||
|
|
||||||
|
GITHUB_USERNAME="anosatsuk124"
|
||||||
IMAGE_NAME="voice-rss-summary"
|
IMAGE_NAME="voice-rss-summary"
|
||||||
CONTAINER_NAME="${1:-voice-rss-summary}"
|
CONTAINER_NAME="${1:-voice-rss-summary}"
|
||||||
IMAGE_TAG="${2:-latest}"
|
IMAGE_TAG="${2:-latest}"
|
||||||
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
|
FROM_GHCR=false
|
||||||
|
|
||||||
|
# Check for --from-ghcr flag
|
||||||
|
for arg in "$@"; do
|
||||||
|
if [[ "$arg" == "--from-ghcr" ]]; then
|
||||||
|
FROM_GHCR=true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ "$FROM_GHCR" == "true" ]]; then
|
||||||
|
FULL_IMAGE="ghcr.io/${GITHUB_USERNAME}/${IMAGE_NAME}:${IMAGE_TAG}"
|
||||||
|
else
|
||||||
|
FULL_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Starting Docker container: ${CONTAINER_NAME}"
|
echo "Starting Docker container: ${CONTAINER_NAME}"
|
||||||
echo "Using image: ${FULL_IMAGE}"
|
echo "Using image: ${FULL_IMAGE}"
|
||||||
|
|
||||||
# Check if image exists
|
# Check if image exists or pull from GHCR
|
||||||
if ! docker image inspect "${FULL_IMAGE}" >/dev/null 2>&1; then
|
if ! docker image inspect "${FULL_IMAGE}" >/dev/null 2>&1; then
|
||||||
echo "Error: Docker image '${FULL_IMAGE}' not found"
|
if [[ "$FROM_GHCR" == "true" ]]; then
|
||||||
echo "Build it first with: ./build-docker-image.sh"
|
echo "Pulling image from GitHub Container Registry..."
|
||||||
exit 1
|
if ! docker pull "${FULL_IMAGE}"; then
|
||||||
|
echo "Error: Failed to pull Docker image '${FULL_IMAGE}' from GHCR"
|
||||||
|
echo "Make sure the image exists and you have access to it"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Error: Docker image '${FULL_IMAGE}' not found"
|
||||||
|
echo "Build it first with: ./build-docker-image.sh"
|
||||||
|
echo "Or use --from-ghcr flag to pull from GitHub Container Registry"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Stop and remove existing container if it exists
|
# Stop and remove existing container if it exists
|
||||||
@ -57,10 +82,13 @@ exec docker run \
|
|||||||
--health-retries=3 \
|
--health-retries=3 \
|
||||||
"${FULL_IMAGE}"
|
"${FULL_IMAGE}"
|
||||||
|
|
||||||
echo "\nContainer started successfully!"
|
echo ""
|
||||||
|
echo "Container started successfully!"
|
||||||
echo "Container name: ${CONTAINER_NAME}"
|
echo "Container name: ${CONTAINER_NAME}"
|
||||||
|
echo "Image: ${FULL_IMAGE}"
|
||||||
echo "Web UI: http://localhost:3000"
|
echo "Web UI: http://localhost:3000"
|
||||||
echo "Admin panel: http://localhost:3001"
|
echo "Admin panel: http://localhost:3001"
|
||||||
echo "\nTo view logs: docker logs -f ${CONTAINER_NAME}"
|
echo ""
|
||||||
|
echo "To view logs: docker logs -f ${CONTAINER_NAME}"
|
||||||
echo "To stop: docker stop ${CONTAINER_NAME}"
|
echo "To stop: docker stop ${CONTAINER_NAME}"
|
||||||
echo "To remove: docker rm ${CONTAINER_NAME}"
|
echo "To remove: docker rm ${CONTAINER_NAME}"
|
||||||
|
@ -8,9 +8,28 @@ export interface ExtractedContent {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Settings that control how `retryWithBackoff` spaces out and limits its attempts. */
interface RetryOptions {
  /**
   * Highest attempt number that will be tried (counting the first call);
   * once the attempt counter reaches this value the error is rethrown.
   */
  maxRetries: number;
  /** Delay in milliseconds applied after the first failed attempt. */
  baseDelay: number;
  /** Ceiling in milliseconds for any single delay between attempts. */
  maxDelay: number;
  /** Factor the delay is multiplied by for each further failed attempt. */
  backoffMultiplier: number;
}

/** Defaults: up to 3 attempts, waiting 1s then 2s, never longer than 10s. */
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 10000,
  backoffMultiplier: 2,
};
|
||||||
|
|
||||||
// Singleton browser instance for reuse
|
// Singleton browser instance for reuse
|
||||||
let sharedBrowser: Browser | null = null;
|
let sharedBrowser: Browser | null = null;
|
||||||
|
|
||||||
|
/**
 * Pauses the caller for the given number of milliseconds.
 * Used in this file in place of `page.waitForTimeout`.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function waitForTimeout(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
// Dynamic content handling function
|
// Dynamic content handling function
|
||||||
async function handleDynamicContent(page: any): Promise<void> {
|
async function handleDynamicContent(page: any): Promise<void> {
|
||||||
try {
|
try {
|
||||||
@ -90,7 +109,7 @@ async function handleDynamicContent(page: any): Promise<void> {
|
|||||||
const button = await page.$(selector);
|
const button = await page.$(selector);
|
||||||
if (button) {
|
if (button) {
|
||||||
await button.click();
|
await button.click();
|
||||||
await page.waitForTimeout(2000);
|
await waitForTimeout(2000);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@ -117,12 +136,12 @@ async function handleDynamicContent(page: any): Promise<void> {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
// Final wait for any remaining dynamic content
|
// Final wait for any remaining dynamic content
|
||||||
await page.waitForTimeout(2000);
|
await waitForTimeout(2000);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log('Dynamic content handling failed, using basic timeout:', error);
|
console.log('Dynamic content handling failed, using basic timeout:', error);
|
||||||
// If dynamic content handling fails, continue with basic timeout
|
// If dynamic content handling fails, continue with basic timeout
|
||||||
await page.waitForTimeout(3000);
|
await waitForTimeout(3000);
|
||||||
}
|
}
|
||||||
console.log('Dynamic content handling completed.');
|
console.log('Dynamic content handling completed.');
|
||||||
}
|
}
|
||||||
@ -141,12 +160,74 @@ async function getBrowser(): Promise<Browser> {
|
|||||||
"--disable-gpu",
|
"--disable-gpu",
|
||||||
"--disable-web-security",
|
"--disable-web-security",
|
||||||
"--disable-features=VizDisplayCompositor",
|
"--disable-features=VizDisplayCompositor",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-renderer-backgrounding",
|
||||||
|
"--disable-field-trial-config",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--enable-automation",
|
||||||
|
"--force-device-scale-factor=1",
|
||||||
|
"--ignore-certificate-errors",
|
||||||
|
"--ignore-ssl-errors",
|
||||||
|
"--ignore-certificate-errors-spki-list",
|
||||||
|
"--allow-running-insecure-content",
|
||||||
|
"--disable-extensions",
|
||||||
|
"--no-default-browser-check",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-sync",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--no-pings",
|
||||||
|
"--mute-audio"
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return sharedBrowser;
|
return sharedBrowser;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Message fragments that mark a failure as transient and therefore worth retrying.
const RETRYABLE_ERROR_MARKERS: readonly string[] = [
  'ERR_SOCKET_NOT_CONNECTED',
  'ERR_CONNECTION_REFUSED',
  'ERR_CONNECTION_RESET',
  'ERR_NETWORK_CHANGED',
  'ERR_INTERNET_DISCONNECTED',
  'ERR_NAME_NOT_RESOLVED',
  'ERR_TIMED_OUT',
  'Protocol error',
  'Navigation timeout',
  'net::',
  'Target closed',
  'Session closed',
];

/**
 * Runs `operation` and, when it rejects with a retryable error, tries again
 * after an exponentially growing delay.
 *
 * An error is retryable only if it is an `Error` whose message contains one of
 * the known network/browser failure markers. Anything else is rethrown at once.
 *
 * @param operation - Async work to perform; invoked anew on every attempt.
 * @param options - Attempt limit and delay settings.
 * @param attempt - 1-based number of the attempt to start from.
 * @returns The value `operation` resolves with.
 * @throws The last error from `operation` when it is not retryable or the
 *   attempt counter has reached `options.maxRetries`.
 */
async function retryWithBackoff<T>(
  operation: () => Promise<T>,
  options: RetryOptions = DEFAULT_RETRY_OPTIONS,
  attempt: number = 1
): Promise<T> {
  for (let currentAttempt = attempt; ; currentAttempt += 1) {
    try {
      return await operation();
    } catch (error) {
      // Out of attempts, or not an Error at all: give up immediately.
      if (currentAttempt >= options.maxRetries || !(error instanceof Error)) {
        throw error;
      }

      const { message } = error;
      if (!RETRYABLE_ERROR_MARKERS.some((marker) => message.includes(marker))) {
        throw error;
      }

      // baseDelay * multiplier^(attempt - 1), capped at maxDelay.
      const uncapped = options.baseDelay * options.backoffMultiplier ** (currentAttempt - 1);
      const delay = Math.min(uncapped, options.maxDelay);

      console.log(`Attempt ${currentAttempt} failed, retrying in ${delay}ms:`, message);
      await waitForTimeout(delay);
    }
  }
}
|
||||||
|
|
||||||
export async function closeBrowser(): Promise<void> {
|
export async function closeBrowser(): Promise<void> {
|
||||||
if (sharedBrowser && sharedBrowser.isConnected()) {
|
if (sharedBrowser && sharedBrowser.isConnected()) {
|
||||||
await sharedBrowser.close();
|
await sharedBrowser.close();
|
||||||
@ -154,49 +235,79 @@ export async function closeBrowser(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function extractArticleContent(
|
async function extractWithRetry(url: string): Promise<ExtractedContent> {
|
||||||
url: string,
|
const userAgents = [
|
||||||
): Promise<ExtractedContent> {
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
console.log(`Starting content extraction for: ${url}`);
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
let page = null;
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
try {
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
|
||||||
const browser = await getBrowser();
|
];
|
||||||
page = await browser.newPage();
|
|
||||||
|
|
||||||
// Set user agent and viewport
|
return retryWithBackoff(async () => {
|
||||||
await page.setUserAgent(
|
let page = null;
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
try {
|
||||||
);
|
const browser = await getBrowser();
|
||||||
await page.setViewport({ width: 1280, height: 720 });
|
page = await browser.newPage();
|
||||||
|
|
||||||
// Set navigation timeout and disable images for faster loading
|
// Randomize user agent to avoid detection
|
||||||
page.setDefaultNavigationTimeout(45000);
|
const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0];
|
||||||
page.setDefaultTimeout(45000);
|
await page.setUserAgent(userAgent!);
|
||||||
|
await page.setViewport({ width: 1280, height: 720 });
|
||||||
|
|
||||||
// Block unnecessary resources to speed up loading
|
// Set longer timeout for problematic sites
|
||||||
await page.setRequestInterception(true);
|
page.setDefaultNavigationTimeout(60000);
|
||||||
page.on('request', (req) => {
|
page.setDefaultTimeout(60000);
|
||||||
const resourceType = req.resourceType();
|
|
||||||
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
|
// Block unnecessary resources to speed up loading
|
||||||
req.abort();
|
await page.setRequestInterception(true);
|
||||||
} else {
|
page.on('request', (req) => {
|
||||||
req.continue();
|
const resourceType = req.resourceType();
|
||||||
|
if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font') {
|
||||||
|
req.abort();
|
||||||
|
} else {
|
||||||
|
req.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add extra headers to appear more like a real browser
|
||||||
|
await page.setExtraHTTPHeaders({
|
||||||
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||||
|
'Accept-Language': 'en-US,en;q=0.5',
|
||||||
|
'Accept-Encoding': 'gzip, deflate, br',
|
||||||
|
'DNT': '1',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Upgrade-Insecure-Requests': '1'
|
||||||
|
});
|
||||||
|
|
||||||
|
// Navigate with multiple wait strategies
|
||||||
|
let response;
|
||||||
|
try {
|
||||||
|
response = await page.goto(url, {
|
||||||
|
waitUntil: ["domcontentloaded", "networkidle0"],
|
||||||
|
timeout: 60000,
|
||||||
|
});
|
||||||
|
} catch (networkError) {
|
||||||
|
// Fallback to more basic wait strategy
|
||||||
|
const errorMessage = networkError instanceof Error ? networkError.message : 'Unknown error';
|
||||||
|
console.log('Network idle failed, trying domcontentloaded only:', errorMessage);
|
||||||
|
response = await page.goto(url, {
|
||||||
|
waitUntil: "domcontentloaded",
|
||||||
|
timeout: 60000,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
// Navigate to the page with better waiting strategy
|
if (!response) {
|
||||||
const response = await page.goto(url, {
|
throw new Error('No response received from server');
|
||||||
waitUntil: "domcontentloaded",
|
}
|
||||||
timeout: 45000,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!response || !response.ok()) {
|
const status = response?.status();
|
||||||
throw new Error(`HTTP ${response?.status()}: Failed to load page`);
|
if (status && status >= 400) {
|
||||||
}
|
throw new Error(`HTTP ${status}: ${response?.statusText() || 'Unknown error'}`);
|
||||||
|
}
|
||||||
|
|
||||||
// Enhanced dynamic content handling
|
// Enhanced dynamic content handling
|
||||||
console.log('Handling dynamic content...');
|
console.log('Handling dynamic content...');
|
||||||
await handleDynamicContent(page);
|
await handleDynamicContent(page);
|
||||||
|
|
||||||
// Extract content using advanced multi-strategy approach
|
// Extract content using advanced multi-strategy approach
|
||||||
console.log('Extracting content using multi-strategy approach...');
|
console.log('Extracting content using multi-strategy approach...');
|
||||||
@ -415,13 +526,13 @@ export async function extractArticleContent(
|
|||||||
|
|
||||||
console.log(`Found ${candidates.length} content candidates`);
|
console.log(`Found ${candidates.length} content candidates`);
|
||||||
if (candidates.length > 0) {
|
if (candidates.length > 0) {
|
||||||
console.log(`Best candidate score: ${candidates[0].score}, selector: ${candidates[0].selector}`);
|
console.log(`Best candidate score: ${candidates[0]!.score}, selector: ${candidates[0]!.selector}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the best content
|
// Get the best content
|
||||||
let content = "";
|
let content = "";
|
||||||
if (candidates.length > 0) {
|
if (candidates.length > 0) {
|
||||||
content = candidates[0].content;
|
content = candidates[0]!.content;
|
||||||
|
|
||||||
// If the best candidate is still short, try combining top candidates
|
// If the best candidate is still short, try combining top candidates
|
||||||
if (content.length < 500 && candidates.length > 1) {
|
if (content.length < 500 && candidates.length > 1) {
|
||||||
@ -508,9 +619,9 @@ export async function extractArticleContent(
|
|||||||
extractedData.content = fallbackData;
|
extractedData.content = fallbackData;
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
title: extractedData.title,
|
title: extractedData.title || '',
|
||||||
content: extractedData.content || "",
|
content: extractedData.content || "",
|
||||||
description: extractedData.description,
|
description: extractedData.description || '',
|
||||||
success: false,
|
success: false,
|
||||||
error: `Insufficient content extracted (${extractedData.content?.length || 0} characters)`,
|
error: `Insufficient content extracted (${extractedData.content?.length || 0} characters)`,
|
||||||
};
|
};
|
||||||
@ -524,26 +635,65 @@ export async function extractArticleContent(
|
|||||||
content = content.substring(0, maxLength) + "...";
|
content = content.substring(0, maxLength) + "...";
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Successfully extracted content: ${content.length} characters`);
|
console.log(`Successfully extracted content: ${content.length} characters`);
|
||||||
return {
|
return {
|
||||||
title: extractedData.title,
|
title: extractedData.title,
|
||||||
content,
|
content,
|
||||||
description: extractedData.description,
|
description: extractedData.description,
|
||||||
success: true,
|
success: true,
|
||||||
};
|
};
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Content extraction attempt failed:`, error);
|
||||||
|
throw error; // Let retry logic handle this
|
||||||
|
} finally {
|
||||||
|
if (page) {
|
||||||
|
try {
|
||||||
|
await page.close();
|
||||||
|
} catch (closeError) {
|
||||||
|
console.warn('Failed to close page:', closeError);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function extractArticleContent(
|
||||||
|
url: string,
|
||||||
|
): Promise<ExtractedContent> {
|
||||||
|
console.log(`Starting content extraction for: ${url}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
return await extractWithRetry(url);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Content extraction failed for ${url}:`, error);
|
console.error(`Content extraction failed after all retries for ${url}:`, error);
|
||||||
|
|
||||||
|
// Provide more specific error messages
|
||||||
|
let errorMessage = "Unknown error occurred";
|
||||||
|
if (error instanceof Error) {
|
||||||
|
if (error.message.includes('ERR_SOCKET_NOT_CONNECTED')) {
|
||||||
|
errorMessage = "Network connection failed - server may be unreachable";
|
||||||
|
} else if (error.message.includes('ERR_CONNECTION_REFUSED')) {
|
||||||
|
errorMessage = "Connection refused by server";
|
||||||
|
} else if (error.message.includes('ERR_NAME_NOT_RESOLVED')) {
|
||||||
|
errorMessage = "DNS resolution failed - domain may not exist";
|
||||||
|
} else if (error.message.includes('ERR_TIMED_OUT')) {
|
||||||
|
errorMessage = "Request timed out - server too slow";
|
||||||
|
} else if (error.message.includes('HTTP 4')) {
|
||||||
|
errorMessage = `Client error: ${error.message}`;
|
||||||
|
} else if (error.message.includes('HTTP 5')) {
|
||||||
|
errorMessage = `Server error: ${error.message}`;
|
||||||
|
} else {
|
||||||
|
errorMessage = error.message;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: "",
|
title: "",
|
||||||
content: "",
|
content: "",
|
||||||
description: "",
|
description: "",
|
||||||
success: false,
|
success: false,
|
||||||
error: error instanceof Error ? error.message : "Unknown error occurred",
|
error: errorMessage,
|
||||||
};
|
};
|
||||||
} finally {
|
|
||||||
if (page) {
|
|
||||||
await page.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user