Fix
This commit is contained in:
		@@ -1,4 +1,6 @@
 | 
				
			|||||||
import puppeteer, { type Browser } from "puppeteer";
 | 
					import puppeteer, { type Browser } from "puppeteer";
 | 
				
			||||||
 | 
					import * as cheerio from "cheerio";
 | 
				
			||||||
 | 
					import type { CheerioAPI } from "cheerio";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export interface ExtractedContent {
 | 
					export interface ExtractedContent {
 | 
				
			||||||
  title?: string;
 | 
					  title?: string;
 | 
				
			||||||
@@ -235,6 +237,300 @@ export async function closeBrowser(): Promise<void> {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Browser-free content extraction: downloads the page with `fetch` and
					 * parses the returned HTML with cheerio.
					 *
					 * Extraction strategy, in order:
					 *  1. Score every element matching a list of known "article container"
					 *     selectors and take the highest-scoring one (combining the top few when
					 *     the best one is short).
					 *  2. If that yields fewer than 200 characters, aggregate long `<p>` texts.
					 *  3. If that still yields fewer than 200 characters, read `articleBody` /
					 *     `text` from the page's JSON-LD structured data.
					 *
					 * @param url - Address of the page to download.
					 * @returns The extracted title, description and content. The function does
					 *   not throw: HTTP errors, timeouts, parse failures and results shorter than
					 *   100 characters are all reported as `success: false` with an `error` message.
					 */
					async function extractWithFetchFallback(url: string): Promise<ExtractedContent> {
					  console.log(`Using fetch fallback for: ${url}`);

					  try {
					    const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

					    // Browser-like headers; the request is aborted after 30 seconds.
					    const response = await fetch(url, {
					      headers: {
					        'User-Agent': userAgent,
					        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
					        'Accept-Language': 'en-US,en;q=0.5',
					        'Accept-Encoding': 'gzip, deflate, br',
					        'DNT': '1',
					        'Connection': 'keep-alive',
					        'Upgrade-Insecure-Requests': '1',
					        'Cache-Control': 'no-cache'
					      },
					      signal: AbortSignal.timeout(30000) // 30 second timeout
					    });

					    if (!response.ok) {
					      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
					    }

					    const html = await response.text();
					    const $ = cheerio.load(html);

					    // Capture JSON-LD payloads NOW: every <script> element is removed by the
					    // pruning step below, so reading them later would always find nothing.
					    const jsonLdBlocks = $('script[type="application/ld+json"]')
					      .toArray()
					      .map((node) => $(node).html() ?? '')
					      .filter((raw) => raw.trim().length > 0);

					    // Remove unwanted elements first
					    const unwantedSelectors = [
					      "script", "style", "noscript", "iframe", "embed", "object",
					      "nav", "header", "footer", "aside", "form",
					      ".advertisement", ".ads", ".ad", ".adsbygoogle", "[class*='ad-']", "[id*='ad-']",
					      ".sidebar", ".menu", ".navigation", ".nav", ".breadcrumb",
					      ".social-share", ".share", ".social", ".sns",
					      ".comments", ".comment", ".disqus",
					      ".cookie-banner", ".cookie", ".gdpr",
					      ".popup", ".modal", ".overlay", ".lightbox",
					      ".related", ".recommended", ".more-stories",
					      ".tags", ".categories", ".metadata",
					      ".author-bio", ".author-info",
					      ".newsletter", ".subscribe", ".signup",
					      "[role='complementary']", "[role='banner']", "[role='contentinfo']",
					      "[aria-label*='advertisement']", "[aria-label*='sidebar']"
					    ];

					    for (const selector of unwantedSelectors) {
					      $(selector).remove();
					    }

					    // Title: the first non-empty source wins, in this priority order.
					    const titleSources = [
					      $('meta[property="og:title"]').attr('content'),
					      $('meta[name="twitter:title"]').attr('content'),
					      $('h1').first().text().trim(),
					      $('.article-title, .post-title, .entry-title').first().text().trim(),
					      $('title').text().trim(),
					      $('[itemprop="headline"]').first().text().trim()
					    ];
					    const title = titleSources.find((source) => source && source.length > 0) ?? '';

					    // Description: the first non-empty source wins, in this priority order.
					    const descriptionSources = [
					      $('meta[property="og:description"]').attr('content'),
					      $('meta[name="description"]').attr('content'),
					      $('meta[name="twitter:description"]').attr('content'),
					      $('[itemprop="description"]').first().text().trim()
					    ];
					    const description = descriptionSources.find((source) => source && source.length > 0) ?? '';

					    // Content selectors, ordered from most specific (microdata, <article>)
					    // to most generic (layout wrappers).
					    const contentSelectors = [
					      '[itemtype*="Article"] [itemprop="articleBody"]',
					      '[itemtype*="NewsArticle"] [itemprop="articleBody"]',
					      '[itemtype*="BlogPosting"] [itemprop="articleBody"]',
					      'article[role="main"]',
					      'main article',
					      '[role="main"] article',
					      'article',
					      '.post-content', '.entry-content', '.article-content', '.content-area',
					      '.article-body', '.post-body', '.entry-body', '.story-body',
					      '.main-content', '.primary-content', '.page-content',
					      '.news-content', '.blog-content', '.editorial-content',
					      '.wp-content', '.entry', '.post',
					      '.section-content', '.postArticle-content', '.post-full-content',
					      '.markup', '.section--body', '.section-divider + .section-content',
					      '.honbun', '.main_text', '.article_body', '.news_body',
					      '.entry_text', '.blog_text', '.content_text',
					      '.kiji', '.news', '.article',
					      'main', '[role="main"]',
					      '#content', '#main', '#article', '#post', '#entry',
					      '#main-content', '#primary', '#content-area',
					      '.content', '.main', '.wrapper', '.container'
					    ];

					    // Heuristic quality score for a candidate container; never negative.
					    // Elements with fewer than 100 characters of text always score 0.
					    const calculateContentScore = (element: cheerio.Cheerio<any>): number => {
					      const text = element.text() || '';
					      if (text.length < 100) return 0;

					      let score = 0;

					      // Base score from text length (diminishing returns: capped at 50)
					      score += Math.min(text.length / 100, 50);

					      // Paragraph density
					      const paragraphs = element.find('p');
					      const avgParagraphLength = paragraphs.length > 0 ?
					        paragraphs.toArray().reduce((sum, p) => sum + ($(p).text().length || 0), 0) / paragraphs.length : 0;

					      if (avgParagraphLength > 100) score += 20;
					      if (paragraphs.length > 3) score += 10;

					      // Link density: share of the text that sits inside <a> elements.
					      const links = element.find('a');
					      const linkText = links.toArray().reduce((sum, link) => sum + ($(link).text().length || 0), 0);
					      const linkDensity = text.length > 0 ? linkText / text.length : 0;
					      if (linkDensity < 0.2) score += 15;
					      else if (linkDensity < 0.4) score += 5;
					      else score -= 10;

					      // Bonus for article-like structure
					      if (element.prop('tagName') === 'ARTICLE') score += 25;
					      if (element.attr('role') === 'main') score += 20;
					      if (element.find('h1, h2, h3').length > 0) score += 10;

					      // Bonus for semantic elements
					      const semanticElements = element.find('p, h1, h2, h3, h4, h5, h6, blockquote, ul, ol');
					      if (semanticElements.length > 5) score += 15;

					      // Penalty when the element's own class or id looks navigation-related
					      const navWords = ['メニュー', 'ナビ', 'カテゴリ', 'タグ', 'menu', 'navigation', 'nav', 'sidebar'];
					      const className = (element.attr('class') || '').toLowerCase();
					      const id = (element.attr('id') || '').toLowerCase();
					      if (navWords.some(word => className.includes(word) || id.includes(word))) {
					        score -= 20;
					      }

					      return Math.max(score, 0);
					    };

					    // Normalizes extracted text: every whitespace run (including newlines,
					    // so paragraph breaks are flattened) becomes a single space, zero-width
					    // characters are dropped, and the ends are trimmed.
					    const cleanText = (text: string): string => {
					      return text
					        .replace(/\s+/g, ' ')
					        .replace(/[\u200B-\u200D\uFEFF]/g, '')
					        .trim();
					    };

					    // Collect and score all content candidates
					    interface ContentCandidate {
					      element: cheerio.Cheerio<any>;
					      score: number;
					      content: string;
					      selector: string;
					    }

					    const candidates: ContentCandidate[] = [];
					    // One element can match several selectors (e.g. 'main article', 'article'
					    // and '.article'). Track nodes already recorded so that combining the top
					    // candidates below cannot concatenate the same text with itself.
					    const seenNodes = new Set<unknown>();

					    for (const selector of contentSelectors) {
					      try {
					        for (const [index, node] of $(selector).toArray().entries()) {
					          if (seenNodes.has(node)) continue;

					          const $element = $(node);
					          const text = $element.text() || '';
					          if (text.length > 200) {
					            seenNodes.add(node);
					            candidates.push({
					              element: $element,
					              score: calculateContentScore($element),
					              content: cleanText(text),
					              selector: `${selector}[${index}]`
					            });
					          }
					        }
					      } catch (e) {
					        // A selector that cannot be evaluated is skipped; the rest still run.
					        continue;
					      }
					    }

					    // Sort candidates by score (highest first)
					    candidates.sort((a, b) => b.score - a.score);

					    console.log(`Found ${candidates.length} content candidates`);

					    // Get the best content
					    let content = "";
					    const best = candidates[0];
					    if (best) {
					      console.log(`Best candidate score: ${best.score}, selector: ${best.selector}`);
					      content = best.content;

					      // If the best candidate is still short, try combining top candidates
					      if (content.length < 500 && candidates.length > 1) {
					        const topCandidates = candidates.slice(0, 3).filter(c => c.score > 10);
					        const combinedContent = topCandidates.map(c => c.content).join('\n\n');
					        if (combinedContent.length > content.length) {
					          content = cleanText(combinedContent);
					        }
					      }
					    }

					    // Fallback strategies if still no good content
					    if (!content || content.length < 200) {
					      console.log('Using paragraph aggregation fallback...');
					      const boilerplateWords = ['cookie', 'privacy', 'terms of service', 'subscribe', 'newsletter'];
					      const paragraphs = $('p').toArray()
					        .map(p => $(p).text().trim())
					        .filter(p => p.length > 50)
					        .filter(p => {
					          const lowerP = p.toLowerCase();
					          return !boilerplateWords.some(word => lowerP.includes(word));
					        });

					      if (paragraphs.length > 0) {
					        content = cleanText(paragraphs.join('\n\n'));
					      }
					    }

					    // Final fallback: structured data
					    if (!content || content.length < 200) {
					      console.log('Trying structured data fallback...');

					      // Returns the first non-empty string `articleBody` (preferred) or `text`
					      // found in a parsed JSON-LD value. Handles a single object, an array of
					      // objects, and objects nested under "@graph"; returns '' when none exists.
					      const findStructuredText = (node: unknown): string => {
					        if (Array.isArray(node)) {
					          for (const item of node) {
					            const found = findStructuredText(item);
					            if (found) return found;
					          }
					          return '';
					        }
					        if (typeof node !== 'object' || node === null) return '';

					        const record = node as Record<string, unknown>;
					        const articleBody = record['articleBody'];
					        if (typeof articleBody === 'string' && articleBody.length > 0) return articleBody;
					        const text = record['text'];
					        if (typeof text === 'string' && text.length > 0) return text;
					        return findStructuredText(record['@graph']);
					      };

					      for (const raw of jsonLdBlocks) {
					        try {
					          const structuredText = findStructuredText(JSON.parse(raw));
					          if (structuredText) {
					            content = cleanText(structuredText);
					            break;
					          }
					        } catch (e) {
					          // Malformed JSON-LD: ignore this payload and try the next one.
					        }
					      }
					    }

					    // Limit content length to avoid token limits
					    const maxLength = 50000;
					    if (content.length > maxLength) {
					      content = content.substring(0, maxLength) + "...";
					    }

					    console.log(`Fetch fallback extracted content: ${content.length} characters`);

					    // Anything under 100 characters is treated as a failed extraction.
					    if (!content || content.length < 100) {
					      return {
					        title: title || '',
					        content: '',
					        description: description || '',
					        success: false,
					        error: `Insufficient content extracted via fetch fallback (${content?.length || 0} characters)`,
					      };
					    }

					    return {
					      title: title || '',
					      content,
					      description: description || '',
					      success: true,
					    };

					  } catch (error) {
					    // Network failures, timeouts, non-2xx responses and parser errors all
					    // end up here and are converted into a failed result.
					    console.error(`Fetch fallback failed:`, error);
					    return {
					      title: '',
					      content: '',
					      description: '',
					      success: false,
					      error: error instanceof Error ? error.message : 'Unknown error in fetch fallback',
					    };
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function extractWithRetry(url: string): Promise<ExtractedContent> {
 | 
					async function extractWithRetry(url: string): Promise<ExtractedContent> {
 | 
				
			||||||
  const userAgents = [
 | 
					  const userAgents = [
 | 
				
			||||||
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
 | 
					    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
 | 
				
			||||||
@@ -667,6 +963,36 @@ export async function extractArticleContent(
 | 
				
			|||||||
  } catch (error) {
 | 
					  } catch (error) {
 | 
				
			||||||
    console.error(`Content extraction failed after all retries for ${url}:`, error);
 | 
					    console.error(`Content extraction failed after all retries for ${url}:`, error);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // Check if this is a Puppeteer launch/browser failure that should trigger fallback
 | 
				
			||||||
 | 
					    const shouldUseFallback = error instanceof Error && (
 | 
				
			||||||
 | 
					      error.message.includes('TimeoutError') ||
 | 
				
			||||||
 | 
					      error.message.includes('Timed out after') ||
 | 
				
			||||||
 | 
					      error.message.includes('waiting for the WS endpoint URL') ||
 | 
				
			||||||
 | 
					      error.message.includes('Browser closed') ||
 | 
				
			||||||
 | 
					      error.message.includes('Target closed') ||
 | 
				
			||||||
 | 
					      error.message.includes('Session closed') ||
 | 
				
			||||||
 | 
					      error.message.includes('Protocol error') ||
 | 
				
			||||||
 | 
					      error.message.includes('Connection terminated') ||
 | 
				
			||||||
 | 
					      error.message.includes('spawn') || // Process spawn errors
 | 
				
			||||||
 | 
					      error.message.includes('ECONNRESET') ||
 | 
				
			||||||
 | 
					      error.message.includes('ECONNREFUSED') ||
 | 
				
			||||||
 | 
					      error.message.includes('ENOTFOUND')
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (shouldUseFallback) {
 | 
				
			||||||
 | 
					      console.log(`Puppeteer failed, trying fetch fallback for ${url}`);
 | 
				
			||||||
 | 
					      try {
 | 
				
			||||||
 | 
					        const fallbackResult = await extractWithFetchFallback(url);
 | 
				
			||||||
 | 
					        if (fallbackResult.success) {
 | 
				
			||||||
 | 
					          console.log(`Fetch fallback succeeded for ${url}`);
 | 
				
			||||||
 | 
					          return fallbackResult;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        console.log(`Fetch fallback also failed for ${url}:`, fallbackResult.error);
 | 
				
			||||||
 | 
					      } catch (fallbackError) {
 | 
				
			||||||
 | 
					        console.error(`Fetch fallback threw error for ${url}:`, fallbackError);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    // Provide more specific error messages
 | 
					    // Provide more specific error messages
 | 
				
			||||||
    let errorMessage = "Unknown error occurred";
 | 
					    let errorMessage = "Unknown error occurred";
 | 
				
			||||||
    if (error instanceof Error) {
 | 
					    if (error instanceof Error) {
 | 
				
			||||||
@@ -682,6 +1008,8 @@ export async function extractArticleContent(
 | 
				
			|||||||
        errorMessage = `Client error: ${error.message}`;
 | 
					        errorMessage = `Client error: ${error.message}`;
 | 
				
			||||||
      } else if (error.message.includes('HTTP 5')) {
 | 
					      } else if (error.message.includes('HTTP 5')) {
 | 
				
			||||||
        errorMessage = `Server error: ${error.message}`;
 | 
					        errorMessage = `Server error: ${error.message}`;
 | 
				
			||||||
 | 
					      } else if (error.message.includes('TimeoutError')) {
 | 
				
			||||||
 | 
					        errorMessage = "Puppeteer browser launch timeout - both Puppeteer and fetch fallback failed";
 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
        errorMessage = error.message;
 | 
					        errorMessage = error.message;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user