/**
 * Background Service Worker - Manifest V3
 *
 * Responsibilities:
 * - Manage communication with popup and content scripts
 * - Fetch and parse product detail pages
 * - Extract image URLs from detail pages
 * - Download images using Chrome downloads API
 * - Track progress and state
 * - Handle retry logic and rate limiting
 */

// ============================================================================
// CONFIGURATION - Adjust selectors for detail page structure
// ============================================================================

const DETAIL_PAGE_SELECTORS = {
  // Common image container selectors on Yupoo detail pages
  imageContainer: [
    'div.photo-list',
    'div.details-images',
    'div.album',
    'div.gallery',
    'div.photo-album',
    'div[class*="images"]',
    'div[class*="photo"]',
    'div[class*="album"]',
    'ul.photo-list',
  ],
  imageElements: [
    'img.photo',
    'img[class*="detail"]',
    'img[class*="product"]',
    'img[data-src*=".jpg"]',
    'img[data-src*=".png"]',
    'img',
  ],
  imageLinks: [
    'a[href*=".jpg"]',
    'a[href*=".png"]',
    'a[href*=".webp"]',
    'a[href*="/fs/"]',
    'a[href*="/f/"]',
  ],
};

const CONFIG = {
  REQUEST_DELAY: 800, // ms between requests to avoid hammering
  RETRY_ATTEMPTS: 3,
  RETRY_DELAY: 2000, // ms
  MAX_CONCURRENT_DOWNLOADS: 2, // default, can be changed by user
  TIMEOUT: 30000, // ms for fetch requests
  MAX_LOG_ENTRIES: 100, // log lines kept for the popup
  BLOB_URL_REVOKE_DELAY: 1000, // ms to wait before releasing a blob URL
  FALLBACK_BASE_URL: 'https://www.yupoo.com/', // base for non-absolute image URLs
};

// Image extensions this worker knows how to name on disk.
const KNOWN_IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'bmp'];

// Patterns used by the markup-scanning fallback when no DOM parser exists.
const HTML_TAG_PATTERN = /<(img|a)\b([^>]*)>/gi;
const HTML_ATTRIBUTE_PATTERN = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;

// ============================================================================
// STATE MANAGEMENT
// ============================================================================

const state = {
  products: [],
  currentProductIndex: -1,
  totalImagesDownloaded: 0,
  errorCount: 0,
  isRunning: false,
  downloadedUrls: new Set(), // Deduplicate during this session
  maxConcurrency: CONFIG.MAX_CONCURRENT_DOWNLOADS,
  logs: [],
};

// Incremented for every started run so that a run which was stopped and then
// superseded by a new one can tell it no longer owns the shared state.
let activeRunId = 0;

// ============================================================================
// LOGGING
// ============================================================================

/**
 * Append a timestamped entry to the in-memory log (bounded to
 * CONFIG.MAX_LOG_ENTRIES), echo it to the console and notify listeners.
 *
 * @param {string} message - Text to record.
 * @param {string} [type='info'] - Entry category stored alongside the text.
 */
function log(message, type = 'info') {
  const timestamp = new Date().toLocaleTimeString();
  const entry = `[${timestamp}] ${message}`;

  state.logs.push({ message: entry, type });
  if (state.logs.length > CONFIG.MAX_LOG_ENTRIES) {
    state.logs.shift();
  }

  console.log(entry);
  broadcastUpdate();
}

/**
 * Record an error entry and bump the error counter.
 *
 * @param {string} message - Description of the failure.
 */
function logError(message) {
  // Count first so the broadcast triggered by log() already carries the new total.
  state.errorCount++;
  log(`❌ ${message}`, 'error');
}

/** @param {string} message - Text recorded as a success entry. */
function logSuccess(message) {
  log(`✅ ${message}`, 'success');
}

/** @param {string} message - Text recorded as a warning entry. */
function logWarning(message) {
  log(`⚠️ ${message}`, 'warning');
}

// ============================================================================
// COMMUNICATION & STATE BROADCAST
// ============================================================================

/**
 * Build the subset of the state that is shared with other extension contexts.
 *
 * @returns {object} Progress fields (products, counters, running flag, logs).
 */
function getSharedState() {
  return {
    products: state.products,
    currentProductIndex: state.currentProductIndex,
    totalImagesDownloaded: state.totalImagesDownloaded,
    errorCount: state.errorCount,
    isRunning: state.isRunning,
    logs: state.logs,
  };
}

/**
 * Send the current shared state as an `updateState` message.
 */
function broadcastUpdate() {
  chrome.runtime
    .sendMessage({ action: 'updateState', state: getSharedState() })
    .catch(() => {
      // Popup might be closed, ignore
    });
}

/**
 * Turn a user-supplied concurrency value into a positive integer, falling
 * back to the configured default for anything unusable.
 *
 * @param {*} value - Requested concurrency.
 * @returns {number} Integer >= 1.
 */
function normalizeConcurrency(value) {
  const parsed = Math.floor(Number(value));
  return Number.isFinite(parsed) && parsed >= 1 ? parsed : CONFIG.MAX_CONCURRENT_DOWNLOADS;
}

chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  const action = request?.action;
  console.log('[Background] Received:', action);

  switch (action) {
    case 'updateProducts':
      state.products = Array.isArray(request.products) ? request.products : [];
      state.currentProductIndex = -1;
      state.totalImagesDownloaded = 0;
      state.errorCount = 0;
      state.downloadedUrls.clear();
      state.logs = [];
      log(`Loaded ${state.products.length} products`, 'info');
      sendResponse({ success: true });
      break;

    case 'startDownload':
      state.maxConcurrency = normalizeConcurrency(request.maxConcurrency);
      log(`Starting downloads with max concurrency: ${state.maxConcurrency}`, 'info');
      // Runs in the background; the response is sent immediately.
      startDownloadProcess().catch((error) => {
        logError(`Download process crashed: ${error.message}`);
      });
      sendResponse({ success: true });
      break;

    case 'stopDownload':
      state.isRunning = false;
      log('Download stopped by user', 'warning');
      sendResponse({ success: true });
      break;

    case 'getState':
      sendResponse({ state: getSharedState() });
      break;

    default:
      // Unknown actions are left for other listeners.
      break;
  }
});

// ============================================================================
// DOWNLOAD MANAGEMENT
// ============================================================================

/**
 * Main download process: walks the product list sequentially until it is
 * finished, stopped, or superseded by a newer run.
 *
 * @returns {Promise<void>}
 */
async function startDownloadProcess() {
  if (state.isRunning) return;

  const runId = ++activeRunId;
  const shouldContinue = () => state.isRunning && runId === activeRunId;

  state.isRunning = true;
  broadcastUpdate();

  for (let i = 0; i < state.products.length && shouldContinue(); i++) {
    const product = state.products[i];
    state.currentProductIndex = i;
    log(
      `Processing product ${i + 1}/${state.products.length}: ${product?.productCode || 'unknown'}`,
      'info'
    );

    try {
      await downloadProductImages(product, shouldContinue);
      if (shouldContinue()) {
        await delay(CONFIG.REQUEST_DELAY);
      }
    } catch (error) {
      logError(`Failed to process product: ${error.message}`);
    }
  }

  // A stop followed by a new start hands the shared state to the newer run;
  // this one must not flip isRunning or report completion on its behalf.
  if (runId !== activeRunId) return;

  const finishedAllProducts = state.isRunning;
  state.isRunning = false;
  if (finishedAllProducts) {
    log('✨ Download process complete!', 'success');
  } else {
    log('Download process halted before all products were processed', 'warning');
  }
}

/**
 * Download all images for a single product.
 *
 * @param {object} product - Entry with `detailUrl`, and optionally
 *   `productCode` / `index` used to name the target folder.
 * @param {function(): boolean} [shouldContinue] - Polled before each image is
 *   started; returning false leaves the remaining images untouched.
 * @returns {Promise<void>}
 */
async function downloadProductImages(product, shouldContinue = () => true) {
  const folderName = sanitizeFolderName(product.productCode || `product_${product.index}`);

  try {
    if (!product.detailUrl) {
      logError(`Missing detail page URL for ${folderName}`);
      return;
    }

    log(`Fetching detail page: ${product.detailUrl}`, 'info');
    const html = await fetchWithRetry(product.detailUrl);

    if (!html || html.length === 0) {
      logError(`Detail page returned empty HTML for ${folderName}`);
      return;
    }
    log(`Detail page fetched: ${html.length} characters`, 'info');

    const imageUrls = extractImageUrlsFromHtml(html, product.detailUrl);
    if (imageUrls.length === 0) {
      logWarning(`No images found for product: ${folderName}. Check if page loads in browser.`);
      return;
    }
    log(`Found ${imageUrls.length} images for ${folderName}`, 'info');

    // Download images with controlled concurrency
    const { downloaded, failed } = await downloadImagesWithConcurrency(
      imageUrls,
      folderName,
      state.maxConcurrency,
      shouldContinue
    );

    if (failed > 0) {
      logWarning(
        `Downloaded ${downloaded}/${imageUrls.length} images for ${folderName} (${failed} failed)`
      );
    } else {
      logSuccess(`✓ Downloaded ${downloaded} images for ${folderName}`);
    }
  } catch (error) {
    logError(`Error downloading product ${folderName}: ${error.message}`);
  }
}

/**
 * Download multiple images with a concurrency limit.
 *
 * @param {string[]} imageUrls - URLs to download; the 1-based position of each
 *   URL becomes its file name.
 * @param {string} folderName - Target folder inside the downloads directory.
 * @param {number} maxConcurrency - Upper bound on simultaneous downloads;
 *   values below 1 (or non-numeric) are treated as 1.
 * @param {function(): boolean} [shouldContinue] - Polled before each image is
 *   started; returning false stops scheduling further images.
 * @returns {Promise<{downloaded: number, skipped: number, failed: number}>}
 *   Outcome counters. Individual failures are logged, never thrown.
 */
async function downloadImagesWithConcurrency(
  imageUrls,
  folderName,
  maxConcurrency,
  shouldContinue = () => true
) {
  // A limit below 1 would never start anything, so clamp it.
  const limit = Math.max(1, Math.floor(Number(maxConcurrency)) || 1);
  const summary = { downloaded: 0, skipped: 0, failed: 0 };
  let nextPosition = 0;

  // Each worker repeatedly claims the next unprocessed position.
  const runWorker = async () => {
    while (nextPosition < imageUrls.length && shouldContinue()) {
      const position = nextPosition++;
      const url = imageUrls[position];
      try {
        const wasDownloaded = await downloadSingleImage(url, folderName, position + 1);
        if (wasDownloaded) {
          summary.downloaded++;
        } else {
          summary.skipped++;
        }
      } catch (error) {
        summary.failed++;
        logError(`${error.message} (${url})`);
      }
    }
  };

  const workerCount = Math.min(limit, imageUrls.length);
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return summary;
}

/**
 * Download a single image unless it was already downloaded in this session.
 *
 * @param {string} imageUrl - Absolute, protocol-relative or site-relative URL.
 * @param {string} folderName - Target folder inside the downloads directory.
 * @param {number} index - 1-based number used as the file name.
 * @returns {Promise<boolean>} true when a file was saved, false when skipped.
 * @throws {Error} When the URL is unusable or the download fails.
 */
async function downloadSingleImage(imageUrl, folderName, index) {
  let absoluteUrl;
  try {
    // Also handles protocol-relative ("//host/path") and relative URLs.
    absoluteUrl = new URL(imageUrl, CONFIG.FALLBACK_BASE_URL).href;
  } catch (error) {
    throw new Error(`Failed to download image: invalid URL "${imageUrl}"`);
  }

  // Skip if already downloaded in this session
  if (state.downloadedUrls.has(absoluteUrl)) {
    logWarning(`Skipping duplicate image: ${imageUrl}`);
    return false;
  }

  try {
    const filename = await fetchImageAndDownload(absoluteUrl, folderName, index);
    state.downloadedUrls.add(absoluteUrl);
    state.totalImagesDownloaded++;
    log(`Downloaded: ${folderName}/${filename}`, 'success');
    return true;
  } catch (error) {
    throw new Error(`Failed to download image: ${error.message}`);
  }
}

/**
 * Issue a GET request that is aborted after CONFIG.TIMEOUT ms. The timeout
 * covers reading the body as well as receiving the headers.
 *
 * @param {string} url - Resource to fetch.
 * @param {object} headers - Request headers.
 * @param {function(Response): Promise<*>} readBody - Consumes the body.
 * @returns {Promise<{response: Response, body: *}>}
 * @throws {Error} On network failure, timeout or a non-2xx status.
 */
async function fetchWithTimeout(url, headers, readBody) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), CONFIG.TIMEOUT);

  try {
    const response = await fetch(url, {
      method: 'GET',
      headers,
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    return { response, body: await readBody(response) };
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Base64-encode raw bytes.
 *
 * @param {Uint8Array} bytes - Data to encode.
 * @returns {string} Base64 text.
 */
function encodeBase64(bytes) {
  // Convert in slices so the argument list passed to fromCharCode stays small.
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary);
}

/**
 * Produce a URL the downloads API can read the blob from.
 *
 * @param {Blob} blob - Image data.
 * @returns {Promise<{url: string, release: function(): void}>} The URL plus a
 *   callback that frees any resources tied to it.
 */
async function createDownloadUrl(blob) {
  if (typeof URL.createObjectURL === 'function') {
    const blobUrl = URL.createObjectURL(blob);
    return { url: blobUrl, release: () => URL.revokeObjectURL(blobUrl) };
  }

  // Object URLs are not offered in service workers; embed the bytes instead.
  const bytes = new Uint8Array(await blob.arrayBuffer());
  const mimeType = blob.type || 'application/octet-stream';
  return {
    url: `data:${mimeType};base64,${encodeBase64(bytes)}`,
    release: () => {},
  };
}

/**
 * Fetch image and use Chrome downloads API
 *
 * @param {string} imageUrl - Absolute image URL.
 * @param {string} folderName - Target folder inside the downloads directory.
 * @param {number} index - 1-based number used as the zero-padded file name.
 * @returns {Promise<string>} The file name (without folder) that was used.
 * @throws {Error} "Download failed: ..." on fetch or download errors.
 */
async function fetchImageAndDownload(imageUrl, folderName, index) {
  try {
    // NOTE(review): `Referer` is a forbidden request header in the Fetch
    // standard, so the browser may ignore this entry - confirm whether a
    // declarativeNetRequest rule is needed instead.
    const { response, body: blob } = await fetchWithTimeout(
      imageUrl,
      { Referer: 'https://www.yupoo.com/' },
      (res) => res.blob()
    );

    const ext = getImageExtension(imageUrl, response);
    const filename = `${String(index).padStart(2, '0')}.${ext}`;
    const filepath = `${folderName}/${filename}`;

    const { url, release } = await createDownloadUrl(blob);

    // Use Chrome downloads API
    return await new Promise((resolve, reject) => {
      chrome.downloads.download({ url, filename: filepath, saveAs: false }, () => {
        if (chrome.runtime.lastError) {
          release();
          reject(new Error(chrome.runtime.lastError.message));
        } else {
          // Clean up blob URL after a delay
          setTimeout(release, CONFIG.BLOB_URL_REVOKE_DELAY);
          resolve(filename);
        }
      });
    });
  } catch (error) {
    throw new Error(`Download failed: ${error.message}`);
  }
}

/**
 * Fetch URL with retry logic
 *
 * @param {string} url - Page to fetch as text.
 * @param {number} [attempt=1] - Number of the first attempt; attempts stop
 *   once CONFIG.RETRY_ATTEMPTS is reached.
 * @returns {Promise<string>} Response body.
 * @throws {Error} The error of the last attempt.
 */
async function fetchWithRetry(url, attempt = 1) {
  // NOTE(review): both headers below may be dropped or overridden by the
  // browser - confirm they have any effect.
  const headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Referer': 'https://www.yupoo.com/',
  };

  for (let currentAttempt = attempt; ; currentAttempt++) {
    try {
      const { body } = await fetchWithTimeout(url, headers, (res) => res.text());
      return body;
    } catch (error) {
      if (currentAttempt >= CONFIG.RETRY_ATTEMPTS) {
        throw error;
      }
      logWarning(`Fetch attempt ${currentAttempt} failed, retrying... (${error.message})`);
      await delay(CONFIG.RETRY_DELAY);
    }
  }
}

// ============================================================================
// IMAGE EXTRACTION FROM HTML
// ============================================================================

/**
 * Return the first candidate that passes isValidImageUrl, or '' when none do.
 * Lets a real `data-src` win over a placeholder `src`.
 *
 * @param {Array<string|null|undefined>} candidates - Raw attribute values.
 * @returns {string}
 */
function firstValidImageUrl(candidates) {
  return candidates.find((candidate) => isValidImageUrl(candidate)) || '';
}

/**
 * Validate, resolve and record one URL, ignoring duplicates.
 *
 * @param {string} rawUrl - URL exactly as written in the page.
 * @param {string} baseUrl - URL of the page, used to resolve relative URLs.
 * @param {string[]} imageUrls - Result list, appended to in page order.
 * @param {Set<string>} seenUrls - Absolute URLs already recorded.
 * @returns {boolean} true when the URL was added.
 */
function addImageUrl(rawUrl, baseUrl, imageUrls, seenUrls) {
  if (!isValidImageUrl(rawUrl)) return false;

  const absUrl = resolveUrl(rawUrl, baseUrl);
  if (seenUrls.has(absUrl)) return false;

  seenUrls.add(absUrl);
  imageUrls.push(absUrl);
  return true;
}

/**
 * querySelectorAll that tolerates selectors the engine rejects.
 *
 * @param {Document|Element} root - Node to search below.
 * @param {string} selector - CSS selector from the configuration.
 * @returns {Element[]} Matches, or an empty list for an invalid selector.
 */
function selectAll(root, selector) {
  try {
    return [...root.querySelectorAll(selector)];
  } catch (error) {
    console.warn(`[Background] Ignoring invalid selector: ${selector}`);
    return [];
  }
}

/**
 * Read the image URL candidates of an <img> element as raw attribute text
 * (the resolved `img.src` would be relative to the parsing context, not to
 * the detail page).
 *
 * @param {Element} img
 * @returns {string} First valid candidate or ''.
 */
function imageSourceOf(img) {
  return firstValidImageUrl([img.getAttribute('src'), img.getAttribute('data-src')]);
}

/**
 * Extract all image URLs from detail page HTML
 *
 * Uses DOMParser when the runtime offers one; otherwise falls back to
 * scanning the markup for <img> and <a> tags.
 *
 * @param {string} html - Detail page markup.
 * @param {string} baseUrl - URL of the detail page.
 * @returns {string[]} Unique absolute image URLs in page order; [] on failure.
 */
function extractImageUrlsFromHtml(html, baseUrl) {
  try {
    const markup = String(html ?? '');
    console.log(`[Background] Parsing HTML for images... (length: ${markup.length} chars)`);

    const imageUrls =
      typeof DOMParser === 'function'
        ? extractImageUrlsWithDom(markup, baseUrl)
        : extractImageUrlsFromMarkup(markup, baseUrl);

    console.log(`[Background] ✓ Extracted ${imageUrls.length} total images from detail page`);
    return imageUrls;
  } catch (error) {
    console.error('Error parsing HTML:', error);
    return [];
  }
}

/**
 * DOM-based extraction: tries configured containers, then image selectors,
 * then image links, then every <img> on the page.
 *
 * @param {string} html - Detail page markup.
 * @param {string} baseUrl - URL of the detail page.
 * @returns {string[]} Unique absolute image URLs.
 */
function extractImageUrlsWithDom(html, baseUrl) {
  const doc = new DOMParser().parseFromString(html, 'text/html');
  const imageUrls = [];
  const seenUrls = new Set();

  // Strategy 1: Find image containers
  for (const containerSelector of DETAIL_PAGE_SELECTORS.imageContainer) {
    const containers = selectAll(doc, containerSelector);
    if (containers.length > 0) {
      console.log(
        `[Background] Found ${containers.length} containers with selector: ${containerSelector}`
      );
    }
    for (const container of containers) {
      extractFromContainer(container, baseUrl, imageUrls, seenUrls);
    }
  }

  // Strategy 2: If no images found, try direct image selectors
  if (imageUrls.length === 0) {
    console.log('[Background] No container images found, trying direct selectors...');
    for (const imgSelector of DETAIL_PAGE_SELECTORS.imageElements) {
      for (const img of selectAll(doc, imgSelector)) {
        addImageUrl(imageSourceOf(img), baseUrl, imageUrls, seenUrls);
      }
    }
  }

  // Strategy 3: Look for image links
  if (imageUrls.length === 0) {
    console.log('[Background] No direct images found, trying image links...');
    for (const linkSelector of DETAIL_PAGE_SELECTORS.imageLinks) {
      for (const link of selectAll(doc, linkSelector)) {
        addImageUrl(link.getAttribute('href'), baseUrl, imageUrls, seenUrls);
      }
    }
  }

  // Strategy 4: Last resort - find ALL images and filter
  if (imageUrls.length === 0) {
    console.log('[Background] Last resort: scanning all images...');
    for (const img of selectAll(doc, 'img')) {
      // Filter to actual product images (skip small/thumbnail images)
      if (!img.width || img.width > 100) {
        addImageUrl(imageSourceOf(img), baseUrl, imageUrls, seenUrls);
      }
    }
  }

  return imageUrls;
}

/**
 * Parse the attribute section of a start tag.
 *
 * @param {string} attributeText - Text between the tag name and ">".
 * @returns {Map<string, string>} Lower-cased names mapped to their values;
 *   the first occurrence of a name wins.
 */
function parseTagAttributes(attributeText) {
  const attributes = new Map();
  for (const match of attributeText.matchAll(HTML_ATTRIBUTE_PATTERN)) {
    const name = match[1].toLowerCase();
    if (attributes.has(name)) continue;

    const rawValue = match[2] ?? match[3] ?? match[4] ?? '';
    // URLs in attributes carry "&" as "&amp;".
    attributes.set(name, rawValue.replace(/&amp;/gi, '&').trim());
  }
  return attributes;
}

/**
 * Markup-scanning extraction for runtimes without DOMParser. Container
 * selectors cannot be honored here: every <img> is considered, and <a>
 * targets are used only when no <img> yields an image.
 *
 * @param {string} html - Detail page markup.
 * @param {string} baseUrl - URL of the detail page.
 * @returns {string[]} Unique absolute image URLs.
 */
function extractImageUrlsFromMarkup(html, baseUrl) {
  const imageUrls = [];
  const seenUrls = new Set();
  const linkTargets = [];

  for (const match of html.matchAll(HTML_TAG_PATTERN)) {
    const tagName = match[1].toLowerCase();
    const attributes = parseTagAttributes(match[2]);

    if (tagName === 'img') {
      const source = firstValidImageUrl([attributes.get('src'), attributes.get('data-src')]);
      addImageUrl(source, baseUrl, imageUrls, seenUrls);
    } else if (attributes.has('href')) {
      linkTargets.push(attributes.get('href'));
    }
  }

  if (imageUrls.length === 0) {
    console.log('[Background] No direct images found, trying image links...');
    for (const href of linkTargets) {
      addImageUrl(href, baseUrl, imageUrls, seenUrls);
    }
  }

  return imageUrls;
}

/**
 * Extract images from a container element
 *
 * @param {Element} container - Element whose descendants are inspected.
 * @param {string} baseUrl - URL of the detail page.
 * @param {string[]} imageUrls - Result list, appended to in place.
 * @param {Set<string>} seenUrls - Absolute URLs already recorded.
 */
function extractFromContainer(container, baseUrl, imageUrls, seenUrls) {
  // Look for img tags
  for (const img of container.querySelectorAll('img')) {
    addImageUrl(imageSourceOf(img), baseUrl, imageUrls, seenUrls);
  }

  // Look for image links
  const linkSelector = 'a[href*=".jpg"], a[href*=".png"], a[href*=".webp"]';
  for (const link of container.querySelectorAll(linkSelector)) {
    addImageUrl(link.getAttribute('href'), baseUrl, imageUrls, seenUrls);
  }
}

/**
 * Check if URL looks like a valid image
 *
 * @param {*} url - Candidate URL; non-strings are rejected.
 * @returns {boolean} true for non-data URLs that end in a known image
 *   extension (optionally followed by a query string) and do not look like
 *   site chrome (favicon/logo/icon/svg).
 */
function isValidImageUrl(url) {
  if (typeof url !== 'string' || url.length === 0) return false;

  const imageExtensions = /\.(jpg|jpeg|png|webp|gif|bmp)(\?.*)?$/i;
  const isDataUrl = url.startsWith('data:');
  const isTooSmall =
    url.includes('favicon') ||
    url.includes('logo') ||
    url.includes('icon') ||
    url.includes('.svg');

  return !isDataUrl && !isTooSmall && imageExtensions.test(url);
}

/**
 * Convert relative URL to absolute
 *
 * @param {string} url - URL as written in the page.
 * @param {string} baseUrl - URL the page was loaded from.
 * @returns {string} Absolute URL, '' for empty input, or the input unchanged
 *   when it cannot be resolved.
 */
function resolveUrl(url, baseUrl) {
  if (!url) return '';

  // Already absolute
  if (url.startsWith('http://') || url.startsWith('https://')) {
    return url;
  }

  try {
    return new URL(url, baseUrl).href;
  } catch (e) {
    console.warn('Failed to resolve URL:', url, baseUrl);
    return url;
  }
}

/**
 * Get image extension from URL or content-type
 *
 * @param {string} url - Image URL.
 * @param {Response} [response] - Response whose content-type header is used
 *   when the URL carries no known extension.
 * @returns {string} Extension without dot; "jpeg" is normalized to "jpg" and
 *   "jpg" is the default.
 */
function getImageExtension(url, response) {
  // Try from URL
  const urlMatch = String(url).match(/\.([a-z]+)(?:[?#]|$)/i);
  if (urlMatch) {
    const ext = urlMatch[1].toLowerCase();
    if (KNOWN_IMAGE_EXTENSIONS.includes(ext)) {
      return ext === 'jpeg' ? 'jpg' : ext;
    }
  }

  // Try from content-type
  const contentType = response?.headers?.get('content-type')?.toLowerCase();
  if (contentType) {
    if (contentType.includes('jpeg')) return 'jpg';
    if (contentType.includes('png')) return 'png';
    if (contentType.includes('webp')) return 'webp';
    if (contentType.includes('gif')) return 'gif';
    if (contentType.includes('bmp')) return 'bmp';
  }

  return 'jpg'; // Default
}

// ============================================================================
// UTILITY FUNCTIONS
// ============================================================================

/**
 * Sanitize folder name for use in file system
 *
 * @param {*} name - Desired folder name; non-strings are stringified.
 * @returns {string} Lower-cased name of at most 100 characters, or
 *   'product_unknown' when nothing usable remains.
 */
function sanitizeFolderName(name) {
  if (!name) return 'product_unknown';

  const sanitized = String(name)
    .replace(/[<>:"|?*\/\\\x00-\x1f]/g, '_') // Invalid file system characters
    .replace(/\s+/g, '_') // Spaces to underscores
    .replace(/_{2,}/g, '_') // Multiple underscores to single
    .substring(0, 100) // Limit length
    .replace(/^\.+|\.+$/g, '') // No leading/trailing dots (".." is a back-reference)
    .toLowerCase();

  return sanitized || 'product_unknown';
}

/**
 * Delay utility
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// ============================================================================
// INITIALIZATION
// ============================================================================

console.log('[Background Service Worker] Initialized');
log('Service worker ready', 'info');