// 2026-04-20 00:08:24 +03:00
//
// 607 lines
// 18 KiB
// JavaScript

/**
* Background Service Worker - Manifest V3
*
* Responsibilities:
* - Manage communication with popup and content scripts
* - Fetch and parse product detail pages
* - Extract image URLs from detail pages
* - Download images using Chrome downloads API
* - Track progress and state
* - Handle retry logic and rate limiting
*/
// ============================================================================
// CONFIGURATION - Adjust selectors for detail page structure
// ============================================================================
/**
 * CSS selectors used to locate product images inside a fetched detail page.
 * Every list is walked in the order written here.
 */
const DETAIL_PAGE_SELECTORS = {
  // Wrapper elements that commonly hold the photos on Yupoo detail pages
  imageContainer: [
    'div.photo-list',
    'div.details-images',
    'div.album',
    'div.gallery',
    'div.photo-album',
    'div[class*="images"]',
    'div[class*="photo"]',
    'div[class*="album"]',
    'ul.photo-list',
  ],

  // <img> elements queried directly when no container produced an image
  imageElements: [
    'img.photo',
    'img[class*="detail"]',
    'img[class*="product"]',
    'img[data-src*=".jpg"]',
    'img[data-src*=".png"]',
    'img',
  ],

  // Anchors whose href points at an image file or an image path
  imageLinks: [
    'a[href*=".jpg"]',
    'a[href*=".png"]',
    'a[href*=".webp"]',
    'a[href*="/fs/"]',
    'a[href*="/f/"]',
  ],
};
/**
 * Timing and retry settings. All durations are in milliseconds.
 */
const CONFIG = {
  // Pause between detail-page requests to avoid hammering the server
  REQUEST_DELAY: 800,
  // How many times a detail-page fetch is attempted in total
  RETRY_ATTEMPTS: 3,
  // Pause before a retry
  RETRY_DELAY: 2000,
  // Default number of parallel image downloads; the user can change it
  MAX_CONCURRENT_DOWNLOADS: 2,
  // Time budget for a fetch request
  TIMEOUT: 30000,
};
// ============================================================================
// STATE MANAGEMENT
// ============================================================================
/**
 * Mutable session state read and written by the handlers in this file.
 */
let state = {
  // Products received through the `updateProducts` message
  products: [],
  // Index of the product being processed; -1 until a run reaches one
  currentProductIndex: -1,
  totalImagesDownloaded: 0,
  errorCount: 0,
  isRunning: false,
  // Image URLs already saved, used to deduplicate during this session
  downloadedUrls: new Set(),
  maxConcurrency: CONFIG.MAX_CONCURRENT_DOWNLOADS,
  // Log entries of the form { message, type }
  logs: [],
};
// ============================================================================
// LOGGING
// ============================================================================
/**
 * Record a timestamped log entry, echo it to the console and broadcast the
 * updated state.
 *
 * @param {string} message - Text to record.
 * @param {string} [type='info'] - Entry category stored alongside the text.
 */
function log(message, type = 'info') {
  const maxEntries = 100;
  const stampedMessage = `[${new Date().toLocaleTimeString()}] ${message}`;
  const { logs } = state;

  logs.push({ message: stampedMessage, type });
  // Only the most recent entries are retained
  if (logs.length > maxEntries) {
    logs.shift();
  }

  console.log(stampedMessage);
  broadcastUpdate();
}
/**
 * Record an error entry and bump the session error counter.
 *
 * @param {string} message - Description of the failure.
 */
function logError(message) {
  // Count first: log() broadcasts the state snapshot, so incrementing
  // afterwards would send a stale errorCount to the popup.
  state.errorCount++;
  log(message, 'error');
}
/**
 * Record a log entry of type 'success'.
 *
 * @param {string} message - Text to record.
 */
function logSuccess(message) {
  const text = `${message}`;
  log(text, 'success');
}
/**
 * Record a log entry of type 'warning', prefixed with a warning sign.
 *
 * @param {string} message - Text to record.
 */
function logWarning(message) {
  const decorated = `⚠️ ${message}`;
  log(decorated, 'warning');
}
// ============================================================================
// COMMUNICATION & STATE BROADCAST
// ============================================================================
/**
 * Send the current state snapshot to the rest of the extension through
 * chrome.runtime.sendMessage. A rejected send is ignored because the popup
 * might be closed.
 */
function broadcastUpdate() {
  const {
    products,
    currentProductIndex,
    totalImagesDownloaded,
    errorCount,
    isRunning,
    logs,
  } = state;

  const message = {
    action: 'updateState',
    state: {
      products,
      currentProductIndex,
      totalImagesDownloaded,
      errorCount,
      isRunning,
      logs,
    },
  };

  // Popup might be closed, ignore
  const ignoreRejection = () => {};
  chrome.runtime.sendMessage(message).catch(ignoreRejection);
}
// Message router: dispatches on `request.action` and answers through
// `sendResponse`. Unknown actions are ignored without a response.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  const { action } = request;
  console.log('[Background] Received:', action);

  switch (action) {
    case 'updateProducts': {
      // A new product list resets every per-session counter
      Object.assign(state, {
        products: request.products || [],
        currentProductIndex: -1,
        totalImagesDownloaded: 0,
        errorCount: 0,
      });
      state.downloadedUrls.clear();
      state.logs = [];
      log(`Loaded ${state.products.length} products`, 'info');
      broadcastUpdate();
      sendResponse({ success: true });
      break;
    }

    case 'startDownload': {
      state.maxConcurrency = request.maxConcurrency || CONFIG.MAX_CONCURRENT_DOWNLOADS;
      log(`Starting downloads with max concurrency: ${state.maxConcurrency}`, 'info');
      startDownloadProcess();
      sendResponse({ success: true });
      break;
    }

    case 'stopDownload': {
      // The download loop checks this flag before each product
      state.isRunning = false;
      log('Download stopped by user', 'warning');
      broadcastUpdate();
      sendResponse({ success: true });
      break;
    }

    case 'getState': {
      const {
        products,
        currentProductIndex,
        totalImagesDownloaded,
        errorCount,
        isRunning,
        logs,
      } = state;
      sendResponse({
        state: {
          products,
          currentProductIndex,
          totalImagesDownloaded,
          errorCount,
          isRunning,
          logs,
        },
      });
      break;
    }

    default:
      break;
  }
});
// ============================================================================
// DOWNLOAD MANAGEMENT
// ============================================================================
/**
 * Main download process: walks `state.products` in order and downloads the
 * images of each one.
 *
 * Does nothing when a run is already active. Each run is tagged with an id
 * stored on `state.activeRunId`; if the user stops and restarts while a
 * product is still in flight, the older loop sees the id mismatch and exits
 * instead of running alongside the new one.
 *
 * @returns {Promise<void>} Settles when this run has finished, was stopped,
 *   or was superseded by a newer run.
 */
async function startDownloadProcess() {
  if (state.isRunning) return;

  const runId = (state.activeRunId || 0) + 1;
  state.activeRunId = runId;
  state.isRunning = true;
  broadcastUpdate();

  const isCurrentRun = () => state.isRunning && state.activeRunId === runId;
  let processedAll = true;

  for (let i = 0; i < state.products.length; i++) {
    if (!isCurrentRun()) {
      processedAll = false;
      break;
    }

    const product = state.products[i];
    state.currentProductIndex = i;
    log(`Processing product ${i + 1}/${state.products.length}: ${product.productCode || 'unknown'}`, 'info');
    broadcastUpdate();

    try {
      await downloadProductImages(product);
    } catch (error) {
      logError(`Failed to process product: ${error.message}`);
    }

    // Rate-limit between products, also after a failure; there is nothing
    // to wait for after the last product or once the run was stopped.
    if (i < state.products.length - 1 && isCurrentRun()) {
      await delay(CONFIG.REQUEST_DELAY);
    }
  }

  // A newer run owns the shared state now; leave it untouched.
  if (state.activeRunId !== runId) return;

  state.isRunning = false;
  if (processedAll) {
    log('✨ Download process complete!', 'success');
  }
  // When stopped early, the stop handler has already logged the reason.
  broadcastUpdate();
}
/**
 * Download all images for a single product.
 *
 * Fetches the product's detail page, extracts the image URLs and downloads
 * them into a folder named after the product code. Failures are logged and
 * never thrown to the caller.
 *
 * @param {{productCode?: string, index?: number, detailUrl?: string}} product
 *   Product entry; these are the only fields read here.
 * @returns {Promise<void>}
 */
async function downloadProductImages(product) {
  const folderName = sanitizeFolderName(product.productCode || `product_${product.index}`);

  // Without a URL every retry would fail the same way; bail out early.
  if (!product.detailUrl) {
    logError(`No detail page URL for product: ${folderName}`);
    return;
  }

  try {
    log(`Fetching detail page: ${product.detailUrl}`, 'info');
    const html = await fetchWithRetry(product.detailUrl);
    if (!html) {
      logError(`Detail page returned empty HTML for ${folderName}`);
      return;
    }
    log(`Detail page fetched: ${html.length} characters`, 'info');

    const imageUrls = extractImageUrlsFromHtml(html, product.detailUrl);
    if (imageUrls.length === 0) {
      logWarning(`No images found for product: ${folderName}. Check if page loads in browser.`);
      return;
    }
    log(`Found ${imageUrls.length} images for ${folderName}`, 'info');

    // Individual image failures do not reject the batch, so measure what was
    // actually saved instead of assuming every URL succeeded.
    const savedBefore = state.totalImagesDownloaded;
    await downloadImagesWithConcurrency(imageUrls, folderName, state.maxConcurrency);
    const savedCount = state.totalImagesDownloaded - savedBefore;

    if (savedCount === imageUrls.length) {
      logSuccess(`✓ Downloaded ${imageUrls.length} images for ${folderName}`);
    } else {
      logWarning(`Downloaded ${savedCount} of ${imageUrls.length} images for ${folderName}`);
    }
  } catch (error) {
    logError(`Error downloading product ${folderName}: ${error.message}`);
  }
}
/**
 * Download multiple images with a concurrency limit.
 *
 * A fixed number of workers pull URLs from a shared cursor, so at most
 * `maxConcurrency` downloads are in flight. A failed image is reported
 * through logError and does not stop the remaining downloads.
 *
 * @param {string[]} imageUrls - URLs to download, in file-numbering order.
 * @param {string} folderName - Target folder passed to downloadSingleImage.
 * @param {number} maxConcurrency - Desired parallelism; values below 1 or
 *   non-numeric values fall back to 1 so the function always terminates.
 * @returns {Promise<void>} Resolves once every URL has been attempted.
 */
async function downloadImagesWithConcurrency(imageUrls, folderName, maxConcurrency) {
  const urls = [...imageUrls];
  const requested = Number(maxConcurrency);
  const limit = requested >= 1 ? Math.floor(requested) : 1;
  const workerCount = Math.min(limit, urls.length);
  let cursor = 0;

  const runWorker = async () => {
    while (cursor < urls.length) {
      // Claim the slot synchronously so no two workers take the same URL;
      // the position also yields the 1-based file number without a search.
      const position = cursor;
      cursor += 1;
      try {
        await downloadSingleImage(urls[position], folderName, position + 1);
      } catch (error) {
        logError(`${folderName} image ${position + 1}: ${error.message}`);
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
}
/**
 * Download a single image, skipping URLs already saved in this session.
 *
 * @param {string} imageUrl - Image URL; may be absolute, protocol-relative
 *   or relative to the Yupoo site root.
 * @param {string} folderName - Target folder.
 * @param {number} index - 1-based position used for the file name.
 * @returns {Promise<void>}
 * @throws {Error} "Failed to download image: ..." when the download fails.
 */
async function downloadSingleImage(imageUrl, folderName, index) {
  // Skip if already downloaded in this session
  if (state.downloadedUrls.has(imageUrl)) {
    logWarning(`Skipping duplicate image: ${imageUrl}`);
    return;
  }

  try {
    // URL resolution handles "//host/path", "/path" and "path" alike; plain
    // string concatenation would turn "//host/path" into a path on yupoo.com.
    const absoluteUrl = /^https?:\/\//i.test(imageUrl)
      ? imageUrl
      : new URL(imageUrl, 'https://www.yupoo.com/').href;

    const filename = await fetchImageAndDownload(absoluteUrl, folderName, index);
    state.downloadedUrls.add(imageUrl);
    state.totalImagesDownloaded++;
    broadcastUpdate();
    log(`Downloaded: ${folderName}/${filename}`, 'success');
  } catch (error) {
    throw new Error(`Failed to download image: ${error.message}`, { cause: error });
  }
}
/**
 * Encode a Blob as a base64 `data:` URL.
 *
 * @param {Blob} blob - Binary payload to encode.
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`.
 */
async function blobToDataUrl(blob) {
  const bytes = new Uint8Array(await blob.arrayBuffer());
  // Convert in chunks: spreading a multi-megabyte array into a single
  // String.fromCharCode call can exceed the engine's argument limit.
  const chunkSize = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    binary += String.fromCharCode.apply(null, bytes.subarray(offset, offset + chunkSize));
  }
  const mimeType = blob.type || 'application/octet-stream';
  return `data:${mimeType};base64,${btoa(binary)}`;
}

/**
 * Fetch an image and hand it to the Chrome downloads API.
 *
 * The payload is passed to chrome.downloads as a `data:` URL because
 * URL.createObjectURL is not available inside a service worker.
 *
 * @param {string} imageUrl - Absolute image URL.
 * @param {string} folderName - Folder (relative to the downloads directory).
 * @param {number} index - Position used for the zero-padded file name.
 * @returns {Promise<string>} The file name, e.g. "03.jpg".
 * @throws {Error} "Download failed: ..." on HTTP, timeout or downloads-API
 *   errors.
 */
async function fetchImageAndDownload(imageUrl, folderName, index) {
  try {
    let filename;
    let filepath;
    let dataUrl;

    // `fetch` has no `timeout` option; abort through a signal instead and
    // keep the timer armed until the body has been read.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), CONFIG.TIMEOUT);
    try {
      const response = await fetch(imageUrl, {
        method: 'GET',
        headers: {
          // NOTE(review): `Referer` is a forbidden request header in the Fetch
          // spec, so the browser is expected to drop it — confirm whether
          // hotlink protection needs a declarativeNetRequest rule instead.
          'Referer': 'https://www.yupoo.com/',
        },
        signal: controller.signal,
      });
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      const ext = getImageExtension(imageUrl, response);
      filename = `${String(index).padStart(2, '0')}.${ext}`;
      filepath = `${folderName}/${filename}`;
      dataUrl = await blobToDataUrl(await response.blob());
    } finally {
      clearTimeout(timeoutId);
    }

    // Awaited inside the try so downloads-API errors get the same wrapper.
    return await new Promise((resolve, reject) => {
      chrome.downloads.download(
        {
          url: dataUrl,
          filename: filepath,
          saveAs: false,
        },
        () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message));
          } else {
            resolve(filename);
          }
        }
      );
    });
  } catch (error) {
    throw new Error(`Download failed: ${error.message}`, { cause: error });
  }
}
/**
 * Fetch a URL as text, retrying on failure.
 *
 * Every attempt gets its own abort timer of CONFIG.TIMEOUT ms that covers
 * both the request and the body read, and is always cleared afterwards.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [attempt=1] - Number of the first attempt; attempts stop
 *   once CONFIG.RETRY_ATTEMPTS is reached.
 * @returns {Promise<string>} Response body.
 * @throws {Error} The error of the final attempt (network error, abort, or
 *   "HTTP <status>" for a non-2xx response).
 */
async function fetchWithRetry(url, attempt = 1) {
  let currentAttempt = attempt;

  for (;;) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), CONFIG.TIMEOUT);

    try {
      const response = await fetch(url, {
        method: 'GET',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
          'Referer': 'https://www.yupoo.com/',
        },
        signal: controller.signal,
      });
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      return await response.text();
    } catch (error) {
      if (currentAttempt >= CONFIG.RETRY_ATTEMPTS) {
        throw error;
      }
      logWarning(`Fetch attempt ${currentAttempt} failed, retrying... (${error.message})`);
    } finally {
      // Runs on success and failure alike so no abort timer is left pending.
      clearTimeout(timeoutId);
    }

    await delay(CONFIG.RETRY_DELAY);
    currentAttempt += 1;
  }
}
// ============================================================================
// IMAGE EXTRACTION FROM HTML
// ============================================================================
/**
 * Add the first valid candidate to the result list unless already present.
 *
 * @param {Array<string|null|undefined>} candidates - Raw attribute values in
 *   priority order.
 * @returns {boolean} True when a new URL was appended.
 */
function addFirstValidImageUrl(candidates, baseUrl, imageUrls, seenUrls) {
  const rawUrl = candidates.find((candidate) => candidate && isValidImageUrl(candidate));
  if (!rawUrl) return false;
  const absUrl = resolveUrl(rawUrl, baseUrl);
  if (seenUrls.has(absUrl)) return false;
  imageUrls.push(absUrl);
  seenUrls.add(absUrl);
  return true;
}

/**
 * Parse the attribute section of a start tag into a lower-cased name → value
 * map. The first occurrence of a repeated attribute wins.
 */
function readTagAttributes(attributeText) {
  const attributes = new Map();
  const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;
  let match = attributePattern.exec(attributeText);
  while (match !== null) {
    const name = match[1].toLowerCase();
    const rawValue = [match[2], match[3], match[4]].find((value) => value !== undefined) || '';
    if (!attributes.has(name)) {
      // URLs in markup carry "&" as "&amp;"
      attributes.set(name, rawValue.trim().replace(/&amp;/g, '&'));
    }
    match = attributePattern.exec(attributeText);
  }
  return attributes;
}

/**
 * DOM-free extraction: scan <img> and <a> start tags with regular
 * expressions. Anchors are only used when no <img> yielded a URL.
 */
function collectImageUrlsFromMarkup(html, baseUrl, imageUrls, seenUrls) {
  const tagPattern = /<(img|a)(?=[\s\/>])([^>]*)>/gi;
  const linkTargets = [];
  let tag = tagPattern.exec(html);
  while (tag !== null) {
    const attributes = readTagAttributes(tag[2]);
    if (tag[1].toLowerCase() === 'img') {
      addFirstValidImageUrl(
        [attributes.get('src'), attributes.get('data-src')],
        baseUrl,
        imageUrls,
        seenUrls
      );
    } else if (attributes.has('href')) {
      linkTargets.push(attributes.get('href'));
    }
    tag = tagPattern.exec(html);
  }

  if (imageUrls.length === 0) {
    console.log('[Background] No direct images found, trying image links...');
    linkTargets.forEach((href) => addFirstValidImageUrl([href], baseUrl, imageUrls, seenUrls));
  }
}

/**
 * DOM-based extraction: containers first, then direct image selectors, then
 * image links, then every <img> as a last resort.
 */
function collectImageUrlsFromDocument(doc, baseUrl, imageUrls, seenUrls) {
  // Raw attributes are used instead of `img.src` / `a.href`: the reflected
  // properties resolve relative URLs against the parsing document's own URL,
  // not against the detail page.
  const addImage = (img) =>
    addFirstValidImageUrl(
      [img.getAttribute('src'), img.getAttribute('data-src')],
      baseUrl,
      imageUrls,
      seenUrls
    );

  const visitMatches = (selector, description, visit) => {
    try {
      const matches = doc.querySelectorAll(selector);
      if (matches.length > 0) {
        console.log(`[Background] Found ${matches.length} ${description} with selector: ${selector}`);
        matches.forEach(visit);
      }
    } catch (error) {
      // A bad selector must not abort the remaining strategies
      console.warn(`[Background] Selector failed: ${selector}`, error);
    }
  };

  // Strategy 1: Find image containers
  for (const containerSelector of DETAIL_PAGE_SELECTORS.imageContainer) {
    visitMatches(containerSelector, 'containers', (container) => {
      extractFromContainer(container, baseUrl, imageUrls, seenUrls);
    });
  }

  // Strategy 2: If no images found, try direct image selectors
  if (imageUrls.length === 0) {
    console.log('[Background] No container images found, trying direct selectors...');
    for (const imgSelector of DETAIL_PAGE_SELECTORS.imageElements) {
      visitMatches(imgSelector, 'images', addImage);
    }
  }

  // Strategy 3: Look for image links
  if (imageUrls.length === 0) {
    console.log('[Background] No direct images found, trying image links...');
    for (const linkSelector of DETAIL_PAGE_SELECTORS.imageLinks) {
      visitMatches(linkSelector, 'image links', (link) => {
        addFirstValidImageUrl([link.getAttribute('href')], baseUrl, imageUrls, seenUrls);
      });
    }
  }

  // Strategy 4: Last resort - find ALL images and filter
  if (imageUrls.length === 0) {
    console.log('[Background] Last resort: scanning all images...');
    const allImages = doc.querySelectorAll('img');
    console.log(`[Background] Found ${allImages.length} total img elements`);
    let validCount = 0;
    allImages.forEach((img) => {
      // Skip images that declare a small width (thumbnails)
      if ((!img.width || img.width > 100) && addImage(img)) {
        validCount++;
      }
    });
    console.log(`[Background] Added ${validCount} valid images from all images`);
  }
}

/**
 * Extract all image URLs from detail page HTML.
 *
 * Uses DOMParser where it exists. Service workers have no DOM, so there the
 * markup is scanned with regular expressions instead.
 *
 * @param {string} html - Detail page markup.
 * @param {string} baseUrl - URL of the detail page, used to resolve relative
 *   image URLs.
 * @returns {string[]} Absolute, de-duplicated image URLs in document order;
 *   empty when nothing was found or parsing failed.
 */
function extractImageUrlsFromHtml(html, baseUrl) {
  if (!html) return [];

  try {
    const imageUrls = [];
    const seenUrls = new Set();
    console.log(`[Background] Parsing HTML for images... (length: ${html.length} chars)`);

    if (typeof DOMParser === 'undefined') {
      console.log('[Background] DOMParser unavailable, scanning markup for image tags...');
      collectImageUrlsFromMarkup(html, baseUrl, imageUrls, seenUrls);
    } else {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      collectImageUrlsFromDocument(doc, baseUrl, imageUrls, seenUrls);
    }

    console.log(`[Background] ✓ Extracted ${imageUrls.length} total images from detail page`);
    return imageUrls;
  } catch (error) {
    console.error('Error parsing HTML:', error);
    return [];
  }
}
/**
 * Extract images from a container element.
 *
 * Collects URLs from the container's <img> elements (`src`, then `data-src`)
 * and from anchors that link to .jpg/.png/.webp files, appending unseen ones
 * to `imageUrls`.
 *
 * @param {Element} container - Element to search.
 * @param {string} baseUrl - Detail page URL used to resolve relative URLs.
 * @param {string[]} imageUrls - Result list, appended to in place.
 * @param {Set<string>} seenUrls - URLs already collected, updated in place.
 */
function extractFromContainer(container, baseUrl, imageUrls, seenUrls) {
  const addFirstValid = (candidates) => {
    const rawUrl = candidates.find((candidate) => candidate && isValidImageUrl(candidate));
    if (!rawUrl) return;
    const absUrl = resolveUrl(rawUrl, baseUrl);
    if (!seenUrls.has(absUrl)) {
      imageUrls.push(absUrl);
      seenUrls.add(absUrl);
    }
  };

  // Raw attributes are read instead of `img.src` / `link.href`: the reflected
  // properties resolve relative URLs against the parsing document's own URL,
  // which is not the detail page. Trying `data-src` after an invalid `src`
  // also picks up lazy-loaded images whose `src` is a placeholder.
  container.querySelectorAll('img').forEach((img) => {
    addFirstValid([img.getAttribute('src'), img.getAttribute('data-src')]);
  });

  // Look for image links
  container
    .querySelectorAll('a[href*=".jpg"], a[href*=".png"], a[href*=".webp"]')
    .forEach((link) => {
      addFirstValid([link.getAttribute('href')]);
    });
}
/**
 * Decide whether a URL looks like a downloadable product image.
 *
 * Rejects empty values, `data:` URLs and anything containing "favicon",
 * "logo", "icon" or ".svg"; accepts the rest only when it ends in a known
 * raster extension, optionally followed by a query string.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean}
 */
function isValidImageUrl(url) {
  if (!url || url.length === 0) return false;
  if (url.startsWith('data:')) return false;

  const excludedFragments = ['favicon', 'logo', 'icon', '.svg'];
  if (excludedFragments.some((fragment) => url.includes(fragment))) {
    return false;
  }

  return /\.(jpg|jpeg|png|webp|gif|bmp)(\?.*)?$/i.test(url);
}
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * @param {string} url - URL to resolve.
 * @param {string} baseUrl - Base used for relative URLs.
 * @returns {string} '' for an empty input, the input itself when it already
 *   starts with http:// or https:// or cannot be resolved, otherwise the
 *   resolved absolute URL.
 */
function resolveUrl(url, baseUrl) {
  if (!url) return '';

  const isAbsolute = ['http://', 'https://'].some((scheme) => url.startsWith(scheme));
  if (isAbsolute) return url;

  let resolved = url;
  try {
    resolved = new URL(url, baseUrl).href;
  } catch (e) {
    console.warn('Failed to resolve URL:', url, baseUrl);
  }
  return resolved;
}
/**
 * Get the image file extension from the URL or, failing that, from the
 * response's content-type header.
 *
 * @param {string} url - Image URL.
 * @param {{headers: {get: function(string): (string|null)}}} response -
 *   Fetch response; only `headers.get('content-type')` is read.
 * @returns {string} One of 'jpg', 'png', 'webp', 'gif', 'bmp'. 'jpeg' is
 *   normalised to 'jpg', which is also the default.
 */
function getImageExtension(url, response) {
  const knownExtensions = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'bmp'];

  // Try from URL
  const urlMatch = url.match(/\.([a-z]+)(\?|$)/i);
  if (urlMatch) {
    const ext = urlMatch[1].toLowerCase();
    if (knownExtensions.includes(ext)) {
      // Same normalisation the content-type branch applies
      return ext === 'jpeg' ? 'jpg' : ext;
    }
  }

  // Try from content-type
  const contentType = (response.headers.get('content-type') || '').toLowerCase();
  const byContentType = [
    ['jpeg', 'jpg'],
    ['png', 'png'],
    ['webp', 'webp'],
    ['gif', 'gif'],
    ['bmp', 'bmp'],
  ];
  const hit = byContentType.find(([marker]) => contentType.includes(marker));

  return hit ? hit[1] : 'jpg'; // Default
}
// ============================================================================
// UTILITY FUNCTIONS
// ============================================================================
/**
 * Sanitize folder name for use in file system.
 *
 * Accepts any value (numbers are stringified). Characters that are invalid
 * in file names, control characters and whitespace become underscores, the
 * result is capped at 100 characters, stripped of leading/trailing dots
 * (so "." and ".." can never be produced) and lower-cased.
 *
 * @param {*} name - Raw product code or label.
 * @returns {string} Safe folder name, or 'product_unknown' when nothing
 *   usable remains.
 */
function sanitizeFolderName(name) {
  const fallback = 'product_unknown';
  if (!name) return fallback;

  const cleaned = String(name)
    .replace(/[<>:"|?*\/\\\x00-\x1f\x7f]/g, '_') // Invalid file system and control characters
    .replace(/\s+/g, '_') // Spaces to underscores
    .replace(/_{2,}/g, '_') // Multiple underscores to single
    .substring(0, 100) // Limit length
    .replace(/^\.+|\.+$/g, '') // No "." / ".." components or trailing dots
    .toLowerCase();

  return cleaned || fallback;
}
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
// ============================================================================
// INITIALIZATION
// ============================================================================
// Runs each time this script is evaluated. The log() call records the first
// entry and, through log(), broadcasts the initial state snapshot.
console.log('[Background Service Worker] Initialized');
log('Service worker ready', 'info');