/**
 * Background Service Worker - Manifest V3
 *
 * Responsibilities:
 * - Manage communication with popup and content scripts
 * - Fetch and parse product detail pages
 * - Extract image URLs from detail pages
 * - Download images using Chrome downloads API
 * - Track progress and state
 * - Handle retry logic and rate limiting
 */
|
|
|
|
// ============================================================================
// CONFIGURATION - Adjust selectors for detail page structure
// ============================================================================
|
|
|
|
// CSS selector lists used when scanning a fetched detail page. Each list is
// tried in the order given.
const DETAIL_PAGE_SELECTORS = {
  // Elements expected to wrap the product photos on a Yupoo detail page.
  imageContainer: [
    'div.photo-list',
    'div.details-images',
    'div.album',
    'div.gallery',
    'div.photo-album',
    'div[class*="images"]',
    'div[class*="photo"]',
    'div[class*="album"]',
    'ul.photo-list',
  ],

  // <img> selectors, ending with a catch-all.
  imageElements: [
    'img.photo',
    'img[class*="detail"]',
    'img[class*="product"]',
    'img[data-src*=".jpg"]',
    'img[data-src*=".png"]',
    'img',
  ],

  // Anchors whose href looks like it points at an image file.
  imageLinks: [
    'a[href*=".jpg"]',
    'a[href*=".png"]',
    'a[href*=".webp"]',
    'a[href*="/fs/"]',
    'a[href*="/f/"]',
  ],
};
|
|
|
|
// Tunables for request pacing, retries and download parallelism.
const CONFIG = {
  // Pause (ms) between consecutive products so the site is not hammered.
  REQUEST_DELAY: 800,
  // Number of attempts made for one page fetch before giving up.
  RETRY_ATTEMPTS: 3,
  // Pause (ms) between retry attempts.
  RETRY_DELAY: 2000,
  // Default parallel image downloads; the user can override it at start.
  MAX_CONCURRENT_DOWNLOADS: 2,
  // Time budget (ms) for a single fetch request.
  TIMEOUT: 30000,
};
|
|
|
|
// ============================================================================
// STATE MANAGEMENT
// ============================================================================
|
|
|
|
// Mutable session state shared by every function in this worker.
let state = {
  // Product list supplied by the 'updateProducts' message.
  products: [],
  // Index of the product being processed; -1 before any run.
  currentProductIndex: -1,
  // Running count of images saved.
  totalImagesDownloaded: 0,
  // Running count of logged errors.
  errorCount: 0,
  // True while the download loop is active.
  isRunning: false,
  // URLs already saved, used to skip duplicates during this session.
  downloadedUrls: new Set(),
  // Parallel download limit; replaced by the 'startDownload' message.
  maxConcurrency: CONFIG.MAX_CONCURRENT_DOWNLOADS,
  // Recent log entries shown in the popup.
  logs: [],
};
|
|
|
|
// ============================================================================
// LOGGING
// ============================================================================
|
|
|
|
/**
 * Record a timestamped log entry, echo it to the console and push the new
 * state to any listening UI.
 *
 * @param {string} message - Text to record.
 * @param {string} [type='info'] - Entry category stored alongside the text.
 */
function log(message, type = 'info') {
  const MAX_LOG_ENTRIES = 100;
  const clock = new Date().toLocaleTimeString();
  const line = `[${clock}] ${message}`;

  state.logs.push({ message: line, type });

  // Drop the oldest entry once the buffer exceeds its cap.
  const overCapacity = state.logs.length > MAX_LOG_ENTRIES;
  if (overCapacity) {
    state.logs.shift();
  }

  console.log(line);
  broadcastUpdate();
}
|
|
|
|
/**
 * Log an error entry and count it.
 *
 * The counter is bumped BEFORE logging because log() broadcasts the state;
 * incrementing afterwards would send a stale errorCount with this entry.
 *
 * @param {string} message - Description of the failure.
 */
function logError(message) {
  state.errorCount++;
  log(`❌ ${message}`, 'error');
}
|
|
|
|
/**
 * Log a success entry.
 *
 * @param {string} message - Text to record; a check-mark emoji is prepended.
 */
function logSuccess(message) {
  const decorated = `✅ ${message}`;
  log(decorated, 'success');
}
|
|
|
|
/**
 * Log a warning entry.
 *
 * @param {string} message - Text to record; a warning emoji is prepended.
 */
function logWarning(message) {
  const decorated = `⚠️ ${message}`;
  log(decorated, 'warning');
}
|
|
|
|
// ============================================================================
// COMMUNICATION & STATE BROADCAST
// ============================================================================
|
|
|
|
/**
 * Send a snapshot of the UI-relevant state fields as an 'updateState'
 * runtime message. A rejected send is ignored.
 */
function broadcastUpdate() {
  const {
    products,
    currentProductIndex,
    totalImagesDownloaded,
    errorCount,
    isRunning,
    logs,
  } = state;

  const message = {
    action: 'updateState',
    state: {
      products,
      currentProductIndex,
      totalImagesDownloaded,
      errorCount,
      isRunning,
      logs,
    },
  };

  const pending = chrome.runtime.sendMessage(message);
  pending.catch(() => {
    // Popup might be closed, ignore
  });
}
|
|
|
|
// Message router: handles the 'updateProducts', 'startDownload',
// 'stopDownload' and 'getState' actions. Every handled action answers
// synchronously through sendResponse; other actions get no response.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  console.log('[Background] Received:', request.action);

  switch (request.action) {
    case 'updateProducts': {
      // Replace the product list and reset all per-session counters.
      state.products = request.products || [];
      state.currentProductIndex = -1;
      state.totalImagesDownloaded = 0;
      state.errorCount = 0;
      state.downloadedUrls.clear();
      state.logs = [];
      log(`Loaded ${state.products.length} products`, 'info');
      broadcastUpdate();
      sendResponse({ success: true });
      break;
    }

    case 'startDownload': {
      state.maxConcurrency = request.maxConcurrency || CONFIG.MAX_CONCURRENT_DOWNLOADS;
      log(`Starting downloads with max concurrency: ${state.maxConcurrency}`, 'info');
      // Not awaited: the run continues after this handler has responded.
      startDownloadProcess();
      sendResponse({ success: true });
      break;
    }

    case 'stopDownload': {
      // The download loop checks this flag before each product.
      state.isRunning = false;
      log('Download stopped by user', 'warning');
      broadcastUpdate();
      sendResponse({ success: true });
      break;
    }

    case 'getState': {
      const snapshot = {
        products: state.products,
        currentProductIndex: state.currentProductIndex,
        totalImagesDownloaded: state.totalImagesDownloaded,
        errorCount: state.errorCount,
        isRunning: state.isRunning,
        logs: state.logs,
      };
      sendResponse({ state: snapshot });
      break;
    }

    default:
      break;
  }
});
|
|
|
|
// ============================================================================
// DOWNLOAD MANAGEMENT
// ============================================================================
|
|
|
|
/**
 * Main download process.
 *
 * Walks state.products in order, downloading each product's images and
 * pausing CONFIG.REQUEST_DELAY ms between products. Does nothing if a run is
 * already active. The loop ends early once state.isRunning is cleared (the
 * 'stopDownload' message does that).
 *
 * The completion message is logged only when the run was not stopped, and
 * state.isRunning is always reset, even if something throws unexpectedly,
 * so a later start is never blocked.
 *
 * @returns {Promise<void>}
 */
async function startDownloadProcess() {
  if (state.isRunning) return;

  state.isRunning = true;
  broadcastUpdate();

  let stoppedEarly = false;

  try {
    for (let i = 0; i < state.products.length; i++) {
      if (!state.isRunning) break;

      const product = state.products[i];
      state.currentProductIndex = i;

      try {
        // Inside the try so a malformed (e.g. null) product entry is
        // reported and skipped rather than aborting the whole run.
        log(`Processing product ${i + 1}/${state.products.length}: ${product.productCode || 'unknown'}`, 'info');
        broadcastUpdate();

        await downloadProductImages(product);
        await delay(CONFIG.REQUEST_DELAY);
      } catch (error) {
        logError(`Failed to process product: ${error.message}`);
      }
    }

    // A cleared flag at this point means a stop was requested mid-run.
    stoppedEarly = !state.isRunning;
  } finally {
    state.isRunning = false;
  }

  if (!stoppedEarly) {
    log('✨ Download process complete!', 'success');
  }
  broadcastUpdate();
}
|
|
|
|
/**
 * Download all images for a single product.
 *
 * Fetches the product's detail page, extracts image URLs from the HTML and
 * downloads them into a folder named after the sanitized product code (or
 * `product_<index>` when no code is present). Every failure is logged here;
 * nothing is rethrown to the caller.
 *
 * @param {{productCode?: string, index?: number, detailUrl: string}} product
 * @returns {Promise<void>}
 */
async function downloadProductImages(product) {
  const folderName = sanitizeFolderName(product.productCode || `product_${product.index}`);

  try {
    log(`Fetching detail page: ${product.detailUrl}`, 'info');
    const pageHtml = await fetchWithRetry(product.detailUrl);

    const pageIsEmpty = !pageHtml || pageHtml.length === 0;
    if (pageIsEmpty) {
      logError(`Detail page returned empty HTML for ${folderName}`);
      return;
    }
    log(`Detail page fetched: ${pageHtml.length} characters`, 'info');

    const foundUrls = extractImageUrlsFromHtml(pageHtml, product.detailUrl);
    const imageCount = foundUrls.length;
    if (imageCount === 0) {
      logWarning(`No images found for product: ${folderName}. Check if page loads in browser.`);
      return;
    }
    log(`Found ${imageCount} images for ${folderName}`, 'info');

    // Parallelism is capped by the user-selected limit.
    await downloadImagesWithConcurrency(foundUrls, folderName, state.maxConcurrency);

    logSuccess(`✓ Downloaded ${imageCount} images for ${folderName}`);
  } catch (error) {
    logError(`Error downloading product ${folderName}: ${error.message}`);
  }
}
|
|
|
|
/**
 * Download multiple images with a concurrency limit.
 *
 * A fixed pool of workers pulls URLs from a shared cursor, so at most
 * `maxConcurrency` downloads are in flight. The limit is clamped to at least
 * 1 (a value of 0, a negative number or NaN would otherwise never make
 * progress) and to at most the number of URLs.
 *
 * Each image's file index is its 1-based position in `imageUrls`, so a
 * repeated URL still gets a distinct index. A failed download is reported
 * through logError and does not stop the remaining downloads.
 *
 * @param {string[]} imageUrls - URLs to download, in order.
 * @param {string} folderName - Target folder passed to downloadSingleImage.
 * @param {number} maxConcurrency - Requested number of parallel downloads.
 * @returns {Promise<void>} Resolves once every URL has been attempted.
 */
async function downloadImagesWithConcurrency(imageUrls, folderName, maxConcurrency) {
  const urls = Array.from(imageUrls || []);
  if (urls.length === 0) return;

  const requested = Number(maxConcurrency);
  const workerCount = requested >= 1 ? Math.min(Math.floor(requested), urls.length) : 1;

  // Shared cursor; claiming a slot is synchronous, so no two workers can
  // take the same position.
  let cursor = 0;

  const runWorker = async () => {
    while (cursor < urls.length) {
      const position = cursor++;
      try {
        await downloadSingleImage(urls[position], folderName, position + 1);
      } catch (error) {
        const reason = error && error.message ? error.message : String(error);
        logError(`Image ${position + 1} for ${folderName}: ${reason}`);
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, runWorker));
}
|
|
|
|
/**
 * Download a single image.
 *
 * Skips URLs already recorded in state.downloadedUrls. Otherwise makes the
 * URL absolute, downloads it, records it, bumps the counter and broadcasts.
 *
 * Non-absolute URLs are resolved against https://www.yupoo.com/ with the URL
 * constructor, so protocol-relative URLs (`//host/path`) keep their own host
 * instead of being glued onto the Yupoo origin.
 *
 * @param {string} imageUrl - Image URL (absolute, relative or protocol-relative).
 * @param {string} folderName - Destination folder.
 * @param {number} index - 1-based position used for the file name.
 * @returns {Promise<void>}
 * @throws {Error} "Failed to download image: ..." when the download fails.
 */
async function downloadSingleImage(imageUrl, folderName, index) {
  // Skip if already downloaded in this session
  if (state.downloadedUrls.has(imageUrl)) {
    logWarning(`Skipping duplicate image: ${imageUrl}`);
    return;
  }

  try {
    const FALLBACK_BASE = 'https://www.yupoo.com/';
    const isAbsolute = /^https?:\/\//i.test(imageUrl);
    const absoluteUrl = isAbsolute ? imageUrl : new URL(imageUrl, FALLBACK_BASE).href;

    const filename = await fetchImageAndDownload(absoluteUrl, folderName, index);

    // Keyed by the URL as received so the duplicate check above matches.
    state.downloadedUrls.add(imageUrl);
    state.totalImagesDownloaded++;
    broadcastUpdate();

    log(`Downloaded: ${folderName}/${filename}`, 'success');
  } catch (error) {
    throw new Error(`Failed to download image: ${error.message}`);
  }
}
|
|
|
|
/**
 * Encode a Blob-like object as a base64 `data:` URL.
 *
 * @param {{type: string, arrayBuffer: function(): Promise<ArrayBuffer>}} blob
 * @returns {Promise<string>}
 */
async function blobToDataUrl(blob) {
  const bytes = new Uint8Array(await blob.arrayBuffer());
  // Build the binary string in chunks: spreading a whole large image into
  // one String.fromCharCode call can exceed the argument limit.
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode.apply(null, bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  const mimeType = blob.type || 'application/octet-stream';
  return `data:${mimeType};base64,${btoa(binary)}`;
}

/**
 * Fetch an image and hand it to the Chrome downloads API.
 *
 * The request is bounded by CONFIG.TIMEOUT through an AbortController
 * (`fetch` has no `timeout` option). The downloaded bytes are passed to
 * chrome.downloads.download as a blob URL when URL.createObjectURL exists,
 * and as a data URL otherwise (extension service workers do not provide
 * URL.createObjectURL).
 *
 * @param {string} imageUrl - Absolute image URL.
 * @param {string} folderName - Destination folder.
 * @param {number} index - Number used for the zero-padded file name.
 * @returns {Promise<string>} The file name (without folder), e.g. "03.jpg".
 * @throws {Error} "Download failed: ..." on HTTP, network, timeout or
 *   downloads-API errors.
 */
async function fetchImageAndDownload(imageUrl, folderName, index) {
  let blobUrl = null;

  try {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), CONFIG.TIMEOUT);

    let response;
    let blob;
    try {
      response = await fetch(imageUrl, {
        method: 'GET',
        headers: {
          // NOTE(review): browsers treat Referer as a forbidden header name
          // and may drop it; kept as in the original request.
          'Referer': 'https://www.yupoo.com/',
        },
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      // Read the body while the timeout is still armed.
      blob = await response.blob();
    } finally {
      clearTimeout(timeoutId);
    }

    const ext = getImageExtension(imageUrl, response);
    const filename = `${String(index).padStart(2, '0')}.${ext}`;
    const filepath = `${folderName}/${filename}`;

    let downloadUrl;
    if (typeof URL !== 'undefined' && typeof URL.createObjectURL === 'function') {
      blobUrl = URL.createObjectURL(blob);
      downloadUrl = blobUrl;
    } else {
      downloadUrl = await blobToDataUrl(blob);
    }

    // Adapt the callback-style downloads API to a promise.
    await new Promise((resolve, reject) => {
      chrome.downloads.download(
        {
          url: downloadUrl,
          filename: filepath,
          saveAs: false,
        },
        (downloadId) => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message));
          } else {
            resolve(downloadId);
          }
        }
      );
    });

    if (blobUrl) {
      // Clean up the blob URL after a delay so the download can read it.
      const urlToRevoke = blobUrl;
      blobUrl = null;
      setTimeout(() => URL.revokeObjectURL(urlToRevoke), 1000);
    }

    return filename;
  } catch (error) {
    // Do not leak the blob URL when the download could not be started.
    if (blobUrl) {
      URL.revokeObjectURL(blobUrl);
    }
    throw new Error(`Download failed: ${error.message}`);
  }
}
|
|
|
|
/**
 * Fetch a URL as text, retrying on failure.
 *
 * Each attempt is bounded by CONFIG.TIMEOUT via an AbortController; the timer
 * covers reading the body as well and is always cleared, whether the attempt
 * succeeds or fails. Between attempts the function waits CONFIG.RETRY_DELAY
 * ms. Non-2xx responses count as failures.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [attempt=1] - Number of the first attempt; attempts run
 *   until CONFIG.RETRY_ATTEMPTS is reached.
 * @returns {Promise<string>} The response body.
 * @throws {Error} The error from the final attempt.
 */
async function fetchWithRetry(url, attempt = 1) {
  for (let current = attempt; ; current++) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), CONFIG.TIMEOUT);

    try {
      const response = await fetch(url, {
        method: 'GET',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
          'Referer': 'https://www.yupoo.com/',
        },
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      return await response.text();
    } catch (error) {
      if (current >= CONFIG.RETRY_ATTEMPTS) {
        throw error;
      }
      logWarning(`Fetch attempt ${current} failed, retrying... (${error.message})`);
    } finally {
      clearTimeout(timeoutId);
    }

    await delay(CONFIG.RETRY_DELAY);
  }
}
|
|
|
|
// ============================================================================
// IMAGE EXTRACTION FROM HTML
// ============================================================================
|
|
|
|
/**
 * Validate, resolve and de-duplicate one candidate URL, appending it to
 * `imageUrls` when it is new.
 *
 * @returns {boolean} True when the URL was added.
 */
function addImageCandidate(rawUrl, baseUrl, imageUrls, seenUrls) {
  if (!rawUrl || !isValidImageUrl(rawUrl)) return false;
  const absUrl = resolveUrl(rawUrl, baseUrl);
  if (seenUrls.has(absUrl)) return false;
  imageUrls.push(absUrl);
  seenUrls.add(absUrl);
  return true;
}

/**
 * Read one attribute value out of a single tag's markup.
 *
 * @returns {string} The value with `&amp;` decoded, or '' when absent.
 */
function readHtmlAttribute(tagMarkup, attributeName) {
  // Leading whitespace is required so `src` does not match inside `data-src`.
  const pattern = new RegExp(`\\s${attributeName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
  const match = pattern.exec(tagMarkup);
  if (!match) return '';
  const value = match[1] || match[2] || match[3] || '';
  return value.replace(/&amp;/gi, '&').trim();
}

/**
 * DOM-free fallback: scan the markup for <img> tags (src, then data-src) and,
 * only if none qualify, for <a href> values that look like images.
 */
function extractImageUrlsByRegex(html, baseUrl) {
  const imageUrls = [];
  const seenUrls = new Set();

  const imgTags = html.match(/<img\b[^>]*>/gi) || [];
  imgTags.forEach((tag) => {
    const rawUrl = readHtmlAttribute(tag, 'src') || readHtmlAttribute(tag, 'data-src');
    addImageCandidate(rawUrl, baseUrl, imageUrls, seenUrls);
  });

  if (imageUrls.length === 0) {
    const anchorTags = html.match(/<a\b[^>]*>/gi) || [];
    anchorTags.forEach((tag) => {
      addImageCandidate(readHtmlAttribute(tag, 'href'), baseUrl, imageUrls, seenUrls);
    });
  }

  return imageUrls;
}

/**
 * Extract all image URLs from detail page HTML.
 *
 * With a DOM available, four strategies run in order and each later one only
 * runs when nothing has been found yet: known image containers, direct image
 * selectors, image links, then every <img> wider than 100px (or of unknown
 * width). Raw attribute values are read with getAttribute so relative URLs
 * are resolved against `baseUrl`, not against the parser document's URL.
 *
 * Where DOMParser is not defined (as in an extension service worker), a
 * regex scan of the markup is used instead.
 *
 * @param {string} html - Detail page markup.
 * @param {string} baseUrl - URL the markup was fetched from.
 * @returns {string[]} Unique absolute image URLs; [] on any parsing error.
 */
function extractImageUrlsFromHtml(html, baseUrl) {
  try {
    console.log(`[Background] Parsing HTML for images... (length: ${html.length} chars)`);

    if (typeof DOMParser === 'undefined') {
      console.log('[Background] DOMParser unavailable, scanning markup directly...');
      const scanned = extractImageUrlsByRegex(html, baseUrl);
      console.log(`[Background] ✓ Extracted ${scanned.length} total images from detail page`);
      return scanned;
    }

    const doc = new DOMParser().parseFromString(html, 'text/html');
    const imageUrls = [];
    const seenUrls = new Set();
    const rawImageUrl = (img) => img.getAttribute('src') || img.getAttribute('data-src');

    // Run `handler` over the matches of every selector; a selector the
    // engine rejects is reported and skipped.
    const forEachSelector = (selectors, handler) => {
      for (const selector of selectors) {
        try {
          const matches = doc.querySelectorAll(selector);
          if (matches.length > 0) {
            handler(matches, selector);
          }
        } catch (e) {
          console.warn(`[Background] Selector failed: ${selector}`, e);
        }
      }
    };

    // Strategy 1: Find image containers
    forEachSelector(DETAIL_PAGE_SELECTORS.imageContainer, (containers, selector) => {
      console.log(`[Background] Found ${containers.length} containers with selector: ${selector}`);
      containers.forEach((container) => {
        extractFromContainer(container, baseUrl, imageUrls, seenUrls);
      });
    });

    // Strategy 2: If no images found, try direct image selectors
    if (imageUrls.length === 0) {
      console.log('[Background] No container images found, trying direct selectors...');
      forEachSelector(DETAIL_PAGE_SELECTORS.imageElements, (images, selector) => {
        console.log(`[Background] Found ${images.length} images with selector: ${selector}`);
        images.forEach((img) => {
          addImageCandidate(rawImageUrl(img), baseUrl, imageUrls, seenUrls);
        });
      });
    }

    // Strategy 3: Look for image links
    if (imageUrls.length === 0) {
      console.log('[Background] No direct images found, trying image links...');
      forEachSelector(DETAIL_PAGE_SELECTORS.imageLinks, (links, selector) => {
        console.log(`[Background] Found ${links.length} image links with selector: ${selector}`);
        links.forEach((link) => {
          addImageCandidate(link.getAttribute('href'), baseUrl, imageUrls, seenUrls);
        });
      });
    }

    // Strategy 4: Last resort - find ALL images and filter
    if (imageUrls.length === 0) {
      console.log('[Background] Last resort: scanning all images...');
      const allImages = doc.querySelectorAll('img');
      console.log(`[Background] Found ${allImages.length} total img elements`);

      let validCount = 0;
      allImages.forEach((img) => {
        // Skip images that declare a small width (thumbnails).
        const wideEnough = !img.width || img.width > 100;
        if (wideEnough && addImageCandidate(rawImageUrl(img), baseUrl, imageUrls, seenUrls)) {
          validCount++;
        }
      });
      console.log(`[Background] Added ${validCount} valid images from all images`);
    }

    console.log(`[Background] ✓ Extracted ${imageUrls.length} total images from detail page`);
    return imageUrls;
  } catch (error) {
    console.error('Error parsing HTML:', error);
    return [];
  }
}
|
|
|
|
/**
 * Extract images from a container element.
 *
 * Collects URLs from the container's <img> tags (src, falling back to
 * data-src) and from anchors whose href contains .jpg, .png or .webp. New,
 * valid URLs are appended to `imageUrls` and recorded in `seenUrls`.
 *
 * Raw attribute values are read with getAttribute rather than the `src` /
 * `href` properties: the properties return URLs already resolved against the
 * document that owns the element, which is not the page at `baseUrl`.
 *
 * @param {Element} container - Element to search within.
 * @param {string} baseUrl - URL used to resolve relative references.
 * @param {string[]} imageUrls - Output list, mutated in place.
 * @param {Set<string>} seenUrls - URLs already collected, mutated in place.
 */
function extractFromContainer(container, baseUrl, imageUrls, seenUrls) {
  const collect = (rawUrl) => {
    if (!rawUrl || !isValidImageUrl(rawUrl)) return;
    const absUrl = resolveUrl(rawUrl, baseUrl);
    if (seenUrls.has(absUrl)) return;
    imageUrls.push(absUrl);
    seenUrls.add(absUrl);
  };

  // Look for img tags
  container.querySelectorAll('img').forEach((img) => {
    collect(img.getAttribute('src') || img.getAttribute('data-src'));
  });

  // Look for image links
  container.querySelectorAll('a[href*=".jpg"], a[href*=".png"], a[href*=".webp"]').forEach((link) => {
    collect(link.getAttribute('href'));
  });
}
|
|
|
|
/**
 * Check if URL looks like a valid image.
 *
 * A URL passes when it is non-empty, is not a `data:` URL, contains none of
 * the markers `favicon`, `logo`, `icon` or `.svg`, and ends in a known raster
 * image extension (optionally followed by a query string).
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean}
 */
function isValidImageUrl(url) {
  if (!url || url.length === 0) return false;

  if (url.startsWith('data:')) return false;

  // Substrings treated as a sign of site chrome rather than product photos.
  const rejectedMarkers = ['favicon', 'logo', 'icon', '.svg'];
  if (rejectedMarkers.some((marker) => url.includes(marker))) return false;

  return /\.(jpg|jpeg|png|webp|gif|bmp)(\?.*)?$/i.test(url);
}
|
|
|
|
/**
 * Convert relative URL to absolute.
 *
 * @param {string} url - URL to resolve; a falsy value yields ''.
 * @param {string} baseUrl - Base used for relative references.
 * @returns {string} `url` unchanged when it already starts with http:// or
 *   https://, the resolved URL otherwise, or `url` unchanged when resolution
 *   fails.
 */
function resolveUrl(url, baseUrl) {
  if (!url) return '';

  const alreadyAbsolute = ['http://', 'https://'].some((scheme) => url.startsWith(scheme));
  if (alreadyAbsolute) return url;

  let resolved = url;
  try {
    resolved = new URL(url, baseUrl).href;
  } catch (e) {
    console.warn('Failed to resolve URL:', url, baseUrl);
  }
  return resolved;
}
|
|
|
|
/**
 * Get image extension from URL or content-type.
 *
 * The URL's extension wins when it is a known image type; `jpeg` is
 * normalised to `jpg`, matching what the content-type branch returns for
 * JPEG. Otherwise the response's content-type header is consulted, and `jpg`
 * is the final default. A missing response is tolerated.
 *
 * @param {string} url - Image URL.
 * @param {{headers: {get: function(string): (string|null)}}} [response] -
 *   Fetch response used for the content-type fallback.
 * @returns {string} One of jpg, png, webp, gif, bmp.
 */
function getImageExtension(url, response) {
  const KNOWN_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'bmp'];

  // Try from URL: the extension must be followed by a query, a fragment or
  // the end of the string.
  const urlMatch = String(url || '').match(/\.([a-z0-9]+)(?:[?#]|$)/i);
  if (urlMatch) {
    const ext = urlMatch[1].toLowerCase();
    if (KNOWN_EXTENSIONS.includes(ext)) {
      return ext === 'jpeg' ? 'jpg' : ext;
    }
  }

  // Try from content-type
  const contentType = response && response.headers ? response.headers.get('content-type') : null;
  if (contentType) {
    const normalized = contentType.toLowerCase();
    if (normalized.includes('jpeg') || normalized.includes('jpg')) return 'jpg';
    if (normalized.includes('png')) return 'png';
    if (normalized.includes('webp')) return 'webp';
    if (normalized.includes('gif')) return 'gif';
    if (normalized.includes('bmp')) return 'bmp';
  }

  return 'jpg'; // Default
}
|
|
|
|
// ============================================================================
// UTILITY FUNCTIONS
// ============================================================================
|
|
|
|
/**
 * Sanitize folder name for use in file system.
 *
 * Non-string input is converted with String(). Characters that are invalid
 * in file names (including control characters) and runs of whitespace become
 * single underscores, the result is capped at 100 characters, stripped of
 * leading/trailing dots and lower-cased. Names that end up empty (for
 * example "." or "..") fall back to 'product_unknown' so the result is never
 * a relative-path component.
 *
 * @param {*} name - Raw product code or name.
 * @returns {string} A non-empty folder name.
 */
function sanitizeFolderName(name) {
  const FALLBACK_NAME = 'product_unknown';
  if (!name) return FALLBACK_NAME;

  const cleaned = String(name)
    .replace(/[<>:"|?*\/\\\u0000-\u001f]/g, '_') // Invalid file system characters
    .replace(/\s+/g, '_') // Spaces to underscores
    .replace(/_{2,}/g, '_') // Multiple underscores to single
    .substring(0, 100) // Limit length
    .replace(/^\.+|\.+$/g, '') // No leading/trailing dots
    .toLowerCase();

  return cleaned || FALLBACK_NAME;
}
|
|
|
|
/**
 * Delay utility.
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
// ============================================================================
// INITIALIZATION
// ============================================================================
|
|
|
|
// Startup: announce on the console, then record a first log entry (which
// also broadcasts the initial state).
console.log('[Background Service Worker] Initialized');
log('Service worker ready', 'info');
|