/**
 * Content Script - Runs on the target website page
 *
 * Responsibilities:
 * - Scan the current page for product cards/links
 * - Extract product identifiers and detail page URLs
 * - Reply with product data to whoever sent the runtime message
 */

// ============================================================================
// CONFIGURATION - Adjust selectors based on website structure
// ============================================================================

const SELECTORS = {
  // Yupoo gallery page selectors - optimized for various Yupoo URL patterns
  productCard: [
    'a[href*="/fs/"]', // Yupoo /fs/ pattern
    'a[href*="/f/"]', // Yupoo /f/ pattern
    'a[href*="/item/"]', // Yupoo /item/ pattern
    'a[href*="/product/"]', // Product pattern
    'div[onclick*="/fs/"]', // Onclick handlers
    'div[onclick*="/f/"]',
  ],
  productLink: [
    'a[href*="/fs/"]',
    'a[href*="/f/"]',
    'a[href*="/item/"]',
    'a[href*="/product"]',
  ],
  productTitle: [
    '.caption',
    '.box-title',
    '.item-name',
    'p',
    'span',
    'div',
  ],
};

// Container selectors tried by the main scan strategy, in order.
const CONTAINER_SELECTORS = [
  'div.thumb', // Common Yupoo thumbnail class
  'div[class*="thumb"]', // Any thumb variant
  'li.photo-list-item', // List item in photo list
  'li[class*="item"]', // Any list item
  'div.photo-item', // Photo item
  'div[class*="box"]', // Box container
  'div > img', // Image that is a direct child of a div
];

// Substrings (matched against the lowercased href) that mark navigation,
// category, or in-page links rather than product detail pages.
const NON_PRODUCT_URL_MARKERS = [
  '/categor',
  '/album?',
  '/albums',
  '/home',
  '/search',
  'javascript:',
  '#',
];

// URL patterns whose first capture group is used as the product code.
const URL_CODE_PATTERNS = [
  /\/fs\/([a-zA-Z0-9]+)/i, // /fs/XXXXX
  /\/f\/[\d.]+\/([a-zA-Z0-9]+)/i, // /f/7.11.31/XXXXX
  /\/item\/([a-zA-Z0-9]+)/i,
  /\/product\/([a-zA-Z0-9]+)/i,
  /\/album\/([a-zA-Z0-9]+)/i,
];

// The last-ditch image scan looks at images with index 0..100 only.
const FALLBACK_IMAGE_LIMIT = 101;

// ============================================================================
// PRODUCT SCANNER
// ============================================================================

/**
 * Scans the current document for product links and turns them into plain
 * product records ({ index, productCode, titleText, detailUrl, thumbnailUrl }).
 */
class ProductScanner {
  constructor() {
    this.products = [];
    this.seenUrls = new Set();
  }

  /**
   * Find the first matching element using a selector string or selector array.
   *
   * @param {Element|Document} parent - Node to search within.
   * @param {string|string[]} selectors - One selector, or several tried in order.
   * @returns {Element|null} The first match, or null when nothing matches.
   */
  findElement(parent, selectors) {
    if (typeof selectors === 'string') {
      return parent.querySelector(selectors);
    }

    for (const selector of selectors) {
      try {
        const element = parent.querySelector(selector);
        if (element) return element;
      } catch (e) {
        // Invalid selector, continue with the next one
      }
    }
    return null;
  }

  /**
   * Scan the current page for products.
   *
   * Resets any previous results, runs the container-based strategy, and falls
   * back to an image-based search when that finds nothing.
   *
   * @returns {Object[]} The unique products found (also stored on this.products).
   */
  scanPage() {
    console.log('[ProductScanner] Starting page scan...');
    this.products = [];
    this.seenUrls.clear();

    // STRATEGY 1: Look for product thumbnail containers
    const candidates = this.collectCandidateElements();
    console.log(`[ProductScanner] Found ${candidates.length} product container candidates`);

    // Tracked per scan in memory instead of as a data-* attribute on the page,
    // so that a later scan of the same page sees every element again.
    const visited = new Set();

    for (const element of candidates) {
      // Several selectors can match the same element; handle it once.
      if (visited.has(element)) continue;
      visited.add(element);

      try {
        const product = this.addProductFromLink(this.resolveLink(element));
        if (product) {
          console.log(`[ProductScanner] ✓ Added: ${product.productCode || 'unknown'}`);
        }
      } catch (e) {
        console.warn(`[ProductScanner] Error processing element:`, e.message);
      }
    }

    console.log(`[ProductScanner] ✓ Extracted ${this.products.length} unique products`);

    // If still 0 products, do a last-ditch search
    if (this.products.length === 0) {
      this.scanFallbackImages();
    }

    console.log(`[ProductScanner] Final count: ${this.products.length} products`);
    return this.products;
  }

  /**
   * Gather candidate container elements for every container selector.
   * A selector's matches are kept only when it yields between 7 and 199
   * elements, which filters out nav/footer matches and overly broad selectors.
   * The result may contain the same element more than once.
   *
   * @returns {Element[]} Candidate elements in selector order.
   */
  collectCandidateElements() {
    const candidates = [];

    for (const selector of CONTAINER_SELECTORS) {
      try {
        const elements = document.querySelectorAll(selector);
        if (elements.length > 6 && elements.length < 200) {
          console.log(`[ProductScanner] Found ${elements.length} elements with selector: ${selector}`);
          candidates.push(...elements);
        }
      } catch (e) {
        // Invalid selector, continue with the next one
      }
    }

    return candidates;
  }

  /**
   * Find the link that belongs to a candidate element.
   *
   * @param {Element} element - Candidate container or image.
   * @returns {HTMLAnchorElement|null} A link element, or null when none is found.
   */
  resolveLink(element) {
    // Method 1: Look for an <a> tag within the element
    const nested = element.querySelector('a[href]');
    if (nested) return nested;

    // Method 2: The element is an image - find the link around it
    if (element.tagName === 'IMG' || element.tagName === 'img') {
      // The anchor wrapping the image is the most specific match. A
      // descendant search from an ancestor can never return that anchor and
      // may return a different card's link instead.
      const enclosing = element.closest('a[href]');
      if (enclosing) return enclosing;

      let parent = element.parentElement;
      let depth = 0;
      while (parent && depth < 5) {
        const potentialLink = parent.querySelector('a[href]');
        if (potentialLink) return potentialLink;
        parent = parent.parentElement;
        depth++;
      }
    }

    // Method 3: Try to extract an absolute URL from an onclick handler
    const onclickAttr = element.getAttribute('onclick');
    if (onclickAttr) {
      const urlMatch = onclickAttr.match(/['"](https?:\/\/[^'"]+)['"]/);
      if (urlMatch) {
        // Detached anchor used only as a carrier for the URL.
        const fakeLink = document.createElement('a');
        fakeLink.href = urlMatch[1];
        return fakeLink;
      }
    }

    return null;
  }

  /**
   * Extract a product from a link and record it unless its URL was already seen.
   *
   * @param {HTMLAnchorElement|null} link - Link to a product detail page.
   * @returns {Object|null} The product that was added, or null if nothing was added.
   */
  addProductFromLink(link) {
    if (!link || !link.href) return null;

    const product = this.extractProductInfo(link, this.products.length);
    if (!product || this.seenUrls.has(product.detailUrl)) return null;

    this.products.push(product);
    this.seenUrls.add(product.detailUrl);
    return product;
  }

  /**
   * Last-ditch strategy: look at .jpg/.png images and use the link of their
   * closest anchor / thumbnail / list-item container.
   */
  scanFallbackImages() {
    console.log('[ProductScanner] Last-ditch: scanning all images with image gallery patterns...');
    const allImages = document.querySelectorAll('img[src*=".jpg"], img[src*=".png"]');
    console.log(`[ProductScanner] Found ${allImages.length} image elements`);

    // Sanity check: only look at the first images on very large pages
    const limitedImages = Array.from(allImages).slice(0, FALLBACK_IMAGE_LIMIT);

    for (const img of limitedImages) {
      const container = img.closest('a, div.thumb, div.photo, li');
      if (!container) continue;

      const link = container.tagName === 'A' ? container : container.querySelector('a');
      this.addProductFromLink(link);
    }
  }

  /**
   * Extract product information from a link element.
   *
   * @param {HTMLAnchorElement} element - Element whose href points at the product.
   * @param {number} index - Zero-based position; stored one-based on the result.
   * @returns {Object|null} Product record, or null when the link has no href,
   *   looks like a navigation link, is not a valid URL, or is not on a Yupoo host.
   */
  extractProductInfo(element, index) {
    if (!element.href) {
      return null;
    }
    const rawUrl = element.href;

    // Skip navigation/category links - we only want product detail pages
    const href = rawUrl.toLowerCase();
    if (NON_PRODUCT_URL_MARKERS.some((marker) => href.includes(marker))) {
      console.log(`[ProductScanner] Skipping non-product URL: ${rawUrl}`);
      return null;
    }

    // Normalize URL
    let parsedUrl;
    try {
      parsedUrl = new URL(rawUrl, window.location.href);
    } catch (e) {
      console.warn(`[ProductScanner] Invalid URL:`, rawUrl);
      return null;
    }

    // Must have a Yupoo domain. Compare the hostname rather than the whole
    // URL so "yupoo.com" inside a path or query string does not count.
    const host = parsedUrl.hostname;
    if (host !== 'yupoo.com' && !host.endsWith('.yupoo.com')) {
      console.log(`[ProductScanner] Skipping non-Yupoo URL: ${rawUrl}`);
      return null;
    }
    const detailUrl = parsedUrl.href;

    // Try to find title from nearby text: first non-blank line of the container
    const container = element.closest('div, li') || element;
    const allText = container.innerText || container.textContent || '';
    const lines = allText.split('\n').filter((line) => line.trim().length > 0);

    let titleText = '';
    if (lines.length > 0) {
      titleText = lines[0].trim();
      if (titleText.length > 200) {
        titleText = titleText.substring(0, 200);
      }
    }

    // If still no title, look for alt text on images
    if (!titleText) {
      const img = container.querySelector('img');
      if (img && img.alt) {
        titleText = img.alt;
      }
    }

    return {
      index: index + 1,
      productCode: this.extractProductCode(titleText, detailUrl),
      titleText,
      detailUrl,
      thumbnailUrl: this.extractThumbnailUrl(container),
    };
  }

  /**
   * Extract a product identifier from the title or, failing that, the URL.
   * Looks for patterns like "[3ME10101430]" or a run of 6+ uppercase
   * letters/digits in the title, then known URL patterns, then the last
   * path segment.
   *
   * @param {string} titleText - Product title; a non-string is treated as empty.
   * @param {string} detailUrl - Product detail URL.
   * @returns {string|null} The product code, or null when none can be derived.
   */
  extractProductCode(titleText, detailUrl) {
    const title = typeof titleText === 'string' ? titleText : '';
    const url = typeof detailUrl === 'string' ? detailUrl : '';

    // Look for pattern: [XXXXX...] in the title (Yupoo format)
    const bracketMatch = title.match(/\[([A-Z0-9]{4,})\]/);
    if (bracketMatch) {
      return bracketMatch[1];
    }

    // Look for 6+ uppercase alphanumeric characters
    const codeMatch = title.match(/[A-Z0-9]{6,}/);
    if (codeMatch) {
      return codeMatch[0];
    }

    // Try URL slug - look for various Yupoo patterns
    for (const pattern of URL_CODE_PATTERNS) {
      const urlMatch = url.match(pattern);
      if (urlMatch && urlMatch[1]) {
        return urlMatch[1];
      }
    }

    // Last resort - extract last path segment
    let pathSegments;
    try {
      pathSegments = new URL(url).pathname.split('/').filter(Boolean);
    } catch (e) {
      // Not an absolute URL, so there is no path to take a segment from
      return null;
    }

    if (pathSegments.length > 0) {
      const lastSegment = pathSegments[pathSegments.length - 1];
      // Ignore very short segments and purely numeric/dotted ones
      if (lastSegment && lastSegment.length > 2 && !/^[0-9.]+$/.test(lastSegment)) {
        return lastSegment;
      }
    }

    return null;
  }

  /**
   * Extract thumbnail image URL from a card element.
   *
   * @param {Element} element - Card container, or the image itself.
   * @returns {string|null} Image URL from a nested <img>, the element's own
   *   src, or its computed background image; null when none is available.
   */
  extractThumbnailUrl(element) {
    // Look for img tag
    const imgElement = element.querySelector('img');
    if (imgElement && imgElement.src) {
      return imgElement.src;
    }

    // If element is img itself
    if (element.tagName === 'IMG' || element.tagName === 'img') {
      return element.src;
    }

    // Look for background image
    const style = window.getComputedStyle(element);
    const bgImage = style.backgroundImage;
    if (bgImage && bgImage !== 'none') {
      const match = bgImage.match(/url\(['"]*([^'"]*)['"]*\)/);
      if (match) {
        return match[1];
      }
    }

    return null;
  }
}

// ============================================================================
// MESSAGE LISTENER - Communicate with popup and background
// ============================================================================

/**
 * Collect page diagnostics for the 'debug' action.
 *
 * @returns {Object} Element counts, HTML length, and the first 20 link hrefs.
 */
function collectDebugInfo() {
  const anchors = document.querySelectorAll('a[href]');

  return {
    success: true,
    divOnclick: document.querySelectorAll('div[onclick*="/item/"]').length,
    linkItem: document.querySelectorAll('a[href*="/item/"]').length,
    linkProduct: document.querySelectorAll('a[href*="/product"]').length,
    divItem: document.querySelectorAll('div.item').length,
    allImages: document.querySelectorAll('img').length,
    allLinks: anchors.length,
    htmlLength: document.documentElement.innerHTML.length,
    // Sample links to help debug
    sampleLinks: Array.from(anchors)
      .slice(0, 20)
      .map((link) => link.href),
  };
}

/**
 * Handle a runtime message ('scanPage', 'debug', or 'ping').
 *
 * @param {Object} request - Message payload; request.action selects the handler.
 * @param {Object} sender - Message sender (unused).
 * @param {Function} sendResponse - Callback used to reply.
 * @returns {boolean} Always false: every reply is sent synchronously, so the
 *   message channel does not need to stay open. Unrecognized actions get no
 *   reply from this listener.
 */
function handleMessage(request, sender, sendResponse) {
  const action = request && request.action;
  console.log('[Content] Received message:', action);

  try {
    if (action === 'scanPage') {
      const scanner = new ProductScanner();
      const products = scanner.scanPage();
      sendResponse({
        success: true,
        productCount: products.length,
        products: products,
      });
    } else if (action === 'debug') {
      sendResponse(collectDebugInfo());
    } else if (action === 'ping') {
      sendResponse({ success: true });
    }
  } catch (error) {
    console.error('[Content] Error:', error);
    sendResponse({
      success: false,
      error: error.message,
    });
  }

  return false;
}

// Register only when the extension messaging API exists, so this file can
// also be loaded outside an extension (for example in a test harness).
if (typeof chrome !== 'undefined' && chrome.runtime && chrome.runtime.onMessage) {
  chrome.runtime.onMessage.addListener(handleMessage);
} else {
  console.warn('[Content Script] chrome.runtime.onMessage unavailable; message listener not registered');
}

console.log('[Content Script] Loaded and ready');