391 lines
12 KiB
JavaScript
391 lines
12 KiB
JavaScript
/**
 * Content Script - runs inside the target website page.
 *
 * Responsibilities:
 * - Scan the current page for product cards/links
 * - Extract product identifiers and detail page URLs
 * - Send product data to the background service worker
 */

// ============================================================================
// CONFIGURATION - Adjust selectors based on website structure
// ============================================================================

/**
 * Candidate CSS selectors for Yupoo gallery pages, grouped by what they are
 * meant to locate. Each list is ordered; earlier entries are tried first by
 * code that walks a list until something matches.
 */
const SELECTORS = {
  // Elements that may represent a whole product card.
  productCard: [
    'a[href*="/fs/"]', // anchors using the /fs/ URL pattern
    'a[href*="/f/"]', // anchors using the /f/ URL pattern
    'a[href*="/item/"]', // anchors using the /item/ URL pattern
    'a[href*="/product/"]', // anchors using the /product/ URL pattern
    'div[onclick*="/fs/"]', // divs navigating through an inline onclick
    'div[onclick*="/f/"]',
  ],

  // Anchors that may point at a product detail page.
  productLink: [
    'a[href*="/fs/"]',
    'a[href*="/f/"]',
    'a[href*="/item/"]',
    'a[href*="/product"]',
  ],

  // Elements that may carry a product title, most specific first.
  productTitle: [
    '.caption',
    '.box-title',
    '.item-name',
    'p',
    'span',
    'div',
  ],
};

// ============================================================================
// PRODUCT SCANNER
// ============================================================================

/**
 * Scans the current document for product entries and turns them into plain
 * records ({ index, productCode, titleText, detailUrl, thumbnailUrl }).
 *
 * The scanner never writes to the page: all bookkeeping (seen URLs, visited
 * elements) is kept in memory so repeated scans of the same page give the
 * same result.
 */
class ProductScanner {
  constructor() {
    // Products collected by the most recent scanPage() call.
    this.products = [];
    // Detail URLs already emitted during the current scan (de-duplication).
    this.seenUrls = new Set();
  }

  /**
   * Find the first element under `parent` that matches one of `selectors`.
   *
   * @param {Element|Document} parent - Node whose descendants are searched.
   * @param {string|string[]} selectors - A single selector, or a list tried in order.
   * @returns {Element|null} The first match, or null when nothing matches.
   */
  findElement(parent, selectors) {
    if (typeof selectors === 'string') {
      return parent.querySelector(selectors);
    }

    for (const selector of selectors) {
      try {
        const element = parent.querySelector(selector);
        if (element) return element;
      } catch (e) {
        // Deliberate best-effort: a syntactically invalid selector in the list
        // must not prevent the remaining selectors from being tried.
      }
    }
    return null;
  }

  /**
   * Scan the current page for products.
   *
   * First inspects likely product containers; if that yields nothing, falls
   * back to walking image elements and their nearby links.
   *
   * @returns {Array<Object>} The unique products found (also stored on this.products).
   */
  scanPage() {
    console.log('[ProductScanner] Starting page scan...');
    this.products = [];
    this.seenUrls.clear();

    const candidates = this.collectCandidateElements();
    console.log(`[ProductScanner] Found ${candidates.length} product container candidates`);

    for (const element of candidates) {
      try {
        const link = this.resolveLink(element);
        if (!link || !link.href) continue;

        const product = this.extractProductInfo(link, this.products.length);
        if (product && !this.seenUrls.has(product.detailUrl)) {
          this.products.push(product);
          this.seenUrls.add(product.detailUrl);
          console.log(`[ProductScanner] ✓ Added: ${product.productCode || 'unknown'}`);
        }
      } catch (e) {
        console.warn(`[ProductScanner] Error processing element:`, e.message);
      }
    }

    console.log(`[ProductScanner] ✓ Extracted ${this.products.length} unique products`);

    // If still 0 products, do a last-ditch search over images.
    if (this.products.length === 0) {
      this.scanImagesFallback();
    }

    console.log(`[ProductScanner] Final count: ${this.products.length} products`);
    return this.products;
  }

  /**
   * Helper for scanPage(): gather elements that look like product containers.
   *
   * A selector only contributes when it matches more than 6 and fewer than 200
   * elements, which filters out navigation/footer noise. Elements matched by
   * several selectors are returned once.
   *
   * @returns {Element[]} Unique candidate elements in discovery order.
   */
  collectCandidateElements() {
    const containerSelectors = [
      'div.thumb', // thumbnail container
      'div[class*="thumb"]', // any thumb variant
      'li.photo-list-item', // list item in photo list
      'li[class*="item"]', // any item-like list entry
      'div.photo-item', // photo item
      'div[class*="box"]', // box container
      'div > img', // images that are direct children of a div
    ];

    // Tracked in memory instead of stamping data-* attributes onto the page,
    // so a later scan of the same document is not affected by this one.
    const unique = new Set();

    for (const selector of containerSelectors) {
      let matches;
      try {
        matches = document.querySelectorAll(selector);
      } catch (e) {
        // Invalid selector: skip it, keep trying the others.
        continue;
      }

      if (matches.length > 6 && matches.length < 200) {
        console.log(`[ProductScanner] Found ${matches.length} elements with selector: ${selector}`);
        for (const match of matches) {
          unique.add(match);
        }
      }
    }

    return [...unique];
  }

  /**
   * Helper for scanPage(): find the link that belongs to a candidate element.
   *
   * Tries, in order: a link nested inside the element; for images, the
   * enclosing link and then links near up to five ancestors; finally an
   * absolute http(s) URL quoted inside the element's onclick attribute.
   *
   * @param {Element} element - Candidate container or image.
   * @returns {HTMLAnchorElement|null} A (possibly detached) anchor, or null.
   */
  resolveLink(element) {
    // Method 1: a link inside the element.
    const nested = element.querySelector('a[href]');
    if (nested) return nested;

    // Method 2: the element is an image - look around it.
    if (element.tagName === 'IMG' || element.tagName === 'img') {
      // The anchor wrapping the image is the most specific answer; searching
      // an ancestor's descendants first could pick up a sibling product's link.
      const enclosing = element.closest('a[href]');
      if (enclosing) return enclosing;

      let ancestor = element.parentElement;
      for (let depth = 0; ancestor && depth < 5; depth++) {
        const nearby = ancestor.querySelector('a[href]');
        if (nearby) return nearby;
        ancestor = ancestor.parentElement;
      }
    }

    // Method 3: an absolute URL embedded in an inline onclick handler.
    const onclickAttr = element.getAttribute('onclick');
    if (onclickAttr) {
      const urlMatch = onclickAttr.match(/['"](https?:\/\/[^'"]+)['"]/);
      if (urlMatch) {
        // Detached anchor used only as a carrier for the URL.
        const syntheticLink = document.createElement('a');
        syntheticLink.href = urlMatch[1];
        return syntheticLink;
      }
    }

    return null;
  }

  /**
   * Helper for scanPage(): last-ditch scan over .jpg/.png images, using the
   * link of the closest anchor/thumbnail/list container around each image.
   * Appends any new products to this.products.
   */
  scanImagesFallback() {
    console.log('[ProductScanner] Last-ditch: scanning all images with image gallery patterns...');
    const allImages = document.querySelectorAll('img[src*=".jpg"], img[src*=".png"]');
    console.log(`[ProductScanner] Found ${allImages.length} image elements`);

    // Sanity cap: only images at positions 0..100 are inspected.
    const inspected = Array.from(allImages).slice(0, 101);

    for (const img of inspected) {
      try {
        const container = img.closest('a, div.thumb, div.photo, li');
        if (!container) continue;

        const link = container.tagName === 'A' ? container : container.querySelector('a');
        if (!link || !link.href) continue;

        const product = this.extractProductInfo(link, this.products.length);
        if (product && !this.seenUrls.has(product.detailUrl)) {
          this.products.push(product);
          this.seenUrls.add(product.detailUrl);
        }
      } catch (e) {
        console.warn(`[ProductScanner] Error processing element:`, e.message);
      }
    }
  }

  /**
   * Extract product information from a link element.
   *
   * @param {Element} element - Element exposing an `href` (normally an anchor).
   * @param {number} index - Zero-based position; the record stores index + 1.
   * @returns {Object|null} Product record, or null when the link has no href,
   *   looks like a navigation/category link, is not on a yupoo.com host, or
   *   cannot be parsed as a URL.
   */
  extractProductInfo(element, index) {
    if (!element.href) {
      return null;
    }
    let detailUrl = element.href;

    // Skip navigation/category links - only product detail pages are wanted.
    const href = detailUrl.toLowerCase();
    const skipMarkers = ['/categor', '/album?', '/albums', '/home', '/search', 'javascript:', '#'];
    if (skipMarkers.some((marker) => href.includes(marker))) {
      console.log(`[ProductScanner] Skipping non-product URL: ${detailUrl}`);
      return null;
    }

    // Normalize URL (resolves relative links against the current page).
    let parsedUrl;
    try {
      parsedUrl = new URL(detailUrl, window.location.href);
    } catch (e) {
      console.warn(`[ProductScanner] Invalid URL:`, detailUrl);
      return null;
    }

    // Must be on a Yupoo host. The hostname is compared rather than the whole
    // string so "yupoo.com" appearing in a path or query does not qualify.
    const host = parsedUrl.hostname.toLowerCase();
    if (host !== 'yupoo.com' && !host.endsWith('.yupoo.com')) {
      console.log(`[ProductScanner] Skipping non-Yupoo URL: ${detailUrl}`);
      return null;
    }
    detailUrl = parsedUrl.href;

    // Title: first non-empty line of text in the nearest div/li around the
    // link (or the link itself), capped at 200 characters.
    const container = element.closest('div, li') || element;
    const allText = container.innerText || container.textContent || '';
    const firstLine = allText.split('\n').find((line) => line.trim().length > 0);
    let titleText = firstLine ? firstLine.trim().substring(0, 200) : '';

    // If still no title, fall back to the alt text of an image in the container.
    if (!titleText) {
      const img = container.querySelector('img');
      if (img && img.alt) {
        titleText = img.alt;
      }
    }

    return {
      index: index + 1,
      productCode: this.extractProductCode(titleText, detailUrl),
      titleText,
      detailUrl,
      thumbnailUrl: this.extractThumbnailUrl(container),
    };
  }

  /**
   * Extract a product identifier from the title or, failing that, the URL.
   *
   * Order of preference: "[CODE]" in the title (4+ uppercase letters/digits),
   * any run of 6+ uppercase letters/digits in the title, a known URL pattern
   * (/fs/, /f/<version>/, /item/, /product/, /album/), then the last path
   * segment if it is longer than 2 characters and not purely digits/dots.
   *
   * @param {string} titleText - Title text; may be empty.
   * @param {string} detailUrl - Detail page URL.
   * @returns {string|null} The identifier, or null when none can be derived.
   */
  extractProductCode(titleText, detailUrl) {
    const title = titleText || '';

    // Pattern: [XXXXX...] in the title.
    const bracketMatch = title.match(/\[([A-Z0-9]{4,})\]/);
    if (bracketMatch) {
      return bracketMatch[1];
    }

    // 6+ uppercase alphanumeric characters anywhere in the title.
    const runMatch = title.match(/[A-Z0-9]{6,}/);
    if (runMatch) {
      return runMatch[0];
    }

    // URL slug patterns, tried in order.
    const slugPatterns = [
      /\/fs\/([a-zA-Z0-9]+)/i, // /fs/XXXXX
      /\/f\/[\d.]+\/([a-zA-Z0-9]+)/i, // /f/7.11.31/XXXXX
      /\/item\/([a-zA-Z0-9]+)/i,
      /\/product\/([a-zA-Z0-9]+)/i,
      /\/album\/([a-zA-Z0-9]+)/i,
    ];
    for (const pattern of slugPatterns) {
      const slugMatch = detailUrl.match(pattern);
      if (slugMatch && slugMatch[1]) {
        return slugMatch[1];
      }
    }

    // Last resort - the final path segment.
    let pathname;
    try {
      pathname = new URL(detailUrl).pathname;
    } catch (e) {
      // Not an absolute URL: there is no path to derive a code from.
      return null;
    }

    const pathSegments = pathname.split('/').filter(Boolean);
    if (pathSegments.length > 0) {
      const lastSegment = pathSegments[pathSegments.length - 1];
      if (lastSegment && lastSegment.length > 2 && !/^[0-9.]+$/.test(lastSegment)) {
        return lastSegment;
      }
    }

    return null;
  }

  /**
   * Extract a thumbnail image URL from a card element.
   *
   * Checks a nested <img>, then the element itself if it is an <img>, then
   * the element's computed CSS background image.
   *
   * @param {Element} element - Card/container element.
   * @returns {string|null} Image URL, or null when none is found.
   */
  extractThumbnailUrl(element) {
    // Nested image.
    const imgElement = element.querySelector('img');
    if (imgElement && imgElement.src) {
      return imgElement.src;
    }

    // The element is the image itself.
    if (element.tagName === 'IMG' || element.tagName === 'img') {
      return element.src;
    }

    // CSS background image.
    const style = window.getComputedStyle(element);
    const bgImage = style.backgroundImage;
    if (bgImage && bgImage !== 'none') {
      const match = bgImage.match(/url\(['"]*([^'"]*)['"]*\)/);
      if (match) {
        return match[1];
      }
    }

    return null;
  }
}

// ============================================================================
// MESSAGE LISTENER - Communicate with popup and background
// ============================================================================

/**
 * Handle requests sent to this content script.
 *
 * Supported `request.action` values:
 * - 'scanPage': run a ProductScanner and reply with { success, productCount, products }.
 * - 'debug':    reply with element counts and up to 20 sample link URLs.
 * - 'ping':     reply with { success: true }.
 *
 * Every reply is sent synchronously, so the listener returns false; returning
 * true would tell the runtime to keep the channel open for a later reply that
 * never comes (notably for actions this listener does not handle).
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  // Guard against a null/undefined message before reading its action.
  const action = request && request.action;
  console.log('[Content] Received message:', action);

  try {
    switch (action) {
      case 'scanPage': {
        const products = new ProductScanner().scanPage();
        sendResponse({
          success: true,
          productCount: products.length,
          products: products,
        });
        break;
      }

      case 'debug': {
        // Debug diagnostics: how many elements match a few selectors of interest.
        const countOf = (selector) => document.querySelectorAll(selector).length;
        // Queried once and reused for both the count and the samples.
        const links = document.querySelectorAll('a[href]');

        sendResponse({
          success: true,
          divOnclick: countOf('div[onclick*="/item/"]'),
          linkItem: countOf('a[href*="/item/"]'),
          linkProduct: countOf('a[href*="/product"]'),
          divItem: countOf('div.item'),
          allImages: countOf('img'),
          allLinks: links.length,
          htmlLength: document.documentElement.innerHTML.length,
          // First 20 links, to help diagnose selector mismatches.
          sampleLinks: Array.from(links)
            .slice(0, 20)
            .map((link) => link.href),
        });
        break;
      }

      case 'ping':
        sendResponse({ success: true });
        break;

      default:
        // Not an action for this listener: send nothing and do not claim the channel.
        break;
    }
  } catch (error) {
    console.error('[Content] Error:', error);
    sendResponse({
      success: false,
      error: error && error.message ? error.message : String(error),
    });
  }

  // All responses above were sent synchronously.
  return false;
});

console.log('[Content Script] Loaded and ready');
|