2026-04-20 00:08:24 +03:00

391 lines
12 KiB
JavaScript

/**
* Content Script - Runs on the target website page
*
* Responsibilities:
* - Scan the current page for product cards/links
* - Extract product identifiers and detail page URLs
* - Send product data to the background service worker
*/
// ============================================================================
// CONFIGURATION - Adjust selectors based on website structure
// ============================================================================
/**
 * CSS selector lists describing where product data may live on a gallery page.
 *
 * Each property is an array of alternative selectors, listed in the order
 * they are meant to be tried.
 *
 * NOTE(review): nothing in the visible part of this file reads SELECTORS;
 * the scanner uses its own inline selector lists. Confirm whether this
 * object is still used elsewhere before relying on it.
 */
const SELECTORS = {
// Yupoo gallery page selectors - optimized for various Yupoo URL patterns
// Elements that may represent a whole product card.
productCard: [
'a[href*="/fs/"]', // Yupoo /fs/ pattern
'a[href*="/f/"]', // Yupoo /f/ pattern
'a[href*="/item/"]', // Yupoo /item/ pattern
'a[href*="/product/"]', // Product pattern
'div[onclick*="/fs/"]', // Onclick handlers
'div[onclick*="/f/"]',
],
// Anchors that may point at a product detail page.
// NOTE(review): the last entry matches "/product" without a trailing slash,
// while productCard matches "/product/" - confirm which one is intended.
productLink: [
'a[href*="/fs/"]',
'a[href*="/f/"]',
'a[href*="/item/"]',
'a[href*="/product"]',
],
// Elements that may hold a product title; ordered from specific class names
// down to generic tags that match almost any text node wrapper.
productTitle: [
'.caption',
'.box-title',
'.item-name',
'p',
'span',
'div',
],
};
// ============================================================================
// PRODUCT SCANNER
// ============================================================================
/**
 * Scans the current document for product links and turns them into plain
 * product records ({ index, productCode, titleText, detailUrl, thumbnailUrl }).
 *
 * A scan never modifies the page: bookkeeping about which elements were
 * already handled lives in per-scan local state, so scanning the same page
 * repeatedly yields the same result every time.
 */
class ProductScanner {
  constructor() {
    // Products collected by the most recent scanPage() call.
    this.products = [];
    // Normalized detail URLs already emitted; used to de-duplicate products.
    this.seenUrls = new Set();
  }

  /**
   * Find the first element matching a selector or a list of selectors.
   *
   * @param {Element|Document} parent - Node to search within.
   * @param {string|string[]} selectors - One selector, or alternatives tried in order.
   * @returns {Element|null} The first match, or null when nothing matches.
   */
  findElement(parent, selectors) {
    if (typeof selectors === 'string') {
      return parent.querySelector(selectors);
    }
    for (const selector of selectors) {
      try {
        const element = parent.querySelector(selector);
        if (element) return element;
      } catch (e) {
        // querySelector throws on invalid selector syntax; try the next one.
      }
    }
    return null;
  }

  /**
   * Scan the current page for products.
   *
   * Resets any previous results, extracts one product per unique detail URL
   * from likely thumbnail containers, and falls back to scanning images when
   * the container strategy finds nothing.
   *
   * @returns {Array<Object>} The products found (also stored on this.products).
   */
  scanPage() {
    console.log('[ProductScanner] Starting page scan...');
    this.products = [];
    this.seenUrls.clear();

    const productElements = this.collectCandidateElements();
    console.log(`[ProductScanner] Found ${productElements.length} product container candidates`);

    // The same element can be matched by several selectors. Track visited
    // elements locally instead of writing a marker attribute into the DOM:
    // a DOM marker would survive this scan and make every later scan skip
    // the element.
    const visited = new Set();
    const extractedProducts = [];
    for (const element of productElements) {
      if (visited.has(element)) continue;
      visited.add(element);
      try {
        const link = this.findLinkForElement(element);
        if (!link || !link.href) continue;
        const product = this.extractProductInfo(link, extractedProducts.length);
        if (product && !this.seenUrls.has(product.detailUrl)) {
          extractedProducts.push(product);
          this.seenUrls.add(product.detailUrl);
          console.log(`[ProductScanner] ✓ Added: ${product.productCode || 'unknown'}`);
        }
      } catch (e) {
        console.warn(`[ProductScanner] Error processing element:`, e.message);
      }
    }
    this.products = extractedProducts;
    console.log(`[ProductScanner] ✓ Extracted ${this.products.length} unique products`);

    // If still 0 products, do a last-ditch search over plain images.
    if (this.products.length === 0) {
      this.scanImagesFallback();
    }
    console.log(`[ProductScanner] Final count: ${this.products.length} products`);
    return this.products;
  }

  /**
   * Internal helper: gather elements that look like product thumbnails.
   *
   * A selector only contributes when it matches more than 6 and fewer than
   * 200 elements, which filters out nav/footer matches and overly generic hits.
   *
   * @returns {Element[]} Candidates in selector order; may contain duplicates.
   */
  collectCandidateElements() {
    const containerSelectors = [
      'div.thumb', // Common Yupoo thumbnail class
      'div[class*="thumb"]', // Any thumb variant
      'li.photo-list-item', // List item in photo list
      'li[class*="item"]', // Any list item
      'div.photo-item', // Photo item
      'div[class*="box"]', // Box container
      'div > img', // Images that are direct children of a div
    ];
    const candidates = [];
    for (const selector of containerSelectors) {
      try {
        const elements = document.querySelectorAll(selector);
        if (elements.length > 6 && elements.length < 200) {
          console.log(`[ProductScanner] Found ${elements.length} elements with selector: ${selector}`);
          candidates.push(...elements);
        }
      } catch (e) {
        // Invalid selector; skip it.
      }
    }
    return candidates;
  }

  /**
   * Internal helper: resolve the link that belongs to a candidate element.
   *
   * Tries, in order: a link nested inside the element; for images, the link
   * wrapping the image and then a link inside one of up to 5 ancestors; and
   * finally an absolute http(s) URL quoted inside an onclick attribute.
   *
   * @param {Element} element - Candidate container or image.
   * @returns {HTMLAnchorElement|null} An anchor (possibly detached), or null.
   */
  findLinkForElement(element) {
    const nestedLink = element.querySelector('a[href]');
    if (nestedLink) return nestedLink;

    if (String(element.tagName).toUpperCase() === 'IMG') {
      // A link wrapping the image is certainly its own link; prefer it over
      // searching ancestors, which returns the first link in document order
      // and may belong to a neighbouring card.
      const wrappingLink = element.closest('a[href]');
      if (wrappingLink) return wrappingLink;

      let ancestor = element.parentElement;
      for (let depth = 0; ancestor && depth < 5; depth++) {
        const nearbyLink = ancestor.querySelector('a[href]');
        if (nearbyLink) return nearbyLink;
        ancestor = ancestor.parentElement;
      }
    }

    const onclickAttr = element.getAttribute('onclick');
    if (onclickAttr) {
      const urlMatch = onclickAttr.match(/['"](https?:\/\/[^'"]+)['"]/);
      if (urlMatch) {
        // Detached anchor: only used as a carrier for the href.
        const syntheticLink = document.createElement('a');
        syntheticLink.href = urlMatch[1];
        return syntheticLink;
      }
    }
    return null;
  }

  /**
   * Internal helper: last-ditch strategy used when no container produced a
   * product. Looks at .jpg/.png images and the links around them, appending
   * any new products to this.products.
   */
  scanImagesFallback() {
    console.log('[ProductScanner] Last-ditch: scanning all images with image gallery patterns...');
    const allImages = document.querySelectorAll('img[src*=".jpg"], img[src*=".png"]');
    console.log(`[ProductScanner] Found ${allImages.length} image elements`);
    allImages.forEach((img, idx) => {
      if (idx > 100) return; // Sanity check: cap the work on image-heavy pages
      const container = img.closest('a, div.thumb, div.photo, li');
      if (!container) return;
      const link = container.tagName === 'A' ? container : container.querySelector('a');
      if (!link || !link.href) return;
      const product = this.extractProductInfo(link, this.products.length);
      if (product && !this.seenUrls.has(product.detailUrl)) {
        this.products.push(product);
        this.seenUrls.add(product.detailUrl);
      }
    });
  }

  /**
   * Extract product information from a link element.
   *
   * @param {Element} element - Element exposing a string `href`.
   * @param {number} index - Zero-based position; stored one-based on the result.
   * @returns {Object|null} The product record, or null when the link is
   *   missing, unparsable, a navigation/category link, or not on a Yupoo host.
   */
  extractProductInfo(element, index) {
    if (!element || typeof element.href !== 'string' || !element.href) {
      return null;
    }

    // Normalize first so every later check sees the absolute URL.
    let parsedUrl;
    try {
      parsedUrl = new URL(element.href, window.location.href);
    } catch (e) {
      console.warn(`[ProductScanner] Invalid URL:`, element.href);
      return null;
    }
    const detailUrl = parsedUrl.href;
    const href = detailUrl.toLowerCase();

    // Skip navigation/category links - we only want product detail pages.
    if (href.includes('/categor') ||
        href.includes('/album?') ||
        href.includes('/albums') ||
        href.includes('/home') ||
        href.includes('/search') ||
        href.includes('javascript:') ||
        href.includes('#')) {
      console.log(`[ProductScanner] Skipping non-product URL: ${detailUrl}`);
      return null;
    }

    // Must have a Yupoo domain. Compare the parsed hostname rather than the
    // whole URL so "yupoo.com" in a path or query string does not pass.
    const host = parsedUrl.hostname.toLowerCase();
    if (host !== 'yupoo.com' && !host.endsWith('.yupoo.com')) {
      console.log(`[ProductScanner] Skipping non-Yupoo URL: ${detailUrl}`);
      return null;
    }

    // Title: first non-blank line of text in the nearest div/li container.
    const container = element.closest('div, li') || element;
    const allText = container.innerText || container.textContent || '';
    const firstLine = allText.split('\n').find((line) => line.trim().length > 0);
    let titleText = '';
    if (firstLine !== undefined) {
      titleText = firstLine.trim();
      if (titleText.length > 200) {
        titleText = titleText.substring(0, 200);
      }
    }

    // If still no title, fall back to the alt text of an image.
    if (!titleText) {
      const img = container.querySelector('img');
      if (img && img.alt) {
        titleText = img.alt;
      }
    }

    return {
      index: index + 1,
      productCode: this.extractProductCode(titleText, detailUrl),
      titleText,
      detailUrl,
      thumbnailUrl: this.extractThumbnailUrl(container),
    };
  }

  /**
   * Extract a product identifier from the title or, failing that, the URL.
   *
   * Order of preference: a bracketed code such as "[3ME10101430]" in the
   * title; any run of 6+ uppercase letters/digits in the title; a known URL
   * pattern (/fs/, /f/<version>/, /item/, /product/, /album/); the last
   * non-numeric path segment longer than 2 characters.
   *
   * @param {string} titleText - Product title; non-strings are treated as empty.
   * @param {string} detailUrl - Product detail URL.
   * @returns {string|null} The identifier, or null when none can be derived.
   */
  extractProductCode(titleText, detailUrl) {
    const title = typeof titleText === 'string' ? titleText : '';
    const url = typeof detailUrl === 'string' ? detailUrl : '';

    // Look for pattern: [XXXXX...] in the title (Yupoo format)
    const bracketMatch = title.match(/\[([A-Z0-9]{4,})\]/);
    if (bracketMatch) {
      return bracketMatch[1];
    }

    // Look for 6+ alphanumeric characters
    const codeMatch = title.match(/[A-Z0-9]{6,}/);
    if (codeMatch) {
      return codeMatch[0];
    }

    // Try URL slug - look for various Yupoo patterns
    const urlMatch = url.match(/\/fs\/([a-zA-Z0-9]+)/i) || // /fs/XXXXX
      url.match(/\/f\/[\d.]+\/([a-zA-Z0-9]+)/i) || // /f/7.11.31/XXXXX
      url.match(/\/item\/([a-zA-Z0-9]+)/i) ||
      url.match(/\/product\/([a-zA-Z0-9]+)/i) ||
      url.match(/\/album\/([a-zA-Z0-9]+)/i);
    if (urlMatch && urlMatch[1]) {
      return urlMatch[1];
    }

    // Last resort - extract last path segment
    let pathSegments;
    try {
      pathSegments = new URL(url).pathname.split('/').filter(Boolean);
    } catch (e) {
      // Not an absolute URL: there is nothing left to derive a code from.
      return null;
    }
    if (pathSegments.length > 0) {
      const lastSegment = pathSegments[pathSegments.length - 1];
      // Purely numeric/dotted segments look like versions or ids, not codes.
      if (lastSegment && lastSegment.length > 2 && !/^[0-9.]+$/.test(lastSegment)) {
        return lastSegment;
      }
    }
    return null;
  }

  /**
   * Extract a thumbnail image URL from a card element.
   *
   * @param {Element} element - Card container (or the image itself).
   * @returns {string|null} The src of a nested image, the element's own src
   *   when it is an image, the URL of its computed CSS background image, or
   *   null when none of these exist.
   */
  extractThumbnailUrl(element) {
    // Look for img tag
    const imgElement = element.querySelector('img');
    if (imgElement && imgElement.src) {
      return imgElement.src;
    }

    // If element is img itself
    if (String(element.tagName).toUpperCase() === 'IMG') {
      return element.src;
    }

    // Look for background image
    const bgImage = window.getComputedStyle(element).backgroundImage;
    if (bgImage && bgImage !== 'none') {
      const match = bgImage.match(/url\(['"]*([^'"]*)['"]*\)/);
      if (match) {
        return match[1];
      }
    }
    return null;
  }
}
// ============================================================================
// MESSAGE LISTENER - Communicate with popup and background
// ============================================================================
/**
 * Handle requests sent to this content script.
 *
 * Supported actions:
 * - 'scanPage': run a fresh ProductScanner and reply with the products found.
 * - 'debug':    reply with element counts and sample links for diagnostics.
 * - 'ping':     reply with { success: true }.
 *
 * Any other action is left unanswered so that another listener may handle it.
 * Every reply is sent synchronously, so the listener returns false: returning
 * true would keep the message channel open, and a sender waiting on an
 * unrecognized action would never get its callback invoked.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  const action = request ? request.action : undefined;
  console.log('[Content] Received message:', action);
  try {
    if (action === 'scanPage') {
      const scanner = new ProductScanner();
      const products = scanner.scanPage();
      sendResponse({
        success: true,
        productCount: products.length,
        products: products,
      });
    } else if (action === 'debug') {
      // Debug diagnostics: counts for the element patterns of interest.
      const countOf = (selector) => document.querySelectorAll(selector).length;
      // Query the links once and reuse them for both the count and the sample.
      const links = document.querySelectorAll('a[href]');
      sendResponse({
        success: true,
        divOnclick: countOf('div[onclick*="/item/"]'),
        linkItem: countOf('a[href*="/item/"]'),
        linkProduct: countOf('a[href*="/product"]'),
        divItem: countOf('div.item'),
        allImages: countOf('img'),
        allLinks: links.length,
        htmlLength: document.documentElement.innerHTML.length,
        // First 20 links, to help debug selector mismatches.
        sampleLinks: Array.from(links).slice(0, 20).map((link) => link.href),
      });
    } else if (action === 'ping') {
      sendResponse({ success: true });
    }
  } catch (error) {
    console.error('[Content] Error:', error);
    sendResponse({
      success: false,
      error: error.message,
    });
  }
  // All responses above are synchronous; do not hold the channel open.
  return false;
});
console.log('[Content Script] Loaded and ready');