product_task->response->jsonld);

        // if there is no main image intervention but the main image url is provided
        if (! is_empty($main_image_url)) {
            $scraped_image = ShopeeSellerScrapedImage::where('original_name', pathinfo($main_image_url, PATHINFO_BASENAME))->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first();
            if (is_null($scraped_image)) {
                $main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $rotating_proxy_server, $user_agent, $costs);
                $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_image, true);
            }
        }

        /////// PART 2
        $images = self::getFilteredImages($shopee_task->product_task->response->raw_html, $rotating_proxy_server, $user_agent, $costs);
        //dd($images);
        if (! is_null($images) && is_array($images) && count($images) > 0) {
            foreach ($images as $image_obj) {
                $scraped_image = ShopeeSellerScrapedImage::where('original_name', $image_obj->original_name)->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first();
                if (is_null($scraped_image)) {
                    $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $image_obj, false);
                }
            }
        }

        //return ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->get();
    }

    /**
     * Upload a scraped image (plus a tiny LQIP variant) to the 'r2' disk and
     * persist a ShopeeSellerScrapedImage row that points at the uploaded file.
     *
     * @param  object       $shopee_seller_scrape  Scrape record; only its `id` is read.
     * @param  object|null  $image_obj             Object exposing `intervention` (an Intervention
     *                                             image) and `original_name`. May be null when the
     *                                             download step failed.
     * @param  bool         $featured              Stored as-is in the `featured` column.
     * @return ShopeeSellerScrapedImage|null       The saved model, or null when there was nothing
     *                                             to upload or the save failed.
     */
    private static function uploadAndSaveScrapedImage($shopee_seller_scrape, $image_obj, $featured = false)
    {
        // getProductImage() returns null on failure; do not fatal on a missing image.
        if (is_null($image_obj) || ! isset($image_obj->intervention)) {
            return null;
        }

        // One shared prefix so the full-size file and its LQIP sibling always pair up,
        // even if the clock ticks over between the two names being built.
        $baseName = time().'_'.Str::uuid()->toString();
        $filePath = 'uploads/'.$baseName.'.jpg';
        $lqipFilePath = 'uploads/'.$baseName.'_lqip.jpg';

        $image = $image_obj->intervention;

        // stream() re-encodes the image itself, so format and quality must be passed here;
        // a preceding encode('jpg', 50) is discarded by an argument-less stream().
        Storage::disk('r2')->put($filePath, $image->stream('jpg', 50)->detach());

        // Round-trip through a temporary file to obtain an independent copy for the LQIP,
        // leaving the caller's image instance at its current dimensions.
        $tempImagePath = tempnam(sys_get_temp_dir(), 'temp_image');
        if ($tempImagePath === false) {
            throw new \RuntimeException('Unable to create a temporary file for the LQIP image.');
        }

        try {
            file_put_contents($tempImagePath, (string) $image->encode());

            // Create the LQIP version of the image using a small size
            $lqipImage = Image::make($tempImagePath)->fit(10, 10, function ($constraint) {
                $constraint->aspectRatio();
            });

            Storage::disk('r2')->put($lqipFilePath, $lqipImage->stream('jpg', 5)->detach());
        } finally {
            // Always remove the temporary file, including when decoding or uploading throws.
            if (is_file($tempImagePath)) {
                unlink($tempImagePath);
            }
        }

        // Public URL of the full-size (non-LQIP) upload.
        $url = Storage::disk('r2')->url($filePath);

        $scraped_image = new ShopeeSellerScrapedImage;
        $scraped_image->shopee_seller_scrape_id = $shopee_seller_scrape->id;
        $scraped_image->original_name = $image_obj->original_name;
        $scraped_image->image = $url;
        $scraped_image->featured = $featured;

        return $scraped_image->save() ? $scraped_image : null;
    }

    /**
     * Extract the `src` (and `alt`, when present) of every <img> tag in an HTML document.
     *
     * A trailing "_tn" marker in the file name is stripped from each src.
     *
     * @param  string  $raw_html
     * @return array<int, array{src: string, alt: string|null}>
     */
    private static function getImageUrls(string $raw_html)
    {
        $images = [];

        if (! preg_match_all('/<img\b[^>]*>/is', $raw_html, $tags)) {
            return $images;
        }

        foreach ($tags[0] as $tag) {
            $src = self::extractHtmlAttribute($tag, 'src');
            if ($src === null || $src === '') {
                continue;
            }

            // Check if image file name ends with '_tn' and remove it
            $src = preg_replace('/_tn(\.[a-z]+)?$/i', '$1', $src);

            $images[] = [
                'src' => $src,
                'alt' => self::extractHtmlAttribute($tag, 'alt'),
            ];
        }

        return $images;
    }

    /**
     * Read one attribute value out of a single HTML tag string.
     *
     * Handles double-quoted, single-quoted and unquoted values; HTML entities in the
     * value are decoded. Returns null when the attribute is absent.
     */
    private static function extractHtmlAttribute(string $tag, string $name): ?string
    {
        // Leading \s keeps e.g. "data-src" from matching when "src" is requested.
        $pattern = '/\s'.preg_quote($name, '/').'\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';

        if (! preg_match($pattern, $tag, $m)) {
            return null;
        }

        // Exactly one alternative matched; PCRE drops trailing unmatched groups,
        // so the last element is always the captured value.
        return html_entity_decode($m[count($m) - 1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Download every image referenced in the HTML through the proxy and keep the
     * large, distinct JPEGs that look like real product photos.
     *
     * Kept images are at least 800x800, at least 100 kB and `image/jpeg`; they are resized
     * to 800px wide, de-duplicated by (width, height, mime, size within 30 kB), filtered to
     * drop images whose colour count is at most 10% of the median, and sorted by download
     * size, largest first.
     *
     * @param  string  $raw_html
     * @param  string  $proxy       Proxy URL handed to the HTTP client.
     * @param  string  $user_agent
     * @param  array   $costs       Receives one 'count-N' entry per kept image.
     *                              NOTE(review): rejected downloads are not costed — confirm intended.
     * @return object[]  Objects with src, alt, width, height, mime, sizeKb, color_counts,
     *                   intervention and original_name.
     */
    private static function getFilteredImages(string $raw_html, string $proxy, string $user_agent, &$costs)
    {
        $images = self::getImageUrls($raw_html);

        $filteredImages = [];
        $uniqueAttributes = []; // width/height/mime/size combinations already accepted
        $count = 0;

        foreach ($images as $image) {
            $count++;
            $src = $image['src'];

            try {
                // NOTE(review): 'verify' => false disables TLS certificate verification.
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($src);

                if (! $response->successful()) {
                    continue;
                }

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);

                // Size of the downloaded payload in KB
                $sizeKb = round(strlen($imageData) / 1024, 2);

                if (
                    $interventionImage->width() < 800
                    || $interventionImage->height() < 800
                    || $sizeKb < 100
                    || $interventionImage->mime() !== 'image/jpeg'
                ) {
                    continue;
                }

                $interventionImage->resize(800, null, function ($constraint) {
                    $constraint->aspectRatio();
                });

                // Dimensions after the resize are what the duplicate check compares.
                $width = $interventionImage->width();
                $height = $interventionImage->height();
                $mime = $interventionImage->mime();

                $image['width'] = $width;
                $image['height'] = $height;
                $image['mime'] = $mime;
                $image['sizeKb'] = $sizeKb;

                $isDuplicate = false;
                foreach ($uniqueAttributes as $attr) {
                    if (
                        $attr['width'] == $width
                        && $attr['height'] == $height
                        && $attr['mime'] == $mime
                        && abs($attr['sizeKb'] - $sizeKb) <= 30 // size within a +/- 30 kB tolerance
                    ) {
                        $isDuplicate = true;
                        break;
                    }
                }

                if ($isDuplicate) {
                    continue;
                }

                $uniqueAttributes[] = [
                    'width' => $width,
                    'height' => $height,
                    'mime' => $mime,
                    'sizeKb' => $sizeKb,
                ];

                $image['color_counts'] = self::getImageColorCounts($interventionImage);
                $image['intervention'] = $interventionImage;
                $image['original_name'] = pathinfo($src, PATHINFO_BASENAME);
                $costs['count-'.$count] = calculate_smartproxy_cost($sizeKb, 'rotating_global');

                $filteredImages[] = $image;
            } catch (\Exception $e) {
                // Best effort: an image that cannot be fetched or decoded is skipped.
                continue;
            }
        }

        if (! empty($filteredImages)) {
            $colorCounts = array_column($filteredImages, 'color_counts');
            sort($colorCounts);

            $total = count($colorCounts);
            $middleIndex = intdiv($total, 2);
            $median = $total % 2 === 0
                ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2
                : $colorCounts[$middleIndex];

            // Use the median to filter out the low outliers
            $threshold = 0.10 * $median; // Adjust this percentage as needed
            $filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
                return $image['color_counts'] > $threshold;
            });
        }

        // Largest download first; usort() also re-indexes after array_filter().
        usort($filteredImages, function ($a, $b) {
            return $b['sizeKb'] <=> $a['sizeKb'];
        });

        $final_images = [];
        foreach ($filteredImages as $image_obj) {
            $final_images[] = (object) $image_obj;
        }

        return $final_images;
    }

    /**
     * Return the `url` of the first JSON-LD entry of type "Product" that has one.
     *
     * @param  array  $jsonLdData  Decoded JSON-LD entries (objects).
     * @return mixed|null  The entry's `url` value, or null when no Product entry carries one.
     */
    private static function getProductImageUrl(array $jsonLdData)
    {
        foreach ($jsonLdData as $data) {
            // Ensure the type is "Product" before proceeding
            if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
                continue;
            }

            if (isset($data->url)) {
                return $data->url;
            }
        }

        return null;
    }

    /**
     * Download the main product image named in the JSON-LD "Product" entry.
     *
     * The image is fitted to 1920x1080 and returned together with its source URL,
     * original file name and the proxy cost of the download.
     *
     * @param  array   $jsonLdData  Decoded JSON-LD entries (objects).
     * @param  string  $proxy       Proxy URL handed to the HTTP client.
     * @param  string  $user_agent
     * @param  array   $costs       Receives the cost under the 'product_image' key.
     * @return object|null  Object with url, intervention, original_name and cost; null when no
     *                      Product image could be downloaded or decoded.
     */
    private static function getProductImage(array $jsonLdData, string $proxy, string $user_agent, &$costs)
    {
        foreach ($jsonLdData as $data) {
            // Ensure the type is "Product" before proceeding
            if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
                continue;
            }

            if (! isset($data->url) || ! isset($data->image)) {
                continue;
            }

            try {
                // NOTE(review): 'verify' => false disables TLS certificate verification.
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($data->image);

                if (! $response->successful()) {
                    continue;
                }

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);

                // Crop/resize to 1920x1080.
                $interventionImage->fit(1920, 1080, function ($constraint) {
                    $constraint->upsize();
                    $constraint->aspectRatio();
                });

                // Proxy traffic is the downloaded payload, not the re-encoded result;
                // this matches how getFilteredImages() measures size.
                $sizeInKb = strlen($imageData) / 1024;

                $cost = calculate_smartproxy_cost($sizeInKb, 'rotating_global');
                $costs['product_image'] = $cost;

                return (object) [
                    'url' => $data->url,
                    'intervention' => $interventionImage,
                    'original_name' => pathinfo($data->image, PATHINFO_BASENAME),
                    'cost' => $cost,
                ];
            } catch (\Exception $e) {
                // Download or decode failed: give up on the main image.
                return null;
            }
        }

        return null;
    }

    /**
     * Count the distinct pixel colours in a shrunken, blurred copy of the image.
     *
     * The input image is not modified; the work is done on a clone resized to 200px wide
     * and blurred.
     *
     * @param  mixed  $interventionImage  Intervention image instance.
     * @return int    Number of distinct colour values; 0 when the copy cannot be decoded.
     */
    private static function getImageColorCounts($interventionImage)
    {
        $img = clone $interventionImage;

        // Resize to a smaller dimension for faster processing (maintaining aspect ratio)
        $img->resize(200, null, function ($constraint) {
            $constraint->aspectRatio();
        });

        // Apply some blur
        $img->blur(10);

        // imagecreatefromstring() needs the raw bytes, not the image object.
        $im = imagecreatefromstring((string) $img->encode());
        if ($im === false) {
            return 0;
        }

        $width = imagesx($im);
        $height = imagesy($im);

        $uniqueColors = [];
        for ($x = 0; $x < $width; $x++) {
            for ($y = 0; $y < $height; $y++) {
                $uniqueColors[imagecolorat($im, $x, $y)] = true;
            }
        }

        imagedestroy($im);

        // Callers compare this count against a threshold; low counts are treated as outliers.
        return count($uniqueColors);
    }
}