product_task?->intervention?->main_intervention_image)) { $main_intervention_image = $shopee_task->product_task->intervention->main_intervention_image; } else { $main_image_url = self::getProductImageUrl($shopee_task->product_task->response->jsonld); } // If there is other image interventions set, then set in, else get the image urls only. if (isset($shopee_task?->product_task?->intervention?->intervention_images)) { $intervention_images = $shopee_task->product_task->intervention->intervention_images; } else { $images = self::getImages($shopee_task->product_task->response->raw_html); $images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images); } ///////// PART 2 // Check existence and upload if image intervention is set if (! is_null($main_intervention_image)) { $scraped_image = ShopeeSellerScrapedImage::where('original_name', $main_intervention_image->original_name)->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first(); if (is_null($scraped_image)) { $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_intervention_image, true); } } // if there is no main image intervention but the main image url is provided elseif (! is_empty($main_image_url)) { $scraped_image = ShopeeSellerScrapedImage::where('original_name', pathinfo($main_image_url, PATHINFO_BASENAME))->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first(); if (is_null($scraped_image)) { $main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image); $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_intervention_image, true); } } /////// PART 3 if (! 
is_null($intervention_images) && is_array($intervention_images) && count($intervention_images) > 0) {
            foreach ($intervention_images as $intervention_image) {
                $scraped_image = ShopeeSellerScrapedImage::where('original_name', $intervention_image->original_name)->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first();
                if (is_null($scraped_image)) {
                    $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $intervention_image, false);
                }
            }
        }
        //return ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->get();
    }

    /**
     * Upload an image and a tiny low-quality placeholder (LQIP) of it to the `r2` disk, then
     * persist a ShopeeSellerScrapedImage row that points at the full-size upload.
     *
     * @param  object  $shopee_seller_scrape  Scrape record; only its `id` is read.
     * @param  object  $intervention_image  Wrapper exposing `image` (an Intervention image) and `original_name`.
     * @param  bool  $featured  Stored in the row's `featured` column.
     * @return ShopeeSellerScrapedImage|null The saved model, or null when save() reports failure.
     *
     * @throws \RuntimeException When no temporary file can be created for the LQIP round trip.
     */
    private static function uploadAndSaveScrapedImage($shopee_seller_scrape, $intervention_image, $featured = false)
    {
        // A single base name for both objects: calling time() twice could straddle a second
        // boundary and give the image and its LQIP different prefixes.
        $baseName = time().'_'.Str::uuid()->toString();
        $filePath = 'uploads/'.$baseName.'.jpg';
        $lqipFilePath = 'uploads/'.$baseName.'_lqip.jpg';

        $image = $intervention_image->image;
        $disk = Storage::disk('r2');

        // stream() encodes on its own using the format/quality it is given (quality 90 when
        // omitted), so the target format and quality have to be passed here. Calling
        // encode('jpg', 50) first and then a bare stream() uploads a quality-90 file.
        $disk->put($filePath, $image->stream('jpg', 50)->detach());

        // The LQIP is produced from a re-opened copy so that fit() does not shrink $image itself.
        $tempImagePath = tempnam(sys_get_temp_dir(), 'temp_image');
        if ($tempImagePath === false) {
            throw new \RuntimeException('Unable to create a temporary file for the LQIP image.');
        }

        try {
            file_put_contents($tempImagePath, (string) $image->encode());

            $lqipImage = Image::make($tempImagePath)->fit(10, 10, function ($constraint) {
                $constraint->aspectRatio();
            });

            $disk->put($lqipFilePath, $lqipImage->stream('jpg', 5)->detach());
        } finally {
            // Always remove the temporary file, including when encoding or the upload throws.
            unlink($tempImagePath);
        }

        $scraped_image = new ShopeeSellerScrapedImage;
        $scraped_image->shopee_seller_scrape_id = $shopee_seller_scrape->id;
        $scraped_image->original_name = $intervention_image->original_name;
        // Public URL of the full-size upload (not the LQIP).
        $scraped_image->image = $disk->url($filePath);
        $scraped_image->featured = $featured;

        return $scraped_image->save() ? $scraped_image : null;
    }

    /**
     * Collect the `src` and `alt` of every <img> element in an HTML document.
     *
     * Images whose `src` contains any blacklisted substring are dropped, as are <img> elements
     * without a usable `src`.
     *
     * @param  string  $raw_html  HTML markup to scan.
     * @return array<int, array{src: string, alt: string|null}>
     */
    private static function getImages(string $raw_html)
    {
        // Substrings that disqualify an image URL. Empty for now, i.e. nothing is excluded.
        $blacklist_domain = [];

        $images = [];
        $crawler = new Crawler($raw_html);

        $crawler->filter('img')->each(function ($node) use (&$images, $blacklist_domain) {
            $src = $node->attr('src');
            if (! is_string($src) || $src === '') {
                return;
            }

            foreach ($blacklist_domain as $blacklist) {
                if (str_contains($src, $blacklist)) {
                    return;
                }
            }

            // Appended once per image, outside the blacklist loop: appending inside the loop
            // yields nothing for an empty blacklist and duplicates for a longer one.
            $images[] = [
                'src' => $src,
                'alt' => $node->attr('alt'),
            ];
        });

        return $images;
    }

    /**
     * Download candidate images through the proxy and keep the ones worth uploading.
     *
     * An image is kept when it downloads successfully, is at least 300x300, is not a
     * near-duplicate of an earlier kept image (same resized width/height/mime and a download
     * size within 30 kB), and its unique-colour count is above 10% of the median count of all
     * kept images. Kept images are resized in memory to a width of 800.
     *
     * @param  array  $images  Entries with at least a `src` key, as returned by getImages().
     * @param  string  $proxy  Proxy passed to the HTTP client.
     * @param  string  $user_agent  User-Agent header sent with every download.
     * @param  array  $costs  Receives one `count-<n>` entry per accepted download (n = 1-based input position).
     * @param  array|null  $intervention_images  Receives one `{image, original_name}` object per returned image, in input order.
     * @return array Input entries extended with `width`, `height`, `mime`, `sizeKb` and `color_counts`, largest download first.
     */
    private static function filterImages(array $images, string $proxy, string $user_agent, &$costs, &$intervention_images)
    {
        $filteredImages = [];
        $acceptedInterventions = []; // same keys as $filteredImages
        $uniqueAttributes = [];      // width/height/mime/size fingerprints of accepted images
        $position = 0;

        foreach ($images as $image) {
            $position++;
            $src = $image['src'];

            try {
                // NOTE(review): TLS verification is disabled for these downloads; confirm the
                // proxy setup really requires it.
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($src);

                if (! $response->successful()) {
                    continue;
                }

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);
                $sizeKb = round(strlen($imageData) / 1024, 2);

                // The size constraint applies to the image as downloaded, before resizing.
                if ($interventionImage->width() < 300 || $interventionImage->height() < 300) {
                    continue;
                }

                $interventionImage->resize(800, null, function ($constraint) {
                    $constraint->aspectRatio();
                });

                $fingerprint = [
                    'width' => $interventionImage->width(),
                    'height' => $interventionImage->height(),
                    'mime' => $interventionImage->mime(),
                    'sizeKb' => $sizeKb,
                ];

                $isDuplicate = false;
                foreach ($uniqueAttributes as $attr) {
                    if (
                        $attr['width'] == $fingerprint['width'] &&
                        $attr['height'] == $fingerprint['height'] &&
                        $attr['mime'] == $fingerprint['mime'] &&
                        abs($attr['sizeKb'] - $sizeKb) <= 30 // download size within +/- 30 kB
                    ) {
                        $isDuplicate = true;
                        break;
                    }
                }

                if ($isDuplicate) {
                    continue;
                }

                $uniqueAttributes[] = $fingerprint;

                $image = array_merge($image, $fingerprint);
                $image['color_counts'] = self::isMostlyTextBasedOnUniqueColors($interventionImage);

                $costs['count-'.$position] = calculate_smartproxy_cost($sizeKb, 'rotating_global');

                $filteredImages[] = $image;
                $acceptedInterventions[] = (object) [
                    'image' => $interventionImage,
                    'original_name' => pathinfo($src, PATHINFO_BASENAME),
                ];
            } catch (\Exception $e) {
                // Best effort: an image that cannot be fetched or decoded is simply skipped.
                continue;
            }
        }

        if (! empty($filteredImages)) {
            // Median of the unique-colour counts of all accepted images.
            $colorCounts = array_column($filteredImages, 'color_counts');
            sort($colorCounts);
            $total = count($colorCounts);
            $middleIndex = intdiv($total, 2);
            $median = $total % 2 === 0
                ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2
                : $colorCounts[$middleIndex];

            // Drop the low outliers; adjust the percentage as needed.
            $threshold = 0.10 * $median;
            $filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
                return $image['color_counts'] > $threshold;
            });
        }

        // Hand back only the Intervention objects of images that survived the colour filter
        // (array_filter keeps keys, so the keys still line up). Populating this list before
        // the filter would let rejected images be uploaded by the caller anyway.
        foreach (array_intersect_key($acceptedInterventions, $filteredImages) as $accepted) {
            $intervention_images[] = $accepted;
        }

        // Largest download first.
        usort($filteredImages, function ($a, $b) {
            return $b['sizeKb'] <=> $a['sizeKb'];
        });

        return $filteredImages;
    }

    /**
     * Return the image URL of a JSON-LD node, provided the node is a "Product" that has both
     * a `url` and a usable `image`.
     *
     * `image` may be a plain string, a list (its first entry is used) or an object carrying
     * the address in `url` or `contentUrl`.
     *
     * @param  mixed  $data  One decoded JSON-LD node.
     * @return string|null The image URL, or null when the node does not qualify.
     */
    private static function productImageUrlFromNode($data): ?string
    {
        if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
            return null;
        }

        if (! isset($data->url, $data->image)) {
            return null;
        }

        $image = $data->image;
        if (is_array($image)) {
            $image = reset($image);
        }
        if (is_object($image)) {
            $image = $image->url ?? $image->contentUrl ?? null;
        }

        return is_string($image) && $image !== '' ? $image : null;
    }

    /**
     * Find the main product image URL in decoded JSON-LD data.
     *
     * This returns the image address rather than the product page `url`: the caller compares
     * its basename with the `original_name` that getProductImage() derives from the image.
     *
     * @param  array  $jsonLdData  Decoded JSON-LD nodes.
     * @return string|null Image URL of the first qualifying Product node, or null when there is none.
     */
    private static function getProductImageUrl(array $jsonLdData)
    {
        foreach ($jsonLdData as $data) {
            $imageUrl = self::productImageUrlFromNode($data);
            if ($imageUrl !== null) {
                return $imageUrl;
            }
        }

        return null;
    }

    /**
     * Download the main product image referenced by the JSON-LD data through the proxy.
     *
     * Product nodes are tried in order; a node whose download is unsuccessful is skipped in
     * favour of the next one. The downloaded image is cropped to a 1920x1080 frame without
     * being enlarged beyond its original size.
     *
     * @param  array  $jsonLdData  Decoded JSON-LD nodes.
     * @param  string  $proxy  Proxy passed to the HTTP client.
     * @param  string  $user_agent  User-Agent header sent with the download.
     * @param  array  $costs  Receives the proxy cost under the `product_image` key.
     * @param  object|null  $main_intervention_image  Receives `{image, original_name}` for the downloaded image.
     * @return array|null `['url' => product page URL, 'cost' => proxy cost]`, or null when nothing was downloaded or an exception occurred.
     */
    private static function getProductImage(array $jsonLdData, string $proxy, string $user_agent, &$costs, &$main_intervention_image)
    {
        foreach ($jsonLdData as $data) {
            $imageUrl = self::productImageUrlFromNode($data);
            if ($imageUrl === null) {
                continue;
            }

            try {
                // NOTE(review): TLS verification is disabled for this download; confirm the
                // proxy setup really requires it.
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($imageUrl);

                if (! $response->successful()) {
                    continue;
                }

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);

                // Crop to 1920x1080; upsize() keeps smaller images from being enlarged.
                $interventionImage->fit(1920, 1080, function ($constraint) {
                    $constraint->upsize();
                    $constraint->aspectRatio();
                });

                // Cost is based on the bytes actually downloaded, as in filterImages(), not on
                // the size of the re-encoded, cropped image.
                $sizeInKb = strlen($imageData) / 1024;
                $cost = calculate_smartproxy_cost($sizeInKb, 'rotating_global');
                $costs['product_image'] = $cost;

                $main_intervention_image = (object) [
                    'image' => $interventionImage,
                    'original_name' => pathinfo($imageUrl, PATHINFO_BASENAME),
                ];

                return [
                    'url' => $data->url,
                    'cost' => $cost,
                ];
            } catch (\Exception $e) {
                // A failed request or an undecodable image aborts the lookup.
                return null;
            }
        }

        return null;
    }

    /**
     * Count the distinct pixel colours of a shrunken, blurred copy of an image.
     *
     * Despite the name this returns a count, not a boolean: the caller compares the counts of
     * all images with each other, a low count suggesting a flat, text-like image.
     *
     * @param  object  $interventionImage  Intervention image; it is cloned and left unmodified.
     * @return int Number of distinct colour values found.
     *
     * @throws \RuntimeException When the encoded copy cannot be decoded by GD.
     */
    private static function isMostlyTextBasedOnUniqueColors($interventionImage)
    {
        // Work on a copy so the caller's image keeps its size and sharpness.
        $img = clone $interventionImage;

        // Smaller dimensions mean far fewer pixels to visit.
        $img->resize(200, null, function ($constraint) {
            $constraint->aspectRatio();
        });
        $img->blur(10);

        $im = imagecreatefromstring((string) $img->encode());
        if ($im === false) {
            throw new \RuntimeException('Unable to decode image for colour analysis.');
        }

        $width = imagesx($im);
        $height = imagesy($im);

        // Colour values are used as array keys, so each distinct colour is stored once.
        $uniqueColors = [];
        for ($x = 0; $x < $width; $x++) {
            for ($y = 0; $y < $height; $y++) {
                $uniqueColors[imagecolorat($im, $x, $y)] = true;
            }
        }

        imagedestroy($im);

        return count($uniqueColors);
    }
}