356 lines
13 KiB
PHP
356 lines
13 KiB
PHP
<?php
|
|
|
|
namespace App\Jobs\Tasks;
|
|
|
|
use App\Models\ShopeeSellerScrapedImage;
|
|
use Illuminate\Support\Facades\Http;
|
|
use Illuminate\Support\Facades\Storage;
|
|
use Illuminate\Support\Str;
|
|
use Intervention\Image\Facades\Image;
|
|
|
|
/**
 * Downloads the images referenced by a scraped Shopee seller product page,
 * normalises them with Intervention Image, uploads them (plus a tiny LQIP
 * variant) to the `r2` storage disk and records one ShopeeSellerScrapedImage
 * row per stored image.
 */
class SaveShopeeSellerImagesTask
{
    /** Storage disk that receives the processed images. */
    private const STORAGE_DISK = 'r2';

    /** Directory prefix (on the storage disk) for every uploaded file. */
    private const UPLOAD_DIRECTORY = 'uploads/';

    /** JPEG quality of the stored full-size image. */
    private const JPEG_QUALITY = 50;

    /** JPEG quality of the low-quality image placeholder (LQIP). */
    private const LQIP_QUALITY = 5;

    /** Edge length, in pixels, of the square LQIP. */
    private const LQIP_SIZE = 10;

    /** Minimum width and height, in pixels, a gallery image must have before resizing. */
    private const MIN_DIMENSION = 800;

    /** Minimum download size, in KB, a gallery image must have. */
    private const MIN_SIZE_KB = 100;

    /** Width, in pixels, gallery images are resized to. */
    private const TARGET_WIDTH = 800;

    /** Two images with equal dimensions/mime whose sizes differ by at most this many KB count as duplicates. */
    private const DUPLICATE_SIZE_TOLERANCE_KB = 30;

    /** Images whose colour count is not above this fraction of the median are dropped. */
    private const LOW_COLOR_RATIO = 0.10;

    /**
     * Stores the main product image (taken from the JSON-LD data) and every
     * acceptable gallery image (taken from the raw HTML) that is not stored yet.
     *
     * @param  mixed  $shopee_task  Object exposing `product_task->response->jsonld`,
     *                              `product_task->response->raw_html` and `shopee_seller_scrape->id`.
     * @return void
     */
    public static function handle($shopee_task)
    {
        $rotating_proxy_server = get_smartproxy_rotating_server();
        $user_agent = config('platform.proxy.user_agent');
        $costs = [];

        $seller_scrape = $shopee_task->shopee_seller_scrape;
        $response = $shopee_task->product_task->response;

        ///////// PART 1: main product image announced in the JSON-LD data
        $main_image_url = self::getProductImageUrl($response->jsonld);

        if ($main_image_url !== null
            && ! self::imageAlreadySaved($seller_scrape->id, pathinfo($main_image_url, PATHINFO_BASENAME))
        ) {
            $main_image = self::getProductImage($response->jsonld, $rotating_proxy_server, $user_agent, $costs);

            // The download may fail; only store what was actually fetched.
            if ($main_image !== null) {
                self::uploadAndSaveScrapedImage($seller_scrape, $main_image, true);
            }
        }

        /////// PART 2: gallery images found in the page markup
        $images = self::getFilteredImages($response->raw_html, $rotating_proxy_server, $user_agent, $costs);

        foreach ($images as $image_obj) {
            if (! self::imageAlreadySaved($seller_scrape->id, $image_obj->original_name)) {
                self::uploadAndSaveScrapedImage($seller_scrape, $image_obj, false);
            }
        }
    }

    /**
     * Tells whether an image with this original file name is already recorded for the scrape.
     *
     * @param  mixed  $shopee_seller_scrape_id
     * @param  string  $original_name
     * @return bool
     */
    private static function imageAlreadySaved($shopee_seller_scrape_id, $original_name)
    {
        return ShopeeSellerScrapedImage::where('original_name', $original_name)
            ->where('shopee_seller_scrape_id', $shopee_seller_scrape_id)
            ->exists();
    }

    /**
     * Uploads the image and its LQIP variant as JPEG files and records the image in the database.
     *
     * @param  mixed  $shopee_seller_scrape  Object exposing `id`.
     * @param  object|null  $image_obj  Object exposing `intervention` (Intervention image) and `original_name`.
     * @param  bool  $featured  Value stored in the row's `featured` column.
     * @return ShopeeSellerScrapedImage|null  The saved row, or null when there was nothing to store,
     *                                        an upload failed, or the row could not be saved.
     */
    private static function uploadAndSaveScrapedImage($shopee_seller_scrape, $image_obj, $featured = false)
    {
        if ($image_obj === null || ! isset($image_obj->intervention)) {
            return null;
        }

        // One base name for both files so the full-size image and its LQIP always share the same prefix.
        $baseName = time().'_'.Str::uuid()->toString();
        $filePath = self::UPLOAD_DIRECTORY.$baseName.'.jpg';
        $lqipFilePath = self::UPLOAD_DIRECTORY.$baseName.'_lqip.jpg';

        $image = $image_obj->intervention;
        $disk = Storage::disk(self::STORAGE_DISK);

        // Build the LQIP from a clone so the full-size image keeps its dimensions.
        $lqipImage = clone $image;
        $lqipImage->fit(self::LQIP_SIZE, self::LQIP_SIZE, function ($constraint) {
            $constraint->aspectRatio();
        });

        // Format and quality are passed to stream() itself: stream() encodes the image
        // again, so a preceding encode() call would not determine the uploaded bytes.
        if (! $disk->put($filePath, $image->stream('jpg', self::JPEG_QUALITY)->detach())) {
            return null;
        }

        if (! $disk->put($lqipFilePath, $lqipImage->stream('jpg', self::LQIP_QUALITY)->detach())) {
            return null;
        }

        $scraped_image = new ShopeeSellerScrapedImage;
        $scraped_image->shopee_seller_scrape_id = $shopee_seller_scrape->id;
        $scraped_image->original_name = $image_obj->original_name;
        // Only the URL of the full-size (non-LQIP) file is stored.
        $scraped_image->image = $disk->url($filePath);
        $scraped_image->featured = $featured;

        return $scraped_image->save() ? $scraped_image : null;
    }

    /**
     * Extracts the `src` and `alt` attributes of every <img> tag in the markup.
     *
     * A trailing `_tn` in the file name (optionally followed by an extension) is removed from `src`.
     * Tags without a quoted, non-empty `src` value are skipped.
     *
     * @param  string  $raw_html
     * @return array<int, array{src: string, alt: string|null}>
     */
    private static function getImageUrls(string $raw_html)
    {
        $images = [];

        // Isolate each tag first so an attribute search can never run into a following tag.
        if (! preg_match_all('/<img\s[^>]*>/i', $raw_html, $tags)) {
            return $images;
        }

        foreach ($tags[0] as $tag) {
            // First quoted `...src=` attribute of the tag; this also accepts names such as `data-src`.
            if (! preg_match('/src\s*=\s*(["\'])(.*?)\1/is', $tag, $srcMatch) || $srcMatch[2] === '') {
                continue;
            }

            $alt = null;
            if (preg_match('/(?<![\w-])alt\s*=\s*(["\'])(.*?)\1/is', $tag, $altMatch)) {
                $alt = $altMatch[2];
            }

            $images[] = [
                'src' => preg_replace('/_tn(\.[a-z]+)?$/i', '$1', $srcMatch[2]),
                'alt' => $alt,
            ];
        }

        return $images;
    }

    /**
     * Downloads the images referenced in the markup and keeps the usable, distinct ones.
     *
     * An image is kept when it is a JPEG of at least 800x800 px and 100 KB, is not taller than wide,
     * is not a near-duplicate of an already kept image, and its colour count is above 10% of the
     * median colour count of all kept images. Kept images are resized to a width of 800 px.
     *
     * @param  string  $raw_html
     * @param  string  $proxy  Proxy passed to the HTTP client.
     * @param  string  $user_agent
     * @param  array  $costs  Receives one `count-N` entry per kept candidate (N = position of the <img> tag).
     * @return object[]  Objects with src, alt, width, height, mime, sizeKb, color_counts, intervention
     *                   and original_name, sorted by sizeKb in descending order.
     */
    private static function getFilteredImages(string $raw_html, string $proxy, string $user_agent, &$costs)
    {
        $filteredImages = [];
        $seenAttributes = []; // width/height/mime/sizeKb of every kept image, used for duplicate detection
        $fetchedSources = []; // URLs that were already downloaded successfully
        $position = 0;

        foreach (self::getImageUrls($raw_html) as $image) {
            $position++;
            $src = $image['src'];

            // The same URL yields the same image, which the duplicate check would reject
            // anyway; skipping it here avoids a second download through the proxy.
            if (isset($fetchedSources[$src])) {
                continue;
            }

            try {
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($src);

                if (! $response->successful()) {
                    continue;
                }

                $fetchedSources[$src] = true;

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);

                // Download size in KB
                $sizeKb = round(strlen($imageData) / 1024, 2);

                $tooSmall = $interventionImage->width() < self::MIN_DIMENSION
                    || $interventionImage->height() < self::MIN_DIMENSION
                    || $sizeKb < self::MIN_SIZE_KB;

                if ($tooSmall || $interventionImage->mime() !== 'image/jpeg') {
                    continue;
                }

                // Portrait images are not wanted.
                if ($interventionImage->height() > $interventionImage->width()) {
                    continue;
                }

                $interventionImage->resize(self::TARGET_WIDTH, null, function ($constraint) {
                    $constraint->aspectRatio();
                });

                $attributes = [
                    'width' => $interventionImage->width(),
                    'height' => $interventionImage->height(),
                    'mime' => $interventionImage->mime(),
                    'sizeKb' => $sizeKb,
                ];

                if (self::isDuplicate($attributes, $seenAttributes)) {
                    continue;
                }

                $seenAttributes[] = $attributes;

                $image = array_merge($image, $attributes);
                $image['color_counts'] = self::getImageColorCounts($interventionImage);
                $image['intervention'] = $interventionImage;
                $image['original_name'] = pathinfo($src, PATHINFO_BASENAME);

                $costs['count-'.$position] = calculate_smartproxy_cost($sizeKb, 'rotating_global');

                $filteredImages[] = $image;
            } catch (\Exception $e) {
                // Best effort: an image that cannot be downloaded or decoded is skipped.
                continue;
            }
        }

        if (empty($filteredImages)) {
            return [];
        }

        // Median of the colour counts of all kept images
        $colorCounts = array_column($filteredImages, 'color_counts');
        sort($colorCounts);
        $total = count($colorCounts);
        $middle = intdiv($total, 2);
        $median = $total % 2 === 0
            ? ($colorCounts[$middle - 1] + $colorCounts[$middle]) / 2
            : $colorCounts[$middle];

        // Drop the low outliers relative to the median.
        $threshold = self::LOW_COLOR_RATIO * $median;
        $filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
            return $image['color_counts'] > $threshold;
        });

        // Largest download first
        usort($filteredImages, function ($a, $b) {
            return $b['sizeKb'] <=> $a['sizeKb'];
        });

        return array_map(function ($image) {
            return (object) $image;
        }, $filteredImages);
    }

    /**
     * Tells whether an image with these attributes matches one that was already kept:
     * equal width, height and mime, and a size within the +/- 30 KB tolerance.
     *
     * @param  array  $attributes  width, height, mime and sizeKb of the candidate.
     * @param  array  $seenAttributes  The same attribute sets of the images kept so far.
     * @return bool
     */
    private static function isDuplicate(array $attributes, array $seenAttributes)
    {
        foreach ($seenAttributes as $seen) {
            if ($seen['width'] == $attributes['width']
                && $seen['height'] == $attributes['height']
                && $seen['mime'] == $attributes['mime']
                && abs($seen['sizeKb'] - $attributes['sizeKb']) <= self::DUPLICATE_SIZE_TOLERANCE_KB
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the image URL of the first JSON-LD "Product" entry that has both `url` and `image` set.
     *
     * The result is the same `image` value getProductImage() downloads, so its base name
     * equals the `original_name` recorded for the main image.
     *
     * @param  array  $jsonLdData  List of decoded JSON-LD objects.
     * @return string|null  Null when no Product entry carries a non-empty string image.
     */
    private static function getProductImageUrl(array $jsonLdData)
    {
        foreach ($jsonLdData as $data) {
            if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
                continue;
            }

            if (isset($data->url, $data->image) && is_string($data->image) && $data->image !== '') {
                return $data->image;
            }
        }

        return null;
    }

    /**
     * Downloads the image of the first JSON-LD "Product" entry whose download succeeds
     * and fits it to 1920x1080.
     *
     * @param  array  $jsonLdData  List of decoded JSON-LD objects.
     * @param  string  $proxy  Proxy passed to the HTTP client.
     * @param  string  $user_agent
     * @param  array  $costs  Receives the `product_image` cost entry on success.
     * @return object|null  Object with url, intervention, original_name and cost, or null
     *                      when no image could be downloaded.
     */
    private static function getProductImage(array $jsonLdData, string $proxy, string $user_agent, &$costs)
    {
        foreach ($jsonLdData as $data) {
            if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
                continue;
            }

            if (! isset($data->url, $data->image) || ! is_string($data->image)) {
                continue;
            }

            try {
                $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])
                    ->withHeaders(['User-Agent' => $user_agent])
                    ->get($data->image);

                if (! $response->successful()) {
                    continue;
                }

                $imageData = $response->body();
                $interventionImage = Image::make($imageData);

                // Crop/resize to 1920x1080 with the upsize() and aspectRatio() constraints applied.
                $interventionImage->fit(1920, 1080, function ($constraint) {
                    $constraint->upsize();
                    $constraint->aspectRatio();
                });

                // Cost is derived from the downloaded payload, as in getFilteredImages().
                // NOTE(review): assumes calculate_smartproxy_cost() expects the transferred size in KB - confirm.
                $cost = calculate_smartproxy_cost(strlen($imageData) / 1024, 'rotating_global');
                $costs['product_image'] = $cost;

                return (object) [
                    'url' => $data->url,
                    'intervention' => $interventionImage,
                    'original_name' => pathinfo($data->image, PATHINFO_BASENAME),
                    'cost' => $cost,
                ];
            } catch (\Exception $e) {
                // Best effort: a failed download or an undecodable image yields no main image.
                return null;
            }
        }

        return null;
    }

    /**
     * Counts the distinct pixel colours of a downscaled (200 px wide), blurred copy of the image.
     *
     * The passed image itself is not modified; all work happens on a clone.
     *
     * @param  mixed  $interventionImage  Intervention image instance.
     * @return int  Number of distinct colour values, or 0 when the copy cannot be decoded by GD.
     */
    private static function getImageColorCounts($interventionImage)
    {
        $img = clone $interventionImage;

        // Smaller dimension for faster processing (aspect ratio kept)
        $img->resize(200, null, function ($constraint) {
            $constraint->aspectRatio();
        });

        $img->blur(10);

        $im = imagecreatefromstring((string) $img->encode());

        // An undecodable image would otherwise make imagesx() throw a TypeError,
        // which the caller's `catch (\Exception)` does not handle.
        if ($im === false) {
            return 0;
        }

        $width = imagesx($im);
        $height = imagesy($im);

        $uniqueColors = [];

        for ($x = 0; $x < $width; $x++) {
            for ($y = 0; $y < $height; $y++) {
                $uniqueColors[imagecolorat($im, $x, $y)] = true;
            }
        }

        imagedestroy($im);

        return count($uniqueColors);
    }
}
|