Add (ai gen)
This commit is contained in:
@@ -18,7 +18,7 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
{
|
||||
$slug = str_slug($url);
|
||||
|
||||
$cached_url = self::getGoogleCachedUrl($url, false);
|
||||
$cached_url = $url; // self::getGoogleCachedUrl($url, false);
|
||||
|
||||
$postfix = strval($postfix);
|
||||
|
||||
@@ -35,7 +35,8 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
$main_intervention_image = null;
|
||||
$intervention_images = [];
|
||||
|
||||
$proxy_server = get_smartproxy_server();
|
||||
$unblocker_proxy_server = get_smartproxy_unblocker_server();
|
||||
$rotating_proxy_server = get_smartproxy_rotating_server();
|
||||
|
||||
try {
|
||||
$raw_html = OSSUploader::readFile($driver, $directory, $filename);
|
||||
@@ -51,26 +52,47 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
if (is_null($raw_html)) {
|
||||
|
||||
try {
|
||||
$browsershot = new Browsershot();
|
||||
|
||||
$browsershot->setUrl($cached_url)
|
||||
->setOption('args', ['headless: "new"'])
|
||||
->noSandbox()
|
||||
->setOption('args', ['--disable-web-security'])
|
||||
->userAgent($user_agent)
|
||||
->ignoreHttpsErrors()
|
||||
->preventUnsuccessfulResponse()
|
||||
->timeout(10)
|
||||
//->setProxyServer($proxy_server)
|
||||
->userAgent($user_agent);
|
||||
$response = Http::withHeaders([
|
||||
'User-Agent' => $user_agent,
|
||||
])
|
||||
->withOptions([
|
||||
'proxy' => $unblocker_proxy_server,
|
||||
'timeout' => 1000,
|
||||
'verify' => false,
|
||||
])
|
||||
->get($cached_url);
|
||||
|
||||
if (app()->environment() == 'local') {
|
||||
$browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary'));
|
||||
if ($response->successful()) {
|
||||
$raw_html = $response->body();
|
||||
// ... your logic here ...
|
||||
} else {
|
||||
$raw_html = null;
|
||||
$status_code = -3;
|
||||
//throw new Exception('Http response failed');
|
||||
$response->throw();
|
||||
}
|
||||
|
||||
//dump($browsershot);
|
||||
// $browsershot = new Browsershot();
|
||||
|
||||
$raw_html = $browsershot->bodyHtml();
|
||||
// $browsershot->setUrl($cached_url)
|
||||
// ->setOption('args', ['headless: "new"'])
|
||||
// ->noSandbox()
|
||||
// ->setOption('args', ['--disable-web-security'])
|
||||
// ->userAgent($user_agent)
|
||||
// ->ignoreHttpsErrors()
|
||||
// ->preventUnsuccessfulResponse()
|
||||
// ->timeout(10)
|
||||
// ->setProxyServer($proxy_server)
|
||||
// ->userAgent($user_agent);
|
||||
|
||||
// if (app()->environment() == 'local') {
|
||||
// $browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary'));
|
||||
// }
|
||||
|
||||
// //dump($browsershot);
|
||||
|
||||
// $raw_html = $browsershot->bodyHtml();
|
||||
|
||||
// $sizeInKb = strlen($raw_html) / 1024; // Convert bytes to kilobytes
|
||||
// $browsershot_cost = round(calculate_smartproxy_cost($sizeInKb)) ;
|
||||
@@ -97,12 +119,12 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
|
||||
if ($parse_images) {
|
||||
$images = self::getImages($raw_html);
|
||||
$images = self::filterImages($images, $proxy_server, $user_agent, $costs, $intervention_images);
|
||||
$images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images);
|
||||
} else {
|
||||
$images = [];
|
||||
}
|
||||
|
||||
$main_image = self::getProductImage($jsonld, $proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
$main_image = self::getProductImage($jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
|
||||
return (object) [
|
||||
'intervention' => (object) compact('main_intervention_image', 'intervention_images'),
|
||||
@@ -169,16 +191,20 @@ private static function getImages(string $raw_html)
|
||||
$crawler->filter('img')->each(function ($node) use (&$images) {
|
||||
$src = $node->attr('src');
|
||||
$alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present
|
||||
$images[] = [
|
||||
'src' => $src,
|
||||
'alt' => $alt,
|
||||
];
|
||||
|
||||
$blacklist_domain = [];
|
||||
|
||||
foreach ($blacklist_domain as $blacklist) {
|
||||
if (! str_contains($src, $blacklist)) {
|
||||
$images[] = [
|
||||
'src' => $src,
|
||||
'alt' => $alt,
|
||||
];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// if (count($images) > 4)
|
||||
// {
|
||||
// return $images;
|
||||
// }
|
||||
//dd($images);
|
||||
|
||||
return $images;
|
||||
}
|
||||
@@ -196,7 +222,8 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$src = $image['src'];
|
||||
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
|
||||
// Check if the request was successful
|
||||
if (! $response->successful()) {
|
||||
@@ -216,7 +243,7 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$sizeKb = round(strlen($imageData) / 1024, 2);
|
||||
|
||||
// Check constraints
|
||||
if ($width < 800 || $height < 800 || $sizeKb < 100 || $mime !== 'image/jpeg') {
|
||||
if ($width < 800 || $height < 800 || $sizeKb < 100) {
|
||||
continue;
|
||||
}
|
||||
$image['width'] = $width;
|
||||
@@ -268,17 +295,22 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$colorCounts[] = $image['color_counts'];
|
||||
}
|
||||
|
||||
// Compute the median of the color counts
|
||||
sort($colorCounts);
|
||||
$count = count($colorCounts);
|
||||
$middleIndex = floor($count / 2);
|
||||
$median = $count % 2 === 0 ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2 : $colorCounts[$middleIndex];
|
||||
if (! empty($colorCounts)) {
|
||||
// Compute the median of the color counts
|
||||
sort($colorCounts);
|
||||
$count = count($colorCounts);
|
||||
$middleIndex = floor($count / 2);
|
||||
$median = $count % 2 === 0 ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2 : $colorCounts[$middleIndex];
|
||||
|
||||
// Use the median to filter out the low outliers
|
||||
$threshold = 0.10 * $median; // Adjust this percentage as needed
|
||||
$filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
|
||||
return $image['color_counts'] > $threshold;
|
||||
});
|
||||
// Use the median to filter out the low outliers
|
||||
$threshold = 0.10 * $median; // Adjust this percentage as needed
|
||||
$filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
|
||||
return $image['color_counts'] > $threshold;
|
||||
});
|
||||
} else {
|
||||
// No images found
|
||||
$filteredImages = []; // Clear the array or take any other appropriate action
|
||||
}
|
||||
|
||||
usort($filteredImages, function ($a, $b) {
|
||||
return $b['sizeKb'] <=> $a['sizeKb']; // Using the spaceship operator to sort in descending order
|
||||
@@ -307,7 +339,7 @@ private static function getProductImage(array $jsonLdData, string $proxy, string
|
||||
if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') {
|
||||
if (isset($data->url) && isset($data->image)) {
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
|
||||
// Check if the request was successful
|
||||
if ($response->successful()) {
|
||||
|
||||
Reference in New Issue
Block a user