$user_agent, ]) ->withOptions([ 'proxy' => $unblocker_proxy_server, 'timeout' => 1000, 'verify' => false, ]) ->get($cached_url); if ($response->successful()) { $raw_html = $response->body(); $costs['unblocker'] = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'unblocker'); } else { $raw_html = null; $status_code = -3; $response->throw(); } // $browsershot = new Browsershot(); // $browsershot->setUrl($cached_url) // ->setOption('args', ['headless: "new"']) // ->noSandbox() // ->setOption('args', ['--disable-web-security']) // ->userAgent($user_agent) // ->ignoreHttpsErrors() // ->preventUnsuccessfulResponse() // ->timeout(10) // ->setProxyServer($proxy_server) // ->userAgent($user_agent); // if (app()->environment() == 'local') { // $browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary')); // } // //dump($browsershot); // $raw_html = $browsershot->bodyHtml(); // $sizeInKb = strlen($raw_html) / 1024; // Convert bytes to kilobytes // $browsershot_cost = round(calculate_smartproxy_cost($sizeInKb)) ; // $costs['html'] = $browsershot_cost; } catch (UnsuccessfulResponse|Exception $e) { $raw_html = null; $status_code = -3; throw $e; } if (! is_empty($raw_html)) { OSSUploader::uploadFile($driver, $directory, $filename, $raw_html); $status_code = 1; } } if (! is_null($raw_html)) { //$raw_html = self::minifyAndCleanHtml($raw_html); $jsonld = self::getJsonLd($raw_html); return (object) [ 'response' => (object) [ 'url' => $url, 'postfix' => $postfix, 'filename' => $disk_url, 'raw_html' => $raw_html, 'jsonld' => $jsonld, 'status_code' => $status_code, 'costs' => $costs, 'total_cost' => array_sum(array_values($costs)), ], ]; } return (object) [ 'response' => (object) [ 'url' => $url, 'postfix' => $postfix, 'filename' => null, 'raw_html' => null, 'jsonld' => [], 'status_code' => $status_code, 'costs' => $costs, 'total_cost' => 0, ], ]; } private static function getJsonLd(string $raw_html) { $crawler = new Crawler($raw_html); try { $jsonld = $crawler->filter('script[type="application/ld+json"]')->each(function (Crawler $node) { return $node->text(); }); } catch (Exception $e) { return []; } $contents = []; foreach ($jsonld as $content) { try { $contents[] = json_decode($content); } catch (Exception $e) { } } return $contents; } private static function minifyAndCleanHtml(string $raw_html) { $raw_html = self::minifyHTML($raw_html); $crawler = new Crawler($raw_html); // Directly loop through the DOM and remove 'class' and 'id' attributes foreach ($crawler as $domElement) { /** @var \DOMNodeList $nodes */ $nodes = $domElement->getElementsByTagName('*'); foreach ($nodes as $node) { /** @var \DOMElement $node */ $node->removeAttribute('class'); $node->removeAttribute('id'); $node->removeAttribute('style'); } } // Remove