Files
crawlshot/app/Services/BrowsershotService.php
2025-08-11 02:35:35 +08:00

84 lines
2.6 KiB
PHP

<?php
namespace App\Services;
use Illuminate\Support\Facades\App;
use Spatie\Browsershot\Browsershot;
/**
 * Thin wrapper around Spatie Browsershot that renders a URL either to HTML
 * or to a WebP screenshot, applying the same baseline configuration
 * (network-idle wait, timeout cap, optional ad/tracker blocking) to both.
 */
class BrowsershotService
{
    /** Timeout (seconds) used when the caller does not supply one. */
    private const DEFAULT_TIMEOUT_SECONDS = 30;

    /** Hard upper bound (seconds) so a request can never wait indefinitely. */
    private const MAX_TIMEOUT_SECONDS = 300;

    /** Lower bound (seconds); zero or negative values would otherwise slip past the cap. */
    private const MIN_TIMEOUT_SECONDS = 1;

    /**
     * Render the page and return the HTML of its body.
     *
     * @param string $url     Page to load.
     * @param array  $options See configureBrowsershot() for the recognised keys.
     *
     * @return string Body HTML as reported by Browsershot.
     */
    public function crawlHtml(string $url, array $options = []): string
    {
        return $this->configureBrowsershot($url, $options)->bodyHtml();
    }

    /**
     * Render the page and return a WebP screenshot of it.
     *
     * Recognised screenshot options (in addition to those of configureBrowsershot()):
     *  - viewport_width  (default 1920)
     *  - viewport_height (default 1080)
     *  - quality         (default 90)
     *
     * @param string $url     Page to capture.
     * @param array  $options Screenshot and browser options.
     *
     * @return array{data: string, mime_type: string, width: mixed, height: mixed}
     *         Raw image bytes plus the requested viewport dimensions
     *         (echoed from the options, not measured from the image).
     *
     * @throws \RuntimeException When the rendered screenshot file cannot be read back.
     */
    public function takeScreenshot(string $url, array $options = []): array
    {
        $browsershot = $this->configureBrowsershot($url, $options);

        // Configure viewport for screenshots
        $width = $options['viewport_width'] ?? 1920;
        $height = $options['viewport_height'] ?? 1080;
        $browsershot->windowSize($width, $height);

        // Always use WebP format
        $quality = $options['quality'] ?? 90;
        $browsershot->setScreenshotType('webp', $quality);

        // A timestamp alone collides when two screenshots start within the same
        // second; the random suffix keeps concurrent requests from clobbering
        // (or deleting) each other's file.
        $tempPath = storage_path(
            'temp_screenshot_webp.' . time() . '.' . bin2hex(random_bytes(8)) . '.webp'
        );

        try {
            $browsershot->save($tempPath);

            $imageData = file_get_contents($tempPath);
            if ($imageData === false) {
                throw new \RuntimeException("Unable to read screenshot file: {$tempPath}");
            }
        } finally {
            // Clean up even when rendering or reading fails, so failed requests
            // do not leave temp files behind.
            if (is_file($tempPath)) {
                unlink($tempPath);
            }
        }

        return [
            'data' => $imageData,
            'mime_type' => 'image/webp',
            'width' => $width,
            'height' => $height,
        ];
    }

    /**
     * Build a Browsershot instance with the shared baseline configuration.
     *
     * Recognised options:
     *  - timeout        seconds; defaults to 30 and is clamped to the range 1..300
     *  - delay          forwarded unchanged to Browsershot::setDelay() when present
     *                   (NOTE(review): Browsershot documents this as milliseconds — confirm)
     *  - block_ads      default true
     *  - block_trackers default true
     *
     * Blocking is applied when EITHER block_ads or block_trackers is truthy;
     * both must be explicitly false to disable it.
     *
     * @param string $url     Page to load.
     * @param array  $options Browser options as listed above.
     *
     * @return Browsershot Configured instance, not yet executed.
     */
    private function configureBrowsershot(string $url, array $options = []): Browsershot
    {
        // Network-idle waiting is enabled unconditionally (all environments)
        // to ensure the page is fully rendered before capture.
        $browsershot = Browsershot::url($url)
            ->waitUntilNetworkIdle()
            ->preventUnsuccessfulResponse();

        if (App::environment('production')) {
            $browsershot->noSandbox();
        }

        // Clamp the timeout on both sides: the upper bound prevents very long
        // waits, the lower bound stops 0/negative values (which conventionally
        // disable the timeout) from bypassing the safeguard.
        $timeout = $options['timeout'] ?? self::DEFAULT_TIMEOUT_SECONDS;
        $browsershot->timeout(
            max(self::MIN_TIMEOUT_SECONDS, min($timeout, self::MAX_TIMEOUT_SECONDS))
        );

        if (isset($options['delay'])) {
            $browsershot->setDelay($options['delay']);
        }

        // Apply ad/tracker blocking
        if (($options['block_ads'] ?? true) || ($options['block_trackers'] ?? true)) {
            $easyListService = new EasyListService();
            $blockedDomains = $easyListService->getBlockedDomains($url);
            $blockedUrls = $easyListService->getBlockedUrls($url);

            if (!empty($blockedDomains)) {
                $browsershot->blockDomains($blockedDomains);
            }

            if (!empty($blockedUrls)) {
                $browsershot->blockUrls($blockedUrls);
            }
        }

        return $browsershot;
    }
}