This commit is contained in:
ct
2025-08-11 02:35:35 +08:00
parent 4a80723243
commit f3c91b9a64
24 changed files with 2035 additions and 214 deletions

View File

@@ -44,7 +44,7 @@ public function takeScreenshot(string $url, array $options = []): array
private function configureBrowsershot(string $url, array $options = []): Browsershot
{
$browsershot = Browsershot::url($url)
->waitUntilNetworkIdle()
->waitUntilNetworkIdle() // Always enabled for production to ensure proper rendering
->preventUnsuccessfulResponse();
@@ -52,18 +52,17 @@ private function configureBrowsershot(string $url, array $options = []): Browser
$browsershot->noSandbox();
}
// Basic configuration
if (isset($options['timeout'])) {
$browsershot->timeout($options['timeout']);
}
// Basic configuration with maximum timeout safeguard
$timeout = $options['timeout'] ?? 30;
$maxTimeout = 300; // 5 minutes maximum to prevent indefinite waiting
$browsershot->timeout(min($timeout, $maxTimeout));
if (isset($options['delay'])) {
$browsershot->setDelay($options['delay']);
}
if (isset($options['wait_until_network_idle']) && $options['wait_until_network_idle']) {
$browsershot->waitUntilNetworkIdle();
}
// waitUntilNetworkIdle() is always enabled when the Browsershot instance is created above.
// Network idle waiting is required for production, so the 'wait_until_network_idle' option is no longer consulted.
// Apply ad/tracker blocking
if (($options['block_ads'] ?? true) || ($options['block_trackers'] ?? true)) {

View File

@@ -26,7 +26,8 @@ public function getBlockedDomains(string $url): array
}
}
return array_slice(array_unique($domains), 0, 100); // Limit to 100 domains
// Limit to 50 most common ad domains to reduce timeout risk
return array_slice(array_unique($domains), 0, 50);
}
public function getBlockedUrls(string $url): array
@@ -43,7 +44,8 @@ public function getBlockedUrls(string $url): array
}
}
return array_slice(array_unique($urls), 0, 50); // Limit to 50 URL patterns
// Limit to 25 URL patterns to reduce blocking overhead
return array_slice(array_unique($urls), 0, 25);
}
private function getFilters(): array

View File

@@ -0,0 +1,62 @@
<?php
namespace App\Services;
use App\Models\CrawlShotJob;
use App\Jobs\RetryWebhookJob;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
/**
 * Delivers job status webhooks and schedules retries with increasing delays.
 *
 * Delivery state is tracked on the job itself through the `webhook_attempts`,
 * `webhook_last_error` and `webhook_next_retry_at` fields.
 */
class WebhookService
{
    /**
     * Delay (in minutes) before each successive retry. The number of entries
     * is also the maximum number of retries that will be scheduled.
     */
    private const RETRY_DELAYS = [1, 2, 4, 8, 16, 32]; // minutes

    /** Seconds to wait for the webhook endpoint before the request is abandoned. */
    private const REQUEST_TIMEOUT_SECONDS = 5;

    /**
     * Maximum number of characters of error text kept in `webhook_last_error`.
     * Failed endpoints may answer with arbitrarily large bodies (e.g. HTML error pages).
     */
    private const MAX_ERROR_LENGTH = 1000;

    /**
     * Post the job's status payload to its webhook URL.
     *
     * On a successful (2xx) response the webhook error fields are reset.
     * On a non-successful response, or when building/sending the request
     * raises an exception, the failure is recorded and a retry is scheduled
     * if the retry budget is not exhausted.
     *
     * @param CrawlShotJob $job Job whose status should be delivered.
     */
    public static function send(CrawlShotJob $job): void
    {
        // Without a target URL there is nothing to deliver; passing null on to
        // the HTTP client would raise a TypeError that the catch below does not cover.
        if ($job->webhook_url === null || $job->webhook_url === '') {
            return;
        }

        try {
            $payload = $job->buildStatusResponse();
            $response = Http::timeout(self::REQUEST_TIMEOUT_SECONDS)->post($job->webhook_url, $payload);

            if ($response->successful()) {
                // Reset webhook error fields on success
                $job->update([
                    'webhook_attempts' => 0,
                    'webhook_last_error' => null,
                    'webhook_next_retry_at' => null,
                ]);

                return;
            }

            $error = "HTTP {$response->status()}: {$response->body()}";
        } catch (\Exception $e) {
            $error = $e->getMessage();
        }

        // Handled outside the try block so that an exception raised while
        // recording the failure is not itself treated as a delivery failure.
        self::handleWebhookFailure($job, $error);
    }

    /**
     * Record a failed delivery and, while retries remain, queue the next attempt.
     *
     * @param CrawlShotJob $job   Job whose webhook delivery failed.
     * @param string       $error Description of the failure; stored truncated
     *                            to MAX_ERROR_LENGTH characters.
     */
    private static function handleWebhookFailure(CrawlShotJob $job, string $error): void
    {
        $currentAttempts = (int) ($job->webhook_attempts ?? 0);

        // The retry limit is derived from the delay table so the two cannot drift apart.
        $nextRetryAt = $currentAttempts < count(self::RETRY_DELAYS)
            ? now()->addMinutes(self::RETRY_DELAYS[$currentAttempts])
            : null;

        $job->update([
            'webhook_attempts' => $currentAttempts + 1,
            'webhook_last_error' => mb_substr($error, 0, self::MAX_ERROR_LENGTH),
            'webhook_next_retry_at' => $nextRetryAt,
        ]);

        // A null retry time means the maximum number of attempts has been reached.
        if ($nextRetryAt !== null) {
            RetryWebhookJob::dispatch($job->uuid)->delay($nextRetryAt);
        }
    }
}