Update
This commit is contained in:
@@ -23,7 +23,9 @@ public function crawl(Request $request): JsonResponse
|
||||
'block_ads' => 'boolean',
|
||||
'block_cookie_banners' => 'boolean',
|
||||
'block_trackers' => 'boolean',
|
||||
'wait_until_network_idle' => 'boolean'
|
||||
'webhook_url' => 'nullable|url|max:2048',
|
||||
'webhook_events_filter' => 'nullable|array',
|
||||
'webhook_events_filter.*' => 'in:queued,processing,completed,failed'
|
||||
]);
|
||||
|
||||
$uuid = Str::uuid()->toString();
|
||||
@@ -33,13 +35,15 @@ public function crawl(Request $request): JsonResponse
|
||||
'type' => 'crawl',
|
||||
'url' => $validated['url'],
|
||||
'status' => 'queued',
|
||||
'webhook_url' => $validated['webhook_url'] ?? null,
|
||||
'webhook_events_filter' => isset($validated['webhook_events_filter']) ? $validated['webhook_events_filter'] : ['queued', 'processing', 'completed', 'failed'],
|
||||
'parameters' => array_filter([
|
||||
'timeout' => $validated['timeout'] ?? 30,
|
||||
'delay' => $validated['delay'] ?? 0,
|
||||
'block_ads' => $validated['block_ads'] ?? true,
|
||||
'block_cookie_banners' => $validated['block_cookie_banners'] ?? true,
|
||||
'block_trackers' => $validated['block_trackers'] ?? true,
|
||||
'wait_until_network_idle' => $validated['wait_until_network_idle'] ?? false
|
||||
'block_trackers' => $validated['block_trackers'] ?? true
|
||||
// wait_until_network_idle is always enabled in BrowsershotService
|
||||
])
|
||||
]);
|
||||
|
||||
|
||||
@@ -24,7 +24,10 @@ public function shot(Request $request): JsonResponse
|
||||
'delay' => 'integer|min:0|max:30000',
|
||||
'block_ads' => 'boolean',
|
||||
'block_cookie_banners' => 'boolean',
|
||||
'block_trackers' => 'boolean'
|
||||
'block_trackers' => 'boolean',
|
||||
'webhook_url' => 'nullable|url|max:2048',
|
||||
'webhook_events_filter' => 'nullable|array',
|
||||
'webhook_events_filter.*' => 'in:queued,processing,completed,failed'
|
||||
]);
|
||||
|
||||
$uuid = Str::uuid()->toString();
|
||||
@@ -34,6 +37,8 @@ public function shot(Request $request): JsonResponse
|
||||
'type' => 'shot',
|
||||
'url' => $validated['url'],
|
||||
'status' => 'queued',
|
||||
'webhook_url' => $validated['webhook_url'] ?? null,
|
||||
'webhook_events_filter' => isset($validated['webhook_events_filter']) ? $validated['webhook_events_filter'] : ['queued', 'processing', 'completed', 'failed'],
|
||||
'parameters' => array_filter([
|
||||
'viewport_width' => $validated['viewport_width'] ?? 1920,
|
||||
'viewport_height' => $validated['viewport_height'] ?? 1080,
|
||||
|
||||
72
app/Http/Controllers/Api/WebhookErrorController.php
Normal file
72
app/Http/Controllers/Api/WebhookErrorController.php
Normal file
@@ -0,0 +1,72 @@
|
||||
<?php
|
||||
|
||||
namespace App\Http\Controllers\Api;
|
||||
|
||||
use App\Http\Controllers\Controller;
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Services\WebhookService;
|
||||
use Illuminate\Http\JsonResponse;
|
||||
|
||||
/**
 * API endpoints for inspecting, retrying and clearing webhook delivery
 * errors recorded on crawl/shot jobs.
 */
class WebhookErrorController extends Controller
{
    /**
     * List jobs that have a webhook URL and at least one recorded webhook
     * attempt, most recently updated first, 20 per page.
     */
    public function index(): JsonResponse
    {
        $page = CrawlShotJob::where('webhook_attempts', '>', 0)
            ->whereNotNull('webhook_url')
            ->orderBy('updated_at', 'desc')
            ->paginate(20);

        return response()->json([
            'jobs' => $page->items(),
            'pagination' => [
                'current_page' => $page->currentPage(),
                'total_pages' => $page->lastPage(),
                'total_items' => $page->total(),
                'per_page' => $page->perPage(),
            ],
        ]);
    }

    /**
     * Send the webhook for the given job right now.
     *
     * Responds 404 when the job does not exist and 400 when it has no
     * webhook URL. The response only states that a retry was attempted;
     * it does not report whether the delivery succeeded.
     */
    public function retry(string $uuid): JsonResponse
    {
        $job = $this->findJob($uuid);

        if ($job === null) {
            return $this->jobNotFound();
        }

        if (!$job->webhook_url) {
            return response()->json(['error' => 'Job has no webhook URL'], 400);
        }

        // Attempt webhook immediately
        WebhookService::send($job);

        return response()->json([
            'uuid' => $job->uuid,
            'message' => 'Webhook retry attempted',
        ]);
    }

    /**
     * Reset the webhook error bookkeeping (attempt counter, last error and
     * next retry time) of the given job. Responds 404 when the job does not
     * exist.
     */
    public function clear(string $uuid): JsonResponse
    {
        $job = $this->findJob($uuid);

        if ($job === null) {
            return $this->jobNotFound();
        }

        $job->update([
            'webhook_attempts' => 0,
            'webhook_last_error' => null,
            'webhook_next_retry_at' => null,
        ]);

        return response()->json([
            'uuid' => $job->uuid,
            'message' => 'Webhook error cleared',
        ]);
    }

    /**
     * Look up a job by its UUID; returns null when there is no match.
     */
    private function findJob(string $uuid): ?CrawlShotJob
    {
        return CrawlShotJob::where('uuid', $uuid)->first();
    }

    /**
     * Shared 404 response for an unknown job UUID.
     */
    private function jobNotFound(): JsonResponse
    {
        return response()->json(['error' => 'Job not found'], 404);
    }
}
|
||||
40
app/Jobs/RetryWebhookJob.php
Normal file
40
app/Jobs/RetryWebhookJob.php
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Services\WebhookService;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
|
||||
/**
 * Queued job that re-attempts webhook delivery for one CrawlShotJob,
 * identified by its UUID.
 */
class RetryWebhookJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** UUID of the CrawlShotJob whose webhook should be re-sent. */
    protected string $jobUuid;

    public function __construct(string $jobUuid)
    {
        $this->jobUuid = $jobUuid;
    }

    /**
     * Re-send the webhook if the job still exists, still has a webhook URL
     * and its scheduled retry time has been reached. Otherwise do nothing.
     */
    public function handle(): void
    {
        $target = CrawlShotJob::where('uuid', $this->jobUuid)->first();

        if ($target === null) {
            return;
        }

        if (!$target->webhook_url) {
            return;
        }

        // A missing retry time means the retry is no longer wanted (e.g. the
        // error was manually cleared); a future one means it is not due yet.
        $retryAt = $target->webhook_next_retry_at;
        $isDue = $retryAt && !$retryAt->isFuture();

        if (!$isDue) {
            return;
        }

        // Attempt webhook again
        WebhookService::send($target);
    }
}
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class CrawlShotJob extends Model
|
||||
{
|
||||
@@ -18,17 +19,72 @@ class CrawlShotJob extends Model
|
||||
'file_path',
|
||||
'error_message',
|
||||
'started_at',
|
||||
'completed_at'
|
||||
'completed_at',
|
||||
'webhook_url',
|
||||
'webhook_events_filter',
|
||||
'webhook_attempts',
|
||||
'webhook_last_error',
|
||||
'webhook_next_retry_at'
|
||||
];
|
||||
|
||||
protected $casts = [
|
||||
'parameters' => 'array',
|
||||
'webhook_events_filter' => 'array',
|
||||
'started_at' => 'datetime',
|
||||
'completed_at' => 'datetime'
|
||||
'completed_at' => 'datetime',
|
||||
'webhook_next_retry_at' => 'datetime'
|
||||
];
|
||||
|
||||
/**
 * Name of the attribute used as this model's route key: the `uuid` column.
 *
 * @return string
 */
public function getRouteKeyName()
{
    return 'uuid';
}
|
||||
|
||||
/**
 * Build the status payload for this job.
 *
 * Always contains `uuid`, `status`, `url` and `created_at`. `started_at`
 * and `completed_at` are added when set. A completed job with a stored
 * file gets a `result` entry (raw HTML for `crawl`, base64-encoded WebP
 * plus metadata for `shot`). A failed job with an error message gets
 * an `error` entry.
 *
 * @return array<string, mixed>
 */
public function buildStatusResponse(): array
{
    $payload = [
        'uuid' => $this->uuid,
        'status' => $this->status,
        'url' => $this->url,
        'created_at' => $this->created_at->toISOString(),
    ];

    // Optional timestamps, in the order they appear in the payload.
    foreach (['started_at', 'completed_at'] as $stamp) {
        if ($this->{$stamp}) {
            $payload[$stamp] = $this->{$stamp}->toISOString();
        }
    }

    $hasOutput = $this->status === 'completed' && $this->file_path;

    // NOTE(review): Storage::get() may yield null for a missing file on some
    // framework versions/configurations — confirm how that should be reported.
    if ($hasOutput && $this->type === 'crawl') {
        $payload['result'] = [
            'html' => [
                'url' => url("/api/crawl/{$this->uuid}.html"),
                'raw' => Storage::get($this->file_path),
            ],
        ];
    } elseif ($hasOutput && $this->type === 'shot') {
        $binary = Storage::get($this->file_path);

        $payload['result'] = [
            'image' => [
                'url' => url("/api/shot/{$this->uuid}.webp"),
                'raw' => base64_encode($binary),
            ],
            'mime_type' => 'image/webp',
            'format' => 'webp',
            // Reported dimensions are the requested viewport, not measured
            // from the stored image.
            'width' => $this->parameters['viewport_width'] ?? 1920,
            'height' => $this->parameters['viewport_height'] ?? 1080,
            'size' => strlen($binary),
        ];
    }

    if ($this->status === 'failed' && $this->error_message) {
        $payload['error'] = $this->error_message;
    }

    return $payload;
}
|
||||
}
|
||||
27
app/Observers/CrawlShotJobObserver.php
Normal file
27
app/Observers/CrawlShotJobObserver.php
Normal file
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
namespace App\Observers;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Services\WebhookService;
|
||||
|
||||
/**
 * Sends a webhook when a CrawlShotJob's status changes, subject to the
 * job's webhook event filter.
 */
class CrawlShotJobObserver
{
    /** Statuses used when a job has no explicit webhook event filter. */
    private const DEFAULT_EVENTS = ['queued', 'processing', 'completed', 'failed'];

    /**
     * Handle the model's "updated" event.
     *
     * A webhook is sent only when the status attribute changed, the job has
     * a webhook URL, and the new status is listed in the job's event filter.
     * An empty filter disables webhooks for the job.
     */
    public function updated(CrawlShotJob $crawlShotJob): void
    {
        // Only fire webhook if status has changed and webhook_url is set
        if (!$crawlShotJob->isDirty('status') || !$crawlShotJob->webhook_url) {
            return;
        }

        $eventsFilter = $crawlShotJob->webhook_events_filter ?? self::DEFAULT_EVENTS;

        // Don't fire webhook if filter is empty array
        if (empty($eventsFilter)) {
            return;
        }

        // Strict comparison: statuses are strings, so loose matching could
        // only ever produce accidental hits (e.g. against `true` or `0`).
        if (in_array($crawlShotJob->status, $eventsFilter, true)) {
            WebhookService::send($crawlShotJob);
        }
    }
}
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
namespace App\Providers;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Observers\CrawlShotJobObserver;
|
||||
use Illuminate\Support\ServiceProvider;
|
||||
|
||||
class AppServiceProvider extends ServiceProvider
|
||||
@@ -19,6 +21,6 @@ public function register(): void
|
||||
*/
|
||||
public function boot(): void
{
    // Register the observer that reacts to CrawlShotJob model events.
    CrawlShotJob::observe(CrawlShotJobObserver::class);
}
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ public function takeScreenshot(string $url, array $options = []): array
|
||||
private function configureBrowsershot(string $url, array $options = []): Browsershot
|
||||
{
|
||||
$browsershot = Browsershot::url($url)
|
||||
->waitUntilNetworkIdle()
|
||||
->waitUntilNetworkIdle() // Always enabled for production to ensure proper rendering
|
||||
->preventUnsuccessfulResponse();
|
||||
|
||||
|
||||
@@ -52,18 +52,17 @@ private function configureBrowsershot(string $url, array $options = []): Browser
|
||||
$browsershot->noSandbox();
|
||||
}
|
||||
|
||||
// Basic configuration
|
||||
if (isset($options['timeout'])) {
|
||||
$browsershot->timeout($options['timeout']);
|
||||
}
|
||||
// Basic configuration with maximum timeout safeguard
|
||||
$timeout = $options['timeout'] ?? 30;
|
||||
$maxTimeout = 300; // 5 minutes maximum to prevent indefinite waiting
|
||||
$browsershot->timeout(min($timeout, $maxTimeout));
|
||||
|
||||
if (isset($options['delay'])) {
|
||||
$browsershot->setDelay($options['delay']);
|
||||
}
|
||||
|
||||
if (isset($options['wait_until_network_idle']) && $options['wait_until_network_idle']) {
|
||||
$browsershot->waitUntilNetworkIdle();
|
||||
}
|
||||
// waitUntilNetworkIdle() is always enabled (configured above on line 47)
|
||||
// Removed conditional logic as network idle waiting is required for production
|
||||
|
||||
// Apply ad/tracker blocking
|
||||
if (($options['block_ads'] ?? true) || ($options['block_trackers'] ?? true)) {
|
||||
|
||||
@@ -26,7 +26,8 @@ public function getBlockedDomains(string $url): array
|
||||
}
|
||||
}
|
||||
|
||||
return array_slice(array_unique($domains), 0, 100); // Limit to 100 domains
|
||||
// Limit to 50 most common ad domains to reduce timeout risk
|
||||
return array_slice(array_unique($domains), 0, 50);
|
||||
}
|
||||
|
||||
public function getBlockedUrls(string $url): array
|
||||
@@ -43,7 +44,8 @@ public function getBlockedUrls(string $url): array
|
||||
}
|
||||
}
|
||||
|
||||
return array_slice(array_unique($urls), 0, 50); // Limit to 50 URL patterns
|
||||
// Limit to 25 URL patterns to reduce blocking overhead
|
||||
return array_slice(array_unique($urls), 0, 25);
|
||||
}
|
||||
|
||||
private function getFilters(): array
|
||||
|
||||
62
app/Services/WebhookService.php
Normal file
62
app/Services/WebhookService.php
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Jobs\RetryWebhookJob;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
/**
 * Delivers job status webhooks and tracks delivery failures with a
 * back-off retry schedule.
 */
class WebhookService
{
    /**
     * Back-off schedule in minutes. Entry N is the wait before the next
     * retry when N failures have already been recorded; once every entry
     * has been used no further retry is scheduled.
     */
    private const RETRY_DELAYS = [1, 2, 4, 8, 16, 32];

    /**
     * POST the job's status payload to its webhook URL.
     *
     * On a successful response the webhook error fields are reset. Any
     * exception, including a non-2xx response, is recorded on the job and
     * may schedule a retry; nothing is rethrown to the caller.
     */
    public static function send(CrawlShotJob $job): void
    {
        try {
            $payload = $job->buildStatusResponse();

            $response = Http::timeout(5)->post($job->webhook_url, $payload);

            if (!$response->successful()) {
                throw new \Exception("HTTP {$response->status()}: {$response->body()}");
            }

            // Reset webhook error fields on success
            self::persistQuietly($job, [
                'webhook_attempts' => 0,
                'webhook_last_error' => null,
                'webhook_next_retry_at' => null,
            ]);
        } catch (\Exception $e) {
            self::handleWebhookFailure($job, $e->getMessage());
        }
    }

    /**
     * Record a failed delivery and, while the back-off schedule has entries
     * left, queue a RetryWebhookJob for the next slot.
     */
    private static function handleWebhookFailure(CrawlShotJob $job, string $error): void
    {
        $currentAttempts = $job->webhook_attempts ?? 0;

        // Derive the limit from the schedule so the two cannot drift apart.
        $nextRetryAt = $currentAttempts < count(self::RETRY_DELAYS)
            ? now()->addMinutes(self::RETRY_DELAYS[$currentAttempts])
            : null;

        self::persistQuietly($job, [
            'webhook_attempts' => $currentAttempts + 1,
            'webhook_last_error' => $error,
            'webhook_next_retry_at' => $nextRetryAt,
        ]);

        if ($nextRetryAt !== null) {
            // Schedule retry job
            RetryWebhookJob::dispatch($job->uuid)->delay($nextRetryAt);
        }
    }

    /**
     * Persist delivery bookkeeping without firing model events.
     *
     * send() can be invoked from the model's "updated" observer, at which
     * point the status attribute is still dirty. A regular update() here
     * would fire "updated" again and re-enter send(), recursing without
     * bound. These fields are internal bookkeeping and need no events.
     *
     * @param array<string, mixed> $attributes
     */
    private static function persistQuietly(CrawlShotJob $job, array $attributes): void
    {
        $job->fill($attributes)->saveQuietly();
    }
}
|
||||
Reference in New Issue
Block a user