133 lines
4.3 KiB
PHP
133 lines
4.3 KiB
PHP
<?php
|
|
|
|
namespace App\Http\Controllers\Api;
|
|
|
|
use App\Http\Controllers\Controller;
|
|
use App\Models\CrawlShotJob;
|
|
use App\Jobs\ProcessCrawlShotJob;
|
|
use Illuminate\Http\Request;
|
|
use Illuminate\Http\JsonResponse;
|
|
use Illuminate\Http\Response;
|
|
use Illuminate\Support\Str;
|
|
use Illuminate\Support\Facades\Storage;
|
|
use Illuminate\Validation\Rule;
|
|
|
|
/**
 * API endpoints for creating, inspecting, listing and serving HTML crawl jobs.
 *
 * Jobs are persisted as CrawlShotJob rows with type "crawl" and processed
 * asynchronously by ProcessCrawlShotJob.
 */
class CrawlController extends Controller
{
    /**
     * Validate a crawl request, persist it as a queued job and dispatch processing.
     *
     * @param Request $request Incoming request; "url" is required, every other field is optional.
     *
     * @return JsonResponse 201 response carrying the new job's UUID and its "queued" status.
     */
    public function crawl(Request $request): JsonResponse
    {
        $validated = $request->validate([
            'url' => 'required|url|max:2048',
            'timeout' => 'integer|min:5|max:300',
            'delay' => 'integer|min:0|max:30000',
            'block_ads' => 'boolean',
            'block_cookie_banners' => 'boolean',
            'block_trackers' => 'boolean',
            'webhook_url' => 'nullable|url|max:2048',
            'webhook_events_filter' => 'nullable|array',
            'webhook_events_filter.*' => 'in:queued,processing,completed,failed',
        ]);

        $uuid = Str::uuid()->toString();

        CrawlShotJob::create([
            'uuid' => $uuid,
            'type' => 'crawl',
            'url' => $validated['url'],
            'status' => 'queued',
            'webhook_url' => $validated['webhook_url'] ?? null,
            // A missing or null filter means "notify on every event".
            'webhook_events_filter' => $validated['webhook_events_filter']
                ?? ['queued', 'processing', 'completed', 'failed'],
            // Deliberately NOT wrapped in array_filter(): without a callback it strips
            // every falsy entry, so an explicit `false` flag or a `0` delay sent by the
            // client would be silently discarded before the job is stored.
            // The casts normalise string forms ("30", "0", "1") that the integer and
            // boolean validation rules also accept.
            'parameters' => [
                'timeout' => (int) ($validated['timeout'] ?? 30),
                'delay' => (int) ($validated['delay'] ?? 0),
                'block_ads' => (bool) ($validated['block_ads'] ?? true),
                'block_cookie_banners' => (bool) ($validated['block_cookie_banners'] ?? true),
                'block_trackers' => (bool) ($validated['block_trackers'] ?? true),
                // wait_until_network_idle is always enabled in BrowsershotService
            ],
        ]);

        ProcessCrawlShotJob::dispatch($uuid);

        return response()->json([
            'uuid' => $uuid,
            'status' => 'queued',
            'message' => 'Crawl job initiated successfully',
        ], 201);
    }

    /**
     * Report the current state of a job, including the crawled HTML once completed.
     *
     * NOTE(review): unlike serve() and index(), this lookup is not restricted to
     * type "crawl" - confirm whether that is intentional.
     *
     * @param string $uuid UUID of the job to look up.
     *
     * @return JsonResponse Job details, or a 404 error payload when no job matches.
     */
    public function status(string $uuid): JsonResponse
    {
        $job = CrawlShotJob::where('uuid', $uuid)->first();

        if (!$job) {
            return response()->json(['error' => 'Job not found'], 404);
        }

        $response = [
            'uuid' => $job->uuid,
            'status' => $job->status,
            'url' => $job->url,
            'created_at' => $job->created_at->toISOString(),
        ];

        // Timestamps are only included once the corresponding column has been set.
        if ($job->started_at) {
            $response['started_at'] = $job->started_at->toISOString();
        }

        if ($job->completed_at) {
            $response['completed_at'] = $job->completed_at->toISOString();
        }

        // Completed jobs expose both a link to the stored HTML and its inline contents.
        if ($job->status === 'completed' && $job->file_path) {
            $response['result'] = [
                'html' => [
                    'url' => url("/api/crawl/{$job->uuid}.html"),
                    'raw' => Storage::get($job->file_path),
                ],
            ];
        }

        if ($job->status === 'failed' && $job->error_message) {
            $response['error'] = $job->error_message;
        }

        return response()->json($response);
    }

    /**
     * List crawl jobs, newest first, 20 per page.
     *
     * @return JsonResponse The current page of jobs plus pagination metadata.
     */
    public function index(): JsonResponse
    {
        $jobs = CrawlShotJob::where('type', 'crawl')
            ->orderBy('created_at', 'desc')
            ->paginate(20);

        $response = [
            'jobs' => $jobs->items(),
            'pagination' => [
                'current_page' => $jobs->currentPage(),
                'total_pages' => $jobs->lastPage(),
                'total_items' => $jobs->total(),
                'per_page' => $jobs->perPage(),
            ],
        ];

        return response()->json($response);
    }

    /**
     * Serve the stored HTML of a completed crawl job.
     *
     * @param string $uuid UUID of the crawl job.
     *
     * @return Response The HTML document, or a plain-text 404 when the job is
     *                  unknown, not completed, or its file is missing from storage.
     */
    public function serve(string $uuid): Response
    {
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'crawl')->first();

        if (!$job || $job->status !== 'completed') {
            return response('HTML file not found or not ready', 404);
        }

        if (!$job->file_path || !Storage::exists($job->file_path)) {
            return response('HTML file not found', 404);
        }

        return response(Storage::get($job->file_path))
            ->header('Content-Type', 'text/html; charset=utf-8');
    }
}
|