Files
crawlshot/app/Http/Controllers/Api/CrawlController.php
2025-08-11 02:35:35 +08:00

133 lines
4.3 KiB
PHP

<?php
namespace App\Http\Controllers\Api;
use App\Http\Controllers\Controller;
use App\Models\CrawlShotJob;
use App\Jobs\ProcessCrawlShotJob;
use Illuminate\Http\Request;
use Illuminate\Http\JsonResponse;
use Illuminate\Http\Response;
use Illuminate\Support\Str;
use Illuminate\Support\Facades\Storage;
use Illuminate\Validation\Rule;
/**
 * JSON API endpoints for HTML crawl jobs: create a job, poll its status,
 * list existing jobs, and serve the captured HTML document.
 */
class CrawlController extends Controller
{
    /**
     * Webhook events a job is subscribed to when the request supplies no filter.
     */
    private const DEFAULT_WEBHOOK_EVENTS = ['queued', 'processing', 'completed', 'failed'];

    /**
     * Validate the request, persist a queued crawl job and dispatch it for processing.
     *
     * @param Request $request Request carrying the target `url` plus optional
     *                         crawl options and webhook settings.
     * @return JsonResponse 201 response containing the job UUID and its `queued` status.
     *
     * @throws \Illuminate\Validation\ValidationException When the input fails validation.
     */
    public function crawl(Request $request): JsonResponse
    {
        $validated = $request->validate([
            'url' => 'required|url|max:2048',
            'timeout' => 'integer|min:5|max:300',
            'delay' => 'integer|min:0|max:30000',
            'block_ads' => 'boolean',
            'block_cookie_banners' => 'boolean',
            'block_trackers' => 'boolean',
            'webhook_url' => 'nullable|url|max:2048',
            'webhook_events_filter' => 'nullable|array',
            'webhook_events_filter.*' => 'in:queued,processing,completed,failed',
        ]);

        $uuid = Str::uuid()->toString();

        CrawlShotJob::create([
            'uuid' => $uuid,
            'type' => 'crawl',
            'url' => $validated['url'],
            'status' => 'queued',
            'webhook_url' => $validated['webhook_url'] ?? null,
            // A missing or null filter subscribes the job to every event.
            'webhook_events_filter' => $validated['webhook_events_filter'] ?? self::DEFAULT_WEBHOOK_EVENTS,
            // Stored unfiltered on purpose: array_filter() without a callback drops
            // every falsy entry, which would silently discard an explicit `delay`
            // of 0 or any `block_*` flag the caller set to false.
            'parameters' => [
                'timeout' => $validated['timeout'] ?? 30,
                'delay' => $validated['delay'] ?? 0,
                'block_ads' => $validated['block_ads'] ?? true,
                'block_cookie_banners' => $validated['block_cookie_banners'] ?? true,
                'block_trackers' => $validated['block_trackers'] ?? true,
                // wait_until_network_idle is always enabled in BrowsershotService
            ],
        ]);

        ProcessCrawlShotJob::dispatch($uuid);

        return response()->json([
            'uuid' => $uuid,
            'status' => 'queued',
            'message' => 'Crawl job initiated successfully',
        ], 201);
    }

    /**
     * Report the current state of a crawl job.
     *
     * The payload always contains `uuid`, `status`, `url` and `created_at`.
     * `started_at` and `completed_at` are added once set, `error` is added for
     * failed jobs that recorded a message, and `result` (the HTML URL plus the
     * raw HTML) is added for completed jobs whose stored file is present.
     *
     * @param string $uuid UUID of the crawl job.
     * @return JsonResponse The job state, or a 404 `Job not found` error.
     */
    public function status(string $uuid): JsonResponse
    {
        // Restricted to crawl jobs, matching index() and serve(), so that a job of
        // another type is never reported here with its file embedded as HTML.
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'crawl')->first();

        if (!$job) {
            return response()->json(['error' => 'Job not found'], 404);
        }

        $response = [
            'uuid' => $job->uuid,
            'status' => $job->status,
            'url' => $job->url,
            'created_at' => $job->created_at->toISOString(),
        ];

        if ($job->started_at) {
            $response['started_at'] = $job->started_at->toISOString();
        }

        if ($job->completed_at) {
            $response['completed_at'] = $job->completed_at->toISOString();
        }

        // Same existence guard as serve(): do not advertise a URL that serve()
        // would answer with 404, and do not read a file that is no longer there.
        if ($job->status === 'completed' && $job->file_path && Storage::exists($job->file_path)) {
            $response['result'] = [
                'html' => [
                    'url' => url("/api/crawl/{$job->uuid}.html"),
                    'raw' => Storage::get($job->file_path),
                ],
            ];
        }

        if ($job->status === 'failed' && $job->error_message) {
            $response['error'] = $job->error_message;
        }

        return response()->json($response);
    }

    /**
     * List crawl jobs, newest first, 20 per page.
     *
     * @return JsonResponse The current page of jobs under `jobs`, plus a
     *                      `pagination` summary (current page, total pages,
     *                      total items, page size).
     */
    public function index(): JsonResponse
    {
        $jobs = CrawlShotJob::where('type', 'crawl')
            ->orderBy('created_at', 'desc')
            ->paginate(20);

        return response()->json([
            'jobs' => $jobs->items(),
            'pagination' => [
                'current_page' => $jobs->currentPage(),
                'total_pages' => $jobs->lastPage(),
                'total_items' => $jobs->total(),
                'per_page' => $jobs->perPage(),
            ],
        ]);
    }

    /**
     * Serve the stored HTML of a completed crawl job.
     *
     * @param string $uuid UUID of the crawl job.
     * @return Response The HTML document as `text/html; charset=utf-8`, or a
     *                  plain 404 when the job is unknown, not completed, or its
     *                  file is missing from storage.
     */
    public function serve(string $uuid): Response
    {
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'crawl')->first();

        if (!$job || $job->status !== 'completed') {
            return response('HTML file not found or not ready', 404);
        }

        if (!$job->file_path || !Storage::exists($job->file_path)) {
            return response('HTML file not found', 404);
        }

        // NOTE(review): this returns third-party HTML from the API's own origin;
        // consider whether restrictive response headers are wanted here.
        return response(Storage::get($job->file_path))
            ->header('Content-Type', 'text/html; charset=utf-8');
    }
}