This commit is contained in:
ct
2025-08-10 21:10:33 +08:00
parent 480bd9055d
commit 583a804073
43 changed files with 7623 additions and 270 deletions

View File

@@ -0,0 +1,128 @@
<?php
namespace App\Http\Controllers\Api;
use App\Http\Controllers\Controller;
use App\Models\CrawlShotJob;
use App\Jobs\ProcessCrawlShotJob;
use Illuminate\Http\Request;
use Illuminate\Http\JsonResponse;
use Illuminate\Http\Response;
use Illuminate\Support\Str;
use Illuminate\Support\Facades\Storage;
use Illuminate\Validation\Rule;
/**
 * JSON API for asynchronous page-crawl jobs.
 *
 * Jobs are persisted as CrawlShotJob rows with type "crawl" and identified
 * externally by a UUID. The controller queues new jobs, reports their status,
 * lists them, and serves the stored HTML of a completed job.
 */
class CrawlController extends Controller
{
    /**
     * Validate a crawl request, persist it as a queued job and dispatch the worker.
     *
     * Optional parameters fall back to these defaults: timeout 30, delay 0,
     * block_ads / block_cookie_banners / block_trackers true,
     * wait_until_network_idle false.
     *
     * @param Request $request Incoming request carrying "url" and the optional crawl options.
     * @return JsonResponse 201 response with the job UUID and the "queued" status.
     */
    public function crawl(Request $request): JsonResponse
    {
        $validated = $request->validate([
            // The URL is handed to a crawler, so only plain web schemes are accepted.
            'url' => 'required|url:http,https|max:2048',
            'timeout' => 'integer|min:5|max:300',
            'delay' => 'integer|min:0|max:30000',
            'block_ads' => 'boolean',
            'block_cookie_banners' => 'boolean',
            'block_trackers' => 'boolean',
            'wait_until_network_idle' => 'boolean',
        ]);

        $uuid = Str::uuid()->toString();

        // The parameters are stored in full and with normalised types. They must
        // not be passed through array_filter(): without a callback it drops every
        // falsy entry, which silently discarded an explicit `false` for the
        // block_* flags (as well as delay 0 and wait_until_network_idle false).
        CrawlShotJob::create([
            'uuid' => $uuid,
            'type' => 'crawl',
            'url' => $validated['url'],
            'status' => 'queued',
            'parameters' => [
                'timeout' => (int) ($validated['timeout'] ?? 30),
                'delay' => (int) ($validated['delay'] ?? 0),
                'block_ads' => (bool) ($validated['block_ads'] ?? true),
                'block_cookie_banners' => (bool) ($validated['block_cookie_banners'] ?? true),
                'block_trackers' => (bool) ($validated['block_trackers'] ?? true),
                'wait_until_network_idle' => (bool) ($validated['wait_until_network_idle'] ?? false),
            ],
        ]);

        ProcessCrawlShotJob::dispatch($uuid);

        return response()->json([
            'uuid' => $uuid,
            'status' => 'queued',
            'message' => 'Crawl job initiated successfully',
        ], 201);
    }

    /**
     * Report the state of a single crawl job.
     *
     * The payload always contains uuid, status, url and created_at; started_at
     * and completed_at are added when set. A completed job whose HTML file is
     * present also gets a "result" entry (public URL plus the raw HTML), and a
     * failed job with a recorded message gets an "error" entry.
     *
     * @param string $uuid Public identifier of the job.
     * @return JsonResponse Job status payload, or a 404 error payload when no crawl job matches.
     */
    public function status(string $uuid): JsonResponse
    {
        // Scoped to crawl jobs, matching index() and serve(): the result link
        // built below only makes sense for this job type.
        $job = CrawlShotJob::where('uuid', $uuid)
            ->where('type', 'crawl')
            ->first();

        if (!$job) {
            return response()->json(['error' => 'Job not found'], 404);
        }

        $response = [
            'uuid' => $job->uuid,
            'status' => $job->status,
            'url' => $job->url,
            'created_at' => $job->created_at->toISOString(),
        ];

        if ($job->started_at) {
            $response['started_at'] = $job->started_at->toISOString();
        }

        if ($job->completed_at) {
            $response['completed_at'] = $job->completed_at->toISOString();
        }

        // Only expose the result when the stored file is really there, so a
        // missing file cannot break the status endpoint (serve() applies the
        // same existence check).
        if ($job->status === 'completed' && $job->file_path && Storage::exists($job->file_path)) {
            $response['result'] = [
                'html' => [
                    'url' => url("/api/crawl/{$job->uuid}.html"),
                    'raw' => Storage::get($job->file_path),
                ],
            ];
        }

        if ($job->status === 'failed' && $job->error_message) {
            $response['error'] = $job->error_message;
        }

        return response()->json($response);
    }

    /**
     * List crawl jobs, newest first, 20 per page.
     *
     * @return JsonResponse Payload with the current page of jobs and pagination metadata.
     */
    public function index(): JsonResponse
    {
        $jobs = CrawlShotJob::where('type', 'crawl')
            ->orderBy('created_at', 'desc')
            ->paginate(20);

        return response()->json([
            'jobs' => $jobs->items(),
            'pagination' => [
                'current_page' => $jobs->currentPage(),
                'total_pages' => $jobs->lastPage(),
                'total_items' => $jobs->total(),
                'per_page' => $jobs->perPage(),
            ],
        ]);
    }

    /**
     * Serve the stored HTML of a completed crawl job.
     *
     * @param string $uuid Public identifier of the job.
     * @return Response The HTML document, or a plain-text 404 when the job is
     *                  unknown, not completed, or its file is missing.
     */
    public function serve(string $uuid): Response
    {
        $job = CrawlShotJob::where('uuid', $uuid)
            ->where('type', 'crawl')
            ->first();

        if (!$job || $job->status !== 'completed') {
            return response('HTML file not found or not ready', 404);
        }

        if (!$job->file_path || !Storage::exists($job->file_path)) {
            return response('HTML file not found', 404);
        }

        // NOTE(review): this returns crawled third-party markup as text/html from
        // the application's own origin; consider a restrictive
        // Content-Security-Policy (e.g. "sandbox") if that origin holds
        // credentials or other sensitive state.
        return response(Storage::get($job->file_path))
            ->header('Content-Type', 'text/html; charset=utf-8');
    }
}