Update
This commit is contained in:
31
app/Console/Commands/CreateApiToken.php
Normal file
31
app/Console/Commands/CreateApiToken.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Models\User;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class CreateApiToken extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'crawlshot:create-token {name=API User} {email=api@crawlshot.test}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Create an API token for Crawlshot';

    /**
     * Find (or create) the user identified by the given e-mail address and
     * issue a new "crawlshot-api" token for it, printing the plain-text token.
     *
     * @return int Exit code; 0 on success.
     */
    public function handle()
    {
        $name = $this->argument('name');
        $email = $this->argument('email');

        // The account only exists to own API tokens. When it has to be created,
        // give it a random, unguessable password rather than a well-known
        // literal, so nobody can authenticate as this user with a default secret.
        $user = User::firstOrCreate(['email' => $email], [
            'name' => $name,
            'password' => bcrypt(bin2hex(random_bytes(32))),
        ]);

        // The plain-text value is only available right after creation, so it
        // is shown once here.
        $token = $user->createToken('crawlshot-api')->plainTextToken;

        $this->info("API Token created successfully!");
        $this->line("Token: {$token}");
        $this->line("Use this in your Authorization header: Bearer {$token}");

        return 0;
    }
}
|
||||
100
app/Console/Commands/PruneStorage.php
Normal file
100
app/Console/Commands/PruneStorage.php
Normal file
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Jobs\CleanupOldResults;
|
||||
use App\Models\CrawlShotJob;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class PruneStorage extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'crawlshot:prune-storage
                            {--hours=24 : How many hours old files should be to be pruned}
                            {--dry-run : Show what would be deleted without actually deleting}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Prune expired crawlshot HTML and image results older than specified hours (default: 24)';

    /**
     * Execute the console command.
     *
     * Lists every job with a stored result file created before the cutoff and,
     * unless --dry-run is given, deletes the file and then the database record.
     *
     * @return int Command::SUCCESS, or Command::FAILURE when the --hours value
     *             is invalid or at least one deletion failed.
     */
    public function handle()
    {
        $hoursOption = $this->option('hours');

        // A plain (int) cast would turn garbage such as "abc" into 0 and prune
        // every stored result, so reject anything that is not a whole number.
        if (!ctype_digit((string) $hoursOption)) {
            $this->error('The --hours option must be a non-negative integer.');
            return Command::FAILURE;
        }

        $hours = (int) $hoursOption;
        $dryRun = (bool) $this->option('dry-run');

        $this->info("Pruning crawlshot storage files older than {$hours} hours...");

        if ($dryRun) {
            $this->warn('DRY RUN MODE - No files will actually be deleted');
        }

        // Find jobs older than specified hours
        $cutoffTime = Carbon::now()->subHours($hours);

        $oldJobs = CrawlShotJob::where('created_at', '<', $cutoffTime)
            ->whereNotNull('file_path')
            ->get();

        if ($oldJobs->isEmpty()) {
            $this->info('No files found to prune.');
            return Command::SUCCESS;
        }

        $this->info("Found {$oldJobs->count()} files to prune:");

        $deletedFiles = 0;
        $deletedRecords = 0;
        $errors = 0;

        foreach ($oldJobs as $job) {
            $this->line("- {$job->type} job {$job->uuid} ({$job->file_path})");

            if ($dryRun) {
                continue;
            }

            try {
                // file_path is a path on the Storage disk (the same form used by
                // the Storage::exists()/Storage::delete() calls elsewhere in the
                // project), so it must be resolved through the Storage facade;
                // file_exists()/unlink() would resolve it against the current
                // working directory and never find the file.
                if ($job->file_path && \Illuminate\Support\Facades\Storage::exists($job->file_path)) {
                    if (!\Illuminate\Support\Facades\Storage::delete($job->file_path)) {
                        $this->error("  Failed to delete file: {$job->file_path}");
                        $errors++;

                        // Keep the record so the file is not orphaned and a
                        // later run can retry.
                        continue;
                    }

                    $deletedFiles++;
                }
            } catch (\Exception $e) {
                $this->error("  Failed to delete file: {$job->file_path}");
                $errors++;
                continue;
            }

            // Delete the database record
            try {
                $job->delete();
                $deletedRecords++;
            } catch (\Exception $e) {
                $this->error("  Failed to delete database record: {$e->getMessage()}");
                $errors++;
            }
        }

        if ($dryRun) {
            $this->info("Would have deleted {$oldJobs->count()} files and records");
            return Command::SUCCESS;
        }

        $this->info("Cleanup completed:");
        $this->line("  - Files deleted: {$deletedFiles}");
        $this->line("  - Database records deleted: {$deletedRecords}");

        if ($errors > 0) {
            $this->error("  - Errors encountered: {$errors}");
            return Command::FAILURE;
        }

        return Command::SUCCESS;
    }
}
|
||||
128
app/Http/Controllers/Api/CrawlController.php
Normal file
128
app/Http/Controllers/Api/CrawlController.php
Normal file
@@ -0,0 +1,128 @@
|
||||
<?php
|
||||
|
||||
namespace App\Http\Controllers\Api;
|
||||
|
||||
use App\Http\Controllers\Controller;
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Jobs\ProcessCrawlShotJob;
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Http\JsonResponse;
|
||||
use Illuminate\Http\Response;
|
||||
use Illuminate\Support\Str;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Illuminate\Validation\Rule;
|
||||
|
||||
class CrawlController extends Controller
{
    /**
     * Validate a crawl request, persist it as a queued job and dispatch the
     * background worker.
     *
     * @return JsonResponse 201 with the job uuid and its initial status.
     */
    public function crawl(Request $request): JsonResponse
    {
        $validated = $request->validate([
            'url' => 'required|url|max:2048',
            'timeout' => 'integer|min:5|max:300',
            'delay' => 'integer|min:0|max:30000',
            'block_ads' => 'boolean',
            'block_cookie_banners' => 'boolean',
            'block_trackers' => 'boolean',
            'wait_until_network_idle' => 'boolean'
        ]);

        $uuid = Str::uuid()->toString();

        CrawlShotJob::create([
            'uuid' => $uuid,
            'type' => 'crawl',
            'url' => $validated['url'],
            'status' => 'queued',
            // Stored without array_filter(): filtering would drop every falsy
            // value, so an explicit "block_ads": false would vanish and the
            // worker would fall back to its default of true.
            'parameters' => [
                'timeout' => (int) ($validated['timeout'] ?? 30),
                'delay' => (int) ($validated['delay'] ?? 0),
                'block_ads' => (bool) ($validated['block_ads'] ?? true),
                'block_cookie_banners' => (bool) ($validated['block_cookie_banners'] ?? true),
                'block_trackers' => (bool) ($validated['block_trackers'] ?? true),
                'wait_until_network_idle' => (bool) ($validated['wait_until_network_idle'] ?? false)
            ]
        ]);

        ProcessCrawlShotJob::dispatch($uuid);

        return response()->json([
            'uuid' => $uuid,
            'status' => 'queued',
            'message' => 'Crawl job initiated successfully'
        ], 201);
    }

    /**
     * Report the state of a crawl job and, once completed, its HTML result.
     *
     * @param string $uuid Job identifier returned by crawl().
     * @return JsonResponse 404 when no crawl job with that uuid exists.
     */
    public function status(string $uuid): JsonResponse
    {
        // Restricted to crawl jobs, like serve(): a screenshot job's binary
        // file must not be returned as the "html" result.
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'crawl')->first();

        if (!$job) {
            return response()->json(['error' => 'Job not found'], 404);
        }

        $response = [
            'uuid' => $job->uuid,
            'status' => $job->status,
            'url' => $job->url,
            'created_at' => $job->created_at->toISOString()
        ];

        if ($job->started_at) {
            $response['started_at'] = $job->started_at->toISOString();
        }

        if ($job->completed_at) {
            $response['completed_at'] = $job->completed_at->toISOString();
        }

        // The stored file may already have been pruned; only expose a result
        // while the file is still present.
        if ($job->status === 'completed' && $job->file_path && Storage::exists($job->file_path)) {
            $response['result'] = [
                'html' => [
                    'url' => url("/api/crawl/{$job->uuid}.html"),
                    'raw' => Storage::get($job->file_path)
                ]
            ];
        }

        if ($job->status === 'failed' && $job->error_message) {
            $response['error'] = $job->error_message;
        }

        return response()->json($response);
    }

    /**
     * List crawl jobs, newest first, 20 per page.
     */
    public function index(): JsonResponse
    {
        $jobs = CrawlShotJob::where('type', 'crawl')
            ->orderBy('created_at', 'desc')
            ->paginate(20);

        return response()->json([
            'jobs' => $jobs->items(),
            'pagination' => [
                'current_page' => $jobs->currentPage(),
                'total_pages' => $jobs->lastPage(),
                'total_items' => $jobs->total(),
                'per_page' => $jobs->perPage()
            ]
        ]);
    }

    /**
     * Serve the stored HTML of a completed crawl job.
     *
     * @param string $uuid Job identifier.
     * @return Response The HTML document, or a 404 when the job is unknown,
     *                  not completed, or its file is missing.
     */
    public function serve(string $uuid): Response
    {
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'crawl')->first();

        if (!$job || $job->status !== 'completed') {
            return response('HTML file not found or not ready', 404);
        }

        if (!$job->file_path || !Storage::exists($job->file_path)) {
            return response('HTML file not found', 404);
        }

        return response(Storage::get($job->file_path))
            ->header('Content-Type', 'text/html; charset=utf-8');
    }
}
|
||||
151
app/Http/Controllers/Api/ShotController.php
Normal file
151
app/Http/Controllers/Api/ShotController.php
Normal file
@@ -0,0 +1,151 @@
|
||||
<?php
|
||||
|
||||
namespace App\Http\Controllers\Api;
|
||||
|
||||
use App\Http\Controllers\Controller;
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Jobs\ProcessCrawlShotJob;
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Http\JsonResponse;
|
||||
use Illuminate\Http\Response;
|
||||
use Illuminate\Support\Str;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class ShotController extends Controller
{
    /**
     * Validate a screenshot request, persist it as a queued job and dispatch
     * the background worker.
     *
     * @return JsonResponse 201 with the job uuid and its initial status.
     */
    public function shot(Request $request): JsonResponse
    {
        $validated = $request->validate([
            'url' => 'required|url|max:2048',
            'viewport_width' => 'integer|min:320|max:3840',
            'viewport_height' => 'integer|min:240|max:2160',
            'quality' => 'integer|min:1|max:100',
            'timeout' => 'integer|min:5|max:300',
            'delay' => 'integer|min:0|max:30000',
            'block_ads' => 'boolean',
            'block_cookie_banners' => 'boolean',
            'block_trackers' => 'boolean'
        ]);

        $uuid = Str::uuid()->toString();

        CrawlShotJob::create([
            'uuid' => $uuid,
            'type' => 'shot',
            'url' => $validated['url'],
            'status' => 'queued',
            // Stored without array_filter(): filtering would drop every falsy
            // value, so an explicit "block_ads": false would vanish and the
            // worker would fall back to its default of true.
            'parameters' => [
                'viewport_width' => (int) ($validated['viewport_width'] ?? 1920),
                'viewport_height' => (int) ($validated['viewport_height'] ?? 1080),
                'format' => 'webp', // Force WebP for all screenshots
                'quality' => (int) ($validated['quality'] ?? 90),
                'timeout' => (int) ($validated['timeout'] ?? 30),
                'delay' => (int) ($validated['delay'] ?? 0),
                'block_ads' => (bool) ($validated['block_ads'] ?? true),
                'block_cookie_banners' => (bool) ($validated['block_cookie_banners'] ?? true),
                'block_trackers' => (bool) ($validated['block_trackers'] ?? true)
            ]
        ]);

        ProcessCrawlShotJob::dispatch($uuid);

        return response()->json([
            'uuid' => $uuid,
            'status' => 'queued',
            'message' => 'Screenshot job initiated successfully'
        ], 201);
    }

    /**
     * Report the state of a screenshot job and, once completed, the image as
     * a base64 string together with its metadata.
     *
     * @param string $uuid Job identifier returned by shot().
     * @return JsonResponse 404 when no screenshot job with that uuid exists.
     */
    public function status(string $uuid): JsonResponse
    {
        // Restricted to screenshot jobs, like serve(): a crawl job's HTML must
        // not be reported as a WebP image.
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'shot')->first();

        if (!$job) {
            return response()->json(['error' => 'Job not found'], 404);
        }

        $response = [
            'uuid' => $job->uuid,
            'status' => $job->status,
            'url' => $job->url,
            'created_at' => $job->created_at->toISOString()
        ];

        if ($job->started_at) {
            $response['started_at'] = $job->started_at->toISOString();
        }

        if ($job->completed_at) {
            $response['completed_at'] = $job->completed_at->toISOString();
        }

        // The stored file may already have been pruned; only build the result
        // while the file is still present, so base64_encode()/strlen() never
        // receive a missing value.
        if ($job->status === 'completed' && $job->file_path && Storage::exists($job->file_path)) {
            $imageData = (string) Storage::get($job->file_path);

            $response['result'] = [
                'image' => [
                    'url' => url("/api/shot/{$job->uuid}.webp"),
                    'raw' => base64_encode($imageData),
                ],
                'mime_type' => 'image/webp',
                'format' => 'webp',
                'width' => $job->parameters['viewport_width'] ?? 1920,
                'height' => $job->parameters['viewport_height'] ?? 1080,
                // Size in bytes of the binary image, not of the base64 text.
                'size' => strlen($imageData)
            ];
        }

        if ($job->status === 'failed' && $job->error_message) {
            $response['error'] = $job->error_message;
        }

        return response()->json($response);
    }

    /**
     * Serve the stored image of a completed screenshot job.
     *
     * @param string $uuid Job identifier.
     * @return Response The WebP image, or a 404 when the job is unknown, not
     *                  completed, or its file is missing.
     */
    public function serve(string $uuid): Response
    {
        $job = CrawlShotJob::where('uuid', $uuid)->where('type', 'shot')->first();

        if (!$job || $job->status !== 'completed') {
            return response('Screenshot not found or not ready', 404);
        }

        if (!$job->file_path || !Storage::exists($job->file_path)) {
            return response('Screenshot file not found', 404);
        }

        // Always serve as WebP
        return response(Storage::get($job->file_path))
            ->header('Content-Type', 'image/webp');
    }

    /**
     * List screenshot jobs, newest first, 20 per page.
     */
    public function index(): JsonResponse
    {
        $jobs = CrawlShotJob::where('type', 'shot')
            ->orderBy('created_at', 'desc')
            ->paginate(20);

        return response()->json([
            'jobs' => $jobs->items(),
            'pagination' => [
                'current_page' => $jobs->currentPage(),
                'total_pages' => $jobs->lastPage(),
                'total_items' => $jobs->total(),
                'per_page' => $jobs->perPage()
            ]
        ]);
    }

    /**
     * Map an image format name to its MIME type, defaulting to WebP for
     * unknown formats. Not called by the other methods of this class.
     */
    private function getMimeType(string $format): string
    {
        $mimeTypes = [
            'jpg' => 'image/jpeg',
            'png' => 'image/png',
            'webp' => 'image/webp'
        ];

        return $mimeTypes[$format] ?? 'image/webp';
    }
}
|
||||
43
app/Jobs/CleanupOldResults.php
Normal file
43
app/Jobs/CleanupOldResults.php
Normal file
@@ -0,0 +1,43 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class CleanupOldResults implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Delete result files and database records of jobs created more than
     * 24 hours ago, then log a summary.
     */
    public function handle(): void
    {
        $cutoffTime = now()->subHours(24);

        $oldJobs = CrawlShotJob::where('created_at', '<', $cutoffTime)->get();

        $deletedFiles = 0;
        $deletedJobs = 0;
        $failedFiles = 0;

        foreach ($oldJobs as $job) {
            if ($job->file_path && Storage::exists($job->file_path)) {
                // Only count the file (and drop its record) when the delete
                // actually succeeded; otherwise the file would be orphaned with
                // nothing left pointing at it. The record stays so that a later
                // run can retry.
                if (!Storage::delete($job->file_path)) {
                    Log::warning("Cleanup could not delete file", [
                        'uuid' => $job->uuid,
                        'file_path' => $job->file_path
                    ]);
                    $failedFiles++;
                    continue;
                }

                $deletedFiles++;
            }

            $job->delete();
            $deletedJobs++;
        }

        Log::info("Cleanup completed", [
            'deleted_files' => $deletedFiles,
            'deleted_jobs' => $deletedJobs,
            'failed_files' => $failedFiles,
            'cutoff_time' => $cutoffTime->toISOString()
        ]);
    }
}
|
||||
88
app/Jobs/ProcessCrawlShotJob.php
Normal file
88
app/Jobs/ProcessCrawlShotJob.php
Normal file
@@ -0,0 +1,88 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\CrawlShotJob;
|
||||
use App\Services\BrowsershotService;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class ProcessCrawlShotJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** UUID of the CrawlShotJob record this queue job processes. */
    protected string $jobUuid;

    /**
     * @param string $jobUuid UUID of the CrawlShotJob record to process.
     */
    public function __construct(string $jobUuid)
    {
        $this->jobUuid = $jobUuid;
    }

    /**
     * Run the crawl or screenshot for the referenced record and store the
     * outcome on it.
     *
     * The record moves queued -> processing -> completed, or to failed with
     * the error message when anything goes wrong. Failures are recorded, not
     * rethrown.
     */
    public function handle(): void
    {
        $job = CrawlShotJob::where('uuid', $this->jobUuid)->first();

        if (!$job) {
            Log::error("CrawlShotJob not found: {$this->jobUuid}");
            return;
        }

        try {
            $job->update([
                'status' => 'processing',
                'started_at' => now()
            ]);

            $browsershot = new BrowsershotService();

            if ($job->type === 'crawl') {
                $result = $browsershot->crawlHtml($job->url, $job->parameters ?? []);
                $this->saveCrawlResult($job, $result);
            } elseif ($job->type === 'shot') {
                $result = $browsershot->takeScreenshot($job->url, $job->parameters ?? []);
                $this->saveScreenshotResult($job, $result);
            } else {
                // Without this an unknown type would fall through and be
                // marked "completed" although nothing was produced.
                throw new \InvalidArgumentException("Unsupported job type: {$job->type}");
            }

            $job->update([
                'status' => 'completed',
                'completed_at' => now()
            ]);
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: a TypeError or other \Error
            // raised while processing must also mark the record as failed,
            // otherwise it would stay in "processing".
            Log::error("Job {$this->jobUuid} failed: " . $e->getMessage());

            $job->update([
                'status' => 'failed',
                'error_message' => $e->getMessage(),
                'completed_at' => now()
            ]);
        }
    }

    /**
     * Store crawled HTML under crawlshot/html/{uuid}.html and remember the path.
     */
    private function saveCrawlResult(CrawlShotJob $job, string $html): void
    {
        $path = "crawlshot/html/{$job->uuid}.html";

        Storage::put($path, $html);

        $job->update(['file_path' => $path]);
    }

    /**
     * Store screenshot bytes under crawlshot/images/{uuid}.{format} and
     * remember the path.
     *
     * @param array $result Screenshot result; its 'data' entry is written to storage.
     */
    private function saveScreenshotResult(CrawlShotJob $job, array $result): void
    {
        $parameters = $job->parameters ?? [];

        // Screenshots are produced as WebP, so the fallback extension must be
        // "webp" too; a "jpg" default would mislabel the stored bytes.
        $format = $parameters['format'] ?? 'webp';
        $path = "crawlshot/images/{$job->uuid}.{$format}";

        Storage::put($path, $result['data']);

        $job->update(['file_path' => $path]);
    }
}
|
||||
34
app/Models/CrawlShotJob.php
Normal file
34
app/Models/CrawlShotJob.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
class CrawlShotJob extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment (create()/update()).
     *
     * @var array
     */
    protected $fillable = [
        'uuid', 'type', 'url', 'status',
        'parameters',
        'file_path', 'error_message',
        'started_at', 'completed_at',
    ];

    /**
     * Attribute casts: "parameters" is exposed as an array, the two
     * timestamps as date-time objects.
     *
     * @var array
     */
    protected $casts = [
        'parameters'   => 'array',
        'started_at'   => 'datetime',
        'completed_at' => 'datetime',
    ];

    /**
     * Column used when this model is resolved from a route parameter.
     */
    public function getRouteKeyName()
    {
        return 'uuid';
    }
}
|
||||
@@ -6,11 +6,12 @@
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Foundation\Auth\User as Authenticatable;
|
||||
use Illuminate\Notifications\Notifiable;
|
||||
use Laravel\Sanctum\HasApiTokens;
|
||||
|
||||
class User extends Authenticatable
|
||||
{
|
||||
/** @use HasFactory<\Database\Factories\UserFactory> */
|
||||
use HasFactory, Notifiable;
|
||||
use HasFactory, Notifiable, HasApiTokens;
|
||||
|
||||
/**
|
||||
* The attributes that are mass assignable.
|
||||
|
||||
36
app/Providers/HorizonServiceProvider.php
Normal file
36
app/Providers/HorizonServiceProvider.php
Normal file
@@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
namespace App\Providers;
|
||||
|
||||
use Illuminate\Support\Facades\Gate;
|
||||
use Laravel\Horizon\Horizon;
|
||||
use Laravel\Horizon\HorizonApplicationServiceProvider;
|
||||
|
||||
class HorizonServiceProvider extends HorizonApplicationServiceProvider
{
    /**
     * Bootstrap any application services.
     */
    public function boot(): void
    {
        parent::boot();

        // Horizon::routeSmsNotificationsTo('15556667777');
        // Horizon::routeMailNotificationsTo('example@example.com');
        // Horizon::routeSlackNotificationsTo('slack-webhook-url', '#channel');
    }

    /**
     * Register the Horizon gate.
     *
     * This gate determines who can access Horizon in non-local environments.
     * The allow-list below is empty, so the gate currently denies everyone.
     */
    protected function gate(): void
    {
        Gate::define('viewHorizon', function ($user = null) {
            // Strict comparison: for a guest the e-mail is null, and a loose
            // in_array() would treat null as equal to an entry such as '' or 0.
            return in_array(optional($user)->email, [
                //
            ], true);
        });
    }
}
|
||||
76
app/Services/BrowsershotService.php
Normal file
76
app/Services/BrowsershotService.php
Normal file
@@ -0,0 +1,76 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services;
|
||||
|
||||
use Spatie\Browsershot\Browsershot;
|
||||
|
||||
class BrowsershotService
{
    /**
     * Load the page and return its body HTML.
     *
     * @param string $url     Page to load.
     * @param array  $options See configureBrowsershot() for the recognised keys.
     */
    public function crawlHtml(string $url, array $options = []): string
    {
        $browsershot = $this->configureBrowsershot($url, $options);

        return $browsershot->bodyHtml();
    }

    /**
     * Take a WebP screenshot of the page.
     *
     * @param string $url     Page to capture.
     * @param array  $options Keys of configureBrowsershot() plus 'viewport_width'
     *                        (default 1920), 'viewport_height' (default 1080)
     *                        and 'quality' (default 90).
     * @return array Keys 'data' (binary image), 'mime_type', 'width', 'height'.
     *
     * @throws \RuntimeException When the rendered file cannot be read back.
     */
    public function takeScreenshot(string $url, array $options = []): array
    {
        $browsershot = $this->configureBrowsershot($url, $options);

        // Configure viewport for screenshots
        $width = $options['viewport_width'] ?? 1920;
        $height = $options['viewport_height'] ?? 1080;
        $browsershot->windowSize($width, $height);

        // Always use WebP format
        $quality = $options['quality'] ?? 90;
        $browsershot->setScreenshotType('webp', $quality);

        // A random suffix instead of time(): two workers rendering within the
        // same second would otherwise share one path and overwrite or delete
        // each other's file.
        $tempPath = storage_path('temp_screenshot_webp.' . bin2hex(random_bytes(8)) . '.webp');

        try {
            $browsershot->save($tempPath);

            $imageData = file_get_contents($tempPath);

            if ($imageData === false) {
                throw new \RuntimeException("Unable to read screenshot file: {$tempPath}");
            }
        } finally {
            // Remove the temp file even when rendering or reading failed.
            if (is_file($tempPath)) {
                unlink($tempPath);
            }
        }

        return [
            'data' => $imageData,
            'mime_type' => 'image/webp',
            'width' => $width,
            'height' => $height
        ];
    }

    /**
     * Build a Browsershot instance for the URL and apply the shared options.
     *
     * Recognised keys: 'timeout', 'delay', 'wait_until_network_idle',
     * 'block_ads' and 'block_trackers'. Blocking is applied when either block
     * flag is true, and both default to true when absent.
     * NOTE(review): 'block_cookie_banners' is accepted by the API but is not
     * read here.
     */
    private function configureBrowsershot(string $url, array $options = []): Browsershot
    {
        $browsershot = Browsershot::url($url);

        // Basic configuration
        if (isset($options['timeout'])) {
            $browsershot->timeout($options['timeout']);
        }

        if (isset($options['delay'])) {
            $browsershot->setDelay($options['delay']);
        }

        if (!empty($options['wait_until_network_idle'])) {
            $browsershot->waitUntilNetworkIdle();
        }

        // Apply ad/tracker blocking
        $blockAds = $options['block_ads'] ?? true;
        $blockTrackers = $options['block_trackers'] ?? true;

        if ($blockAds || $blockTrackers) {
            $easyListService = new EasyListService();
            $blockedDomains = $easyListService->getBlockedDomains($url);
            $blockedUrls = $easyListService->getBlockedUrls($url);

            if (!empty($blockedDomains)) {
                $browsershot->blockDomains($blockedDomains);
            }

            if (!empty($blockedUrls)) {
                $browsershot->blockUrls($blockedUrls);
            }
        }

        return $browsershot;
    }
}
|
||||
100
app/Services/EasyListService.php
Normal file
100
app/Services/EasyListService.php
Normal file
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services;
|
||||
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class EasyListService
{
    private const EASYLIST_URL = 'https://easylist.to/easylist/easylist.txt';
    private const CACHE_KEY = 'easylist_filters';
    private const CACHE_TTL = 86400; // 24 hours

    // The built-in fallback list is only cached briefly so that a transient
    // download failure does not disable the real list for a whole day.
    private const FALLBACK_CACHE_TTL = 300; // 5 minutes

    /**
     * Return up to 100 host names taken from "||host^" filter rules.
     *
     * @param string $url Page about to be loaded (currently not used for filtering).
     * @return array List of host names.
     */
    public function getBlockedDomains(string $url): array
    {
        $domains = [];

        foreach ($this->getFilters() as $filter) {
            if (!str_starts_with($filter, '||')) {
                continue;
            }

            $caretPos = strpos($filter, '^');
            if ($caretPos === false) {
                continue;
            }

            // Only plain "||host^" rules, optionally limited to third-party
            // requests, describe a whole host. Anything else after the caret
            // (a path, or options such as $domain=...) narrows the rule and
            // must not be turned into a blanket host block or be glued onto
            // the host name.
            $suffix = substr($filter, $caretPos + 1);
            if ($suffix !== '' && $suffix !== '$third-party') {
                continue;
            }

            $domain = trim(substr($filter, 2, $caretPos - 2));
            if ($this->isValidDomain($domain)) {
                $domains[] = $domain;
            }
        }

        return array_slice(array_unique($domains), 0, 100); // Limit to 100 domains
    }

    /**
     * Return up to 50 URL fragments taken from wildcard filter rules.
     *
     * @param string $url Page about to be loaded (currently not used for filtering).
     * @return array List of URL fragments with the wildcards removed.
     */
    public function getBlockedUrls(string $url): array
    {
        $urls = [];

        foreach ($this->getFilters() as $filter) {
            $cleanFilter = trim($filter);

            // Skip host rules (handled by getBlockedDomains) and comments.
            if (str_starts_with($cleanFilter, '||') || str_starts_with($cleanFilter, '!')) {
                continue;
            }

            // "@@" rules are exceptions (allow-list entries); using them as
            // block patterns would block exactly what the list wants to allow.
            if (str_starts_with($cleanFilter, '@@')) {
                continue;
            }

            // Element-hiding rules are CSS selectors, not URL patterns.
            if (
                str_starts_with($cleanFilter, '#')
                || str_contains($cleanFilter, '##')
                || str_contains($cleanFilter, '#@#')
                || str_contains($cleanFilter, '#?#')
            ) {
                continue;
            }

            if (strlen($cleanFilter) <= 3 || !str_contains($cleanFilter, '*')) {
                continue;
            }

            $pattern = str_replace('*', '', $cleanFilter);

            // Re-check the length after stripping wildcards: a rule made mostly
            // of "*" would otherwise collapse into a fragment such as "/" that
            // matches nearly every URL.
            if (strlen($pattern) > 3) {
                $urls[] = $pattern;
            }
        }

        return array_slice(array_unique($urls), 0, 50); // Limit to 50 URL patterns
    }

    /**
     * Return the filter rules, from cache when available.
     *
     * A successfully downloaded list is cached for CACHE_TTL seconds. When the
     * download fails the fallback list is returned and cached only for
     * FALLBACK_CACHE_TTL seconds, so the download is retried soon.
     */
    private function getFilters(): array
    {
        $cached = Cache::get(self::CACHE_KEY);
        if (is_array($cached)) {
            return $cached;
        }

        $filters = $this->fetchFilters();

        if ($filters !== null) {
            Cache::put(self::CACHE_KEY, $filters, self::CACHE_TTL);
            return $filters;
        }

        $fallback = $this->getFallbackFilters();
        Cache::put(self::CACHE_KEY, $fallback, self::FALLBACK_CACHE_TTL);

        return $fallback;
    }

    /**
     * Download the list and return its non-empty, non-comment lines, or null
     * when the download failed or produced no rules.
     */
    private function fetchFilters(): ?array
    {
        try {
            $response = Http::timeout(30)->get(self::EASYLIST_URL);

            if ($response->successful()) {
                $filters = [];

                foreach (explode("\n", $response->body()) as $line) {
                    $line = trim($line);
                    if ($line !== '' && !str_starts_with($line, '!')) {
                        $filters[] = $line;
                    }
                }

                if ($filters !== []) {
                    Log::info('EasyList filters updated', ['count' => count($filters)]);
                    return $filters;
                }
            }

            Log::warning('Failed to fetch EasyList filters');
        } catch (\Exception $e) {
            Log::error('Error fetching EasyList filters: ' . $e->getMessage());
        }

        return null;
    }

    /**
     * Small built-in rule set used when the list cannot be downloaded.
     */
    private function getFallbackFilters(): array
    {
        return [
            '||googletagmanager.com^',
            '||google-analytics.com^',
            '||facebook.com/tr^',
            '||doubleclick.net^',
            '||googlesyndication.com^',
            '||amazon-adsystem.com^',
            '||adsystem.amazon.com^',
            '||googleadservices.com^'
        ];
    }

    /**
     * Whether the value is a syntactically valid host name.
     *
     * FILTER_FLAG_HOSTNAME is required: without it FILTER_VALIDATE_DOMAIN only
     * checks label lengths and accepts strings containing "/", "$" and similar.
     */
    private function isValidDomain(string $domain): bool
    {
        return filter_var($domain, FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME) !== false;
    }
}
|
||||
Reference in New Issue
Block a user