This commit is contained in:
ct
2025-08-10 21:10:33 +08:00
parent 480bd9055d
commit 583a804073
43 changed files with 7623 additions and 270 deletions

View File

@@ -0,0 +1,76 @@
<?php
namespace App\Services;
use Spatie\Browsershot\Browsershot;
/**
 * Wrapper around Spatie Browsershot that renders a URL in a headless browser
 * and returns either the page's body HTML or a WebP screenshot.
 *
 * Options understood by every public method (all optional):
 *  - timeout                 passed to Browsershot::timeout()
 *  - delay                   passed to Browsershot::setDelay()
 *  - wait_until_network_idle truthy => Browsershot::waitUntilNetworkIdle()
 *  - block_ads               defaults to true
 *  - block_trackers          defaults to true
 */
class BrowsershotService
{
    /**
     * Render the given URL and return the HTML produced by Browsershot::bodyHtml().
     *
     * @param string $url     Page to load.
     * @param array  $options See the class-level option list.
     *
     * @return string
     */
    public function crawlHtml(string $url, array $options = []): string
    {
        return $this->configureBrowsershot($url, $options)->bodyHtml();
    }

    /**
     * Render the given URL and capture it as a WebP image.
     *
     * Additional options:
     *  - viewport_width  (default 1920)
     *  - viewport_height (default 1080)
     *  - quality         (default 90)
     *
     * @param string $url     Page to load.
     * @param array  $options See above and the class-level option list.
     *
     * @return array{data: string, mime_type: string, width: mixed, height: mixed}
     *
     * @throws \RuntimeException When the rendered screenshot cannot be read back from disk.
     */
    public function takeScreenshot(string $url, array $options = []): array
    {
        $browsershot = $this->configureBrowsershot($url, $options);

        // Configure viewport for screenshots
        $width = $options['viewport_width'] ?? 1920;
        $height = $options['viewport_height'] ?? 1080;
        $browsershot->windowSize($width, $height);

        // Always use WebP format
        $quality = $options['quality'] ?? 90;
        $browsershot->setScreenshotType('webp', $quality);

        // A random suffix (instead of time()) keeps concurrent screenshots taken
        // within the same second from reading or deleting each other's file.
        $tempPath = storage_path('temp_screenshot_webp.' . bin2hex(random_bytes(8)) . '.webp');

        try {
            $browsershot->save($tempPath);
            $imageData = file_get_contents($tempPath);
        } finally {
            // Remove the temp file even when rendering or reading failed.
            if (is_file($tempPath)) {
                unlink($tempPath);
            }
        }

        if ($imageData === false) {
            throw new \RuntimeException("Unable to read screenshot from {$tempPath}");
        }

        return [
            'data' => $imageData,
            'mime_type' => 'image/webp',
            'width' => $width,
            'height' => $height,
        ];
    }

    /**
     * Build a Browsershot instance for the URL with the shared options applied.
     *
     * @param string $url     Page to load.
     * @param array  $options See the class-level option list.
     *
     * @return Browsershot
     */
    private function configureBrowsershot(string $url, array $options = []): Browsershot
    {
        $browsershot = Browsershot::url($url);

        // Basic configuration
        if (isset($options['timeout'])) {
            $browsershot->timeout($options['timeout']);
        }

        if (isset($options['delay'])) {
            $browsershot->setDelay($options['delay']);
        }

        if (!empty($options['wait_until_network_idle'])) {
            $browsershot->waitUntilNetworkIdle();
        }

        // Apply ad/tracker blocking. Both flags default to on, and a single
        // EasyList-derived block list serves both, so either flag enables it.
        if (($options['block_ads'] ?? true) || ($options['block_trackers'] ?? true)) {
            $easyListService = new EasyListService();
            $blockedDomains = $easyListService->getBlockedDomains($url);
            $blockedUrls = $easyListService->getBlockedUrls($url);

            if (!empty($blockedDomains)) {
                $browsershot->blockDomains($blockedDomains);
            }

            if (!empty($blockedUrls)) {
                $browsershot->blockUrls($blockedUrls);
            }
        }

        return $browsershot;
    }
}

View File

@@ -0,0 +1,100 @@
<?php
namespace App\Services;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Log;
/**
 * Downloads the EasyList filter list, caches it, and derives simple
 * domain / URL-fragment block lists from it.
 *
 * Only a small subset of the Adblock Plus filter syntax is understood; rules
 * that cannot be expressed as a plain hostname or URL fragment are skipped.
 */
class EasyListService
{
    private const EASYLIST_URL = 'https://easylist.to/easylist/easylist.txt';
    private const CACHE_KEY = 'easylist_filters';
    private const CACHE_TTL = 86400; // 24 hours
    private const FALLBACK_CACHE_TTL = 300; // 5 minutes: retry soon after a failed download
    private const MAX_DOMAINS = 100;
    private const MAX_URLS = 50;

    /**
     * Return up to 100 hostnames taken from pure domain rules ("||host^").
     *
     * Rules carrying a path or "$options" are skipped because they are not
     * unconditional whole-host blocks. The host of $url itself is never
     * returned, so the page being crawled is not blocked by its own rule.
     *
     * @param string $url URL that is about to be crawled.
     *
     * @return string[]
     */
    public function getBlockedDomains(string $url): array
    {
        // parse_url() yields null/false for URLs without a usable host; the cast makes that ''.
        $targetHost = strtolower((string) parse_url($url, PHP_URL_HOST));
        $domains = [];

        foreach ($this->getFilters() as $filter) {
            $domain = $this->extractDomain((string) $filter);
            if ($domain === null || $domain === $targetHost || in_array($domain, $domains, true)) {
                continue;
            }

            $domains[] = $domain;
            if (count($domains) >= self::MAX_DOMAINS) {
                break; // Limit reached; no need to scan the rest of the list
            }
        }

        return $domains;
    }

    /**
     * Return up to 50 URL fragments taken from wildcard rules.
     *
     * Domain rules ("||"), comments ("!"), exception rules ("@@") and
     * element-hiding rules ("##", "#@#", ...) are skipped. The "*" wildcards
     * are stripped from the remaining rules, and fragments of three characters
     * or fewer are dropped as too broad.
     *
     * @param string $url URL that is about to be crawled (currently unused).
     *
     * @return string[]
     */
    public function getBlockedUrls(string $url): array
    {
        $urls = [];

        foreach ($this->getFilters() as $filter) {
            $pattern = $this->extractUrlPattern((string) $filter);
            if ($pattern === null || in_array($pattern, $urls, true)) {
                continue;
            }

            $urls[] = $pattern;
            if (count($urls) >= self::MAX_URLS) {
                break; // Limit reached; no need to scan the rest of the list
            }
        }

        return $urls;
    }

    /**
     * Return the hostname of a pure "||host^" rule, or null for anything else.
     */
    private function extractDomain(string $filter): ?string
    {
        if (strpos($filter, '||') !== 0) {
            return null;
        }

        $rule = substr($filter, 2);
        $caret = strpos($rule, '^');

        // The separator must be the final character: anything after it
        // ("$third-party", "$popup", ...) makes the rule conditional.
        if ($caret === false || $caret !== strlen($rule) - 1) {
            return null;
        }

        $domain = strtolower(substr($rule, 0, $caret));

        return $this->isValidDomain($domain) ? $domain : null;
    }

    /**
     * Turn a wildcard network rule into a literal URL fragment, or return null
     * when the rule is not a blocking rule of that kind.
     */
    private function extractUrlPattern(string $filter): ?string
    {
        $filter = trim($filter);

        if (
            $filter === ''
            || strpos($filter, '||') === 0
            || strpos($filter, '#') === 0
            || strpos($filter, '!') === 0
            || strpos($filter, '@@') === 0 // exception rule: an allow-list entry, must not be blocked
            || strpos($filter, '*') === false
        ) {
            return null;
        }

        // Element-hiding / cosmetic rules such as "example.com##.ad" are CSS selectors, not URLs.
        if (preg_match('/#[@?$%]?#/', $filter) === 1) {
            return null;
        }

        $pattern = str_replace('*', '', $filter);

        // Check the length after stripping wildcards so "a*b*" cannot become the fragment "ab".
        return strlen($pattern) > 3 ? $pattern : null;
    }

    /**
     * Return the cached filter lines, downloading them when the cache is empty.
     *
     * A successful download is cached for 24 hours. When the download fails,
     * the built-in fallback list is cached for a few minutes only, so a
     * transient outage neither pins the fallback for a day nor triggers a
     * download attempt on every call.
     *
     * @return string[]
     */
    private function getFilters(): array
    {
        $cached = Cache::get(self::CACHE_KEY);
        if (is_array($cached) && $cached !== []) {
            return $cached;
        }

        $filters = $this->downloadFilters();
        if ($filters === []) {
            $fallback = $this->getFallbackFilters();
            Cache::put(self::CACHE_KEY, $fallback, self::FALLBACK_CACHE_TTL);

            return $fallback;
        }

        Cache::put(self::CACHE_KEY, $filters, self::CACHE_TTL);

        return $filters;
    }

    /**
     * Download EasyList and return its non-comment lines; an empty array signals failure.
     *
     * @return string[]
     */
    private function downloadFilters(): array
    {
        try {
            $response = Http::timeout(30)->get(self::EASYLIST_URL);

            if (!$response->successful()) {
                Log::warning('Failed to fetch EasyList filters');

                return [];
            }

            $filters = [];
            foreach (explode("\n", $response->body()) as $line) {
                $line = trim($line); // also strips a trailing "\r" from CRLF files
                // Skip blanks, "!" comments and the "[Adblock Plus x.y]" header line.
                if ($line !== '' && strpos($line, '!') !== 0 && strpos($line, '[') !== 0) {
                    $filters[] = $line;
                }
            }

            if ($filters === []) {
                // A 2xx response without any rules is as useless as a failed request.
                Log::warning('Failed to fetch EasyList filters');

                return [];
            }

            Log::info('EasyList filters updated', ['count' => count($filters)]);

            return $filters;
        } catch (\Exception $e) {
            Log::error('Error fetching EasyList filters: ' . $e->getMessage());

            return [];
        }
    }

    /**
     * Minimal built-in rule set used when EasyList cannot be downloaded.
     *
     * @return string[]
     */
    private function getFallbackFilters(): array
    {
        return [
            '||googletagmanager.com^',
            '||google-analytics.com^',
            '||facebook.com/tr^',
            '||doubleclick.net^',
            '||googlesyndication.com^',
            '||amazon-adsystem.com^',
            '||adsystem.amazon.com^',
            '||googleadservices.com^',
        ];
    }

    /**
     * Whether $domain is a syntactically valid, dotted hostname.
     *
     * FILTER_FLAG_HOSTNAME is required: without it FILTER_VALIDATE_DOMAIN only
     * checks label lengths and accepts strings such as "example.com$third-party".
     */
    private function isValidDomain(string $domain): bool
    {
        return strpos($domain, '.') !== false
            && filter_var($domain, FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME) !== false;
    }
}