Files
crawlshot/app/Services/EasyListService.php
2025-08-11 02:35:35 +08:00

102 lines
3.3 KiB
PHP

<?php
namespace App\Services;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Log;
/**
 * Derives request-blocking hints (hostnames and URL fragments) from the
 * public EasyList filter list.
 *
 * The downloaded list is cached; when it cannot be downloaded a small
 * built-in list is used instead.
 */
class EasyListService
{
    private const EASYLIST_URL = 'https://easylist.to/easylist/easylist.txt';
    private const CACHE_KEY = 'easylist_filters';
    private const CACHE_TTL = 86400; // 24 hours
    private const FALLBACK_CACHE_TTL = 300; // 5 minutes
    private const MAX_DOMAINS = 50;
    private const MAX_URL_PATTERNS = 25;

    /**
     * Returns hostnames taken from domain-anchored rules (`||host^`).
     *
     * @param string $url Currently unused; kept so existing callers keep working.
     *
     * @return string[] At most MAX_DOMAINS unique hostnames, in list order.
     */
    public function getBlockedDomains(string $url): array
    {
        return $this->extractDomains($this->getFilters());
    }

    /**
     * Returns URL fragments taken from wildcard rules, with the `*` removed.
     *
     * @param string $url Currently unused; kept so existing callers keep working.
     *
     * @return string[] At most MAX_URL_PATTERNS unique fragments, in list order.
     */
    public function getBlockedUrls(string $url): array
    {
        return $this->extractUrlPatterns($this->getFilters());
    }

    /**
     * Collects the first MAX_DOMAINS distinct hostnames found in $filters.
     *
     * @param array $filters Raw filter rules.
     *
     * @return string[]
     */
    private function extractDomains(array $filters): array
    {
        $domains = [];
        $seen = [];

        foreach ($filters as $filter) {
            $domain = $this->parseDomainRule((string) $filter);
            if ($domain === null || isset($seen[$domain])) {
                continue;
            }

            $seen[$domain] = true;
            $domains[] = $domain;

            // Stop early: only the first MAX_DOMAINS entries are ever returned.
            if (count($domains) >= self::MAX_DOMAINS) {
                break;
            }
        }

        return $domains;
    }

    /**
     * Extracts the hostname from a `||host^` rule.
     *
     * Only rules that end right after the `^`, or carry nothing but the
     * `$third-party` option, are accepted. Rules with a path or with other
     * options describe a narrower condition than "this whole host" and are
     * skipped rather than widened.
     *
     * @return string|null The hostname, or null when the rule does not qualify.
     */
    private function parseDomainRule(string $filter): ?string
    {
        $filter = trim($filter);
        if (strncmp($filter, '||', 2) !== 0) {
            return null;
        }

        $separator = strpos($filter, '^', 2);
        if ($separator === false) {
            return null;
        }

        $host = (string) substr($filter, 2, $separator - 2);
        $suffix = (string) substr($filter, $separator + 1);
        if ($suffix !== '' && $suffix !== '$third-party') {
            return null;
        }

        return $this->isValidDomain($host) ? $host : null;
    }

    /**
     * Collects the first MAX_URL_PATTERNS distinct URL fragments in $filters.
     *
     * A rule qualifies when it is a plain URL rule (see isPlainUrlRule()), is
     * longer than three characters and contains a `*` wildcard. The wildcard
     * is removed from the returned fragment.
     *
     * NOTE(review): anchor/separator characters (`|`, `^`) are left in the
     * fragment, as before; confirm the consumer copes with them.
     *
     * @param array $filters Raw filter rules.
     *
     * @return string[]
     */
    private function extractUrlPatterns(array $filters): array
    {
        $patterns = [];
        $seen = [];

        foreach ($filters as $filter) {
            $rule = trim((string) $filter);
            if (!$this->isPlainUrlRule($rule)) {
                continue;
            }
            if (strlen($rule) <= 3 || strpos($rule, '*') === false) {
                continue;
            }

            $pattern = str_replace('*', '', $rule);
            // A rule made only of wildcards would yield an empty fragment.
            if ($pattern === '' || isset($seen[$pattern])) {
                continue;
            }

            $seen[$pattern] = true;
            $patterns[] = $pattern;

            // Stop early: only the first MAX_URL_PATTERNS entries are returned.
            if (count($patterns) >= self::MAX_URL_PATTERNS) {
                break;
            }
        }

        return $patterns;
    }

    /**
     * Tells whether $rule is an unconditional URL blocking rule.
     *
     * Rejected: comments (`!`), domain-anchored rules (`||`, handled by
     * getBlockedDomains()), exception rules (`@@`), cosmetic rules (`##`,
     * `#@#`, `#?#`, `#$#`, also when prefixed by a domain) and any rule that
     * contains `$`, i.e. carries options or is a regular expression.
     */
    private function isPlainUrlRule(string $rule): bool
    {
        if ($rule === '' || $rule[0] === '!' || $rule[0] === '#') {
            return false;
        }
        if (strncmp($rule, '||', 2) === 0 || strncmp($rule, '@@', 2) === 0) {
            return false;
        }
        if (strpos($rule, '$') !== false) {
            return false;
        }

        return preg_match('/#[@?$]?#/', $rule) !== 1;
    }

    /**
     * Returns the filter rules, from cache when available.
     *
     * A successful download is cached for CACHE_TTL. When the download fails
     * the fallback list is cached only for FALLBACK_CACHE_TTL, so the real
     * list is retried soon without paying the HTTP timeout on every call.
     *
     * @return string[]
     */
    private function getFilters(): array
    {
        $cached = Cache::get(self::CACHE_KEY);
        if (is_array($cached) && $cached !== []) {
            return $cached;
        }

        $filters = $this->fetchFilters();
        if ($filters !== null) {
            Cache::put(self::CACHE_KEY, $filters, self::CACHE_TTL);

            return $filters;
        }

        $fallback = $this->getFallbackFilters();
        Cache::put(self::CACHE_KEY, $fallback, self::FALLBACK_CACHE_TTL);

        return $fallback;
    }

    /**
     * Downloads EasyList and returns its rules without blank lines, comment
     * lines (`!`) and the `[Adblock Plus …]` header line.
     *
     * @return string[]|null The rules, or null when the download failed or
     *                       produced no rules.
     */
    private function fetchFilters(): ?array
    {
        try {
            $response = Http::timeout(30)->get(self::EASYLIST_URL);
            if (!$response->successful()) {
                Log::warning('Failed to fetch EasyList filters', ['status' => $response->status()]);

                return null;
            }

            $filters = [];
            foreach (explode("\n", $response->body()) as $line) {
                $line = trim($line);
                if ($line === '' || $line[0] === '!' || $line[0] === '[') {
                    continue;
                }
                $filters[] = $line;
            }

            if ($filters === []) {
                Log::warning('EasyList response contained no filters');

                return null;
            }

            Log::info('EasyList filters updated', ['count' => count($filters)]);

            return $filters;
        } catch (\Exception $e) {
            Log::error('Error fetching EasyList filters: ' . $e->getMessage());

            return null;
        }
    }

    /**
     * Built-in rules used when EasyList cannot be downloaded.
     *
     * Note that `||facebook.com/tr^` is not a bare hostname, so it does not
     * contribute to getBlockedDomains().
     *
     * @return string[]
     */
    private function getFallbackFilters(): array
    {
        return [
            '||googletagmanager.com^',
            '||google-analytics.com^',
            '||facebook.com/tr^',
            '||doubleclick.net^',
            '||googlesyndication.com^',
            '||amazon-adsystem.com^',
            '||adsystem.amazon.com^',
            '||googleadservices.com^',
        ];
    }

    /**
     * Tells whether $domain is a syntactically valid hostname.
     *
     * FILTER_FLAG_HOSTNAME is required: without it FILTER_VALIDATE_DOMAIN only
     * checks label lengths and accepts strings containing `/` or `$`.
     */
    private function isValidDomain(string $domain): bool
    {
        if ($domain === '') {
            return false;
        }

        return filter_var($domain, FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME) !== false;
    }
}