diff --git a/app/Helpers/Global/string_helper.php b/app/Helpers/Global/string_helper.php
index 475cd6a..a476129 100644
--- a/app/Helpers/Global/string_helper.php
+++ b/app/Helpers/Global/string_helper.php
@@ -3,6 +3,30 @@
 use Carbon\Carbon;
 use Illuminate\Support\Str;
 
+if (! function_exists('count_words')) {
+    /**
+     * Count the words in a string.
+     *
+     * Every run of punctuation and/or whitespace is a separator, so "don't"
+     * counts as two words.
+     *
+     * @param  mixed  $string  Text to measure; null is treated as an empty string.
+     * @return int Number of non-empty tokens between separators.
+     */
+    function count_words($string): int
+    {
+        // Scrub malformed UTF-8 first: the /u modifier makes PCRE fail on
+        // invalid byte sequences, which would otherwise yield a count of zero.
+        $text = mb_scrub((string) $string, 'UTF-8');
+
+        // Split on punctuation and whitespace in one Unicode-aware pass.
+        $words = preg_split('/[\p{P}\s]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+        return $words === false ? 0 : count($words);
+    }
+}
+
+
 if (! function_exists('dmy')) {
     function dmy(Carbon $carbon)
     {
diff --git a/app/Jobs/Tasks/ParseUrlBodyTask.php b/app/Jobs/Tasks/ParseUrlBodyTask.php
index 70172af..0fb1c2d 100644
--- a/app/Jobs/Tasks/ParseUrlBodyTask.php
+++ b/app/Jobs/Tasks/ParseUrlBodyTask.php
@@ -31,6 +31,16 @@ public static function handle(int $url_to_crawl_id)
 
         if (is_empty($url_to_crawl->output)) {
             ParseUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');
+            return ;
+        }
+
+        // Fewer than 120 words of output: mark the URL as 'blocked' and stop processing it.
+        if (count_words($url_to_crawl->output) < 120)
+        {
+            $url_to_crawl->status = 'blocked';
+            $url_to_crawl->save();
+
+            return ;
         }
 
         $url_meta_response = null;
diff --git a/database/seeders/NewCategorySeeder.php b/database/seeders/NewCategorySeeder.php
index da1a6c5..889ed6b 100644
--- a/database/seeders/NewCategorySeeder.php
+++ b/database/seeders/NewCategorySeeder.php
@@ -14,7 +14,7 @@ public function run(): void
     {
         $parent_categories = [
 
-            ['name' => 'NFSW', 'emoji' => '👅', 'is_top' => true],
+            ['name' => 'NSFW', 'emoji' => '👅', 'is_top' => true],
         ];
 
         foreach ($parent_categories as $item) {