Update (parsing): skip output with fewer than 120 words

This commit is contained in:
2023-11-29 12:24:15 +08:00
parent c70b0bccd4
commit 8c0ae63f42
3 changed files with 24 additions and 1 deletion

View File

@@ -31,6 +31,15 @@ public static function handle(int $url_to_crawl_id)
if (is_empty($url_to_crawl->output)) {
ParseUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');
return ;
}
if (count_words($url_to_crawl->output) < 120)
{
$url_to_crawl->status = 'blocked';
$url_to_crawl->save();
return ;
}
$url_meta_response = null;