Update (fix): infinite loop

This commit is contained in:
2023-11-29 12:46:37 +08:00
parent 8c0ae63f42
commit 6580463fcb
4 changed files with 44 additions and 3 deletions

View File

@@ -23,12 +23,17 @@ public static function handle(int $url_to_crawl_id)
return null; return null;
} }
if (in_array($url_to_crawl->status, ['blocked', 'trashed'])) {
return;
}
$enable_proxy = false; $enable_proxy = false;
$url_to_crawl->is_crawling = true; $url_to_crawl->is_crawling = true;
$url_to_crawl->save(); $url_to_crawl->save();
$url_to_crawl->refresh(); $url_to_crawl->refresh();
// try { // try {
$user_agent = config('platform.proxy.user_agent'); $user_agent = config('platform.proxy.user_agent');
@@ -66,9 +71,13 @@ public static function handle(int $url_to_crawl_id)
// //throw $e; // //throw $e;
// } // }
if (! is_empty($raw_html)) {
$markdown_output = self::getMarkdownFromHtml($raw_html);
if (! is_empty($markdown_output)) {
$url_to_crawl->output_type = 'markdown'; $url_to_crawl->output_type = 'markdown';
$url_to_crawl->output = self::getMarkdownFromHtml($raw_html); $url_to_crawl->output = $markdown_output;
} else { } else {
$url_to_crawl->output = 'EMPTY CONTENT'; $url_to_crawl->output = 'EMPTY CONTENT';
@@ -76,6 +85,7 @@ public static function handle(int $url_to_crawl_id)
} }
$url_to_crawl->is_crawled = true; $url_to_crawl->is_crawled = true;
$url_to_crawl->crawl_counts = $url_to_crawl->crawl_counts + 1;
if ($url_to_crawl->save()) { if ($url_to_crawl->save()) {
if (! in_array($url_to_crawl->status, ['blocked', 'trashed'])) { if (! in_array($url_to_crawl->status, ['blocked', 'trashed'])) {

View File

@@ -4,6 +4,7 @@
use App\Helpers\FirstParty\OpenAI\OpenAI; use App\Helpers\FirstParty\OpenAI\OpenAI;
use App\Jobs\GetAIToolScreenshotJob; use App\Jobs\GetAIToolScreenshotJob;
use App\Jobs\GetUrlBodyJob;
use App\Jobs\ParseUrlBodyJob; use App\Jobs\ParseUrlBodyJob;
use App\Jobs\StoreSearchEmbeddingJob; use App\Jobs\StoreSearchEmbeddingJob;
use App\Models\AiTool; use App\Models\AiTool;
@@ -30,7 +31,7 @@ public static function handle(int $url_to_crawl_id)
} }
if (is_empty($url_to_crawl->output)) { if (is_empty($url_to_crawl->output)) {
ParseUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default'); GetUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');
return ; return ;
} }

View File

@@ -32,6 +32,7 @@ class UrlToCrawl extends Model
'is_crawling' => 'bool', 'is_crawling' => 'bool',
'is_crawled' => 'bool', 'is_crawled' => 'bool',
'metadata' => 'object', 'metadata' => 'object',
'crawl_counts' => 'integer',
]; ];
protected $fillable = [ protected $fillable = [
@@ -43,5 +44,6 @@ class UrlToCrawl extends Model
'output', 'output',
'metadata', 'metadata',
'status', 'status',
'crawl_counts',
]; ];
} }

View File

@@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Apply the migration.
     *
     * Adds an integer `crawl_counts` column, defaulting to 0, to the
     * `url_to_crawls` table.
     */
    public function up(): void
    {
        Schema::table(
            'url_to_crawls',
            static function (Blueprint $blueprint): void {
                // The column definition is registered on the blueprint as soon
                // as it is created; the default is then set on that same definition.
                $column = $blueprint->integer('crawl_counts');
                $column->default(0);
            },
        );
    }

    /**
     * Roll the migration back.
     *
     * Removes the `crawl_counts` column from the `url_to_crawls` table.
     */
    public function down(): void
    {
        Schema::table(
            'url_to_crawls',
            static function (Blueprint $blueprint): void {
                $blueprint->dropColumn('crawl_counts');
            },
        );
    }
};