Update (fix): infinite loop

This commit is contained in:
2023-11-29 12:46:37 +08:00
parent 8c0ae63f42
commit 6580463fcb
4 changed files with 44 additions and 3 deletions

View File

@@ -23,12 +23,17 @@ public static function handle(int $url_to_crawl_id)
return null;
}
if (in_array($url_to_crawl->status, ['blocked', 'trashed'])) {
return;
}
$enable_proxy = false;
$url_to_crawl->is_crawling = true;
$url_to_crawl->save();
$url_to_crawl->refresh();
// try {
$user_agent = config('platform.proxy.user_agent');
@@ -66,9 +71,13 @@ public static function handle(int $url_to_crawl_id)
// //throw $e;
// }
if (! is_empty($raw_html)) {
$markdown_output = self::getMarkdownFromHtml($raw_html);
if (! is_empty($markdown_output)) {
$url_to_crawl->output_type = 'markdown';
$url_to_crawl->output = self::getMarkdownFromHtml($raw_html);
$url_to_crawl->output = $markdown_output;
} else {
$url_to_crawl->output = 'EMPTY CONTENT';
@@ -76,6 +85,7 @@ public static function handle(int $url_to_crawl_id)
}
$url_to_crawl->is_crawled = true;
$url_to_crawl->crawl_counts = $url_to_crawl->crawl_counts + 1;
if ($url_to_crawl->save()) {
if (! in_array($url_to_crawl->status, ['blocked', 'trashed'])) {

View File

@@ -4,6 +4,7 @@
use App\Helpers\FirstParty\OpenAI\OpenAI;
use App\Jobs\GetAIToolScreenshotJob;
use App\Jobs\GetUrlBodyJob;
use App\Jobs\ParseUrlBodyJob;
use App\Jobs\StoreSearchEmbeddingJob;
use App\Models\AiTool;
@@ -30,7 +31,7 @@ public static function handle(int $url_to_crawl_id)
}
if (is_empty($url_to_crawl->output)) {
ParseUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');
GetUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');
return ;
}

View File

@@ -32,6 +32,7 @@ class UrlToCrawl extends Model
'is_crawling' => 'bool',
'is_crawled' => 'bool',
'metadata' => 'object',
'crawl_counts' => 'integer',
];
protected $fillable = [
@@ -43,5 +44,6 @@ class UrlToCrawl extends Model
'output',
'metadata',
'status',
'crawl_counts',
];
}

View File

@@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /** Table altered by this migration. */
    private const TABLE = 'url_to_crawls';

    /** Column added in up() and dropped in down(). */
    private const COLUMN = 'crawl_counts';

    /**
     * Apply the migration: add an integer `crawl_counts` column with a
     * default of 0 to the `url_to_crawls` table.
     */
    public function up(): void
    {
        Schema::table(self::TABLE, static function (Blueprint $blueprint): void {
            $column = $blueprint->integer(self::COLUMN);
            $column->default(0);
        });
    }

    /**
     * Roll the migration back: drop the `crawl_counts` column from the
     * `url_to_crawls` table.
     */
    public function down(): void
    {
        Schema::table(self::TABLE, static function (Blueprint $blueprint): void {
            $blueprint->dropColumn(self::COLUMN);
        });
    }
};