Sync
This commit is contained in:
@@ -4,8 +4,6 @@
|
||||
|
||||
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
|
||||
use App\Models\AiTool;
|
||||
use App\Models\BusinessProfile;
|
||||
use App\Models\SerpUrl;
|
||||
use App\Models\UrlToCrawl;
|
||||
use Exception;
|
||||
use Image;
|
||||
@@ -13,25 +11,21 @@
|
||||
|
||||
class GetAIToolScreenshotTask
|
||||
{
|
||||
|
||||
public static function handle($url_to_crawl_id, $ai_tool_id)
|
||||
{
|
||||
$url_to_crawl = UrlToCrawl::find($url_to_crawl_id);
|
||||
|
||||
if (is_null($url_to_crawl))
|
||||
{
|
||||
return ;
|
||||
if (is_null($url_to_crawl)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$ai_tool = AiTool::find($ai_tool_id);
|
||||
|
||||
if (is_null($ai_tool))
|
||||
{
|
||||
return ;
|
||||
if (is_null($ai_tool)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36";
|
||||
|
||||
$userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36';
|
||||
|
||||
$browsershot = Browsershot::url($url_to_crawl->url)
|
||||
->timeout(30)
|
||||
|
||||
@@ -23,25 +23,29 @@ public static function handle(int $url_to_crawl_id)
|
||||
return null;
|
||||
}
|
||||
|
||||
$enable_proxy = false;
|
||||
|
||||
$url_to_crawl->is_crawling = true;
|
||||
$url_to_crawl->save();
|
||||
$url_to_crawl->refresh();
|
||||
|
||||
try {
|
||||
$user_agent = config('platform.proxy.user_agent');
|
||||
// try {
|
||||
$user_agent = config('platform.proxy.user_agent');
|
||||
|
||||
$response = Http::withHeaders([
|
||||
'User-Agent' => $user_agent,
|
||||
$response = Http::withHeaders([
|
||||
'User-Agent' => $user_agent,
|
||||
])
|
||||
->withOptions([
|
||||
'proxy' => ($enable_proxy) ? get_smartproxy_rotating_server() : null,
|
||||
'timeout' => 10,
|
||||
'verify' => false,
|
||||
])
|
||||
->withOptions([
|
||||
'proxy' => get_smartproxy_rotating_server(),
|
||||
'timeout' => 10,
|
||||
'verify' => false,
|
||||
])
|
||||
->get($url_to_crawl->url);
|
||||
->get($url_to_crawl->url);
|
||||
|
||||
if ($response->successful()) {
|
||||
$raw_html = $response->body();
|
||||
if ($response->successful()) {
|
||||
$raw_html = $response->body();
|
||||
if ($enable_proxy)
|
||||
{
|
||||
$cost = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'rotating_global');
|
||||
|
||||
$service_cost_usage = new ServiceCostUsage;
|
||||
@@ -51,17 +55,19 @@ public static function handle(int $url_to_crawl_id)
|
||||
$service_cost_usage->reference_2 = strval($url_to_crawl_id);
|
||||
$service_cost_usage->output = self::getMarkdownFromHtml($raw_html);
|
||||
$service_cost_usage->save();
|
||||
|
||||
} else {
|
||||
$raw_html = null;
|
||||
$response->throw();
|
||||
}
|
||||
|
||||
} catch (Exception $e) {
|
||||
|
||||
} else {
|
||||
$raw_html = null;
|
||||
//throw $e;
|
||||
$response->throw();
|
||||
}
|
||||
|
||||
// } catch (Exception $e) {
|
||||
// $raw_html = null;
|
||||
// //throw $e;
|
||||
// }
|
||||
|
||||
if (! is_empty($raw_html)) {
|
||||
$url_to_crawl->output_type = 'markdown';
|
||||
$url_to_crawl->output = self::getMarkdownFromHtml($raw_html);
|
||||
|
||||
@@ -65,23 +65,28 @@ public static function handle(int $url_to_crawl_id)
|
||||
$ai_tool->url_to_crawl_id = $url_to_crawl->id;
|
||||
}
|
||||
|
||||
$ai_tool->external_url = $url_to_crawl->url;
|
||||
|
||||
// Tool Name
|
||||
if ((isset($url_meta_response->output->tool_name)) && (! is_empty($url_meta_response->output->tool_name))) {
|
||||
$ai_tool->tool_name = $url_meta_response->output->tool_name;
|
||||
if ((isset($url_meta_response->output->ai_tool_name)) && (! is_empty($url_meta_response->output->ai_tool_name))) {
|
||||
$ai_tool->tool_name = $url_meta_response->output->ai_tool_name;
|
||||
$ai_tool->slug = epoch_now_timestamp(1).'-'.str_slug($url_meta_response->output->ai_tool_name);
|
||||
} else {
|
||||
throw new Exception('OpenAI::getSiteSummary failed, no tool name');
|
||||
}
|
||||
|
||||
// Is AI Tool
|
||||
if ((isset($url_meta_response->output->is_ai_tool)) && (! is_null($url_meta_response->output->is_at_tool)) && is_bool($url_meta_response->output->is_ai_tool)) {
|
||||
if ((isset($url_meta_response->output->is_ai_tool)) && (! is_null($url_meta_response->output->is_ai_tool)) && is_bool($url_meta_response->output->is_ai_tool)) {
|
||||
$ai_tool->is_ai_tool = $url_meta_response->output->is_ai_tool;
|
||||
} else {
|
||||
$ai_tool->is_ai_tool = true;
|
||||
}
|
||||
|
||||
// Is App/Web/Both
|
||||
if ((isset($url_meta_response->output->is_app_web_both)) && (is_array($url_meta_response->output->is_app_web_both)) && in_array($url_meta_response->output->is_app_web_both, ['app', 'web', 'both'])) {
|
||||
if ((isset($url_meta_response->output->is_app_web_both)) && (! is_empty($url_meta_response->output->is_app_web_both)) && in_array($url_meta_response->output->is_app_web_both, ['app', 'web', 'both'])) {
|
||||
$ai_tool->is_app_web_both = $url_meta_response->output->is_app_web_both;
|
||||
} else {
|
||||
$ai_tool->is_app_web_both = 'web';
|
||||
}
|
||||
|
||||
// Tagline
|
||||
@@ -130,9 +135,8 @@ public static function handle(int $url_to_crawl_id)
|
||||
|
||||
$query = $ai_tool->tool_name;
|
||||
|
||||
if (!is_empty($ai_tool->tagline))
|
||||
{
|
||||
$query .= ": " . $ai_tool->tagline;
|
||||
if (! is_empty($ai_tool->tagline)) {
|
||||
$query .= ': '.$ai_tool->tagline;
|
||||
}
|
||||
|
||||
StoreSearchEmbeddingJob::dispatch(
|
||||
@@ -176,8 +180,7 @@ public static function handle(int $url_to_crawl_id)
|
||||
|
||||
// Q&A
|
||||
if ((isset($url_meta_response->output->qna)) && (is_array($url_meta_response->output->qna))) {
|
||||
foreach ($url_meta_response->output->qna as $qna)
|
||||
{
|
||||
foreach ($url_meta_response->output->qna as $qna) {
|
||||
$q = $qna->q;
|
||||
$a = $qna->a;
|
||||
|
||||
@@ -187,7 +190,7 @@ public static function handle(int $url_to_crawl_id)
|
||||
'qna',
|
||||
$ai_tool->category_id,
|
||||
$ai_tool->id,
|
||||
($qna->q . " " . $qna->a)
|
||||
($qna->q.' '.$qna->a)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user