This commit is contained in:
2023-11-28 04:39:36 +08:00
parent a9ac0e48b3
commit dc37274b6c
86 changed files with 2106 additions and 191 deletions

View File

@@ -4,8 +4,6 @@
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
use App\Models\AiTool;
use App\Models\BusinessProfile;
use App\Models\SerpUrl;
use App\Models\UrlToCrawl;
use Exception;
use Image;
@@ -13,25 +11,21 @@
class GetAIToolScreenshotTask
{
public static function handle($url_to_crawl_id, $ai_tool_id)
{
$url_to_crawl = UrlToCrawl::find($url_to_crawl_id);
if (is_null($url_to_crawl))
{
return ;
if (is_null($url_to_crawl)) {
return;
}
$ai_tool = AiTool::find($ai_tool_id);
if (is_null($ai_tool))
{
return ;
if (is_null($ai_tool)) {
return;
}
$userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36";
$userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36';
$browsershot = Browsershot::url($url_to_crawl->url)
->timeout(30)

View File

@@ -23,25 +23,29 @@ public static function handle(int $url_to_crawl_id)
return null;
}
$enable_proxy = false;
$url_to_crawl->is_crawling = true;
$url_to_crawl->save();
$url_to_crawl->refresh();
try {
$user_agent = config('platform.proxy.user_agent');
// try {
$user_agent = config('platform.proxy.user_agent');
$response = Http::withHeaders([
'User-Agent' => $user_agent,
$response = Http::withHeaders([
'User-Agent' => $user_agent,
])
->withOptions([
'proxy' => ($enable_proxy) ? get_smartproxy_rotating_server() : null,
'timeout' => 10,
'verify' => false,
])
->withOptions([
'proxy' => get_smartproxy_rotating_server(),
'timeout' => 10,
'verify' => false,
])
->get($url_to_crawl->url);
->get($url_to_crawl->url);
if ($response->successful()) {
$raw_html = $response->body();
if ($response->successful()) {
$raw_html = $response->body();
if ($enable_proxy)
{
$cost = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'rotating_global');
$service_cost_usage = new ServiceCostUsage;
@@ -51,17 +55,19 @@ public static function handle(int $url_to_crawl_id)
$service_cost_usage->reference_2 = strval($url_to_crawl_id);
$service_cost_usage->output = self::getMarkdownFromHtml($raw_html);
$service_cost_usage->save();
} else {
$raw_html = null;
$response->throw();
}
} catch (Exception $e) {
} else {
$raw_html = null;
//throw $e;
$response->throw();
}
// } catch (Exception $e) {
// $raw_html = null;
// //throw $e;
// }
if (! is_empty($raw_html)) {
$url_to_crawl->output_type = 'markdown';
$url_to_crawl->output = self::getMarkdownFromHtml($raw_html);

View File

@@ -65,23 +65,28 @@ public static function handle(int $url_to_crawl_id)
$ai_tool->url_to_crawl_id = $url_to_crawl->id;
}
$ai_tool->external_url = $url_to_crawl->url;
// Tool Name
if ((isset($url_meta_response->output->tool_name)) && (! is_empty($url_meta_response->output->tool_name))) {
$ai_tool->tool_name = $url_meta_response->output->tool_name;
if ((isset($url_meta_response->output->ai_tool_name)) && (! is_empty($url_meta_response->output->ai_tool_name))) {
$ai_tool->tool_name = $url_meta_response->output->ai_tool_name;
$ai_tool->slug = epoch_now_timestamp(1).'-'.str_slug($url_meta_response->output->ai_tool_name);
} else {
throw new Exception('OpenAI::getSiteSummary failed, no tool name');
}
// Is AI Tool
if ((isset($url_meta_response->output->is_ai_tool)) && (! is_null($url_meta_response->output->is_at_tool)) && is_bool($url_meta_response->output->is_ai_tool)) {
if ((isset($url_meta_response->output->is_ai_tool)) && (! is_null($url_meta_response->output->is_ai_tool)) && is_bool($url_meta_response->output->is_ai_tool)) {
$ai_tool->is_ai_tool = $url_meta_response->output->is_ai_tool;
} else {
$ai_tool->is_ai_tool = true;
}
// Is App/Web/Both
if ((isset($url_meta_response->output->is_app_web_both)) && (is_array($url_meta_response->output->is_app_web_both)) && in_array($url_meta_response->output->is_app_web_both, ['app', 'web', 'both'])) {
if ((isset($url_meta_response->output->is_app_web_both)) && (! is_empty($url_meta_response->output->is_app_web_both)) && in_array($url_meta_response->output->is_app_web_both, ['app', 'web', 'both'])) {
$ai_tool->is_app_web_both = $url_meta_response->output->is_app_web_both;
} else {
$ai_tool->is_app_web_both = 'web';
}
// Tagline
@@ -130,9 +135,8 @@ public static function handle(int $url_to_crawl_id)
$query = $ai_tool->tool_name;
if (!is_empty($ai_tool->tagline))
{
$query .= ": " . $ai_tool->tagline;
if (! is_empty($ai_tool->tagline)) {
$query .= ': '.$ai_tool->tagline;
}
StoreSearchEmbeddingJob::dispatch(
@@ -176,8 +180,7 @@ public static function handle(int $url_to_crawl_id)
// Q&A
if ((isset($url_meta_response->output->qna)) && (is_array($url_meta_response->output->qna))) {
foreach ($url_meta_response->output->qna as $qna)
{
foreach ($url_meta_response->output->qna as $qna) {
$q = $qna->q;
$a = $qna->a;
@@ -187,7 +190,7 @@ public static function handle(int $url_to_crawl_id)
'qna',
$ai_tool->category_id,
$ai_tool->id,
($qna->q . " " . $qna->a)
($qna->q.' '.$qna->a)
);
}
}