Files
aibuddytool/app/Jobs/Tasks/ParseUrlBodyTask.php
2023-11-29 21:16:13 +08:00

210 lines
7.7 KiB
PHP

<?php
namespace App\Jobs\Tasks;
use App\Helpers\FirstParty\OpenAI\OpenAI;
use App\Jobs\GetAIToolScreenshotJob;
use App\Jobs\GetUrlBodyJob;
use App\Jobs\StoreSearchEmbeddingJob;
use App\Models\AiTool;
use App\Models\AiToolKeyword;
use App\Models\Category;
use App\Models\ServiceCostUsage;
use App\Models\UrlToCrawl;
use Exception;
/**
 * Builds or refreshes the AiTool record that belongs to a crawled URL.
 *
 * The crawled page body is summarised through OpenAI::getSiteSummary(), the
 * summary is mapped onto an AiTool row, and follow-up jobs are dispatched for
 * search embeddings and the tool screenshot.
 */
class ParseUrlBodyTask
{
    /** Pricing labels accepted from the summary output. */
    private const PRICING_TYPES = ['Free', 'Free Trial', 'Freemium', 'Subscription', 'Usage Based'];

    /** Pricing label used when the summary carries no accepted value. */
    private const DEFAULT_PRICING_TYPE = 'Free';

    /** Platform values accepted from the summary output. */
    private const PLATFORMS = ['app', 'web', 'both'];

    /** Pages with fewer words than this are marked as blocked instead of being parsed. */
    private const MINIMUM_WORD_COUNT = 120;

    /**
     * Parses the stored body of a UrlToCrawl record into an AiTool record.
     *
     * Returns without doing anything when the record does not exist or its
     * status is 'blocked' or 'trashed'. When no body has been stored yet, a
     * GetUrlBodyJob is dispatched and parsing is left to a later run.
     *
     * @param int $url_to_crawl_id Primary key of the UrlToCrawl record.
     *
     * @throws Exception When the summary has no output or no tool name.
     */
    public static function handle(int $url_to_crawl_id): void
    {
        $url_to_crawl = UrlToCrawl::find($url_to_crawl_id);

        if (is_null($url_to_crawl)) {
            return;
        }

        if (in_array($url_to_crawl->status, ['blocked', 'trashed'], true)) {
            return;
        }

        if (is_empty($url_to_crawl->output)) {
            GetUrlBodyJob::dispatch($url_to_crawl->id)->onQueue('default')->onConnection('default');

            return;
        }

        if (count_words($url_to_crawl->output) < self::MINIMUM_WORD_COUNT) {
            $url_to_crawl->status = 'blocked';
            $url_to_crawl->save();

            return;
        }

        $url_meta_response = self::resolveMetaResponse($url_to_crawl);

        // isset() is false for a null response as well as for a missing or null output.
        if (! isset($url_meta_response->output)) {
            throw new Exception('OpenAI::getSiteSummary failed. Empty object');
        }

        $output = $url_meta_response->output;

        if (! isset($output->ai_tool_name) || is_empty($output->ai_tool_name)) {
            throw new Exception('OpenAI::getSiteSummary failed, no tool name');
        }

        // Persist the summary only after it has been validated: caching an unusable
        // response would make every retry fail without asking OpenAI again, while not
        // caching at all makes every run pay for a new summary.
        if (is_null($url_to_crawl->metadata)) {
            $url_to_crawl->metadata = $url_meta_response;
            $url_to_crawl->save();
        }

        $ai_tool = AiTool::where('url_to_crawl_id', $url_to_crawl->id)->first();

        if (is_null($ai_tool)) {
            $ai_tool = new AiTool;
            $ai_tool->url_to_crawl_id = $url_to_crawl->id;
        }

        $ai_tool->external_url = $url_to_crawl->url;

        // NOTE(review): the slug is reassigned on every run, so an existing tool
        // presumably gets a new slug each time its URL is re-parsed - confirm this is intended.
        $ai_tool->tool_name = $output->ai_tool_name;
        $ai_tool->slug = epoch_now_timestamp(1).'-'.str_slug($output->ai_tool_name);

        // Anything other than an explicit boolean is treated as "is an AI tool".
        $ai_tool->is_ai_tool = (isset($output->is_ai_tool) && is_bool($output->is_ai_tool))
            ? $output->is_ai_tool
            : true;

        $ai_tool->is_app_web_both = (isset($output->is_app_web_both) && in_array($output->is_app_web_both, self::PLATFORMS, true))
            ? $output->is_app_web_both
            : 'web';

        if (isset($output->tagline) && ! is_empty($output->tagline)) {
            $ai_tool->tagline = $output->tagline;
        }

        if (isset($output->summary) && ! is_empty($output->summary)) {
            $ai_tool->summary = $output->summary;
        }

        $ai_tool->pricing_type = self::resolvePricingType($output->pricing_type ?? null);

        // Category: the one named by the summary, otherwise 'Productivity'.
        $main_category = null;

        if (isset($output->main_category) && ! is_empty($output->main_category)) {
            $main_category = Category::where('name', $output->main_category)->first();
        }

        if (is_null($main_category)) {
            $main_category = Category::where('name', 'Productivity')->first();
        }

        $ai_tool->category_id = $main_category->id;

        $keywords = (isset($output->keywords) && is_array($output->keywords)) ? $output->keywords : [];
        $qna_items = (isset($output->qna) && is_array($output->qna)) ? $output->qna : [];

        if (isset($output->keywords) && is_array($output->keywords)) {
            $ai_tool->keyword_string = implode(',', $keywords);
        }

        if (isset($output->qna) && is_array($output->qna)) {
            $ai_tool->qna = $qna_items;
        }

        if (! $ai_tool->save()) {
            return;
        }

        $query = $ai_tool->tool_name;

        if (! is_empty($ai_tool->tagline)) {
            $query .= ': '.$ai_tool->tagline;
        }

        StoreSearchEmbeddingJob::dispatch(
            'ai_tool',
            $ai_tool->category_id,
            $ai_tool->id,
            $query
        );

        if (is_empty($ai_tool->screenshot_img)) {
            GetAIToolScreenshotJob::dispatch($url_to_crawl->id, $ai_tool->id)->onQueue('default')->onConnection('default');
        }

        self::storeKeywords($ai_tool, $keywords);
        self::dispatchQnaEmbeddings($ai_tool, $qna_items);
    }

    /**
     * Returns the stored summary of the URL, or requests a new one from OpenAI.
     *
     * A newly requested summary that carries an output is recorded as a
     * ServiceCostUsage row. The parent categories are only queried when a
     * request is actually made.
     *
     * @param UrlToCrawl $url_to_crawl Record whose body is summarised.
     *
     * @return mixed Stored metadata, or whatever OpenAI::getSiteSummary() returned.
     */
    private static function resolveMetaResponse(UrlToCrawl $url_to_crawl)
    {
        if (! is_null($url_to_crawl->metadata)) {
            return $url_to_crawl->metadata;
        }

        $parent_categories = Category::whereNull('parent_id')->orderBy('name', 'ASC')->get();

        $url_meta_response = OpenAI::getSiteSummary($parent_categories, $url_to_crawl->output, 1536, 30, true);

        if (isset($url_meta_response->output)) {
            $service_cost_usage = new ServiceCostUsage;
            $service_cost_usage->cost = $url_meta_response->cost;
            $service_cost_usage->name = 'openai-getSiteSummary';
            $service_cost_usage->reference_1 = 'url_to_crawl';
            $service_cost_usage->reference_2 = strval($url_to_crawl->id);
            $service_cost_usage->output = $url_meta_response;
            $service_cost_usage->save();
        }

        return $url_meta_response;
    }

    /**
     * Picks the pricing label to store for a tool.
     *
     * Accepts either a single label or a list of labels and returns the first
     * one found in PRICING_TYPES (exact, case-sensitive match).
     *
     * @param mixed $raw Value of the summary's pricing_type field, or null when absent.
     *
     * @return string An entry of PRICING_TYPES, or DEFAULT_PRICING_TYPE when nothing matches.
     */
    private static function resolvePricingType($raw): string
    {
        $candidates = is_array($raw) ? $raw : [$raw];

        foreach ($candidates as $candidate) {
            if (is_string($candidate) && in_array($candidate, self::PRICING_TYPES, true)) {
                return $candidate;
            }
        }

        return self::DEFAULT_PRICING_TYPE;
    }

    /**
     * Creates the missing AiToolKeyword rows of a tool and dispatches an embedding job for each new row.
     *
     * Keywords are matched on their trimmed, lower-cased value; entries that
     * are not strings or are blank after trimming are skipped.
     *
     * @param AiTool $ai_tool  Saved tool the keywords belong to.
     * @param array  $keywords Keyword list taken from the summary output.
     */
    private static function storeKeywords(AiTool $ai_tool, array $keywords): void
    {
        foreach ($keywords as $keyword) {
            if (! is_string($keyword)) {
                continue;
            }

            $keyword_trimmed = trim($keyword);

            if ($keyword_trimmed === '') {
                continue;
            }

            $keyword_lowercased = strtolower($keyword_trimmed);

            $ai_tool_keyword = AiToolKeyword::where('value_lowercased', $keyword_lowercased)
                ->where('ai_tool_id', $ai_tool->id)
                ->first();

            if (! is_null($ai_tool_keyword)) {
                continue;
            }

            $ai_tool_keyword = new AiToolKeyword;
            $ai_tool_keyword->category_id = $ai_tool->category_id;
            $ai_tool_keyword->ai_tool_id = $ai_tool->id;
            $ai_tool_keyword->value = $keyword_trimmed;
            $ai_tool_keyword->value_lowercased = $keyword_lowercased;

            if ($ai_tool_keyword->save()) {
                StoreSearchEmbeddingJob::dispatch(
                    'ai_tool_keyword',
                    $ai_tool->category_id,
                    $ai_tool->id,
                    $ai_tool_keyword->value
                );
            }
        }
    }

    /**
     * Dispatches one embedding job per question/answer pair of a tool.
     *
     * Entries that do not expose both a `q` and an `a` property are skipped.
     *
     * @param AiTool $ai_tool   Saved tool the pairs belong to.
     * @param array  $qna_items Q&A list taken from the summary output.
     */
    private static function dispatchQnaEmbeddings(AiTool $ai_tool, array $qna_items): void
    {
        foreach ($qna_items as $qna) {
            if (! isset($qna->q, $qna->a)) {
                continue;
            }

            StoreSearchEmbeddingJob::dispatch(
                'qna',
                $ai_tool->category_id,
                $ai_tool->id,
                ($qna->q.' '.$qna->a)
            );
        }
    }
}