Files
productalert/app/Jobs/Tasks/GenerateShopeeAIArticleTask.php
2023-10-01 04:17:49 +08:00

202 lines
7.2 KiB
PHP

<?php
namespace App\Jobs\Tasks;
use andreskrey\Readability\Configuration as ReadabilityConfiguration;
use andreskrey\Readability\ParseException as ReadabilityParseException;
use andreskrey\Readability\Readability;
use App\Helpers\FirstParty\OpenAI\OpenAI;
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
use App\Models\AiWriteup;
use App\Models\Post;
use App\Models\PostCategory;
use App\Models\ShopeeSellerScrape;
use App\Models\ShopeeSellerScrapedImage;
use Exception;
use Illuminate\Support\Facades\Log;
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
use LaravelGoogleIndexing;
use Masterminds\HTML5;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Turns a stored Shopee seller scrape into an AI-written article and a published post.
 */
class GenerateShopeeAIArticleTask
{
    /**
     * Flat AI writing cost added to every article.
     * Carried over from the original inline note: chatgpt-3.5-turbo $0.03 for 1k, writing for 2k tokens.
     */
    private const AI_WRITING_COST = 0.06;

    /**
     * Generate an AI article for the given scrape, store it as an AiWriteup and create a Post for it.
     *
     * Steps: read the serialised scrape task from storage, refuse to continue if a writeup already
     * exists for the product URL, build a plain-text excerpt from the scraped HTML, ask OpenAI for
     * an article, persist the writeup, create the post and its category link, and (in production)
     * pass the post URL to LaravelGoogleIndexing and IndexNow.
     *
     * @param  ShopeeSellerScrape  $shopee_seller_scrape  Scrape record whose stored file is processed.
     * @return Post|null The created post, or null when saving the AiWriteup reports failure.
     *
     * @throws Exception When the stored scrape data is missing or unreadable, when a writeup for the
     *                   same source URL already exists, or when OpenAI returns no output.
     */
    public static function handle(ShopeeSellerScrape $shopee_seller_scrape): ?Post
    {
        $shopee_task = self::loadShopeeTask($shopee_seller_scrape);
        $shopee_seller_scrape->load('category');

        // `??` keeps a missing product_task/response from raising a warning; validated just below.
        $response = $shopee_task->product_task->response ?? null;
        if (! is_string($response?->raw_html ?? null)) {
            throw self::reportFailure('Failed to write: Missing raw_html', ['filename' => $shopee_seller_scrape->filename]);
        }

        $source_url = $response->url ?? null;

        // Check for an existing writeup first so no HTML parsing or photo query is wasted on a duplicate.
        $existing_writeup = AiWriteup::where('source', 'shopee')->where('source_url', $source_url)->first();
        if (! is_null($existing_writeup)) {
            throw self::reportFailure('Failed to write: ai_writeup found', $existing_writeup->toArray());
        }

        $excerpt = self::stripHtml($response->raw_html);

        // Up to three random non-featured images are handed to the AI alongside the excerpt.
        $photos = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', false)
            ->inRandomOrder()
            ->take(3)
            ->get()
            ->pluck('image')
            ->toArray();

        $ai_output = OpenAI::writeProductArticle($excerpt, $photos);
        if (is_null($ai_output)) {
            throw self::reportFailure('Failed to write: Missing ai_output', ['source_url' => $source_url]);
        }

        $ai_writeup = new AiWriteup;
        $ai_writeup->source = 'shopee';
        $ai_writeup->source_url = $source_url;
        $ai_writeup->category_id = $shopee_seller_scrape->category->id;
        $ai_writeup->title = $ai_output->title;
        $ai_writeup->excerpt = $ai_output->excerpt;
        $ai_writeup->featured_image = '';
        $ai_writeup->body = $ai_output->body;
        $ai_writeup->cost = self::getTotalServiceCost($shopee_task);
        $ai_writeup->editor_format = 'markdown';

        if (! $ai_writeup->save()) {
            // Unchanged from the original flow: an unsaved writeup silently yields no post.
            return null;
        }

        $post = self::createPost($ai_writeup, $shopee_seller_scrape);
        self::submitForIndexing($post);

        return $post;
    }

    /**
     * Read and unserialise the stored scrape task, attaching the scrape record to it.
     *
     * @param  ShopeeSellerScrape  $shopee_seller_scrape  Record providing the stored filename.
     * @return object The unserialised task with `shopee_seller_scrape` set on it.
     *
     * @throws Exception When the stored file is empty or does not unserialise to an object.
     */
    private static function loadShopeeTask(ShopeeSellerScrape $shopee_seller_scrape): object
    {
        $serialised = OSSUploader::readFile('r2', 'shopee/seller', $shopee_seller_scrape->filename);

        // NOTE(review): is_empty() is not a PHP built-in; presumably a project-level helper — confirm.
        if (is_empty($serialised)) {
            throw self::reportFailure('Failed to write: Missing scraped data', ['filename' => $shopee_seller_scrape->filename]);
        }

        // NOTE(security): unserialize() can instantiate arbitrary classes. This is only safe while the
        // bucket contents are written exclusively by this application — confirm, or restrict allowed_classes.
        $shopee_task = unserialize($serialised);
        if (! is_object($shopee_task)) {
            throw self::reportFailure('Failed to write: Unreadable scraped data', ['filename' => $shopee_seller_scrape->filename]);
        }

        $shopee_task->shopee_seller_scrape = $shopee_seller_scrape;

        return $shopee_task;
    }

    /**
     * Create the published post for a saved writeup and link it to the scrape's category.
     *
     * @param  AiWriteup  $ai_writeup  Saved writeup supplying title, excerpt and body.
     * @param  ShopeeSellerScrape  $shopee_seller_scrape  Scrape supplying the featured image and category.
     * @return Post The newly created post.
     */
    private static function createPost(AiWriteup $ai_writeup, ShopeeSellerScrape $shopee_seller_scrape): Post
    {
        $featured_photo = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', true)
            ->first();

        $post = Post::create([
            'publish_date' => now(),
            'title' => $ai_writeup->title,
            'slug' => str_slug($ai_writeup->title),
            'excerpt' => $ai_writeup->excerpt,
            // NOTE(review): cliffhanger is never assigned on the writeup in this class — confirm its source.
            'cliffhanger' => $ai_writeup->cliffhanger,
            'author_id' => 1,
            'featured' => false,
            // A scrape may have no featured image; nullsafe avoids failing after the writeup is already saved.
            'featured_image' => $featured_photo?->image,
            'editor' => 'markdown',
            'body' => $ai_writeup->body,
            'post_format' => 'standard',
            'status' => 'publish',
        ]);

        PostCategory::create([
            'post_id' => $post->id,
            'category_id' => $shopee_seller_scrape->category->id,
        ]);

        return $post;
    }

    /**
     * In production, pass a published post's URL to LaravelGoogleIndexing and IndexNow.
     *
     * @param  Post  $post  The post whose public URL is submitted.
     */
    private static function submitForIndexing(Post $post): void
    {
        if (app()->environment() !== 'production' || $post->status !== 'publish') {
            return;
        }

        $post_url = route('home.country.post', [
            'country' => $post->post_category?->category?->country_locale_slug,
            'post_slug' => $post->slug,
        ]);

        LaravelGoogleIndexing::create()->update($post_url);
        IndexNow::submit($post_url);
    }

    /**
     * Log the given context, report a new exception to the inspector and hand it back for throwing.
     *
     * @param  string  $message  Exception message.
     * @param  array|null  $context  Data written (serialised) to the error log.
     * @return Exception The exception for the caller to throw.
     */
    private static function reportFailure(string $message, ?array $context): Exception
    {
        $e = new Exception($message);
        Log::error(serialize($context));
        inspector()->reportException($e);

        return $e;
    }

    /**
     * Sum the fixed AI writing cost and any scraping costs recorded on the task.
     *
     * @param  object|null  $shopee_task  Task whose seller_shop_task / product_task responses may carry total_cost.
     * @return float Total cost; equals the fixed AI writing cost when no scraping costs are present.
     */
    private static function getTotalServiceCost($shopee_task): float
    {
        $cost = self::AI_WRITING_COST;

        // Shopee Seller Scraping
        $cost += $shopee_task?->seller_shop_task?->response?->total_cost ?? 0;

        // Shopee Product Scraping
        $cost += $shopee_task?->product_task?->response?->total_cost ?? 0;

        return $cost;
    }

    /**
     * Reduce scraped HTML to plain text: "<title> <meta description> <readable content>".
     *
     * If Readability or the crawler throws, falls back to stripping tags from the HTML5-reserialised document.
     *
     * @param  string  $raw_html  Raw HTML of the scraped page.
     * @return string Plain-text representation of the page.
     */
    private static function stripHtml(string $raw_html): string
    {
        try {
            $r_configuration = new ReadabilityConfiguration();
            $r_configuration->setWordThreshold(20);

            $readability = new Readability($r_configuration);
            $readability->parse($raw_html);

            // Strip tags first: normalising whitespace beforehand let tag removal reintroduce
            // double spaces and leading/trailing spaces.
            $content = strip_tags((string) $readability->getContent());
            // Tabs are removed outright (not replaced by a space).
            $content = str_replace("\t", '', $content);
            // Collapse every whitespace run, newlines included, into a single space.
            $content = trim(preg_replace('/\s+/', ' ', $content) ?? $content);

            $crawler = new Crawler($raw_html);

            // text() throws on an empty node list; a page without <title> must not discard the parsed content.
            $title_node = $crawler->filter('title');
            $title = $title_node->count() > 0 ? $title_node->text() : '';

            $description_node = $crawler->filter('meta[name="description"]');
            $description = $description_node->count() > 0 ? $description_node->attr('content') : null;

            return $title.' '.$description.' '.$content;
        } catch (ReadabilityParseException|Exception $e) {
            // Fallback: reserialise the document via HTML5 and strip its tags.
            $html5 = new HTML5(['preserveWhiteSpace' => true]);
            $dom = $html5->loadHTML($raw_html);

            return strip_tags($html5->saveHTML($dom));
        }
    }
}