202 lines
7.2 KiB
PHP
202 lines
7.2 KiB
PHP
<?php
|
|
|
|
namespace App\Jobs\Tasks;
|
|
|
|
use andreskrey\Readability\Configuration as ReadabilityConfiguration;
|
|
use andreskrey\Readability\ParseException as ReadabilityParseException;
|
|
use andreskrey\Readability\Readability;
|
|
use App\Helpers\FirstParty\OpenAI\OpenAI;
|
|
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
|
|
use App\Models\AiWriteup;
|
|
use App\Models\Post;
|
|
use App\Models\PostCategory;
|
|
use App\Models\ShopeeSellerScrape;
|
|
use App\Models\ShopeeSellerScrapedImage;
|
|
use Exception;
|
|
use Illuminate\Support\Facades\Log;
|
|
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
|
|
use LaravelGoogleIndexing;
|
|
use Masterminds\HTML5;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Builds an AI-written article from a stored Shopee seller scrape and
 * publishes it as a Post.
 *
 * Flow of handle(): read the serialised scrape payload from storage, reduce
 * the scraped product HTML to plain text, ask OpenAI for an article, persist
 * it as an AiWriteup, create the matching Post + PostCategory and, in
 * production, submit the post URL to the search-indexing services.
 */
class GenerateShopeeAIArticleTask
{
    /**
     * Flat cost booked for the article-writing call.
     * chatgpt-3.5-turbo $0.03 for 1k, writing for 2k tokens.
     */
    private const AI_WRITING_COST = 0.06;

    /**
     * Generate and publish an article for the given scrape.
     *
     * @param  ShopeeSellerScrape  $shopee_seller_scrape  Scrape whose stored payload is used as the source.
     * @return Post|null The created post, or null when the AiWriteup could not be saved.
     *
     * @throws Exception When the stored payload is missing or is not an object, when a
     *                   writeup already exists for the product URL, or when OpenAI
     *                   returns no output.
     */
    public static function handle(ShopeeSellerScrape $shopee_seller_scrape)
    {
        $shopee_task = self::loadScrapeTask($shopee_seller_scrape);

        $shopee_seller_scrape->load('category');

        $response = $shopee_task->product_task->response;
        $source_url = $response->url;

        // Check for an existing writeup first so a duplicate run does not pay
        // for the HTML parsing and the random photo query below.
        $ai_writeup = AiWriteup::where('source', 'shopee')->where('source_url', $source_url)->first();

        if (! is_null($ai_writeup)) {
            throw self::reportFailure(
                'Failed to write: ai_writeup found',
                serialize($ai_writeup->toArray())
            );
        }

        $excerpt = self::stripHtml($response->raw_html);

        // Up to three random non-featured images are handed to the writer.
        $photos = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', false)
            ->orderByRaw('RAND()')
            ->take(3)
            ->get()
            ->pluck('image')
            ->toArray();

        $ai_output = OpenAI::writeProductArticle($excerpt, $photos);

        if (is_null($ai_output)) {
            // There is no writeup to dump at this point, so log what identifies the job.
            throw self::reportFailure(
                'Failed to write: Missing ai_output',
                'No AI output for source_url: '.$source_url
            );
        }

        $ai_writeup = new AiWriteup;
        $ai_writeup->source = 'shopee';
        $ai_writeup->source_url = $source_url;
        $ai_writeup->category_id = $shopee_seller_scrape->category->id;
        $ai_writeup->title = $ai_output->title;
        $ai_writeup->excerpt = $ai_output->excerpt;
        $ai_writeup->featured_image = '';
        $ai_writeup->body = $ai_output->body;
        $ai_writeup->cost = self::getTotalServiceCost($shopee_task);
        $ai_writeup->editor_format = 'markdown';

        if (! $ai_writeup->save()) {
            return null;
        }

        return self::publishPost($ai_writeup, $shopee_seller_scrape);
    }

    /**
     * Read and decode the serialised scrape payload for the given scrape.
     *
     * @return object The decoded task, with the scrape attached as ->shopee_seller_scrape.
     *
     * @throws Exception When the stored payload is empty or does not decode to an object.
     */
    private static function loadScrapeTask(ShopeeSellerScrape $shopee_seller_scrape): object
    {
        $serialised = OSSUploader::readFile('r2', 'shopee/seller', $shopee_seller_scrape->filename);

        // NOTE(review): is_empty() is not a PHP built-in; presumably a project helper — confirm.
        if (is_empty($serialised)) {
            throw self::reportFailure(
                'Failed to write: Missing scrape payload',
                'Empty scrape payload for file: '.$shopee_seller_scrape->filename
            );
        }

        // SECURITY(review): unserialize() can instantiate arbitrary classes. This is only
        // safe while the stored file is written exclusively by trusted code — confirm, or
        // move the payload to a data-only format such as JSON.
        $shopee_task = unserialize($serialised);

        if (! is_object($shopee_task)) {
            throw self::reportFailure(
                'Failed to write: Invalid scrape payload',
                'Scrape payload is not an object for file: '.$shopee_seller_scrape->filename
            );
        }

        $shopee_task->shopee_seller_scrape = $shopee_seller_scrape;

        return $shopee_task;
    }

    /**
     * Create the Post and its PostCategory for a saved writeup and, in
     * production, submit the post URL for indexing.
     *
     * @return Post|null
     */
    private static function publishPost(AiWriteup $ai_writeup, ShopeeSellerScrape $shopee_seller_scrape)
    {
        $featured_photo = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', true)
            ->first();

        $post = Post::create([
            'publish_date' => now(),
            'title' => $ai_writeup->title,
            'slug' => str_slug($ai_writeup->title),
            'excerpt' => $ai_writeup->excerpt,
            // NOTE(review): cliffhanger is never assigned on the writeup in this class,
            // so this passes whatever the model yields for an unset attribute.
            'cliffhanger' => $ai_writeup->cliffhanger,
            'author_id' => 1,
            'featured' => false,
            // A scrape may have no featured image; do not dereference null.
            'featured_image' => $featured_photo?->image,
            'editor' => 'markdown',
            'body' => $ai_writeup->body,
            'post_format' => 'standard',
            'status' => 'publish',
        ]);

        if (is_null($post)) {
            return null;
        }

        PostCategory::create([
            'post_id' => $post->id,
            'category_id' => $shopee_seller_scrape->category->id,
        ]);

        // Search engines are only pinged for published posts on production.
        // Loose comparison on status is kept: the attribute's cast is not visible here.
        if (app()->environment() === 'production' && $post->status == 'publish') {
            $post_url = route('home.country.post', [
                'country' => $post->post_category?->category?->country_locale_slug,
                'post_slug' => $post->slug,
            ]);

            LaravelGoogleIndexing::create()->update($post_url);
            IndexNow::submit($post_url);
        }

        return $post;
    }

    /**
     * Log and report a failure, returning the exception for the caller to throw.
     *
     * @param  string  $message   Exception message.
     * @param  string  $log_line  Line written to the error log.
     */
    private static function reportFailure(string $message, string $log_line): Exception
    {
        $exception = new Exception($message);

        Log::error($log_line);
        inspector()->reportException($exception);

        return $exception;
    }

    /**
     * Sum the cost of producing one article: the flat writing cost plus the
     * total_cost of the seller-shop and product scrape responses when present.
     *
     * @param  object|null  $shopee_task  Decoded scrape task; missing parts count as zero.
     */
    private static function getTotalServiceCost($shopee_task): float
    {
        $cost = 0.00;

        $cost += self::AI_WRITING_COST;

        // Shopee Seller Scraping
        $cost += $shopee_task?->seller_shop_task?->response?->total_cost ?? 0;

        // Shopee Product Scraping
        $cost += $shopee_task?->product_task?->response?->total_cost ?? 0;

        return $cost;
    }

    /**
     * Reduce a scraped HTML document to plain text for the AI prompt.
     *
     * The result is "<title> <meta description> <main content>", where the main
     * content comes from Readability. If anything in that path throws, the
     * whole document is re-serialised with HTML5 and its tags stripped instead.
     *
     * @param  string  $raw_html  Full HTML of the scraped page.
     */
    private static function stripHtml(string $raw_html): string
    {
        try {
            $configuration = new ReadabilityConfiguration();
            $configuration->setWordThreshold(20);

            $readability = new Readability($configuration);
            $readability->parse($raw_html);

            $main_text = (string) $readability->getContent();

            // Remove tabs
            $main_text = str_replace("\t", '', $main_text);

            // Strip tags before normalising whitespace, otherwise removing the
            // tags re-introduces runs of spaces and leading/trailing blanks.
            $main_text = strip_tags($main_text);

            // Collapse every whitespace run (newlines included) to a single space.
            $collapsed = preg_replace('/\s+/', ' ', $main_text);
            $main_text = trim($collapsed ?? $main_text);

            $crawler = new Crawler($raw_html);

            // text() throws on an empty node list; a page without <title> must
            // not discard the Readability result by landing in the fallback.
            $title_node = $crawler->filter('title');
            $title = $title_node->count() > 0 ? $title_node->text() : '';

            $description_node = $crawler->filter('meta[name="description"]');
            $description = $description_node->count() > 0 ? $description_node->attr('content') : null;

            return $title.' '.$description.' '.$main_text;
        } catch (ReadabilityParseException|Exception $e) {
            // Fallback: parse the document and strip tags from its serialised form.
            $html5 = new HTML5(['preserveWhiteSpace' => true]);
            $dom = $html5->loadHTML($raw_html);

            return strip_tags($html5->saveHTML($dom));
        }
    }
}
|