284 lines
10 KiB
PHP
284 lines
10 KiB
PHP
<?php
|
|
|
|
namespace App\Jobs\Tasks;
|
|
|
|
use fivefilters\Readability\Configuration as ReadabilityConfiguration;
|
|
use fivefilters\Readability\ParseException as ReadabilityParseException;
|
|
use fivefilters\Readability\Readability;
|
|
use App\Helpers\FirstParty\OpenAI\OpenAI;
|
|
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
|
|
use App\Models\AiWriteup;
|
|
use App\Models\Post;
|
|
use App\Models\PostCategory;
|
|
use App\Models\Category;
|
|
use App\Models\ShopeeSellerCategory;
|
|
use App\Models\ShopeeSellerScrape;
|
|
use App\Models\ShopeeSellerScrapedImage;
|
|
use Exception;
|
|
use Illuminate\Support\Facades\Log;
|
|
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
|
|
use LaravelGoogleIndexing;
|
|
use Masterminds\HTML5;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Turns one scraped Shopee seller product page into an AI-written, published post.
 *
 * Flow: load the serialised scrape payload from object storage, build a short
 * text excerpt (page text + pricing sentence), ask OpenAI for an article,
 * persist it as an AiWriteup and a Post, update the write counters and, in
 * production, submit the new URL to the search-engine indexing services.
 */
class GenerateShopeeAIArticleTask
{
    /**
     * Byte budget for the page text sent to OpenAI. The original sizing note:
     * 1500 in + 1500 out is roughly 3k tokens, the limit short of the 16k model.
     */
    private const EXCERPT_MAX_BYTES = 1500;

    /** Flat per-article OpenAI estimate (chatgpt-3.5-turbo, $0.03 per 1k tokens, ~3k tokens). */
    private const OPENAI_WRITE_COST = 0.09;

    /** Category names offered to the model to choose from. */
    private const CANDIDATE_CATEGORIES = [
        'Beauty',
        'Technology',
        'Home & Living',
        'Health',
        'Fitness',
    ];

    /**
     * Generate and publish an article for the given scrape.
     *
     * @param ShopeeSellerScrape $shopee_seller_scrape Scrape record whose payload is stored on R2.
     *
     * @return Post|null The created post, or null when the AiWriteup could not be saved.
     *
     * @throws Exception When the payload is missing/corrupt, a writeup for the same
     *                   source URL already exists, or OpenAI returns no output.
     */
    public static function handle(ShopeeSellerScrape $shopee_seller_scrape): ?Post
    {
        $shopee_seller_scrape->load('category');

        $shopee_task = self::loadShopeeTask($shopee_seller_scrape);

        $product_response = $shopee_task->product_task->response ?? null;

        if (! is_object($product_response)) {
            throw self::reportedFailure('Failed to write: Missing product response', [
                'shopee_seller_scrape_id' => $shopee_seller_scrape->id,
            ]);
        }

        $source_url = $product_response->url;

        // Check for a duplicate first so no parsing or OpenAI spend happens for it.
        $existing_writeup = AiWriteup::where('source', 'shopee')->where('source_url', $source_url)->first();

        if (! is_null($existing_writeup)) {
            throw self::reportedFailure('Failed to write: ai_writeup found', [
                'ai_writeup_id' => $existing_writeup->id,
                'source_url' => $source_url,
            ]);
        }

        $excerpt = self::buildExcerpt($product_response, $source_url);

        $photos = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', false)
            ->inRandomOrder()
            ->take(3)
            ->get()
            ->pluck('image')
            ->toArray();

        $ai_output = OpenAI::writeProductArticle($excerpt, $photos, self::CANDIDATE_CATEGORIES);

        if (is_null($ai_output)) {
            throw self::reportedFailure('Failed to write: Missing ai_output', [
                'source_url' => $source_url,
            ]);
        }

        // Prefer the category the model picked (within the same locale); otherwise
        // fall back to the category the scrape itself belongs to.
        $picked_category = Category::where('name', $ai_output->category)
            ->where('country_locale_id', $shopee_seller_scrape->category->country_locale_id)
            ->first() ?? $shopee_seller_scrape->category;

        $ai_writeup = new AiWriteup;
        $ai_writeup->source = 'shopee';
        $ai_writeup->source_url = $source_url;
        $ai_writeup->category_id = $picked_category->id;
        $ai_writeup->title = $ai_output->title;
        $ai_writeup->excerpt = $ai_output->excerpt;
        $ai_writeup->featured_image = '';
        $ai_writeup->body = $ai_output->body;
        $ai_writeup->cost = self::getTotalServiceCost($shopee_task);
        $ai_writeup->editor_format = 'markdown';

        if (! $ai_writeup->save()) {
            return null;
        }

        $post = self::createPost($ai_writeup, $shopee_seller_scrape);

        self::recordWrite($shopee_seller_scrape);

        PostCategory::create([
            'post_id' => $post->id,
            'category_id' => $picked_category->id,
        ]);

        self::submitForIndexing($post);

        return $post;
    }

    /**
     * Read and unserialise the scrape payload, attaching the scrape model to it.
     *
     * @throws Exception When the stored payload is empty or does not unserialise to an object.
     */
    private static function loadShopeeTask(ShopeeSellerScrape $shopee_seller_scrape): object
    {
        $serialised = OSSUploader::readFile('r2', 'shopee/seller', $shopee_seller_scrape->filename);

        if (is_empty($serialised)) {
            throw self::reportedFailure('Failed to write: Scrape payload is empty', [
                'shopee_seller_scrape_id' => $shopee_seller_scrape->id,
                'filename' => $shopee_seller_scrape->filename,
            ]);
        }

        // NOTE(review): unserialize() will instantiate whatever classes the payload
        // names. This is only safe while the bucket is written exclusively by our own
        // scraper; consider an allowed_classes whitelist or a JSON payload.
        $shopee_task = unserialize($serialised);

        if (! is_object($shopee_task)) {
            throw self::reportedFailure('Failed to write: Scrape payload is corrupt', [
                'shopee_seller_scrape_id' => $shopee_seller_scrape->id,
                'filename' => $shopee_seller_scrape->filename,
            ]);
        }

        $shopee_task->shopee_seller_scrape = $shopee_seller_scrape;

        return $shopee_task;
    }

    /**
     * Build the prompt excerpt: truncated page text followed by a pricing sentence.
     *
     * @throws Exception When the response carries no raw HTML to read.
     */
    private static function buildExcerpt(object $product_response, string $source_url): string
    {
        $raw_html = $product_response->raw_html ?? null;

        if (! is_string($raw_html) || $raw_html === '') {
            throw self::reportedFailure('Failed to write: Missing raw_html', [
                'source_url' => $source_url,
            ]);
        }

        // mb_strcut keeps the byte budget but never cuts inside a multi-byte
        // character, so the text handed to OpenAI stays valid UTF-8.
        $page_text = mb_strcut(self::stripHtml($raw_html), 0, self::EXCERPT_MAX_BYTES, 'UTF-8');

        $jsonld = $product_response->jsonld ?? [];
        $pricing = self::getProductPricingExcerpt(is_array($jsonld) ? $jsonld : []);

        // Separate the two parts so the pricing sentence is not glued to the last word.
        return $pricing === '' ? $page_text : "{$page_text} {$pricing}";
    }

    /**
     * Create the published post for a saved writeup.
     */
    private static function createPost(AiWriteup $ai_writeup, ShopeeSellerScrape $shopee_seller_scrape): Post
    {
        $featured_photo = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)
            ->where('featured', true)
            ->first();

        return Post::create([
            'publish_date' => now(),
            'title' => $ai_writeup->title,
            'slug' => str_slug($ai_writeup->title),
            'excerpt' => $ai_writeup->excerpt,
            // NOTE(review): cliffhanger is never assigned on the writeup in this class,
            // so this is null unless the model/database supplies it - confirm intent.
            'cliffhanger' => $ai_writeup->cliffhanger,
            'author_id' => 1,
            'featured' => false,
            // A scrape without a featured image must not abort here: the writeup row
            // is already saved and would block every retry as a duplicate.
            'featured_image' => $featured_photo?->image ?? '',
            'editor' => 'markdown',
            'body' => $ai_writeup->body,
            'post_format' => 'standard',
            'status' => 'publish',
        ]);
    }

    /**
     * Bump the write counter/timestamp on the scrape and mirror them onto the
     * seller's ShopeeSellerCategory row (created when missing).
     */
    private static function recordWrite(ShopeeSellerScrape $shopee_seller_scrape): void
    {
        $shopee_seller_scrape->write_counts = $shopee_seller_scrape->write_counts + 1;
        $shopee_seller_scrape->last_ai_written_at = now();
        $shopee_seller_scrape->save();

        $shopee_seller_category = ShopeeSellerCategory::where('seller', $shopee_seller_scrape->seller)->first();

        if (is_null($shopee_seller_category)) {
            $shopee_seller_category = new ShopeeSellerCategory;
            $shopee_seller_category->seller = $shopee_seller_scrape->seller;
            $shopee_seller_category->category_id = $shopee_seller_scrape->category_id;
        }

        $shopee_seller_category->last_ai_written_at = $shopee_seller_scrape->last_ai_written_at;
        $shopee_seller_category->write_counts = $shopee_seller_scrape->write_counts;
        $shopee_seller_category->save();
    }

    /**
     * Submit a published post's URL to Google Indexing and IndexNow (production only).
     */
    private static function submitForIndexing(Post $post): void
    {
        if (app()->environment() !== 'production' || $post->status !== 'publish') {
            return;
        }

        $post_url = route('home.country.post', [
            'country' => $post->post_category?->category?->country_locale_slug,
            'post_slug' => $post->slug,
        ]);

        LaravelGoogleIndexing::create()->update($post_url);
        IndexNow::submit($post_url);
    }

    /**
     * Log and report a failure, returning the exception for the caller to throw.
     *
     * @param array<string, mixed> $context Extra fields attached to the log entry.
     */
    private static function reportedFailure(string $message, array $context = []): Exception
    {
        $e = new Exception($message);

        Log::error($message, $context);
        inspector()->reportException($e);

        return $e;
    }

    /**
     * Build a one-sentence pricing summary from the first JSON-LD "Product" node.
     *
     * @param array $jsonLdData Decoded JSON-LD nodes (objects) found on the page.
     *
     * @return string The pricing sentence, or '' when no Product node is present.
     */
    private static function getProductPricingExcerpt(array $jsonLdData): string
    {
        foreach ($jsonLdData as $data) {
            if (! is_object($data) || ($data->{'@type'} ?? null) !== 'Product') {
                continue;
            }

            $offers = $data->offers ?? null;

            $lowPrice = self::formatAmount($offers->lowPrice ?? null);
            $highPrice = self::formatAmount($offers->highPrice ?? null);
            $price = self::formatAmount($offers->price ?? null);

            // Malaysian listings read better with the local "RM" symbol.
            $currency = $offers->priceCurrency ?? null;
            if ($currency === 'MYR') {
                $currency = 'RM';
            }
            $currencyPrefix = is_string($currency) && $currency !== '' ? "{$currency} " : '';

            // Without a seller name, refer to "the online store" rather than
            // repeating the words "online store" twice.
            $sellerName = $offers->seller->name ?? null;
            $store = is_string($sellerName) && trim($sellerName) !== ''
                ? "{$sellerName} online store"
                : 'the online store';

            if ($lowPrice !== null && $highPrice !== null) {
                return "Price Range from {$currencyPrefix}{$lowPrice} to {$highPrice} in {$store}";
            }

            if ($price !== null) {
                return "Priced at {$currencyPrefix}{$price} in {$store}";
            }

            return "Price not stated, refer to {$store}";
        }

        return '';
    }

    /**
     * Format a JSON-LD price as a whole number with thousands separators.
     *
     * @param mixed $amount Raw value; JSON-LD prices may arrive as strings or numbers.
     *
     * @return string|null Formatted amount, or null when the value is not a positive number.
     */
    private static function formatAmount($amount): ?string
    {
        if (! is_numeric($amount) || (float) $amount <= 0) {
            return null;
        }

        return number_format((float) $amount, 0);
    }

    /**
     * Estimate the total cost of producing one article: the flat OpenAI estimate
     * plus whatever the seller-shop and product scraping tasks reported.
     *
     * @param mixed $shopee_task Unserialised scrape payload (may lack either task).
     */
    private static function getTotalServiceCost($shopee_task): float
    {
        $cost = self::OPENAI_WRITE_COST;

        foreach (['seller_shop_task', 'product_task'] as $task) {
            $task_cost = $shopee_task?->{$task}?->response?->total_cost ?? null;

            if (is_numeric($task_cost)) {
                $cost += (float) $task_cost;
            }
        }

        return $cost;
    }

    /**
     * Reduce a raw HTML page to plain text: "<title> <meta description> <main content>".
     *
     * Readability extracts the main content; if it (or the DOM crawler) fails, the
     * whole document is parsed with HTML5 and stripped of tags instead.
     */
    private static function stripHtml(string $raw_html): string
    {
        try {
            $r_configuration = new ReadabilityConfiguration();
            $r_configuration->setCharThreshold(20);

            $readability = new Readability($r_configuration);
            $readability->parse($raw_html);

            $body_text = self::normaliseText((string) $readability->getContent());

            $crawler = new Crawler($raw_html);

            // Crawler::text()/attr() throw on an empty node list, so test count() first;
            // a page without <title> must not throw away the Readability result.
            $titleNode = $crawler->filter('title');
            $title = $titleNode->count() > 0 ? $titleNode->text() : '';

            $metaDescriptionNode = $crawler->filter('meta[name="description"]');
            $description = $metaDescriptionNode->count() > 0
                ? (string) $metaDescriptionNode->attr('content')
                : '';

            $parts = array_filter(
                [trim($title), trim($description), $body_text],
                static fn (string $part): bool => $part !== ''
            );

            return implode(' ', $parts);
        } catch (ReadabilityParseException|Exception $e) {
            $html5 = new HTML5(['preserveWhiteSpace' => true]);

            // Round-trip through the HTML5 parser so malformed markup is repaired
            // before the tags are stripped.
            $dom = $html5->loadHTML($raw_html);

            return self::normaliseText($html5->saveHTML($dom));
        }
    }

    /**
     * Strip tags and collapse every run of whitespace (tabs and newlines included)
     * into a single space.
     */
    private static function normaliseText(string $html): string
    {
        $text = strip_tags($html);

        // Unicode-aware first; fall back to byte mode if the input is not valid UTF-8.
        $collapsed = preg_replace('/\s+/u', ' ', $text)
            ?? preg_replace('/\s+/', ' ', $text)
            ?? $text;

        return trim($collapsed);
    }
}
|