filename); $post = null; $shopee_seller_scrape->load('category'); if (! is_empty($serialised)) { $shopee_task = unserialize($serialised); $shopee_task->shopee_seller_scrape = $shopee_seller_scrape; } // dd($shopee_task); // dd($shopee_task->product_task->response); $raw_html = $shopee_task->product_task->response->raw_html; $excerpt = self::stripHtml($raw_html); $excerpt .= self::getProductPricingExcerpt($shopee_task->product_task->response->jsonld); $photos = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)->where('featured', false)->orderByRaw('RAND()')->take(3)->get()->pluck('image')->toArray(); $ai_writeup = AiWriteup::where('source', 'shopee')->where('source_url', $shopee_task->product_task->response->url)->first(); if (is_null($ai_writeup)) { $ai_output = OpenAI::writeProductArticle($excerpt, $photos); //dd($ai_output); if (is_null($ai_output)) { $e = new Exception('Failed to write: Missing ai_output'); Log::error(serialize($ai_writeup?->toArray())); inspector()->reportException($e); throw ($e); } else { // save $ai_writeup = new AiWriteup; $ai_writeup->source = 'shopee'; $ai_writeup->source_url = $shopee_task->product_task->response->url; $ai_writeup->category_id = $shopee_seller_scrape->category->id; $ai_writeup->title = $ai_output->title; $ai_writeup->excerpt = $ai_output->excerpt; $ai_writeup->featured_image = ''; $ai_writeup->body = $ai_output->body; $ai_writeup->cost = self::getTotalServiceCost($shopee_task); $ai_writeup->editor_format = 'markdown'; if ($ai_writeup->save()) { $featured_photo = ShopeeSellerScrapedImage::where('shopee_seller_scrape_id', $shopee_seller_scrape->id)->where('featured', true)->first(); // new post $post_data = [ 'publish_date' => now(), 'title' => $ai_writeup->title, 'slug' => str_slug($ai_writeup->title), 'excerpt' => $ai_writeup->excerpt, 'cliffhanger' => $ai_writeup->cliffhanger, 'author_id' => 1, 'featured' => false, 'featured_image' => $featured_photo->image, 'editor' => 'markdown', 
'body' => $ai_writeup->body, 'post_format' => 'standard', 'status' => 'publish', ]; $post = Post::create($post_data); if (! is_null($post)) { PostCategory::create([ 'post_id' => $post->id, 'category_id' => $shopee_seller_scrape->category->id, ]); if (app()->environment() == 'production') { if ($post->status == 'publish') { $post_url = route('home.country.post', ['country' => $post->post_category?->category?->country_locale_slug, 'post_slug' => $post->slug]); LaravelGoogleIndexing::create()->update($post_url); IndexNow::submit($post_url); } } } } } } else { $e = new Exception('Failed to write: ai_writeup found'); Log::error(serialize($ai_writeup?->toArray())); inspector()->reportException($e); throw ($e); } return $post; }

    /**
     * Build a one-sentence pricing summary from decoded JSON-LD entries.
     *
     * The entries are scanned in order and the first one whose "@type" is
     * exactly "Product" decides the result; later entries are not inspected.
     *
     * @param array $jsonLdData Decoded JSON-LD entries. Each entry is read
     *                          through object property access; entries that
     *                          expose no "@type" property are skipped.
     *
     * @return string The pricing sentence, or an empty string when no
     *                "Product" entry is present.
     */
    private static function getProductPricingExcerpt(array $jsonLdData): string
    {
        foreach ($jsonLdData as $data) {
            // Only "Product" entries are used for the offer data read below.
            if (! isset($data->{'@type'}) || $data->{'@type'} !== 'Product') {
                continue;
            }

            $lowPrice = $data->offers->lowPrice ?? null;
            $highPrice = $data->offers->highPrice ?? null;
            $price = $data->offers->price ?? null;
            $currency = $data->offers->priceCurrency ?? null;
            $sellerName = $data->offers->seller->name ?? 'online store';

            // Drop the currency together with its separator when it is absent,
            // so the sentence never contains a dangling double space.
            $currencyPrefix = ($currency === null || $currency === '') ? '' : "{$currency} ";

            // Truthiness is deliberate: a missing, empty or zero price is
            // reported as "not stated".
            if ($lowPrice && $highPrice) {
                return "Price Range from {$currencyPrefix}{$lowPrice} to {$highPrice} in {$sellerName}";
            }

            if ($price) {
                return "Priced at {$currencyPrefix}{$price} in {$sellerName}";
            }

            return "Price not stated, refer to {$sellerName}";
        }

        // No "Product" entry: return an explicit empty string so callers can
        // concatenate the result without a null check.
        return '';
    }

    /**
     * Sum the cost attributed to one write-up: a fixed AI writing charge plus
     * whatever the seller-shop and product scraping tasks report.
     *
     * @param mixed $shopee_task Task object (or null). The optional values
     *                           seller_shop_task->response->total_cost and
     *                           product_task->response->total_cost are read
     *                           when present.
     *
     * @return float Fixed charge plus any reported scraping costs.
     */
    private static function getTotalServiceCost($shopee_task): float
    {
        // chatgpt-3.5-turbo $0.03 for 1k, writing for 2k tokens
        // NOTE(review): those figures multiply to 0.06, not 0.09 - confirm
        // which one is intended; the value itself is left unchanged here.
        $aiWritingCost = 0.09;

        // A missing link anywhere in either chain contributes nothing.
        $sellerScrapeCost = $shopee_task?->seller_shop_task?->response?->total_cost ?? 0;
        $productScrapeCost = $shopee_task?->product_task?->response?->total_cost ?? 0;

        return $aiWritingCost + $sellerScrapeCost + $productScrapeCost;
    }

    /**
     * Reduce a raw HTML page to plain text for use as an excerpt.
     *
     * Primary path: the Readability content is stripped of tags and
     * whitespace-normalised, then prefixed with the page <title> and the
     * meta description when they exist. If that path throws, the whole
     * document is re-serialised through the HTML5 parser and stripped of
     * tags instead.
     *
     * @param string $raw_html Full HTML source of the page.
     *
     * @return string Plain-text content.
     */
    private static function stripHtml(string $raw_html): string
    {
        try {
            $config = new ReadabilityConfiguration();
            $config->setCharThreshold(20);

            $readability = new Readability($config);
            $readability->parse($raw_html);

            // Remove markup first, then normalise whitespace. Doing it in the
            // opposite order leaves runs of spaces (and leading/trailing
            // spaces) wherever a tag used to sit between whitespace.
            $body_text = strip_tags($readability->getContent() ?? '');
            // Tabs, newlines and repeated spaces all collapse to one space;
            // deleting tabs outright could fuse neighbouring words.
            $body_text = trim(preg_replace('/\s+/', ' ', $body_text) ?? $body_text);

            $crawler = new Crawler($raw_html);

            // text() throws on an empty node list, so check for the node
            // first. Otherwise a page without <title> would land in the
            // catch below and lose the Readability result entirely.
            $titleNode = $crawler->filter('title');
            $title = $titleNode->count() > 0 ? $titleNode->text() : '';

            $metaDescriptionNode = $crawler->filter('meta[name="description"]');
            $description = $metaDescriptionNode->count() > 0
                ? ($metaDescriptionNode->attr('content') ?? '')
                : '';

            return $title." \n".$description.' '.$body_text;
        } catch (ReadabilityParseException|Exception $e) {
            // Fallback: parse the HTML into a DOM tree, serialise it back to
            // a string and strip every tag from the result.
            $html5 = new HTML5(['preserveWhiteSpace' => true]);
            $dom = $html5->loadHTML($raw_html);

            return strip_tags($html5->saveHTML($dom));
        }
    }
}