Add (ai gen)
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
|
||||
use LaravelGoogleIndexing;
|
||||
use Masterminds\HTML5;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class GenerateShopeeAIArticleTask
|
||||
{
|
||||
@@ -48,6 +49,8 @@ public static function handle(ShopeeSellerScrape $shopee_seller_scrape)
|
||||
if (is_null($ai_writeup)) {
|
||||
$ai_output = OpenAI::writeProductArticle($excerpt, $photos);
|
||||
|
||||
//dd($ai_output);
|
||||
|
||||
if (is_null($ai_output)) {
|
||||
$e = new Exception('Failed to write: Missing ai_output');
|
||||
|
||||
@@ -140,41 +143,58 @@ private static function getTotalServiceCost($shopee_task)
|
||||
|
||||
private static function stripHtml(string $raw_html)
|
||||
{
|
||||
$r_configuration = new ReadabilityConfiguration();
|
||||
$r_configuration->setWordThreshold(20);
|
||||
|
||||
$readability = new Readability($r_configuration);
|
||||
$html_content = '';
|
||||
|
||||
// try {
|
||||
// $readability->parse($raw_html);
|
||||
try {
|
||||
|
||||
// $html_content = $readability->getContent();
|
||||
$r_configuration = new ReadabilityConfiguration();
|
||||
$r_configuration->setWordThreshold(20);
|
||||
|
||||
// // Remove tabs
|
||||
// $html_content = str_replace("\t", '', $html_content);
|
||||
$readability = new Readability($r_configuration);
|
||||
|
||||
// // Replace newlines with spaces
|
||||
// $html_content = str_replace(["\n", "\r\n"], ' ', $html_content);
|
||||
$readability->parse($raw_html);
|
||||
|
||||
// // Replace multiple spaces with a single space
|
||||
// $html_content = preg_replace('/\s+/', ' ', $html_content);
|
||||
$temp_html_content = $readability->getContent();
|
||||
|
||||
// // Output the cleaned text
|
||||
// $html_content = trim($html_content); // Using trim to remove any leading or trailing spaces
|
||||
// Remove tabs
|
||||
$temp_html_content = str_replace("\t", '', $temp_html_content);
|
||||
|
||||
// $html_content = strip_tags($html_content);
|
||||
// Replace newlines with spaces
|
||||
$temp_html_content = str_replace(["\n", "\r\n"], ' ', $temp_html_content);
|
||||
|
||||
// } catch (ReadabilityParseException|Exception $e) {
|
||||
// Replace multiple spaces with a single space
|
||||
$temp_html_content = preg_replace('/\s+/', ' ', $temp_html_content);
|
||||
|
||||
$html5 = new HTML5(['preserveWhiteSpace' => true]);
|
||||
// Output the cleaned text
|
||||
$temp_html_content = trim($temp_html_content); // Using trim to remove any leading or trailing spaces
|
||||
|
||||
// Parse the HTML into a DOM tree.
|
||||
$dom = $html5->loadHTML($raw_html);
|
||||
$temp_html_content = strip_tags($temp_html_content);
|
||||
|
||||
// Serialize the DOM tree back to a string, formatted.
|
||||
$html_content = strip_tags($html5->saveHTML($dom));
|
||||
$crawler = new Crawler($raw_html);
|
||||
|
||||
// }
|
||||
// Extract meta title
|
||||
$title = $crawler->filter('title')->text(); // This assumes <title> tags are used for titles.
|
||||
|
||||
// Extract meta description
|
||||
$metaDescriptionNode = $crawler->filter('meta[name="description"]');
|
||||
$description = $metaDescriptionNode->count() > 0 ? $metaDescriptionNode->attr('content') : null;
|
||||
|
||||
$html_content .= $title.' ';
|
||||
$html_content .= $description.' ';
|
||||
$html_content .= $temp_html_content;
|
||||
|
||||
} catch (ReadabilityParseException|Exception $e) {
|
||||
|
||||
$html5 = new HTML5(['preserveWhiteSpace' => true]);
|
||||
|
||||
// Parse the HTML into a DOM tree.
|
||||
$dom = $html5->loadHTML($raw_html);
|
||||
|
||||
// Serialize the DOM tree back to a string, formatted.
|
||||
$html_content = strip_tags($html5->saveHTML($dom));
|
||||
|
||||
}
|
||||
|
||||
return $html_content;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user