Add (ai gen)

This commit is contained in:
2023-10-01 04:17:49 +08:00
parent 5fcfa75d97
commit 5b4a02778e
7 changed files with 191 additions and 84 deletions

View File

@@ -17,6 +17,7 @@
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
use LaravelGoogleIndexing;
use Masterminds\HTML5;
use Symfony\Component\DomCrawler\Crawler;
class GenerateShopeeAIArticleTask
{
@@ -48,6 +49,8 @@ public static function handle(ShopeeSellerScrape $shopee_seller_scrape)
if (is_null($ai_writeup)) {
$ai_output = OpenAI::writeProductArticle($excerpt, $photos);
//dd($ai_output);
if (is_null($ai_output)) {
$e = new Exception('Failed to write: Missing ai_output');
@@ -140,41 +143,58 @@ private static function getTotalServiceCost($shopee_task)
/**
 * Reduces a raw HTML document to a single plain-text string.
 *
 * Primary path: Readability extracts the main content, which is stripped of
 * tags and whitespace-normalised, then prefixed with the page's <title> text
 * and meta description as "<title> <description> <content>".
 *
 * Fallback path: if anything in the primary path throws, the whole document
 * is round-tripped through the HTML5 parser and stripped of tags instead.
 *
 * @param  string  $raw_html  Full HTML source of the page.
 * @return string Plain text with all tags removed.
 */
private static function stripHtml(string $raw_html)
{
    try {
        $r_configuration = new ReadabilityConfiguration();
        $r_configuration->setWordThreshold(20);

        $readability = new Readability($r_configuration);
        $readability->parse($raw_html);

        // Cast guards against a null content result before string functions run.
        $article_text = (string) $readability->getContent();

        // Tabs are dropped outright (not turned into spaces), as before.
        $article_text = str_replace("\t", '', $article_text);

        // Strip tags BEFORE normalising whitespace, so gaps left behind by
        // removed tags are collapsed and the final trim is effective.
        $article_text = strip_tags($article_text);

        // \s+ already covers \n and \r\n, so no separate newline replacement
        // is needed. Fall back to the uncollapsed text if PCRE reports an error.
        $article_text = trim(preg_replace('/\s+/', ' ', $article_text) ?? $article_text);

        $crawler = new Crawler($raw_html);

        // text() throws on an empty node list; a page without <title> should
        // not discard the Readability result, so check count() first.
        $title_node = $crawler->filter('title');
        $title = $title_node->count() > 0 ? $title_node->text() : '';

        $description_node = $crawler->filter('meta[name="description"]');
        $description = $description_node->count() > 0
            ? (string) $description_node->attr('content')
            : '';

        // Whole-document stripping is reserved for the fallback below, so the
        // page text is not emitted twice on the success path.
        return $title.' '.$description.' '.$article_text;
    } catch (ReadabilityParseException|Exception $e) {
        $html5 = new HTML5(['preserveWhiteSpace' => true]);

        // Parse the HTML into a DOM tree.
        $dom = $html5->loadHTML($raw_html);

        // Serialize the DOM tree back to a string and drop every tag.
        return strip_tags($html5->saveHTML($dom));
    }
}