Add (ai gen)

This commit is contained in:
2023-10-01 04:17:49 +08:00
parent 5fcfa75d97
commit 5b4a02778e
7 changed files with 191 additions and 84 deletions

View File

@@ -10,9 +10,11 @@ class OpenAI
{ {
public static function writeProductArticle($excerpt, $photos) public static function writeProductArticle($excerpt, $photos)
{ {
$excerpt = substr($excerpt, 0, 900);
$system_prompt = ' $system_prompt = '
You are tasked with writing a comprehensive product introduction article using the provided excerpt. The emphasis should be on the performance, features, and notable aspects of the product. The review should avoid the use of personal pronouns and must not delve into marketplace-related information. Return the output in the following json format:\n\n You are tasked with writing a comprehensive product introduction article using the provided excerpt. The emphasis should be on the performance, features, and notable aspects of the product. The review should avoid the use of personal pronouns and must not delve into marketplace-related information. Return the output in the following json format:\n\n
{"title": "(Article Title)","excerpt": "(One sentence summary, 150-160 characters of an article, do not use start sentence with verb.)","cliffhanger": "(One sentence 70-80 characters of article, cliff-hanging sentence to attract readers)","body": "(Markdown format, 500-700 word count)"}\n\n {"title": "(Article Title)","excerpt": "(One sentence summary, 150-160 characters of an article, do not use start sentence with verb.)","cliffhanger": "(One sentence 70-80 characters of article, cliff-hanging sentence to attract readers)","body": "(Markdown format, 700-900 word count)"}\n\n
Mandatory Requirements:\n Mandatory Requirements:\n
- Write in US grade 8-9 English\n - Write in US grade 8-9 English\n
- Use the following sections whenever applicable:\n - Use the following sections whenever applicable:\n
@@ -24,16 +26,23 @@ public static function writeProductArticle($excerpt, $photos)
- do not make up facts, use facts provided by excerpt only\n - do not make up facts, use facts provided by excerpt only\n
- No article titles inside markdown\n - No article titles inside markdown\n
- All article sections use ### - All article sections use ###
- Add at least 3 markdown images with article title as caption in every section except for Introduction
'; ';
$user_prompt = "Excerpt: {$excerpt}\nPhotos:\n"; $user_prompt = "EXCERPT\n------------\n{$excerpt}\n";
foreach ($photos as $photo) { if (count($photos) > 0) {
$user_prompt .= "{$photo}\n"; $system_prompt .= '- Add at least 3 markdown images with article title as caption in every section except for Introduction';
$user_prompt .= "\n\nPHOTOS\n------------\n";
foreach ($photos as $photo) {
$user_prompt .= "{$photo}\n";
}
} }
$output = (self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo', 2000)); $output = (self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo', 2500));
// dump($user_prompt);
// dd($output);
if (! is_null($output)) { if (! is_null($output)) {
try { try {

View File

@@ -1,5 +1,39 @@
<?php <?php
if (! function_exists('build_smartproxy_server')) {
    /**
     * Build the "user:password@server" proxy string for one Smartproxy config group.
     *
     * Reads the `user`, `password`, `server`, `reproxy` and `reproxy_enable`
     * keys under `platform.proxy.smartproxy.<group>`. Shared by the public
     * get_smartproxy_*_server() helpers so the string format lives in one place.
     *
     * @param  string  $group  Config group name, e.g. "rotating_global" or "unblocker".
     * @return string Proxy string in the form "user:password@server".
     */
    function build_smartproxy_server(string $group): string
    {
        $base = "platform.proxy.smartproxy.{$group}";

        // When the reproxy flag is on, the reproxy address replaces the
        // regular server address; the credentials stay the same.
        $server = config("{$base}.reproxy_enable")
            ? config("{$base}.reproxy")
            : config("{$base}.server");

        $user = config("{$base}.user");
        $password = config("{$base}.password");

        return "{$user}:{$password}@{$server}";
    }
}

if (! function_exists('get_smartproxy_rotating_server')) {
    /**
     * Proxy string for the Smartproxy "rotating_global" configuration.
     *
     * @return string Proxy string in the form "user:password@server".
     */
    function get_smartproxy_rotating_server(): string
    {
        return build_smartproxy_server('rotating_global');
    }
}

if (! function_exists('get_smartproxy_unblocker_server')) {
    /**
     * Proxy string for the Smartproxy "unblocker" configuration.
     *
     * @return string Proxy string in the form "user:password@server".
     */
    function get_smartproxy_unblocker_server(): string
    {
        return build_smartproxy_server('unblocker');
    }
}
if (! function_exists('get_smartproxy_server')) { if (! function_exists('get_smartproxy_server')) {
function get_smartproxy_server() function get_smartproxy_server()
{ {

View File

@@ -17,6 +17,7 @@
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow; use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
use LaravelGoogleIndexing; use LaravelGoogleIndexing;
use Masterminds\HTML5; use Masterminds\HTML5;
use Symfony\Component\DomCrawler\Crawler;
class GenerateShopeeAIArticleTask class GenerateShopeeAIArticleTask
{ {
@@ -48,6 +49,8 @@ public static function handle(ShopeeSellerScrape $shopee_seller_scrape)
if (is_null($ai_writeup)) { if (is_null($ai_writeup)) {
$ai_output = OpenAI::writeProductArticle($excerpt, $photos); $ai_output = OpenAI::writeProductArticle($excerpt, $photos);
//dd($ai_output);
if (is_null($ai_output)) { if (is_null($ai_output)) {
$e = new Exception('Failed to write: Missing ai_output'); $e = new Exception('Failed to write: Missing ai_output');
@@ -140,41 +143,58 @@ private static function getTotalServiceCost($shopee_task)
private static function stripHtml(string $raw_html) private static function stripHtml(string $raw_html)
{ {
$r_configuration = new ReadabilityConfiguration();
$r_configuration->setWordThreshold(20);
$readability = new Readability($r_configuration); $html_content = '';
// try { try {
// $readability->parse($raw_html);
// $html_content = $readability->getContent(); $r_configuration = new ReadabilityConfiguration();
$r_configuration->setWordThreshold(20);
// // Remove tabs $readability = new Readability($r_configuration);
// $html_content = str_replace("\t", '', $html_content);
// // Replace newlines with spaces $readability->parse($raw_html);
// $html_content = str_replace(["\n", "\r\n"], ' ', $html_content);
// // Replace multiple spaces with a single space $temp_html_content = $readability->getContent();
// $html_content = preg_replace('/\s+/', ' ', $html_content);
// // Output the cleaned text // Remove tabs
// $html_content = trim($html_content); // Using trim to remove any leading or trailing spaces $temp_html_content = str_replace("\t", '', $temp_html_content);
// $html_content = strip_tags($html_content); // Replace newlines with spaces
$temp_html_content = str_replace(["\n", "\r\n"], ' ', $temp_html_content);
// } catch (ReadabilityParseException|Exception $e) { // Replace multiple spaces with a single space
$temp_html_content = preg_replace('/\s+/', ' ', $temp_html_content);
$html5 = new HTML5(['preserveWhiteSpace' => true]); // Output the cleaned text
$temp_html_content = trim($temp_html_content); // Using trim to remove any leading or trailing spaces
// Parse the HTML into a DOM tree. $temp_html_content = strip_tags($temp_html_content);
$dom = $html5->loadHTML($raw_html);
// Serialize the DOM tree back to a string, formatted. $crawler = new Crawler($raw_html);
$html_content = strip_tags($html5->saveHTML($dom));
// } // Extract meta title
$title = $crawler->filter('title')->text(); // This assumes <title> tags are used for titles.
// Extract meta description
$metaDescriptionNode = $crawler->filter('meta[name="description"]');
$description = $metaDescriptionNode->count() > 0 ? $metaDescriptionNode->attr('content') : null;
$html_content .= $title.' ';
$html_content .= $description.' ';
$html_content .= $temp_html_content;
} catch (ReadabilityParseException|Exception $e) {
$html5 = new HTML5(['preserveWhiteSpace' => true]);
// Parse the HTML into a DOM tree.
$dom = $html5->loadHTML($raw_html);
// Serialize the DOM tree back to a string, formatted.
$html_content = strip_tags($html5->saveHTML($dom));
}
return $html_content; return $html_content;
} }

View File

@@ -19,7 +19,9 @@ public static function handle($shopee_task)
$main_image_url = null; $main_image_url = null;
$proxy_server = get_smartproxy_server(); $unblocker_proxy_server = get_smartproxy_unblocker_server();
$rotating_proxy_server = get_smartproxy_rotating_server();
$user_agent = config('platform.proxy.user_agent'); $user_agent = config('platform.proxy.user_agent');
///////// PART 1 ///////// PART 1
@@ -36,7 +38,7 @@ public static function handle($shopee_task)
$intervention_images = $shopee_task->product_task->intervention->intervention_images; $intervention_images = $shopee_task->product_task->intervention->intervention_images;
} else { } else {
$images = self::getImages($shopee_task->product_task->response->raw_html); $images = self::getImages($shopee_task->product_task->response->raw_html);
$images = self::filterImages($images, $proxy_server, $user_agent, $costs, $intervention_images); $images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images);
} }
///////// PART 2 ///////// PART 2
@@ -54,7 +56,7 @@ public static function handle($shopee_task)
$scraped_image = ShopeeSellerScrapedImage::where('original_name', pathinfo($main_image_url, PATHINFO_BASENAME))->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first(); $scraped_image = ShopeeSellerScrapedImage::where('original_name', pathinfo($main_image_url, PATHINFO_BASENAME))->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first();
if (is_null($scraped_image)) { if (is_null($scraped_image)) {
$main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $proxy_server, $user_agent, $costs, $main_intervention_image); $main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image);
$scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_intervention_image, true); $scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_intervention_image, true);
} }
@@ -137,16 +139,18 @@ private static function getImages(string $raw_html)
$crawler->filter('img')->each(function ($node) use (&$images) { $crawler->filter('img')->each(function ($node) use (&$images) {
$src = $node->attr('src'); $src = $node->attr('src');
$alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present $alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present
$images[] = [
'src' => $src,
'alt' => $alt,
];
});
// if (count($images) > 4) $blacklist_domain = [];
// {
// return $images; foreach ($blacklist_domain as $blacklist) {
// } if (! str_contains($src, $blacklist)) {
$images[] = [
'src' => $src,
'alt' => $alt,
];
}
}
});
return $images; return $images;
} }
@@ -164,7 +168,7 @@ private static function filterImages(array $images, string $proxy, string $user_
$src = $image['src']; $src = $image['src'];
try { try {
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($src); $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($src);
// Check if the request was successful // Check if the request was successful
if (! $response->successful()) { if (! $response->successful()) {
@@ -274,7 +278,7 @@ private static function getProductImage(array $jsonLdData, string $proxy, string
if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') { if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') {
if (isset($data->url) && isset($data->image)) { if (isset($data->url) && isset($data->image)) {
try { try {
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($data->image); $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
// Check if the request was successful // Check if the request was successful
if ($response->successful()) { if ($response->successful()) {

View File

@@ -41,7 +41,7 @@ public static function handle(string $seller, string $country_iso, Category $cat
//dd($seller_shop_task); //dd($seller_shop_task);
if (isset($seller_shop_task->response->jsonld)) { if (isset($seller_shop_task->response->jsonld)) {
$top_rank_products = self::getSortedData($seller_shop_task->response->jsonld, 100); $top_rank_products = self::getSortedData($seller_shop_task->response->jsonld, 400);
if (count($top_rank_products) > 0) { if (count($top_rank_products) > 0) {

View File

@@ -18,7 +18,7 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
{ {
$slug = str_slug($url); $slug = str_slug($url);
$cached_url = self::getGoogleCachedUrl($url, false); $cached_url = $url; // self::getGoogleCachedUrl($url, false);
$postfix = strval($postfix); $postfix = strval($postfix);
@@ -35,7 +35,8 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
$main_intervention_image = null; $main_intervention_image = null;
$intervention_images = []; $intervention_images = [];
$proxy_server = get_smartproxy_server(); $unblocker_proxy_server = get_smartproxy_unblocker_server();
$rotating_proxy_server = get_smartproxy_rotating_server();
try { try {
$raw_html = OSSUploader::readFile($driver, $directory, $filename); $raw_html = OSSUploader::readFile($driver, $directory, $filename);
@@ -51,26 +52,47 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
if (is_null($raw_html)) { if (is_null($raw_html)) {
try { try {
$browsershot = new Browsershot();
$browsershot->setUrl($cached_url) $response = Http::withHeaders([
->setOption('args', ['headless: "new"']) 'User-Agent' => $user_agent,
->noSandbox() ])
->setOption('args', ['--disable-web-security']) ->withOptions([
->userAgent($user_agent) 'proxy' => $unblocker_proxy_server,
->ignoreHttpsErrors() 'timeout' => 1000,
->preventUnsuccessfulResponse() 'verify' => false,
->timeout(10) ])
//->setProxyServer($proxy_server) ->get($cached_url);
->userAgent($user_agent);
if (app()->environment() == 'local') { if ($response->successful()) {
$browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary')); $raw_html = $response->body();
// ... your logic here ...
} else {
$raw_html = null;
$status_code = -3;
//throw new Exception('Http response failed');
$response->throw();
} }
//dump($browsershot); // $browsershot = new Browsershot();
$raw_html = $browsershot->bodyHtml(); // $browsershot->setUrl($cached_url)
// ->setOption('args', ['headless: "new"'])
// ->noSandbox()
// ->setOption('args', ['--disable-web-security'])
// ->userAgent($user_agent)
// ->ignoreHttpsErrors()
// ->preventUnsuccessfulResponse()
// ->timeout(10)
// ->setProxyServer($proxy_server)
// ->userAgent($user_agent);
// if (app()->environment() == 'local') {
// $browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary'));
// }
// //dump($browsershot);
// $raw_html = $browsershot->bodyHtml();
// $sizeInKb = strlen($raw_html) / 1024; // Convert bytes to kilobytes // $sizeInKb = strlen($raw_html) / 1024; // Convert bytes to kilobytes
// $browsershot_cost = round(calculate_smartproxy_cost($sizeInKb)) ; // $browsershot_cost = round(calculate_smartproxy_cost($sizeInKb)) ;
@@ -97,12 +119,12 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
if ($parse_images) { if ($parse_images) {
$images = self::getImages($raw_html); $images = self::getImages($raw_html);
$images = self::filterImages($images, $proxy_server, $user_agent, $costs, $intervention_images); $images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images);
} else { } else {
$images = []; $images = [];
} }
$main_image = self::getProductImage($jsonld, $proxy_server, $user_agent, $costs, $main_intervention_image); $main_image = self::getProductImage($jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image);
return (object) [ return (object) [
'intervention' => (object) compact('main_intervention_image', 'intervention_images'), 'intervention' => (object) compact('main_intervention_image', 'intervention_images'),
@@ -169,16 +191,20 @@ private static function getImages(string $raw_html)
$crawler->filter('img')->each(function ($node) use (&$images) { $crawler->filter('img')->each(function ($node) use (&$images) {
$src = $node->attr('src'); $src = $node->attr('src');
$alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present $alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present
$images[] = [
'src' => $src, $blacklist_domain = [];
'alt' => $alt,
]; foreach ($blacklist_domain as $blacklist) {
if (! str_contains($src, $blacklist)) {
$images[] = [
'src' => $src,
'alt' => $alt,
];
}
}
}); });
// if (count($images) > 4) //dd($images);
// {
// return $images;
// }
return $images; return $images;
} }
@@ -196,7 +222,8 @@ private static function filterImages(array $images, string $proxy, string $user_
$src = $image['src']; $src = $image['src'];
try { try {
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($src);
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($src);
// Check if the request was successful // Check if the request was successful
if (! $response->successful()) { if (! $response->successful()) {
@@ -216,7 +243,7 @@ private static function filterImages(array $images, string $proxy, string $user_
$sizeKb = round(strlen($imageData) / 1024, 2); $sizeKb = round(strlen($imageData) / 1024, 2);
// Check constraints // Check constraints
if ($width < 800 || $height < 800 || $sizeKb < 100 || $mime !== 'image/jpeg') { if ($width < 800 || $height < 800 || $sizeKb < 100) {
continue; continue;
} }
$image['width'] = $width; $image['width'] = $width;
@@ -268,17 +295,22 @@ private static function filterImages(array $images, string $proxy, string $user_
$colorCounts[] = $image['color_counts']; $colorCounts[] = $image['color_counts'];
} }
// Compute the median of the color counts if (! empty($colorCounts)) {
sort($colorCounts); // Compute the median of the color counts
$count = count($colorCounts); sort($colorCounts);
$middleIndex = floor($count / 2); $count = count($colorCounts);
$median = $count % 2 === 0 ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2 : $colorCounts[$middleIndex]; $middleIndex = floor($count / 2);
$median = $count % 2 === 0 ? ($colorCounts[$middleIndex - 1] + $colorCounts[$middleIndex]) / 2 : $colorCounts[$middleIndex];
// Use the median to filter out the low outliers // Use the median to filter out the low outliers
$threshold = 0.10 * $median; // Adjust this percentage as needed $threshold = 0.10 * $median; // Adjust this percentage as needed
$filteredImages = array_filter($filteredImages, function ($image) use ($threshold) { $filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
return $image['color_counts'] > $threshold; return $image['color_counts'] > $threshold;
}); });
} else {
// No images found
$filteredImages = []; // Clear the array or take any other appropriate action
}
usort($filteredImages, function ($a, $b) { usort($filteredImages, function ($a, $b) {
return $b['sizeKb'] <=> $a['sizeKb']; // Using the spaceship operator to sort in descending order return $b['sizeKb'] <=> $a['sizeKb']; // Using the spaceship operator to sort in descending order
@@ -307,7 +339,7 @@ private static function getProductImage(array $jsonLdData, string $proxy, string
if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') { if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') {
if (isset($data->url) && isset($data->image)) { if (isset($data->url) && isset($data->image)) {
try { try {
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($data->image); $response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
// Check if the request was successful // Check if the request was successful
if ($response->successful()) { if ($response->successful()) {

View File

@@ -11,7 +11,15 @@
'server' => 'gate.smartproxy.com:7000', 'server' => 'gate.smartproxy.com:7000',
'reproxy' => '157.230.194.206:7000', 'reproxy' => '157.230.194.206:7000',
'reproxy_enable' => false, 'reproxy_enable' => false,
'cost_per_gb' => 7.00, 'cost_per_gb' => 7,
],
'unblocker' => [
'user' => 'U0000123412',
'password' => 'P$W1bda906aee53c2022d94e22ff1a1142a1',
'server' => 'unblock.smartproxy.com:60000',
'reproxy' => '157.230.194.206:7000',
'reproxy_enable' => false,
'cost_per_gb' => 20.14,
], ],
], ],
]; ];