Add (ai gen)
This commit is contained in:
@@ -10,9 +10,11 @@ class OpenAI
|
||||
{
|
||||
public static function writeProductArticle($excerpt, $photos)
|
||||
{
|
||||
$excerpt = substr($excerpt, 0, 900);
|
||||
|
||||
$system_prompt = '
|
||||
You are tasked with writing a comprehensive product introduction article using the provided excerpt. The emphasis should be on the performance, features, and notable aspects of the product. The review should avoid the use of personal pronouns and must not delve into marketplace-related information. Return the output in the following json format:\n\n
|
||||
{"title": "(Article Title)","excerpt": "(One sentence summary, 150-160 characters of an article, do not use start sentence with verb.)","cliffhanger": "(One sentence 70-80 characters of article, cliff-hanging sentence to attract readers)","body": "(Markdown format, 500-700 word count)"}\n\n
|
||||
{"title": "(Article Title)","excerpt": "(One sentence summary, 150-160 characters of an article, do not use start sentence with verb.)","cliffhanger": "(One sentence 70-80 characters of article, cliff-hanging sentence to attract readers)","body": "(Markdown format, 700-900 word count)"}\n\n
|
||||
Mandatory Requirements:\n
|
||||
- Write in US grade 8-9 English\n
|
||||
- Use the following sections whenever applicable:\n
|
||||
@@ -24,16 +26,23 @@ public static function writeProductArticle($excerpt, $photos)
|
||||
- do not make up facts, use facts provided by excerpt only\n
|
||||
- No article titles inside markdown\n
|
||||
- All article sections use ###
|
||||
- Add at least 3 markdown images with article title as caption in every section except for Introduction
|
||||
|
||||
';
|
||||
|
||||
$user_prompt = "Excerpt: {$excerpt}\nPhotos:\n";
|
||||
$user_prompt = "EXCERPT\n------------\n{$excerpt}\n";
|
||||
|
||||
if (count($photos) > 0) {
|
||||
$system_prompt .= '- Add at least 3 markdown images with article title as caption in every section except for Introduction';
|
||||
$user_prompt .= "\n\nPHOTOS\n------------\n";
|
||||
foreach ($photos as $photo) {
|
||||
$user_prompt .= "{$photo}\n";
|
||||
}
|
||||
}
|
||||
|
||||
$output = (self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo', 2000));
|
||||
$output = (self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo', 2500));
|
||||
|
||||
// dump($user_prompt);
|
||||
// dd($output);
|
||||
|
||||
if (! is_null($output)) {
|
||||
try {
|
||||
|
||||
@@ -1,5 +1,39 @@
|
||||
<?php
|
||||
|
||||
if (! function_exists('get_smartproxy_rotating_server')) {
    /**
     * Assemble the "user:password@host" string for the Smartproxy rotating (global) proxy.
     *
     * All values are read from the `platform.proxy.smartproxy.rotating_global` config group.
     * When `reproxy_enable` is truthy, the `reproxy` entry is used as the host part
     * instead of the `server` entry.
     *
     * NOTE(review): the credentials are concatenated verbatim (no URL-encoding) — confirm
     * that the configured user/password never contain characters such as "@" or ":".
     *
     * @return string Proxy address in the form "user:password@host".
     */
    function get_smartproxy_rotating_server()
    {
        // Default host; may be swapped for the re-proxy endpoint below.
        $host = config('platform.proxy.smartproxy.rotating_global.server');
        $username = config('platform.proxy.smartproxy.rotating_global.user');
        $secret = config('platform.proxy.smartproxy.rotating_global.password');

        if (config('platform.proxy.smartproxy.rotating_global.reproxy_enable')) {
            $host = config('platform.proxy.smartproxy.rotating_global.reproxy');
        }

        return sprintf('%s:%s@%s', $username, $secret, $host);
    }
}
|
||||
|
||||
if (! function_exists('get_smartproxy_unblocker_server')) {
    /**
     * Assemble the "user:password@host" string for the Smartproxy unblocker proxy.
     *
     * All values are read from the `platform.proxy.smartproxy.unblocker` config group.
     * When `reproxy_enable` is truthy, the `reproxy` entry is used as the host part
     * instead of the `server` entry.
     *
     * NOTE(review): the credentials are concatenated verbatim (no URL-encoding) — confirm
     * that the configured user/password never contain characters such as "@" or ":".
     *
     * @return string Proxy address in the form "user:password@host".
     */
    function get_smartproxy_unblocker_server()
    {
        // Default host; may be swapped for the re-proxy endpoint below.
        $host = config('platform.proxy.smartproxy.unblocker.server');
        $username = config('platform.proxy.smartproxy.unblocker.user');
        $secret = config('platform.proxy.smartproxy.unblocker.password');

        if (config('platform.proxy.smartproxy.unblocker.reproxy_enable')) {
            $host = config('platform.proxy.smartproxy.unblocker.reproxy');
        }

        return sprintf('%s:%s@%s', $username, $secret, $host);
    }
}
|
||||
|
||||
if (! function_exists('get_smartproxy_server')) {
|
||||
function get_smartproxy_server()
|
||||
{
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
|
||||
use LaravelGoogleIndexing;
|
||||
use Masterminds\HTML5;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class GenerateShopeeAIArticleTask
|
||||
{
|
||||
@@ -48,6 +49,8 @@ public static function handle(ShopeeSellerScrape $shopee_seller_scrape)
|
||||
if (is_null($ai_writeup)) {
|
||||
$ai_output = OpenAI::writeProductArticle($excerpt, $photos);
|
||||
|
||||
//dd($ai_output);
|
||||
|
||||
if (is_null($ai_output)) {
|
||||
$e = new Exception('Failed to write: Missing ai_output');
|
||||
|
||||
@@ -140,31 +143,48 @@ private static function getTotalServiceCost($shopee_task)
|
||||
|
||||
private static function stripHtml(string $raw_html)
|
||||
{
|
||||
|
||||
$html_content = '';
|
||||
|
||||
try {
|
||||
|
||||
$r_configuration = new ReadabilityConfiguration();
|
||||
$r_configuration->setWordThreshold(20);
|
||||
|
||||
$readability = new Readability($r_configuration);
|
||||
|
||||
// try {
|
||||
// $readability->parse($raw_html);
|
||||
$readability->parse($raw_html);
|
||||
|
||||
// $html_content = $readability->getContent();
|
||||
$temp_html_content = $readability->getContent();
|
||||
|
||||
// // Remove tabs
|
||||
// $html_content = str_replace("\t", '', $html_content);
|
||||
// Remove tabs
|
||||
$temp_html_content = str_replace("\t", '', $temp_html_content);
|
||||
|
||||
// // Replace newlines with spaces
|
||||
// $html_content = str_replace(["\n", "\r\n"], ' ', $html_content);
|
||||
// Replace newlines with spaces
|
||||
$temp_html_content = str_replace(["\n", "\r\n"], ' ', $temp_html_content);
|
||||
|
||||
// // Replace multiple spaces with a single space
|
||||
// $html_content = preg_replace('/\s+/', ' ', $html_content);
|
||||
// Replace multiple spaces with a single space
|
||||
$temp_html_content = preg_replace('/\s+/', ' ', $temp_html_content);
|
||||
|
||||
// // Output the cleaned text
|
||||
// $html_content = trim($html_content); // Using trim to remove any leading or trailing spaces
|
||||
// Output the cleaned text
|
||||
$temp_html_content = trim($temp_html_content); // Using trim to remove any leading or trailing spaces
|
||||
|
||||
// $html_content = strip_tags($html_content);
|
||||
$temp_html_content = strip_tags($temp_html_content);
|
||||
|
||||
// } catch (ReadabilityParseException|Exception $e) {
|
||||
$crawler = new Crawler($raw_html);
|
||||
|
||||
// Extract meta title
|
||||
$title = $crawler->filter('title')->text(); // This assumes <title> tags are used for titles.
|
||||
|
||||
// Extract meta description
|
||||
$metaDescriptionNode = $crawler->filter('meta[name="description"]');
|
||||
$description = $metaDescriptionNode->count() > 0 ? $metaDescriptionNode->attr('content') : null;
|
||||
|
||||
$html_content .= $title.' ';
|
||||
$html_content .= $description.' ';
|
||||
$html_content .= $temp_html_content;
|
||||
|
||||
} catch (ReadabilityParseException|Exception $e) {
|
||||
|
||||
$html5 = new HTML5(['preserveWhiteSpace' => true]);
|
||||
|
||||
@@ -174,7 +194,7 @@ private static function stripHtml(string $raw_html)
|
||||
// Serialize the DOM tree back to a string, formatted.
|
||||
$html_content = strip_tags($html5->saveHTML($dom));
|
||||
|
||||
// }
|
||||
}
|
||||
|
||||
return $html_content;
|
||||
}
|
||||
|
||||
@@ -19,7 +19,9 @@ public static function handle($shopee_task)
|
||||
|
||||
$main_image_url = null;
|
||||
|
||||
$proxy_server = get_smartproxy_server();
|
||||
$unblocker_proxy_server = get_smartproxy_unblocker_server();
|
||||
$rotating_proxy_server = get_smartproxy_rotating_server();
|
||||
|
||||
$user_agent = config('platform.proxy.user_agent');
|
||||
|
||||
///////// PART 1
|
||||
@@ -36,7 +38,7 @@ public static function handle($shopee_task)
|
||||
$intervention_images = $shopee_task->product_task->intervention->intervention_images;
|
||||
} else {
|
||||
$images = self::getImages($shopee_task->product_task->response->raw_html);
|
||||
$images = self::filterImages($images, $proxy_server, $user_agent, $costs, $intervention_images);
|
||||
$images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images);
|
||||
}
|
||||
|
||||
///////// PART 2
|
||||
@@ -54,7 +56,7 @@ public static function handle($shopee_task)
|
||||
$scraped_image = ShopeeSellerScrapedImage::where('original_name', pathinfo($main_image_url, PATHINFO_BASENAME))->where('shopee_seller_scrape_id', $shopee_task->shopee_seller_scrape->id)->first();
|
||||
|
||||
if (is_null($scraped_image)) {
|
||||
$main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
$main_image = self::getProductImage($shopee_task->product_task->response->jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
|
||||
$scraped_image = self::uploadAndSaveScrapedImage($shopee_task->shopee_seller_scrape, $main_intervention_image, true);
|
||||
}
|
||||
@@ -137,17 +139,19 @@ private static function getImages(string $raw_html)
|
||||
$crawler->filter('img')->each(function ($node) use (&$images) {
|
||||
$src = $node->attr('src');
|
||||
$alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present
|
||||
|
||||
$blacklist_domain = [];
|
||||
|
||||
foreach ($blacklist_domain as $blacklist) {
|
||||
if (! str_contains($src, $blacklist)) {
|
||||
$images[] = [
|
||||
'src' => $src,
|
||||
'alt' => $alt,
|
||||
];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// if (count($images) > 4)
|
||||
// {
|
||||
// return $images;
|
||||
// }
|
||||
|
||||
return $images;
|
||||
}
|
||||
|
||||
@@ -164,7 +168,7 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$src = $image['src'];
|
||||
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
|
||||
// Check if the request was successful
|
||||
if (! $response->successful()) {
|
||||
@@ -274,7 +278,7 @@ private static function getProductImage(array $jsonLdData, string $proxy, string
|
||||
if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') {
|
||||
if (isset($data->url) && isset($data->image)) {
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
|
||||
// Check if the request was successful
|
||||
if ($response->successful()) {
|
||||
|
||||
@@ -41,7 +41,7 @@ public static function handle(string $seller, string $country_iso, Category $cat
|
||||
//dd($seller_shop_task);
|
||||
|
||||
if (isset($seller_shop_task->response->jsonld)) {
|
||||
$top_rank_products = self::getSortedData($seller_shop_task->response->jsonld, 100);
|
||||
$top_rank_products = self::getSortedData($seller_shop_task->response->jsonld, 400);
|
||||
|
||||
if (count($top_rank_products) > 0) {
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
{
|
||||
$slug = str_slug($url);
|
||||
|
||||
$cached_url = self::getGoogleCachedUrl($url, false);
|
||||
$cached_url = $url; // self::getGoogleCachedUrl($url, false);
|
||||
|
||||
$postfix = strval($postfix);
|
||||
|
||||
@@ -35,7 +35,8 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
$main_intervention_image = null;
|
||||
$intervention_images = [];
|
||||
|
||||
$proxy_server = get_smartproxy_server();
|
||||
$unblocker_proxy_server = get_smartproxy_unblocker_server();
|
||||
$rotating_proxy_server = get_smartproxy_rotating_server();
|
||||
|
||||
try {
|
||||
$raw_html = OSSUploader::readFile($driver, $directory, $filename);
|
||||
@@ -51,26 +52,47 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
if (is_null($raw_html)) {
|
||||
|
||||
try {
|
||||
$browsershot = new Browsershot();
|
||||
|
||||
$browsershot->setUrl($cached_url)
|
||||
->setOption('args', ['headless: "new"'])
|
||||
->noSandbox()
|
||||
->setOption('args', ['--disable-web-security'])
|
||||
->userAgent($user_agent)
|
||||
->ignoreHttpsErrors()
|
||||
->preventUnsuccessfulResponse()
|
||||
->timeout(10)
|
||||
//->setProxyServer($proxy_server)
|
||||
->userAgent($user_agent);
|
||||
$response = Http::withHeaders([
|
||||
'User-Agent' => $user_agent,
|
||||
])
|
||||
->withOptions([
|
||||
'proxy' => $unblocker_proxy_server,
|
||||
'timeout' => 1000,
|
||||
'verify' => false,
|
||||
])
|
||||
->get($cached_url);
|
||||
|
||||
if (app()->environment() == 'local') {
|
||||
$browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary'));
|
||||
if ($response->successful()) {
|
||||
$raw_html = $response->body();
|
||||
// ... your logic here ...
|
||||
} else {
|
||||
$raw_html = null;
|
||||
$status_code = -3;
|
||||
//throw new Exception('Http response failed');
|
||||
$response->throw();
|
||||
}
|
||||
|
||||
//dump($browsershot);
|
||||
// $browsershot = new Browsershot();
|
||||
|
||||
$raw_html = $browsershot->bodyHtml();
|
||||
// $browsershot->setUrl($cached_url)
|
||||
// ->setOption('args', ['headless: "new"'])
|
||||
// ->noSandbox()
|
||||
// ->setOption('args', ['--disable-web-security'])
|
||||
// ->userAgent($user_agent)
|
||||
// ->ignoreHttpsErrors()
|
||||
// ->preventUnsuccessfulResponse()
|
||||
// ->timeout(10)
|
||||
// ->setProxyServer($proxy_server)
|
||||
// ->userAgent($user_agent);
|
||||
|
||||
// if (app()->environment() == 'local') {
|
||||
// $browsershot->setNodeBinary(config('platform.general.node_binary'))->setNpmBinary(config('platform.general.npm_binary'));
|
||||
// }
|
||||
|
||||
// //dump($browsershot);
|
||||
|
||||
// $raw_html = $browsershot->bodyHtml();
|
||||
|
||||
// $sizeInKb = strlen($raw_html) / 1024; // Convert bytes to kilobytes
|
||||
// $browsershot_cost = round(calculate_smartproxy_cost($sizeInKb)) ;
|
||||
@@ -97,12 +119,12 @@ public static function handle(string $url, $directory, $postfix = null, $strip_h
|
||||
|
||||
if ($parse_images) {
|
||||
$images = self::getImages($raw_html);
|
||||
$images = self::filterImages($images, $proxy_server, $user_agent, $costs, $intervention_images);
|
||||
$images = self::filterImages($images, $rotating_proxy_server, $user_agent, $costs, $intervention_images);
|
||||
} else {
|
||||
$images = [];
|
||||
}
|
||||
|
||||
$main_image = self::getProductImage($jsonld, $proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
$main_image = self::getProductImage($jsonld, $rotating_proxy_server, $user_agent, $costs, $main_intervention_image);
|
||||
|
||||
return (object) [
|
||||
'intervention' => (object) compact('main_intervention_image', 'intervention_images'),
|
||||
@@ -169,16 +191,20 @@ private static function getImages(string $raw_html)
|
||||
$crawler->filter('img')->each(function ($node) use (&$images) {
|
||||
$src = $node->attr('src');
|
||||
$alt = $node->attr('alt') ?? null; // Setting a default value if alt is not present
|
||||
|
||||
$blacklist_domain = [];
|
||||
|
||||
foreach ($blacklist_domain as $blacklist) {
|
||||
if (! str_contains($src, $blacklist)) {
|
||||
$images[] = [
|
||||
'src' => $src,
|
||||
'alt' => $alt,
|
||||
];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// if (count($images) > 4)
|
||||
// {
|
||||
// return $images;
|
||||
// }
|
||||
//dd($images);
|
||||
|
||||
return $images;
|
||||
}
|
||||
@@ -196,7 +222,8 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$src = $image['src'];
|
||||
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($src);
|
||||
|
||||
// Check if the request was successful
|
||||
if (! $response->successful()) {
|
||||
@@ -216,7 +243,7 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$sizeKb = round(strlen($imageData) / 1024, 2);
|
||||
|
||||
// Check constraints
|
||||
if ($width < 800 || $height < 800 || $sizeKb < 100 || $mime !== 'image/jpeg') {
|
||||
if ($width < 800 || $height < 800 || $sizeKb < 100) {
|
||||
continue;
|
||||
}
|
||||
$image['width'] = $width;
|
||||
@@ -268,6 +295,7 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$colorCounts[] = $image['color_counts'];
|
||||
}
|
||||
|
||||
if (! empty($colorCounts)) {
|
||||
// Compute the median of the color counts
|
||||
sort($colorCounts);
|
||||
$count = count($colorCounts);
|
||||
@@ -279,6 +307,10 @@ private static function filterImages(array $images, string $proxy, string $user_
|
||||
$filteredImages = array_filter($filteredImages, function ($image) use ($threshold) {
|
||||
return $image['color_counts'] > $threshold;
|
||||
});
|
||||
} else {
|
||||
// No images found
|
||||
$filteredImages = []; // Clear the array or take any other appropriate action
|
||||
}
|
||||
|
||||
usort($filteredImages, function ($a, $b) {
|
||||
return $b['sizeKb'] <=> $a['sizeKb']; // Using the spaceship operator to sort in descending order
|
||||
@@ -307,7 +339,7 @@ private static function getProductImage(array $jsonLdData, string $proxy, string
|
||||
if (isset($data->{'@type'}) && $data->{'@type'} === 'Product') {
|
||||
if (isset($data->url) && isset($data->image)) {
|
||||
try {
|
||||
$response = Http::withOptions(['proxy' => $proxy])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
$response = Http::withOptions(['proxy' => $proxy, 'verify' => false])->withHeaders(['User-Agent' => $user_agent])->get($data->image);
|
||||
|
||||
// Check if the request was successful
|
||||
if ($response->successful()) {
|
||||
|
||||
@@ -11,7 +11,15 @@
|
||||
'server' => 'gate.smartproxy.com:7000',
|
||||
'reproxy' => '157.230.194.206:7000',
|
||||
'reproxy_enable' => false,
|
||||
'cost_per_gb' => 7.00,
|
||||
'cost_per_gb' => 7,
|
||||
],
|
||||
'unblocker' => [
|
||||
'user' => 'U0000123412',
|
||||
'password' => 'P$W1bda906aee53c2022d94e22ff1a1142a1',
|
||||
'server' => 'unblock.smartproxy.com:60000',
|
||||
'reproxy' => '157.230.194.206:7000',
|
||||
'reproxy_enable' => false,
|
||||
'cost_per_gb' => 20.14,
|
||||
],
|
||||
],
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user