Add (scraper)

Update (ai): integrate scraper
This commit is contained in:
2023-09-25 19:39:13 +08:00
parent b6f36e3ccd
commit c53918d03b
7 changed files with 262 additions and 45 deletions

View File

@@ -14,43 +14,52 @@ class GenerateArticleTask
public static function handle(SerpUrl $serp_url)
{
$ai_titles = OpenAI::suggestArticleTitles($serp_url->title, $serp_url->description, 1);
// $ai_titles = OpenAI::suggestArticleTitles($serp_url->title, $serp_url->description, 1);
if (is_null($ai_titles)) {
return self::saveAndReturnSerpProcessStatus($serp_url, -2);
// if (is_null($ai_titles)) {
// return self::saveAndReturnSerpProcessStatus($serp_url, -2);
// }
// $suggestion = null;
// // dump($ai_titles);
// try {
// $random_key = array_rand($ai_titles?->suggestions, 1);
// $suggestion = $ai_titles->suggestions[$random_key];
// } catch (Exception $e) {
// return self::saveAndReturnSerpProcessStatus($serp_url, -1);
// }
// if (is_null($suggestion)) {
// return self::saveAndReturnSerpProcessStatus($serp_url, -3);
// }
$ai_suggestion = OpenAI::createNewArticleTitle($serp_url->title, $serp_url->description);
$readability_content = ScrapeUrlBodyTask::handle($serp_url->url);
if (is_null($readability_content))
{
return self::saveAndReturnSerpProcessStatus($serp_url, -7);
}
$suggestion = null;
// dump($ai_titles);
try {
$random_key = array_rand($ai_titles?->suggestions, 1);
$suggestion = $ai_titles->suggestions[$random_key];
} catch (Exception $e) {
return self::saveAndReturnSerpProcessStatus($serp_url, -1);
}
if (is_null($suggestion)) {
return self::saveAndReturnSerpProcessStatus($serp_url, -3);
}
$markdown = OpenAI::writeArticle($suggestion->title, $suggestion->description, $suggestion->article_type, 500, 800);
$markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type ,500, 800);
if (is_empty($markdown)) {
return self::saveAndReturnSerpProcessStatus($serp_url, -4);
}
$post = new Post;
$post->title = $suggestion->title;
$post->type = $suggestion->article_type;
$post->short_title = $suggestion->short_title;
$post->main_keyword = $ai_titles->main_keyword;
$post->keywords = $suggestion->photo_keywords;
$post->slug = str_slug($suggestion->title);
$post->excerpt = $suggestion->description;
$post->title = $ai_suggestion->title;
$post->type = $ai_suggestion->article_type;
$post->short_title = $ai_suggestion->short_title;
$post->main_keyword = $ai_suggestion->main_keyword;
$post->keywords = $ai_suggestion->photo_keywords;
$post->slug = str_slug($ai_suggestion->title);
$post->excerpt = $ai_suggestion->description;
$post->author_id = Author::find(1)->id;
$post->featured = false;
$post->featured_image = null;

View File

@@ -0,0 +1,73 @@
<?php
namespace App\Jobs\Tasks;
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
use \Illuminate\Support\Facades\Http;
use Carbon\Carbon;
use Storage;
use Exception;
use andreskrey\Readability\Readability;
use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
class ScrapeUrlBodyTask
{
    /**
     * Return the readable, tag-free text of a web page.
     *
     * The raw HTML is first looked up in the "r2" store under
     * /scraped/<slug>.html. On a miss it is downloaded through the proxy and
     * written back to the same location. The HTML is then passed through
     * Readability and stripped of tags.
     *
     * @param string $url Address of the page to scrape.
     *
     * @return string|null The extracted text; the unparsed HTML when
     *                     Readability raises a ParseException; null when no
     *                     HTML could be read from the store or downloaded.
     */
    public static function handle(string $url): ?string
    {
        $file_name = str_slug($url) . '.html';

        $html_content = self::readStoredHtml($file_name) ?? self::downloadHtml($url, $file_name);

        if (is_null($html_content)) {
            // Nothing stored and the download did not succeed: report "no content"
            // to the caller instead of handing null to the parser.
            return null;
        }

        $readability = new Readability(new Configuration());

        try {
            $readability->parse($html_content);

            return strip_tags($readability->getContent());
        } catch (ParseException $e) {
            // Deliberate best effort: when Readability cannot extract an article,
            // fall back to the HTML exactly as it was obtained.
            return $html_content;
        }
    }

    /**
     * Read previously scraped HTML from the "r2" store.
     *
     * @param string $file_name File name below /scraped/.
     *
     * @return string|null The stored HTML, or null when reading throws or
     *                     yields nothing usable.
     */
    private static function readStoredHtml(string $file_name): ?string
    {
        try {
            $stored = OSSUploader::readFile('r2', '/scraped/', $file_name);
        } catch (Exception $e) {
            return null;
        }

        // NOTE(review): readFile's return value for a missing file is not visible
        // here; anything that is not a non-empty string is treated as a miss.
        return (is_string($stored) && $stored !== '') ? $stored : null;
    }

    /**
     * Download a page through the proxy and store the body under /scraped/.
     *
     * @param string $url       Address to fetch.
     * @param string $file_name File name to store the body under.
     *
     * @return string|null The response body, or null when the response is not
     *                     successful or the body is empty.
     */
    private static function downloadHtml(string $url, string $file_name): ?string
    {
        // NOTE(review): SECURITY - these proxy credentials are committed to the
        // repository. Rotate them and load them from configuration/environment.
        $proxy = 'gate.smartproxy.com:10000';
        $user = 'sp5bbkzj7e';
        $psw = 'yTtk2cc5kg23kIkSSr';

        $response = Http::withOptions([
            'proxy' => "http://$user:$psw@$proxy",
        ])->get($url);

        if (!$response->successful()) {
            return null;
        }

        $body = $response->body();

        if ($body === '') {
            // An empty page is not worth caching or parsing.
            return null;
        }

        OSSUploader::uploadFile('r2', '/scraped/', $file_name, $body);

        return $body;
    }
}