Update (post): only show table of contents if there are at least 3 toc items
This commit is contained in:
@@ -41,12 +41,11 @@ public static function handle(SerpUrl $serp_url)
|
||||
|
||||
$readability_content = ScrapeUrlBodyTask::handle($serp_url->url);
|
||||
|
||||
if (is_null($readability_content))
|
||||
{
|
||||
return self::saveAndReturnSerpProcessStatus($serp_url, -7);
|
||||
if (is_null($readability_content)) {
|
||||
return self::saveAndReturnSerpProcessStatus($serp_url, -7);
|
||||
}
|
||||
|
||||
$markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type ,500, 800);
|
||||
$markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type, 500, 800);
|
||||
|
||||
if (is_empty($markdown)) {
|
||||
return self::saveAndReturnSerpProcessStatus($serp_url, -4);
|
||||
|
||||
@@ -2,72 +2,65 @@
|
||||
|
||||
namespace App\Jobs\Tasks;
|
||||
|
||||
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
|
||||
use \Illuminate\Support\Facades\Http;
|
||||
use Carbon\Carbon;
|
||||
use Storage;
|
||||
use Exception;
|
||||
|
||||
use andreskrey\Readability\Readability;
|
||||
use andreskrey\Readability\Configuration;
|
||||
use andreskrey\Readability\ParseException;
|
||||
use andreskrey\Readability\Readability;
|
||||
use App\Helpers\FirstParty\OSSUploader\OSSUploader;
|
||||
use Exception;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
|
||||
class ScrapeUrlBodyTask
{
    /**
     * Return the readable plain-text body of a web page.
     *
     * The raw HTML is first looked up in the "r2" store under
     * /scraped/<slug>.html. On a miss it is fetched through the HTTP proxy
     * and, when the request succeeds, uploaded to the same location so the
     * next call can reuse it. The HTML is then passed through Readability
     * and stripped of tags.
     *
     * @param string $url Address of the page to scrape.
     *
     * @return string|null The extracted text; the unprocessed HTML when
     *                     Readability throws a ParseException; null when no
     *                     HTML could be obtained at all.
     */
    public static function handle(string $url)
    {
        $file_name = str_slug($url).'.html';

        $html_content = self::readStoredHtml($file_name);

        if (is_null($html_content)) {
            $html_content = self::fetchAndStoreHtml($url, $file_name);
        }

        // Nothing stored and the download failed: there is nothing to parse.
        if (is_null($html_content)) {
            return null;
        }

        $readability = new Readability(new Configuration());

        try {
            $readability->parse($html_content);

            $html_content = strip_tags($readability->getContent());
        } catch (ParseException $e) {
            // Best effort: when Readability cannot parse the document the
            // HTML is returned unprocessed rather than failing the call.
        }

        return $html_content;
    }

    /**
     * Read previously scraped HTML from the "r2" store.
     *
     * @param string $file_name File name inside the /scraped/ directory.
     *
     * @return mixed Whatever OSSUploader::readFile() returns, or null when it
     *               throws an Exception (treated as "not stored").
     */
    private static function readStoredHtml(string $file_name)
    {
        try {
            return OSSUploader::readFile('r2', '/scraped/', $file_name);
        } catch (Exception $e) {
            return null;
        }
    }

    /**
     * Download a page through the proxy and store its HTML in the "r2" store.
     *
     * @param string $url       Address of the page to download.
     * @param string $file_name File name to store the HTML under in /scraped/.
     *
     * @return string|null The response body, or null when the response was
     *                     not successful.
     */
    private static function fetchAndStoreHtml(string $url, string $file_name)
    {
        // NOTE(review): proxy credentials are hard-coded in source. They
        // should be moved to configuration and rotated.
        $proxy = 'gate.smartproxy.com:10000';
        $user = 'sp5bbkzj7e';
        $psw = 'yTtk2cc5kg23kIkSSr';

        $response = Http::withOptions([
            'proxy' => "http://$user:$psw@$proxy",
        ])->get($url);

        if (! $response->successful()) {
            return null;
        }

        $html_content = $response->body();

        OSSUploader::uploadFile('r2', '/scraped/', $file_name, $html_content);

        return $html_content;
    }
}
|
||||
|
||||
Reference in New Issue
Block a user