From e3fe8bf8778c8408bbae0f6f2aba8303ed3b24c8 Mon Sep 17 00:00:00 2001 From: Charles Teh Date: Mon, 25 Sep 2023 19:54:51 +0800 Subject: [PATCH] Update (post): only show table of contents if there are at least 3 toc items --- .../FirstParty/OSSUploader/OSSUploader.php | 3 +- app/Helpers/FirstParty/OpenAI/OpenAI.php | 1 - .../Controllers/Front/FrontPostController.php | 10 +- app/Jobs/Tasks/GenerateArticleTask.php | 7 +- app/Jobs/Tasks/ScrapeUrlBodyTask.php | 111 ++++++++---------- config/htmlminify.php | 2 +- config/seotools.php | 1 - routes/tests.php | 41 +++---- 8 files changed, 82 insertions(+), 94 deletions(-) diff --git a/app/Helpers/FirstParty/OSSUploader/OSSUploader.php b/app/Helpers/FirstParty/OSSUploader/OSSUploader.php index e13b576..d8a2183 100644 --- a/app/Helpers/FirstParty/OSSUploader/OSSUploader.php +++ b/app/Helpers/FirstParty/OSSUploader/OSSUploader.php @@ -30,7 +30,7 @@ public static function readFile($storage_driver, $relative_directory, $filename) $filepath = rtrim($relative_directory, '/').'/'.$filename; try { - return Storage::disk($storage_driver)->get($filepath); + return Storage::disk($storage_driver)->get($filepath); } catch (\Exception $e) { return null; @@ -39,7 +39,6 @@ public static function readFile($storage_driver, $relative_directory, $filename) return null; } - public static function uploadJson($storage_driver, $relative_directory, $filename, $jsonData) { $jsonString = json_encode($jsonData, JSON_PRETTY_PRINT); diff --git a/app/Helpers/FirstParty/OpenAI/OpenAI.php b/app/Helpers/FirstParty/OpenAI/OpenAI.php index 77a2bd4..756fbe7 100644 --- a/app/Helpers/FirstParty/OpenAI/OpenAI.php +++ b/app/Helpers/FirstParty/OpenAI/OpenAI.php @@ -57,7 +57,6 @@ public static function createNewArticleTitle($current_title, $supporting_data) return in following json format {\"main_keyword\":\"(Main Keyword)\",\"title\":\"(Title in 90-130 letters)\",\"short_title\":\"(Short Title in 30-40 letters)\",\"article_type\":\"(How-tos|Guides|Interview|Review|Commentary|Feature|News|Editorial|Report|Research|Case-study|Overview|Tutorial|Update|Spotlight|Insights)\",\"description\":\"(SEO description based on main keyword)\",\"photo_keywords\":[\"photo keyword 1\",\"photo keyword 2\"]}"; - $user_prompt = "Article Title: {$current_title}\n Article Description: {$supporting_data}\n"; $reply = self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo'); diff --git a/app/Http/Controllers/Front/FrontPostController.php b/app/Http/Controllers/Front/FrontPostController.php index 09d4fc6..431b93a 100644 --- a/app/Http/Controllers/Front/FrontPostController.php +++ b/app/Http/Controllers/Front/FrontPostController.php @@ -141,9 +141,16 @@ private function injectTableOfContents($html) { $crawler = new Crawler($html); + $h2Elements = $crawler->filter('h2'); + + if ($h2Elements->count() < 3) { + // Return the original HTML if there are fewer than 3 h2 tags + return $html; + } + // Create the Table of Contents $toc = '
    '; - $crawler->filter('h2')->each(function (Crawler $node, $i) use (&$toc) { + $h2Elements->each(function (Crawler $node, $i) use (&$toc) { $content = $node->text(); $id = 'link-'.$i; // Creating a simple id based on the index $node->getNode(0)->setAttribute('id', $id); // Set the id to the h2 tag @@ -162,7 +169,6 @@ private function injectTableOfContents($html) $updatedHtml = $crawler->filter('body')->html(); return $updatedHtml; - } private function injectFeaturedImage($post, $content) diff --git a/app/Jobs/Tasks/GenerateArticleTask.php b/app/Jobs/Tasks/GenerateArticleTask.php index a360cd9..555ae2a 100644 --- a/app/Jobs/Tasks/GenerateArticleTask.php +++ b/app/Jobs/Tasks/GenerateArticleTask.php @@ -41,12 +41,11 @@ public static function handle(SerpUrl $serp_url) $readability_content = ScrapeUrlBodyTask::handle($serp_url->url); - if (is_null($readability_content)) - { - return self::saveAndReturnSerpProcessStatus($serp_url, -7); + if (is_null($readability_content)) { + return self::saveAndReturnSerpProcessStatus($serp_url, -7); } - $markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type ,500, 800); + $markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type, 500, 800); if (is_empty($markdown)) { return self::saveAndReturnSerpProcessStatus($serp_url, -4); diff --git a/app/Jobs/Tasks/ScrapeUrlBodyTask.php b/app/Jobs/Tasks/ScrapeUrlBodyTask.php index 3842ce3..0677438 100644 --- a/app/Jobs/Tasks/ScrapeUrlBodyTask.php +++ b/app/Jobs/Tasks/ScrapeUrlBodyTask.php @@ -2,72 +2,65 @@ namespace App\Jobs\Tasks; -use App\Helpers\FirstParty\OSSUploader\OSSUploader; -use \Illuminate\Support\Facades\Http; -use Carbon\Carbon; -use Storage; -use Exception; - -use andreskrey\Readability\Readability; use andreskrey\Readability\Configuration; use andreskrey\Readability\ParseException; +use andreskrey\Readability\Readability; +use App\Helpers\FirstParty\OSSUploader\OSSUploader; +use Exception; +use Illuminate\Support\Facades\Http; class ScrapeUrlBodyTask { - public static function handle(string $url) - { - $slug = str_slug($url); - - $disk_url = '/scraped/' . $slug . '.html'; - - $html_content = null; - - try { - $html_content = OSSUploader::readFile('r2','/scraped/',$slug.'.html'); - - if (is_null($disk_url)) - { - throw Exception('Not stored.'); - } - } - catch (Exception $e) { - $html_content = null; - } - - if (is_null($html_content)) + public static function handle(string $url) { - $proxy = 'gate.smartproxy.com:10000'; - $user = 'sp5bbkzj7e'; - $psw = 'yTtk2cc5kg23kIkSSr'; + $slug = str_slug($url); - $response = Http::withOptions([ - 'proxy' => "http://$user:$psw@$proxy", - ])->get($url); + $disk_url = '/scraped/'.$slug.'.html'; - if ($response->successful()) { - $html_content = $response->body(); + $html_content = null; + + try { + $html_content = OSSUploader::readFile('r2', '/scraped/', $slug.'.html'); + + if (is_null($disk_url)) { + throw Exception('Not stored.'); + } + } catch (Exception $e) { + $html_content = null; + } + + if (is_null($html_content)) { + $proxy = 'gate.smartproxy.com:10000'; + $user = 'sp5bbkzj7e'; + $psw = 'yTtk2cc5kg23kIkSSr'; + + $response = Http::withOptions([ + 'proxy' => "http://$user:$psw@$proxy", + ])->get($url); + + if ($response->successful()) { + $html_content = $response->body(); + + OSSUploader::uploadFile('r2', '/scraped/', $slug.'.html', $html_content); + } + } + + //dump("Initial: " . strlen($html_content)); + + $readability = new Readability(new Configuration()); + + try { + $readability->parse($html_content); + + $html_content = strip_tags($readability->getContent()); + //dd($readability); + } catch (ParseException $e) { + + } + + //dump("After: " . strlen($html_content)); + + return $html_content; - OSSUploader::uploadFile('r2','/scraped/',$slug.'.html', $html_content); - } } - - //dump("Initial: " . strlen($html_content)); - - $readability = new Readability(new Configuration()); - - - try { - $readability->parse($html_content); - - $html_content = strip_tags($readability->getContent()); - //dd($readability); - } catch (ParseException $e) { - - } - - //dump("After: " . strlen($html_content)); - - return $html_content; - - } -} \ No newline at end of file +} diff --git a/config/htmlminify.php b/config/htmlminify.php index 973960b..3282764 100644 --- a/config/htmlminify.php +++ b/config/htmlminify.php @@ -15,5 +15,5 @@ // exclude route name for exclude from minify 'exclude_route' => [ // 'routeName' - ] + ], ]; diff --git a/config/seotools.php b/config/seotools.php index 113c5ca..66d971e 100644 --- a/config/seotools.php +++ b/config/seotools.php @@ -5,7 +5,6 @@ return [ - 'fb_app_id' => '1259730771382460', 'meta' => [ diff --git a/routes/tests.php b/routes/tests.php index 239a18e..f858a2d 100644 --- a/routes/tests.php +++ b/routes/tests.php @@ -33,27 +33,24 @@ Route::get('/step-2', function (Request $request) { $news_serp_result = NewsSerpResult::find($request->input('id', null)); - if (is_null($news_serp_result)) - { - abort(404); + if (is_null($news_serp_result)) { + abort(404); } $task = ParseNewsSerpDomainsTask::handle($news_serp_result); - if ($task) - { - $serp_url = SerpUrl::latest()->first(); + if ($task) { + $serp_url = SerpUrl::latest()->first(); - dd($serp_url->id); + dd($serp_url->id); } }); Route::get('/step-3', function (Request $request) { $serp_url = SerpUrl::find($request->input('id', null)); - if (is_null($serp_url)) - { - abort(404); + if (is_null($serp_url)) { + abort(404); } $task = GenerateArticleJob::dispatch($serp_url)->onQueue('default')->onConnection('default'); @@ -70,18 +67,16 @@ }); Route::get('/step-5', function (Request $request) { - $post = Post::find($request->input('id')); + $post = Post::find($request->input('id')); - if (is_null($post)) - { - return abort(404); - } + if (is_null($post)) { + return abort(404); + } - $post->published_at = now(); - dd($post->save()); + $post->published_at = now(); + dd($post->save()); }); - // Route::get('/suggest_titles', function () { // $results = OpenAI::suggestArticleTitles("It's 2019s Electric: How Fisker Is Reinventing The Automotive Industry And \nExpanding Its Business", "Fisker's approach to building electric vehicles is deeply intertwined with \nits overall business philosophy: use less, use better,...s", 1); // dd($results); @@ -92,14 +87,12 @@ // dd($results); // }); +Route::get('proxy_test', function () { + $url = 'https://www.cnbc.com/2023/09/24/this-southern-city-is-the-no-1-place-to-start-your-own-business.html'; + $task = ScrapeUrlBodyTask::handle($url); -Route::get('proxy_test', function() { - $url = 'https://www.cnbc.com/2023/09/24/this-southern-city-is-the-no-1-place-to-start-your-own-business.html'; - - $task = ScrapeUrlBodyTask::handle($url); - - dd($task); + dd($task); }); // Route::get('/image_gen', function() {