diff --git a/app/Helpers/FirstParty/OSSUploader/OSSUploader.php b/app/Helpers/FirstParty/OSSUploader/OSSUploader.php
index e13b576..d8a2183 100644
--- a/app/Helpers/FirstParty/OSSUploader/OSSUploader.php
+++ b/app/Helpers/FirstParty/OSSUploader/OSSUploader.php
@@ -30,7 +30,7 @@ public static function readFile($storage_driver, $relative_directory, $filename)
$filepath = rtrim($relative_directory, '/').'/'.$filename;
try {
- return Storage::disk($storage_driver)->get($filepath);
+ return Storage::disk($storage_driver)->get($filepath);
} catch (\Exception $e) {
return null;
@@ -39,7 +39,6 @@ public static function readFile($storage_driver, $relative_directory, $filename)
return null;
}
-
public static function uploadJson($storage_driver, $relative_directory, $filename, $jsonData)
{
$jsonString = json_encode($jsonData, JSON_PRETTY_PRINT);
diff --git a/app/Helpers/FirstParty/OpenAI/OpenAI.php b/app/Helpers/FirstParty/OpenAI/OpenAI.php
index 77a2bd4..756fbe7 100644
--- a/app/Helpers/FirstParty/OpenAI/OpenAI.php
+++ b/app/Helpers/FirstParty/OpenAI/OpenAI.php
@@ -57,7 +57,6 @@ public static function createNewArticleTitle($current_title, $supporting_data)
return in following json format {\"main_keyword\":\"(Main Keyword)\",\"title\":\"(Title in 90-130 letters)\",\"short_title\":\"(Short Title in 30-40 letters)\",\"article_type\":\"(How-tos|Guides|Interview|Review|Commentary|Feature|News|Editorial|Report|Research|Case-study|Overview|Tutorial|Update|Spotlight|Insights)\",\"description\":\"(SEO description based on main keyword)\",\"photo_keywords\":[\"photo keyword 1\",\"photo keyword 2\"]}";
-
$user_prompt = "Article Title: {$current_title}\n Article Description: {$supporting_data}\n";
$reply = self::chatCompletion($system_prompt, $user_prompt, 'gpt-3.5-turbo');
diff --git a/app/Http/Controllers/Front/FrontPostController.php b/app/Http/Controllers/Front/FrontPostController.php
index 09d4fc6..431b93a 100644
--- a/app/Http/Controllers/Front/FrontPostController.php
+++ b/app/Http/Controllers/Front/FrontPostController.php
@@ -141,9 +141,16 @@ private function injectTableOfContents($html)
{
$crawler = new Crawler($html);
+ $h2Elements = $crawler->filter('h2');
+
+ if ($h2Elements->count() < 3) {
+ // Return the original HTML if there are fewer than 3 h2 tags
+ return $html;
+ }
+
// Create the Table of Contents
$toc = '
';
- $crawler->filter('h2')->each(function (Crawler $node, $i) use (&$toc) {
+ $h2Elements->each(function (Crawler $node, $i) use (&$toc) {
$content = $node->text();
$id = 'link-'.$i; // Creating a simple id based on the index
$node->getNode(0)->setAttribute('id', $id); // Set the id to the h2 tag
@@ -162,7 +169,6 @@ private function injectTableOfContents($html)
$updatedHtml = $crawler->filter('body')->html();
return $updatedHtml;
-
}
private function injectFeaturedImage($post, $content)
diff --git a/app/Jobs/Tasks/GenerateArticleTask.php b/app/Jobs/Tasks/GenerateArticleTask.php
index a360cd9..555ae2a 100644
--- a/app/Jobs/Tasks/GenerateArticleTask.php
+++ b/app/Jobs/Tasks/GenerateArticleTask.php
@@ -41,12 +41,11 @@ public static function handle(SerpUrl $serp_url)
$readability_content = ScrapeUrlBodyTask::handle($serp_url->url);
- if (is_null($readability_content))
- {
- return self::saveAndReturnSerpProcessStatus($serp_url, -7);
+ if (is_null($readability_content)) {
+ return self::saveAndReturnSerpProcessStatus($serp_url, -7);
}
- $markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type ,500, 800);
+ $markdown = OpenAI::writeArticle($ai_suggestion->title, $readability_content, $ai_suggestion->article_type, 500, 800);
if (is_empty($markdown)) {
return self::saveAndReturnSerpProcessStatus($serp_url, -4);
diff --git a/app/Jobs/Tasks/ScrapeUrlBodyTask.php b/app/Jobs/Tasks/ScrapeUrlBodyTask.php
index 3842ce3..0677438 100644
--- a/app/Jobs/Tasks/ScrapeUrlBodyTask.php
+++ b/app/Jobs/Tasks/ScrapeUrlBodyTask.php
@@ -2,72 +2,65 @@
namespace App\Jobs\Tasks;
-use App\Helpers\FirstParty\OSSUploader\OSSUploader;
-use \Illuminate\Support\Facades\Http;
-use Carbon\Carbon;
-use Storage;
-use Exception;
-
-use andreskrey\Readability\Readability;
use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
+use andreskrey\Readability\Readability;
+use App\Helpers\FirstParty\OSSUploader\OSSUploader;
+use Exception;
+use Illuminate\Support\Facades\Http;
class ScrapeUrlBodyTask
{
- public static function handle(string $url)
- {
- $slug = str_slug($url);
-
- $disk_url = '/scraped/' . $slug . '.html';
-
- $html_content = null;
-
- try {
- $html_content = OSSUploader::readFile('r2','/scraped/',$slug.'.html');
-
- if (is_null($disk_url))
- {
- throw Exception('Not stored.');
- }
- }
- catch (Exception $e) {
- $html_content = null;
- }
-
- if (is_null($html_content))
+ public static function handle(string $url)
{
- $proxy = 'gate.smartproxy.com:10000';
- $user = 'sp5bbkzj7e';
- $psw = 'yTtk2cc5kg23kIkSSr';
+ // Returns $url's readable text with tags stripped, the raw HTML if extraction fails, or null without HTML.
- $response = Http::withOptions([
- 'proxy' => "http://$user:$psw@$proxy",
- ])->get($url);
+ $slug = str_slug($url);
- if ($response->successful()) {
- $html_content = $response->body();
+ $html_content = null;
+
+ // Prefer a copy already stored on the 'r2' disk over a new request.
+ try {
+ $html_content = OSSUploader::readFile('r2', '/scraped/', $slug.'.html');
+ } catch (Exception $e) {
+ // A failed read is handled like a missing file.
+ $html_content = null;
+ }
+
+ if (is_null($html_content)) {
+ // NOTE(review): these proxy credentials are committed in source; move them to config/env and rotate them.
+ $proxy = 'gate.smartproxy.com:10000';
+ $user = 'sp5bbkzj7e';
+ $psw = 'yTtk2cc5kg23kIkSSr';
+
+ $response = Http::withOptions([
+ 'proxy' => "http://$user:$psw@$proxy",
+ ])->get($url);
+
+ if ($response->successful()) {
+ $html_content = $response->body();
+
+ // Upload the fetched page under the same path that is read above.
+ OSSUploader::uploadFile('r2', '/scraped/', $slug.'.html', $html_content);
+ }
+ }
+
+ // Neither the stored copy nor the request produced HTML: nothing to parse.
+ if (is_null($html_content)) {
+ return null;
+ }
+
+ $readability = new Readability(new Configuration());
+
+ try {
+ $readability->parse($html_content);
+
+ $html_content = strip_tags($readability->getContent());
+ } catch (ParseException $e) {
+ // Extraction failed; fall through and return the HTML unchanged.
+ }
+
+ return $html_content;
- OSSUploader::uploadFile('r2','/scraped/',$slug.'.html', $html_content);
- }
}
-
- //dump("Initial: " . strlen($html_content));
-
- $readability = new Readability(new Configuration());
-
-
- try {
- $readability->parse($html_content);
-
- $html_content = strip_tags($readability->getContent());
- //dd($readability);
- } catch (ParseException $e) {
-
- }
-
- //dump("After: " . strlen($html_content));
-
- return $html_content;
-
- }
-}
\ No newline at end of file
+}
diff --git a/config/htmlminify.php b/config/htmlminify.php
index 973960b..3282764 100644
--- a/config/htmlminify.php
+++ b/config/htmlminify.php
@@ -15,5 +15,5 @@
// exclude route name for exclude from minify
'exclude_route' => [
// 'routeName'
- ]
+ ],
];
diff --git a/config/seotools.php b/config/seotools.php
index 113c5ca..66d971e 100644
--- a/config/seotools.php
+++ b/config/seotools.php
@@ -5,7 +5,6 @@
return [
-
'fb_app_id' => '1259730771382460',
'meta' => [
diff --git a/routes/tests.php b/routes/tests.php
index 239a18e..f858a2d 100644
--- a/routes/tests.php
+++ b/routes/tests.php
@@ -33,27 +33,24 @@
Route::get('/step-2', function (Request $request) {
$news_serp_result = NewsSerpResult::find($request->input('id', null));
- if (is_null($news_serp_result))
- {
- abort(404);
+ if (is_null($news_serp_result)) {
+ abort(404);
}
$task = ParseNewsSerpDomainsTask::handle($news_serp_result);
- if ($task)
- {
- $serp_url = SerpUrl::latest()->first();
+ if ($task) {
+ $serp_url = SerpUrl::latest()->first();
- dd($serp_url->id);
+ dd($serp_url->id);
}
});
Route::get('/step-3', function (Request $request) {
$serp_url = SerpUrl::find($request->input('id', null));
- if (is_null($serp_url))
- {
- abort(404);
+ if (is_null($serp_url)) {
+ abort(404);
}
$task = GenerateArticleJob::dispatch($serp_url)->onQueue('default')->onConnection('default');
@@ -70,18 +67,16 @@
});
Route::get('/step-5', function (Request $request) {
- $post = Post::find($request->input('id'));
+ $post = Post::find($request->input('id'));
- if (is_null($post))
- {
- return abort(404);
- }
+ if (is_null($post)) {
+ return abort(404);
+ }
- $post->published_at = now();
- dd($post->save());
+ $post->published_at = now();
+ dd($post->save());
});
-
// Route::get('/suggest_titles', function () {
// $results = OpenAI::suggestArticleTitles("It's 2019s Electric: How Fisker Is Reinventing The Automotive Industry And \nExpanding Its Business", "Fisker's approach to building electric vehicles is deeply intertwined with \nits overall business philosophy: use less, use better,...s", 1);
// dd($results);
@@ -92,14 +87,12 @@
// dd($results);
// });
+Route::get('proxy_test', function () {
+ $url = 'https://www.cnbc.com/2023/09/24/this-southern-city-is-the-no-1-place-to-start-your-own-business.html';
+ $task = ScrapeUrlBodyTask::handle($url);
-Route::get('proxy_test', function() {
- $url = 'https://www.cnbc.com/2023/09/24/this-southern-city-is-the-no-1-place-to-start-your-own-business.html';
-
- $task = ScrapeUrlBodyTask::handle($url);
-
- dd($task);
+ dd($task);
});
// Route::get('/image_gen', function() {