From ed0a481a699f28da0fe6140041e949b5d31487a4 Mon Sep 17 00:00:00 2001
From: Charles Teh
Date: Thu, 23 Nov 2023 01:34:33 +0800
Subject: [PATCH] Add (work): set existing fields to be processed

---
NOTE(review): this copy of the patch is incomplete. The first hunk announces
143 added lines but starts mid-statement at "rss_post_id = $rss_post_id;";
the head of app/Jobs/SaveOldKeywordsJob.php has to be restored before the
patch can apply.

 app/Jobs/SaveOldKeywordsJob.php             | 143 ++++++++++++++++++++
 app/Jobs/Tasks/ParseRssPostMetadataTask.php |  18 +--
 routes/tests.php                            |  11 ++
 3 files changed, 155 insertions(+), 17 deletions(-)
 create mode 100644 app/Jobs/SaveOldKeywordsJob.php

diff --git a/app/Jobs/SaveOldKeywordsJob.php b/app/Jobs/SaveOldKeywordsJob.php
new file mode 100644
index 0000000..0ddbb81
--- /dev/null
+++ b/app/Jobs/SaveOldKeywordsJob.php
@@ -0,0 +1,143 @@
+rss_post_id = $rss_post_id;
+    }
+
+    /**
+     * Rebuild keyword_list and the RssPostKeyword rows of one RSS post, then flag it via keyword_saved.
+     */
+    public function handle(): void
+    {
+        $rss_post = RssPost::find($this->rss_post_id);
+        // No post with this id: nothing to do.
+        if (is_null($rss_post))
+        {
+            return ;
+        }
+        // Already processed: leave the post untouched.
+        if ($rss_post->keyword_saved == true)
+        {
+            return ;
+        }
+
+        $words_to_add_in_keyword_list = [];
+        $words_to_save = [];
+
+        $first_keyword_found = false;
+
+        // Entities: each entity is kept; the one at key 0 is flagged as main.
+        if (isset($rss_post->entities)) {
+            if (count($rss_post->entities) > 0) {
+
+                foreach ($rss_post->entities as $key => $word) {
+
+                    $word = trim($word);
+
+                    $words_to_save[] = (object) [
+                        'is_main' => ($key == 0) ? true : false,
+                        'type' => 'entity',
+                        'value' => $word,
+                        'value_lowercased' => strtolower($word),
+                    ];
+
+                    $words_to_add_in_keyword_list[] = $word;
+                }
+            }
+        }
+
+        // Keywords: the first keyword that is kept is flagged as main, independently of the entities.
+        if (isset($rss_post->keywords)) {
+            if (count($rss_post->keywords) > 0) {
+
+                foreach ($rss_post->keywords as $word) {
+
+                    $word = trim($word);
+                    // Skip a keyword whose strtolower() value was already collected; continue 2 resumes the outer loop.
+                    foreach($words_to_save as $saved_word)
+                    {
+                        if (strtolower($word) == $saved_word->value_lowercased)
+                        {
+                            continue 2;
+                        }
+                    }
+
+                    $words_to_save[] = (object) [
+                        'is_main' => !$first_keyword_found,
+                        'type' => 'keyword',
+                        'value' => $word,
+                        'value_lowercased' => strtolower($word),
+                    ];
+
+                    $words_to_add_in_keyword_list[] = $word;
+
+                    if ($first_keyword_found == false) {
+                        $first_keyword_found = true;
+                    }
+
+                }
+            }
+        }
+
+        $rss_post->keyword_list = implode(',', $words_to_add_in_keyword_list);
+        // NOTE(review): the status is forced to 'published' for every processed post - confirm this is intended.
+        $rss_post->status = 'published';
+
+        if($rss_post->save())
+        {
+            $has_saved_keyword = false;
+            // Replace the post's keyword rows: delete the existing ones, then insert the collected set.
+            $deleted_rpk = RssPostKeyword::where('rss_post_id', $rss_post->id)->delete();
+
+            foreach ($words_to_save as $word_to_save)
+            {
+
+                $new_rpk = new RssPostKeyword;
+                $new_rpk->rss_post_id = $rss_post->id;
+                $new_rpk->type = $word_to_save->type;
+                $new_rpk->is_main = $word_to_save->is_main;
+                $new_rpk->value = $word_to_save->value;
+                $new_rpk->value_lowercased = $word_to_save->value_lowercased;
+
+                if($new_rpk->save())
+                {
+                    if (!$has_saved_keyword)
+                    {
+                        $has_saved_keyword = true;
+                    }
+
+                }
+            }
+            // keyword_saved is only set once at least one keyword row was stored.
+            if ($has_saved_keyword)
+            {
+                $rss_post->keyword_saved = true;
+                $rss_post->save();
+            }
+        }
+
+    }
+}
diff --git a/app/Jobs/Tasks/ParseRssPostMetadataTask.php b/app/Jobs/Tasks/ParseRssPostMetadataTask.php
index 1638a98..fd4f7ab 100644
--- a/app/Jobs/Tasks/ParseRssPostMetadataTask.php
+++ b/app/Jobs/Tasks/ParseRssPostMetadataTask.php
@@ -145,32 +145,16 @@ public static function handle(int $rss_post_id)
         $rss_post->keyword_list = implode(',', $words_to_add_in_keyword_list);
 
         $rss_post->status = 'published';
+
         if($rss_post->save())
         {
             $has_saved_keyword = false;
 
             $deleted_rpk = RssPostKeyword::where('rss_post_id', $rss_post->id)->delete();
 
-            //dump($words_to_save);
-
             foreach ($words_to_save as $word_to_save)
             {
 
-                // * @property int $id
-                // * @property int $rss_post_id
-                // * @property string $type
-                // * @property bool $is_main
-                // * @property string $value
-                // * @property string $value_lowercased
-                // * @property Carbon|null $created_at
-                // * @property Carbon|null $updated_at
-
-                // $words_to_save[] = (object) [
-                // 'type' => 'keyword',
-                // 'value' => $word,
-                // 'value_lowercased' => strtolower($word),
-                // ];
-
                 $new_rpk = new RssPostKeyword;
                 $new_rpk->rss_post_id = $rss_post->id;
                 $new_rpk->type = $word_to_save->type;
diff --git a/routes/tests.php b/routes/tests.php
index 64729fa..be6f038 100644
--- a/routes/tests.php
+++ b/routes/tests.php
@@ -39,6 +39,17 @@
 
 Route::get('/crawlTask', [App\Http\Controllers\Tests\TestController::class, 'crawlTask']);
 
+Route::get('/set_keywords', function (Request $request) {
+    // Newest post whose keywords are not saved yet; first() yields null when no such post exists.
+    $last_record = RssPost::where('keyword_saved', false)->orderBy('id','DESC')->first();
+    // One job per id from 1 up to that post; the null check keeps an empty result from dereferencing null.
+    for ($i = 1; $last_record !== null && $i <= $last_record->id; $i++) {
+        SaveOldKeywordsJob::dispatch($i)->onQueue('default')->onConnection('default');
+    }
+
+    return 'ok';
+});
+
 Route::get('/recrawl_from', function (Request $request) {
 
     $id = $request->input('id');