Store parsed RSS post entities/keywords as RssPostKeyword rows.

Summary of the changes carried by this patch:
  * TestController::prm: test route (/prm?id=N) that runs
    ParseRssPostMetadataTask::handle() for a single RSS post.
  * ParseRssPostMetadataTask::handle(): collects entities first, then keywords
    that do not repeat an already collected word (case-insensitive), writes the
    combined list to rss_posts.keyword_list and, once the post is saved,
    replaces the post's rss_post_keywords rows.
  * RssPost: new `keyword_saved` boolean attribute (cast + fillable).
  * New RssPostKeyword model, rss_post_keywords table and keyword_saved column.

NOTE(review): the new-file hunks for RssPostKeyword.php and both migrations are
incomplete in this copy (their opening lines are missing, e.g. "+id();"), so
they will not apply as-is; regenerate them from the repository. Whitespace and
blank context lines were reconstructed, and the "index" hashes of the two
edited files are carried over from the original patch.

diff --git a/app/Http/Controllers/Tests/TestController.php b/app/Http/Controllers/Tests/TestController.php
index 8fbaed8..2cb9c3f 100644
--- a/app/Http/Controllers/Tests/TestController.php
+++ b/app/Http/Controllers/Tests/TestController.php
@@ -6,6 +6,7 @@
 use App\Http\Controllers\Controller;
 use App\Jobs\Tasks\BrowseRSSLatestNewsTask;
 use App\Jobs\Tasks\CrawlRssPostTask;
+use App\Jobs\Tasks\ParseRssPostMetadataTask;
 use App\Models\Post;
 use App\Notifications\PostWasPublished;
 use Illuminate\Http\Request;
@@ -15,6 +16,23 @@
 
 class TestController extends Controller
 {
+    /**
+     * Test endpoint: run ParseRssPostMetadataTask for the RSS post named by the `id` input.
+     */
+    public function prm(Request $request)
+    {
+        // handle() takes an int, so reject a missing, non-integer or non-positive id before calling it.
+        $id = filter_var($request->input('id'), FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
+
+        if ($id === false) {
+            return "Missing 'id'.";
+        }
+
+        ParseRssPostMetadataTask::handle($id);
+
+        return 'ok';
+    }
+
     public function crawlTask(Request $request)
     {
         $id = $request->input('id');
diff --git a/app/Jobs/Tasks/ParseRssPostMetadataTask.php b/app/Jobs/Tasks/ParseRssPostMetadataTask.php
index dbf568c..d318400 100644
--- a/app/Jobs/Tasks/ParseRssPostMetadataTask.php
+++ b/app/Jobs/Tasks/ParseRssPostMetadataTask.php
@@ -5,6 +5,7 @@
 use App\Helpers\FirstParty\OpenAI\OpenAI;
 use App\Models\Category;
 use App\Models\RssPost;
+use App\Models\RssPostKeyword;
 use App\Models\ServiceCostUsage;
 
 class ParseRssPostMetadataTask
@@ -37,7 +38,10 @@ public static function handle(int $rss_post_id)
             }
         }
 
-        $words_to_add_in_body = [];
+        $words_to_add_in_keyword_list = [];
+        $words_to_save = [];
+
+        $first_keyword_found = false;
 
         if ((isset($post_meta_response->output)) && (! is_null($post_meta_response->output))) {
 
@@ -50,22 +54,53 @@ public static function handle(int $rss_post_id)
                 }
             }
 
+            if (isset($post_meta_response->output->entities)) {
+                if (count($post_meta_response->output->entities) > 0) {
+                    $rss_post->entities = $post_meta_response->output->entities;
+
+                    foreach ($post_meta_response->output->entities as $key => $word) {
+                        $word = trim($word);
+
+                        $words_to_save[] = (object) [
+                            // The first entity in the response is flagged as the main one.
+                            'is_main' => $key === 0,
+                            'type' => 'entity',
+                            'value' => $word,
+                            // Multibyte-aware lowercasing so non-ASCII words are normalised as well.
+                            'value_lowercased' => mb_strtolower($word),
+                        ];
+
+                        $words_to_add_in_keyword_list[] = $word;
+                    }
+                }
+            }
+
             if (isset($post_meta_response->output->keywords)) {
                 if (count($post_meta_response->output->keywords) > 0) {
                     $rss_post->keywords = $post_meta_response->output->keywords;
 
                     foreach ($post_meta_response->output->keywords as $word) {
-                        $words_to_add_in_body[] = $word;
-                    }
-                }
-            }
-
-            if (isset($post_meta_response->output->entities)) {
-                if (count($post_meta_response->output->entities) > 0) {
-                    $rss_post->entities = $post_meta_response->output->entities;
-
-                    foreach ($post_meta_response->output->entities as $word) {
-                        $words_to_add_in_body[] = $word;
+                        $word = trim($word);
+                        $word_lowercased = mb_strtolower($word);
+
+                        // Skip a keyword that repeats an already collected entity or keyword (case-insensitive).
+                        foreach ($words_to_save as $saved_word) {
+                            if ($word_lowercased === $saved_word->value_lowercased) {
+                                continue 2;
+                            }
+                        }
+
+                        $words_to_save[] = (object) [
+                            // Only the first keyword that survives de-duplication is flagged as main.
+                            'is_main' => ! $first_keyword_found,
+                            'type' => 'keyword',
+                            'value' => $word,
+                            'value_lowercased' => $word_lowercased,
+                        ];
+
+                        $words_to_add_in_keyword_list[] = $word;
+
+                        $first_keyword_found = true;
                     }
                 }
             }
@@ -104,10 +139,23 @@ public static function handle(int $rss_post_id)
             $rss_post->category_id = $category->id;
         }
 
-        $rss_post->keyword_list = implode(',', $words_to_add_in_body);
+        $rss_post->keyword_list = implode(',', $words_to_add_in_keyword_list);
 
         $rss_post->status = 'published';
 
-        $rss_post->save();
+        if ($rss_post->save()) {
+            // Drop the rows stored for this post before re-inserting, so a re-run does not duplicate them.
+            RssPostKeyword::where('rss_post_id', $rss_post->id)->delete();
+
+            foreach ($words_to_save as $word_to_save) {
+                $new_rpk = new RssPostKeyword;
+                $new_rpk->rss_post_id = $rss_post->id;
+                $new_rpk->type = $word_to_save->type;
+                $new_rpk->is_main = $word_to_save->is_main;
+                $new_rpk->value = $word_to_save->value;
+                $new_rpk->value_lowercased = $word_to_save->value_lowercased;
+                $new_rpk->save();
+            }
+        }
     }
 }
diff --git a/app/Models/RssPost.php b/app/Models/RssPost.php
index 361d493..51b48af 100644
--- a/app/Models/RssPost.php
+++ b/app/Models/RssPost.php
@@ -27,6 +27,7 @@
  * @property string|null $metadata
  * @property string|null $bites
  * @property string|null $keyword_list
+ * @property bool $keyword_saved
  * @property string|null $impact
  * @property string $impact_level
  * @property Carbon $published_at
@@ -45,6 +46,7 @@ class RssPost extends Model implements Feedable
         'metadata' => 'object',
         'keywords' => 'array',
         'entities' => 'array',
+        'keyword_saved' => 'boolean',
     ];
 
     protected $fillable = [
@@ -64,6 +66,7 @@ class RssPost extends Model implements Feedable
         'impact_level',
         'published_at',
         'status',
+        'keyword_saved',
     ];
 
     public function category()
diff --git a/app/Models/RssPostKeyword.php b/app/Models/RssPostKeyword.php
new file mode 100644
index 0000000..ea14210
--- /dev/null
+++ b/app/Models/RssPostKeyword.php
@@ -0,0 +1,49 @@
+ 'int',
+        'is_main' => 'bool'
+    ];
+
+    protected $fillable = [
+        'rss_post_id',
+        'type',
+        'is_main',
+        'value',
+        'value_lowercased'
+    ];
+
+    public function rss_post()
+    {
+        return $this->belongsTo(RssPost::class);
+    }
+}
diff --git a/database/migrations/2023_11_22_151403_create_rss_post_keywords_table.php b/database/migrations/2023_11_22_151403_create_rss_post_keywords_table.php
new file mode 100644
index 0000000..63eacf1
--- /dev/null
+++ b/database/migrations/2023_11_22_151403_create_rss_post_keywords_table.php
@@ -0,0 +1,38 @@
+id();
+            $table->foreignId('rss_post_id');
+            $table->enum('type',['keyword','entity']);
+            $table->boolean('is_main')->default(false);
+            $table->string('value');
+            $table->string('value_lowercased');
+            $table->timestamps();
+
+            $table->foreign('rss_post_id')->references('id')->on('rss_posts');
+
+            $table->index('value');
+            $table->index('value_lowercased');
+
+        });
+    }
+
+    /**
+     * Reverse the migrations.
+     */
+    public function down(): void
+    {
+        Schema::dropIfExists('rss_post_keywords');
+    }
+};
diff --git a/database/migrations/2023_11_22_154104_add_keyword_saved_to_rss_posts_table.php b/database/migrations/2023_11_22_154104_add_keyword_saved_to_rss_posts_table.php
new file mode 100644
index 0000000..3820aaa
--- /dev/null
+++ b/database/migrations/2023_11_22_154104_add_keyword_saved_to_rss_posts_table.php
@@ -0,0 +1,28 @@
+boolean('keyword_saved')->default(false);
+        });
+    }
+
+    /**
+     * Reverse the migrations.
+     */
+    public function down(): void
+    {
+        Schema::table('rss_posts', function (Blueprint $table) {
+            $table->dropColumn('keyword_saved');
+        });
+    }
+};
diff --git a/routes/tests.php b/routes/tests.php
index fbb1060..64729fa 100644
--- a/routes/tests.php
+++ b/routes/tests.php
@@ -56,6 +56,7 @@
     return 'ok';
 });
 
+Route::get('/prm', [App\Http\Controllers\Tests\TestController::class, 'prm']);
 Route::get('/opml', [App\Http\Controllers\Tests\TestController::class, 'opml']);
 
 