From 8f6fb3787a809beb301c9a0e59f511a2bfd4e4b4 Mon Sep 17 00:00:00 2001 From: ct Date: Fri, 20 Jun 2025 20:16:34 +0800 Subject: [PATCH] Update --- app/Helpers/FirstParty/AI/CloudflareAI.php | 23 ++- .../KeywordEmbeddingMaintenance.php | 184 ++++++++++++++++++ app/Helpers/FirstParty/Meme/MemeGenerator.php | 72 +++++-- app/Http/Controllers/FrontMediaController.php | 3 +- app/Http/Controllers/TestController.php | 9 + app/Models/KeywordEmbedding.php | 38 ++++ app/Models/Meme.php | 1 + app/Models/MemeMediaEmbedding.php | 51 +++++ ..._06_19_002728_create_meme_medias_table.php | 1 - ...064606_create_keyword_embeddings_table.php | 37 ++++ ...lete_tag_from_keyword_embeddings_table.php | 33 ++++ ...853_create_meme_media_embeddings_table.php | 35 ++++ ...102737_add_embedding_indexes_to_tables.php | 26 +++ routes/test.php | 2 + 14 files changed, 493 insertions(+), 22 deletions(-) create mode 100644 app/Helpers/FirstParty/Maintenance/KeywordEmbeddingMaintenance.php create mode 100644 app/Models/KeywordEmbedding.php create mode 100644 app/Models/MemeMediaEmbedding.php create mode 100644 database/migrations/2025_06_20_064606_create_keyword_embeddings_table.php create mode 100644 database/migrations/2025_06_20_101525_delete_tag_from_keyword_embeddings_table.php create mode 100644 database/migrations/2025_06_20_101853_create_meme_media_embeddings_table.php create mode 100644 database/migrations/2025_06_20_102737_add_embedding_indexes_to_tables.php diff --git a/app/Helpers/FirstParty/AI/CloudflareAI.php b/app/Helpers/FirstParty/AI/CloudflareAI.php index 37b13c8..b4de36d 100644 --- a/app/Helpers/FirstParty/AI/CloudflareAI.php +++ b/app/Helpers/FirstParty/AI/CloudflareAI.php @@ -2,6 +2,7 @@ namespace App\Helpers\FirstParty\AI; +use App\Models\KeywordEmbedding; use Exception; use Illuminate\Support\Facades\Http; use Pgvector\Laravel\Vector; @@ -16,10 +17,18 @@ public static function getVectorEmbeddingBgeSmall($embedding_query) throw new Exception('Empty embedding query.'); } + 
$keyword_embedding = KeywordEmbedding::where('keyword', $embedding_query)->first(); + + if (! is_null($keyword_embedding)) { + return $keyword_embedding->embedding; + } + $maxRetries = 3; $currentAttempt = 0; - while ($currentAttempt < $maxRetries) { + $embedding = null; + + while ($currentAttempt < $maxRetries && $embedding === null) { try { // Use the new API endpoint $response = Http::withHeaders([])->withOptions(['verify' => (app()->environment() == 'local') ? false : true])->timeout(800) @@ -37,7 +46,17 @@ public static function getVectorEmbeddingBgeSmall($embedding_query) $embedding_response = json_decode($response->body(), true); try { - return new Vector($embedding_response['response']['data'][0]); + $embedding = new Vector($embedding_response['response']['data'][0]); + + // dump($embedding); + // dump($embedding_query); + + KeywordEmbedding::create([ + 'keyword' => $embedding_query, + 'embedding' => $embedding + ]); + + break; } catch (Exception $e) { throw new Exception('Embedding response failed, null response'); } diff --git a/app/Helpers/FirstParty/Maintenance/KeywordEmbeddingMaintenance.php b/app/Helpers/FirstParty/Maintenance/KeywordEmbeddingMaintenance.php new file mode 100644 index 0000000..482d20c --- /dev/null +++ b/app/Helpers/FirstParty/Maintenance/KeywordEmbeddingMaintenance.php @@ -0,0 +1,184 @@ +get(); + + foreach ($categories as $category) { + + $embedding_query = $category->name . " " . 
$category->description; + + $keyword_embedding = KeywordEmbedding::where('keyword', $embedding_query)->first(); + + if (is_null($keyword_embedding)) { + KeywordEmbedding::create([ + 'keyword' => $embedding_query, + 'embedding' => $category->embedding, + // no 'tag' here: keyword_embeddings.tag is dropped by migration 2025_06_20_101525 + ]); + } + } + } + + public static function populateCategoryKeywordEmbeddings() + { + $categories = Category::whereNotNull('keywords')->get(); + + foreach ($categories as $key => $category) { + foreach ($category->keywords as $keyword) { + + dump("Populating keyword embedding for {$keyword}"); + + $embedding = null; + $max_retries = 3; + $current_attempt = 0; + + while ($embedding === null && $current_attempt < $max_retries) { + try { + $current_attempt++; + $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword); + } catch (Exception $e) { + $embedding = null; + } + } + } + } + } + + public static function populateMemeMediasKeywordsEmbeddings() + { + $meme_medias = MemeMedia::whereNotNull('keywords')->get(); + + $max = $meme_medias->count(); + + foreach ($meme_medias as $key => $meme_media) { + + $count = $key + 1; + + dump("Processing: {$count}/{$max}: {$meme_media->name}"); + + + // keywords: + foreach ($meme_media->keywords as $keyword) { + + dump("Populating keyword embedding for {$keyword}"); + + $embedding = self::fetchAndCacheEmbedding($keyword); + + if ($embedding) { + MemeMediaEmbedding::create([ + 'meme_media_id' => $meme_media->id, + 'keyword' => $keyword, + 'embedding' => $embedding, + 'tag' => 'keywords', + ]); + } + } + + // action_keywords: + foreach ($meme_media->action_keywords as $keyword) { + dump("Populating action keyword embedding for {$keyword}"); + $embedding = self::fetchAndCacheEmbedding($keyword); + + if ($embedding) { + MemeMediaEmbedding::create([ + 'meme_media_id' => $meme_media->id, + 'keyword' => $keyword, + 'embedding' => $embedding, + 'tag' => 'action_keywords', + ]); + } + } + + // emotion_keywords: + foreach ($meme_media->emotion_keywords as 
$keyword) { + dump("Populating emotion keyword embedding for {$keyword}"); + $embedding = self::fetchAndCacheEmbedding($keyword); + + + if ($embedding) { + MemeMediaEmbedding::create([ + 'meme_media_id' => $meme_media->id, + 'keyword' => $keyword, + 'embedding' => $embedding, + 'tag' => 'emotion_keywords', + ]); + } + } + + // misc_keywords: + foreach ($meme_media->misc_keywords as $keyword) { + dump("Populating misc keyword embedding for {$keyword}"); + $embedding = self::fetchAndCacheEmbedding($keyword); + + if ($embedding) { + MemeMediaEmbedding::create([ + 'meme_media_id' => $meme_media->id, + 'keyword' => $keyword, + 'embedding' => $embedding, + 'tag' => 'misc_keywords', + ]); + } + } + + dump("✓ Successfully processed meme media {$meme_media->id}"); + dump(""); + dump(""); + } + } + + public static function populateMemeMediaEmbeddings() + { + $meme_medias = MemeMedia::whereNotNull('embedding')->get(); + + foreach ($meme_medias as $meme_media) { + $embedding_query = $meme_media->name . " " . 
$meme_media->description; + + $keyword_embedding = KeywordEmbedding::where('keyword', $embedding_query)->first(); + + if (is_null($keyword_embedding)) { + KeywordEmbedding::create([ + 'keyword' => $embedding_query, + 'embedding' => $meme_media->embedding, + // no 'tag' here: keyword_embeddings.tag is dropped by migration 2025_06_20_101525 + ]); + } + } + } + + private static function fetchAndCacheEmbedding($keyword) + { + $embedding = null; + $max_retries = 3; + $current_attempt = 0; + + + while ($embedding === null && $current_attempt < $max_retries) { + $current_attempt++; + try { + $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword); + } catch (Exception $e) { + } + } + + return $embedding; + } +} diff --git a/app/Helpers/FirstParty/Meme/MemeGenerator.php b/app/Helpers/FirstParty/Meme/MemeGenerator.php index 685a29d..e7e93cb 100644 --- a/app/Helpers/FirstParty/Meme/MemeGenerator.php +++ b/app/Helpers/FirstParty/Meme/MemeGenerator.php @@ -11,6 +11,7 @@ use App\Models\Category; use App\Models\Meme; use App\Models\MemeMedia; +use App\Models\MemeMediaEmbedding; use Pgvector\Laravel\Distance; use Str; @@ -22,10 +23,46 @@ class MemeGenerator const STATUS_COMPLETED = 'completed'; - public static function getSuitableMeme(Meme $meme) + public static function getSuitableMemeMedia(Meme $meme) { - //dd($meme->toArray()); - return MemeMedia::first(); + $meme_media = null; + + + $primary_keyword_type = $meme->primary_keyword_type; + + if ($primary_keyword_type == 'action') { + + $meme_media = self::getMemeMediaByKeywords($meme->action_keywords, 2, 'action_keywords'); + + if (is_null($meme_media)) { + + $keywords = array_merge($meme->emotion_keywords, $meme->misc_keywords, $meme->keywords); + + $meme_media = self::getMemeMediaByKeywords($keywords, 2); + } + } else if ($primary_keyword_type == 'emotion') { + $meme_media = self::getMemeMediaByKeywords($meme->emotion_keywords, 2, 'emotion_keywords'); + + if (is_null($meme_media)) { + $keywords = array_merge($meme->action_keywords, $meme->misc_keywords, $meme->keywords); + + 
$meme_media = self::getMemeMediaByKeywords($keywords, 2); + } + } else if ($primary_keyword_type == 'misc') { + $meme_media = self::getMemeMediaByKeywords($meme->misc_keywords, 2, 'misc_keywords'); + + if (is_null($meme_media)) { + $keywords = array_merge($meme->action_keywords, $meme->emotion_keywords, $meme->keywords); + + $meme_media = self::getMemeMediaByKeywords($keywords, 2); + } + } + + if (is_null($meme_media)) { + $meme_media = MemeMedia::query()->inRandomOrder()->first(); + } + + return $meme_media; } public static function generateMemeByCategory(Category $category) @@ -56,7 +93,7 @@ public static function generateMemeByCategory(Category $category) if (! is_null($meme) && $meme->status == self::STATUS_PENDING) { // populate meme_id - $meme->meme_id = null; // self::getMemeMediaByKeywords($meme_output->keywords)->id; + $meme->meme_id = self::getSuitableMemeMedia($meme)->id; $meme->background_id = self::generateBackgroundMediaWithRunware($meme_output->background)->id; if ( @@ -172,24 +209,23 @@ public static function generateBackgroundMediaWithRunware($prompt) return $background_media; } - public static function getMemeMediaByKeywords(array $keywords) + public static function getMemeMediaByKeywords(array $keywords, int $tolerance = 10, ?string $tag = null) { - $meme_media = null; - $meme_medias = MemeMedia::withAnyTags($keywords)->take(10)->get(); + $meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords)); + + $meme_media = null; // stays null when no neighbour is found, so callers' is_null() fallback works + $meme_medias = MemeMediaEmbedding::query() + ->when(! empty($tag), function ($query) use ($tag) { + return $query->where('tag', $tag); + }) + ->nearestNeighbors('embedding', $meme_embedding, Distance::L2) + ->take($tolerance) + ->get(); + if ($meme_medias->count() > 0) { - $meme_media = $meme_medias->random(); - } - - if (is_null($meme_media)) { - $meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords)); - - $meme_medias = MemeMedia::query()->nearestNeighbors('embedding', $meme_embedding, 
Distance::L2)->take(10)->get(); - - if ($meme_medias->count() > 0) { - $meme_media = $meme_medias->random(); - } + $meme_media = $meme_medias->random()->meme_media; } return $meme_media; diff --git a/app/Http/Controllers/FrontMediaController.php b/app/Http/Controllers/FrontMediaController.php index 3ce72bf..115f563 100644 --- a/app/Http/Controllers/FrontMediaController.php +++ b/app/Http/Controllers/FrontMediaController.php @@ -14,12 +14,13 @@ public function init(Request $request) { $meme = Meme::with('meme_media', 'background_media')->where('status', MemeGenerator::STATUS_COMPLETED)->take(1)->latest()->first(); - $meme_media = MemeGenerator::getSuitableMeme($meme); + $meme_media = MemeGenerator::getSuitableMemeMedia($meme); return response()->json([ 'success' => [ 'data' => [ 'init' => [ + 'info' => $meme, 'caption' => $meme->caption, 'meme' => $meme_media, 'background' => $meme->background_media, diff --git a/app/Http/Controllers/TestController.php b/app/Http/Controllers/TestController.php index 3f291d0..ddc6dad 100644 --- a/app/Http/Controllers/TestController.php +++ b/app/Http/Controllers/TestController.php @@ -7,6 +7,7 @@ use App\Helpers\FirstParty\AspectRatio; use App\Helpers\FirstParty\Meme\MemeGenerator; use App\Models\Category; +use App\Models\Meme; use App\Models\MemeMedia; use Str; @@ -17,6 +18,14 @@ public function index() // } + public function getSuitableMemeMedia() + { + $meme = Meme::inRandomOrder()->first(); + $meme_media = MemeGenerator::getSuitableMemeMedia($meme); + + dd($meme_media); + } + public function getMemeKeywords() { diff --git a/app/Models/KeywordEmbedding.php b/app/Models/KeywordEmbedding.php new file mode 100644 index 0000000..f9b93a6 --- /dev/null +++ b/app/Models/KeywordEmbedding.php @@ -0,0 +1,38 @@ + Vector::class, + ]; + + protected $fillable = [ + 'keyword', + 'embedding', + 'tag' + ]; +} diff --git a/app/Models/Meme.php b/app/Models/Meme.php index 3eb1f6d..5569508 100644 --- a/app/Models/Meme.php +++ b/app/Models/Meme.php @@ 
-69,6 +69,7 @@ class Meme extends Model 'action_keywords', 'emotion_keywords', 'misc_keywords', + 'primary_keyword_type', ]; public function meme_media() diff --git a/app/Models/MemeMediaEmbedding.php b/app/Models/MemeMediaEmbedding.php new file mode 100644 index 0000000..02b6256 --- /dev/null +++ b/app/Models/MemeMediaEmbedding.php @@ -0,0 +1,51 @@ + 'int', + 'embedding' => Vector::class, + ]; + + protected $fillable = [ + 'meme_media_id', + 'keyword', + 'embedding', + 'tag' + ]; + + public function meme_media() + { + return $this->belongsTo(MemeMedia::class); + } +} diff --git a/database/migrations/2025_06_19_002728_create_meme_medias_table.php b/database/migrations/2025_06_19_002728_create_meme_medias_table.php index 53b7cb0..072acac 100644 --- a/database/migrations/2025_06_19_002728_create_meme_medias_table.php +++ b/database/migrations/2025_06_19_002728_create_meme_medias_table.php @@ -28,7 +28,6 @@ public function up(): void $table->string('webm_url'); $table->string('gif_url'); $table->string('webp_url'); - $table->vector('embedding', 384)->nullable(); $table->double('duration')->nullable(); $table->integer('media_width')->default(720); $table->integer('media_height')->default(1280); diff --git a/database/migrations/2025_06_20_064606_create_keyword_embeddings_table.php b/database/migrations/2025_06_20_064606_create_keyword_embeddings_table.php new file mode 100644 index 0000000..7df4c1b --- /dev/null +++ b/database/migrations/2025_06_20_064606_create_keyword_embeddings_table.php @@ -0,0 +1,37 @@ +id(); + $table->text('keyword')->unique()->index(); + $table->vector('embedding', 384); + $table->string('tag')->nullable(); + $table->timestamps(); + + // Add indexes for performance + $table->index(['keyword']); + $table->index(['tag']); + }); + + // Add HNSW index for fast vector similarity search + DB::statement('CREATE INDEX keyword_embeddings_embedding_hnsw_idx ON keyword_embeddings USING hnsw (embedding vector_cosine_ops)'); + } + + /** + * Reverse the 
migrations. + */ + public function down(): void + { + Schema::dropIfExists('keyword_embeddings'); + } +}; diff --git a/database/migrations/2025_06_20_101525_delete_tag_from_keyword_embeddings_table.php b/database/migrations/2025_06_20_101525_delete_tag_from_keyword_embeddings_table.php new file mode 100644 index 0000000..787ec02 --- /dev/null +++ b/database/migrations/2025_06_20_101525_delete_tag_from_keyword_embeddings_table.php @@ -0,0 +1,33 @@ +dropIndex(['tag']); + $table->dropColumn('tag'); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('keyword_embeddings', function (Blueprint $table) { + // Add the column back + $table->string('tag')->nullable(); + // Add the index back + $table->index(['tag']); + }); + } +}; diff --git a/database/migrations/2025_06_20_101853_create_meme_media_embeddings_table.php b/database/migrations/2025_06_20_101853_create_meme_media_embeddings_table.php new file mode 100644 index 0000000..417fe50 --- /dev/null +++ b/database/migrations/2025_06_20_101853_create_meme_media_embeddings_table.php @@ -0,0 +1,35 @@ +id(); + $table->foreignId('meme_media_id')->references('id')->on('meme_medias'); + $table->text('keyword')->index(); + $table->vector('embedding', 384); + $table->string('tag')->nullable(); + $table->timestamps(); + + $table->index('tag'); + }); + + DB::statement('CREATE INDEX meme_media_embeddings_embedding_hnsw_idx ON meme_media_embeddings USING hnsw (embedding vector_cosine_ops)'); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::dropIfExists('meme_media_embeddings'); + } +}; diff --git a/database/migrations/2025_06_20_102737_add_embedding_indexes_to_tables.php b/database/migrations/2025_06_20_102737_add_embedding_indexes_to_tables.php new file mode 100644 index 0000000..d493fc3 --- /dev/null +++ b/database/migrations/2025_06_20_102737_add_embedding_indexes_to_tables.php @@ -0,0 +1,26 @@ +