This commit is contained in:
ct
2025-06-20 20:16:34 +08:00
parent b502120091
commit 8f6fb3787a
14 changed files with 493 additions and 22 deletions

View File

@@ -2,6 +2,7 @@
namespace App\Helpers\FirstParty\AI; namespace App\Helpers\FirstParty\AI;
use App\Models\KeywordEmbedding;
use Exception; use Exception;
use Illuminate\Support\Facades\Http; use Illuminate\Support\Facades\Http;
use Pgvector\Laravel\Vector; use Pgvector\Laravel\Vector;
@@ -16,10 +17,18 @@ public static function getVectorEmbeddingBgeSmall($embedding_query)
throw new Exception('Empty embedding query.'); throw new Exception('Empty embedding query.');
} }
$keyword_embedding = KeywordEmbedding::where('keyword', $embedding_query)->first();
if (! is_null($keyword_embedding)) {
return $keyword_embedding->embedding;
}
$maxRetries = 3; $maxRetries = 3;
$currentAttempt = 0; $currentAttempt = 0;
while ($currentAttempt < $maxRetries) { $embedding = null;
while ($currentAttempt < $maxRetries && $embedding === null) {
try { try {
// Use the new API endpoint // Use the new API endpoint
$response = Http::withHeaders([])->withOptions(['verify' => (app()->environment() == 'local') ? false : true])->timeout(800) $response = Http::withHeaders([])->withOptions(['verify' => (app()->environment() == 'local') ? false : true])->timeout(800)
@@ -37,7 +46,17 @@ public static function getVectorEmbeddingBgeSmall($embedding_query)
$embedding_response = json_decode($response->body(), true); $embedding_response = json_decode($response->body(), true);
try { try {
return new Vector($embedding_response['response']['data'][0]); $embedding = new Vector($embedding_response['response']['data'][0]);
// dump($embedding);
// dump($embedding_query);
KeywordEmbedding::create([
'keyword' => $embedding_query,
'embedding' => $embedding
]);
break;
} catch (Exception $e) { } catch (Exception $e) {
throw new Exception('Embedding response failed, null response'); throw new Exception('Embedding response failed, null response');
} }

View File

@@ -0,0 +1,184 @@
<?php
namespace App\Helpers\FirstParty\Maintenance;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Models\Category;
use App\Models\KeywordEmbedding;
use App\Models\MemeMedia;
use App\Models\MemeMediaEmbedding;
use Exception;
use Illuminate\Contracts\Filesystem\Cloud;
use PhpParser\Lexer\TokenEmulator\KeywordEmulator;
/**
 * Maintenance routines that back-fill the keyword embedding tables.
 *
 * Progress is written with dump(), so these methods are meant to be run
 * from a console / tinker session rather than from a web request.
 */
class KeywordEmbeddingMaintenance
{
    /**
     * How many times a single keyword is sent to the embedding API before giving up.
     */
    private const MAX_FETCH_ATTEMPTS = 3;

    /**
     * MemeMedia keyword attributes to embed, mapped to the label used in the
     * progress output. The attribute name is also stored as the row's `tag`.
     */
    private const MEME_MEDIA_KEYWORD_TAGS = [
        'keywords' => 'keyword',
        'action_keywords' => 'action keyword',
        'emotion_keywords' => 'emotion keyword',
        'misc_keywords' => 'misc keyword',
    ];

    /**
     * Entry point: currently only populates the per-keyword meme media embeddings.
     */
    public static function populateEmbeddings(): void
    {
        self::populateMemeMediasKeywordsEmbeddings();
    }

    /**
     * Copy each category's existing embedding into keyword_embeddings, keyed by
     * "<name> <description>", unless a row for that key already exists.
     *
     * NOTE(review): another migration in this change set drops
     * keyword_embeddings.tag — confirm the column still exists before running.
     */
    public static function populateCategoryEmbeddings(): void
    {
        $categories = Category::whereNotNull('embedding')->get();

        foreach ($categories as $category) {
            self::storeKeywordEmbeddingIfMissing(
                $category->name . ' ' . $category->description,
                $category->embedding,
                'category'
            );
        }
    }

    /**
     * Request an embedding for every keyword of every category that has keywords.
     *
     * The returned embedding is intentionally discarded: the call is made so that
     * CloudflareAI::getVectorEmbeddingBgeSmall() stores the result in keyword_embeddings.
     */
    public static function populateCategoryKeywordEmbeddings(): void
    {
        $categories = Category::whereNotNull('keywords')->get();

        foreach ($categories as $category) {
            foreach ($category->keywords as $keyword) {
                dump("Populating keyword embedding for {$keyword}");
                self::fetchAndCacheEmbedding($keyword);
            }
        }
    }

    /**
     * Create one MemeMediaEmbedding row per keyword for every keyword attribute
     * (keywords, action_keywords, emotion_keywords, misc_keywords) of each MemeMedia.
     *
     * Safe to re-run: a (meme_media_id, keyword, tag) combination that already
     * exists is left untouched instead of being inserted again.
     */
    public static function populateMemeMediasKeywordsEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('keywords')->get();
        $max = $meme_medias->count();

        foreach ($meme_medias as $key => $meme_media) {
            $count = $key + 1;
            dump("Processing: {$count}/{$max}: {$meme_media->name}");

            foreach (self::MEME_MEDIA_KEYWORD_TAGS as $tag => $label) {
                // Only `keywords` is guaranteed non-null by the query above;
                // the other keyword attributes may be null for a given row.
                foreach ($meme_media->{$tag} ?? [] as $keyword) {
                    dump("Populating {$label} embedding for {$keyword}");

                    $embedding = self::fetchAndCacheEmbedding($keyword);
                    if (! $embedding) {
                        // All attempts failed; skip this keyword and carry on.
                        continue;
                    }

                    MemeMediaEmbedding::firstOrCreate(
                        [
                            'meme_media_id' => $meme_media->id,
                            'keyword' => $keyword,
                            'tag' => $tag,
                        ],
                        [
                            'embedding' => $embedding,
                        ]
                    );
                }
            }

            dump("✓ Successfully processed meme media {$meme_media->id}");
            dump("");
            dump("");
        }
    }

    /**
     * Copy each meme media's existing embedding into keyword_embeddings, keyed by
     * "<name> <description>", unless a row for that key already exists.
     *
     * NOTE(review): this change set also removes the `embedding` column from the
     * meme_medias create-table migration and drops keyword_embeddings.tag —
     * confirm both columns still exist before running.
     */
    public static function populateMemeMediaEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('embedding')->get();

        foreach ($meme_medias as $meme_media) {
            self::storeKeywordEmbeddingIfMissing(
                $meme_media->name . ' ' . $meme_media->description,
                $meme_media->embedding,
                'meme_media'
            );
        }
    }

    /**
     * Insert a keyword_embeddings row for $keyword unless one already exists.
     *
     * @param string $keyword   Lookup key for the row.
     * @param mixed  $embedding Embedding value copied from the source model.
     * @param string $tag       Origin marker stored alongside the embedding.
     */
    private static function storeKeywordEmbeddingIfMissing($keyword, $embedding, $tag): void
    {
        KeywordEmbedding::firstOrCreate(
            ['keyword' => $keyword],
            ['embedding' => $embedding, 'tag' => $tag]
        );
    }

    /**
     * Fetch the embedding for a keyword, retrying up to MAX_FETCH_ATTEMPTS times.
     *
     * @param mixed $keyword Keyword passed straight to CloudflareAI::getVectorEmbeddingBgeSmall().
     *
     * @return mixed|null The embedding returned by CloudflareAI, or null when every attempt threw.
     */
    private static function fetchAndCacheEmbedding($keyword)
    {
        $embedding = null;
        $last_exception = null;

        for ($attempt = 1; $attempt <= self::MAX_FETCH_ATTEMPTS && $embedding === null; $attempt++) {
            try {
                $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword);
            } catch (Exception $e) {
                $last_exception = $e;
            }
        }

        // Stay best-effort (callers skip null), but leave a trace of why the
        // keyword was skipped instead of swallowing the failure silently.
        if ($embedding === null && $last_exception !== null) {
            report($last_exception);
        }

        return $embedding;
    }
}

View File

@@ -11,6 +11,7 @@
use App\Models\Category; use App\Models\Category;
use App\Models\Meme; use App\Models\Meme;
use App\Models\MemeMedia; use App\Models\MemeMedia;
use App\Models\MemeMediaEmbedding;
use Pgvector\Laravel\Distance; use Pgvector\Laravel\Distance;
use Str; use Str;
@@ -22,10 +23,46 @@ class MemeGenerator
const STATUS_COMPLETED = 'completed'; const STATUS_COMPLETED = 'completed';
/**
 * Pick a MemeMedia that matches the meme's keywords.
 *
 * Search order:
 *  1. the keyword list named by $meme->primary_keyword_type, restricted to
 *     embeddings carrying the matching tag;
 *  2. all remaining keyword lists merged together, with no tag restriction;
 *  3. a random MemeMedia.
 *
 * @param Meme $meme Meme whose keyword attributes drive the lookup.
 *
 * @return MemeMedia|null Null only when no match is found and the random fallback returns nothing.
 */
public static function getSuitableMemeMedia(Meme $meme)
{
    // primary_keyword_type value => Meme attribute (also the embedding tag).
    $typed_attributes = [
        'action' => 'action_keywords',
        'emotion' => 'emotion_keywords',
        'misc' => 'misc_keywords',
    ];

    $meme_media = null;
    $primary_type = $meme->primary_keyword_type;
    $primary_attribute = is_string($primary_type) ? ($typed_attributes[$primary_type] ?? null) : null;

    if (! is_null($primary_attribute)) {
        // Keyword attributes may be null; treat that the same as an empty list.
        $primary_keywords = (array) ($meme->{$primary_attribute} ?? []);

        // An empty list would implode to an empty embedding query, which the
        // embedding helper appears to reject with an exception — skip instead.
        if (! empty($primary_keywords)) {
            $meme_media = self::getMemeMediaByKeywords($primary_keywords, 2, $primary_attribute);
        }

        if (is_null($meme_media)) {
            // Remaining typed lists in their fixed order, then the generic keywords.
            $keywords = [];
            foreach ($typed_attributes as $attribute) {
                if ($attribute !== $primary_attribute) {
                    $keywords = array_merge($keywords, (array) ($meme->{$attribute} ?? []));
                }
            }
            $keywords = array_merge($keywords, (array) ($meme->keywords ?? []));

            if (! empty($keywords)) {
                $meme_media = self::getMemeMediaByKeywords($keywords, 2);
            }
        }
    }

    if (is_null($meme_media)) {
        $meme_media = MemeMedia::query()->inRandomOrder()->first();
    }

    return $meme_media;
}
public static function generateMemeByCategory(Category $category) public static function generateMemeByCategory(Category $category)
@@ -56,7 +93,7 @@ public static function generateMemeByCategory(Category $category)
if (! is_null($meme) && $meme->status == self::STATUS_PENDING) { if (! is_null($meme) && $meme->status == self::STATUS_PENDING) {
// populate meme_id // populate meme_id
$meme->meme_id = null; // self::getMemeMediaByKeywords($meme_output->keywords)->id; $meme->meme_id = self::getSuitableMemeMedia($meme)->id;
$meme->background_id = self::generateBackgroundMediaWithRunware($meme_output->background)->id; $meme->background_id = self::generateBackgroundMediaWithRunware($meme_output->background)->id;
if ( if (
@@ -172,24 +209,23 @@ public static function generateBackgroundMediaWithRunware($prompt)
return $background_media; return $background_media;
} }
public static function getMemeMediaByKeywords(array $keywords) public static function getMemeMediaByKeywords(array $keywords, int $tolerance = 10, ?string $tag = null)
{ {
$meme_media = null;
$meme_medias = MemeMedia::withAnyTags($keywords)->take(10)->get(); $meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords));
$meme_medias = MemeMediaEmbedding::query()
->when(!is_empty($tag), function ($query) use ($tag) {
return $query->where('tag', $tag);
})
->nearestNeighbors('embedding', $meme_embedding, Distance::L2)
->take($tolerance)
->get();
if ($meme_medias->count() > 0) { if ($meme_medias->count() > 0) {
$meme_media = $meme_medias->random(); $meme_media = $meme_medias->random()->meme_media;
}
if (is_null($meme_media)) {
$meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords));
$meme_medias = MemeMedia::query()->nearestNeighbors('embedding', $meme_embedding, Distance::L2)->take(10)->get();
if ($meme_medias->count() > 0) {
$meme_media = $meme_medias->random();
}
} }
return $meme_media; return $meme_media;

View File

@@ -14,12 +14,13 @@ public function init(Request $request)
{ {
$meme = Meme::with('meme_media', 'background_media')->where('status', MemeGenerator::STATUS_COMPLETED)->take(1)->latest()->first(); $meme = Meme::with('meme_media', 'background_media')->where('status', MemeGenerator::STATUS_COMPLETED)->take(1)->latest()->first();
$meme_media = MemeGenerator::getSuitableMeme($meme); $meme_media = MemeGenerator::getSuitableMemeMedia($meme);
return response()->json([ return response()->json([
'success' => [ 'success' => [
'data' => [ 'data' => [
'init' => [ 'init' => [
'info' => $meme,
'caption' => $meme->caption, 'caption' => $meme->caption,
'meme' => $meme_media, 'meme' => $meme_media,
'background' => $meme->background_media, 'background' => $meme->background_media,

View File

@@ -7,6 +7,7 @@
use App\Helpers\FirstParty\AspectRatio; use App\Helpers\FirstParty\AspectRatio;
use App\Helpers\FirstParty\Meme\MemeGenerator; use App\Helpers\FirstParty\Meme\MemeGenerator;
use App\Models\Category; use App\Models\Category;
use App\Models\Meme;
use App\Models\MemeMedia; use App\Models\MemeMedia;
use Str; use Str;
@@ -17,6 +18,14 @@ public function index()
// //
} }
/**
 * Debug endpoint: pick a random meme and dump the media chosen for it.
 */
public function getSuitableMemeMedia()
{
    $meme = Meme::inRandomOrder()->first();

    // first() returns null on an empty table, and
    // MemeGenerator::getSuitableMemeMedia() requires a Meme instance.
    abort_if(is_null($meme), 404, 'No memes available.');

    $meme_media = MemeGenerator::getSuitableMemeMedia($meme);

    dd($meme_media);
}
public function getMemeKeywords() public function getMemeKeywords()
{ {

View File

@@ -0,0 +1,38 @@
<?php
/**
* Created by Reliese Model.
*/
namespace App\Models;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Model;
use Pgvector\Laravel\Vector;
/**
 * Class KeywordEmbedding
 *
 * One stored embedding per keyword string (table `keyword_embeddings`).
 *
 * @property int $id
 * @property string $keyword
 * @property Vector $embedding
 * @property string|null $tag NOTE(review): a migration in this change set drops this column — confirm it still exists.
 * @property Carbon|null $created_at
 * @property Carbon|null $updated_at
 *
 * @package App\Models
 */
class KeywordEmbedding extends Model
{
    protected $table = 'keyword_embeddings';

    // Hydrate the pgvector column as a Vector instance.
    protected $casts = [
        'embedding' => Vector::class,
    ];

    protected $fillable = [
        'keyword',
        'embedding',
        'tag',
    ];
}

View File

@@ -69,6 +69,7 @@ class Meme extends Model
'action_keywords', 'action_keywords',
'emotion_keywords', 'emotion_keywords',
'misc_keywords', 'misc_keywords',
'primary_keyword_type',
]; ];
public function meme_media() public function meme_media()

View File

@@ -0,0 +1,51 @@
<?php
/**
* Created by Reliese Model.
*/
namespace App\Models;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Model;
use Pgvector\Laravel\HasNeighbors;
use Pgvector\Laravel\Vector;
/**
 * Class MemeMediaEmbedding
 *
 * A single keyword embedding attached to a MemeMedia row
 * (table `meme_media_embeddings`). The HasNeighbors trait is mixed in
 * so the model can be queried with nearestNeighbors().
 *
 * @property int $id
 * @property int $meme_media_id
 * @property string $keyword
 * @property Vector $embedding
 * @property string|null $tag
 * @property Carbon|null $created_at
 * @property Carbon|null $updated_at
 *
 * @property MemeMedia $meme_media
 *
 * @package App\Models
 */
class MemeMediaEmbedding extends Model
{
    use HasNeighbors;

    protected $table = 'meme_media_embeddings';

    // Attributes that may be mass-assigned through create() / fill().
    protected $fillable = [
        'meme_media_id',
        'keyword',
        'embedding',
        'tag',
    ];

    // Attribute casting applied when reading from / writing to the database.
    protected $casts = [
        'meme_media_id' => 'int',
        'embedding' => Vector::class,
    ];

    /**
     * The MemeMedia this embedding belongs to.
     */
    public function meme_media()
    {
        return $this->belongsTo(MemeMedia::class);
    }
}

View File

@@ -28,7 +28,6 @@ public function up(): void
$table->string('webm_url'); $table->string('webm_url');
$table->string('gif_url'); $table->string('gif_url');
$table->string('webp_url'); $table->string('webp_url');
$table->vector('embedding', 384)->nullable();
$table->double('duration')->nullable(); $table->double('duration')->nullable();
$table->integer('media_width')->default(720); $table->integer('media_width')->default(720);
$table->integer('media_height')->default(1280); $table->integer('media_height')->default(1280);

View File

@@ -0,0 +1,37 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Creates `keyword_embeddings`: one 384-dimension embedding per unique keyword.
     */
    public function up(): void
    {
        Schema::create('keyword_embeddings', function (Blueprint $table) {
            $table->id();
            // The unique constraint is already backed by an index, so no
            // additional plain index on `keyword` is declared.
            $table->text('keyword')->unique();
            $table->vector('embedding', 384);
            $table->string('tag')->nullable();
            $table->timestamps();

            $table->index(['tag']);
        });

        // Add HNSW index for fast vector similarity search.
        // Fully qualified so the migration does not depend on the global `DB` alias.
        \Illuminate\Support\Facades\DB::statement('CREATE INDEX keyword_embeddings_embedding_hnsw_idx ON keyword_embeddings USING hnsw (embedding vector_cosine_ops)');
    }

    /**
     * Reverse the migrations.
     *
     * Dropping the table also removes the indexes created in up().
     */
    public function down(): void
    {
        Schema::dropIfExists('keyword_embeddings');
    }
};

View File

@@ -0,0 +1,33 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Removes the `tag` column, and the index on it, from `keyword_embeddings`.
     */
    public function up(): void
    {
        Schema::table('keyword_embeddings', static function (Blueprint $blueprint): void {
            // The index has to go before the column it covers.
            $blueprint->dropIndex(['tag']);
            $blueprint->dropColumn('tag');
        });
    }

    /**
     * Reverse the migrations.
     *
     * Restores the nullable `tag` column together with its index.
     */
    public function down(): void
    {
        Schema::table('keyword_embeddings', static function (Blueprint $blueprint): void {
            $blueprint->string('tag')->nullable();
            $blueprint->index(['tag']);
        });
    }
};

View File

@@ -0,0 +1,35 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Creates `meme_media_embeddings`: one 384-dimension embedding per
     * (meme media, keyword, tag) entry.
     */
    public function up(): void
    {
        Schema::create('meme_media_embeddings', function (Blueprint $table) {
            $table->id();
            $table->foreignId('meme_media_id')->references('id')->on('meme_medias');
            $table->text('keyword')->index();
            $table->vector('embedding', 384);
            $table->string('tag')->nullable();
            $table->timestamps();

            $table->index('tag');
        });

        // The lookup in MemeGenerator orders by L2 distance (Distance::L2), and an
        // HNSW index is only usable by the distance operator matching its operator
        // class — so build it with vector_l2_ops rather than vector_cosine_ops.
        // Fully qualified so the migration does not depend on the global `DB` alias.
        \Illuminate\Support\Facades\DB::statement('CREATE INDEX meme_media_embeddings_embedding_hnsw_idx ON meme_media_embeddings USING hnsw (embedding vector_l2_ops)');
    }

    /**
     * Reverse the migrations.
     *
     * Dropping the table also removes the indexes created in up().
     */
    public function down(): void
    {
        Schema::dropIfExists('meme_media_embeddings');
    }
};

View File

@@ -0,0 +1,26 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds HNSW vector indexes on the `embedding` column of `categories`
     * and `background_medias`.
     */
    public function up(): void
    {
        $statements = [
            'CREATE INDEX category_embeddings_embedding_hnsw_idx ON categories USING hnsw (embedding vector_cosine_ops)',
            'CREATE INDEX background_media_embeddings_embedding_hnsw_idx ON background_medias USING hnsw (embedding vector_cosine_ops)',
        ];

        foreach ($statements as $sql) {
            DB::statement($sql);
        }
    }

    /**
     * Reverse the migrations.
     *
     * Drops both indexes; IF EXISTS keeps the rollback from failing when an
     * index is already gone.
     */
    public function down(): void
    {
        $statements = [
            'DROP INDEX IF EXISTS category_embeddings_embedding_hnsw_idx',
            'DROP INDEX IF EXISTS background_media_embeddings_embedding_hnsw_idx',
        ];

        foreach ($statements as $sql) {
            DB::statement($sql);
        }
    }
};

View File

@@ -15,3 +15,5 @@
Route::get('/aspectRatio', [TestController::class, 'aspectRatio']); Route::get('/aspectRatio', [TestController::class, 'aspectRatio']);
Route::get('/getMemeKeywords', [TestController::class, 'getMemeKeywords']); Route::get('/getMemeKeywords', [TestController::class, 'getMemeKeywords']);
Route::get('/getSuitableMemeMedia', [TestController::class, 'getSuitableMemeMedia']);