This commit is contained in:
ct
2025-06-20 20:16:34 +08:00
parent b502120091
commit 8f6fb3787a
14 changed files with 493 additions and 22 deletions

View File

@@ -2,6 +2,7 @@
namespace App\Helpers\FirstParty\AI;
use App\Models\KeywordEmbedding;
use Exception;
use Illuminate\Support\Facades\Http;
use Pgvector\Laravel\Vector;
@@ -16,10 +17,18 @@ public static function getVectorEmbeddingBgeSmall($embedding_query)
throw new Exception('Empty embedding query.');
}
$keyword_embedding = KeywordEmbedding::where('keyword', $embedding_query)->first();
if (! is_null($keyword_embedding)) {
return $keyword_embedding->embedding;
}
$maxRetries = 3;
$currentAttempt = 0;
while ($currentAttempt < $maxRetries) {
$embedding = null;
while ($currentAttempt < $maxRetries && $embedding === null) {
try {
// Use the new API endpoint
$response = Http::withHeaders([])->withOptions(['verify' => (app()->environment() == 'local') ? false : true])->timeout(800)
@@ -37,7 +46,17 @@ public static function getVectorEmbeddingBgeSmall($embedding_query)
$embedding_response = json_decode($response->body(), true);
try {
return new Vector($embedding_response['response']['data'][0]);
$embedding = new Vector($embedding_response['response']['data'][0]);
// dump($embedding);
// dump($embedding_query);
KeywordEmbedding::create([
'keyword' => $embedding_query,
'embedding' => $embedding
]);
break;
} catch (Exception $e) {
throw new Exception('Embedding response failed, null response');
}

View File

@@ -0,0 +1,184 @@
<?php
namespace App\Helpers\FirstParty\Maintenance;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Models\Category;
use App\Models\KeywordEmbedding;
use App\Models\MemeMedia;
use App\Models\MemeMediaEmbedding;
use Exception;
use Illuminate\Contracts\Filesystem\Cloud;
use PhpParser\Lexer\TokenEmulator\KeywordEmulator;
/**
 * Maintenance routines that back-fill the embedding tables.
 *
 * Two kinds of rows are written:
 *  - KeywordEmbedding: one row per text ("keyword") with its embedding and an optional tag.
 *  - MemeMediaEmbedding: one row per (meme media, keyword, tag) with the keyword's embedding.
 *
 * The routines report progress with dump(), so they are meant to be run
 * interactively (presumably from a console command or tinker - confirm with callers).
 */
class KeywordEmbeddingMaintenance
{
    /**
     * How many times a single keyword is sent to the embedding service before giving up.
     */
    private const MAX_EMBEDDING_ATTEMPTS = 3;

    /**
     * MemeMedia attributes that hold keyword lists.
     *
     * The key is the attribute name and is also the value stored in the
     * MemeMediaEmbedding "tag" column; the value is the label used in progress output.
     */
    private const MEME_MEDIA_KEYWORD_FIELDS = [
        'keywords' => 'keyword',
        'action_keywords' => 'action keyword',
        'emotion_keywords' => 'emotion keyword',
        'misc_keywords' => 'misc keyword',
    ];

    /**
     * Default entry point: currently runs only the meme-media keyword pass.
     */
    public static function populateEmbeddings(): void
    {
        self::populateMemeMediasKeywordsEmbeddings();
    }

    /**
     * Copy the embedding already stored on each category into KeywordEmbedding.
     *
     * The lookup key is "<name> <description>"; an existing row with that
     * keyword is left untouched.
     */
    public static function populateCategoryEmbeddings(): void
    {
        $categories = Category::whereNotNull('embedding')->get();

        foreach ($categories as $category) {
            KeywordEmbedding::firstOrCreate(
                ['keyword' => "{$category->name} {$category->description}"],
                [
                    'embedding' => $category->embedding,
                    'tag' => 'category',
                ]
            );
        }
    }

    /**
     * Request an embedding for every keyword of every category that has keywords.
     *
     * The returned embedding is not used here: the call is made for its side
     * effect, since CloudflareAI::getVectorEmbeddingBgeSmall() persists newly
     * fetched embeddings to KeywordEmbedding itself.
     */
    public static function populateCategoryKeywordEmbeddings(): void
    {
        $categories = Category::whereNotNull('keywords')->get();

        foreach ($categories as $category) {
            foreach (($category->keywords ?? []) as $keyword) {
                dump("Populating keyword embedding for {$keyword}");
                self::fetchAndCacheEmbedding($keyword);
            }
        }
    }

    /**
     * Build MemeMediaEmbedding rows for every keyword list of every meme media.
     *
     * Only meme medias whose "keywords" column is set are selected; the other
     * keyword lists may therefore be null and are treated as empty. Rows are
     * keyed by (meme_media_id, keyword, tag), so running the routine again
     * does not insert duplicates.
     */
    public static function populateMemeMediasKeywordsEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('keywords')->get();
        $max = $meme_medias->count();
        $count = 0;

        foreach ($meme_medias as $meme_media) {
            $count++;
            dump("Processing: {$count}/{$max}: {$meme_media->name}");

            foreach (self::MEME_MEDIA_KEYWORD_FIELDS as $field => $label) {
                foreach (($meme_media->{$field} ?? []) as $keyword) {
                    dump("Populating {$label} embedding for {$keyword}");

                    $embedding = self::fetchAndCacheEmbedding($keyword);
                    if (! $embedding) {
                        // Best effort: a keyword that could not be embedded is skipped.
                        continue;
                    }

                    MemeMediaEmbedding::firstOrCreate(
                        [
                            'meme_media_id' => $meme_media->id,
                            'keyword' => $keyword,
                            'tag' => $field,
                        ],
                        ['embedding' => $embedding]
                    );
                }
            }

            dump("✓ Successfully processed meme media {$meme_media->id}");
            dump("");
            dump("");
        }
    }

    /**
     * Copy the embedding already stored on each meme media into KeywordEmbedding.
     *
     * The lookup key is "<name> <description>"; an existing row with that
     * keyword is left untouched.
     */
    public static function populateMemeMediaEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('embedding')->get();

        foreach ($meme_medias as $meme_media) {
            KeywordEmbedding::firstOrCreate(
                ['keyword' => "{$meme_media->name} {$meme_media->description}"],
                [
                    'embedding' => $meme_media->embedding,
                    'tag' => 'meme_media',
                ]
            );
        }
    }

    /**
     * Fetch the embedding for one keyword, retrying on failure.
     *
     * Delegates to CloudflareAI::getVectorEmbeddingBgeSmall(), which is also
     * what stores the result in KeywordEmbedding. Failures never propagate:
     * after the last attempt the reason is dumped and null is returned so the
     * caller can skip the keyword.
     *
     * @param  mixed  $keyword  Text to embed (passed through unchanged).
     * @return mixed|null  Whatever the embedding service wrapper returned, or null when every attempt failed.
     */
    private static function fetchAndCacheEmbedding($keyword)
    {
        $max_attempts = self::MAX_EMBEDDING_ATTEMPTS;
        $last_error = null;

        for ($attempt = 1; $attempt <= $max_attempts; $attempt++) {
            $last_error = null;

            try {
                $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword);
                if ($embedding !== null) {
                    return $embedding;
                }
            } catch (Exception $e) {
                // Remember why this attempt failed; only the final failure is reported.
                $last_error = $e;
            }
        }

        $reason = is_null($last_error) ? 'no embedding returned' : $last_error->getMessage();
        dump("✗ Could not get embedding for {$keyword} after {$max_attempts} attempts: {$reason}");

        return null;
    }
}

View File

@@ -11,6 +11,7 @@
use App\Models\Category;
use App\Models\Meme;
use App\Models\MemeMedia;
use App\Models\MemeMediaEmbedding;
use Pgvector\Laravel\Distance;
use Str;
@@ -22,10 +23,46 @@ class MemeGenerator
const STATUS_COMPLETED = 'completed';
public static function getSuitableMeme(Meme $meme)
/**
 * Pick a meme media that matches the meme's keywords.
 *
 * Search order:
 *  1. the keyword list named by the meme's primary_keyword_type, restricted
 *     to embeddings carrying the matching tag;
 *  2. all remaining keyword lists merged together, without a tag filter;
 *  3. any meme media at random.
 *
 * Missing (null) keyword lists are treated as empty, and a search is only
 * issued for a non-empty list, because the keywords are joined into the
 * embedding query and an empty query cannot be embedded.
 *
 * @param  Meme  $meme  Meme whose keyword attributes drive the search.
 * @return MemeMedia|null  Null only when the random fallback finds no meme media at all.
 */
public static function getSuitableMemeMedia(Meme $meme)
{
    // primary_keyword_type => keyword attribute on the meme, which is also the embedding tag.
    $tags_by_type = [
        'action' => 'action_keywords',
        'emotion' => 'emotion_keywords',
        'misc' => 'misc_keywords',
    ];

    $primary_keyword_type = $meme->primary_keyword_type;
    $primary_tag = is_string($primary_keyword_type)
        ? ($tags_by_type[$primary_keyword_type] ?? null)
        : null;

    $meme_media = null;

    if (! is_null($primary_tag)) {
        $primary_keywords = (array) ($meme->{$primary_tag} ?? []);
        if (! empty($primary_keywords)) {
            $meme_media = self::getMemeMediaByKeywords($primary_keywords, 2, $primary_tag);
        }

        if (is_null($meme_media)) {
            // Fallback: every other typed list (in action, emotion, misc order), then the generic keywords.
            $fallback_keywords = [];
            foreach ($tags_by_type as $tag) {
                if ($tag !== $primary_tag) {
                    $fallback_keywords = array_merge($fallback_keywords, (array) ($meme->{$tag} ?? []));
                }
            }
            $fallback_keywords = array_merge($fallback_keywords, (array) ($meme->keywords ?? []));

            if (! empty($fallback_keywords)) {
                $meme_media = self::getMemeMediaByKeywords($fallback_keywords, 2);
            }
        }
    }

    if (is_null($meme_media)) {
        $meme_media = MemeMedia::query()->inRandomOrder()->first();
    }

    return $meme_media;
}
public static function generateMemeByCategory(Category $category)
@@ -56,7 +93,7 @@ public static function generateMemeByCategory(Category $category)
if (! is_null($meme) && $meme->status == self::STATUS_PENDING) {
// populate meme_id
$meme->meme_id = null; // self::getMemeMediaByKeywords($meme_output->keywords)->id;
$meme->meme_id = self::getSuitableMemeMedia($meme)->id;
$meme->background_id = self::generateBackgroundMediaWithRunware($meme_output->background)->id;
if (
@@ -172,24 +209,23 @@ public static function generateBackgroundMediaWithRunware($prompt)
return $background_media;
}
public static function getMemeMediaByKeywords(array $keywords)
public static function getMemeMediaByKeywords(array $keywords, int $tolerance = 10, ?string $tag = null)
{
$meme_media = null;
$meme_medias = MemeMedia::withAnyTags($keywords)->take(10)->get();
$meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords));
$meme_medias = MemeMediaEmbedding::query()
->when(!is_empty($tag), function ($query) use ($tag) {
return $query->where('tag', $tag);
})
->nearestNeighbors('embedding', $meme_embedding, Distance::L2)
->take($tolerance)
->get();
if ($meme_medias->count() > 0) {
$meme_media = $meme_medias->random();
}
if (is_null($meme_media)) {
$meme_embedding = CloudflareAI::getVectorEmbeddingBgeSmall(implode(' ', $keywords));
$meme_medias = MemeMedia::query()->nearestNeighbors('embedding', $meme_embedding, Distance::L2)->take(10)->get();
if ($meme_medias->count() > 0) {
$meme_media = $meme_medias->random();
}
$meme_media = $meme_medias->random()->meme_media;
}
return $meme_media;