Files
memefast/app/Helpers/FirstParty/Maintenance/KeywordEmbeddingMaintenance.php
2025-06-20 20:16:34 +08:00

185 lines
5.7 KiB
PHP

<?php
namespace App\Helpers\FirstParty\Maintenance;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Models\Category;
use App\Models\KeywordEmbedding;
use App\Models\MemeMedia;
use App\Models\MemeMediaEmbedding;
use Exception;
use Illuminate\Contracts\Filesystem\Cloud;
use PhpParser\Lexer\TokenEmulator\KeywordEmulator;
/**
 * Maintenance routines that (re)populate the keyword embedding tables.
 *
 * Embeddings are either copied from rows that already carry one
 * (Category / MemeMedia `embedding` attribute) or requested from
 * CloudflareAI::getVectorEmbeddingBgeSmall() for individual keywords.
 * Progress is reported with dump(), so these methods are meant to be run
 * from a console / tinker session.
 */
class KeywordEmbeddingMaintenance
{
    /**
     * Maximum number of attempts made for a single embedding request.
     */
    private const MAX_EMBEDDING_ATTEMPTS = 3;

    /**
     * MemeMedia keyword attributes to process, mapped to the label used in
     * the progress output. The attribute name is also stored as the `tag`
     * of the created MemeMediaEmbedding row.
     */
    private const MEME_MEDIA_KEYWORD_GROUPS = [
        'keywords' => 'keyword',
        'action_keywords' => 'action keyword',
        'emotion_keywords' => 'emotion keyword',
        'misc_keywords' => 'misc keyword',
    ];

    /**
     * Entry point: currently only populates the per-keyword meme media embeddings.
     *
     * @return void
     */
    public static function populateEmbeddings(): void
    {
        self::populateMemeMediasKeywordsEmbeddings();
    }

    /**
     * Copy the embedding of every category that has one into KeywordEmbedding.
     *
     * The lookup key is "<name> <description>"; a row is only created when no
     * KeywordEmbedding with that keyword exists yet. Rows are tagged 'category'.
     *
     * @return void
     */
    public static function populateCategoryEmbeddings(): void
    {
        $categories = Category::whereNotNull('embedding')->get();

        foreach ($categories as $category) {
            self::storeKeywordEmbeddingIfMissing(
                $category->name . " " . $category->description,
                $category->embedding,
                'category'
            );
        }
    }

    /**
     * Request an embedding for every keyword of every category that has keywords.
     *
     * NOTE(review): the returned embedding is not persisted by this method.
     * If anything is stored, it would have to happen inside
     * CloudflareAI::getVectorEmbeddingBgeSmall() -- confirm that this
     * "warm-up only" behaviour is intended.
     *
     * @return void
     */
    public static function populateCategoryKeywordEmbeddings(): void
    {
        $categories = Category::whereNotNull('keywords')->get();

        foreach ($categories as $category) {
            // Guard against an attribute that resolves to null despite the query filter.
            $keywords = $category->keywords ?? [];

            foreach ($keywords as $keyword) {
                dump("Populating keyword embedding for {$keyword}");
                // Same retry policy as every other keyword lookup in this class.
                self::fetchAndCacheEmbedding($keyword);
            }
        }
    }

    /**
     * Create a MemeMediaEmbedding row for every keyword of every meme media.
     *
     * All four keyword attributes listed in MEME_MEDIA_KEYWORD_GROUPS are
     * processed. Keywords whose embedding could not be obtained are skipped.
     *
     * @return void
     */
    public static function populateMemeMediasKeywordsEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('keywords')->get();
        $max = $meme_medias->count();
        $count = 0;

        foreach ($meme_medias as $meme_media) {
            $count++;
            dump("Processing: {$count}/{$max}: {$meme_media->name}");

            foreach (self::MEME_MEDIA_KEYWORD_GROUPS as $tag => $label) {
                // The query only guarantees that `keywords` is non-null; the
                // other keyword attributes may be null, and iterating null
                // raises a PHP warning, so fall back to an empty list.
                $keywords = $meme_media->{$tag} ?? [];

                foreach ($keywords as $keyword) {
                    dump("Populating {$label} embedding for {$keyword}");
                    $embedding = self::fetchAndCacheEmbedding($keyword);

                    if (!$embedding) {
                        // All attempts failed (or returned an empty value): skip this keyword.
                        continue;
                    }

                    MemeMediaEmbedding::create([
                        'meme_media_id' => $meme_media->id,
                        'keyword' => $keyword,
                        'embedding' => $embedding,
                        'tag' => $tag,
                    ]);
                }
            }

            dump("✓ Successfully processed meme media {$meme_media->id}");
            dump("");
            dump("");
        }
    }

    /**
     * Copy the embedding of every meme media that has one into KeywordEmbedding.
     *
     * The lookup key is "<name> <description>"; a row is only created when no
     * KeywordEmbedding with that keyword exists yet. Rows are tagged 'meme_media'.
     *
     * @return void
     */
    public static function populateMemeMediaEmbeddings(): void
    {
        $meme_medias = MemeMedia::whereNotNull('embedding')->get();

        foreach ($meme_medias as $meme_media) {
            self::storeKeywordEmbeddingIfMissing(
                $meme_media->name . " " . $meme_media->description,
                $meme_media->embedding,
                'meme_media'
            );
        }
    }

    /**
     * Create a KeywordEmbedding row unless one with the same keyword already exists.
     *
     * @param string $keyword   Text the embedding belongs to.
     * @param mixed  $embedding Embedding value copied from the source model.
     * @param string $tag       Origin marker stored alongside the row.
     *
     * @return void
     */
    private static function storeKeywordEmbeddingIfMissing($keyword, $embedding, $tag): void
    {
        if (KeywordEmbedding::where('keyword', $keyword)->exists()) {
            return;
        }

        KeywordEmbedding::create([
            'keyword' => $keyword,
            'embedding' => $embedding,
            'tag' => $tag,
        ]);
    }

    /**
     * Request the embedding for a keyword, retrying on failure.
     *
     * Up to MAX_EMBEDDING_ATTEMPTS calls are made; an attempt counts as failed
     * when the call throws an Exception or returns null. Failures are reported
     * via dump() but never rethrown, so callers can treat this as best-effort.
     *
     * NOTE(review): no caching is performed in this method itself; the "cache"
     * in the name presumably refers to behaviour inside CloudflareAI -- confirm.
     *
     * @param mixed $keyword Keyword to embed.
     *
     * @return mixed|null The value returned by CloudflareAI, or null if every attempt failed.
     */
    private static function fetchAndCacheEmbedding($keyword)
    {
        for ($attempt = 1; $attempt <= self::MAX_EMBEDDING_ATTEMPTS; $attempt++) {
            try {
                $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword);
            } catch (Exception $e) {
                // Keep going, but leave a trace instead of failing silently.
                dump(
                    "Embedding attempt {$attempt}/" . self::MAX_EMBEDDING_ATTEMPTS
                    . " failed for {$keyword}: {$e->getMessage()}"
                );
                continue;
            }

            if ($embedding !== null) {
                return $embedding;
            }
        }

        return null;
    }
}