180 lines
5.6 KiB
PHP
180 lines
5.6 KiB
PHP
<?php
|
|
|
|
namespace App\Helpers\FirstParty\Maintenance;
|
|
|
|
use App\Helpers\FirstParty\AI\CloudflareAI;
|
|
use App\Models\Category;
|
|
use App\Models\KeywordEmbedding;
|
|
use App\Models\MemeMedia;
|
|
use App\Models\MemeMediaEmbedding;
|
|
use Exception;
|
|
|
|
/**
 * Maintenance routines that back-fill embedding rows for categories and
 * meme medias.
 *
 * Every method is static and reports progress through dump(), so the class
 * is meant to be driven from a console context (tinker, artisan command).
 */
class KeywordEmbeddingMaintenance
{
    /**
     * Entry point: currently only populates the per-keyword embeddings of
     * meme medias.
     *
     * @return void
     */
    public static function populateEmbeddings()
    {
        self::populateMemeMediasKeywordsEmbeddings();
    }

    /**
     * Copy the embedding already stored on each category into the
     * KeywordEmbedding table, keyed by "<name> <description>".
     *
     * Categories without an embedding are skipped by the query; keywords
     * that already have a KeywordEmbedding row are left untouched.
     *
     * @return void
     */
    public static function populateCategoryEmbeddings()
    {
        $categories = Category::whereNotNull('embedding')->get();

        foreach ($categories as $category) {
            self::storeKeywordEmbeddingIfMissing(
                $category->name.' '.$category->description,
                $category->embedding,
                'category'
            );
        }
    }

    /**
     * Request an embedding for every keyword of every category that has
     * keywords.
     *
     * NOTE(review): the fetched embedding is not persisted anywhere. The
     * original implementation discarded it as well, so this method only
     * exercises the embedding service. Confirm the intended storage target
     * (table and tag) before adding a write here.
     *
     * @return void
     */
    public static function populateCategoryKeywordEmbeddings()
    {
        $categories = Category::whereNotNull('keywords')->get();

        foreach ($categories as $category) {
            foreach ($category->keywords as $keyword) {
                dump("Populating keyword embedding for {$keyword}");

                // Shared retry logic lives in fetchAndCacheEmbedding(); the
                // result is intentionally unused (see the note above).
                self::fetchAndCacheEmbedding($keyword);
            }
        }
    }

    /**
     * Create one MemeMediaEmbedding row per keyword for every meme media
     * whose `keywords` attribute is not null.
     *
     * Four keyword groups are processed per meme media: keywords,
     * action_keywords, emotion_keywords and misc_keywords. The group name is
     * stored in the row's `tag` column.
     *
     * @return void
     */
    public static function populateMemeMediasKeywordsEmbeddings()
    {
        // Model attribute (also used as the stored tag) => label used in the
        // progress output.
        $keyword_groups = [
            'keywords' => 'keyword',
            'action_keywords' => 'action keyword',
            'emotion_keywords' => 'emotion keyword',
            'misc_keywords' => 'misc keyword',
        ];

        $meme_medias = MemeMedia::whereNotNull('keywords')->get();
        $max = $meme_medias->count();

        foreach ($meme_medias as $key => $meme_media) {
            $count = $key + 1;

            dump("Processing: {$count}/{$max}: {$meme_media->name}");

            foreach ($keyword_groups as $tag => $label) {
                self::populateMemeMediaKeywordGroup($meme_media, $tag, $label);
            }

            dump("✓ Successfully processed meme media {$meme_media->id}");
            dump('');
            dump('');
        }
    }

    /**
     * Copy the embedding already stored on each meme media into the
     * KeywordEmbedding table, keyed by "<name> <description>".
     *
     * Meme medias without an embedding are skipped by the query; keywords
     * that already have a KeywordEmbedding row are left untouched.
     *
     * @return void
     */
    public static function populateMemeMediaEmbeddings()
    {
        $meme_medias = MemeMedia::whereNotNull('embedding')->get();

        foreach ($meme_medias as $meme_media) {
            self::storeKeywordEmbeddingIfMissing(
                $meme_media->name.' '.$meme_media->description,
                $meme_media->embedding,
                'meme_media'
            );
        }
    }

    /**
     * Fetch and store an embedding for each keyword in one keyword group of
     * a meme media.
     *
     * @param  \App\Models\MemeMedia  $meme_media  Meme media being processed.
     * @param  string  $tag  Attribute holding the keywords; also written to the `tag` column.
     * @param  string  $label  Human-readable group name for the progress output.
     * @return void
     */
    private static function populateMemeMediaKeywordGroup($meme_media, $tag, $label)
    {
        $keywords = $meme_media->{$tag};

        // Only `keywords` is guaranteed non-null by the caller's query; the
        // other groups may be null, and foreach over null raises a warning.
        if (is_null($keywords)) {
            return;
        }

        foreach ($keywords as $keyword) {
            dump("Populating {$label} embedding for {$keyword}");

            $embedding = self::fetchAndCacheEmbedding($keyword);

            // A failed or empty embedding is skipped rather than stored.
            if (! $embedding) {
                continue;
            }

            MemeMediaEmbedding::create([
                'meme_media_id' => $meme_media->id,
                'keyword' => $keyword,
                'embedding' => $embedding,
                'tag' => $tag,
            ]);
        }
    }

    /**
     * Create a KeywordEmbedding row for the keyword unless one already
     * exists.
     *
     * @param  string  $keyword  Text the embedding belongs to; used as the lookup key.
     * @param  mixed  $embedding  Embedding value taken from the source model.
     * @param  string  $tag  Origin marker stored with a newly created row.
     * @return void
     */
    private static function storeKeywordEmbeddingIfMissing($keyword, $embedding, $tag)
    {
        // An existing row is matched on `keyword` only; its embedding and
        // tag are never overwritten.
        KeywordEmbedding::firstOrCreate(
            ['keyword' => $keyword],
            ['embedding' => $embedding, 'tag' => $tag]
        );
    }

    /**
     * Request an embedding for a keyword from CloudflareAI, retrying when the
     * call throws or returns null.
     *
     * Despite the name, no caching is performed in this method.
     *
     * @param  string  $keyword  Text to embed.
     * @param  int  $max_retries  Maximum number of attempts (default 3).
     * @return mixed  The embedding as returned by CloudflareAI, or null when every attempt failed.
     */
    private static function fetchAndCacheEmbedding($keyword, $max_retries = 3)
    {
        $embedding = null;
        $last_error = null;

        for ($attempt = 1; $embedding === null && $attempt <= $max_retries; $attempt++) {
            try {
                $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword);
            } catch (Exception $e) {
                // Remember why the attempt failed so it can be reported once
                // the retries are exhausted.
                $last_error = $e->getMessage();
            }
        }

        if ($embedding === null) {
            $reason = is_null($last_error) ? 'no embedding returned' : $last_error;

            // Stay best-effort: report the failure and let the caller skip
            // this keyword instead of aborting the whole run.
            dump("✗ Failed to fetch embedding for {$keyword} after {$max_retries} attempts: {$reason}");
        }

        return $embedding;
    }
}
|