Update
This commit is contained in:
@@ -0,0 +1,184 @@
|
||||
<?php
|
||||
|
||||
namespace App\Helpers\FirstParty\Maintenance;
|
||||
|
||||
use App\Helpers\FirstParty\AI\CloudflareAI;
|
||||
use App\Models\Category;
|
||||
use App\Models\KeywordEmbedding;
|
||||
use App\Models\MemeMedia;
|
||||
use App\Models\MemeMediaEmbedding;
|
||||
use Exception;
|
||||
use Illuminate\Contracts\Filesystem\Cloud;
|
||||
use PhpParser\Lexer\TokenEmulator\KeywordEmulator;
|
||||
|
||||
class KeywordEmbeddingMaintenance
|
||||
{
|
||||
/**
 * Entry point for the embedding back-fill.
 *
 * At present this only delegates to populateMemeMediasKeywordsEmbeddings();
 * the other populate* methods of this class are not invoked from here.
 */
public static function populateEmbeddings()
{
    self::populateMemeMediasKeywordsEmbeddings();
}
|
||||
|
||||
/**
 * Copy each category's stored embedding into the KeywordEmbedding table.
 *
 * The lookup key is the category name and description joined by a single
 * space. A category whose key is already present in KeywordEmbedding is
 * skipped; otherwise a row tagged "category" is created from the
 * category's own embedding value.
 */
public static function populateCategoryEmbeddings()
{
    $embedded_categories = Category::whereNotNull('embedding')->get();

    foreach ($embedded_categories as $category) {
        $lookup_key = "{$category->name} {$category->description}";

        $existing_row = KeywordEmbedding::where('keyword', $lookup_key)->first();

        // A row for this key already exists; nothing to do.
        if (!is_null($existing_row)) {
            continue;
        }

        KeywordEmbedding::create([
            'keyword' => $lookup_key,
            'embedding' => $category->embedding,
            'tag' => 'category',
        ]);
    }
}
|
||||
|
||||
/**
 * Request an embedding for every keyword attached to a category.
 *
 * Only categories whose `keywords` attribute is non-null are visited. Each
 * keyword is sent through fetchAndCacheEmbedding(), which retries the
 * CloudflareAI call, instead of repeating that retry loop inline here.
 * Keywords for which no embedding could be obtained are reported via dump()
 * rather than being dropped silently.
 *
 * NOTE(review): the vector returned for each keyword is not stored by this
 * method. Presumably CloudflareAI::getVectorEmbeddingBgeSmall() caches it as
 * a side effect - confirm, otherwise this method has no lasting effect.
 */
public static function populateCategoryKeywordEmbeddings()
{
    $categories = Category::whereNotNull('keywords')->get();

    foreach ($categories as $category) {
        foreach ($category->keywords as $keyword) {
            dump("Populating keyword embedding for {$keyword}");

            $embedding = self::fetchAndCacheEmbedding($keyword);

            // The helper returns null once all of its attempts have failed.
            if ($embedding === null) {
                dump("Failed to fetch embedding for {$keyword}");
            }
        }
    }
}
|
||||
|
||||
/**
 * Create a MemeMediaEmbedding row for every keyword of every meme media.
 *
 * Meme medias with a non-null `keywords` attribute are loaded, and for each
 * one the four keyword lists (`keywords`, `action_keywords`,
 * `emotion_keywords`, `misc_keywords`) are walked. Every keyword is embedded
 * through fetchAndCacheEmbedding(); when a truthy embedding comes back, a row
 * is created and tagged with the name of the list the keyword came from.
 * Progress is reported with dump().
 *
 * NOTE(review): rows are created unconditionally, so running this method
 * twice inserts the same (meme_media_id, keyword, tag) rows again unless the
 * table enforces uniqueness - confirm whether that is intended.
 */
public static function populateMemeMediasKeywordsEmbeddings()
{
    // Attribute name (also stored as the row's tag) => wording used in the
    // progress output for that list.
    $keyword_groups = [
        'keywords' => 'keyword',
        'action_keywords' => 'action keyword',
        'emotion_keywords' => 'emotion keyword',
        'misc_keywords' => 'misc keyword',
    ];

    $meme_medias = MemeMedia::whereNotNull('keywords')->get();

    $max = $meme_medias->count();

    foreach ($meme_medias as $key => $meme_media) {
        $count = $key + 1;

        dump("Processing: {$count}/{$max}: {$meme_media->name}");

        foreach ($keyword_groups as $tag => $label) {
            // The query only guarantees `keywords` is non-null; the other
            // lists may be null, which must not be handed to foreach.
            foreach (($meme_media->{$tag} ?? []) as $keyword) {
                dump("Populating {$label} embedding for {$keyword}");

                $embedding = self::fetchAndCacheEmbedding($keyword);

                if ($embedding) {
                    MemeMediaEmbedding::create([
                        'meme_media_id' => $meme_media->id,
                        'keyword' => $keyword,
                        'embedding' => $embedding,
                        'tag' => $tag,
                    ]);
                }
            }
        }

        dump("✓ Successfully processed meme media {$meme_media->id}");
        dump("");
        dump("");
    }
}
|
||||
|
||||
/**
 * Copy each meme media's stored embedding into the KeywordEmbedding table.
 *
 * The lookup key is the meme media name and description joined by a single
 * space. A meme media whose key is already present in KeywordEmbedding is
 * skipped; otherwise a row tagged "meme_media" is created from the meme
 * media's own embedding value.
 */
public static function populateMemeMediaEmbeddings()
{
    $embedded_medias = MemeMedia::whereNotNull('embedding')->get();

    foreach ($embedded_medias as $meme_media) {
        $lookup_key = "{$meme_media->name} {$meme_media->description}";

        $existing_row = KeywordEmbedding::where('keyword', $lookup_key)->first();

        // A row for this key already exists; nothing to do.
        if (!is_null($existing_row)) {
            continue;
        }

        KeywordEmbedding::create([
            'keyword' => $lookup_key,
            'embedding' => $meme_media->embedding,
            'tag' => 'meme_media',
        ]);
    }
}
|
||||
|
||||
/**
 * Fetch the embedding for a keyword, retrying on failure.
 *
 * CloudflareAI::getVectorEmbeddingBgeSmall() is called up to three times.
 * An attempt counts as failed when it throws an Exception or returns null;
 * thrown exceptions are reported via dump() so the reason for a failure is
 * visible instead of being swallowed. There is no delay between attempts.
 *
 * NOTE(review): despite the method name, no caching happens in this method;
 * presumably the CloudflareAI call caches internally - confirm.
 *
 * @param mixed $keyword Value passed through to CloudflareAI unchanged
 *                       (callers in this class pass keyword strings).
 * @return mixed|null The first non-null result, or null if every attempt failed.
 */
private static function fetchAndCacheEmbedding($keyword)
{
    $max_retries = 3;

    for ($attempt = 1; $attempt <= $max_retries; $attempt++) {
        try {
            $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($keyword);
        } catch (Exception $e) {
            dump("Embedding attempt {$attempt}/{$max_retries} failed for {$keyword}: {$e->getMessage()}");

            continue;
        }

        // A null result is treated like a failed attempt and retried.
        if ($embedding !== null) {
            return $embedding;
        }
    }

    return null;
}
|
||||
}
|
||||
Reference in New Issue
Block a user