This commit is contained in:
ct
2025-06-19 13:16:17 +08:00
parent 4545fd64c6
commit 5d3a3c8818
19 changed files with 854 additions and 108 deletions

View File

@@ -0,0 +1,230 @@
<?php
namespace Database\Seeders;
use App\Helpers\FirstParty\AI\CloudflareAI;
use Illuminate\Database\Console\Seeds\WithoutModelEvents;
use Illuminate\Database\Seeder;
use App\Models\Category;
use Illuminate\Support\Facades\File;
use Illuminate\Support\Facades\Log;
class CategorySeeder extends Seeder
{
/**
 * Seed categories from every JSON file in the category data directory.
 *
 * Files whose base name contains "schema" are ignored; each remaining file
 * is handed to processJsonFile(). Progress is reported on the console.
 */
public function run(): void
{
    // Directory that holds one JSON document per main category.
    $categoryDir = database_path('seeders/data/json/category');

    // Nothing to do when the data directory is absent.
    if (!File::exists($categoryDir)) {
        $this->command->error("JSON directory not found: {$categoryDir}");
        return;
    }

    // Collect the *.json files, leaving out anything that looks like a schema file.
    $seedFiles = array_filter(
        File::glob($categoryDir . '/*.json'),
        fn ($path) => !str_contains(basename($path), 'schema')
    );
    $fileCount = count($seedFiles);

    $this->command->info('Starting to seed categories from JSON files...');
    $this->command->info('Found ' . $fileCount . ' JSON files to process.');

    foreach ($seedFiles as $seedFile) {
        $this->processJsonFile($seedFile);
    }

    $this->command->info('Category seeding completed successfully!');
}
/**
 * Process a single category JSON file.
 *
 * Decodes the file, validates that it has a `category` entry with `name`
 * and `description`, creates (or reuses) the main category and then creates
 * each listed subcategory. Validation failures are reported on the console
 * and abort processing of this file only; exceptions are reported and logged.
 *
 * @param string $filePath Path of the JSON file to import.
 */
private function processJsonFile(string $filePath): void
{
    $fileName = basename($filePath);
    $this->command->info("Processing: {$fileName}");
    try {
        // Read and decode JSON content
        $jsonContent = File::get($filePath);
        $data = json_decode($jsonContent, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            $this->command->error("Invalid JSON in file: {$fileName} - " . json_last_error_msg());
            return;
        }
        // Validate JSON structure
        if (!isset($data['category'])) {
            $this->command->error("Missing 'category' key in file: {$fileName}");
            return;
        }
        $categoryData = $data['category'];
        // Validate required fields
        if (!isset($categoryData['name']) || !isset($categoryData['description'])) {
            $this->command->error("Missing required fields (name/description) in file: {$fileName}");
            return;
        }
        // Create main category
        $mainCategory = $this->createMainCategory($categoryData, $data);
        if (!$mainCategory) {
            $this->command->error("Failed to create main category for file: {$fileName}");
            return;
        }
        // Create subcategories
        if (isset($categoryData['subcategories']) && is_array($categoryData['subcategories'])) {
            // Iterate positionally: if "subcategories" was written as a JSON object its
            // keys are strings, and createSubcategory() requires an int index.
            foreach (array_values($categoryData['subcategories']) as $index => $subcategoryData) {
                // createSubcategory() is typed `array`; a scalar entry would raise a
                // TypeError, which is not an \Exception and would abort the whole seeder.
                if (!is_array($subcategoryData)) {
                    $this->command->warn("Subcategory at index {$index} is not a JSON object in file: {$fileName}. Skipping...");
                    continue;
                }
                if (!$this->createSubcategory($subcategoryData, $mainCategory, $data, $index)) {
                    $this->command->warn("Failed to create subcategory at index {$index} for file: {$fileName}");
                }
            }
        }
        $this->command->info("✓ Successfully processed: {$fileName}");
    } catch (\Exception $e) {
        $this->command->error("Error processing {$fileName}: " . $e->getMessage());
        Log::error("CategorySeeder error for {$fileName}", [
            'error' => $e->getMessage(),
            'trace' => $e->getTraceAsString()
        ]);
    }
}
/**
 * Create a top-level category, or return the existing one with the same name.
 *
 * @param array $categoryData The `category` entry of the JSON document.
 * @param array $originalData The whole decoded JSON document, stored as the payload.
 * @return Category|null The existing or newly created category; null when an exception occurred.
 */
private function createMainCategory(array $categoryData, array $originalData): ?Category
{
    try {
        // A top-level category is identified by its name together with a null parent_id.
        $alreadySeeded = Category::where('name', $categoryData['name'])
            ->whereNull('parent_id')
            ->first();

        if ($alreadySeeded) {
            $this->command->warn("Main category '{$categoryData['name']}' already exists. Skipping...");
            return $alreadySeeded;
        }

        $keywords = $categoryData['keywords'] ?? null;
        // The embedding is computed from the name and description joined by a space.
        $embeddingText = $categoryData['name'] . " " . $categoryData['description'];

        $created = Category::create([
            'is_enabled' => true,
            'name' => $categoryData['name'],
            'description' => $categoryData['description'],
            'subcategories' => $categoryData['subcategories'] ?? null,
            'keywords' => $keywords,
            // meme_angles and sample_captions are only filled in for subcategories.
            'meme_angles' => null,
            'sample_captions' => null,
            'payload' => $originalData,
            'embedding' => CloudflareAI::getVectorEmbeddingBgeSmall($embeddingText),
        ]);

        // Mirror the keyword list as tags when one was supplied.
        if (is_array($keywords)) {
            $this->attachKeywordsAsTags($created, $keywords);
        }

        $this->command->line(" ✓ Created main category: {$created->name}");

        return $created;
    } catch (\Exception $failure) {
        $this->command->error("Error creating main category: " . $failure->getMessage());
        Log::error("Error creating main category", [
            'category_data' => $categoryData,
            'error' => $failure->getMessage()
        ]);

        return null;
    }
}
/**
 * Create a subcategory under the given parent, or return the existing one.
 *
 * @param array    $subcategoryData One entry of the parent's `subcategories` list.
 * @param Category $parentCategory  The category the new record is attached to via parent_id.
 * @param array    $originalData    The whole decoded JSON document (not used by this method).
 * @param int      $index           Position of the entry, used only in console messages.
 * @return Category|null The existing or newly created subcategory; null when required
 *                       fields are missing or an exception occurred.
 */
private function createSubcategory(array $subcategoryData, Category $parentCategory, array $originalData, int $index): ?Category
{
    try {
        // Both name and description are mandatory.
        if (!isset($subcategoryData['name'], $subcategoryData['description'])) {
            $this->command->warn("Subcategory at index {$index} missing required fields (name/description). Skipping...");
            return null;
        }

        $parentId = $parentCategory->id;

        // A subcategory is identified by its name within one parent.
        $alreadySeeded = Category::where('name', $subcategoryData['name'])
            ->where('parent_id', $parentId)
            ->first();

        if ($alreadySeeded) {
            $this->command->warn(" Subcategory '{$subcategoryData['name']}' already exists. Skipping...");
            return $alreadySeeded;
        }

        $keywords = $subcategoryData['keywords'] ?? null;
        // The embedding is computed from the name and description joined by a space.
        $embeddingText = $subcategoryData['name'] . " " . $subcategoryData['description'];

        // The stored payload keeps the raw entry plus a short summary of its parent.
        $payload = [
            'subcategory' => $subcategoryData,
            'parent_category' => [
                'name' => $parentCategory->name,
                'description' => $parentCategory->description
            ]
        ];

        // The parent link is established by writing parent_id directly on create.
        $created = Category::create([
            'is_enabled' => false,
            'name' => $subcategoryData['name'],
            'description' => $subcategoryData['description'],
            'meme_angles' => $subcategoryData['meme_angles'] ?? null,
            'sample_captions' => $subcategoryData['sample_captions'] ?? null,
            'keywords' => $keywords,
            // Subcategories are leaves: they carry no nested subcategories.
            'subcategories' => null,
            'payload' => $payload,
            'parent_id' => $parentId,
            'embedding' => CloudflareAI::getVectorEmbeddingBgeSmall($embeddingText),
        ]);

        // Mirror the keyword list as tags when one was supplied.
        if (is_array($keywords)) {
            $this->attachKeywordsAsTags($created, $keywords);
        }

        $this->command->line(" ✓ Created subcategory: {$created->name}");

        return $created;
    } catch (\Exception $failure) {
        $this->command->error("Error creating subcategory at index {$index}: " . $failure->getMessage());
        Log::error("Error creating subcategory", [
            'subcategory_data' => $subcategoryData,
            'parent_id' => $parentCategory->id,
            'error' => $failure->getMessage()
        ]);

        return null;
    }
}
/**
 * Tag a category with its keywords, using the 'category' tag type.
 *
 * Best-effort: an exception raised while tagging is reported as a console
 * warning and logged, and is not rethrown.
 *
 * @param Category $category The category to tag.
 * @param array    $keywords Keywords passed to attachTags() as-is.
 */
private function attachKeywordsAsTags(Category $category, array $keywords): void
{
    try {
        $category->attachTags($keywords, 'category');
    } catch (\Exception $failure) {
        $reason = $failure->getMessage();

        $this->command->warn("Failed to attach tags to category '{$category->name}': " . $reason);

        $context = [
            'category_id' => $category->id,
            'keywords' => $keywords,
            'error' => $reason
        ];
        Log::warning("Failed to attach tags", $context);
    }
}
}

View File

@@ -20,10 +20,12 @@
namespace Database\Seeders;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Helpers\FirstParty\Maintenance\MemeMediaMaintenance;
use App\Helpers\FirstParty\MediaEngine\MediaEngine;
use App\Models\MediaCollection;
use App\Models\MemeMedia;
use Illuminate\Database\Seeder;
use Log;
class MemeMediaSeeder extends Seeder
{
@@ -50,7 +52,7 @@ public function run(): void
$csv_path = database_path('seeders/data/webm_metadata.csv');
$meme_data = $this->parseCsvFile($csv_path);
$this->command->info('📊 Found '.count($meme_data).' memes to import');
$this->command->info('📊 Found ' . count($meme_data) . ' memes to import');
// Process records individually for PostgreSQL compatibility
$total_processed = 0;
@@ -58,7 +60,11 @@ public function run(): void
$total_failed = 0;
foreach ($meme_data as $index => $meme_record) {
$this->command->info('Processing '.($index + 1).'/'.count($meme_data).': '.$meme_record['filename']);
$this->command->info('Processing ' . ($index + 1) . '/' . count($meme_data) . ': ' . $meme_record['filename']);
$meme_record['keywords'] = $this->stringToCleanArray($meme_record['keywords']);
try {
// Check for duplicates OUTSIDE of transaction
@@ -146,6 +152,18 @@ private function parseCsvFile(string $csv_path): array
return $meme_data;
}
/**
 * Turn a comma-separated keyword string into a clean list of keywords.
 *
 * Each piece is trimmed, stripped of punctuation, has runs of whitespace
 * collapsed to a single space, and is dropped when nothing is left.
 *
 * @param mixed $string Comma-separated keywords; cast to string, so null yields [].
 * @return list<string> Sequentially indexed keywords (no gaps in the keys).
 */
private function stringToCleanArray($string): array
{
    $keywords = [];

    foreach (explode(',', (string) $string) as $piece) {
        $piece = trim($piece);

        // Unicode-aware: keep letters, combining marks, digits, underscore and whitespace
        // so that non-ASCII keywords (e.g. "café") are not mangled byte by byte.
        $stripped = preg_replace('/[^\p{L}\p{M}\p{N}_\s]/u', '', $piece);
        if ($stripped === null) {
            // The /u pattern fails on invalid UTF-8; fall back to the byte-wise pattern.
            $stripped = (string) preg_replace('/[^\w\s]/', '', $piece);
        }

        // Collapse internal whitespace runs and trim what the stripping left behind.
        $keyword = trim((string) preg_replace('/\s+/', ' ', $stripped));

        // Appending (rather than filtering) keeps the result a true list, so it
        // serializes as a JSON array even when some pieces were empty.
        if ($keyword !== '') {
            $keywords[] = $keyword;
        }
    }

    return $keywords;
}
/**
* Import a single meme with all its formats
*/
@@ -174,13 +192,13 @@ private function importSingleMeme(array $meme_record): bool
'save_url', // Mode: just save URL reference
null, // Auto-generate filename
'r2', // Disk (not used for URL mode)
trim($meme_record['name'])." ({$format})", // Name with format
trim($meme_record['name']) . " ({$format})", // Name with format
null, // No specific user
$config['mime'] // MIME type
);
$media_uuids[$format.'_uuid'] = $media->uuid;
$media_urls[$format.'_url'] = $url;
$media_uuids[$format . '_uuid'] = $media->uuid;
$media_urls[$format . '_url'] = $url;
} catch (\Exception $e) {
$this->command->error("Failed to create {$format} media for {$meme_record['filename']}: {$e->getMessage()}");
throw $e;
@@ -190,7 +208,7 @@ private function importSingleMeme(array $meme_record): bool
// Generate embedding
try {
$embedding = CloudflareAI::getVectorEmbeddingBgeSmall(
$meme_record['name'].' '.$meme_record['description'].' '.$meme_record['keywords']
$meme_record['name'] . ' ' . $meme_record['description'] . ' ' . implode(' ', $meme_record['keywords'])
);
} catch (\Exception $e) {
$this->command->warn("Failed to generate embedding for {$meme_record['filename']}: {$e->getMessage()}");
@@ -209,7 +227,8 @@ private function importSingleMeme(array $meme_record): bool
}
// Create MemeMedia record
MemeMedia::create([
$meme_media = MemeMedia::create([
'is_enabled' => true,
'original_id' => $meme_record['filename'],
'type' => $meme_record['type'],
'sub_type' => $meme_record['sub_type'],
@@ -228,12 +247,16 @@ private function importSingleMeme(array $meme_record): bool
'webm_url' => $media_urls['webm_url'],
'gif_url' => $media_urls['gif_url'],
'webp_url' => $media_urls['webp_url'],
// Embedding (may be null)
'embedding' => $embedding,
]);
$this->command->info('✅ Imported: '.trim($meme_record['name']));
$meme_media->duration = MemeMediaMaintenance::getDurationUsingFfmpeg($meme_media);
$meme_media->embedding = $embedding;
$meme_media->save();
// Add keywords as tags
$this->attachKeywordsAsTags($meme_media, $meme_record['keywords']);
$this->command->info('✅ Imported: ' . trim($meme_record['name']));
return true;
} catch (\Exception $e) {
@@ -242,12 +265,26 @@ private function importSingleMeme(array $meme_record): bool
}
}
/**
 * Tag a meme media record with its keywords, using the 'meme' tag type.
 *
 * Best-effort: an exception raised while tagging is reported as a console
 * warning and logged, and is not rethrown.
 *
 * @param MemeMedia $meme_media The record to tag.
 * @param array     $keywords   Keywords passed to attachTags() as-is.
 */
private function attachKeywordsAsTags(MemeMedia $meme_media, array $keywords): void
{
    try {
        $meme_media->attachTags($keywords, 'meme');
    } catch (\Exception $e) {
        $this->command->warn("Failed to attach tags to meme media '{$meme_media->name}': " . $e->getMessage());
        Log::warning("Failed to attach tags", [
            // The id belongs to a MemeMedia row, so label it as such in the log context.
            'meme_media_id' => $meme_media->id,
            'keywords' => $keywords,
            'error' => $e->getMessage()
        ]);
    }
}
/**
* Generate CDN URL for specific format
*/
private function generateCdnUrl(string $base_filename, string $extension): string
{
return self::CDN_BASE_URL."/{$extension}/{$base_filename}.{$extension}";
return self::CDN_BASE_URL . "/{$extension}/{$base_filename}.{$extension}";
}
/**