306 lines
11 KiB
PHP
306 lines
11 KiB
PHP
<?php
|
|
|
|
/**
 * MemeMediaSeeder - Simple Laravel Seeder
 *
 * Usage: php artisan db:seed --class=MemeMediaSeeder
 *
 * Setup:
 * 1. Place webm_metadata.csv in: database/seeders/data/webm_metadata.csv
 * 2. Run: php artisan db:seed --class=MemeMediaSeeder
 *
 * What it does:
 * - Reads 223 meme records from CSV
 * - Creates 4 MediaEngine entries per meme (webm, mov, webp, gif)
 * - Creates MemeMedia records with all UUIDs and URLs
 * - Uses save_url mode for fast CDN references
 * - Processes each record individually without transactions
 */
|
|
|
|
namespace Database\Seeders;
|
|
|
|
use App\Helpers\FirstParty\AI\CloudflareAI;
|
|
use App\Helpers\FirstParty\Maintenance\MemeMediaMaintenance;
|
|
use App\Helpers\FirstParty\MediaEngine\MediaEngine;
|
|
use App\Models\MediaCollection;
|
|
use App\Models\MemeMedia;
|
|
use Illuminate\Database\Seeder;
|
|
use Log;
|
|
|
|
/**
 * Imports meme media from a CSV manifest.
 *
 * For every CSV row the seeder registers one MediaEngine entry per format in
 * self::FORMATS (URL references only, no file content is passed) and then
 * creates a single MemeMedia record that carries the resulting UUIDs and URLs.
 *
 * Rows are processed one at a time and no database transaction is opened, so
 * a failure part-way through a row can leave MediaEngine entries behind for
 * that row. Re-running the seeder skips rows that were already imported.
 */
class MemeMediaSeeder extends Seeder
{
    /** Prefix of every URL built by generateCdnUrl(). */
    private const CDN_BASE_URL = 'https://cdn.memeaigen.com/g1';

    /** Formats registered per meme: format key => file extension and MIME type. */
    private const FORMATS = [
        'webm' => ['ext' => 'webm', 'mime' => 'video/webm'],
        'mov' => ['ext' => 'mov', 'mime' => 'video/quicktime'],
        'webp' => ['ext' => 'webp', 'mime' => 'image/webp'],
        'gif' => ['ext' => 'gif', 'mime' => 'image/gif'],
    ];

    /** CSV columns that run() and importSingleMeme() read from every row. */
    private const REQUIRED_COLUMNS = ['filename', 'name', 'description', 'keywords', 'type', 'sub_type'];

    /**
     * Run the database seeds.
     *
     * Reads the CSV manifest, imports each row individually and prints a
     * summary of imported / skipped / failed rows. A failing row is reported
     * and counted; it does not stop the remaining rows.
     */
    public function run(): void
    {
        $this->command->info('🚀 Starting meme media import...');

        // Ensure media collection exists
        $this->ensureMediaCollectionExists();

        // Read CSV file
        $csv_path = database_path('seeders/data/webm_metadata.csv');
        $meme_data = $this->parseCsvFile($csv_path);
        $total_memes = count($meme_data);

        $this->command->info('📊 Found ' . $total_memes . ' memes to import');

        // Process records individually for PostgreSQL compatibility
        $total_processed = 0;
        $total_skipped = 0;
        $total_failed = 0;

        foreach ($meme_data as $index => $meme_record) {
            $this->command->info('Processing ' . ($index + 1) . '/' . $total_memes . ': ' . $meme_record['filename']);

            $meme_record['keywords'] = $this->stringToCleanArray($meme_record['keywords']);

            try {
                $base_filename = pathinfo($meme_record['filename'], PATHINFO_FILENAME);

                if ($this->isDuplicate($base_filename)) {
                    $this->command->warn("⏭️ Skipping existing: {$meme_record['filename']} ({$meme_record['name']})");
                    $total_skipped++;

                    continue;
                }

                // importSingleMeme() returns false only when the record already
                // exists (real failures throw), so false is a skip, not a failure.
                if (! $this->importSingleMeme($meme_record)) {
                    $total_skipped++;

                    continue;
                }

                $total_processed++;
                if ($total_processed % 10 === 0) {
                    $this->command->info("✅ Processed {$total_processed} memes...");
                }
            } catch (\Exception $e) {
                $total_failed++;
                $this->command->error("❌ Failed to import: {$meme_record['filename']} - {$e->getMessage()}");
            }
        }

        // Summary
        $this->command->info("\n🎯 Import Summary:");
        $this->command->info("✅ Successfully imported: {$total_processed} memes");
        if ($total_skipped > 0) {
            $this->command->info("⏭️ Skipped (existing): {$total_skipped} memes");
        }
        if ($total_failed > 0) {
            $this->command->error("❌ Failed: {$total_failed} memes");
        }

        // One MediaEngine entry is created per configured format.
        $total_media_records = $total_processed * count(self::FORMATS);
        $this->command->info("📊 Created {$total_media_records} media records and {$total_processed} meme_media records");
    }

    /**
     * Check whether a MemeMedia row already references this file's webm URL.
     *
     * The lookup is best-effort: if the query itself fails, a warning is
     * printed and the row is treated as not-a-duplicate so the import can
     * still be attempted.
     *
     * NOTE(review): $base_filename is placed into the LIKE pattern unescaped,
     * so "_" and "%" in a filename act as wildcards — confirm filenames cannot
     * collide this way.
     */
    private function isDuplicate(string $base_filename): bool
    {
        try {
            return MemeMedia::where('webm_url', 'like', "%/{$base_filename}.webm")->exists();
        } catch (\Exception $e) {
            $this->command->warn("⚠️ Could not check duplicate for {$base_filename}: {$e->getMessage()}");

            return false;
        }
    }

    /**
     * Parse the CSV manifest into a list of rows keyed by header name.
     *
     * The file is streamed with fgetcsv() so quoted fields containing commas
     * or line breaks are parsed as single values. Blank lines are ignored;
     * rows whose column count differs from the header are reported and
     * skipped.
     *
     * @return array<int, array<string, string>> One associative array per data row.
     *
     * @throws \RuntimeException If the file is missing, cannot be opened, or
     *                           lacks one of the required columns.
     */
    private function parseCsvFile(string $csv_path): array
    {
        if (! file_exists($csv_path)) {
            throw new \RuntimeException("CSV file not found: {$csv_path}");
        }

        $handle = fopen($csv_path, 'rb');
        if ($handle === false) {
            throw new \RuntimeException("Unable to open CSV file: {$csv_path}");
        }

        try {
            // Parse header row (length 0 = no line-length limit).
            $headers = fgetcsv($handle, 0, ',', '"', '\\');
            if ($headers === false || $headers === [null]) {
                // Empty file: nothing to import.
                return [];
            }

            // A UTF-8 byte-order mark would otherwise become part of the first column name.
            $first_header = (string) $headers[0];
            if (strncmp($first_header, "\xEF\xBB\xBF", 3) === 0) {
                $headers[0] = substr($first_header, 3);
            }
            $headers = array_map(static function ($header) {
                return trim((string) $header);
            }, $headers);

            $missing = array_diff(self::REQUIRED_COLUMNS, $headers);
            if ($missing !== []) {
                throw new \RuntimeException('CSV file is missing required column(s): ' . implode(', ', $missing));
            }

            $header_count = count($headers);
            $meme_data = [];
            $row_number = 1; // the header is row 1

            while (($row = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
                $row_number++;

                // fgetcsv() yields [null] for an empty line; also ignore whitespace-only lines.
                if (count($row) === 1 && trim((string) $row[0]) === '') {
                    continue;
                }

                if (count($row) !== $header_count) {
                    $this->command->warn(
                        "⚠️ Skipping CSV row {$row_number}: expected {$header_count} columns, found " . count($row)
                    );

                    continue;
                }

                $meme_data[] = array_combine($headers, $row);
            }

            return $meme_data;
        } finally {
            fclose($handle);
        }
    }

    /**
     * Split a comma-separated keyword string into a clean list.
     *
     * Each keyword is trimmed, stripped of punctuation, and has runs of
     * whitespace collapsed to a single space; keywords that end up empty are
     * dropped. The result is always a 0-indexed list.
     *
     * @return array<int, string>
     */
    private function stringToCleanArray(string $string): array
    {
        $keywords = [];

        foreach (explode(',', $string) as $item) {
            $item = trim($item);

            // Use Unicode-aware patterns when the text is valid UTF-8 so that
            // non-ASCII letters survive; otherwise fall back to byte-wise matching.
            $is_utf8 = preg_match('//u', $item) === 1;

            // Remove punctuation
            $item = (string) preg_replace($is_utf8 ? '/[^\p{L}\p{M}\p{N}_\s]/u' : '/[^\w\s]/', '', $item);

            // Clean extra spaces
            $item = trim((string) preg_replace($is_utf8 ? '/\s+/u' : '/\s+/', ' ', $item));

            if ($item !== '') {
                $keywords[] = $item;
            }
        }

        return $keywords;
    }

    /**
     * Import a single meme with all its formats.
     *
     * Order of operations:
     *  1. Return early if a MemeMedia row with this original_id already exists
     *     (checked first so nothing is created for a row that will be skipped).
     *  2. Register one MediaEngine entry per format in self::FORMATS.
     *  3. Request an embedding (best-effort; stored as null on failure).
     *  4. Create the MemeMedia row, then set duration (best-effort) and embedding.
     *  5. Attach the keywords as tags (best-effort).
     *
     * @param array $meme_record CSV row; 'keywords' must already be an array.
     *
     * @return bool True when the meme was imported, false when it already existed.
     *
     * @throws \RuntimeException If a MediaEngine entry cannot be created.
     * @throws \Exception        Anything thrown while creating or saving the MemeMedia row.
     */
    private function importSingleMeme(array $meme_record): bool
    {
        $filename = $meme_record['filename'];

        // No transaction is open here; this is a plain pre-check done before any writes.
        if (MemeMedia::where('original_id', $filename)->exists()) {
            $this->command->warn("Record already exists for {$meme_record['filename']}, skipping...");

            return false;
        }

        // Extract base filename (remove .webm extension)
        $base_filename = pathinfo($filename, PATHINFO_FILENAME);
        $display_name = trim($meme_record['name']);

        $media_uuids = [];
        $media_urls = [];

        // Create MediaEngine entries for each format
        foreach (self::FORMATS as $format => $config) {
            $url = $this->generateCdnUrl($base_filename, $config['ext']);

            try {
                // Create media entry using save_url mode
                $media = MediaEngine::addMedia(
                    'temps', // Media collection key
                    $meme_record['type'], // video or image
                    'system_uploaded', // Media source
                    'meme_cdn', // Media provider
                    null, // No file content
                    $url, // CDN URL
                    'save_url', // Mode: just save URL reference
                    null, // Auto-generate filename
                    'r2', // Disk (not used for URL mode)
                    $display_name . " ({$format})", // Name with format
                    null, // No specific user
                    $config['mime'] // MIME type
                );
            } catch (\Exception $e) {
                // Add the format to the message; run() reports the failure once.
                throw new \RuntimeException("Failed to create {$format} media: {$e->getMessage()}", 0, $e);
            }

            $media_uuids[$format . '_uuid'] = $media->uuid;
            $media_urls[$format . '_url'] = $url;
        }

        // Generate embedding
        try {
            $embedding = CloudflareAI::getVectorEmbeddingBgeSmall(
                $meme_record['name'] . ' ' . $meme_record['description'] . ' ' . implode(' ', $meme_record['keywords'])
            );
        } catch (\Exception $e) {
            $this->command->warn("Failed to generate embedding for {$meme_record['filename']}: {$e->getMessage()}");
            $embedding = null; // Continue without embedding
        }

        // Create MemeMedia record
        $meme_media = MemeMedia::create([
            'is_enabled' => true,
            'original_id' => $filename,
            'type' => $meme_record['type'],
            'sub_type' => $meme_record['sub_type'],
            'name' => $display_name,
            'description' => $meme_record['description'],
            'keywords' => $meme_record['keywords'],

            // UUIDs from MediaEngine
            'mov_uuid' => $media_uuids['mov_uuid'],
            'webm_uuid' => $media_uuids['webm_uuid'],
            'gif_uuid' => $media_uuids['gif_uuid'],
            'webp_uuid' => $media_uuids['webp_uuid'],

            // Direct CDN URLs
            'mov_url' => $media_urls['mov_url'],
            'webm_url' => $media_urls['webm_url'],
            'gif_url' => $media_urls['gif_url'],
            'webp_url' => $media_urls['webp_url'],
        ]);

        // The row already exists at this point. If the duration probe threw here,
        // the embedding and tags would never be stored and a rerun would skip the
        // row as a duplicate, so treat duration as best-effort like the embedding.
        try {
            $meme_media->duration = MemeMediaMaintenance::getDurationUsingFfmpeg($meme_media);
        } catch (\Exception $e) {
            $this->command->warn("Failed to determine duration for {$meme_record['filename']}: {$e->getMessage()}");
        }

        $meme_media->embedding = $embedding;
        $meme_media->save();

        // Add keywords as tags
        $this->attachKeywordsAsTags($meme_media, $meme_record['keywords']);

        $this->command->info('✅ Imported: ' . $display_name);

        return true;
    }

    /**
     * Attach the keywords to the record as tags of type 'meme_media'.
     *
     * Best-effort: a failure is printed and logged but does not abort the import.
     */
    private function attachKeywordsAsTags(MemeMedia $meme_media, array $keywords): void
    {
        try {
            $meme_media->attachTags($keywords, 'meme_media');
        } catch (\Exception $e) {
            $this->command->warn("Failed to attach tags to meme media '{$meme_media->name}': " . $e->getMessage());
            Log::warning("Failed to attach tags", [
                'meme_media_id' => $meme_media->id,
                'keywords' => $keywords,
                'error' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Generate CDN URL for specific format.
     *
     * Layout: {CDN_BASE_URL}/{extension}/{base_filename}.{extension}
     */
    private function generateCdnUrl(string $base_filename, string $extension): string
    {
        return self::CDN_BASE_URL . "/{$extension}/{$base_filename}.{$extension}";
    }

    /**
     * Ensure the 'temps' media collection exists, creating it if necessary.
     */
    private function ensureMediaCollectionExists(): void
    {
        $collection = MediaCollection::firstOrCreate([
            'key' => 'temps',
        ], [
            'name' => 'Temp Files',
            'description' => 'Temporary and external file references',
            'is_system' => true,
        ]);

        $this->command->info("📁 Using media collection: {$collection->key}");
    }
}
|