Files
memefast/database/seeders/G2MemeMediaSeeder.php
2025-07-15 00:43:37 +08:00

344 lines
13 KiB
PHP

<?php
/**
* G2MemeMediaSeeder - Simple Laravel Seeder
*
* Usage: php artisan db:seed --class=G2MemeMediaSeeder
*
* Setup:
* 1. Place g2.csv in: database/seeders/data/g2.csv
* 2. Run: php artisan db:seed --class=G2MemeMediaSeeder
*
* What it does:
* - Reads 223 meme records from CSV
* - Creates 4 MediaEngine entries per meme (webm, mov, webp, gif)
* - Creates MemeMedia records with all UUIDs and URLs
* - Uses save_url mode for fast CDN references
* - Processes each record individually without transactions
*/
namespace Database\Seeders;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Helpers\FirstParty\Maintenance\MemeMediaMaintenance;
use App\Helpers\FirstParty\MediaEngine\MediaEngine;
use App\Models\MediaCollection;
use App\Models\MemeMedia;
use Illuminate\Database\Seeder;
use Log;
use Maatwebsite\Excel\Facades\Excel;
class G2MemeMediaSeeder extends Seeder
{
/** Base URL that generateCdnUrl() prepends to every generated asset URL. */
private const CDN_BASE_URL = 'https://cdn.memefa.st/g2';
/**
 * Formats registered for every meme, keyed by format name.
 *
 * The key is used as the prefix of the `{format}_uuid` / `{format}_url`
 * values, `ext` is the file extension (and CDN sub-directory) and `mime`
 * is the MIME type handed to MediaEngine::addMedia().
 */
private const FORMATS = [
'webm' => ['ext' => 'webm', 'mime' => 'video/webm'],
'mov' => ['ext' => 'mov', 'mime' => 'video/quicktime'],
'webp' => ['ext' => 'webp', 'mime' => 'image/webp'],
'gif' => ['ext' => 'gif', 'mime' => 'image/gif'],
];
/**
 * Run the database seeds.
 *
 * Imports every record of database/seeders/data/g2.csv one at a time.
 * Records missing a filename, type or name are skipped, as are records
 * that isDuplicate() reports as already imported. The first import failure
 * halts the seeder by (re)throwing, so a partial run can be investigated.
 *
 * @throws \RuntimeException When the CSV cannot be read or an import reports failure.
 * @throws \Exception        Any exception raised while importing a record.
 */
public function run(): void
{
    $this->command->info('🚀 Starting meme media import...');

    // Ensure media collection exists
    $this->ensureMediaCollectionExists();

    // Read CSV file
    $csv_path = database_path('seeders/data/g2.csv');
    $meme_data = $this->parseCsvFile($csv_path);
    $total_records = count($meme_data);
    $this->command->info('📊 Found '.$total_records.' memes to import');

    // Malformed and already-imported records are counted separately so the
    // summary does not report malformed rows as "existing".
    $total_processed = 0;
    $total_existing = 0;
    $total_malformed = 0;

    foreach ($meme_data as $index => $meme_record) {
        $position = $index + 1;

        // The column may be absent or null; the progress line must not fail on it.
        $filename = (string) ($meme_record['filename'] ?? '');
        $this->command->info('Processing '.$position.'/'.$total_records.': '.$filename);

        // Skip empty or malformed records
        if (empty($meme_record['filename']) || empty($meme_record['type']) || empty($meme_record['name'])) {
            $this->command->warn('⏭️ Skipping malformed CSV record at line '.$position.': missing filename, type, or name');
            $total_malformed++;

            continue;
        }

        // An empty keywords cell arrives as null; normalise it to a string first.
        $meme_record['keywords'] = $this->stringToCleanArray((string) ($meme_record['keywords'] ?? ''));

        try {
            $base_filename = pathinfo($filename, PATHINFO_FILENAME);
            if ($this->isDuplicate($base_filename)) {
                $this->command->warn("⏭️ Skipping existing: {$meme_record['filename']} ({$meme_record['name']})");
                $total_existing++;

                continue;
            }

            $imported = $this->importSingleMeme($meme_record);
        } catch (\Exception $e) {
            $this->command->error("❌ Failed to import: {$meme_record['filename']} - {$e->getMessage()}");
            $this->command->error('🛑 Halting seeder to investigate the issue');

            throw $e;
        }

        // Raised outside the try block so this failure is reported exactly once.
        if (! $imported) {
            $this->command->error("❌ Failed to import: {$meme_record['filename']} - Import returned false");
            $this->command->error('🛑 Halting seeder to investigate the issue');

            throw new \RuntimeException("Import failed for {$meme_record['filename']}");
        }

        $total_processed++;
        if ($total_processed % 10 === 0) {
            $this->command->info("✅ Processed {$total_processed} memes...");
        }
    }

    // Summary
    $this->command->info("\n🎯 Import Summary:");
    $this->command->info("✅ Successfully imported: {$total_processed} memes");
    if ($total_existing > 0) {
        $this->command->info("⏭️ Skipped (existing): {$total_existing} memes");
    }
    if ($total_malformed > 0) {
        $this->command->warn("⏭️ Skipped (malformed): {$total_malformed} records");
    }

    // One media entry is created per configured format for every imported meme.
    $total_media_records = $total_processed * count(self::FORMATS);
    $this->command->info("📊 Created {$total_media_records} media records and {$total_processed} meme_media records");
}
/**
 * Check whether a group-2 meme whose webm URL ends in "/{base_filename}.webm"
 * already exists.
 *
 * The check is best-effort: if the query itself fails, a warning is printed
 * and the meme is treated as not yet imported.
 *
 * @param string $base_filename File name without its extension.
 * @return bool True when a matching MemeMedia row exists.
 */
private function isDuplicate(string $base_filename): bool
{
    // `%` and `_` are LIKE wildcards. File names routinely contain `_`, so an
    // unescaped pattern could match a different meme and wrongly skip this one.
    // An explicit escape character keeps the pattern portable across drivers.
    $escaped_filename = strtr($base_filename, ['!' => '!!', '%' => '!%', '_' => '!_']);

    try {
        return MemeMedia::whereRaw("webm_url LIKE ? ESCAPE '!'", ["%/{$escaped_filename}.webm"])
            ->where('group', 2)
            ->exists();
    } catch (\Exception $e) {
        $this->command->warn("⚠️ Could not check duplicate for {$base_filename}: {$e->getMessage()}");

        return false;
    }
}
/**
 * Parse the CSV file into a list of associative records.
 *
 * The first row supplies the column names (trimmed); every following
 * non-empty row is combined with them. Rows whose column count differs from
 * the header are skipped with a warning instead of being dropped silently.
 *
 * @param string $csv_path Absolute path of the CSV file.
 * @return array<int, array<string, mixed>> One associative array per data row.
 *
 * @throws \RuntimeException When the file is missing, empty or unparsable.
 */
private function parseCsvFile(string $csv_path): array
{
    if (! file_exists($csv_path)) {
        throw new \RuntimeException("CSV file not found: {$csv_path}");
    }

    // Use Laravel Excel to parse CSV with proper handling of multi-line fields
    $sheet = Excel::toCollection(null, $csv_path)->first();

    // first() yields null when no sheet was read at all; guard before isEmpty().
    if ($sheet === null || $sheet->isEmpty()) {
        throw new \RuntimeException('CSV file is empty or could not be parsed');
    }

    // Get headers from first row; stray whitespace would otherwise change the keys.
    $headers = array_map(static function ($header) {
        return trim((string) $header);
    }, $sheet->first()->toArray());
    $column_count = count($headers);

    // Convert remaining rows to associative arrays
    $meme_data = [];
    $row_number = 1; // the header is row 1
    foreach ($sheet->skip(1) as $row) {
        $row_number++;
        $row_array = $row->toArray();

        // Skip empty rows
        if (empty(array_filter($row_array))) {
            continue;
        }

        // Ensure row has same number of columns as headers
        if (count($row_array) !== $column_count) {
            $this->command->warn("⚠️ Skipping CSV row {$row_number}: expected {$column_count} columns, found ".count($row_array));

            continue;
        }

        $meme_data[] = array_combine($headers, $row_array);
    }

    return $meme_data;
}
/**
 * Split a comma-separated string into a clean list of keywords.
 *
 * Each item has punctuation removed, runs of whitespace collapsed to a single
 * space and surrounding whitespace trimmed; items that end up empty are
 * dropped. The result is always a zero-based list, so it serialises as a
 * JSON array rather than an object.
 *
 * @param string|null $string Comma-separated keywords; null or '' yields [].
 * @return array<int, string>
 */
private function stringToCleanArray($string): array
{
    if ($string === null || $string === '') {
        return [];
    }

    $keywords = [];
    foreach (explode(',', (string) $string) as $item) {
        // Unicode-aware patterns keep accented and non-Latin letters intact.
        // They return null on invalid UTF-8, so fall back to the byte-wise
        // patterns for such input.
        $is_utf8 = preg_match('//u', $item) === 1;
        $punctuation = $is_utf8 ? '/[^\p{L}\p{M}\p{N}_\s]/u' : '/[^\w\s]/';
        $whitespace = $is_utf8 ? '/\s+/u' : '/\s+/';

        $item = (string) preg_replace($punctuation, '', $item); // Remove punctuation
        $item = trim((string) preg_replace($whitespace, ' ', $item)); // Clean extra spaces

        if ($item !== '') {
            $keywords[] = $item;
        }
    }

    return $keywords;
}
/**
 * Import a single meme with all its formats.
 *
 * The record is validated before anything is written, so a rejected record
 * leaves no MediaEngine entries behind. One MediaEngine entry is then created
 * per format in self::FORMATS (save_url mode), followed by the MemeMedia row,
 * its duration and its keyword tags.
 *
 * Embedding generation is currently disabled; no embedding is stored.
 *
 * @param array $meme_record CSV record; reads filename, type, sub_type, name,
 *                           description and keywords (keywords as an array).
 * @return bool True when imported; false when a MemeMedia row with the same
 *              original_id already exists in group 2.
 *
 * @throws \RuntimeException When the description is null.
 * @throws \Exception        When a media entry or the MemeMedia row cannot be created.
 */
private function importSingleMeme(array $meme_record): bool
{
    $filename = $meme_record['filename'];
    $display_name = trim($meme_record['name']);

    // Validate before creating any media so no orphaned entries are left behind.
    $already_imported = MemeMedia::where('original_id', $filename)
        ->where('group', 2)
        ->exists();
    if ($already_imported) {
        $this->command->warn("Record already exists for {$filename}, skipping...");

        return false;
    }

    // Check for null description before creating record
    if (($meme_record['description'] ?? null) === null) {
        $this->command->error('❌ NULL DESCRIPTION DETECTED:');
        $this->command->error(" Filename: {$filename}");
        $this->command->error(" Name: {$meme_record['name']}");
        $this->command->error(' Description field is NULL in CSV data');
        $this->command->error(' CSV row data: '.json_encode($meme_record));
        $this->command->error('🛑 HALTING SEEDER - FIX THE CSV DATA');

        throw new \RuntimeException("NULL description found for {$filename} - CSV data needs to be fixed");
    }

    // Extract base filename (remove .webm extension)
    $base_filename = pathinfo($filename, PATHINFO_FILENAME);
    $media_uuids = [];
    $media_urls = [];

    // Create MediaEngine entries for each format
    foreach (self::FORMATS as $format => $config) {
        $url = $this->generateCdnUrl($base_filename, $config['ext']);

        try {
            // Create media entry using save_url mode
            $media = MediaEngine::addMedia(
                'temps', // Media collection key
                $meme_record['type'], // video or image
                'system_uploaded', // Media source
                'meme_cdn', // Media provider
                null, // No file content
                $url, // CDN URL
                'save_url', // Mode: just save URL reference
                null, // Auto-generate filename
                'r2', // Disk (not used for URL mode)
                $display_name." ({$format})", // Name with format
                null, // No specific user
                $config['mime'] // MIME type
            );
        } catch (\Exception $e) {
            // Name the failing format here; the caller reports the overall failure.
            $this->command->error("Failed to create {$format} media for {$filename}: {$e->getMessage()}");

            throw $e;
        }

        $media_uuids[$format.'_uuid'] = $media->uuid;
        $media_urls[$format.'_url'] = $url;
    }

    // Create MemeMedia record
    $meme_media = MemeMedia::create([
        'is_enabled' => true,
        'original_id' => $filename,
        'type' => $meme_record['type'],
        'sub_type' => $meme_record['sub_type'] ?? null,
        'name' => $display_name,
        'description' => $meme_record['description'],
        'keywords' => $meme_record['keywords'],
        'group' => 2,
        // UUIDs from MediaEngine
        'mov_uuid' => $media_uuids['mov_uuid'],
        'webm_uuid' => $media_uuids['webm_uuid'],
        'gif_uuid' => $media_uuids['gif_uuid'],
        'webp_uuid' => $media_uuids['webp_uuid'],
        // Direct CDN URLs
        'mov_url' => $media_urls['mov_url'],
        'webm_url' => $media_urls['webm_url'],
        'gif_url' => $media_urls['gif_url'],
        'webp_url' => $media_urls['webp_url'],
    ]);

    // The duration helper takes the saved model, so it is filled in afterwards.
    $meme_media->duration = MemeMediaMaintenance::getDurationUsingFfmpeg($meme_media);
    $meme_media->save();

    // Add keywords as tags
    $this->attachKeywordsAsTags($meme_media, $meme_record['keywords']);

    $this->command->info('✅ Imported: '.$display_name);

    return true;
}
/**
 * Attach the keywords to the meme as tags of type "meme_media".
 *
 * Tagging is best-effort: a failure is reported on the console and written
 * to the log, but it never aborts the import.
 *
 * @param MemeMedia $meme_media Saved meme the tags are attached to.
 * @param array     $keywords   Cleaned keyword strings.
 */
private function attachKeywordsAsTags(MemeMedia $meme_media, array $keywords): void
{
    try {
        $meme_media->attachTags($keywords, 'meme_media');
    } catch (\Exception $e) {
        $this->command->warn("Failed to attach tags to meme media '{$meme_media->name}': ".$e->getMessage());
        Log::warning('Failed to attach tags', [
            // This is the MemeMedia id; it was previously mislabelled as a category id.
            'meme_media_id' => $meme_media->id,
            'keywords' => $keywords,
            'error' => $e->getMessage(),
        ]);
    }
}
/**
 * Build the CDN URL of one format of a meme.
 *
 * The extension is used twice: as the sub-directory under the CDN base URL
 * and as the file suffix, giving "{base}/{ext}/{name}.{ext}".
 *
 * @param string $base_filename File name without extension.
 * @param string $extension     Format extension, e.g. "webm".
 * @return string Absolute CDN URL.
 */
private function generateCdnUrl(string $base_filename, string $extension): string
{
    $directory = self::CDN_BASE_URL.'/'.$extension;
    $file = $base_filename.'.'.$extension;

    return $directory.'/'.$file;
}
/**
 * Make sure the "temps" media collection is present.
 *
 * Looks the collection up by its key and creates it with the default name,
 * description and system flag when it is missing, then reports the key in use.
 */
private function ensureMediaCollectionExists(): void
{
    $lookup = ['key' => 'temps'];
    $defaults = [
        'name' => 'Temp Files',
        'description' => 'Temporary and external file references',
        'is_system' => true,
    ];

    $media_collection = MediaCollection::firstOrCreate($lookup, $defaults);

    $this->command->info('📁 Using media collection: '.$media_collection->key);
}
}