memefast/database/seeders/G2MemeMediaSeeder.php

<?php

/**
 * G2MemeMediaSeeder - Simple Laravel Seeder
 *
 * Usage: php artisan db:seed --class=G2MemeMediaSeeder
 *
 * Setup:
 * 1. Place the meme metadata CSV in: database/seeders/data/g2.csv
 * 2. Run: php artisan db:seed --class=G2MemeMediaSeeder
 *
 * What it does:
 * - Reads 223 meme records from CSV
 * - Creates 4 MediaEngine entries per meme (webm, mov, webp, gif)
 * - Creates MemeMedia records with all UUIDs and URLs
 * - Uses save_url mode for fast CDN references
 * - Processes each record individually without transactions
 */

namespace Database\Seeders;

use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Helpers\FirstParty\Maintenance\MemeMediaMaintenance;
use App\Helpers\FirstParty\MediaEngine\MediaEngine;
use App\Models\MediaCollection;
use App\Models\MemeMedia;
use Illuminate\Database\Seeder;
use Log;
use Maatwebsite\Excel\Facades\Excel;

/**
 * Imports the "group 2" meme catalogue from a CSV file.
 *
 * For every CSV record the seeder registers one MediaEngine entry per format
 * listed in FORMATS (URL reference only, nothing is uploaded) and then creates
 * a single MemeMedia row that carries the resulting UUIDs and CDN URLs.
 *
 * Records are processed one at a time and without a database transaction.
 * Records that already exist are skipped so the seeder can be re-run; any
 * other failure aborts the run by rethrowing the exception.
 */
class G2MemeMediaSeeder extends Seeder
{
    /** Base URL under which the group 2 assets are addressed. */
    private const CDN_BASE_URL = 'https://cdn.memefa.st/g2';

    /** Value written to, and matched against, the `group` column of MemeMedia. */
    private const GROUP = 2;

    /** Key of the media collection every MediaEngine entry is filed under. */
    private const MEDIA_COLLECTION_KEY = 'temps';

    /** CSV columns that the import reads; the file is rejected if one is missing. */
    private const REQUIRED_COLUMNS = ['filename', 'type', 'sub_type', 'name', 'description', 'keywords'];

    /** Formats registered per meme: format key => file extension and MIME type. */
    private const FORMATS = [
        'webm' => ['ext' => 'webm', 'mime' => 'video/webm'],
        'mov' => ['ext' => 'mov', 'mime' => 'video/quicktime'],
        'webp' => ['ext' => 'webp', 'mime' => 'image/webp'],
        'gif' => ['ext' => 'gif', 'mime' => 'image/gif'],
    ];

    /**
     * Run the database seeds.
     *
     * @throws \Exception Any exception raised while importing a record is
     *                    reported on the console and rethrown to halt the run.
     */
    public function run(): void
    {
        $this->command->info('🚀 Starting meme media import...');

        // Ensure media collection exists
        $this->ensureMediaCollectionExists();

        // Read CSV file
        $csv_path = database_path('seeders/data/g2.csv');
        $meme_data = $this->parseCsvFile($csv_path);
        $total_records = count($meme_data);

        $this->command->info('📊 Found '.$total_records.' memes to import');

        $total_processed = 0;
        $total_existing = 0;
        $total_malformed = 0;

        foreach ($meme_data as $index => $meme_record) {
            $position = $index + 1;

            // Validate first so the progress line below never prints an empty filename.
            if (empty($meme_record['filename']) || empty($meme_record['type']) || empty($meme_record['name'])) {
                $this->command->warn('⏭️  Skipping malformed CSV record '.$position.': missing filename, type, or name');
                $total_malformed++;

                continue;
            }

            $this->command->info('Processing '.$position.'/'.$total_records.': '.$meme_record['filename']);

            $meme_record['keywords'] = $this->stringToCleanArray($meme_record['keywords']);

            try {
                $base_filename = pathinfo($meme_record['filename'], PATHINFO_FILENAME);

                if ($this->isDuplicate($base_filename)) {
                    $this->command->warn("⏭️  Skipping existing: {$meme_record['filename']} ({$meme_record['name']})");
                    $total_existing++;

                    continue;
                }

                if ($this->importSingleMeme($meme_record)) {
                    $total_processed++;
                    if ($total_processed % 10 === 0) {
                        $this->command->info("✅ Processed {$total_processed} memes...");
                    }
                } else {
                    // importSingleMeme() returns false only when it found an existing
                    // row (and has already warned about it), so this is a skip, not a failure.
                    $total_existing++;
                }
            } catch (\Exception $e) {
                $this->command->error("❌ Failed to import: {$meme_record['filename']} - {$e->getMessage()}");
                $this->command->error('🛑 Halting seeder to investigate the issue');
                throw $e;
            }
        }

        // Summary
        $this->command->info("\n🎯 Import Summary:");
        $this->command->info("✅ Successfully imported: {$total_processed} memes");
        if ($total_existing > 0) {
            $this->command->info("⏭️  Skipped (existing): {$total_existing} memes");
        }
        if ($total_malformed > 0) {
            $this->command->warn("⏭️  Skipped (malformed): {$total_malformed} records");
        }

        $total_media_records = $total_processed * count(self::FORMATS);
        $this->command->info("📊 Created {$total_media_records} media records and {$total_processed} meme_media records");
    }

    /**
     * Check whether a group 2 meme whose webm URL ends in "/{base_filename}.webm" already exists.
     *
     * The LIKE query is only a prefilter: `_` and `%` inside the filename act as
     * wildcards there, so every candidate URL is compared again exactly in PHP.
     *
     * @param  string  $base_filename  Filename without extension.
     * @return bool True when a matching record exists; false when none exists
     *              or when the lookup itself failed (a warning is printed).
     */
    private function isDuplicate(string $base_filename): bool
    {
        $suffix = "/{$base_filename}.webm";

        try {
            return MemeMedia::where('webm_url', 'like', "%{$suffix}")
                ->where('group', self::GROUP)
                ->pluck('webm_url')
                ->contains(static function ($webm_url) use ($suffix) {
                    return substr((string) $webm_url, -strlen($suffix)) === $suffix;
                });
        } catch (\Exception $e) {
            // Best effort: importSingleMeme() checks again by original_id before writing.
            $this->command->warn("⚠️  Could not check duplicate for {$base_filename}: {$e->getMessage()}");

            return false;
        }
    }

    /**
     * Parse the CSV file into a list of records keyed by the header row.
     *
     * Completely empty rows are dropped silently; rows whose column count
     * differs from the header are dropped with a warning.
     *
     * @param  string  $csv_path  Absolute path of the CSV file.
     * @return array<int, array<string, mixed>> One associative array per data row.
     *
     * @throws \RuntimeException When the file is missing, empty, unparsable,
     *                           or lacks one of the REQUIRED_COLUMNS.
     */
    private function parseCsvFile(string $csv_path): array
    {
        if (! file_exists($csv_path)) {
            throw new \RuntimeException("CSV file not found: {$csv_path}");
        }

        // Use Laravel Excel to parse CSV with proper handling of multi-line fields
        $collection = Excel::toCollection(null, $csv_path)->first();

        // first() yields null when no sheet was returned at all.
        if ($collection === null || $collection->isEmpty()) {
            throw new \RuntimeException('CSV file is empty or could not be parsed');
        }

        // Headers come from the first row; trim them so stray whitespace
        // cannot break the key lookups performed during the import.
        $headers = array_map(static function ($header) {
            return trim((string) $header);
        }, $collection->first()->toArray());

        $missing_columns = array_diff(self::REQUIRED_COLUMNS, $headers);
        if ($missing_columns !== []) {
            throw new \RuntimeException('CSV file is missing required column(s): '.implode(', ', $missing_columns));
        }

        $column_count = count($headers);
        $row_number = 1; // the header is row 1
        $meme_data = [];

        foreach ($collection->skip(1) as $row) {
            $row_number++;
            $row_array = $row->toArray();

            // Skip empty rows
            if (empty(array_filter($row_array))) {
                continue;
            }

            // array_combine() requires matching lengths; report the row instead of dropping it silently.
            if (count($row_array) !== $column_count) {
                $this->command->warn("⚠️  Skipping CSV row {$row_number}: expected {$column_count} columns, found ".count($row_array));

                continue;
            }

            $meme_data[] = array_combine($headers, $row_array);
        }

        return $meme_data;
    }

    /**
     * Turn a comma-separated keyword string into a clean list of keywords.
     *
     * Each keyword is trimmed, stripped of punctuation and has runs of
     * whitespace collapsed to a single space; empty results are discarded.
     *
     * @param  string|null  $string  Raw cell value; null (an empty cell) yields [].
     * @return array<int, string> Sequentially indexed, so it serialises as a JSON list.
     */
    private function stringToCleanArray(?string $string): array
    {
        $keywords = [];

        foreach (explode(',', (string) $string) as $item) {
            $item = trim($item);

            if (preg_match('//u', $item) === 1) {
                // Valid UTF-8: keep letters, marks and digits of any script.
                $item = preg_replace('/[^\p{L}\p{M}\p{N}_\s]/u', '', $item);
                $item = preg_replace('/\s+/u', ' ', $item);
            } else {
                // Invalid UTF-8 would make the /u patterns return null; fall back to byte-wise matching.
                $item = preg_replace('/[^\w\s]/', '', $item);
                $item = preg_replace('/\s+/', ' ', $item);
            }

            $item = trim((string) $item);

            if ($item !== '') {
                $keywords[] = $item;
            }
        }

        return $keywords;
    }

    /**
     * Import a single meme with all its formats.
     *
     * All checks run before anything is written, so a skipped or rejected
     * record leaves no MediaEngine entries behind.
     *
     * @param  array  $meme_record  CSV record; `keywords` must already be an array.
     * @return bool True when the meme was imported, false when a group 2 record
     *              with the same original_id already exists.
     *
     * @throws \RuntimeException When the description is null.
     * @throws \Exception Anything raised while creating the media entries or the MemeMedia row.
     */
    private function importSingleMeme(array $meme_record): bool
    {
        $already_imported = MemeMedia::where('original_id', $meme_record['filename'])
            ->where('group', self::GROUP)
            ->exists();

        if ($already_imported) {
            $this->command->warn("Record already exists for {$meme_record['filename']}, skipping...");

            return false;
        }

        // Check for null description before creating any record
        if ($meme_record['description'] === null) {
            $this->command->error('❌ NULL DESCRIPTION DETECTED:');
            $this->command->error("  Filename: {$meme_record['filename']}");
            $this->command->error("  Name: {$meme_record['name']}");
            $this->command->error('  Description field is NULL in CSV data');
            $this->command->error('  CSV row data: '.json_encode($meme_record));
            $this->command->error('🛑 HALTING SEEDER - FIX THE CSV DATA');
            throw new \RuntimeException("NULL description found for {$meme_record['filename']} - CSV data needs to be fixed");
        }

        // Extract base filename (remove the extension)
        $base_filename = pathinfo($meme_record['filename'], PATHINFO_FILENAME);
        $display_name = trim((string) $meme_record['name']);

        $media_uuids = [];
        $media_urls = [];

        // Create MediaEngine entries for each format
        foreach (self::FORMATS as $format => $config) {
            $url = $this->generateCdnUrl($base_filename, $config['ext']);

            try {
                // NOTE(review): the CSV `type` is passed for every format, including
                // the image formats of a video meme — confirm this is what MediaEngine expects.
                $media = MediaEngine::addMedia(
                    self::MEDIA_COLLECTION_KEY, // Media collection key
                    $meme_record['type'], // video or image
                    'system_uploaded', // Media source
                    'meme_cdn', // Media provider
                    null, // No file content
                    $url, // CDN URL
                    'save_url', // Mode: just save URL reference
                    null, // Auto-generate filename
                    'r2', // Disk (not used for URL mode)
                    $display_name." ({$format})", // Name with format
                    null, // No specific user
                    $config['mime'] // MIME type
                );
            } catch (\Exception $e) {
                // Adds the failing format to the report; run() prints the halt message.
                $this->command->error("Failed to create {$format} media for {$meme_record['filename']}: {$e->getMessage()}");
                throw $e;
            }

            $media_uuids[$format.'_uuid'] = $media->uuid;
            $media_urls[$format.'_url'] = $url;
        }

        // Embedding generation is currently disabled; the `embedding` attribute is not set here.

        // Create MemeMedia record
        $meme_media = MemeMedia::create([
            'is_enabled' => true,
            'original_id' => $meme_record['filename'],
            'type' => $meme_record['type'],
            'sub_type' => $meme_record['sub_type'],
            'name' => $display_name,
            'description' => $meme_record['description'],
            'keywords' => $meme_record['keywords'],
            'group' => self::GROUP,

            // UUIDs from MediaEngine
            'mov_uuid' => $media_uuids['mov_uuid'],
            'webm_uuid' => $media_uuids['webm_uuid'],
            'gif_uuid' => $media_uuids['gif_uuid'],
            'webp_uuid' => $media_uuids['webp_uuid'],

            // Direct CDN URLs
            'mov_url' => $media_urls['mov_url'],
            'webm_url' => $media_urls['webm_url'],
            'gif_url' => $media_urls['gif_url'],
            'webp_url' => $media_urls['webp_url'],
        ]);

        // The duration helper takes the saved model, so it is filled in with a second save.
        $meme_media->duration = MemeMediaMaintenance::getDurationUsingFfmpeg($meme_media);
        $meme_media->save();

        // Add keywords as tags
        $this->attachKeywordsAsTags($meme_media, $meme_record['keywords']);

        $this->command->info('✅ Imported: '.$display_name);

        return true;
    }

    /**
     * Attach the keywords to the meme as tags of type `meme_media`.
     *
     * Tagging is best effort: a failure is reported on the console and in the
     * log but does not abort the import.
     *
     * @param  MemeMedia  $meme_media  The freshly created record.
     * @param  array  $keywords  Cleaned keyword list.
     */
    private function attachKeywordsAsTags(MemeMedia $meme_media, array $keywords): void
    {
        try {
            $meme_media->attachTags($keywords, 'meme_media');
        } catch (\Exception $e) {
            $this->command->warn("Failed to attach tags to meme media '{$meme_media->name}': ".$e->getMessage());
            Log::warning('Failed to attach tags', [
                'meme_media_id' => $meme_media->id,
                'keywords' => $keywords,
                'error' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Generate the CDN URL for a specific format.
     *
     * @param  string  $base_filename  Filename without extension.
     * @param  string  $extension  File extension, also used as the directory name.
     * @return string URL of the form {CDN_BASE_URL}/{extension}/{base_filename}.{extension}
     */
    private function generateCdnUrl(string $base_filename, string $extension): string
    {
        return self::CDN_BASE_URL."/{$extension}/{$base_filename}.{$extension}";
    }

    /**
     * Ensure the media collection used by this seeder exists, creating it if necessary.
     */
    private function ensureMediaCollectionExists(): void
    {
        $collection = MediaCollection::firstOrCreate([
            'key' => self::MEDIA_COLLECTION_KEY,
        ], [
            'name' => 'Temp Files',
            'description' => 'Temporary and external file references',
            'is_system' => true,
        ]);

        $this->command->info("📁 Using media collection: {$collection->key}");
    }
}