255 lines
8.7 KiB
PHP
255 lines
8.7 KiB
PHP
<?php
|
|
|
|
/**
 * BackgroundMediaSeeder - Laravel seeder for background media records.
 *
 * Usage: php artisan db:seed --class=BackgroundMediaSeeder
 *
 * Setup:
 * 1. Ensure the CSV files are in: database/seeders/data/areas/
 * 2. Run: php artisan db:seed --class=BackgroundMediaSeeder
 *
 * What it does:
 * - Reads location data from 12 CSV files (commercial, cultural, historical, etc.)
 * - Creates BackgroundMedia records with status 'pending_media'
 * - Records are ready for media generation/upload later
 * - Generates vector embeddings for semantic search
 * - Processes each record individually for PostgreSQL compatibility
 */
|
|
|
|
namespace Database\Seeders;
|
|
|
|
use App\Helpers\FirstParty\AI\CloudflareAI;
|
|
use App\Models\BackgroundMedia;
|
|
use Illuminate\Database\Seeder;
|
|
|
|
/**
 * Imports location rows from the category CSV files into BackgroundMedia.
 *
 * Each row becomes one record with status 'pending_media'. Rows are handled
 * one at a time so that a single bad row is reported and counted without
 * stopping the rest of the import.
 */
class BackgroundMediaSeeder extends Seeder
{
    /**
     * CSV files to import, relative to database/seeders/data/areas/.
     * The file name without its extension is stored as the record's list_type.
     */
    private const CSV_FILES = [
        'commercial.csv',
        'cultural.csv',
        'historical.csv',
        'industrial.csv',
        'natural.csv',
        'public_transportation.csv',
        'public.csv',
        'recreational.csv',
        'religious.csv',
        'residential.csv',
        'shopping.csv',
        'tourist.csv',
    ];

    /**
     * Values accepted for the `area` column after normalization.
     */
    private const VALID_AREAS = ['interior', 'exterior'];

    /**
     * Run the database seeds.
     *
     * Walks every configured CSV file, skips rows that already exist,
     * creates the remaining ones and prints a summary of the outcome.
     */
    public function run(): void
    {
        $this->command->info('Starting background media records import...');

        $total_processed = 0;
        $total_skipped = 0;
        $total_failed = 0;

        foreach (self::CSV_FILES as $csv_file) {
            $this->command->info("\nProcessing {$csv_file}...");

            $csv_path = database_path("seeders/data/areas/{$csv_file}");

            if (! file_exists($csv_path)) {
                $this->command->warn("CSV file not found: {$csv_path}");

                continue;
            }

            try {
                $location_data = $this->readCsv($csv_path);
                $list_type = pathinfo($csv_file, PATHINFO_FILENAME);
                $location_count = count($location_data);

                $this->command->info("Found {$location_count} locations in {$csv_file}");

                foreach ($location_data as $index => $location_record) {
                    // Read defensively: a CSV without a 'location' column must not
                    // raise outside the per-row try and abort the whole file.
                    $location_name = (string) ($location_record['location'] ?? '');

                    $this->command->info('Processing '.($index + 1)."/{$location_count}: {$location_name}");

                    try {
                        if (trim($location_name) === '') {
                            $this->command->warn('Row '.($index + 1)." in {$csv_file} has no location name, skipping...");
                            $total_failed++;

                            continue;
                        }

                        // Use the same normalized area that createRecord() stores, so the
                        // duplicate check compares like with like. Invalid areas are not
                        // queried at all; createRecord() reports and rejects them.
                        $area = $this->normalizeArea($location_record);

                        if (in_array($area, self::VALID_AREAS, true)
                            && $this->recordExists($list_type, $location_name, $area)
                        ) {
                            $this->command->warn("Skipping existing: {$location_name} in {$area}");
                            $total_skipped++;

                            continue;
                        }

                        if ($this->createRecord($list_type, $location_record)) {
                            $total_processed++;

                            if ($total_processed % 25 === 0) {
                                $this->command->info("Processed {$total_processed} background media records...");
                            }
                        } else {
                            $total_failed++;
                        }
                    } catch (\Exception $e) {
                        $total_failed++;
                        $this->command->error("Failed to import: {$location_name} - {$e->getMessage()}");
                    }
                }
            } catch (\Exception $e) {
                $this->command->error("Failed to process {$csv_file}: {$e->getMessage()}");
                $total_failed++;
            }
        }

        // Summary
        $this->command->info("\nImport Summary:");
        $this->command->info("Successfully imported: {$total_processed} background media records");

        if ($total_skipped > 0) {
            $this->command->info("Skipped (existing): {$total_skipped} records");
        }

        if ($total_failed > 0) {
            $this->command->error("Failed: {$total_failed} records");
        }

        $this->command->info("All records created with status 'pending_media' - ready for media generation/upload");
    }

    /**
     * Check whether a record with the same list type, location name and area exists.
     *
     * A failing lookup is reported as a warning and treated as "not found";
     * createRecord() performs its own existence check before inserting.
     */
    private function recordExists(string $list_type, string $location_name, string $area): bool
    {
        try {
            return BackgroundMedia::where('list_type', $list_type)
                ->where('location_name', $location_name)
                ->where('area', $area)
                ->exists();
        } catch (\Exception $e) {
            $this->command->warn("Could not check duplicate for {$location_name}: {$e->getMessage()}");

            return false;
        }
    }

    /**
     * Read a CSV file and return its rows keyed by the header names.
     *
     * The file is streamed with fgetcsv() so quoted fields that contain commas
     * or line breaks, and CRLF line endings, are parsed correctly. A leading
     * UTF-8 byte order mark is skipped. Blank lines are ignored; rows whose
     * column count differs from the header are skipped with a warning.
     *
     * @return array<int, array<string, string>>
     *
     * @throws \RuntimeException when the file is missing or cannot be opened
     */
    private function readCsv(string $csv_path): array
    {
        if (! file_exists($csv_path)) {
            throw new \RuntimeException("CSV file not found: {$csv_path}");
        }

        $handle = fopen($csv_path, 'rb');

        if ($handle === false) {
            throw new \RuntimeException("Unable to open CSV file: {$csv_path}");
        }

        try {
            // Skip a UTF-8 BOM if present, otherwise start again from byte 0.
            if (fread($handle, 3) !== "\xEF\xBB\xBF") {
                rewind($handle);
            }

            $headers = fgetcsv($handle, 0, ',', '"', '\\');

            // false = empty file, [null] = blank header line: nothing to import.
            if ($headers === false || $headers === [null]) {
                return [];
            }

            $headers = array_map('trim', $headers);
            $header_count = count($headers);

            $location_data = [];
            $row_number = 1; // the header is row 1

            while (($row = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
                $row_number++;

                // fgetcsv() yields [null] for an empty line; also ignore whitespace-only lines.
                if (count($row) === 1 && trim((string) $row[0]) === '') {
                    continue;
                }

                if (count($row) !== $header_count) {
                    $this->command->warn(
                        "Skipping malformed row {$row_number} in {$csv_path}: expected {$header_count} columns, found ".count($row)
                    );

                    continue;
                }

                $location_data[] = array_combine($headers, $row);
            }

            return $location_data;
        } finally {
            fclose($handle);
        }
    }

    /**
     * Create a background media record for one CSV row.
     *
     * Returns false without inserting when the row's area is not one of
     * VALID_AREAS or when a matching record already exists. A failure to
     * generate the embedding is only a warning: the record is still created,
     * with a null embedding.
     *
     * @param array<string, string> $location_record
     *
     * @throws \Exception when the existence query or the insert fails; the
     *                    caller reports it, so it is not logged here as well
     */
    private function createRecord(string $list_type, array $location_record): bool
    {
        $location_name = (string) ($location_record['location'] ?? '');
        $area = $this->normalizeArea($location_record);

        if (! in_array($area, self::VALID_AREAS, true)) {
            $this->command->warn("Invalid area value '{$area}' for {$location_name}, skipping...");

            return false;
        }

        // Generate embedding based on location data
        try {
            $embedding_text = $this->buildEmbeddingText($list_type, $location_record);
            $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($embedding_text);
        } catch (\Exception $e) {
            $this->command->warn("Failed to generate embedding for {$location_name}: {$e->getMessage()}");
            $embedding = null; // Continue without embedding
        }

        // Check once more right before inserting, in case the record appeared
        // while the embedding was being generated.
        $already_exists = BackgroundMedia::where('list_type', $list_type)
            ->where('location_name', $location_name)
            ->where('area', $area)
            ->exists();

        if ($already_exists) {
            $this->command->warn("Record already exists for {$location_name}, skipping...");

            return false;
        }

        BackgroundMedia::create([
            'list_type' => $list_type,
            'area' => $area,
            'location_name' => $location_name,
            'status' => 'pending_media', // Default status - ready for media generation
            'media_uuid' => null, // Will be set when media is generated/uploaded
            'media_url' => null, // Will be set when media is generated/uploaded
            'embedding' => $embedding,
        ]);

        $this->command->info("Created: {$location_name} ({$list_type}, {$area})");

        return true;
    }

    /**
     * Build the text that is sent to the embedding model for one CSV row.
     *
     * Joins the list type, location, area, type and any available geographic
     * columns with single spaces, leaving out values that are missing or blank.
     *
     * @param array<string, string> $location_record
     */
    private function buildEmbeddingText(string $list_type, array $location_record): string
    {
        $text_parts = [
            $list_type,
            $location_record['location'] ?? '',
            $location_record['area'] ?? '',
            $location_record['type'] ?? '',
        ];

        // Add geographic information if available
        foreach (['city', 'state', 'country', 'continent'] as $geo_key) {
            if (isset($location_record[$geo_key])) {
                $text_parts[] = $location_record[$geo_key];
            }
        }

        // Compare against '' rather than using empty(), which would also drop "0".
        $text_parts = array_filter($text_parts, static function ($part): bool {
            return trim((string) $part) !== '';
        });

        return implode(' ', $text_parts);
    }

    /**
     * Return the row's area value trimmed and lower-cased, or '' when absent.
     *
     * @param array<string, string> $location_record
     */
    private function normalizeArea(array $location_record): string
    {
        return strtolower(trim((string) ($location_record['area'] ?? '')));
    }
}
|