This commit is contained in:
ct
2025-06-13 13:51:16 +08:00
parent 63a516b124
commit 248a717898
4 changed files with 338 additions and 1 deletions

View File

@@ -0,0 +1,48 @@
<?php
/**
* Created by Reliese Model.
*/
namespace App\Models;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\SoftDeletes;
use Pgvector\Laravel\Vector;
/**
* Class BackgroundMedia
*
* @property int $id
* @property string $list_type
* @property string $area
* @property string $location_name
* @property string $status
* @property uuid|null $media_uuid
* @property string|null $media_url
* @property Vector|null $embedding
* @property Carbon|null $created_at
* @property Carbon|null $updated_at
* @property string|null $deleted_at
*/
class BackgroundMedia extends Model
{
    use SoftDeletes;

    /**
     * Database table backing this model.
     *
     * @var string
     */
    protected $table = 'background_medias';

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'list_type',
        'area',
        'location_name',
        'status',
        'media_uuid',
        'media_url',
        'embedding',
    ];

    /**
     * Attribute casts; the embedding column is cast through Pgvector\Laravel\Vector.
     *
     * @var array<string, string>
     */
    protected $casts = [
        'embedding' => Vector::class,
    ];
}

View File

@@ -26,7 +26,7 @@
* @property uuid|null $media_2_uuid
* @property string $media_1_mime_type
* @property string|null $media_2_mime_type
* @property USER-DEFINED|null $embedding
* @property Vector|null $embedding
* @property Carbon|null $created_at
* @property Carbon|null $updated_at
* @property string|null $deleted_at

View File

@@ -0,0 +1,35 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Name of the table created and dropped by this migration.
     */
    private const TABLE = 'background_medias';

    /**
     * Run the migrations: create the table that holds background media records.
     */
    public function up(): void
    {
        Schema::create(self::TABLE, static function (Blueprint $table): void {
            $table->id();

            // Descriptive columns for the location.
            $table->string('list_type');
            $table->enum('area', ['interior', 'exterior']);
            $table->string('location_name');

            // New rows default to 'pending_media'; the media columns stay nullable.
            $table->enum('status', ['pending_media', 'completed'])->default('pending_media');
            $table->uuid('media_uuid')->nullable();
            $table->string('media_url')->nullable();

            // Vector column declared with 384 (presumably the embedding dimension;
            // confirm against the embedding model in use).
            $table->vector('embedding', 384)->nullable();

            $table->timestamps();
            $table->softDeletes();
        });
    }

    /**
     * Reverse the migrations: drop the table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists(self::TABLE);
    }
};

View File

@@ -0,0 +1,254 @@
<?php
/**
* BackgroundMediaSeeder - Laravel Seeder for Background Media Records
*
* Usage: php artisan db:seed --class=BackgroundMediaSeeder
*
* Setup:
* 1. Ensure CSV files are in: database/seeders/data/areas/
* 2. Run: php artisan db:seed --class=BackgroundMediaSeeder
*
* What it does:
* - Reads location data from 12 CSV files (commercial, cultural, historical, etc.)
* - Creates BackgroundMedia records with status 'pending_media'
* - Records are ready for media generation/upload later
* - Generates vector embeddings for semantic search
* - Processes each record individually for PostgreSQL compatibility
*/
namespace Database\Seeders;
use App\Helpers\FirstParty\AI\CloudflareAI;
use App\Models\BackgroundMedia;
use Illuminate\Database\Seeder;
class BackgroundMediaSeeder extends Seeder
{
    /**
     * CSV files to import, relative to database/seeders/data/areas/.
     * The file name without its extension becomes the record's list_type.
     */
    private const CSV_FILES = [
        'commercial.csv',
        'cultural.csv',
        'historical.csv',
        'industrial.csv',
        'natural.csv',
        'public_transportation.csv',
        'public.csv',
        'recreational.csv',
        'religious.csv',
        'residential.csv',
        'shopping.csv',
        'tourist.csv',
    ];

    /**
     * Columns every CSV file must provide; the seeder reads them unconditionally.
     */
    private const REQUIRED_COLUMNS = ['location', 'area'];

    /**
     * Area values accepted when creating a record.
     */
    private const VALID_AREAS = ['interior', 'exterior'];

    /**
     * Optional CSV columns appended to the embedding text, in this order.
     */
    private const OPTIONAL_EMBEDDING_COLUMNS = ['type', 'city', 'state', 'country', 'continent'];

    /**
     * Run the database seeds.
     *
     * Imports every configured CSV file, skipping locations that already exist,
     * and prints a summary of imported / skipped / failed counts.
     */
    public function run(): void
    {
        $this->command->info('Starting background media records import...');

        $total_processed = 0;
        $total_skipped = 0;
        $total_failed = 0;

        foreach (self::CSV_FILES as $csv_file) {
            $this->command->info("\nProcessing {$csv_file}...");
            $csv_path = database_path("seeders/data/areas/{$csv_file}");

            if (! file_exists($csv_path)) {
                $this->command->warn("CSV file not found: {$csv_path}");

                continue;
            }

            try {
                $location_data = $this->readCsv($csv_path);
                $list_type = pathinfo($csv_file, PATHINFO_FILENAME);
                $location_count = count($location_data);
                $this->command->info("Found {$location_count} locations in {$csv_file}");

                foreach ($location_data as $index => $location_record) {
                    // readCsv() guarantees the required columns are present.
                    $location_name = $location_record['location'];
                    $this->command->info('Processing '.($index + 1)."/{$location_count}: {$location_name}");

                    try {
                        // Normalise before the duplicate check so it compares the same
                        // value that createRecord() would store.
                        $area = $this->normalizeArea($location_record['area']);

                        if ($this->recordExists($list_type, $location_name, $area)) {
                            $this->command->warn("Skipping existing: {$location_name} in {$area}");
                            $total_skipped++;

                            continue;
                        }

                        if ($this->createRecord($list_type, $location_record)) {
                            $total_processed++;
                            if ($total_processed % 25 === 0) {
                                $this->command->info("Processed {$total_processed} background media records...");
                            }
                        } else {
                            $total_failed++;
                        }
                    } catch (\Exception $e) {
                        $total_failed++;
                        $this->command->error("Failed to import: {$location_name} - {$e->getMessage()}");
                    }
                }
            } catch (\Exception $e) {
                $this->command->error("Failed to process {$csv_file}: {$e->getMessage()}");
                $total_failed++;
            }
        }

        // Summary
        $this->command->info("\nImport Summary:");
        $this->command->info("Successfully imported: {$total_processed} background media records");
        if ($total_skipped > 0) {
            $this->command->info("Skipped (existing): {$total_skipped} records");
        }
        if ($total_failed > 0) {
            $this->command->error("Failed: {$total_failed} records");
        }
        $this->command->info("All records created with status 'pending_media' - ready for media generation/upload");
    }

    /**
     * Check whether a record with the same list type, location name and area exists.
     *
     * Best-effort: if the lookup itself fails, a warning is printed and false is
     * returned so the import of this row is still attempted.
     */
    private function recordExists(string $list_type, string $location_name, string $area): bool
    {
        try {
            return BackgroundMedia::where('list_type', $list_type)
                ->where('location_name', $location_name)
                ->where('area', $area)
                ->exists();
        } catch (\Exception $e) {
            $this->command->warn("Could not check duplicate for {$location_name}: {$e->getMessage()}");

            return false;
        }
    }

    /**
     * Read a CSV file and return its data rows keyed by the header names.
     *
     * The file is streamed with fgetcsv() so quoted fields (including ones that
     * contain commas) and CRLF line endings are parsed correctly. A leading UTF-8
     * BOM and surrounding whitespace are removed from header names. Blank lines
     * are ignored; rows whose column count differs from the header are skipped
     * with a warning.
     *
     * @return list<array<string, string>>
     *
     * @throws \RuntimeException when the file is missing, cannot be opened, has no
     *                           header row, or lacks a required column
     */
    private function readCsv(string $csv_path): array
    {
        if (! file_exists($csv_path)) {
            throw new \RuntimeException("CSV file not found: {$csv_path}");
        }

        $handle = fopen($csv_path, 'rb');
        if ($handle === false) {
            throw new \RuntimeException("Unable to open CSV file: {$csv_path}");
        }

        try {
            $headers = fgetcsv($handle, null, ',', '"', '\\');
            if (! is_array($headers) || $headers === [null]) {
                throw new \RuntimeException("CSV file has no header row: {$csv_path}");
            }

            // A UTF-8 byte-order mark would otherwise become part of the first column name.
            if (strncmp((string) $headers[0], "\xEF\xBB\xBF", 3) === 0) {
                $headers[0] = substr((string) $headers[0], 3);
            }
            $headers = array_map(static fn ($header): string => trim((string) $header), $headers);

            $missing_columns = array_diff(self::REQUIRED_COLUMNS, $headers);
            if ($missing_columns !== []) {
                throw new \RuntimeException(
                    'CSV file is missing required column(s) '.implode(', ', $missing_columns).": {$csv_path}"
                );
            }

            $header_count = count($headers);
            $location_data = [];
            $row_number = 1; // The header is row 1.

            while (($row = fgetcsv($handle, null, ',', '"', '\\')) !== false) {
                $row_number++;

                // fgetcsv() yields [null] for an empty line; also ignore whitespace-only lines.
                if ($row === [null] || (count($row) === 1 && trim((string) $row[0]) === '')) {
                    continue;
                }

                if (count($row) !== $header_count) {
                    $this->command->warn(
                        "Skipping malformed row {$row_number} in {$csv_path}: expected {$header_count} columns, got ".count($row)
                    );

                    continue;
                }

                $location_data[] = array_combine($headers, $row);
            }

            return $location_data;
        } finally {
            fclose($handle);
        }
    }

    /**
     * Create a background media record with status 'pending_media'.
     *
     * Returns false (after printing a warning) when the area value is invalid or
     * a matching record already exists. A failure to generate the embedding is
     * not fatal: the record is stored with a null embedding.
     *
     * @param  array<string, string>  $location_record  CSV row; must contain 'location' and 'area'
     *
     * @throws \Exception when the duplicate lookup or the insert fails; the caller reports it
     */
    private function createRecord(string $list_type, array $location_record): bool
    {
        $location_name = $location_record['location'];
        $area = $this->normalizeArea($location_record['area']);

        if (! in_array($area, self::VALID_AREAS, true)) {
            $this->command->warn("Invalid area value '{$area}' for {$location_name}, skipping...");

            return false;
        }

        // Generate embedding based on location data
        try {
            $embedding_text = $this->buildEmbeddingText($list_type, $location_record);
            $embedding = CloudflareAI::getVectorEmbeddingBgeSmall($embedding_text);
        } catch (\Exception $e) {
            $this->command->warn("Failed to generate embedding for {$location_name}: {$e->getMessage()}");
            $embedding = null; // Continue without embedding
        }

        // Re-check right before inserting; unlike recordExists(), a failing lookup
        // is not swallowed here and propagates to the caller.
        $already_exists = BackgroundMedia::where('list_type', $list_type)
            ->where('location_name', $location_name)
            ->where('area', $area)
            ->exists();

        if ($already_exists) {
            $this->command->warn("Record already exists for {$location_name}, skipping...");

            return false;
        }

        BackgroundMedia::create([
            'list_type' => $list_type,
            'area' => $area,
            'location_name' => $location_name,
            'status' => 'pending_media', // Default status - ready for media generation
            'media_uuid' => null, // Will be set when media is generated/uploaded
            'media_url' => null, // Will be set when media is generated/uploaded
            'embedding' => $embedding,
        ]);

        $this->command->info("Created: {$location_name} ({$list_type}, {$area})");

        return true;
    }

    /**
     * Build the text that is sent to the embedding model.
     *
     * Joins the list type, location, area and any non-blank optional columns
     * (type, city, state, country, continent) with single spaces.
     *
     * @param  array<string, string>  $location_record  CSV row; must contain 'location' and 'area'
     */
    private function buildEmbeddingText(string $list_type, array $location_record): string
    {
        $text_parts = [
            $list_type,
            $location_record['location'],
            $location_record['area'],
        ];

        foreach (self::OPTIONAL_EMBEDDING_COLUMNS as $optional_column) {
            $text_parts[] = $location_record[$optional_column] ?? '';
        }

        // Compare against '' rather than using empty(), which would also drop the string "0".
        $text_parts = array_filter(
            $text_parts,
            static fn ($part): bool => trim((string) $part) !== ''
        );

        return implode(' ', $text_parts);
    }

    /**
     * Normalise a raw CSV area value for comparison with the allowed area values.
     */
    private function normalizeArea(string $area): string
    {
        return strtolower(trim($area));
    }
}