Update
This commit is contained in:
124
app/Console/Commands/FindDuplicateImages.php
Normal file
124
app/Console/Commands/FindDuplicateImages.php
Normal file
@@ -0,0 +1,124 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class FindDuplicateImages extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:find-duplicate-images {--threshold=5 : Hamming distance threshold for duplicates}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Find duplicate images using Hamming distance comparison of WebP hashes';

    /**
     * Service used to compute the Hamming distance between two stored hashes.
     */
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();

        $this->imageHashService = $imageHashService;
    }

    /**
     * Execute the console command.
     *
     * Loads every MemeMedia row that has both an image hash and a WebP URL,
     * groups rows whose hashes are within the configured distance of a
     * reference row, and prints the groups followed by summary totals.
     *
     * @return int self::SUCCESS on a completed run (including "nothing found"),
     *             self::FAILURE when --threshold is not a non-negative integer.
     */
    public function handle(): int
    {
        // Validate instead of casting: (int) would silently turn "abc" into 0
        // and accept negative values that can never match anything.
        $threshold = filter_var(
            $this->option('threshold'),
            FILTER_VALIDATE_INT,
            ['options' => ['min_range' => 0]]
        );

        if ($threshold === false) {
            $this->error('The --threshold option must be a non-negative integer.');

            return self::FAILURE;
        }

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->get(['id', 'name', 'image_hash', 'webp_url']);

        if ($records->isEmpty()) {
            $this->info('No records with image hashes found. Run app:generate-image-hashes first.');

            return self::SUCCESS;
        }

        $this->info("Checking {$records->count()} records for duplicates with threshold: {$threshold}");

        $duplicates = $this->findDuplicateGroups($records, $threshold);

        if ($duplicates === []) {
            $this->info('No duplicates found.');

            return self::SUCCESS;
        }

        $this->renderDuplicateGroups($duplicates);

        return self::SUCCESS;
    }

    /**
     * Group records whose hash is within $threshold of a reference record.
     *
     * Records are visited in iteration order. Each not-yet-grouped record is
     * used as the "original" and compared against every other not-yet-grouped
     * record; matches are attached to it and excluded from all later groups.
     * Distances are always measured against the group's original, so members
     * of one group are not necessarily within $threshold of each other.
     *
     * @param iterable $records   Records exposing id, name, image_hash and webp_url.
     * @param int      $threshold Maximum distance (inclusive) that counts as a duplicate.
     *
     * @return array<int, array{
     *     original: array{id: mixed, name: mixed, url: mixed},
     *     duplicates: array<int, array{id: mixed, name: mixed, distance: mixed, url: mixed}>
     * }>
     */
    private function findDuplicateGroups(iterable $records, int $threshold): array
    {
        $groups = [];

        // Ids already assigned to a group, stored as array keys so membership
        // is a constant-time isset() rather than a linear in_array() scan.
        $claimed = [];

        foreach ($records as $record) {
            if (isset($claimed[$record->id])) {
                continue;
            }

            $matches = [];

            foreach ($records as $candidate) {
                if ($record->id === $candidate->id || isset($claimed[$candidate->id])) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $candidate->image_hash
                );

                if ($distance <= $threshold) {
                    $matches[] = [
                        'id' => $candidate->id,
                        'name' => $candidate->name,
                        'distance' => $distance,
                        'url' => $candidate->webp_url,
                    ];
                    $claimed[$candidate->id] = true;
                }
            }

            // A record only becomes an "original" (and is claimed) when it
            // actually has at least one match.
            if ($matches !== []) {
                $groups[] = [
                    'original' => [
                        'id' => $record->id,
                        'name' => $record->name,
                        'url' => $record->webp_url,
                    ],
                    'duplicates' => $matches,
                ];
                $claimed[$record->id] = true;
            }
        }

        return $groups;
    }

    /**
     * Print every duplicate group followed by the summary totals.
     *
     * @param array<int, array{original: array, duplicates: array}> $groups Non-empty list of groups.
     */
    private function renderDuplicateGroups(array $groups): void
    {
        $groupCount = count($groups);

        $this->info('Found '.$groupCount.' duplicate groups:');

        foreach ($groups as $group) {
            $this->newLine();
            $this->line("Original: [{$group['original']['id']}] {$group['original']['name']}");
            $this->line("URL: {$group['original']['url']}");

            foreach ($group['duplicates'] as $duplicate) {
                $this->line(" → [{$duplicate['id']}] {$duplicate['name']} (distance: {$duplicate['distance']})");
                $this->line(" URL: {$duplicate['url']}");
            }
        }

        $this->newLine();
        $this->info('Total duplicate groups: '.$groupCount);

        // Count matched records only; each group's original is not included.
        $totalDuplicates = array_sum(array_map(
            static fn (array $group): int => count($group['duplicates']),
            $groups
        ));

        $this->info("Total duplicate records: {$totalDuplicates}");
    }
}
|
||||
84
app/Console/Commands/GenerateImageHashes.php
Normal file
84
app/Console/Commands/GenerateImageHashes.php
Normal file
@@ -0,0 +1,84 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class GenerateImageHashes extends Command
{
    /**
     * Artisan signature: command name plus the optional --force switch.
     *
     * @var string
     */
    protected $signature = 'app:generate-image-hashes {--force : Force regeneration of existing hashes}';

    /**
     * Short description of the command.
     *
     * @var string
     */
    protected $description = 'Generate image hashes for existing WebP URLs in MemeMedia records';

    /**
     * Service that produces a hash for the image behind a URL.
     */
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();

        $this->imageHashService = $imageHashService;
    }

    /**
     * Run the command.
     *
     * Selects MemeMedia rows that have a WebP URL (only those without a hash
     * unless --force is given), asks the hash service for a hash per row,
     * stores each truthy result in image_hash, and reports success/failure
     * counts alongside a progress bar.
     */
    public function handle()
    {
        $pending = MemeMedia::query();

        // Without --force, rows that already carry a hash are left untouched.
        if (! $this->option('force')) {
            $pending->whereNull('image_hash');
        }

        $items = $pending->whereNotNull('webp_url')->get();
        $total = $items->count();

        if ($total === 0) {
            $this->info('No records found to process.');

            return;
        }

        $this->info("Processing {$total} records...");

        $bar = $this->output->createProgressBar($total);
        $bar->start();

        $succeeded = 0;
        $failures = 0;

        foreach ($items as $item) {
            $digest = $this->imageHashService->generateHashFromUrl($item->webp_url);

            if (! $digest) {
                // A falsy result is counted as a failure and reported on its
                // own line so the message does not collide with the bar.
                $failures++;
                $this->newLine();
                $this->error("Failed to generate hash for ID: {$item->id} - {$item->webp_url}");
            } else {
                $item->update(['image_hash' => $digest]);
                $succeeded++;
            }

            $bar->advance();
        }

        $bar->finish();
        $this->newLine();

        $this->info('Processing complete!');
        $this->info("Processed: {$succeeded}");
        $this->info("Failed: {$failures}");
    }
}
|
||||
Reference in New Issue
Block a user