Files
memefast/app/Console/Commands/FindDuplicateImages.php
2025-07-15 20:03:10 +08:00

125 lines
3.7 KiB
PHP

<?php
namespace App\Console\Commands;
use App\Helpers\FirstParty\ImageHash\ImageHashService;
use App\Models\MemeMedia;
use Illuminate\Console\Command;
class FindDuplicateImages extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:find-duplicate-images {--threshold=5 : Hamming distance threshold for duplicates}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Find duplicate images using Hamming distance comparison of WebP hashes';

    /**
     * Service used to compute the Hamming distance between two stored image hashes.
     */
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();

        $this->imageHashService = $imageHashService;
    }

    /**
     * Execute the console command.
     *
     * Loads every MemeMedia row that has both an image_hash and a webp_url,
     * groups rows whose hashes are within the --threshold Hamming distance of
     * a group's first member, and prints each group followed by summary totals.
     *
     * @return int self::SUCCESS when the scan ran (even if nothing was found),
     *             self::FAILURE when --threshold is not a non-negative number.
     */
    public function handle(): int
    {
        $rawThreshold = $this->option('threshold');

        // A bare (int) cast would silently turn "abc" into 0 and accept
        // negative values that can never match anything; reject both up front.
        if (! is_numeric($rawThreshold) || (int) $rawThreshold < 0) {
            $this->error('The --threshold option must be a non-negative integer.');

            return self::FAILURE;
        }

        $threshold = (int) $rawThreshold;

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->get(['id', 'name', 'image_hash', 'webp_url']);

        if ($records->isEmpty()) {
            $this->info('No records with image hashes found. Run app:generate-image-hashes first.');

            return self::SUCCESS;
        }

        $this->info("Checking {$records->count()} records for duplicates with threshold: {$threshold}");

        $duplicates = $this->findDuplicateGroups($records->all(), $threshold);

        if ($duplicates === []) {
            $this->info('No duplicates found.');

            return self::SUCCESS;
        }

        $this->reportDuplicateGroups($duplicates);

        return self::SUCCESS;
    }

    /**
     * Greedily partition records into groups of near-identical images.
     *
     * Records are visited in the given order. The first record not yet in a
     * group becomes that group's "original"; every other ungrouped record whose
     * hash is within $threshold of the original is attached as a duplicate.
     * Distances are measured against the original only, so two duplicates in
     * the same group may be further than $threshold from each other.
     *
     * @param  array<int, MemeMedia>  $records
     * @param  int  $threshold  Maximum Hamming distance (inclusive) to count as a duplicate.
     * @return array<int, array{original: array<string, mixed>, duplicates: array<int, array<string, mixed>>}>
     */
    private function findDuplicateGroups(array $records, int $threshold): array
    {
        $groups = [];

        // IDs already placed in a group, stored as keys so membership is an
        // O(1) isset() instead of a linear in_array() scan inside the O(n^2) loop.
        $grouped = [];

        foreach ($records as $record) {
            if (isset($grouped[$record->id])) {
                continue;
            }

            $similarRecords = [];

            foreach ($records as $compareRecord) {
                if ($record->id === $compareRecord->id || isset($grouped[$compareRecord->id])) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $compareRecord->image_hash
                );

                if ($distance <= $threshold) {
                    $similarRecords[] = [
                        'id' => $compareRecord->id,
                        'name' => $compareRecord->name,
                        'distance' => $distance,
                        'url' => $compareRecord->webp_url,
                    ];

                    // Claim the match immediately so it cannot join another group.
                    $grouped[$compareRecord->id] = true;
                }
            }

            // A record only becomes an "original" (and is marked as grouped)
            // when it actually has at least one duplicate.
            if ($similarRecords !== []) {
                $groups[] = [
                    'original' => [
                        'id' => $record->id,
                        'name' => $record->name,
                        'url' => $record->webp_url,
                    ],
                    'duplicates' => $similarRecords,
                ];

                $grouped[$record->id] = true;
            }
        }

        return $groups;
    }

    /**
     * Print every duplicate group followed by the group and record totals.
     *
     * @param  array<int, array{original: array<string, mixed>, duplicates: array<int, array<string, mixed>>}>  $groups
     */
    private function reportDuplicateGroups(array $groups): void
    {
        $groupCount = count($groups);

        $this->info('Found '.$groupCount.' duplicate groups:');

        foreach ($groups as $group) {
            $this->newLine();
            $this->line("Original: [{$group['original']['id']}] {$group['original']['name']}");
            $this->line("URL: {$group['original']['url']}");

            foreach ($group['duplicates'] as $duplicate) {
                $this->line(" → [{$duplicate['id']}] {$duplicate['name']} (distance: {$duplicate['distance']})");
                $this->line(" URL: {$duplicate['url']}");
            }
        }

        $this->newLine();
        $this->info('Total duplicate groups: '.$groupCount);

        $totalDuplicates = array_sum(array_map(
            static fn (array $group): int => count($group['duplicates']),
            $groups
        ));

        $this->info("Total duplicate records: {$totalDuplicates}");
    }
}