Update
This commit is contained in:
124
app/Console/Commands/FindDuplicateImages.php
Normal file
124
app/Console/Commands/FindDuplicateImages.php
Normal file
@@ -0,0 +1,124 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class FindDuplicateImages extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:find-duplicate-images {--threshold=5 : Hamming distance threshold for duplicates}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Find duplicate images using Hamming distance comparison of WebP hashes';

    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();

        $this->imageHashService = $imageHashService;
    }

    /**
     * Execute the console command.
     *
     * Loads every MemeMedia row that has both an image hash and a WebP URL,
     * groups rows whose hash distance is within the --threshold option, and
     * prints each group followed by summary totals.
     */
    public function handle()
    {
        $threshold = (int) $this->option('threshold');

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->get(['id', 'name', 'image_hash', 'webp_url']);

        if ($records->isEmpty()) {
            $this->info('No records with image hashes found. Run app:generate-image-hashes first.');

            return;
        }

        $this->info("Checking {$records->count()} records for duplicates with threshold: {$threshold}");

        $duplicates = $this->groupDuplicates($records, $threshold);

        if (empty($duplicates)) {
            $this->info('No duplicates found.');

            return;
        }

        $this->info('Found '.count($duplicates).' duplicate groups:');

        foreach ($duplicates as $group) {
            $this->newLine();
            $this->line("Original: [{$group['original']['id']}] {$group['original']['name']}");
            $this->line("URL: {$group['original']['url']}");

            foreach ($group['duplicates'] as $duplicate) {
                $this->line(" → [{$duplicate['id']}] {$duplicate['name']} (distance: {$duplicate['distance']})");
                $this->line(" URL: {$duplicate['url']}");
            }
        }

        $this->newLine();
        $this->info('Total duplicate groups: '.count($duplicates));

        $totalDuplicates = array_sum(array_map(
            static fn (array $group): int => count($group['duplicates']),
            $duplicates
        ));

        $this->info("Total duplicate records: {$totalDuplicates}");
    }

    /**
     * Partition records into groups of near-identical images.
     *
     * The first unclaimed record of a group becomes its "original"; every
     * later unclaimed record within $threshold of it is attached as a
     * duplicate. A record belongs to at most one group.
     *
     * @param  iterable  $records  Re-iterable set of records exposing id, name, image_hash and webp_url.
     * @param  int  $threshold  Maximum distance (inclusive) for two hashes to count as duplicates.
     * @return array<int, array{original: array, duplicates: array}>
     */
    private function groupDuplicates(iterable $records, int $threshold): array
    {
        $duplicates = [];

        // Claimed ids are stored as array keys so membership is an O(1)
        // isset() instead of a linear in_array() scan inside the nested loop.
        $processed = [];

        foreach ($records as $record) {
            if (isset($processed[$record->id])) {
                continue;
            }

            $similarRecords = [];

            foreach ($records as $compareRecord) {
                if ($record->id === $compareRecord->id || isset($processed[$compareRecord->id])) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $compareRecord->image_hash
                );

                if ($distance <= $threshold) {
                    $similarRecords[] = [
                        'id' => $compareRecord->id,
                        'name' => $compareRecord->name,
                        'distance' => $distance,
                        'url' => $compareRecord->webp_url,
                    ];
                    $processed[$compareRecord->id] = true;
                }
            }

            if (! empty($similarRecords)) {
                $duplicates[] = [
                    'original' => [
                        'id' => $record->id,
                        'name' => $record->name,
                        'url' => $record->webp_url,
                    ],
                    'duplicates' => $similarRecords,
                ];
                $processed[$record->id] = true;
            }
        }

        return $duplicates;
    }
}
|
||||
84
app/Console/Commands/GenerateImageHashes.php
Normal file
84
app/Console/Commands/GenerateImageHashes.php
Normal file
@@ -0,0 +1,84 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class GenerateImageHashes extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:generate-image-hashes {--force : Force regeneration of existing hashes}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Generate image hashes for existing WebP URLs in MemeMedia records';

    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();

        $this->imageHashService = $imageHashService;
    }

    /**
     * Execute the console command.
     *
     * Selects MemeMedia rows that have a WebP URL (only those still missing
     * a hash unless --force is given), hashes each image behind a progress
     * bar, and reports how many rows were hashed and how many failed.
     */
    public function handle()
    {
        $skipExisting = ! $this->option('force');

        $records = MemeMedia::query()
            ->when($skipExisting, fn ($builder) => $builder->whereNull('image_hash'))
            ->whereNotNull('webp_url')
            ->get();

        if ($records->isEmpty()) {
            $this->info('No records found to process.');

            return;
        }

        $total = $records->count();
        $this->info("Processing {$total} records...");

        $progressBar = $this->output->createProgressBar($total);
        $progressBar->start();

        $processed = 0;
        $failed = 0;

        foreach ($records as $record) {
            $hash = $this->imageHashService->generateHashFromUrl($record->webp_url);

            if (! $hash) {
                // Break out of the progress bar line before printing the error.
                $failed++;
                $this->newLine();
                $this->error("Failed to generate hash for ID: {$record->id} - {$record->webp_url}");
            } else {
                $record->update(['image_hash' => $hash]);
                $processed++;
            }

            $progressBar->advance();
        }

        $progressBar->finish();
        $this->newLine();

        $this->info('Processing complete!');
        $this->info("Processed: {$processed}");
        $this->info("Failed: {$failed}");
    }
}
|
||||
100
app/Helpers/FirstParty/ImageHash/ImageHashService.php
Normal file
100
app/Helpers/FirstParty/ImageHash/ImageHashService.php
Normal file
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
namespace App\Helpers\FirstParty\ImageHash;
|
||||
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Jenssegers\ImageHash\ImageHash;
|
||||
use Jenssegers\ImageHash\Implementations\DifferenceHash;
|
||||
|
||||
class ImageHashService
{
    /**
     * Number of set bits for every 4-bit value (index = nibble value).
     */
    private const NIBBLE_POPCOUNT = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4];

    private ImageHash $hasher;

    public function __construct()
    {
        $this->hasher = new ImageHash(new DifferenceHash);
    }

    /**
     * Download the image at $url and return its hash as a hex string.
     *
     * @return string|null The hex hash, or null when the download is not
     *                     successful or any exception is raised (both are logged).
     */
    public function generateHashFromUrl(string $url): ?string
    {
        try {
            $response = Http::timeout(30)->get($url);

            if (! $response->successful()) {
                Log::warning("Failed to download image from URL: {$url}");

                return null;
            }

            return $this->generateHashFromData($response->body());
        } catch (\Exception $e) {
            Log::error("Error generating hash from URL {$url}: ".$e->getMessage());

            return null;
        }
    }

    /**
     * Hash raw image bytes and return the hash as a hex string.
     *
     * The bytes are written to a temporary file because the hasher is given
     * a file path; the file is removed again whether or not hashing succeeds.
     *
     * @return string|null The hex hash, or null when the temporary file cannot
     *                     be written or hashing raises an exception (logged).
     */
    public function generateHashFromData(string $imageData): ?string
    {
        $tempFile = null;

        try {
            $tempFile = tempnam(sys_get_temp_dir(), 'imagehash_');

            if ($tempFile === false) {
                Log::error('Error generating hash from image data: unable to create temporary file');

                return null;
            }

            if (file_put_contents($tempFile, $imageData) === false) {
                Log::error('Error generating hash from image data: unable to write temporary file');

                return null;
            }

            return $this->hasher->hash($tempFile)->toHex();
        } catch (\Exception $e) {
            Log::error('Error generating hash from image data: '.$e->getMessage());

            return null;
        } finally {
            // Always clean up, including when hash() throws.
            if (is_string($tempFile) && is_file($tempFile)) {
                unlink($tempFile);
            }
        }
    }

    /**
     * Hamming distance between two hex-encoded hashes, counted in bits.
     *
     * The shorter hash is left-padded with zeros so both have equal length.
     * Each pair of hex digits is XOR-ed and the differing bits are summed, so
     * "f" vs "0" contributes 4, not 1. Comparison is case-insensitive.
     *
     * @return int Number of differing bits, or PHP_INT_MAX when either hash
     *             is empty or contains non-hexadecimal characters.
     */
    public function calculateHammingDistance(string $hash1, string $hash2): int
    {
        // ctype_xdigit() is false for the empty string, so this also rejects
        // empty hashes while still accepting an all-zero hash such as "0".
        if (! ctype_xdigit($hash1) || ! ctype_xdigit($hash2)) {
            return PHP_INT_MAX; // Return max distance for invalid hashes
        }

        // Pad shorter hash with zeros to make them equal length
        $maxLength = max(strlen($hash1), strlen($hash2));
        $hash1 = str_pad($hash1, $maxLength, '0', STR_PAD_LEFT);
        $hash2 = str_pad($hash2, $maxLength, '0', STR_PAD_LEFT);

        $distance = 0;

        for ($i = 0; $i < $maxLength; $i++) {
            $distance += self::NIBBLE_POPCOUNT[hexdec($hash1[$i]) ^ hexdec($hash2[$i])];
        }

        return $distance;
    }

    /**
     * Whether two hashes are within $threshold bits of each other (inclusive).
     */
    public function areHashesSimilar(string $hash1, string $hash2, int $threshold = 5): bool
    {
        return $this->calculateHammingDistance($hash1, $hash2) <= $threshold;
    }

    /**
     * Find every hash within $threshold of $targetHash.
     *
     * @param  array  $hashes  Map of identifier => hex hash.
     * @return array Map of identifier => distance, sorted by ascending
     *               distance with the original keys preserved.
     */
    public function findSimilarHashes(string $targetHash, array $hashes, int $threshold = 5): array
    {
        $similar = [];

        foreach ($hashes as $id => $hash) {
            // Compute the distance once and reuse it for both test and result.
            $distance = $this->calculateHammingDistance($targetHash, $hash);

            if ($distance <= $threshold) {
                $similar[$id] = $distance;
            }
        }

        asort($similar);

        return $similar;
    }
}
|
||||
170
app/Http/Controllers/AdminDuplicateController.php
Normal file
170
app/Http/Controllers/AdminDuplicateController.php
Normal file
@@ -0,0 +1,170 @@
|
||||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
use Illuminate\Http\Request;
|
||||
use Inertia\Inertia;
|
||||
|
||||
class AdminDuplicateController extends Controller
{
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        $this->imageHashService = $imageHashService;
    }

    /**
     * Render the duplicate-management admin page.
     */
    public function index()
    {
        return Inertia::render('admin/duplicate-management', [
            'title' => 'Duplicate Management',
        ]);
    }

    /**
     * Scan all hashed MemeMedia records and return groups of near-duplicates.
     *
     * Reads an optional "threshold" request input (default 5) and responds
     * with JSON containing the groups, the number of groups and the total
     * number of duplicate records.
     */
    public function scan(Request $request)
    {
        // Request input arrives as a string; cast so the distance comparison
        // below is always integer-to-integer.
        $threshold = (int) $request->input('threshold', 5);

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->where('image_hash', '!=', '')
            ->get(['id', 'name', 'image_hash', 'webp_url', 'group']);

        if ($records->isEmpty()) {
            return response()->json([
                'duplicates' => [],
                'message' => 'No records with image hashes found.',
            ]);
        }

        $duplicates = $this->findDuplicateGroups($records, $threshold);

        return response()->json([
            'duplicates' => $duplicates,
            'total_groups' => count($duplicates),
            'total_duplicates' => array_sum(array_map(
                static fn (array $group): int => count($group['duplicates']),
                $duplicates
            )),
        ]);
    }

    /**
     * Delete the MemeMedia record identified by the "id" request input.
     */
    public function delete(Request $request)
    {
        $request->validate([
            'id' => 'required|exists:meme_medias,id',
        ]);

        $record = MemeMedia::findOrFail($request->id);

        // NOTE(review): this is a soft delete only if MemeMedia uses the
        // SoftDeletes trait — the model definition is not visible here.
        $record->delete();

        return response()->json([
            'success' => true,
            'message' => "Deleted '{$record->name}' (Group {$record->group})",
        ]);
    }

    /**
     * Recompute and store the image hash of one record from its WebP URL.
     *
     * Responds 400 when the record has no WebP URL and 500 when the hash
     * cannot be generated.
     */
    public function regenerateHash(Request $request)
    {
        $request->validate([
            'id' => 'required|exists:meme_medias,id',
        ]);

        $record = MemeMedia::findOrFail($request->id);

        if (! $record->webp_url) {
            return response()->json([
                'success' => false,
                'message' => 'No WebP URL found for this record',
            ], 400);
        }

        $hash = $this->imageHashService->generateHashFromUrl($record->webp_url);

        if (! $hash) {
            return response()->json([
                'success' => false,
                'message' => 'Failed to generate hash',
            ], 500);
        }

        $record->update(['image_hash' => $hash]);

        return response()->json([
            'success' => true,
            'message' => 'Hash regenerated successfully',
            'hash' => $hash,
        ]);
    }

    /**
     * Partition records into groups of near-identical images.
     *
     * The first unclaimed record of a group becomes its "original"; every
     * later unclaimed record within $threshold of it is attached as a
     * duplicate. A record belongs to at most one group.
     *
     * @param  iterable  $records  Re-iterable set of records exposing id, name, image_hash, webp_url and group.
     * @param  int  $threshold  Maximum distance (inclusive) for two hashes to count as duplicates.
     * @return array<int, array{original: array, duplicates: array}>
     */
    private function findDuplicateGroups(iterable $records, int $threshold): array
    {
        $duplicates = [];

        // Claimed ids are stored as array keys so membership is an O(1)
        // isset() instead of a linear in_array() scan inside the nested loop.
        $processed = [];

        foreach ($records as $record) {
            // Skip records already claimed, or whose hash is empty or invalid
            if (isset($processed[$record->id]) || empty($record->image_hash)) {
                continue;
            }

            $similarRecords = [];

            foreach ($records as $compareRecord) {
                if ($record->id === $compareRecord->id || isset($processed[$compareRecord->id])) {
                    continue;
                }

                if (empty($compareRecord->image_hash)) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $compareRecord->image_hash
                );

                // Skip if distance calculation failed (returns PHP_INT_MAX)
                if ($distance === PHP_INT_MAX) {
                    continue;
                }

                if ($distance <= $threshold) {
                    $similarRecords[] = [
                        'id' => $compareRecord->id,
                        'name' => $compareRecord->name,
                        'distance' => $distance,
                        'url' => $compareRecord->webp_url,
                        'group' => $compareRecord->group,
                    ];
                    $processed[$compareRecord->id] = true;
                }
            }

            if (empty($similarRecords)) {
                continue;
            }

            // Higher group first, then ascending id within the same group.
            usort($similarRecords, static function (array $a, array $b): int {
                if ($a['group'] == $b['group']) {
                    return $a['id'] <=> $b['id'];
                }

                return $b['group'] <=> $a['group'];
            });

            $duplicates[] = [
                'original' => [
                    'id' => $record->id,
                    'name' => $record->name,
                    'url' => $record->webp_url,
                    'group' => $record->group,
                ],
                'duplicates' => $similarRecords,
            ];
            $processed[$record->id] = true;
        }

        return $duplicates;
    }
}
|
||||
47
app/Http/Middleware/BasicAuthMiddleware.php
Normal file
47
app/Http/Middleware/BasicAuthMiddleware.php
Normal file
@@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
namespace App\Http\Middleware;
|
||||
|
||||
use Closure;
|
||||
use Illuminate\Http\Request;
|
||||
use Symfony\Component\HttpFoundation\Response;
|
||||
|
||||
class BasicAuthMiddleware
{
    /**
     * Handle an incoming request.
     *
     * Lets the request through only when its Authorization header carries
     * HTTP Basic credentials equal to BASIC_AUTH_USERNAME and
     * BASIC_AUTH_PASSWORD. Every other case — credentials not configured,
     * header missing, wrong scheme, malformed payload, mismatch — yields a
     * 401 response with a Basic challenge.
     *
     * @param  \Closure(\Illuminate\Http\Request): (\Symfony\Component\HttpFoundation\Response)  $next
     */
    public function handle(Request $request, Closure $next): Response
    {
        // NOTE(review): env() returns null outside config files once the
        // configuration is cached, which would make this deny every request.
        // Consider moving these to a config file — confirm deployment setup.
        $username = env('BASIC_AUTH_USERNAME');
        $password = env('BASIC_AUTH_PASSWORD');

        // If credentials are not set (or not plain strings), deny access
        if (! $username || ! $password || ! is_string($username) || ! is_string($password)) {
            return $this->unauthorized();
        }

        // The auth scheme name is case-insensitive, so accept "basic " too.
        $authHeader = $request->header('Authorization');
        if (! is_string($authHeader) || strncasecmp($authHeader, 'Basic ', 6) !== 0) {
            return $this->unauthorized();
        }

        // Reject payloads that are not valid base64 or lack the "user:pass"
        // separator instead of failing on the destructuring below.
        $credentials = base64_decode(substr($authHeader, 6), true);
        if ($credentials === false || ! str_contains($credentials, ':')) {
            return $this->unauthorized();
        }

        [$inputUsername, $inputPassword] = explode(':', $credentials, 2);

        // Constant-time comparison; both checks always run so the response
        // time does not reveal which part was wrong.
        $usernameMatches = hash_equals($username, $inputUsername);
        $passwordMatches = hash_equals($password, $inputPassword);

        if (! $usernameMatches || ! $passwordMatches) {
            return $this->unauthorized();
        }

        return $next($request);
    }

    /**
     * Build the 401 response that asks the client for Basic credentials.
     */
    private function unauthorized(): Response
    {
        return response('Unauthorized', 401, ['WWW-Authenticate' => 'Basic']);
    }
}
|
||||
@@ -68,6 +68,7 @@ class MemeMedia extends Model
|
||||
'action_keywords',
|
||||
'emotion_keywords',
|
||||
'misc_keywords',
|
||||
'image_hash',
|
||||
];
|
||||
|
||||
protected $hidden = [
|
||||
@@ -86,6 +87,7 @@ class MemeMedia extends Model
|
||||
// 'mov_url',
|
||||
// 'webm_url',
|
||||
'embedding',
|
||||
'image_hash',
|
||||
];
|
||||
|
||||
protected $appends = [
|
||||
|
||||
68
app/Observers/MemeMediaObserver.php
Normal file
68
app/Observers/MemeMediaObserver.php
Normal file
@@ -0,0 +1,68 @@
|
||||
<?php
|
||||
|
||||
namespace App\Observers;
|
||||
|
||||
use App\Helpers\FirstParty\ImageHash\ImageHashService;
|
||||
use App\Models\MemeMedia;
|
||||
|
||||
class MemeMediaObserver
{
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        $this->imageHashService = $imageHashService;
    }

    /**
     * Handle the MemeMedia "created" event.
     *
     * Generates a hash when the new record has a WebP URL but no hash yet.
     */
    public function created(MemeMedia $memeMedia): void
    {
        $this->generateHashIfNeeded($memeMedia);
    }

    /**
     * Handle the MemeMedia "updated" event.
     *
     * When the WebP URL changed, the stored hash (if any) still describes the
     * previous image, so it is replaced — unless the same save also set a new
     * hash explicitly, in which case that value is kept.
     */
    public function updated(MemeMedia $memeMedia): void
    {
        if (! $memeMedia->wasChanged('webp_url')) {
            return;
        }

        $hashIsStale = $memeMedia->image_hash && ! $memeMedia->wasChanged('image_hash');

        $this->generateHashIfNeeded($memeMedia, $hashIsStale);
    }

    /**
     * Generate and store the image hash for a record.
     *
     * Writes use updateQuietly() so no further model events are fired.
     *
     * @param  bool  $replaceStale  When true, an existing hash is treated as
     *                              outdated: it is overwritten on success and
     *                              cleared to null when there is no URL or the
     *                              hash cannot be generated, leaving the row
     *                              with a null hash rather than a wrong one.
     */
    private function generateHashIfNeeded(MemeMedia $memeMedia, bool $replaceStale = false): void
    {
        if (! $memeMedia->webp_url) {
            if ($replaceStale) {
                $memeMedia->updateQuietly(['image_hash' => null]);
            }

            return;
        }

        if ($memeMedia->image_hash && ! $replaceStale) {
            return;
        }

        $hash = $this->imageHashService->generateHashFromUrl($memeMedia->webp_url);

        if ($hash) {
            $memeMedia->updateQuietly(['image_hash' => $hash]);
        } elseif ($replaceStale) {
            $memeMedia->updateQuietly(['image_hash' => null]);
        }
    }

    /**
     * Handle the MemeMedia "deleted" event.
     */
    public function deleted(MemeMedia $memeMedia): void
    {
        //
    }

    /**
     * Handle the MemeMedia "restored" event.
     */
    public function restored(MemeMedia $memeMedia): void
    {
        //
    }

    /**
     * Handle the MemeMedia "force deleted" event.
     */
    public function forceDeleted(MemeMedia $memeMedia): void
    {
        //
    }
}
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
namespace App\Providers;
|
||||
|
||||
use App\Models\MemeMedia;
|
||||
use App\Observers\MemeMediaObserver;
|
||||
use Illuminate\Support\ServiceProvider;
|
||||
|
||||
class AppServiceProvider extends ServiceProvider
|
||||
@@ -19,6 +21,6 @@ public function register(): void
|
||||
*/
|
||||
public function boot(): void
|
||||
{
|
||||
//
|
||||
MemeMedia::observe(MemeMediaObserver::class);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user