This commit is contained in:
ct
2025-07-15 20:03:10 +08:00
parent b54e4f2092
commit 096f515f58
15 changed files with 1161 additions and 3 deletions

View File

@@ -0,0 +1,124 @@
<?php
namespace App\Console\Commands;
use App\Helpers\FirstParty\ImageHash\ImageHashService;
use App\Models\MemeMedia;
use Illuminate\Console\Command;
class FindDuplicateImages extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'app:find-duplicate-images {--threshold=5 : Hamming distance threshold for duplicates}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Find duplicate images using Hamming distance comparison of WebP hashes';

    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        parent::__construct();
        $this->imageHashService = $imageHashService;
    }

    /**
     * Execute the console command.
     *
     * Loads every MemeMedia row that has both an image hash and a WebP URL,
     * greedily groups rows whose hash distance to a not-yet-grouped "original"
     * is within the --threshold option, and prints each group plus totals.
     */
    public function handle()
    {
        $threshold = (int) $this->option('threshold');

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->get(['id', 'name', 'image_hash', 'webp_url']);

        if ($records->isEmpty()) {
            $this->info('No records with image hashes found. Run app:generate-image-hashes first.');

            return;
        }

        $this->info("Checking {$records->count()} records for duplicates with threshold: {$threshold}");

        $duplicates = [];

        // IDs already assigned to a group, stored as array keys so the
        // membership test inside the nested loop is a constant-time isset()
        // instead of a linear in_array() scan.
        $processed = [];

        foreach ($records as $record) {
            if (isset($processed[$record->id])) {
                continue;
            }

            $similarRecords = [];

            foreach ($records as $compareRecord) {
                if ($record->id === $compareRecord->id || isset($processed[$compareRecord->id])) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $compareRecord->image_hash
                );

                if ($distance <= $threshold) {
                    $similarRecords[] = [
                        'id' => $compareRecord->id,
                        'name' => $compareRecord->name,
                        'distance' => $distance,
                        'url' => $compareRecord->webp_url,
                    ];
                    // A record belongs to at most one group.
                    $processed[$compareRecord->id] = true;
                }
            }

            if ($similarRecords !== []) {
                $duplicates[] = [
                    'original' => [
                        'id' => $record->id,
                        'name' => $record->name,
                        'url' => $record->webp_url,
                    ],
                    'duplicates' => $similarRecords,
                ];
                $processed[$record->id] = true;
            }
        }

        if ($duplicates === []) {
            $this->info('No duplicates found.');

            return;
        }

        $this->info('Found '.count($duplicates).' duplicate groups:');

        foreach ($duplicates as $group) {
            $this->newLine();
            $this->line("Original: [{$group['original']['id']}] {$group['original']['name']}");
            $this->line("URL: {$group['original']['url']}");

            foreach ($group['duplicates'] as $duplicate) {
                $this->line(" → [{$duplicate['id']}] {$duplicate['name']} (distance: {$duplicate['distance']})");
                $this->line(" URL: {$duplicate['url']}");
            }
        }

        $this->newLine();
        $this->info('Total duplicate groups: '.count($duplicates));

        $totalDuplicates = array_sum(array_map(
            static fn (array $group): int => count($group['duplicates']),
            $duplicates
        ));

        $this->info("Total duplicate records: {$totalDuplicates}");
    }
}

View File

@@ -0,0 +1,84 @@
<?php
namespace App\Console\Commands;
use App\Helpers\FirstParty\ImageHash\ImageHashService;
use App\Models\MemeMedia;
use Illuminate\Console\Command;
class GenerateImageHashes extends Command
{
    /**
     * Console signature: command name plus the optional --force switch.
     *
     * @var string
     */
    protected $signature = 'app:generate-image-hashes {--force : Force regeneration of existing hashes}';

    /**
     * Human-readable summary shown in the command list.
     *
     * @var string
     */
    protected $description = 'Generate image hashes for existing WebP URLs in MemeMedia records';

    public function __construct(private ImageHashService $imageHashService)
    {
        parent::__construct();
    }

    /**
     * Compute and store an image hash for each selected MemeMedia record.
     *
     * Only records with a WebP URL are considered; unless --force is given,
     * records that already carry a hash are left out. Progress is shown with
     * a progress bar and a success/failure tally is printed at the end.
     */
    public function handle()
    {
        $selection = MemeMedia::query();

        // Without --force, restrict the run to rows that have no hash yet.
        if (! $this->option('force')) {
            $selection->whereNull('image_hash');
        }

        $items = $selection->whereNotNull('webp_url')->get();

        if ($items->isEmpty()) {
            $this->info('No records found to process.');

            return;
        }

        $total = $items->count();
        $this->info("Processing {$total} records...");

        $bar = $this->output->createProgressBar($total);
        $bar->start();

        $succeeded = 0;
        $failures = 0;

        foreach ($items as $item) {
            $hash = $this->imageHashService->generateHashFromUrl($item->webp_url);

            if (! $hash) {
                // Report the failure on its own line so it does not collide with the bar.
                $failures++;
                $this->newLine();
                $this->error("Failed to generate hash for ID: {$item->id} - {$item->webp_url}");
                $bar->advance();

                continue;
            }

            $item->update(['image_hash' => $hash]);
            $succeeded++;
            $bar->advance();
        }

        $bar->finish();
        $this->newLine();

        $this->info('Processing complete!');
        $this->info("Processed: {$succeeded}");
        $this->info("Failed: {$failures}");
    }
}

View File

@@ -0,0 +1,100 @@
<?php
namespace App\Helpers\FirstParty\ImageHash;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
use Jenssegers\ImageHash\ImageHash;
use Jenssegers\ImageHash\Implementations\DifferenceHash;
/**
 * Generates perceptual image hashes (as hex strings) and compares them.
 */
class ImageHashService
{
    /**
     * Number of set bits in each 4-bit value, indexed by that value.
     * Used to count differing bits one hex digit at a time.
     */
    private const NIBBLE_BIT_COUNTS = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4];

    private ImageHash $hasher;

    public function __construct()
    {
        $this->hasher = new ImageHash(new DifferenceHash);
    }

    /**
     * Download an image and hash its contents.
     *
     * @return string|null Hex hash, or null when the download or hashing fails
     *                     (the failure is logged).
     */
    public function generateHashFromUrl(string $url): ?string
    {
        try {
            $response = Http::timeout(30)->get($url);

            if (! $response->successful()) {
                Log::warning("Failed to download image from URL: {$url}");

                return null;
            }

            return $this->generateHashFromData($response->body());
        } catch (\Exception $e) {
            Log::error("Error generating hash from URL {$url}: ".$e->getMessage());

            return null;
        }
    }

    /**
     * Hash raw image bytes by writing them to a temporary file first.
     *
     * The temporary file is removed in every outcome, including when the
     * hasher throws.
     *
     * @return string|null Hex hash, or null on failure (the failure is logged).
     */
    public function generateHashFromData(string $imageData): ?string
    {
        $tempFile = null;

        try {
            $tempFile = tempnam(sys_get_temp_dir(), 'imagehash_');

            if ($tempFile === false) {
                Log::error('Error generating hash from image data: unable to create temporary file');

                return null;
            }

            if (file_put_contents($tempFile, $imageData) === false) {
                Log::error('Error generating hash from image data: unable to write temporary file');

                return null;
            }

            return $this->hasher->hash($tempFile)->toHex();
        } catch (\Exception $e) {
            Log::error('Error generating hash from image data: '.$e->getMessage());

            return null;
        } finally {
            // Runs on success, on early return and on exception, so the
            // temporary file never outlives this call.
            if (is_string($tempFile) && is_file($tempFile)) {
                unlink($tempFile);
            }
        }
    }

    /**
     * Count the bits that differ between two hex-encoded hashes.
     *
     * The shorter hash is left-padded with zeros so both have the same number
     * of hex digits. Each pair of digits is XOR-ed and the set bits of the
     * result are added up, so the result is a bit-level Hamming distance
     * (for example "f" vs "0" is 4, not 1).
     *
     * @return int Number of differing bits, or PHP_INT_MAX when either hash is
     *             empty or contains non-hexadecimal characters.
     */
    public function calculateHammingDistance(string $hash1, string $hash2): int
    {
        // ctype_xdigit() is false for the empty string, so this also rejects
        // missing hashes while still accepting a legitimate all-zero hash.
        if (! ctype_xdigit($hash1) || ! ctype_xdigit($hash2)) {
            return PHP_INT_MAX;
        }

        $maxLength = max(strlen($hash1), strlen($hash2));
        $hash1 = str_pad($hash1, $maxLength, '0', STR_PAD_LEFT);
        $hash2 = str_pad($hash2, $maxLength, '0', STR_PAD_LEFT);

        $distance = 0;

        for ($i = 0; $i < $maxLength; $i++) {
            $differingBits = hexdec($hash1[$i]) ^ hexdec($hash2[$i]);
            $distance += self::NIBBLE_BIT_COUNTS[$differingBits];
        }

        return $distance;
    }

    /**
     * Whether two hashes are within $threshold differing bits of each other.
     */
    public function areHashesSimilar(string $hash1, string $hash2, int $threshold = 5): bool
    {
        return $this->calculateHammingDistance($hash1, $hash2) <= $threshold;
    }

    /**
     * Find the hashes within $threshold of $targetHash.
     *
     * @param  array  $hashes  Hex hashes keyed by an identifier of the caller's choosing.
     * @return array Distances keyed by the original identifier, sorted ascending.
     */
    public function findSimilarHashes(string $targetHash, array $hashes, int $threshold = 5): array
    {
        $similar = [];

        foreach ($hashes as $id => $hash) {
            // Compute the distance once and reuse it for both the test and the result.
            $distance = $this->calculateHammingDistance($targetHash, $hash);

            if ($distance <= $threshold) {
                $similar[$id] = $distance;
            }
        }

        asort($similar);

        return $similar;
    }
}

View File

@@ -0,0 +1,170 @@
<?php
namespace App\Http\Controllers;
use App\Helpers\FirstParty\ImageHash\ImageHashService;
use App\Models\MemeMedia;
use Illuminate\Http\Request;
use Inertia\Inertia;
/**
 * Admin endpoints for reviewing and resolving visually duplicate MemeMedia records.
 */
class AdminDuplicateController extends Controller
{
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        $this->imageHashService = $imageHashService;
    }

    /**
     * Render the duplicate-management admin page.
     */
    public function index()
    {
        return Inertia::render('admin/duplicate-management', [
            'title' => 'Duplicate Management',
        ]);
    }

    /**
     * Group records whose image hashes are within the requested distance.
     *
     * Accepts an optional non-negative integer "threshold" (default 5) and
     * returns a JSON payload with the duplicate groups and their totals.
     */
    public function scan(Request $request)
    {
        // Reject non-numeric or negative thresholds up front so the distance
        // comparison below is always int-to-int.
        $request->validate([
            'threshold' => 'nullable|integer|min:0',
        ]);

        $threshold = (int) ($request->input('threshold') ?? 5);

        $records = MemeMedia::whereNotNull('image_hash')
            ->whereNotNull('webp_url')
            ->where('image_hash', '!=', '')
            ->get(['id', 'name', 'image_hash', 'webp_url', 'group']);

        if ($records->isEmpty()) {
            return response()->json([
                'duplicates' => [],
                'message' => 'No records with image hashes found.',
            ]);
        }

        $duplicates = [];

        // IDs already assigned to a group, kept as array keys so the lookup
        // in the nested loop is a constant-time isset().
        $processed = [];

        foreach ($records as $record) {
            if (isset($processed[$record->id])) {
                continue;
            }

            // Skip if hash is empty or invalid
            if (empty($record->image_hash)) {
                continue;
            }

            $similarRecords = [];

            foreach ($records as $compareRecord) {
                if ($record->id === $compareRecord->id || isset($processed[$compareRecord->id])) {
                    continue;
                }

                // Skip if the other hash is empty or invalid
                if (empty($compareRecord->image_hash)) {
                    continue;
                }

                $distance = $this->imageHashService->calculateHammingDistance(
                    $record->image_hash,
                    $compareRecord->image_hash
                );

                // Skip if distance calculation failed (returns PHP_INT_MAX)
                if ($distance === PHP_INT_MAX) {
                    continue;
                }

                if ($distance <= $threshold) {
                    $similarRecords[] = [
                        'id' => $compareRecord->id,
                        'name' => $compareRecord->name,
                        'distance' => $distance,
                        'url' => $compareRecord->webp_url,
                        'group' => $compareRecord->group,
                    ];
                    $processed[$compareRecord->id] = true;
                }
            }

            if ($similarRecords !== []) {
                // Higher group value first; ties broken by ascending ID.
                usort($similarRecords, static function (array $a, array $b): int {
                    if ($a['group'] == $b['group']) {
                        return $a['id'] <=> $b['id'];
                    }

                    return $b['group'] <=> $a['group'];
                });

                $duplicates[] = [
                    'original' => [
                        'id' => $record->id,
                        'name' => $record->name,
                        'url' => $record->webp_url,
                        'group' => $record->group,
                    ],
                    'duplicates' => $similarRecords,
                ];
                $processed[$record->id] = true;
            }
        }

        return response()->json([
            'duplicates' => $duplicates,
            'total_groups' => count($duplicates),
            'total_duplicates' => array_sum(array_map(
                static fn (array $group): int => count($group['duplicates']),
                $duplicates
            )),
        ]);
    }

    /**
     * Delete the record identified by the "id" input and report what was removed.
     */
    public function delete(Request $request)
    {
        $request->validate([
            'id' => 'required|exists:meme_medias,id',
        ]);

        $record = MemeMedia::findOrFail($request->input('id'));

        // Soft delete the record
        $record->delete();

        return response()->json([
            'success' => true,
            'message' => "Deleted '{$record->name}' (Group {$record->group})",
        ]);
    }

    /**
     * Recompute and store the image hash of the record identified by "id".
     *
     * Responds with 400 when the record has no WebP URL and 500 when the
     * hash could not be generated.
     */
    public function regenerateHash(Request $request)
    {
        $request->validate([
            'id' => 'required|exists:meme_medias,id',
        ]);

        $record = MemeMedia::findOrFail($request->input('id'));

        if (! $record->webp_url) {
            return response()->json([
                'success' => false,
                'message' => 'No WebP URL found for this record',
            ], 400);
        }

        $hash = $this->imageHashService->generateHashFromUrl($record->webp_url);

        if (! $hash) {
            return response()->json([
                'success' => false,
                'message' => 'Failed to generate hash',
            ], 500);
        }

        $record->update(['image_hash' => $hash]);

        return response()->json([
            'success' => true,
            'message' => 'Hash regenerated successfully',
            'hash' => $hash,
        ]);
    }
}

View File

@@ -0,0 +1,47 @@
<?php
namespace App\Http\Middleware;
use Closure;
use Illuminate\Http\Request;
use Symfony\Component\HttpFoundation\Response;
/**
 * Guards a route with HTTP Basic authentication against a single
 * username/password pair taken from the environment.
 */
class BasicAuthMiddleware
{
    /**
     * Handle an incoming request.
     *
     * Responds with 401 and a Basic challenge unless the Authorization header
     * carries credentials that match BASIC_AUTH_USERNAME / BASIC_AUTH_PASSWORD.
     *
     * @param  \Closure(\Illuminate\Http\Request): (\Symfony\Component\HttpFoundation\Response)  $next
     */
    public function handle(Request $request, Closure $next): Response
    {
        // NOTE(review): Laravel documents that env() may return null outside
        // config files once the configuration is cached; consider reading
        // these through config() instead — confirm against the deployment.
        $username = env('BASIC_AUTH_USERNAME');
        $password = env('BASIC_AUTH_PASSWORD');

        // If credentials are not set (or are not plain strings), deny access
        if (! is_string($username) || ! is_string($password) || ! $username || ! $password) {
            return $this->unauthorized();
        }

        // The header must be present and use the Basic scheme
        $authHeader = $request->header('Authorization');

        if (! is_string($authHeader) || ! str_starts_with($authHeader, 'Basic ')) {
            return $this->unauthorized();
        }

        // Strict decoding rejects payloads that are not valid base64
        $credentials = base64_decode(substr($authHeader, 6), true);

        // Without a ':' separator there is no password part to destructure
        if ($credentials === false || ! str_contains($credentials, ':')) {
            return $this->unauthorized();
        }

        [$inputUsername, $inputPassword] = explode(':', $credentials, 2);

        // Evaluate both comparisons, each in constant time, before deciding,
        // so the response does not reveal which part was wrong.
        $usernameMatches = hash_equals($username, $inputUsername);
        $passwordMatches = hash_equals($password, $inputPassword);

        if (! ($usernameMatches && $passwordMatches)) {
            return $this->unauthorized();
        }

        return $next($request);
    }

    /**
     * Build the 401 response carrying the Basic authentication challenge.
     */
    private function unauthorized(): Response
    {
        return response('Unauthorized', 401, ['WWW-Authenticate' => 'Basic']);
    }
}

View File

@@ -68,6 +68,7 @@ class MemeMedia extends Model
'action_keywords',
'emotion_keywords',
'misc_keywords',
'image_hash',
];
protected $hidden = [
@@ -86,6 +87,7 @@ class MemeMedia extends Model
// 'mov_url',
// 'webm_url',
'embedding',
'image_hash',
];
protected $appends = [

View File

@@ -0,0 +1,68 @@
<?php
namespace App\Observers;
use App\Helpers\FirstParty\ImageHash\ImageHashService;
use App\Models\MemeMedia;
/**
 * Keeps MemeMedia::image_hash in step with MemeMedia::webp_url.
 */
class MemeMediaObserver
{
    private ImageHashService $imageHashService;

    public function __construct(ImageHashService $imageHashService)
    {
        $this->imageHashService = $imageHashService;
    }

    /**
     * Handle the MemeMedia "created" event.
     *
     * Generates a hash for a new record that has a WebP URL but no hash yet.
     */
    public function created(MemeMedia $memeMedia): void
    {
        $this->generateHashIfNeeded($memeMedia);
    }

    /**
     * Handle the MemeMedia "updated" event.
     *
     * When the WebP URL changed, the stored hash describes the previous image,
     * so it is recomputed. If the same save also set image_hash explicitly,
     * the caller's value is left untouched.
     */
    public function updated(MemeMedia $memeMedia): void
    {
        if (! $memeMedia->wasChanged('webp_url') || $memeMedia->wasChanged('image_hash')) {
            return;
        }

        $this->refreshHash($memeMedia);
    }

    /**
     * Generate and store a hash only when the record has a URL and no hash.
     */
    private function generateHashIfNeeded(MemeMedia $memeMedia): void
    {
        if ($memeMedia->webp_url && ! $memeMedia->image_hash) {
            $hash = $this->imageHashService->generateHashFromUrl($memeMedia->webp_url);

            if ($hash) {
                // Quiet update so this write does not re-trigger model events.
                $memeMedia->updateQuietly(['image_hash' => $hash]);
            }
        }
    }

    /**
     * Replace the stored hash with one computed from the current WebP URL.
     *
     * The hash is set to null when the URL is empty or hashing fails, so a
     * hash belonging to the previous image is never kept.
     */
    private function refreshHash(MemeMedia $memeMedia): void
    {
        $hash = $memeMedia->webp_url
            ? $this->imageHashService->generateHashFromUrl($memeMedia->webp_url)
            : null;

        if ($hash !== $memeMedia->image_hash) {
            // Quiet update so this write does not re-trigger model events.
            $memeMedia->updateQuietly(['image_hash' => $hash]);
        }
    }

    /**
     * Handle the MemeMedia "deleted" event.
     */
    public function deleted(MemeMedia $memeMedia): void
    {
        //
    }

    /**
     * Handle the MemeMedia "restored" event.
     */
    public function restored(MemeMedia $memeMedia): void
    {
        //
    }

    /**
     * Handle the MemeMedia "force deleted" event.
     */
    public function forceDeleted(MemeMedia $memeMedia): void
    {
        //
    }
}

View File

@@ -2,6 +2,8 @@
namespace App\Providers;
use App\Models\MemeMedia;
use App\Observers\MemeMediaObserver;
use Illuminate\Support\ServiceProvider;
class AppServiceProvider extends ServiceProvider
@@ -19,6 +21,6 @@ public function register(): void
*/
public function boot(): void
{
//
MemeMedia::observe(MemeMediaObserver::class);
}
}