<?php

namespace App\Jobs\Tasks;

use App\Helpers\FirstParty\OSSUploader\OSSUploader;
use App\Models\Category;
use App\Models\NewsSerpResult;
use App\Models\SerpUrl;
use Carbon\Carbon;
use Exception;

/**
 * Turns a stored news SERP payload into SerpUrl rows.
 *
 * The payload is read through OSSUploader::readJson() using the
 * `platform.dataset.news.news_serp.*` config values and the result's filename;
 * the items are taken from `tasks[0]->result[0]->items`.
 */
class ParseNewsSerpDomainsTask
{
    /**
     * Persist up to `$serp_counts` usable SERP items of the given result as SerpUrl rows.
     *
     * An item is skipped when it has no URL, when its title contains a colon,
     * when a SerpUrl row for its URL is marked `blocked`, or when an earlier
     * item of the same payload normalizes to the same URL.
     *
     * @param  NewsSerpResult  $news_serp_result  Result whose stored payload is parsed.
     * @param  int  $serp_counts  Maximum number of items to persist. The limit is checked
     *                            after an item is accepted, so at least one item is taken
     *                            whenever one qualifies.
     * @return bool True when at least one SerpUrl row was saved, false otherwise
     *              (including when the payload could not be read).
     */
    public static function handle(NewsSerpResult $news_serp_result, $serp_counts = 1)
    {
        $serp_items = self::loadSerpItems($news_serp_result);

        if ($serp_items === null) {
            return false;
        }

        $success = false;

        foreach (self::selectCandidates($serp_items, $serp_counts) as $candidate) {
            $serp_item = $candidate['item'];

            // Each candidate carries its OWN existing row (or null); reusing a single
            // variable across items would write every item into the same model.
            $serp_url = $candidate['model'];

            if ($serp_url === null) {
                $serp_url = new SerpUrl;
                $serp_url->category_id = $news_serp_result->category_id;
                $serp_url->category_name = $news_serp_result->category_name;
                $serp_url->news_serp_result_id = $news_serp_result->id;
            }

            $serp_url->source = 'serp';
            $serp_url->url = $candidate['url'];
            $serp_url->country_iso = $news_serp_result->serp_country_iso;

            $title = $serp_item->title ?? null;
            if (! is_empty($title)) {
                $serp_url->title = $title;
            }

            $snippet = $serp_item->snippet ?? null;
            if (! is_empty($snippet)) {
                $serp_url->description = $snippet;
            }

            // Only stamp serp_at when something actually changed on the row.
            if ($serp_url->isDirty()) {
                $serp_url->serp_at = $news_serp_result->category->serp_at;
            }

            if ($serp_url->save()) {
                $success = true;
            }
        }

        return $success;
    }

    /**
     * Read the SERP items of a result from storage.
     *
     * @return iterable|null The items, or null when reading failed or the payload
     *                       does not contain an iterable `items` entry.
     */
    private static function loadSerpItems(NewsSerpResult $news_serp_result)
    {
        try {
            $items = OSSUploader::readJson(
                config('platform.dataset.news.news_serp.driver'),
                config('platform.dataset.news.news_serp.path'),
                $news_serp_result->filename
            )?->tasks[0]?->result[0]?->items;
        } catch (Exception $e) {
            // Best effort: an unreadable payload is treated as "nothing to do".
            return null;
        }

        return is_iterable($items) ? $items : null;
    }

    /**
     * Pick the SERP items that should be persisted, paired with their existing row.
     *
     * @param  iterable  $serp_items  Raw SERP items (objects exposing `url`, `title`, ...).
     * @param  int  $serp_counts  Stop once this many candidates were collected.
     * @return array<int, array{item: object, model: SerpUrl|null, url: string}>
     */
    private static function selectCandidates(iterable $serp_items, $serp_counts)
    {
        $candidates = [];
        $seen_urls = [];

        foreach ($serp_items as $serp_item) {
            $raw_url = $serp_item->url ?? null;

            // NOTE(review): is_empty() is not a PHP built-in; presumably a project helper.
            if (! is_string($raw_url) || is_empty($raw_url)) {
                continue;
            }

            // Titles containing a colon are rejected. Checked before the lookup
            // below so rejected items do not cost a database query.
            $title = $serp_item->title ?? null;
            if (is_string($title) && str_contains($title, ':')) {
                continue;
            }

            $normalized_url = self::normalizeUrl($raw_url);

            if (isset($seen_urls[$normalized_url])) {
                continue;
            }

            // Rows are stored under the normalized URL, so the lookup must use it;
            // the raw URL is included as well to match rows stored without normalization.
            $matches = SerpUrl::whereIn(
                'url',
                array_values(array_unique([$normalized_url, $raw_url]))
            )->get();

            if ($matches->contains('status', 'blocked')) {
                continue;
            }

            $seen_urls[$normalized_url] = true;

            $candidates[] = [
                'item' => $serp_item,
                'model' => $matches->firstWhere('url', $normalized_url) ?? $matches->first(),
                'url' => $normalized_url,
            ];

            if (count($candidates) >= $serp_counts) {
                break;
            }
        }

        return $candidates;
    }

    /**
     * Reduce a URL to `https://<host><path>` so equivalent links compare equal.
     *
     * The scheme is forced to https, trailing slashes are stripped from the path,
     * and everything else (port, credentials, query string, fragment) is dropped
     * because only host and path are used to rebuild the URL.
     *
     * @param  mixed  $url  URL to normalize.
     * @return mixed The normalized URL, or `$url` unchanged when it cannot be parsed
     *               or has no host.
     */
    private static function normalizeUrl($url)
    {
        // parse_url() returns false for seriously malformed URLs.
        $parts = is_string($url) ? parse_url($url) : false;

        if (! is_array($parts) || ! isset($parts['host'])) {
            return $url;
        }

        return sprintf(
            '%s://%s%s',
            'https',
            $parts['host'],
            rtrim($parts['path'] ?? '', '/')
        );
    }
}