Update (copies)

This commit is contained in:
2023-12-03 11:20:03 +08:00
parent 97dc13e785
commit 38c53b4045
13 changed files with 103 additions and 104 deletions

View File

@@ -2,8 +2,8 @@
namespace App\Helpers\FirstParty\SitemapCrawler;
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
use Psr\Http\Message\UriInterface;
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
class CustomCrawlProfile extends CrawlProfile
{
@@ -20,9 +20,7 @@ public function shouldCrawl(UriInterface $url): bool
if ($url->getQuery() !== '') {
return false;
}
return ($this->callback)($url);
}
}

View File

@@ -4,7 +4,8 @@
use Illuminate\Support\Str;
if (! function_exists('count_words')) {
function count_words($string) {
function count_words($string)
{
// Remove punctuation and line breaks
$cleanString = preg_replace('/[\p{P}\s]/u', ' ', $string);
@@ -16,9 +17,9 @@ function count_words($string) {
}
}
if (! function_exists('get_country_names')) {
function get_country_names($lowercase = false) {
function get_country_names($lowercase = false)
{
$countryCodes = config('platform.country_codes');
$countryNames = [];
@@ -34,7 +35,6 @@ function get_country_names($lowercase = false) {
}
}
if (! function_exists('is_valid_url')) {
function is_valid_url($url)
{
@@ -195,7 +195,7 @@ function get_domain_from_url($url)
$parse = parse_url($url);
// Check if 'host' key exists in the parsed URL array
if (!isset($parse['host'])) {
if (! isset($parse['host'])) {
return null; // or you can throw an exception or handle this case as per your requirement
}

View File

@@ -26,11 +26,15 @@ public function home(Request $request)
// $query->whereNotIn('id', $featured_posts->pluck('id')->toArray());
// })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get();
$rss_count = RssPost::where('status', 'published')
->where('published_at', '>=', now()->subDay())
->count();
$top_rss_keywords = HybridTopRssPostKeywords::get(1, 16);
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(15);
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(30);
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords')), 200);
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords', 'rss_count')), 200);
}
public function terms(Request $request)

View File

@@ -18,9 +18,9 @@ class TestController extends Controller
{
/**
 * Debug action: fetches the country-name list and dumps it with dd().
 *
 * NOTE(review): this text appears to be a rendered diff in which the old and
 * new versions of whitespace-only changes are shown back to back, so the
 * repeated statements below are most likely one statement each in the real
 * file — confirm against the repository before changing anything.
 *
 * @param  Request  $request  Incoming request; not read by this method.
 */
public function blacklistkw(Request $request)
{
// `true` is passed as the helper's first argument; presumably this asks for lowercased names — verify in get_country_names().
$country_names = get_country_names(true);
// Same call repeated (see the note in the docblock about diff rendering).
$country_names = get_country_names(true);
// dd() dumps the value and halts execution, so nothing after this line runs.
dd($country_names);
// Unreachable as written, because the previous dd() already terminated the request.
dd($country_names);
}

View File

@@ -51,16 +51,12 @@ public static function handleSingle($rss_url, $hours = 3)
$blacklist_rss_post_keywords = array_merge($blacklist_rss_post_keywords, get_country_names(true));
foreach ($blacklist_rss_post_keywords as $blacklist_keyword)
{
if (str_contains(strtolower($title), $blacklist_keyword))
{
continue 2;
}
foreach ($blacklist_rss_post_keywords as $blacklist_keyword) {
if (str_contains(strtolower($title), $blacklist_keyword)) {
continue 2;
}
}
$raw_posts[] = (object) [
'source' => $f->get_title(),
'source_url' => $rss_url,

View File

@@ -28,7 +28,6 @@ public static function handle(int $rss_post_id)
$final_content = "TITLE: {$rss_post->title}";
if (in_array($rss_post->status, ['blocked', 'trashed'])) {
return;
}
@@ -160,12 +159,9 @@ public static function handle(int $rss_post_id)
$rss_post->status = 'published';
if (!$rss_post->status != 'blocked')
{
if (isset($post_meta_response->output->is_ai_or_tech_news))
{
if ($post_meta_response->output->is_ai_or_tech_news != true)
{
if (! $rss_post->status != 'blocked') {
if (isset($post_meta_response->output->is_ai_or_tech_news)) {
if ($post_meta_response->output->is_ai_or_tech_news != true) {
$rss_post->status = 'blocked';
}
// else
@@ -182,9 +178,8 @@ public static function handle(int $rss_post_id)
}
if (count($words_to_save) <= 0)
{
$rss_post->status = 'blocked';
if (count($words_to_save) <= 0) {
$rss_post->status = 'blocked';
}
if ($rss_post->save()) {

View File

@@ -31,7 +31,7 @@ public static function get($days = 1, $limit = 10)
$queryResults = DB::table('rss_post_keywords')
->select('value', 'value_lowercased', DB::raw('COUNT(value_lowercased) as value_count'))
->where('created_at', '>=', now()->subDays($days))
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media','engadget'])
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media', 'engadget'])
->groupBy('value', 'value_lowercased')
->orderBy(DB::raw('COUNT(value_lowercased)'), 'desc')
->limit($limit)