Update (copies)
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
|
||||
namespace App\Helpers\FirstParty\SitemapCrawler;
|
||||
|
||||
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
|
||||
use Psr\Http\Message\UriInterface;
|
||||
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
|
||||
|
||||
class CustomCrawlProfile extends CrawlProfile
|
||||
{
|
||||
@@ -20,9 +20,7 @@ public function shouldCrawl(UriInterface $url): bool
|
||||
if ($url->getQuery() !== '') {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return ($this->callback)($url);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
if (! function_exists('count_words')) {
|
||||
function count_words($string) {
|
||||
function count_words($string)
|
||||
{
|
||||
// Replace punctuation and all whitespace (including line breaks) with single spaces
|
||||
$cleanString = preg_replace('/[\p{P}\s]/u', ' ', $string);
|
||||
|
||||
@@ -16,9 +17,9 @@ function count_words($string) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (! function_exists('get_country_names')) {
|
||||
function get_country_names($lowercase = false) {
|
||||
function get_country_names($lowercase = false)
|
||||
{
|
||||
$countryCodes = config('platform.country_codes');
|
||||
$countryNames = [];
|
||||
|
||||
@@ -34,7 +35,6 @@ function get_country_names($lowercase = false) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (! function_exists('is_valid_url')) {
|
||||
function is_valid_url($url)
|
||||
{
|
||||
@@ -195,7 +195,7 @@ function get_domain_from_url($url)
|
||||
$parse = parse_url($url);
|
||||
|
||||
// Check if 'host' key exists in the parsed URL array
|
||||
if (!isset($parse['host'])) {
|
||||
if (! isset($parse['host'])) {
|
||||
return null; // No host component could be parsed from the URL
|
||||
}
|
||||
|
||||
|
||||
@@ -26,11 +26,15 @@ public function home(Request $request)
|
||||
// $query->whereNotIn('id', $featured_posts->pluck('id')->toArray());
|
||||
// })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get();
|
||||
|
||||
$rss_count = RssPost::where('status', 'published')
|
||||
->where('published_at', '>=', now()->subDay())
|
||||
->count();
|
||||
|
||||
$top_rss_keywords = HybridTopRssPostKeywords::get(1, 16);
|
||||
|
||||
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(15);
|
||||
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(30);
|
||||
|
||||
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords')), 200);
|
||||
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords', 'rss_count')), 200);
|
||||
}
|
||||
|
||||
public function terms(Request $request)
|
||||
|
||||
@@ -18,9 +18,9 @@ class TestController extends Controller
|
||||
{
|
||||
public function blacklistkw(Request $request)
|
||||
{
|
||||
$country_names = get_country_names(true);
|
||||
$country_names = get_country_names(true);
|
||||
|
||||
dd($country_names);
|
||||
dd($country_names);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -51,16 +51,12 @@ public static function handleSingle($rss_url, $hours = 3)
|
||||
|
||||
$blacklist_rss_post_keywords = array_merge($blacklist_rss_post_keywords, get_country_names(true));
|
||||
|
||||
foreach ($blacklist_rss_post_keywords as $blacklist_keyword)
|
||||
{
|
||||
if (str_contains(strtolower($title), $blacklist_keyword))
|
||||
{
|
||||
continue 2;
|
||||
}
|
||||
foreach ($blacklist_rss_post_keywords as $blacklist_keyword) {
|
||||
if (str_contains(strtolower($title), $blacklist_keyword)) {
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
$raw_posts[] = (object) [
|
||||
'source' => $f->get_title(),
|
||||
'source_url' => $rss_url,
|
||||
|
||||
@@ -28,7 +28,6 @@ public static function handle(int $rss_post_id)
|
||||
|
||||
$final_content = "TITLE: {$rss_post->title}";
|
||||
|
||||
|
||||
if (in_array($rss_post->status, ['blocked', 'trashed'])) {
|
||||
return;
|
||||
}
|
||||
@@ -160,12 +159,9 @@ public static function handle(int $rss_post_id)
|
||||
|
||||
$rss_post->status = 'published';
|
||||
|
||||
if (!$rss_post->status != 'blocked')
|
||||
{
|
||||
if (isset($post_meta_response->output->is_ai_or_tech_news))
|
||||
{
|
||||
if ($post_meta_response->output->is_ai_or_tech_news != true)
|
||||
{
|
||||
if (! $rss_post->status != 'blocked') {
|
||||
if (isset($post_meta_response->output->is_ai_or_tech_news)) {
|
||||
if ($post_meta_response->output->is_ai_or_tech_news != true) {
|
||||
$rss_post->status = 'blocked';
|
||||
}
|
||||
// else
|
||||
@@ -182,9 +178,8 @@ public static function handle(int $rss_post_id)
|
||||
|
||||
}
|
||||
|
||||
if (count($words_to_save) <= 0)
|
||||
{
|
||||
$rss_post->status = 'blocked';
|
||||
if (count($words_to_save) <= 0) {
|
||||
$rss_post->status = 'blocked';
|
||||
}
|
||||
|
||||
if ($rss_post->save()) {
|
||||
|
||||
@@ -31,7 +31,7 @@ public static function get($days = 1, $limit = 10)
|
||||
$queryResults = DB::table('rss_post_keywords')
|
||||
->select('value', 'value_lowercased', DB::raw('COUNT(value_lowercased) as value_count'))
|
||||
->where('created_at', '>=', now()->subDays($days))
|
||||
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media','engadget'])
|
||||
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media', 'engadget'])
|
||||
->groupBy('value', 'value_lowercased')
|
||||
->orderBy(DB::raw('COUNT(value_lowercased)'), 'desc')
|
||||
->limit($limit)
|
||||
|
||||
Reference in New Issue
Block a user