Update (copies)

This commit is contained in:
2023-12-03 11:20:03 +08:00
parent 97dc13e785
commit 38c53b4045
13 changed files with 103 additions and 104 deletions

View File

@@ -2,8 +2,8 @@
namespace App\Helpers\FirstParty\SitemapCrawler;
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
use Psr\Http\Message\UriInterface;
use Spatie\Crawler\CrawlProfiles\CrawlProfile;
class CustomCrawlProfile extends CrawlProfile
{
@@ -20,9 +20,7 @@ public function shouldCrawl(UriInterface $url): bool
if ($url->getQuery() !== '') {
return false;
}
return ($this->callback)($url);
}
}

View File

@@ -4,7 +4,8 @@
use Illuminate\Support\Str;
if (! function_exists('count_words')) {
function count_words($string) {
function count_words($string)
{
// Remove punctuation and line breaks
$cleanString = preg_replace('/[\p{P}\s]/u', ' ', $string);
@@ -16,9 +17,9 @@ function count_words($string) {
}
}
if (! function_exists('get_country_names')) {
function get_country_names($lowercase = false) {
function get_country_names($lowercase = false)
{
$countryCodes = config('platform.country_codes');
$countryNames = [];
@@ -34,7 +35,6 @@ function get_country_names($lowercase = false) {
}
}
if (! function_exists('is_valid_url')) {
function is_valid_url($url)
{
@@ -195,7 +195,7 @@ function get_domain_from_url($url)
$parse = parse_url($url);
// Check if 'host' key exists in the parsed URL array
if (!isset($parse['host'])) {
if (! isset($parse['host'])) {
return null; // or you can throw an exception or handle this case as per your requirement
}

View File

@@ -26,11 +26,15 @@ public function home(Request $request)
// $query->whereNotIn('id', $featured_posts->pluck('id')->toArray());
// })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get();
$rss_count = RssPost::where('status', 'published')
->where('published_at', '>=', now()->subDay())
->count();
$top_rss_keywords = HybridTopRssPostKeywords::get(1, 16);
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(15);
$rss_posts = RssPost::with('entities_keywords')->whereNotNull('keywords')->where('status', 'published')->orderBy('published_at', 'desc')->paginate(30);
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords')), 200);
return response(view('front.welcome', compact('rss_posts', 'top_rss_keywords', 'rss_count')), 200);
}
public function terms(Request $request)

View File

@@ -18,9 +18,9 @@ class TestController extends Controller
{
/**
 * Debug action: fetches the country-name list via get_country_names(true)
 * and hands it to dd() for inspection. The $request argument is not used.
 *
 * NOTE(review): the assignment and the dd() call each appear twice below.
 * This looks like the old and new sides of a whitespace-only diff rendered
 * together — confirm and keep a single copy of each.
 */
public function blacklistkw(Request $request)
{
$country_names = get_country_names(true);
$country_names = get_country_names(true);
dd($country_names);
dd($country_names);
}

View File

@@ -51,16 +51,12 @@ public static function handleSingle($rss_url, $hours = 3)
$blacklist_rss_post_keywords = array_merge($blacklist_rss_post_keywords, get_country_names(true));
foreach ($blacklist_rss_post_keywords as $blacklist_keyword)
{
if (str_contains(strtolower($title), $blacklist_keyword))
{
continue 2;
}
foreach ($blacklist_rss_post_keywords as $blacklist_keyword) {
if (str_contains(strtolower($title), $blacklist_keyword)) {
continue 2;
}
}
$raw_posts[] = (object) [
'source' => $f->get_title(),
'source_url' => $rss_url,

View File

@@ -28,7 +28,6 @@ public static function handle(int $rss_post_id)
$final_content = "TITLE: {$rss_post->title}";
if (in_array($rss_post->status, ['blocked', 'trashed'])) {
return;
}
@@ -160,12 +159,9 @@ public static function handle(int $rss_post_id)
$rss_post->status = 'published';
if (!$rss_post->status != 'blocked')
{
if (isset($post_meta_response->output->is_ai_or_tech_news))
{
if ($post_meta_response->output->is_ai_or_tech_news != true)
{
if (! $rss_post->status != 'blocked') {
if (isset($post_meta_response->output->is_ai_or_tech_news)) {
if ($post_meta_response->output->is_ai_or_tech_news != true) {
$rss_post->status = 'blocked';
}
// else
@@ -182,9 +178,8 @@ public static function handle(int $rss_post_id)
}
if (count($words_to_save) <= 0)
{
$rss_post->status = 'blocked';
if (count($words_to_save) <= 0) {
$rss_post->status = 'blocked';
}
if ($rss_post->save()) {

View File

@@ -31,7 +31,7 @@ public static function get($days = 1, $limit = 10)
$queryResults = DB::table('rss_post_keywords')
->select('value', 'value_lowercased', DB::raw('COUNT(value_lowercased) as value_count'))
->where('created_at', '>=', now()->subDays($days))
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media','engadget'])
->whereNotIn('value_lowercased', ['techcrunch', 'the verge', 'forbes', 'producthunt', 'vox media', 'engadget'])
->groupBy('value', 'value_lowercased')
->orderBy(DB::raw('COUNT(value_lowercased)'), 'desc')
->limit($limit)

View File

@@ -18,8 +18,8 @@
],
'blacklist_rss_post_keywords' => [
'deal'
],
'deal',
],
'rss' => [
'http://news.ycombinator.com/rss',

View File

@@ -2,7 +2,6 @@
use App\Helpers\FirstParty\SitemapCrawler\CustomCrawlProfile;
use GuzzleHttp\RequestOptions;
use Spatie\Sitemap\Crawler\Profile;
return [

View File

@@ -1,38 +1,36 @@
@foreach ($rss_posts as $key => $post)
<div class="card mb-1">
<div class="card-body">
<div class="d-flex justify-content-between">
<div class="">
<h3 class="h6 mb-1 fw fw-semibold font-family-roboto-condensed">
{{ $post->title }}
</h3>
<div class="card mb-1">
<div class="card-body">
<div class="d-flex justify-content-between">
<div class="">
<h3 class="h6 mb-1 fw fw-semibold font-family-roboto-condensed">
{{ $post->title }}
</h3>
@if ($post->entities_keywords->count() > 0)
@if ($post->entities_keywords->count() > 0)
<div class="d-flex flex-wrap mb-1">
{{-- @if ($post->entities) --}}
@foreach ($post->entities_keywords as $keyword)
{{-- @if ($keyword->type == 'entity') --}}
@if ($loop->iteration <= 2)
@if ($keyword->is_main)
<h4
class="mb-0 pb-1 d-inline badge bg-secondary border-secondary text-white border me-1 small fw-bold">
{{ $keyword->value }}
</h4>
@else
<h4
class="mb-0 pb-1 d-inline badge text-bg-light border me-1 small fw-normal">
{{ $keyword->value }}
</h4>
@endif
@elseif ($loop->iteration == 3)
<h4
class="mb-0 pb-1 d-inline badge text-bg-light border me-1 small fw-normal">
+{{ count($post->entities_keywords) - 3 }} more
</h4>
@break
@endif
{{-- @endif --}}
@endforeach
{{-- @if ($post->entities) --}}
@foreach ($post->entities_keywords as $keyword)
{{-- @if ($keyword->type == 'entity') --}}
@if ($loop->iteration <= 3)
@if ($keyword->is_main)
<h4 class="mb-0 pb-1 d-inline badge bg-secondary border-secondary text-white border me-1 small fw-bold">
{{ $keyword->value }}
</h4>
@else
<h4 class="mb-0 pb-1 d-inline badge text-bg-light border me-1 small fw-normal">
{{ $keyword->value }}
</h4>
@endif
@elseif ($loop->iteration > 3 && count($post->entities_keywords) > 3)
<h4 class="mb-0 pb-1 d-inline badge text-bg-light border me-1 small fw-normal">
+{{ count($post->entities_keywords) - 3 }} more
</h4>
@break
@endif
{{-- @endif --}}
@endforeach
{{-- @endif --}}
@if ($post->impact_level == 'high')
@@ -42,41 +40,42 @@ class="font-family-roboto-condensed mb-0 pb-1 d-inline badge bg-danger border-da
</h4>
@endif
</div>
@endif
<div class="small">
@if ($post->category)
<span class="d-inline text-secondary small">{{ $post->category->name }}</span>
<i class="bi bi-dot"></i>
@endif
<div class="small">
@if ($post->category)
<span class="d-inline text-secondary small">{{ $post->category->name }}</span>
<i class="bi bi-dot"></i>
<span class="d-inline text-secondary small">
@if ($post->published_at->isBetween(now()->subDays(1), now()))
{{ $post->published_at->diffForHumans() }}
@else
{{ $post->published_at->format('d M') }}
@endif
<span class="d-inline text-secondary small">
@if ($post->published_at->isBetween(now()->subDays(1), now()))
{{ $post->published_at->diffForHumans() }}
@else
{{ $post->published_at->format('d M') }}
@endif
</span>
</span>
@if (!is_empty($post->bites))
@if (!is_empty($post->bites))
<i class="bi bi-dot"></i>
<span class="d-inline text-secondary small">{{ min_read($post->bites) }}</span>
@endif
<span class="d-inline text-secondary small">{{ min_read($post->bites) }}</span>
@endif
<i class="bi bi-dot"></i>
<span class="d-inline text-secondary small">
<i class="bi bi-dot"></i>
<span class="d-inline text-secondary small">
{{ get_domain_from_url($post->post_url) }}
</span>
</div>
</span>
</div>
</div>
<div class=" text-end d-flex justify-content-end">
<div class="align-self-center">
<a class="btn btn-outline-secondary btn-sm rounded-pill px-3 text-decoration-none" target="_blank" rel="nofollow noopener noreferrer" href="{{ $post->post_url }}">👉 Read </a>
</div>
<div class=" text-end d-flex justify-content-end">
<div class="align-self-center">
<a class="btn btn-outline-secondary btn-sm rounded-pill px-3 text-decoration-none"
target="_blank" rel="nofollow noopener noreferrer" href="{{ $post->post_url }}">👉 Read article</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
@endforeach

View File

@@ -5,21 +5,24 @@
<div class="container-lg">
<div class="text-center row justify-content-center mb-4">
<div class="col-12 col-md-10 col-lg-8">
<div class="display-6 fw-bold font-family-roboto-condensed mb-2">Your Future Depends<wbr> on Today's
<h1 class="fw-bolder font-family-roboto-condensed mb-3">Latest AI & tech news in 1 hyper-focused platform</h1>
{{-- <div class="display-6 fw-bold font-family-roboto-condensed mb-2">Your Future Depends<wbr> on Today's
News</div>
<h1 class="h4 fw-normal mb-4">In the fast-evolving world of AI and tech, staying updated is not
optional—it's critical for your future success. Stay updated with hourly news
optional—it's critical for your future success. Stay updated with hourly news
<strong>FutureWalker</strong>.
</h1>
<a href="#latest-news" class="btn btn-primary px-4 rounded-pill text-decoration-none">Start reading
now</a>
</div>
</div> --}}
</div>
<div class="text-center row justify-content-center">
<div class="col-12 col-md-10 col-lg-6">
<h2 class="h4 fw-semibold mb-3">📡 Monitoring top tags for the past 24 hours</h2>
<p data-nosnippet class="h4 mb-3">📡 Top keywords identified for the past 24 hours across {{ $rss_count }} articles identified by GPT</p>
@foreach ($top_rss_keywords as $rss_keyword)
<a href="{{ get_route_search_result($rss_keyword->value_lowercased) }}"

View File

@@ -4,7 +4,6 @@
use App\Jobs\AISerpGenArticleJob;
use App\Jobs\BrowseAndWriteWithAIJob;
use App\Jobs\BrowseRSSPostJob;
use App\Jobs\BrowseSingleRSSJob;
use App\Jobs\CrawlRssPostJob;
use App\Jobs\FillPostMetadataJob;
use App\Jobs\GenerateArticleFeaturedImageJob;
@@ -42,15 +41,12 @@
Route::get('/fix-broken-keywords', function (Request $request) {
$rss_posts = RssPost::whereNull('bites')->take(50)->orderBy('published_at','DESC')->get();
$rss_posts = RssPost::whereNull('bites')->take(50)->orderBy('published_at', 'DESC')->get();
foreach ($rss_posts as $rss_post)
{
ParseRssPostMetadataJob::dispatch($rss_post->id)->onQueue('default')->onConnection('default');
foreach ($rss_posts as $rss_post) {
ParseRssPostMetadataJob::dispatch($rss_post->id)->onQueue('default')->onConnection('default');
}
});
Route::get('/seed', function (Request $request) {

View File

@@ -23,11 +23,20 @@
Route::get('/disclaimer', [App\Http\Controllers\Front\FrontHomeController::class, 'disclaimer'])->name('front.disclaimer')->middleware('cacheResponse:2630000');
Route::get('/bites', [App\Http\Controllers\Front\FrontListController::class, 'index'])->name('front.all')->middleware('cacheResponse:1800');
// Permanently (301) redirect the old /bites/ listing URL to the route named front.all.
// The URI declares no {placeholder}, so the closure takes no arguments.
// route() expects the route-parameter array second and the HTTP status third;
// passing 301 second would treat it as a parameter and leave the status at its default.
Route::get('/bites/', function () {
    return redirect()->route('front.all', [], 301);
});
Route::get('/digest', [App\Http\Controllers\Front\FrontListController::class, 'index'])->name('front.all')->middleware('cacheResponse:1800');
Route::post('/search', [App\Http\Controllers\Front\FrontListController::class, 'search'])->name('front.search');
Route::get('/bites/{query}', [App\Http\Controllers\Front\FrontListController::class, 'searchResults'])->name('front.search.results')->middleware('cacheResponse:1800');
// Permanently (301) redirect /bites/{query} requests to the route named
// front.search.results, forwarding the captured {query} segment unchanged.
Route::get(
    '/bites/{query}',
    fn ($query) => redirect()->route('front.search.results', ['query' => $query], 301)
);
Route::get('/digest/{query}', [App\Http\Controllers\Front\FrontListController::class, 'searchResults'])->name('front.search.results')->middleware('cacheResponse:1800');
Route::get('/{category_slug}/{slug}', [App\Http\Controllers\Front\FrontPostController::class, 'index'])->name('front.post');